TAV: letterbox detection encoding complete

This commit is contained in:
minjaesong
2025-11-20 02:13:45 +09:00
parent 44cc54264a
commit 92274a8e19
5 changed files with 1242 additions and 220 deletions

View File

@@ -3,8 +3,9 @@
CC = gcc
CXX = g++
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE #-fsanitize=address
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE #-fsanitize=address
DBGFLAGS = #-fsanitize=address
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
@@ -33,13 +34,13 @@ tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav.c -o encoder_tav.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o
$(CXX) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
$(CXX) $(DBGFLAGS) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
tav_decoder: decoder_tav.c decoder_tad.c decoder_tad.h
rm -f decoder_tav decoder_tav.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c decoder_tad.c -o decoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c decoder_tav.c -o decoder_tav.o
$(CC) -o decoder_tav decoder_tav.o decoder_tad.o $(LIBS)
$(CC) $(DBGFLAGS) -o decoder_tav decoder_tav.o decoder_tad.o $(LIBS)
tav_inspector: tav_inspector.c
rm -f tav_inspector
@@ -50,7 +51,7 @@ encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad_standalone.c -o encoder_tad_standalone.o
$(CC) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
decoder_tad: decoder_tad.c
rm -f decoder_tad

View File

@@ -689,12 +689,19 @@ static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t
// fprintf(stderr, "[EZBC] Decoded header: MSB=%d, width=%d, height=%d (expected pixels=%d)\n",
// msb_bitplane, width, height, expected_count);
if (width * height != expected_count) {
fprintf(stderr, "EZBC dimension mismatch: %dx%d != %d\n", width, height, expected_count);
// With crop encoding, dimensions can vary per frame - trust the EZBC header
// Just ensure we don't overflow the output buffer
const int actual_count = width * height;
if (actual_count > expected_count) {
fprintf(stderr, "EZBC dimension overflow: %dx%d (%d) > %d\n",
width, height, actual_count, expected_count);
memset(output, 0, expected_count * sizeof(int16_t));
return;
}
// If actual count is less, only decode what we need
expected_count = actual_count;
// Initialise output and state tracking
memset(output, 0, expected_count * sizeof(int16_t));
int8_t *significant = calloc(expected_count, sizeof(int8_t));
@@ -785,6 +792,47 @@ static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t
// nonzero_count, 100.0 * nonzero_count / expected_count, min_val, max_val);
}
// Helper: peek at EZBC header to get dimensions without decoding
static int ezbc_peek_dimensions(const uint8_t *compressed_data, int channel_layout,
int *out_width, int *out_height) {
const int has_y = (channel_layout & 0x04) == 0;
if (!has_y) {
return -1; // Need Y channel to get dimensions
}
// Read Y channel size header
const uint32_t size = ((uint32_t)compressed_data[0]) |
((uint32_t)compressed_data[1] << 8) |
((uint32_t)compressed_data[2] << 16) |
((uint32_t)compressed_data[3] << 24);
if (size < 6) {
return -1; // Too small to contain EZBC header
}
// Skip to EZBC data for Y channel (after size header)
const uint8_t *ezbc_data = compressed_data + 4;
// Read EZBC header: skip MSB bitplane (1 byte), then read width and height
// Note: EZBC uses bitstream format, but dimensions are at fixed positions
// We need to parse the bitstream header carefully
// Create a temporary reader to parse the bitstream
ezbc_bitreader_t reader;
reader.data = ezbc_data;
reader.size = size;
reader.byte_pos = 0;
reader.bit_pos = 0;
// Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
ezbc_read_bits(&reader, 8); // Skip MSB bitplane
*out_width = ezbc_read_bits(&reader, 16);
*out_height = ezbc_read_bits(&reader, 16);
return 0;
}
// EZBC postprocessing for single frames
static void postprocess_coefficients_ezbc(uint8_t *compressed_data, int coeff_count,
int16_t *output_y, int16_t *output_co, int16_t *output_cg,
@@ -1457,15 +1505,61 @@ error_cleanup:
// Postprocess GOP EZBC format to per-frame coefficients (entropyCoder=1)
// Layout: [frame0_size(4)][frame0_ezbc_data][frame1_size(4)][frame1_ezbc_data]...
// Note: EZBC is a complex embedded bitplane codec - this is a simplified placeholder
// Returns the actual dimensions through output parameters (for crop encoding support)
static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t data_size,
int gop_size, int num_pixels, int channel_layout) {
// Allocate output arrays: [gop_size][3 channels][num_pixels]
int gop_size, int num_pixels, int channel_layout,
int *out_width, int *out_height) {
// First, peek at the first frame's dimensions to determine actual GOP size
// (with crop encoding, GOP dimensions may be smaller than full frame)
int actual_width = 0, actual_height = 0;
int actual_pixels = num_pixels; // Default to full frame if peek fails
if (data_size >= 8) { // Need at least frame size header + some EZBC data
// Skip first frame's size header to get to EZBC data
const uint32_t first_frame_size = ((uint32_t)decompressed_data[0]) |
((uint32_t)decompressed_data[1] << 8) |
((uint32_t)decompressed_data[2] << 16) |
((uint32_t)decompressed_data[3] << 24);
if (4 + first_frame_size <= data_size) {
if (ezbc_peek_dimensions(decompressed_data + 4, channel_layout,
&actual_width, &actual_height) == 0) {
actual_pixels = actual_width * actual_height;
// Only log if dimensions differ significantly (crop encoding active)
// Suppress repetitive messages by using static counter
static int crop_log_count = 0;
if (actual_pixels != num_pixels && crop_log_count < 3) {
fprintf(stderr, "[GOP-EZBC] Detected crop encoding: GOP dimensions %dx%d (%d pixels) vs full frame %d pixels\n",
actual_width, actual_height, actual_pixels, num_pixels);
crop_log_count++;
if (crop_log_count == 3) {
fprintf(stderr, "[GOP-EZBC] (Further crop encoding messages suppressed)\n");
}
}
}
}
}
// If we didn't successfully peek dimensions, calculate from num_pixels
if (actual_width == 0 || actual_height == 0) {
// Assume square-ish dimensions - this is a fallback, should not happen with proper encoding
actual_width = (int)sqrt(num_pixels);
actual_height = num_pixels / actual_width;
actual_pixels = actual_width * actual_height;
}
// Return actual dimensions to caller
if (out_width) *out_width = actual_width;
if (out_height) *out_height = actual_height;
// Allocate output arrays: [gop_size][3 channels][actual_pixels]
// Use actual GOP dimensions (may be cropped) not full frame size
int16_t ***output = malloc(gop_size * sizeof(int16_t **));
for (int t = 0; t < gop_size; t++) {
output[t] = malloc(3 * sizeof(int16_t *));
output[t][0] = calloc(num_pixels, sizeof(int16_t)); // Y
output[t][1] = calloc(num_pixels, sizeof(int16_t)); // Co
output[t][2] = calloc(num_pixels, sizeof(int16_t)); // Cg
output[t][0] = calloc(actual_pixels, sizeof(int16_t)); // Y
output[t][1] = calloc(actual_pixels, sizeof(int16_t)); // Co
output[t][2] = calloc(actual_pixels, sizeof(int16_t)); // Cg
}
int offset = 0;
@@ -1491,8 +1585,9 @@ static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t
}
// Decode EZBC frame using the single-frame EZBC decoder
// Pass actual_pixels (cropped size) not num_pixels (full frame size)
postprocess_coefficients_ezbc(
(uint8_t *)(decompressed_data + offset), num_pixels,
(uint8_t *)(decompressed_data + offset), actual_pixels,
output[t][0], output[t][1], output[t][2],
channel_layout);
@@ -1622,6 +1717,12 @@ typedef struct {
uint16_t screen_mask_bottom;
uint16_t screen_mask_left;
// Phase 2: Decoding dimensions (may differ from full frame dimensions per GOP)
int decoding_width; // Actual encoded dimensions (cropped active region)
int decoding_height; // Updated when Screen Mask packet is encountered
// Note: Buffers are allocated at max size (header.width × header.height)
// but only decoding_width × decoding_height portion is used
// FFmpeg pipe for video only (audio from file)
FILE *video_pipe;
pid_t ffmpeg_pid;
@@ -1844,6 +1945,10 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu
decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6);
decoder->audio_file_path = strdup(audio_file);
// Phase 2: Initialize decoding dimensions to full frame (will be updated by Screen Mask packets)
decoder->decoding_width = decoder->header.width;
decoder->decoding_height = decoder->header.height;
// Allocate buffers
decoder->current_frame_rgb = calloc(decoder->frame_size * 3, 1);
decoder->reference_frame_rgb = calloc(decoder->frame_size * 3, 1);
@@ -1984,6 +2089,37 @@ static void tav_decoder_free(tav_decoder_t *decoder) {
//=============================================================================
// Fill masked regions (letterbox/pillarbox bars) with black
// Phase 2: Composite cropped frame back to full frame with black borders
static uint8_t* composite_to_full_frame(const uint8_t *cropped_rgb,
int cropped_width, int cropped_height,
int full_width, int full_height,
uint16_t top, uint16_t right,
uint16_t bottom, uint16_t left) {
// Allocate full frame buffer (filled with black)
uint8_t *full_frame = calloc(full_width * full_height * 3, sizeof(uint8_t));
if (!full_frame) {
return NULL;
}
// Calculate active region position in full frame
const int dest_x = left;
const int dest_y = top;
// Copy cropped frame into active region
for (int y = 0; y < cropped_height; y++) {
for (int x = 0; x < cropped_width; x++) {
const int src_offset = (y * cropped_width + x) * 3;
const int dest_offset = ((dest_y + y) * full_width + (dest_x + x)) * 3;
full_frame[dest_offset + 0] = cropped_rgb[src_offset + 0]; // R
full_frame[dest_offset + 1] = cropped_rgb[src_offset + 1]; // G
full_frame[dest_offset + 2] = cropped_rgb[src_offset + 2]; // B
}
}
return full_frame;
}
static void fill_masked_regions(uint8_t *frame_rgb, int width, int height,
uint16_t top, uint16_t right, uint16_t bottom, uint16_t left) {
// Fill top letterbox bar
@@ -2145,7 +2281,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
memcpy(decoder->current_frame_rgb, decoder->reference_frame_rgb, decoder->frame_size * 3);
} else {
// Decode coefficients (use function-level variables for proper cleanup)
int coeff_count = decoder->frame_size;
// Phase 2: Use decoding dimensions (actual encoded size)
const int decoding_pixels = decoder->decoding_width * decoder->decoding_height;
int coeff_count = decoding_pixels;
quantised_y = calloc(coeff_count, sizeof(int16_t));
quantised_co = calloc(coeff_count, sizeof(int16_t));
quantised_cg = calloc(coeff_count, sizeof(int16_t));
@@ -2183,45 +2321,52 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// fprintf(stderr, " Max quantised Y coefficient: %d\n", max_quant_y);
// }
// Phase 2: Allocate temporary DWT buffers for cropped region processing
float *temp_dwt_y = calloc(decoding_pixels, sizeof(float));
float *temp_dwt_co = calloc(decoding_pixels, sizeof(float));
float *temp_dwt_cg = calloc(decoding_pixels, sizeof(float));
if (!temp_dwt_y || !temp_dwt_co || !temp_dwt_cg) {
fprintf(stderr, "Error: Failed to allocate temporary DWT buffers\n");
free(temp_dwt_y);
free(temp_dwt_co);
free(temp_dwt_cg);
decode_success = 0;
goto write_frame;
}
// Dequantise (perceptual for versions 5-8, uniform for 1-4)
// Phase 2: Use decoding dimensions and temporary buffers
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
const int is_ezbc = (decoder->header.entropy_coder == 1);
if (is_ezbc && is_perceptual) {
// EZBC mode with perceptual quantisation: coefficients are normalised
// Need to dequantise using perceptual weights (same as twobit-map mode)
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, temp_dwt_y,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qy, 0, decoder->frame_count);
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, decoder->dwt_buffer_co,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, temp_dwt_co,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qco, 1, decoder->frame_count);
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, decoder->dwt_buffer_cg,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, temp_dwt_cg,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
} else if (is_perceptual) {
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, temp_dwt_y,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qy, 0, decoder->frame_count);
// Debug: Check if values survived the function call
// if (decoder->frame_count == 32) {
// fprintf(stderr, " RIGHT AFTER dequantise_Y returns: first 5 values: %.1f %.1f %.1f %.1f %.1f\n",
// decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2],
// decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]);
// }
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, decoder->dwt_buffer_co,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, temp_dwt_co,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qco, 1, decoder->frame_count);
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, decoder->dwt_buffer_cg,
decoder->header.width, decoder->header.height,
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, temp_dwt_cg,
decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
} else {
for (int i = 0; i < coeff_count; i++) {
decoder->dwt_buffer_y[i] = quantised_y[i] * qy;
decoder->dwt_buffer_co[i] = quantised_co[i] * qco;
decoder->dwt_buffer_cg[i] = quantised_cg[i] * qcg;
temp_dwt_y[i] = quantised_y[i] * qy;
temp_dwt_co[i] = quantised_co[i] * qco;
temp_dwt_cg[i] = quantised_cg[i] * qcg;
}
}
@@ -2253,7 +2398,8 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// }
// Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
remove_grain_synthesis_decoder(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
// Phase 2: Use decoding dimensions and temporary buffer
remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);
// Debug: Check LL band AFTER grain removal
@@ -2272,12 +2418,13 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// }
// Apply inverse DWT with correct non-power-of-2 dimension handling
// Phase 2: Use decoding dimensions and temporary buffers
// Note: quantised arrays freed at write_frame label
apply_inverse_dwt_multilevel(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
apply_inverse_dwt_multilevel(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->header.wavelet_filter);
apply_inverse_dwt_multilevel(decoder->dwt_buffer_co, decoder->header.width, decoder->header.height,
apply_inverse_dwt_multilevel(temp_dwt_co, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->header.wavelet_filter);
apply_inverse_dwt_multilevel(decoder->dwt_buffer_cg, decoder->header.width, decoder->header.height,
apply_inverse_dwt_multilevel(temp_dwt_cg, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->header.wavelet_filter);
// Debug: Check spatial domain values after IDWT
@@ -2301,47 +2448,67 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// }
// Handle P-frame delta accumulation (in YCoCg float space)
// TODO Phase 2: P-frame support with crop encoding needs additional work
// - Reference frames are stored at full size but delta may be at cropped size
// - Need to extract/composite reference region appropriately
if (packet_type == TAV_PACKET_PFRAME && mode == TAV_MODE_DELTA) {
for (int i = 0; i < decoder->frame_size; i++) {
decoder->dwt_buffer_y[i] += decoder->reference_ycocg_y[i];
decoder->dwt_buffer_co[i] += decoder->reference_ycocg_co[i];
decoder->dwt_buffer_cg[i] += decoder->reference_ycocg_cg[i];
fprintf(stderr, "Warning: P-frame delta mode not yet fully supported with crop encoding\n");
for (int i = 0; i < decoding_pixels; i++) {
temp_dwt_y[i] += decoder->reference_ycocg_y[i];
temp_dwt_co[i] += decoder->reference_ycocg_co[i];
temp_dwt_cg[i] += decoder->reference_ycocg_cg[i];
}
}
// Convert YCoCg-R/ICtCp to RGB
const int is_ictcp = (decoder->header.version % 2 == 0);
float max_y = -999, max_co = -999, max_cg = -999;
int max_r = 0, max_g = 0, max_b = 0;
// Phase 2: Convert cropped region to RGB, then composite to full frame
uint8_t *cropped_rgb = malloc(decoding_pixels * 3);
if (!cropped_rgb) {
fprintf(stderr, "Error: Failed to allocate cropped RGB buffer\n");
free(temp_dwt_y);
free(temp_dwt_co);
free(temp_dwt_cg);
decode_success = 0;
goto write_frame;
}
for (int i = 0; i < decoder->frame_size; i++) {
// Convert YCoCg-R/ICtCp to RGB for cropped region
const int is_ictcp = (decoder->header.version % 2 == 0);
for (int i = 0; i < decoding_pixels; i++) {
uint8_t r, g, b;
if (is_ictcp) {
ictcp_to_rgb(decoder->dwt_buffer_y[i],
decoder->dwt_buffer_co[i],
decoder->dwt_buffer_cg[i], &r, &g, &b);
ictcp_to_rgb(temp_dwt_y[i], temp_dwt_co[i], temp_dwt_cg[i], &r, &g, &b);
} else {
ycocg_r_to_rgb(decoder->dwt_buffer_y[i],
decoder->dwt_buffer_co[i],
decoder->dwt_buffer_cg[i], &r, &g, &b);
ycocg_r_to_rgb(temp_dwt_y[i], temp_dwt_co[i], temp_dwt_cg[i], &r, &g, &b);
}
// Track max values for debugging
// if (decoder->frame_count == 1000) {
// if (decoder->dwt_buffer_y[i] > max_y) max_y = decoder->dwt_buffer_y[i];
// if (decoder->dwt_buffer_co[i] > max_co) max_co = decoder->dwt_buffer_co[i];
// if (decoder->dwt_buffer_cg[i] > max_cg) max_cg = decoder->dwt_buffer_cg[i];
// if (r > max_r) max_r = r;
// if (g > max_g) max_g = g;
// if (b > max_b) max_b = b;
// }
// RGB byte order for FFmpeg rgb24
decoder->current_frame_rgb[i * 3 + 0] = r;
decoder->current_frame_rgb[i * 3 + 1] = g;
decoder->current_frame_rgb[i * 3 + 2] = b;
cropped_rgb[i * 3 + 0] = r;
cropped_rgb[i * 3 + 1] = g;
cropped_rgb[i * 3 + 2] = b;
}
// Composite cropped frame to full frame with black borders
uint8_t *full_frame_rgb = composite_to_full_frame(cropped_rgb,
decoder->decoding_width, decoder->decoding_height,
decoder->header.width, decoder->header.height,
decoder->screen_mask_top, decoder->screen_mask_right,
decoder->screen_mask_bottom, decoder->screen_mask_left);
free(cropped_rgb);
free(temp_dwt_y);
free(temp_dwt_co);
free(temp_dwt_cg);
if (!full_frame_rgb) {
fprintf(stderr, "Error: Failed to composite frame to full size\n");
decode_success = 0;
goto write_frame;
}
// Copy composited frame to decoder buffer
memcpy(decoder->current_frame_rgb, full_frame_rgb, decoder->frame_size * 3);
free(full_frame_rgb);
// if (decoder->frame_count == 1000) {
// fprintf(stderr, "\n=== Frame 1000 Value Analysis ===\n");
// fprintf(stderr, "Max YCoCg values: Y=%.1f, Co=%.1f, Cg=%.1f\n", max_y, max_co, max_cg);
@@ -2360,10 +2527,12 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// fprintf(stderr, "\n");
// }
// Update reference YCoCg frame
memcpy(decoder->reference_ycocg_y, decoder->dwt_buffer_y, decoder->frame_size * sizeof(float));
memcpy(decoder->reference_ycocg_co, decoder->dwt_buffer_co, decoder->frame_size * sizeof(float));
memcpy(decoder->reference_ycocg_cg, decoder->dwt_buffer_cg, decoder->frame_size * sizeof(float));
// TODO Phase 2: Reference YCoCg frame update needs rework for crop encoding
// Currently not updated because we use temporary buffers that are already freed
// P-frame support will need to store reference at appropriate dimensions
// memcpy(decoder->reference_ycocg_y, temp_dwt_y, decoding_pixels * sizeof(float));
// memcpy(decoder->reference_ycocg_co, temp_dwt_co, decoding_pixels * sizeof(float));
// memcpy(decoder->reference_ycocg_cg, temp_dwt_cg, decoding_pixels * sizeof(float));
}
// Update reference frame
@@ -2622,9 +2791,20 @@ int main(int argc, char *argv[]) {
entry->bottom = bottom;
entry->left = left;
// Phase 2: Update current active mask and decoding dimensions
decoder->screen_mask_top = top;
decoder->screen_mask_right = right;
decoder->screen_mask_bottom = bottom;
decoder->screen_mask_left = left;
// Calculate new decoding dimensions (active region size)
decoder->decoding_width = decoder->header.width - left - right;
decoder->decoding_height = decoder->header.height - top - bottom;
if (verbose) {
fprintf(stderr, "Packet %d: SCREEN_MASK (0x%02X) - frame=%u top=%u right=%u bottom=%u left=%u\n",
total_packets, packet_type, frame_num, top, right, bottom, left);
fprintf(stderr, "Packet %d: SCREEN_MASK (0x%02X) - frame=%u top=%u right=%u bottom=%u left=%u (decoding: %dx%d)\n",
total_packets, packet_type, frame_num, top, right, bottom, left,
decoder->decoding_width, decoder->decoding_height);
}
continue;
}
@@ -2689,27 +2869,47 @@ int main(int argc, char *argv[]) {
}
// Postprocess coefficients based on entropy_coder value
// Phase 2: Use decoding dimensions (actual encoded size) for postprocessing
int decoding_pixels = decoder->decoding_width * decoder->decoding_height;
// Keep full frame size for buffer allocation
const int num_pixels = decoder->header.width * decoder->header.height;
int16_t ***quantised_gop;
// GOP dimensions (may differ from full frame with crop encoding)
int gop_width = decoder->decoding_width;
int gop_height = decoder->decoding_height;
if (decoder->header.entropy_coder == 2) {
// RAW format: simple concatenated int16 arrays
if (verbose) {
fprintf(stderr, " Using RAW postprocessing (entropy_coder=2)\n");
fprintf(stderr, " Using RAW postprocessing (entropy_coder=2) for %dx%d (%d pixels)\n",
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
}
quantised_gop = postprocess_gop_raw(decompressed_data, decompressed_size,
gop_size, num_pixels, decoder->header.channel_layout);
} else if (decoder->header.entropy_coder == 1) {
// EZBC format: embedded zero-block coding
if (verbose) {
fprintf(stderr, " Using EZBC postprocessing (entropy_coder=1)\n");
fprintf(stderr, " Using EZBC postprocessing (entropy_coder=1) for %dx%d (%d pixels)\n",
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
}
// EZBC will return actual GOP dimensions (may be cropped with crop encoding)
quantised_gop = postprocess_gop_ezbc(decompressed_data, decompressed_size,
gop_size, num_pixels, decoder->header.channel_layout);
gop_size, num_pixels, decoder->header.channel_layout,
&gop_width, &gop_height);
// Update decoding_pixels to match actual GOP dimensions
if (gop_width > 0 && gop_height > 0) {
decoding_pixels = gop_width * gop_height;
if (verbose) {
fprintf(stderr, " Actual GOP dimensions from EZBC: %dx%d (%d pixels)\n",
gop_width, gop_height, decoding_pixels);
}
}
} else {
// Default: Twobitmap format (entropy_coder=0)
if (verbose) {
fprintf(stderr, " Using Twobitmap postprocessing (entropy_coder=0)\n");
fprintf(stderr, " Using Twobitmap postprocessing (entropy_coder=0) for %dx%d (%d pixels)\n",
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
}
quantised_gop = postprocess_gop_unified(decompressed_data, decompressed_size,
gop_size, num_pixels, decoder->header.channel_layout);
@@ -2724,14 +2924,15 @@ int main(int argc, char *argv[]) {
}
// Allocate GOP float buffers
// Phase 2: Allocate at decoding size (cropped region), will composite to full frame later
float **gop_y = malloc(gop_size * sizeof(float *));
float **gop_co = malloc(gop_size * sizeof(float *));
float **gop_cg = malloc(gop_size * sizeof(float *));
for (int t = 0; t < gop_size; t++) {
gop_y[t] = calloc(num_pixels, sizeof(float));
gop_co[t] = calloc(num_pixels, sizeof(float));
gop_cg[t] = calloc(num_pixels, sizeof(float));
gop_y[t] = calloc(decoding_pixels, sizeof(float));
gop_co[t] = calloc(decoding_pixels, sizeof(float));
gop_cg[t] = calloc(decoding_pixels, sizeof(float));
}
// Dequantise with temporal scaling (perceptual quantisation for versions 5-8)
@@ -2751,17 +2952,18 @@ int main(int argc, char *argv[]) {
const float base_q_co = roundf(QLUT[decoder->header.quantiser_co] * temporal_scale);
const float base_q_cg = roundf(QLUT[decoder->header.quantiser_cg] * temporal_scale);
// Phase 2: Use GOP dimensions (may be cropped) for dequantisation
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][0], gop_y[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][1], gop_co[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][2], gop_cg[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
if (t == 0 && verbose) {
@@ -2786,21 +2988,23 @@ int main(int argc, char *argv[]) {
const float base_q_cg = roundf(QLUT[decoder->header.quantiser_cg] * temporal_scale);
if (is_perceptual) {
// Phase 2: Use GOP dimensions (may be cropped) for dequantisation
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][0], gop_y[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][1], gop_co[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
quantised_gop[t][2], gop_cg[t],
decoder->header.width, decoder->header.height,
gop_width, gop_height,
decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
} else {
// Uniform quantisation for older versions
for (int i = 0; i < num_pixels; i++) {
// Phase 2: Use decoding_pixels for uniform dequantisation
for (int i = 0; i < decoding_pixels; i++) {
gop_y[t][i] = quantised_gop[t][0][i] * base_q_y;
gop_co[t][i] = quantised_gop[t][1][i] * base_q_co;
gop_cg[t][i] = quantised_gop[t][2][i] * base_q_cg;
@@ -2819,14 +3023,16 @@ int main(int argc, char *argv[]) {
free(quantised_gop);
// Phase 2: Use GOP dimensions (may be cropped) for grain removal
for (int t = 0; t < gop_size; t++) {
remove_grain_synthesis_decoder(gop_y[t], decoder->header.width, decoder->header.height,
remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height,
decoder->header.decomp_levels, decoder->frame_count + t,
decoder->header.quantiser_y);
}
// Apply inverse 3D DWT (spatial + temporal)
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, decoder->header.width, decoder->header.height,
// Phase 2: Use GOP dimensions (may be cropped) for inverse DWT
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, gop_width, gop_height,
gop_size, decoder->header.decomp_levels, temporal_levels,
decoder->header.wavelet_filter);
@@ -2859,35 +3065,78 @@ int main(int argc, char *argv[]) {
// (size_t)decoder->frame_size * 3, decoder->header.width * decoder->header.height * 3);
// }
// Calculate consistent screen mask offsets for crop-encoded GOPs
// When crop encoding is active, all frames in GOP use same dimensions
const int is_crop_encoded = (gop_width != decoder->header.width ||
gop_height != decoder->header.height);
uint16_t gop_mask_top = 0, gop_mask_bottom = 0, gop_mask_left = 0, gop_mask_right = 0;
if (is_crop_encoded) {
// Center the cropped region in the full frame
if (gop_height < decoder->header.height) {
gop_mask_top = (decoder->header.height - gop_height) / 2;
gop_mask_bottom = decoder->header.height - gop_height - gop_mask_top;
}
if (gop_width < decoder->header.width) {
gop_mask_left = (decoder->header.width - gop_width) / 2;
gop_mask_right = decoder->header.width - gop_width - gop_mask_left;
}
if (verbose && decoder->frame_count == 0) {
fprintf(stderr, "[GOP-Crop] Centering %dx%d in %dx%d: top=%u, bottom=%u, left=%u, right=%u\n",
gop_width, gop_height, decoder->header.width, decoder->header.height,
gop_mask_top, gop_mask_bottom, gop_mask_left, gop_mask_right);
}
}
for (int t = 0; t < gop_size; t++) {
// Allocate frame buffer
uint8_t *frame_rgb = malloc(decoder->frame_size * 3);
if (!frame_rgb) {
fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n");
// Update screen mask only if NOT crop-encoded
// Crop-encoded GOPs use consistent offsets calculated above
if (!is_crop_encoded) {
update_screen_mask(decoder, decoder->frame_count + t);
}
// Phase 2: Convert cropped region to RGB, then composite to full frame
uint8_t *cropped_rgb = malloc(decoding_pixels * 3);
if (!cropped_rgb) {
fprintf(stderr, "Error: Failed to allocate cropped GOP frame buffer\n");
result = -1;
break;
}
// Convert to RGB
for (int i = 0; i < decoder->frame_size; i++) {
// Convert cropped region to RGB
for (int i = 0; i < decoding_pixels; i++) {
uint8_t r, g, b;
if (is_ictcp) {
ictcp_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
} else {
ycocg_r_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
}
frame_rgb[i * 3 + 0] = r;
frame_rgb[i * 3 + 1] = g;
frame_rgb[i * 3 + 2] = b;
cropped_rgb[i * 3 + 0] = r;
cropped_rgb[i * 3 + 1] = g;
cropped_rgb[i * 3 + 2] = b;
}
// Update active screen mask for this GOP frame
update_screen_mask(decoder, decoder->frame_count + t);
// Composite cropped frame to full frame with black borders
// Use GOP-consistent offsets for crop-encoded, or per-frame offsets otherwise
const uint16_t mask_top = is_crop_encoded ? gop_mask_top : decoder->screen_mask_top;
const uint16_t mask_bottom = is_crop_encoded ? gop_mask_bottom : decoder->screen_mask_bottom;
const uint16_t mask_left = is_crop_encoded ? gop_mask_left : decoder->screen_mask_left;
const uint16_t mask_right = is_crop_encoded ? gop_mask_right : decoder->screen_mask_right;
// Fill masked regions with black (letterbox/pillarbox bars)
fill_masked_regions(frame_rgb, decoder->header.width, decoder->header.height,
decoder->screen_mask_top, decoder->screen_mask_right,
decoder->screen_mask_bottom, decoder->screen_mask_left);
uint8_t *frame_rgb = composite_to_full_frame(cropped_rgb,
gop_width, gop_height,
decoder->header.width, decoder->header.height,
mask_top, mask_right, mask_bottom, mask_left);
free(cropped_rgb);
if (!frame_rgb) {
fprintf(stderr, "Error: Failed to composite GOP frame to full size\n");
result = -1;
break;
}
// Note: Phase 1 fill_masked_regions() is now replaced by Phase 2 composite function
// which places the decoded cropped frame into a full-frame buffer with black borders
// Write frame to FFmpeg video pipe
const size_t bytes_to_write = decoder->frame_size * 3;

File diff suppressed because it is too large Load Diff