mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
TAV: letterbox detection encoding complete
This commit is contained in:
@@ -1007,6 +1007,12 @@ transmission capability, and region-of-interest coding.
|
||||
0xE2: ID3v2 packet
|
||||
0xE3: Vorbis Comment packet
|
||||
0xE4: CD-text packet
|
||||
<Extensible>
|
||||
0x01: Vendor-specific video packets
|
||||
0x02: Vendor-specific audio frame
|
||||
0x03: Vendor-specific subtitle
|
||||
0x04: Vendor-specific audio file
|
||||
0x0E: Vendor-specific metadata
|
||||
<Special packets>
|
||||
0x00: No-op (no payload)
|
||||
0xEF: TAV Extended Header
|
||||
@@ -1068,6 +1074,16 @@ transmission capability, and region-of-interest coding.
|
||||
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014 (list,of,features)")
|
||||
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers"; the first line of text FFmpeg emits)
|
||||
|
||||
## Extensible Packet Structure
|
||||
uint8 Packet Type
|
||||
uint8 Flags
|
||||
- 0x01: 64-bit size
|
||||
uint8 Identifier[4]
|
||||
<if 64-bit size>
|
||||
uint64 Length of the payload
|
||||
<if not>
|
||||
uint32 Length of the payload
|
||||
* Payload
|
||||
|
||||
## Standard Metadata Payload Packet Structure
|
||||
uint8 Packet Type (0xE0/0xE1/0xE2/.../0xEE; see Packet Types section)
|
||||
|
||||
@@ -4713,24 +4713,120 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
return output
|
||||
}
|
||||
|
||||
/**
 * Peek at the frame dimensions stored in an EZBC bitstream header without decoding it.
 *
 * Bytes read, starting at [compressedOffset]:
 * - 4 bytes: little-endian size of the Y channel payload
 * - 1 byte : MSB bitplane (skipped)
 * - 2 bytes: width, little-endian
 * - 2 bytes: height, little-endian
 *
 * The header fields start on a byte boundary and are stored least-significant-bit first,
 * so they can be read as plain little-endian integers; no bit reader is needed.
 *
 * @param compressedData EZBC frame data
 * @param compressedOffset Offset of the Y channel size field inside [compressedData]
 * @param channelLayout Channel layout; when bit 2 (value 4) is set there is no Y channel
 * @return Pair of (width, height), or null when there is no Y channel, the data is too short,
 *         the declared payload size is implausible, or either dimension is zero
 */
private fun ezbc_peek_dimensions(compressedData: ByteArray, compressedOffset: Int, channelLayout: Int): Pair<Int, Int>? {
    // Dimensions live in the Y channel header, so a layout without Y cannot be peeked
    if ((channelLayout and 4) != 0) return null

    // 4 size bytes + 5 header bytes must be readable; checked up front instead of
    // relying on an ArrayIndexOutOfBoundsException being caught
    if (compressedOffset < 0 || compressedData.size - compressedOffset < 9) return null

    fun u8(index: Int): Int = compressedData[index].toInt() and 0xFF

    // Y channel size header (little-endian, may come out negative if the top bit is set)
    val size = u8(compressedOffset) or
            (u8(compressedOffset + 1) shl 8) or
            (u8(compressedOffset + 2) shl 16) or
            (u8(compressedOffset + 3) shl 24)

    if (size < 6) return null // Too small for EZBC header (also rejects negative sizes)

    val headerStart = compressedOffset + 4

    // A payload whose declared end cannot be expressed as an Int index is rejected
    if (size > Int.MAX_VALUE - headerStart) return null

    // headerStart + 0 is the MSB bitplane byte, which is not needed here
    val width = u8(headerStart + 1) or (u8(headerStart + 2) shl 8)
    val height = u8(headerStart + 3) or (u8(headerStart + 4) shl 8)

    return if (width > 0 && height > 0) Pair(width, height) else null
}
|
||||
|
||||
/**
 * Result of EZBC GOP decoding with actual dimensions (for crop encoding support).
 *
 * Equality and hashing are content-based: the generated data-class implementations would
 * compare [coeffs] by array reference only.
 *
 * @property coeffs Decoded coefficients indexed as [frame][channel][pixel]
 * @property width Width the GOP was decoded at
 * @property height Height the GOP was decoded at
 */
data class EZBCGopResult(
    val coeffs: Array<Array<ShortArray>>,
    val width: Int,
    val height: Int
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is EZBCGopResult) return false
        return width == other.width &&
                height == other.height &&
                coeffs.contentDeepEquals(other.coeffs)
    }

    override fun hashCode(): Int {
        var result = coeffs.contentDeepHashCode()
        result = 31 * result + width
        result = 31 * result + height
        return result
    }
}
|
||||
|
||||
/**
|
||||
* Reconstruct per-frame coefficients from unified GOP block (EZBC format)
|
||||
* Format: [frame0_size(4)][frame0_ezbc][frame1_size(4)][frame1_ezbc]...
|
||||
*
|
||||
* With crop encoding, GOP dimensions may differ from full frame size.
|
||||
* This function detects the actual dimensions from EZBC headers.
|
||||
*
|
||||
* @param decompressedData Unified EZBC block data (after Zstd decompression)
|
||||
* @param numFrames Number of frames in GOP
|
||||
* @param numPixels Pixels per frame (width × height)
|
||||
* @param numPixels Pixels per frame for full frame (width × height)
|
||||
* @param channelLayout Channel layout (0=YCoCg, 2=Y-only, etc)
|
||||
* @return Array of [frame][channel] where channel: 0=Y, 1=Co, 2=Cg
|
||||
* @param fullWidth Full frame width (for crop detection)
|
||||
* @param fullHeight Full frame height (for crop detection)
|
||||
* @return EZBCGopResult with coeffs array and actual GOP dimensions
|
||||
*/
|
||||
private fun tavPostprocessGopEZBC(
|
||||
decompressedData: ByteArray,
|
||||
numFrames: Int,
|
||||
numPixels: Int,
|
||||
channelLayout: Int
|
||||
): Array<Array<ShortArray>> {
|
||||
// Allocate output arrays
|
||||
val output = Array(numFrames) { Array(3) { ShortArray(numPixels) } }
|
||||
channelLayout: Int,
|
||||
fullWidth: Int,
|
||||
fullHeight: Int
|
||||
): EZBCGopResult {
|
||||
// Peek at first frame's EZBC dimensions to detect crop encoding
|
||||
var actualWidth = fullWidth
|
||||
var actualHeight = fullHeight
|
||||
var actualPixels = numPixels
|
||||
|
||||
if (decompressedData.size >= 8) {
|
||||
// Read first frame size
|
||||
val firstFrameSize = ((decompressedData[0].toInt() and 0xFF) or
|
||||
((decompressedData[1].toInt() and 0xFF) shl 8) or
|
||||
((decompressedData[2].toInt() and 0xFF) shl 16) or
|
||||
((decompressedData[3].toInt() and 0xFF) shl 24))
|
||||
|
||||
if (4 + firstFrameSize <= decompressedData.size) {
|
||||
// Peek at EZBC dimensions from first frame's Y channel
|
||||
val dims = ezbc_peek_dimensions(decompressedData, 4, channelLayout)
|
||||
if (dims != null) {
|
||||
actualWidth = dims.first
|
||||
actualHeight = dims.second
|
||||
actualPixels = actualWidth * actualHeight
|
||||
|
||||
if (actualPixels != numPixels) {
|
||||
println("[TAV-GOP-EZBC] Detected crop encoding: GOP ${actualWidth}x${actualHeight} (${actualPixels} px) vs full frame ${fullWidth}x${fullHeight} (${numPixels} px)")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate output arrays with actual GOP dimensions
|
||||
val output = Array(numFrames) { Array(3) { ShortArray(actualPixels) } }
|
||||
|
||||
var offset = 0
|
||||
for (frame in 0 until numFrames) {
|
||||
@@ -4745,39 +4841,52 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
if (offset + frameSize > decompressedData.size) break
|
||||
|
||||
// Decode this frame with EZBC
|
||||
// Decode this frame with EZBC using actual GOP dimensions
|
||||
postprocessCoefficientsEZBC(
|
||||
decompressedData, offset, numPixels, channelLayout,
|
||||
decompressedData, offset, actualPixels, channelLayout,
|
||||
output[frame][0], output[frame][1], output[frame][2], null
|
||||
)
|
||||
|
||||
offset += frameSize
|
||||
}
|
||||
|
||||
return output
|
||||
return EZBCGopResult(output, actualWidth, actualHeight)
|
||||
}
|
||||
|
||||
/**
 * Result of GOP decoding with actual dimensions (for crop encoding support).
 *
 * Equality and hashing are content-based: the generated data-class implementations would
 * compare [coeffs] by array reference only.
 *
 * @property isEZBC True when the GOP was decoded with the EZBC entropy coder
 * @property coeffs Decoded coefficients indexed as [frame][channel][pixel]
 * @property width Width the GOP was decoded at
 * @property height Height the GOP was decoded at
 */
data class GopDecodeResult(
    val isEZBC: Boolean,
    val coeffs: Array<Array<ShortArray>>,
    val width: Int,
    val height: Int
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is GopDecodeResult) return false
        return isEZBC == other.isEZBC &&
                width == other.width &&
                height == other.height &&
                coeffs.contentDeepEquals(other.coeffs)
    }

    override fun hashCode(): Int {
        var result = isEZBC.hashCode()
        result = 31 * result + coeffs.contentDeepHashCode()
        result = 31 * result + width
        result = 31 * result + height
        return result
    }
}
|
||||
|
||||
/**
|
||||
* Auto-detecting GOP postprocessor
|
||||
* Detects EZBC vs twobit-map format and calls appropriate decoder
|
||||
* Returns actual GOP dimensions (may differ from full frame with crop encoding)
|
||||
*/
|
||||
private fun tavPostprocessGopAuto(
|
||||
decompressedData: ByteArray,
|
||||
numFrames: Int,
|
||||
numPixels: Int,
|
||||
channelLayout: Int,
|
||||
entropyCoder: Int
|
||||
): Pair<Boolean, Array<Array<ShortArray>>> {
|
||||
entropyCoder: Int,
|
||||
fullWidth: Int,
|
||||
fullHeight: Int
|
||||
): GopDecodeResult {
|
||||
// Read entropy coder from header: 0 = Twobit-map, 1 = EZBC
|
||||
val isEZBC = (entropyCoder == 1)
|
||||
|
||||
val data = if (isEZBC) {
|
||||
tavPostprocessGopEZBC(decompressedData, numFrames, numPixels, channelLayout)
|
||||
return if (isEZBC) {
|
||||
val result = tavPostprocessGopEZBC(decompressedData, numFrames, numPixels, channelLayout, fullWidth, fullHeight)
|
||||
GopDecodeResult(true, result.coeffs, result.width, result.height)
|
||||
} else {
|
||||
tavPostprocessGopUnified(decompressedData, numFrames, numPixels, channelLayout)
|
||||
val coeffs = tavPostprocessGopUnified(decompressedData, numFrames, numPixels, channelLayout)
|
||||
GopDecodeResult(false, coeffs, fullWidth, fullHeight)
|
||||
}
|
||||
|
||||
return Pair(isEZBC, data)
|
||||
}
|
||||
|
||||
// TAV Simulated overlapping tiles constants (must match encoder)
|
||||
@@ -6446,21 +6555,33 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
|
||||
// Step 2: Postprocess unified block to per-frame coefficients
|
||||
val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
|
||||
// With crop encoding, GOP dimensions may differ from full frame
|
||||
val gopResult = tavPostprocessGopAuto(
|
||||
decompressedData,
|
||||
gopSize,
|
||||
outputPixels,
|
||||
channelLayout,
|
||||
entropyCoder
|
||||
entropyCoder,
|
||||
width,
|
||||
height
|
||||
)
|
||||
|
||||
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
|
||||
val gopY = Array(gopSize) { FloatArray(outputPixels) }
|
||||
val gopCo = Array(gopSize) { FloatArray(outputPixels) }
|
||||
val gopCg = Array(gopSize) { FloatArray(outputPixels) }
|
||||
val isEZBCMode = gopResult.isEZBC
|
||||
val quantizedCoeffs = gopResult.coeffs
|
||||
val gopWidth = gopResult.width
|
||||
val gopHeight = gopResult.height
|
||||
val gopPixels = gopWidth * gopHeight
|
||||
|
||||
// Step 4: Calculate subband layout for expanded canvas
|
||||
val subbands = calculateSubbandLayout(width, height, spatialLevels)
|
||||
// Detect crop encoding
|
||||
val isCropEncoded = (gopWidth != width || gopHeight != height)
|
||||
|
||||
// Step 3: Allocate GOP buffers for float coefficients (GOP dimensions)
|
||||
val gopY = Array(gopSize) { FloatArray(gopPixels) }
|
||||
val gopCo = Array(gopSize) { FloatArray(gopPixels) }
|
||||
val gopCg = Array(gopSize) { FloatArray(gopPixels) }
|
||||
|
||||
// Step 4: Calculate subband layout for GOP dimensions (may be cropped)
|
||||
val subbands = calculateSubbandLayout(gopWidth, gopHeight, spatialLevels)
|
||||
|
||||
// Step 5: Dequantize with temporal-spatial scaling
|
||||
for (t in 0 until gopSize) {
|
||||
@@ -6503,32 +6624,44 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
// Step 5.5: Remove grain synthesis from Y channel for each GOP frame
|
||||
// This must happen after dequantization but before inverse DWT
|
||||
// Use GOP dimensions (may be cropped)
|
||||
for (t in 0 until gopSize) {
|
||||
removeGrainSynthesisDecoder(
|
||||
gopY[t], width, height,
|
||||
gopY[t], gopWidth, gopHeight,
|
||||
rngFrameTick.getAndAdd(1) + t,
|
||||
subbands, qIndex
|
||||
)
|
||||
}
|
||||
|
||||
// Step 6: Apply inverse 3D DWT
|
||||
tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
// Step 6: Apply inverse 3D DWT using GOP dimensions (may be cropped)
|
||||
tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
|
||||
// Step 8: Convert to RGB and composite to full frame
|
||||
// With crop encoding, center the cropped frame and fill letterbox areas with black
|
||||
val maskTop = if (isCropEncoded && gopHeight < height) (height - gopHeight) / 2 else 0
|
||||
val maskLeft = if (isCropEncoded && gopWidth < width) (width - gopWidth) / 2 else 0
|
||||
|
||||
// Step 8: Crop and convert to RGB, write directly to videoBuffer
|
||||
for (t in 0 until gopSize) {
|
||||
val videoBufferOffset = bufferOffset + (t * frameSize) // Each frame sequentially, starting at bufferOffset
|
||||
|
||||
for (py in 0 until height) {
|
||||
for (px in 0 until width) {
|
||||
// Destination pixel in videoBuffer
|
||||
val outIdx = py * width + px
|
||||
val offset = videoBufferOffset + outIdx * 3L
|
||||
// Fill entire frame with black (for letterbox/pillarbox areas)
|
||||
if (isCropEncoded) {
|
||||
for (i in 0 until (width * height * 3L)) {
|
||||
gpu.videoBuffer[videoBufferOffset + i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
val yVal = gopY[t][outIdx]
|
||||
val co = gopCo[t][outIdx]
|
||||
val cg = gopCg[t][outIdx]
|
||||
// Write cropped content to centered position
|
||||
for (py in 0 until gopHeight) {
|
||||
for (px in 0 until gopWidth) {
|
||||
// Source pixel from GOP
|
||||
val srcIdx = py * gopWidth + px
|
||||
|
||||
val yVal = gopY[t][srcIdx]
|
||||
val co = gopCo[t][srcIdx]
|
||||
val cg = gopCg[t][srcIdx]
|
||||
|
||||
// YCoCg-R to RGB conversion
|
||||
val tmp = yVal - (cg / 2.0f)
|
||||
@@ -6536,6 +6669,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
val b = tmp - (co / 2.0f)
|
||||
val r = b + co
|
||||
|
||||
// Destination pixel in full frame (with centering offset)
|
||||
val dstY = py + maskTop
|
||||
val dstX = px + maskLeft
|
||||
val dstIdx = dstY * width + dstX
|
||||
val offset = videoBufferOffset + dstIdx * 3L
|
||||
|
||||
// Clamp and write to videoBuffer
|
||||
gpu.videoBuffer[offset + 0] = r.roundToInt().coerceIn(0, 255).toByte()
|
||||
gpu.videoBuffer[offset + 1] = g.roundToInt().coerceIn(0, 255).toByte()
|
||||
|
||||
@@ -3,8 +3,9 @@
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE
|
||||
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE
|
||||
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE #-fsanitize=address
|
||||
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE #-fsanitize=address
|
||||
DBGFLAGS = #-fsanitize=address
|
||||
|
||||
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
|
||||
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
|
||||
@@ -33,13 +34,13 @@ tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav.c -o encoder_tav.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
|
||||
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o
|
||||
$(CXX) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
|
||||
$(CXX) $(DBGFLAGS) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
|
||||
|
||||
tav_decoder: decoder_tav.c decoder_tad.c decoder_tad.h
|
||||
rm -f decoder_tav decoder_tav.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c decoder_tad.c -o decoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c decoder_tav.c -o decoder_tav.o
|
||||
$(CC) -o decoder_tav decoder_tav.o decoder_tad.o $(LIBS)
|
||||
$(CC) $(DBGFLAGS) -o decoder_tav decoder_tav.o decoder_tad.o $(LIBS)
|
||||
|
||||
tav_inspector: tav_inspector.c
|
||||
rm -f tav_inspector
|
||||
@@ -50,7 +51,7 @@ encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h
|
||||
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad_standalone.c -o encoder_tad_standalone.o
|
||||
$(CC) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
|
||||
$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
|
||||
|
||||
decoder_tad: decoder_tad.c
|
||||
rm -f decoder_tad
|
||||
|
||||
@@ -689,12 +689,19 @@ static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t
|
||||
// fprintf(stderr, "[EZBC] Decoded header: MSB=%d, width=%d, height=%d (expected pixels=%d)\n",
|
||||
// msb_bitplane, width, height, expected_count);
|
||||
|
||||
if (width * height != expected_count) {
|
||||
fprintf(stderr, "EZBC dimension mismatch: %dx%d != %d\n", width, height, expected_count);
|
||||
// With crop encoding, dimensions can vary per frame - trust the EZBC header
|
||||
// Just ensure we don't overflow the output buffer
|
||||
const int actual_count = width * height;
|
||||
if (actual_count > expected_count) {
|
||||
fprintf(stderr, "EZBC dimension overflow: %dx%d (%d) > %d\n",
|
||||
width, height, actual_count, expected_count);
|
||||
memset(output, 0, expected_count * sizeof(int16_t));
|
||||
return;
|
||||
}
|
||||
|
||||
// If actual count is less, only decode what we need
|
||||
expected_count = actual_count;
|
||||
|
||||
// Initialise output and state tracking
|
||||
memset(output, 0, expected_count * sizeof(int16_t));
|
||||
int8_t *significant = calloc(expected_count, sizeof(int8_t));
|
||||
@@ -785,6 +792,47 @@ static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t
|
||||
// nonzero_count, 100.0 * nonzero_count / expected_count, min_val, max_val);
|
||||
}
|
||||
|
||||
// Helper: peek at EZBC header to get dimensions without decoding
|
||||
static int ezbc_peek_dimensions(const uint8_t *compressed_data, int channel_layout,
|
||||
int *out_width, int *out_height) {
|
||||
const int has_y = (channel_layout & 0x04) == 0;
|
||||
|
||||
if (!has_y) {
|
||||
return -1; // Need Y channel to get dimensions
|
||||
}
|
||||
|
||||
// Read Y channel size header
|
||||
const uint32_t size = ((uint32_t)compressed_data[0]) |
|
||||
((uint32_t)compressed_data[1] << 8) |
|
||||
((uint32_t)compressed_data[2] << 16) |
|
||||
((uint32_t)compressed_data[3] << 24);
|
||||
|
||||
if (size < 6) {
|
||||
return -1; // Too small to contain EZBC header
|
||||
}
|
||||
|
||||
// Skip to EZBC data for Y channel (after size header)
|
||||
const uint8_t *ezbc_data = compressed_data + 4;
|
||||
|
||||
// Read EZBC header: skip MSB bitplane (1 byte), then read width and height
|
||||
// Note: EZBC uses bitstream format, but dimensions are at fixed positions
|
||||
// We need to parse the bitstream header carefully
|
||||
|
||||
// Create a temporary reader to parse the bitstream
|
||||
ezbc_bitreader_t reader;
|
||||
reader.data = ezbc_data;
|
||||
reader.size = size;
|
||||
reader.byte_pos = 0;
|
||||
reader.bit_pos = 0;
|
||||
|
||||
// Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
|
||||
ezbc_read_bits(&reader, 8); // Skip MSB bitplane
|
||||
*out_width = ezbc_read_bits(&reader, 16);
|
||||
*out_height = ezbc_read_bits(&reader, 16);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// EZBC postprocessing for single frames
|
||||
static void postprocess_coefficients_ezbc(uint8_t *compressed_data, int coeff_count,
|
||||
int16_t *output_y, int16_t *output_co, int16_t *output_cg,
|
||||
@@ -1457,15 +1505,61 @@ error_cleanup:
|
||||
// Postprocess GOP EZBC format to per-frame coefficients (entropyCoder=1)
|
||||
// Layout: [frame0_size(4)][frame0_ezbc_data][frame1_size(4)][frame1_ezbc_data]...
|
||||
// Note: EZBC is a complex embedded bitplane codec - this is a simplified placeholder
|
||||
// Returns the actual dimensions through output parameters (for crop encoding support)
|
||||
static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t data_size,
|
||||
int gop_size, int num_pixels, int channel_layout) {
|
||||
// Allocate output arrays: [gop_size][3 channels][num_pixels]
|
||||
int gop_size, int num_pixels, int channel_layout,
|
||||
int *out_width, int *out_height) {
|
||||
// First, peek at the first frame's dimensions to determine actual GOP size
|
||||
// (with crop encoding, GOP dimensions may be smaller than full frame)
|
||||
int actual_width = 0, actual_height = 0;
|
||||
int actual_pixels = num_pixels; // Default to full frame if peek fails
|
||||
|
||||
if (data_size >= 8) { // Need at least frame size header + some EZBC data
|
||||
// Skip first frame's size header to get to EZBC data
|
||||
const uint32_t first_frame_size = ((uint32_t)decompressed_data[0]) |
|
||||
((uint32_t)decompressed_data[1] << 8) |
|
||||
((uint32_t)decompressed_data[2] << 16) |
|
||||
((uint32_t)decompressed_data[3] << 24);
|
||||
|
||||
if (4 + first_frame_size <= data_size) {
|
||||
if (ezbc_peek_dimensions(decompressed_data + 4, channel_layout,
|
||||
&actual_width, &actual_height) == 0) {
|
||||
actual_pixels = actual_width * actual_height;
|
||||
// Only log if dimensions differ significantly (crop encoding active)
|
||||
// Suppress repetitive messages by using static counter
|
||||
static int crop_log_count = 0;
|
||||
if (actual_pixels != num_pixels && crop_log_count < 3) {
|
||||
fprintf(stderr, "[GOP-EZBC] Detected crop encoding: GOP dimensions %dx%d (%d pixels) vs full frame %d pixels\n",
|
||||
actual_width, actual_height, actual_pixels, num_pixels);
|
||||
crop_log_count++;
|
||||
if (crop_log_count == 3) {
|
||||
fprintf(stderr, "[GOP-EZBC] (Further crop encoding messages suppressed)\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't successfully peek dimensions, calculate from num_pixels
|
||||
if (actual_width == 0 || actual_height == 0) {
|
||||
// Assume square-ish dimensions - this is a fallback, should not happen with proper encoding
|
||||
actual_width = (int)sqrt(num_pixels);
|
||||
actual_height = num_pixels / actual_width;
|
||||
actual_pixels = actual_width * actual_height;
|
||||
}
|
||||
|
||||
// Return actual dimensions to caller
|
||||
if (out_width) *out_width = actual_width;
|
||||
if (out_height) *out_height = actual_height;
|
||||
|
||||
// Allocate output arrays: [gop_size][3 channels][actual_pixels]
|
||||
// Use actual GOP dimensions (may be cropped) not full frame size
|
||||
int16_t ***output = malloc(gop_size * sizeof(int16_t **));
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
output[t] = malloc(3 * sizeof(int16_t *));
|
||||
output[t][0] = calloc(num_pixels, sizeof(int16_t)); // Y
|
||||
output[t][1] = calloc(num_pixels, sizeof(int16_t)); // Co
|
||||
output[t][2] = calloc(num_pixels, sizeof(int16_t)); // Cg
|
||||
output[t][0] = calloc(actual_pixels, sizeof(int16_t)); // Y
|
||||
output[t][1] = calloc(actual_pixels, sizeof(int16_t)); // Co
|
||||
output[t][2] = calloc(actual_pixels, sizeof(int16_t)); // Cg
|
||||
}
|
||||
|
||||
int offset = 0;
|
||||
@@ -1491,8 +1585,9 @@ static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t
|
||||
}
|
||||
|
||||
// Decode EZBC frame using the single-frame EZBC decoder
|
||||
// Pass actual_pixels (cropped size) not num_pixels (full frame size)
|
||||
postprocess_coefficients_ezbc(
|
||||
(uint8_t *)(decompressed_data + offset), num_pixels,
|
||||
(uint8_t *)(decompressed_data + offset), actual_pixels,
|
||||
output[t][0], output[t][1], output[t][2],
|
||||
channel_layout);
|
||||
|
||||
@@ -1622,6 +1717,12 @@ typedef struct {
|
||||
uint16_t screen_mask_bottom;
|
||||
uint16_t screen_mask_left;
|
||||
|
||||
// Phase 2: Decoding dimensions (may differ from full frame dimensions per GOP)
|
||||
int decoding_width; // Actual encoded dimensions (cropped active region)
|
||||
int decoding_height; // Updated when Screen Mask packet is encountered
|
||||
// Note: Buffers are allocated at max size (header.width × header.height)
|
||||
// but only decoding_width × decoding_height portion is used
|
||||
|
||||
// FFmpeg pipe for video only (audio from file)
|
||||
FILE *video_pipe;
|
||||
pid_t ffmpeg_pid;
|
||||
@@ -1844,6 +1945,10 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu
|
||||
decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6);
|
||||
decoder->audio_file_path = strdup(audio_file);
|
||||
|
||||
// Phase 2: Initialize decoding dimensions to full frame (will be updated by Screen Mask packets)
|
||||
decoder->decoding_width = decoder->header.width;
|
||||
decoder->decoding_height = decoder->header.height;
|
||||
|
||||
// Allocate buffers
|
||||
decoder->current_frame_rgb = calloc(decoder->frame_size * 3, 1);
|
||||
decoder->reference_frame_rgb = calloc(decoder->frame_size * 3, 1);
|
||||
@@ -1984,6 +2089,37 @@ static void tav_decoder_free(tav_decoder_t *decoder) {
|
||||
//=============================================================================
|
||||
|
||||
// Fill masked regions (letterbox/pillarbox bars) with black
|
||||
// Phase 2: Composite cropped frame back to full frame with black borders.
//
// Allocates a zero-filled (black) full_width x full_height RGB24 buffer and copies the
// cropped image into it with its top-left corner at (left, top). The copied region is
// clipped to the full frame, so a crop that does not fit at that position cannot write
// outside the buffer. The `right` and `bottom` mask values are accepted for interface
// symmetry but are not needed to place the image.
//
// Returns a newly allocated buffer the caller must free(), or NULL if the full frame
// dimensions are not positive or the allocation fails. When cropped_rgb is NULL or
// nothing fits inside the frame, the returned buffer is entirely black.
static uint8_t* composite_to_full_frame(const uint8_t *cropped_rgb,
                                        int cropped_width, int cropped_height,
                                        int full_width, int full_height,
                                        uint16_t top, uint16_t right,
                                        uint16_t bottom, uint16_t left) {
    (void)right;
    (void)bottom;

    if (full_width <= 0 || full_height <= 0) {
        return NULL;
    }

    // Allocate full frame buffer (filled with black); size computed in size_t to avoid int overflow
    uint8_t *full_frame = calloc((size_t)full_width * (size_t)full_height * 3, sizeof(uint8_t));
    if (!full_frame) {
        return NULL;
    }

    if (!cropped_rgb || cropped_width <= 0 || cropped_height <= 0) {
        return full_frame;
    }

    // Active region position in full frame
    const int dest_x = left;
    const int dest_y = top;

    // Clip the copied region to what actually fits inside the full frame
    int copy_width = full_width - dest_x;
    if (copy_width > cropped_width) copy_width = cropped_width;
    int copy_height = full_height - dest_y;
    if (copy_height > cropped_height) copy_height = cropped_height;

    if (copy_width <= 0 || copy_height <= 0) {
        return full_frame;
    }

    // Copy cropped frame into active region, one row at a time
    const size_t src_stride = (size_t)cropped_width * 3;
    const size_t dest_stride = (size_t)full_width * 3;
    const size_t row_bytes = (size_t)copy_width * 3;

    for (int y = 0; y < copy_height; y++) {
        const uint8_t *src_row = cropped_rgb + (size_t)y * src_stride;
        uint8_t *dest_row = full_frame + (size_t)(dest_y + y) * dest_stride + (size_t)dest_x * 3;
        memcpy(dest_row, src_row, row_bytes);
    }

    return full_frame;
}
|
||||
|
||||
static void fill_masked_regions(uint8_t *frame_rgb, int width, int height,
|
||||
uint16_t top, uint16_t right, uint16_t bottom, uint16_t left) {
|
||||
// Fill top letterbox bar
|
||||
@@ -2145,7 +2281,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
memcpy(decoder->current_frame_rgb, decoder->reference_frame_rgb, decoder->frame_size * 3);
|
||||
} else {
|
||||
// Decode coefficients (use function-level variables for proper cleanup)
|
||||
int coeff_count = decoder->frame_size;
|
||||
// Phase 2: Use decoding dimensions (actual encoded size)
|
||||
const int decoding_pixels = decoder->decoding_width * decoder->decoding_height;
|
||||
int coeff_count = decoding_pixels;
|
||||
quantised_y = calloc(coeff_count, sizeof(int16_t));
|
||||
quantised_co = calloc(coeff_count, sizeof(int16_t));
|
||||
quantised_cg = calloc(coeff_count, sizeof(int16_t));
|
||||
@@ -2183,45 +2321,52 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
// fprintf(stderr, " Max quantised Y coefficient: %d\n", max_quant_y);
|
||||
// }
|
||||
|
||||
// Phase 2: Allocate temporary DWT buffers for cropped region processing
|
||||
float *temp_dwt_y = calloc(decoding_pixels, sizeof(float));
|
||||
float *temp_dwt_co = calloc(decoding_pixels, sizeof(float));
|
||||
float *temp_dwt_cg = calloc(decoding_pixels, sizeof(float));
|
||||
|
||||
if (!temp_dwt_y || !temp_dwt_co || !temp_dwt_cg) {
|
||||
fprintf(stderr, "Error: Failed to allocate temporary DWT buffers\n");
|
||||
free(temp_dwt_y);
|
||||
free(temp_dwt_co);
|
||||
free(temp_dwt_cg);
|
||||
decode_success = 0;
|
||||
goto write_frame;
|
||||
}
|
||||
|
||||
// Dequantise (perceptual for versions 5-8, uniform for 1-4)
|
||||
// Phase 2: Use decoding dimensions and temporary buffers
|
||||
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
|
||||
const int is_ezbc = (decoder->header.entropy_coder == 1);
|
||||
|
||||
if (is_ezbc && is_perceptual) {
|
||||
// EZBC mode with perceptual quantisation: coefficients are normalised
|
||||
// Need to dequantise using perceptual weights (same as twobit-map mode)
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
|
||||
decoder->header.width, decoder->header.height,
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, temp_dwt_y,
|
||||
decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, qy, 0, decoder->frame_count);
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, decoder->dwt_buffer_co,
|
||||
decoder->header.width, decoder->header.height,
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, temp_dwt_co,
|
||||
decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, qco, 1, decoder->frame_count);
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, decoder->dwt_buffer_cg,
|
||||
decoder->header.width, decoder->header.height,
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, temp_dwt_cg,
|
||||
decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
|
||||
} else if (is_perceptual) {
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, decoder->dwt_buffer_y,
|
||||
decoder->header.width, decoder->header.height,
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_y, temp_dwt_y,
|
||||
decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, qy, 0, decoder->frame_count);
|
||||
|
||||
// Debug: Check if values survived the function call
|
||||
// if (decoder->frame_count == 32) {
|
||||
// fprintf(stderr, " RIGHT AFTER dequantise_Y returns: first 5 values: %.1f %.1f %.1f %.1f %.1f\n",
|
||||
// decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2],
|
||||
// decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]);
|
||||
// }
|
||||
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, decoder->dwt_buffer_co,
|
||||
decoder->header.width, decoder->header.height,
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_co, temp_dwt_co,
|
||||
decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, qco, 1, decoder->frame_count);
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, decoder->dwt_buffer_cg,
|
||||
decoder->header.width, decoder->header.height,
|
||||
dequantise_dwt_subbands_perceptual(0, qy, quantised_cg, temp_dwt_cg,
|
||||
decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
|
||||
} else {
|
||||
for (int i = 0; i < coeff_count; i++) {
|
||||
decoder->dwt_buffer_y[i] = quantised_y[i] * qy;
|
||||
decoder->dwt_buffer_co[i] = quantised_co[i] * qco;
|
||||
decoder->dwt_buffer_cg[i] = quantised_cg[i] * qcg;
|
||||
temp_dwt_y[i] = quantised_y[i] * qy;
|
||||
temp_dwt_co[i] = quantised_co[i] * qco;
|
||||
temp_dwt_cg[i] = quantised_cg[i] * qcg;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2253,7 +2398,8 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
// }
|
||||
|
||||
// Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
|
||||
remove_grain_synthesis_decoder(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
|
||||
// Phase 2: Use decoding dimensions and temporary buffer
|
||||
remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);
|
||||
|
||||
// Debug: Check LL band AFTER grain removal
|
||||
@@ -2272,12 +2418,13 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
// }
|
||||
|
||||
// Apply inverse DWT with correct non-power-of-2 dimension handling
|
||||
// Phase 2: Use decoding dimensions and temporary buffers
|
||||
// Note: quantised arrays freed at write_frame label
|
||||
apply_inverse_dwt_multilevel(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
|
||||
apply_inverse_dwt_multilevel(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, decoder->header.wavelet_filter);
|
||||
apply_inverse_dwt_multilevel(decoder->dwt_buffer_co, decoder->header.width, decoder->header.height,
|
||||
apply_inverse_dwt_multilevel(temp_dwt_co, decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, decoder->header.wavelet_filter);
|
||||
apply_inverse_dwt_multilevel(decoder->dwt_buffer_cg, decoder->header.width, decoder->header.height,
|
||||
apply_inverse_dwt_multilevel(temp_dwt_cg, decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.decomp_levels, decoder->header.wavelet_filter);
|
||||
|
||||
// Debug: Check spatial domain values after IDWT
|
||||
@@ -2301,47 +2448,67 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
// }
|
||||
|
||||
// Handle P-frame delta accumulation (in YCoCg float space)
|
||||
// TODO Phase 2: P-frame support with crop encoding needs additional work
|
||||
// - Reference frames are stored at full size but delta may be at cropped size
|
||||
// - Need to extract/composite reference region appropriately
|
||||
if (packet_type == TAV_PACKET_PFRAME && mode == TAV_MODE_DELTA) {
|
||||
for (int i = 0; i < decoder->frame_size; i++) {
|
||||
decoder->dwt_buffer_y[i] += decoder->reference_ycocg_y[i];
|
||||
decoder->dwt_buffer_co[i] += decoder->reference_ycocg_co[i];
|
||||
decoder->dwt_buffer_cg[i] += decoder->reference_ycocg_cg[i];
|
||||
fprintf(stderr, "Warning: P-frame delta mode not yet fully supported with crop encoding\n");
|
||||
for (int i = 0; i < decoding_pixels; i++) {
|
||||
temp_dwt_y[i] += decoder->reference_ycocg_y[i];
|
||||
temp_dwt_co[i] += decoder->reference_ycocg_co[i];
|
||||
temp_dwt_cg[i] += decoder->reference_ycocg_cg[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Convert YCoCg-R/ICtCp to RGB
|
||||
const int is_ictcp = (decoder->header.version % 2 == 0);
|
||||
float max_y = -999, max_co = -999, max_cg = -999;
|
||||
int max_r = 0, max_g = 0, max_b = 0;
|
||||
// Phase 2: Convert cropped region to RGB, then composite to full frame
|
||||
uint8_t *cropped_rgb = malloc(decoding_pixels * 3);
|
||||
if (!cropped_rgb) {
|
||||
fprintf(stderr, "Error: Failed to allocate cropped RGB buffer\n");
|
||||
free(temp_dwt_y);
|
||||
free(temp_dwt_co);
|
||||
free(temp_dwt_cg);
|
||||
decode_success = 0;
|
||||
goto write_frame;
|
||||
}
|
||||
|
||||
for (int i = 0; i < decoder->frame_size; i++) {
|
||||
// Convert YCoCg-R/ICtCp to RGB for cropped region
|
||||
const int is_ictcp = (decoder->header.version % 2 == 0);
|
||||
|
||||
for (int i = 0; i < decoding_pixels; i++) {
|
||||
uint8_t r, g, b;
|
||||
if (is_ictcp) {
|
||||
ictcp_to_rgb(decoder->dwt_buffer_y[i],
|
||||
decoder->dwt_buffer_co[i],
|
||||
decoder->dwt_buffer_cg[i], &r, &g, &b);
|
||||
ictcp_to_rgb(temp_dwt_y[i], temp_dwt_co[i], temp_dwt_cg[i], &r, &g, &b);
|
||||
} else {
|
||||
ycocg_r_to_rgb(decoder->dwt_buffer_y[i],
|
||||
decoder->dwt_buffer_co[i],
|
||||
decoder->dwt_buffer_cg[i], &r, &g, &b);
|
||||
ycocg_r_to_rgb(temp_dwt_y[i], temp_dwt_co[i], temp_dwt_cg[i], &r, &g, &b);
|
||||
}
|
||||
|
||||
// Track max values for debugging
|
||||
// if (decoder->frame_count == 1000) {
|
||||
// if (decoder->dwt_buffer_y[i] > max_y) max_y = decoder->dwt_buffer_y[i];
|
||||
// if (decoder->dwt_buffer_co[i] > max_co) max_co = decoder->dwt_buffer_co[i];
|
||||
// if (decoder->dwt_buffer_cg[i] > max_cg) max_cg = decoder->dwt_buffer_cg[i];
|
||||
// if (r > max_r) max_r = r;
|
||||
// if (g > max_g) max_g = g;
|
||||
// if (b > max_b) max_b = b;
|
||||
// }
|
||||
|
||||
// RGB byte order for FFmpeg rgb24
|
||||
decoder->current_frame_rgb[i * 3 + 0] = r;
|
||||
decoder->current_frame_rgb[i * 3 + 1] = g;
|
||||
decoder->current_frame_rgb[i * 3 + 2] = b;
|
||||
cropped_rgb[i * 3 + 0] = r;
|
||||
cropped_rgb[i * 3 + 1] = g;
|
||||
cropped_rgb[i * 3 + 2] = b;
|
||||
}
|
||||
|
||||
// Composite cropped frame to full frame with black borders
|
||||
uint8_t *full_frame_rgb = composite_to_full_frame(cropped_rgb,
|
||||
decoder->decoding_width, decoder->decoding_height,
|
||||
decoder->header.width, decoder->header.height,
|
||||
decoder->screen_mask_top, decoder->screen_mask_right,
|
||||
decoder->screen_mask_bottom, decoder->screen_mask_left);
|
||||
free(cropped_rgb);
|
||||
free(temp_dwt_y);
|
||||
free(temp_dwt_co);
|
||||
free(temp_dwt_cg);
|
||||
|
||||
if (!full_frame_rgb) {
|
||||
fprintf(stderr, "Error: Failed to composite frame to full size\n");
|
||||
decode_success = 0;
|
||||
goto write_frame;
|
||||
}
|
||||
|
||||
// Copy composited frame to decoder buffer
|
||||
memcpy(decoder->current_frame_rgb, full_frame_rgb, decoder->frame_size * 3);
|
||||
free(full_frame_rgb);
|
||||
|
||||
// if (decoder->frame_count == 1000) {
|
||||
// fprintf(stderr, "\n=== Frame 1000 Value Analysis ===\n");
|
||||
// fprintf(stderr, "Max YCoCg values: Y=%.1f, Co=%.1f, Cg=%.1f\n", max_y, max_co, max_cg);
|
||||
@@ -2360,10 +2527,12 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
// fprintf(stderr, "\n");
|
||||
// }
|
||||
|
||||
// Update reference YCoCg frame
|
||||
memcpy(decoder->reference_ycocg_y, decoder->dwt_buffer_y, decoder->frame_size * sizeof(float));
|
||||
memcpy(decoder->reference_ycocg_co, decoder->dwt_buffer_co, decoder->frame_size * sizeof(float));
|
||||
memcpy(decoder->reference_ycocg_cg, decoder->dwt_buffer_cg, decoder->frame_size * sizeof(float));
|
||||
// TODO Phase 2: Reference YCoCg frame update needs rework for crop encoding
|
||||
// Currently not updated because we use temporary buffers that are already freed
|
||||
// P-frame support will need to store reference at appropriate dimensions
|
||||
// memcpy(decoder->reference_ycocg_y, temp_dwt_y, decoding_pixels * sizeof(float));
|
||||
// memcpy(decoder->reference_ycocg_co, temp_dwt_co, decoding_pixels * sizeof(float));
|
||||
// memcpy(decoder->reference_ycocg_cg, temp_dwt_cg, decoding_pixels * sizeof(float));
|
||||
}
|
||||
|
||||
// Update reference frame
|
||||
@@ -2622,9 +2791,20 @@ int main(int argc, char *argv[]) {
|
||||
entry->bottom = bottom;
|
||||
entry->left = left;
|
||||
|
||||
// Phase 2: Update current active mask and decoding dimensions
|
||||
decoder->screen_mask_top = top;
|
||||
decoder->screen_mask_right = right;
|
||||
decoder->screen_mask_bottom = bottom;
|
||||
decoder->screen_mask_left = left;
|
||||
|
||||
// Calculate new decoding dimensions (active region size)
|
||||
decoder->decoding_width = decoder->header.width - left - right;
|
||||
decoder->decoding_height = decoder->header.height - top - bottom;
|
||||
|
||||
if (verbose) {
|
||||
fprintf(stderr, "Packet %d: SCREEN_MASK (0x%02X) - frame=%u top=%u right=%u bottom=%u left=%u\n",
|
||||
total_packets, packet_type, frame_num, top, right, bottom, left);
|
||||
fprintf(stderr, "Packet %d: SCREEN_MASK (0x%02X) - frame=%u top=%u right=%u bottom=%u left=%u (decoding: %dx%d)\n",
|
||||
total_packets, packet_type, frame_num, top, right, bottom, left,
|
||||
decoder->decoding_width, decoder->decoding_height);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@@ -2689,27 +2869,47 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Postprocess coefficients based on entropy_coder value
|
||||
// Phase 2: Use decoding dimensions (actual encoded size) for postprocessing
|
||||
int decoding_pixels = decoder->decoding_width * decoder->decoding_height;
|
||||
// Keep full frame size for buffer allocation
|
||||
const int num_pixels = decoder->header.width * decoder->header.height;
|
||||
int16_t ***quantised_gop;
|
||||
|
||||
// GOP dimensions (may differ from full frame with crop encoding)
|
||||
int gop_width = decoder->decoding_width;
|
||||
int gop_height = decoder->decoding_height;
|
||||
|
||||
if (decoder->header.entropy_coder == 2) {
|
||||
// RAW format: simple concatenated int16 arrays
|
||||
if (verbose) {
|
||||
fprintf(stderr, " Using RAW postprocessing (entropy_coder=2)\n");
|
||||
fprintf(stderr, " Using RAW postprocessing (entropy_coder=2) for %dx%d (%d pixels)\n",
|
||||
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
|
||||
}
|
||||
quantised_gop = postprocess_gop_raw(decompressed_data, decompressed_size,
|
||||
gop_size, num_pixels, decoder->header.channel_layout);
|
||||
} else if (decoder->header.entropy_coder == 1) {
|
||||
// EZBC format: embedded zero-block coding
|
||||
if (verbose) {
|
||||
fprintf(stderr, " Using EZBC postprocessing (entropy_coder=1)\n");
|
||||
fprintf(stderr, " Using EZBC postprocessing (entropy_coder=1) for %dx%d (%d pixels)\n",
|
||||
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
|
||||
}
|
||||
// EZBC will return actual GOP dimensions (may be cropped with crop encoding)
|
||||
quantised_gop = postprocess_gop_ezbc(decompressed_data, decompressed_size,
|
||||
gop_size, num_pixels, decoder->header.channel_layout);
|
||||
gop_size, num_pixels, decoder->header.channel_layout,
|
||||
&gop_width, &gop_height);
|
||||
// Update decoding_pixels to match actual GOP dimensions
|
||||
if (gop_width > 0 && gop_height > 0) {
|
||||
decoding_pixels = gop_width * gop_height;
|
||||
if (verbose) {
|
||||
fprintf(stderr, " Actual GOP dimensions from EZBC: %dx%d (%d pixels)\n",
|
||||
gop_width, gop_height, decoding_pixels);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Default: Twobitmap format (entropy_coder=0)
|
||||
if (verbose) {
|
||||
fprintf(stderr, " Using Twobitmap postprocessing (entropy_coder=0)\n");
|
||||
fprintf(stderr, " Using Twobitmap postprocessing (entropy_coder=0) for %dx%d (%d pixels)\n",
|
||||
decoder->decoding_width, decoder->decoding_height, decoding_pixels);
|
||||
}
|
||||
quantised_gop = postprocess_gop_unified(decompressed_data, decompressed_size,
|
||||
gop_size, num_pixels, decoder->header.channel_layout);
|
||||
@@ -2724,14 +2924,15 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Allocate GOP float buffers
|
||||
// Phase 2: Allocate at decoding size (cropped region), will composite to full frame later
|
||||
float **gop_y = malloc(gop_size * sizeof(float *));
|
||||
float **gop_co = malloc(gop_size * sizeof(float *));
|
||||
float **gop_cg = malloc(gop_size * sizeof(float *));
|
||||
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
gop_y[t] = calloc(num_pixels, sizeof(float));
|
||||
gop_co[t] = calloc(num_pixels, sizeof(float));
|
||||
gop_cg[t] = calloc(num_pixels, sizeof(float));
|
||||
gop_y[t] = calloc(decoding_pixels, sizeof(float));
|
||||
gop_co[t] = calloc(decoding_pixels, sizeof(float));
|
||||
gop_cg[t] = calloc(decoding_pixels, sizeof(float));
|
||||
}
|
||||
|
||||
// Dequantise with temporal scaling (perceptual quantisation for versions 5-8)
|
||||
@@ -2751,17 +2952,18 @@ int main(int argc, char *argv[]) {
|
||||
const float base_q_co = roundf(QLUT[decoder->header.quantiser_co] * temporal_scale);
|
||||
const float base_q_cg = roundf(QLUT[decoder->header.quantiser_cg] * temporal_scale);
|
||||
|
||||
// Phase 2: Use GOP dimensions (may be cropped) for dequantisation
|
||||
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
|
||||
quantised_gop[t][0], gop_y[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
gop_width, gop_height,
|
||||
decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
|
||||
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
|
||||
quantised_gop[t][1], gop_co[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
gop_width, gop_height,
|
||||
decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
|
||||
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
|
||||
quantised_gop[t][2], gop_cg[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
gop_width, gop_height,
|
||||
decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
|
||||
|
||||
if (t == 0 && verbose) {
|
||||
@@ -2786,21 +2988,23 @@ int main(int argc, char *argv[]) {
|
||||
const float base_q_cg = roundf(QLUT[decoder->header.quantiser_cg] * temporal_scale);
|
||||
|
||||
if (is_perceptual) {
|
||||
// Phase 2: Use GOP dimensions (may be cropped) for dequantisation
|
||||
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
|
||||
quantised_gop[t][0], gop_y[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
gop_width, gop_height,
|
||||
decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
|
||||
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
|
||||
quantised_gop[t][1], gop_co[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
gop_width, gop_height,
|
||||
decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
|
||||
dequantise_dwt_subbands_perceptual(0, QLUT[decoder->header.quantiser_y],
|
||||
quantised_gop[t][2], gop_cg[t],
|
||||
decoder->header.width, decoder->header.height,
|
||||
gop_width, gop_height,
|
||||
decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
|
||||
} else {
|
||||
// Uniform quantisation for older versions
|
||||
for (int i = 0; i < num_pixels; i++) {
|
||||
// Phase 2: Use decoding_pixels for uniform dequantisation
|
||||
for (int i = 0; i < decoding_pixels; i++) {
|
||||
gop_y[t][i] = quantised_gop[t][0][i] * base_q_y;
|
||||
gop_co[t][i] = quantised_gop[t][1][i] * base_q_co;
|
||||
gop_cg[t][i] = quantised_gop[t][2][i] * base_q_cg;
|
||||
@@ -2819,14 +3023,16 @@ int main(int argc, char *argv[]) {
|
||||
free(quantised_gop);
|
||||
|
||||
|
||||
// Phase 2: Use GOP dimensions (may be cropped) for grain removal
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
remove_grain_synthesis_decoder(gop_y[t], decoder->header.width, decoder->header.height,
|
||||
remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height,
|
||||
decoder->header.decomp_levels, decoder->frame_count + t,
|
||||
decoder->header.quantiser_y);
|
||||
}
|
||||
|
||||
// Apply inverse 3D DWT (spatial + temporal)
|
||||
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, decoder->header.width, decoder->header.height,
|
||||
// Phase 2: Use GOP dimensions (may be cropped) for inverse DWT
|
||||
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, gop_width, gop_height,
|
||||
gop_size, decoder->header.decomp_levels, temporal_levels,
|
||||
decoder->header.wavelet_filter);
|
||||
|
||||
@@ -2859,35 +3065,78 @@ int main(int argc, char *argv[]) {
|
||||
// (size_t)decoder->frame_size * 3, decoder->header.width * decoder->header.height * 3);
|
||||
// }
|
||||
|
||||
// Calculate consistent screen mask offsets for crop-encoded GOPs
|
||||
// When crop encoding is active, all frames in GOP use same dimensions
|
||||
const int is_crop_encoded = (gop_width != decoder->header.width ||
|
||||
gop_height != decoder->header.height);
|
||||
uint16_t gop_mask_top = 0, gop_mask_bottom = 0, gop_mask_left = 0, gop_mask_right = 0;
|
||||
|
||||
if (is_crop_encoded) {
|
||||
// Center the cropped region in the full frame
|
||||
if (gop_height < decoder->header.height) {
|
||||
gop_mask_top = (decoder->header.height - gop_height) / 2;
|
||||
gop_mask_bottom = decoder->header.height - gop_height - gop_mask_top;
|
||||
}
|
||||
if (gop_width < decoder->header.width) {
|
||||
gop_mask_left = (decoder->header.width - gop_width) / 2;
|
||||
gop_mask_right = decoder->header.width - gop_width - gop_mask_left;
|
||||
}
|
||||
if (verbose && decoder->frame_count == 0) {
|
||||
fprintf(stderr, "[GOP-Crop] Centering %dx%d in %dx%d: top=%u, bottom=%u, left=%u, right=%u\n",
|
||||
gop_width, gop_height, decoder->header.width, decoder->header.height,
|
||||
gop_mask_top, gop_mask_bottom, gop_mask_left, gop_mask_right);
|
||||
}
|
||||
}
|
||||
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
// Allocate frame buffer
|
||||
uint8_t *frame_rgb = malloc(decoder->frame_size * 3);
|
||||
if (!frame_rgb) {
|
||||
fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n");
|
||||
// Update screen mask only if NOT crop-encoded
|
||||
// Crop-encoded GOPs use consistent offsets calculated above
|
||||
if (!is_crop_encoded) {
|
||||
update_screen_mask(decoder, decoder->frame_count + t);
|
||||
}
|
||||
|
||||
// Phase 2: Convert cropped region to RGB, then composite to full frame
|
||||
uint8_t *cropped_rgb = malloc(decoding_pixels * 3);
|
||||
if (!cropped_rgb) {
|
||||
fprintf(stderr, "Error: Failed to allocate cropped GOP frame buffer\n");
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Convert to RGB
|
||||
for (int i = 0; i < decoder->frame_size; i++) {
|
||||
// Convert cropped region to RGB
|
||||
for (int i = 0; i < decoding_pixels; i++) {
|
||||
uint8_t r, g, b;
|
||||
if (is_ictcp) {
|
||||
ictcp_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
|
||||
} else {
|
||||
ycocg_r_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
|
||||
}
|
||||
frame_rgb[i * 3 + 0] = r;
|
||||
frame_rgb[i * 3 + 1] = g;
|
||||
frame_rgb[i * 3 + 2] = b;
|
||||
cropped_rgb[i * 3 + 0] = r;
|
||||
cropped_rgb[i * 3 + 1] = g;
|
||||
cropped_rgb[i * 3 + 2] = b;
|
||||
}
|
||||
|
||||
// Update active screen mask for this GOP frame
|
||||
update_screen_mask(decoder, decoder->frame_count + t);
|
||||
// Composite cropped frame to full frame with black borders
|
||||
// Use GOP-consistent offsets for crop-encoded, or per-frame offsets otherwise
|
||||
const uint16_t mask_top = is_crop_encoded ? gop_mask_top : decoder->screen_mask_top;
|
||||
const uint16_t mask_bottom = is_crop_encoded ? gop_mask_bottom : decoder->screen_mask_bottom;
|
||||
const uint16_t mask_left = is_crop_encoded ? gop_mask_left : decoder->screen_mask_left;
|
||||
const uint16_t mask_right = is_crop_encoded ? gop_mask_right : decoder->screen_mask_right;
|
||||
|
||||
// Fill masked regions with black (letterbox/pillarbox bars)
|
||||
fill_masked_regions(frame_rgb, decoder->header.width, decoder->header.height,
|
||||
decoder->screen_mask_top, decoder->screen_mask_right,
|
||||
decoder->screen_mask_bottom, decoder->screen_mask_left);
|
||||
uint8_t *frame_rgb = composite_to_full_frame(cropped_rgb,
|
||||
gop_width, gop_height,
|
||||
decoder->header.width, decoder->header.height,
|
||||
mask_top, mask_right, mask_bottom, mask_left);
|
||||
free(cropped_rgb);
|
||||
|
||||
if (!frame_rgb) {
|
||||
fprintf(stderr, "Error: Failed to composite GOP frame to full size\n");
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Note: Phase 1 fill_masked_regions() is now replaced by Phase 2 composite function
|
||||
// which places the decoded cropped frame into a full-frame buffer with black borders
|
||||
|
||||
// Write frame to FFmpeg video pipe
|
||||
const size_t bytes_to_write = decoder->frame_size * 3;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user