Files
tsvm/video_encoder/decoder_tav.c
2025-11-04 18:22:41 +09:00

3288 lines
134 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Created by CuriousTorvald and Claude on 2025-11-03.
// TAV Decoder - Converts TAV video to FFV1 format with TAD audio to PCMu8
// Based on TSVM decoder implementation (GraphicsJSR223Delegate.kt + playtav.js)
// Only supports features available in TSVM decoder (no MC-EZBC, no MPEG-style motion compensation)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include <unistd.h>
#include <sys/wait.h>
#include <getopt.h>
#include <signal.h>
// Vendor string identifying this decoder build (where it is emitted is outside this section)
#define DECODER_VENDOR_STRING "Decoder-TAV 20251103 (ffv1+pcmu8)"
// TAV format constants
// 8-byte file signature: 0x1F followed by the ASCII bytes "TSVMTAV"
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
#define TAV_MODE_SKIP 0x00
#define TAV_MODE_INTRA 0x01
#define TAV_MODE_DELTA 0x02
// TAV packet types (only those supported by TSVM decoder)
#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe) - SUPPORTED
#define TAV_PACKET_PFRAME 0x11 // Predicted frame - SUPPORTED (delta mode)
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP - SUPPORTED
#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio - SUPPORTED (passthrough)
#define TAV_PACKET_AUDIO_PCM8 0x21 // 8-bit PCM audio - SUPPORTED
#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio - SUPPORTED (decode to PCMu8)
#define TAV_PACKET_AUDIO_TRACK 0x40 // Bundled audio track - SUPPORTED (passthrough)
#define TAV_PACKET_SUBTITLE 0x30 // Subtitle - SKIPPED
#define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header - SKIPPED
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync packet - SKIPPED
#define TAV_PACKET_TIMECODE 0xFD // Timecode - SKIPPED
#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC sync - SKIPPED
#define TAV_PACKET_SYNC 0xFF // Sync - SKIPPED
// Unsupported packet types (not in TSVM decoder)
#define TAV_PACKET_PFRAME_RESIDUAL 0x14 // P-frame MPEG-style - NOT SUPPORTED
#define TAV_PACKET_BFRAME_RESIDUAL 0x15 // B-frame MPEG-style - NOT SUPPORTED
// Channel layout definitions
#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg/I-Ct-Cp
#define CHANNEL_LAYOUT_YCOCG_A 1 // Y-Co-Cg-A/I-Ct-Cp-A
#define CHANNEL_LAYOUT_Y_ONLY 2 // Y/I only
#define CHANNEL_LAYOUT_Y_A 3 // Y-A/I-A
#define CHANNEL_LAYOUT_COCG 4 // Co-Cg/Ct-Cp
#define CHANNEL_LAYOUT_COCG_A 5 // Co-Cg-A/Ct-Cp-A
// Wavelet filter types
#define WAVELET_5_3_REVERSIBLE 0
#define WAVELET_9_7_IRREVERSIBLE 1
#define WAVELET_BIORTHOGONAL_13_7 2
#define WAVELET_DD4 16
#define WAVELET_HAAR 255
// Tile sizes (match TSVM)
#define TILE_SIZE_X 640
#define TILE_SIZE_Y 540
// TILE_MARGIN evaluates to 4 * (1 << 3) = 32 samples per side,
// so a padded tile is 704 x 604.
#define DWT_FILTER_HALF_SUPPORT 4
#define TILE_MARGIN_LEVELS 3
#define TILE_MARGIN (DWT_FILTER_HALF_SUPPORT * (1 << TILE_MARGIN_LEVELS))
#define PADDED_TILE_SIZE_X (TILE_SIZE_X + 2 * TILE_MARGIN)
#define PADDED_TILE_SIZE_Y (TILE_SIZE_Y + 2 * TILE_MARGIN)
// Limit an integer to the range [min, max]: values below min become min,
// values above max become max, everything else passes through.
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
//=============================================================================
// TAV Header Structure (32 bytes)
//=============================================================================
// Fixed-size file header. The struct is declared packed so no padding is
// inserted between members; the member sizes add up to exactly 32 bytes
// (8 + 1 + 2 + 2 + 1 + 4 + 12 single-byte fields + 2 reserved bytes).
// NOTE(review): multi-byte fields (width, height, total_frames) are stored
// in whatever byte order the reader copies them in; the on-disk endianness
// is not established in this section - confirm against the format spec.
typedef struct {
uint8_t magic[8];
uint8_t version;
uint16_t width;
uint16_t height;
uint8_t fps;
uint32_t total_frames;
uint8_t wavelet_filter;
uint8_t decomp_levels;
uint8_t quantiser_y;
uint8_t quantiser_co;
uint8_t quantiser_cg;
uint8_t extra_flags;
uint8_t video_flags;
uint8_t encoder_quality;
uint8_t channel_layout;
uint8_t entropy_coder;
uint8_t reserved[2];
uint8_t device_orientation;
uint8_t file_role;
} __attribute__((packed)) tav_header_t;
//=============================================================================
// Quantization Lookup Table (matches TSVM exactly)
//=============================================================================
// Maps a quantiser index to a quantiser value. 256 entries, growing
// piecewise-linearly: step 1 for 1..64, then 32 entries each with step 2
// (66..128), 4 (132..256), 8 (264..512), 16 (528..1024), 32 (1056..2048)
// and 64 (2112..4096).
static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
// Perceptual quantization constants (match TSVM)
// The four tables below have 6 entries each and are indexed by the quality
// level 0..5 produced by tav_derive_encoder_qindex(). The luma pair scales
// and offsets the LH weight to obtain the HL weight; the chroma pair is
// applied to the chroma base curve for the HL and HH subbands.
static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f};
static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
static const float ANISOTROPY_MULT_CHROMA[] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f};
static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f};
// Extra luma HL/HH weight factors applied when the perceptual level falls in
// [2.8, 3.2] (FOUR_PIXEL_DETAILER) or [1.8, 2.2] (TWO_PIXEL_DETAILER).
static const float FOUR_PIXEL_DETAILER = 0.88f;
static const float TWO_PIXEL_DETAILER = 0.92f;
//=============================================================================
// DWT Subband Layout Calculation (matches TSVM)
//=============================================================================
// Describes one subband as a contiguous run inside the linear coefficient
// array; entries are produced by calculate_subband_layout().
typedef struct {
int level; // Decomposition level (1 to decompLevels)
int subband_type; // 0=LL, 1=LH, 2=HL, 3=HH
int coeff_start; // Starting index in linear coefficient array
int coeff_count; // Number of coefficients in this subband
} dwt_subband_info_t;
// Build the subband table for a `decomp_levels`-deep 2D DWT.
//
// Entry 0 is the LL band, tagged with level == decomp_levels and sized
// (width >> decomp_levels) * (height >> decomp_levels). It is followed, for
// level = decomp_levels down to 1, by LH, HL and HH entries (types 1, 2, 3),
// each sized (width >> s) * (height >> s) with s = decomp_levels - level + 1.
// Start offsets are assigned consecutively in emission order.
//
// Returns the number of entries written, 1 + 3 * decomp_levels; `subbands`
// must have room for that many.
static int calculate_subband_layout(int width, int height, int decomp_levels, dwt_subband_info_t *subbands) {
    int written = 0;
    int next_start = 0;

    // Coarsest approximation band comes first
    const int ll_size = (width >> decomp_levels) * (height >> decomp_levels);
    subbands[written++] = (dwt_subband_info_t){decomp_levels, 0, next_start, ll_size};
    next_start += ll_size;

    // Three detail bands (LH, HL, HH) per level, all the same size
    for (int level = decomp_levels; level >= 1; level--) {
        const int shift = decomp_levels - level + 1;
        const int band_size = (width >> shift) * (height >> shift);
        for (int type = 1; type <= 3; type++) {
            subbands[written++] = (dwt_subband_info_t){level, type, next_start, band_size};
            next_start += band_size;
        }
    }
    return written;
}
//=============================================================================
// Perceptual Quantization Model (matches TSVM exactly)
//=============================================================================
// Derive the perceptual quality level (0..5) used to index the anisotropy
// tables.
//
// q_index    : explicit quality index; when positive, the level is
//              q_index - 1, clamped to 5 so an out-of-range value can never
//              index past the 6-entry tables.
// q_y_global : luma quantiser, used only when q_index <= 0; larger
//              quantisers map to lower levels.
// Returns a value in [0, 5].
static int tav_derive_encoder_qindex(int q_index, int q_y_global) {
    enum { MAX_QUALITY_LEVEL = 5 };

    if (q_index > 0) {
        const int level = q_index - 1;
        return level > MAX_QUALITY_LEVEL ? MAX_QUALITY_LEVEL : level;
    }

    if (q_y_global >= 60) return 0;
    if (q_y_global >= 42) return 1;
    if (q_y_global >= 25) return 2;
    if (q_y_global >= 12) return 3;
    if (q_y_global >= 6) return 4;
    return MAX_QUALITY_LEVEL;
}
// Luma LH weight as a function of the perceptual level.
// The curve is piecewise: linear for level >= 4 and cubic below; both pieces
// evaluate to H4 (1.2) at level == 4.
static float perceptual_model3_LH(float level) {
    const float H4 = 1.2f;
    const float K = 2.0f; // CRITICAL: fixed value for a fixed curve; the quantiser scales it up anyway
    const float K12 = K * 12.0f;

    if (level >= 4.0f) {
        // Linear segment
        return H4 - ((K + 1.0f) / 15.0f) * (level - 4.0f);
    }

    // Cubic segment; C3 is the constant that makes the pieces meet at level 4
    const float C3 = -1.0f / 45.0f * (K12 + 92.0f);
    return (-level / 180.0f) * (K12 + 5.0f * level * level - 60.0f * level + 252.0f) - C3 + H4;
}
// Luma HL weight: the LH weight scaled and offset by the anisotropy entries
// for `quality`. `quality` indexes 6-entry tables and must be in 0..5.
static float perceptual_model3_HL(int quality, float LH) {
    const float gain = ANISOTROPY_MULT[quality];
    const float offset = ANISOTROPY_BIAS[quality];
    return LH * gain + offset;
}
// Linear blend of x and y: a == 0 yields x, a == 1 yields y.
// `a` is not clamped, so values outside [0, 1] extrapolate.
static float lerp(float x, float y, float a) {
    const float x_part = x * (1.0f - a);
    const float y_part = y * a;
    return x_part + y_part;
}
// Luma HH weight: a blend between the LH and HL weights whose blend factor
// is (sqrt(level) - 1) * 0.5 + 0.5, i.e. 0.5 at level 1 and growing with level.
static float perceptual_model3_HH(float LH, float HL, float level) {
    const float half_root = (sqrtf(level) - 1.0f) * 0.5f;
    return lerp(LH, HL, half_root + 0.5f);
}
// Luma LL weight, derived from the LH curve: the LH weight at `level`
// divided by the ratio LH(level - 1) / LH(level).
static float perceptual_model3_LL(float level) {
    const float lh_here = perceptual_model3_LH(level);
    const float lh_below = perceptual_model3_LH(level - 1.0f);
    const float ratio = lh_below / lh_here;
    return lh_here / ratio;
}
// Chroma base weight: a straight line through 1.0 at level == 4 whose slope
// is -1 / (0.5 * quality^2 + 1), so it flattens as `quality` grows.
static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float slope = 1.0f / (0.5f * quality * quality + 1.0f);
    return 1.0f - slope * (level - 4.0f);
}
// Perceptual weight that multiplies the base quantiser for one subband.
//
// q_index, q_y_global : passed to tav_derive_encoder_qindex() to obtain the
//                       quality level
// level0              : decomposition level of the subband
// subband_type        : 0=LL, 1=LH, 2=HL, anything else is treated as HH
// is_chroma           : non-zero selects the chroma model
// max_levels          : total decomposition levels
//
// level0 is first remapped linearly from [1, max_levels] onto [1, 6].
// NOTE(review): max_levels == 1 makes the remap divide by zero.
static float get_perceptual_weight(int q_index, int q_y_global, int level0, int subband_type,
                                   int is_chroma, int max_levels) {
    const float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
    const int quality_level = tav_derive_encoder_qindex(q_index, q_y_global);

    if (is_chroma) {
        // Chroma weights never drop below 1.0
        const float base = perceptual_model3_chroma_basecurve(quality_level, level - 1);
        switch (subband_type) {
        case 0:
            return 1.0f;
        case 1:
            return fmaxf(base, 1.0f);
        case 2:
            return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level], 1.0f);
        default:
            return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level] + ANISOTROPY_BIAS_CHROMA[quality_level], 1.0f);
        }
    }

    // Luma
    if (subband_type == 0) {
        return perceptual_model3_LL(level);
    }

    const float LH = perceptual_model3_LH(level);
    if (subband_type == 1) {
        return LH;
    }

    const float HL = perceptual_model3_HL(quality_level, LH);

    // HL and HH share the same detail-preservation factor around levels 2 and 3
    float detailer = 1.0f;
    if (level >= 1.8f && level <= 2.2f) {
        detailer = TWO_PIXEL_DETAILER;
    } else if (level >= 2.8f && level <= 3.2f) {
        detailer = FOUR_PIXEL_DETAILER;
    }

    if (subband_type == 2) {
        return HL * detailer;
    }
    return perceptual_model3_HH(LH, HL, level) * detailer;
}
// Dequantise one channel's DWT coefficients using per-subband perceptual weights.
//
// q_index, q_y_global : select the perceptual quality level
// quantized           : width * height quantised coefficients (linear subband layout)
// dequantized         : output, width * height floats; always zero-filled first
// decomp_levels       : number of DWT levels; must fit the 32-entry subband
//                       table (0..10), otherwise an error is printed and the
//                       output is left all-zero
// base_quantizer      : quantiser step before perceptual weighting
// is_chroma           : non-zero selects the chroma weighting model
// frame_num           : unused; kept for interface compatibility
static void dequantize_dwt_subbands_perceptual(int q_index, int q_y_global, const int16_t *quantized,
                                               float *dequantized, int width, int height, int decomp_levels,
                                               float base_quantizer, int is_chroma, int frame_num) {
    enum { MAX_SUBBANDS = 32 };
    dwt_subband_info_t subbands[MAX_SUBBANDS];
    const int coeff_count = width * height;

    (void)frame_num;

    memset(dequantized, 0, (size_t)coeff_count * sizeof(float));

    // The layout writes 1 + 3 * decomp_levels entries; reject anything that
    // would overrun the table (or shift by a negative amount).
    if (decomp_levels < 0 || decomp_levels > (MAX_SUBBANDS - 1) / 3) {
        fprintf(stderr, "Error: unsupported decomposition level count %d\n", decomp_levels);
        return;
    }

    const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);

    for (int s = 0; s < subband_count; s++) {
        const dwt_subband_info_t *subband = &subbands[s];
        const float weight = get_perceptual_weight(q_index, q_y_global, subband->level,
                                                   subband->subband_type, is_chroma, decomp_levels);
        const float effective_quantizer = base_quantizer * weight;

        // Subbands may extend past the coefficient array; clip them to it
        int end = subband->coeff_start + subband->coeff_count;
        if (end > coeff_count) {
            end = coeff_count;
        }

        for (int idx = subband->coeff_start; idx < end; idx++) {
            // CRITICAL: must ROUND to match the EZBC encoder's roundf() behaviour.
            // Truncation limits the brightness range (e.g. Y maxes at 227 instead of 255).
            dequantized[idx] = roundf(quantized[idx] * effective_quantizer);
        }
    }
}
//=============================================================================
// Grain Synthesis Removal (matches TSVM exactly)
//=============================================================================
// Stateless hash used as the grain-synthesis RNG (matches encoder).
// The four inputs are folded into one 32-bit key, which is then scrambled by
// alternating xor-shift and multiply steps. The same inputs always give the
// same output.
static inline uint32_t tav_grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) {
    uint32_t h = (frame * 0x9e3779b9u) ^ (band * 0x7f4a7c15u) ^ (y << 16) ^ x;

    h ^= h >> 16;
    h *= 0x7feb352du;
    h ^= h >> 15;
    h *= 0x846ca68bu;
    h ^= h >> 16;
    return h;
}
// Turn one 32-bit random word into triangular-distributed noise in [-1.0, 1.0].
// The low and high 16-bit halves are each scaled to [0, 1]; their sum, shifted
// down by 1, is the result.
static inline float tav_grain_triangular_noise(uint32_t rng_val) {
    const uint32_t low_half = rng_val & 0xFFFFu;
    const uint32_t high_half = (rng_val >> 16) & 0xFFFFu;

    const float u_low = low_half / 65535.0f;
    const float u_high = high_half / 65535.0f;

    return (u_low + u_high) - 1.0f;
}
// Remove grain synthesis from DWT coefficients (decoder subtracts the noise).
// Must be called AFTER dequantization but BEFORE the inverse DWT.
//
// coeffs        : width * height coefficients, modified in place
// decomp_levels : number of DWT levels; values that do not fit the 32-entry
//                 subband table (outside 0..10) leave `coeffs` untouched
// frame_num     : frame index fed to the deterministic RNG
// q_y_global    : luma quantiser; capped at 32 when scaling the noise
static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
                                           int decomp_levels, int frame_num, int q_y_global) {
    enum { MAX_SUBBANDS = 32 };
    dwt_subband_info_t subbands[MAX_SUBBANDS];

    // The layout writes 1 + 3 * decomp_levels entries; refuse to overrun the table
    if (decomp_levels < 0 || decomp_levels > (MAX_SUBBANDS - 1) / 3) {
        fprintf(stderr, "Error: unsupported decomposition level count %d\n", decomp_levels);
        return;
    }

    const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
    const int total_coeffs = width * height;

    // The Kotlin reference is documented as qYGlobal.coerceAtMost(32) * 0.8f;
    // this decoder uses 0.25f instead (the amplitude somehow behaves differently here).
    const float noise_amplitude = (q_y_global < 32 ? q_y_global : 32) * 0.25f;

    for (int s = 0; s < subband_count; s++) {
        const dwt_subband_info_t *subband = &subbands[s];

        // Entries with level 0 are skipped.
        // NOTE(review): calculate_subband_layout() tags the LL band with
        // level == decomp_levels, so for decomp_levels >= 1 this does not
        // skip LL - confirm this matches the encoder.
        if (subband->level == 0) continue;

        // Band index for the RNG (matches Kotlin: level + subbandType * 31 + 16777619)
        const uint32_t band = subband->level + subband->subband_type * 31 + 16777619;

        // Clip the subband to the coefficient array
        int end = subband->coeff_start + subband->coeff_count;
        if (end > total_coeffs) {
            end = total_coeffs;
        }

        for (int idx = subband->coeff_start; idx < end; idx++) {
            // The RNG is keyed on the 2D position of the linear index
            const int y = idx / width;
            const int x = idx % width;

            // Regenerate the encoder's noise and subtract it
            const uint32_t rng_val = tav_grain_synthesis_rng(frame_num, band, x, y);
            coeffs[idx] -= tav_grain_triangular_noise(rng_val) * noise_amplitude;
        }
    }
}
//=============================================================================
// Number of DWT levels used for a TAD chunk.
// Currently a fixed 9 regardless of `chunk_size`; a size-derived computation
// existed previously but is disabled.
static int calculate_dwt_levels(int chunk_size) {
    (void)chunk_size;
    return 9;
}
//=============================================================================
// Multilevel inverse DWT for the TAD audio path (applies the 9/7 inverse; only the inverse is needed for decoding)
//=============================================================================
// Forward declaration (defined later in TAV decoder section)
static void dwt_97_inverse_1d(float *data, int length);
// Undo a `levels`-deep 1D DWT in place.
//
// The forward transform operates on data[0..length-1], then on the first
// (length+1)/2 samples, and so on. The inverse replays those lengths in
// reverse, from the shortest back up to `length`, applying the 9/7 inverse
// at each step. The length for each step is derived on the fly, so no heap
// allocation is needed. Does nothing when levels <= 0.
static void dwt_inverse_multilevel(float *data, int length, int levels) {
    for (int level = levels - 1; level >= 0; level--) {
        // Length seen by forward pass number `level`: `length` halved
        // (rounding up) `level` times.
        int current_length = length;
        for (int i = 0; i < level; i++) {
            current_length = (current_length + 1) / 2;
        }
        dwt_97_inverse_1d(data, current_length);
    }
}
//=============================================================================
// Helper Functions for TAD Decoder
//=============================================================================
// Floating-point clamp to [min, max]. A NaN input fails both comparisons and
// is returned unchanged.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
//=============================================================================
// M/S Stereo Correlation (inverse of decorrelation)
//=============================================================================
// Uniform random value from rand(), scaled by 1 / (RAND_MAX + 1) so the
// result is nominally in [0, 1).
// NOTE(review): where RAND_MAX exceeds float precision, (float)rand() can
// round up to the divisor and 1.0f may be returned - confirm whether strict
// exclusion of 1.0 matters to callers.
static inline float frand01(void) {
    const float divisor = (float)RAND_MAX + 1.0f;
    const float draw = (float)rand();
    return draw / divisor;
}
// TPDF (triangular) noise: the difference of two frand01() draws, nominally
// within (-1, +1).
// NOTE(review): each draw advances rand(), and C does not specify which of the
// two operands is evaluated first, so the exact value sequence can differ
// between compilers; the expression is kept as written for that reason.
static inline float tpdf1(void) {
return (frand01() - frand01());
}
// Convert mid/side samples back to left/right:
//   left = mid + side, right = mid - side, each clamped to [-1, 1].
// All four arrays hold `count` samples.
static void ms_correlate(const float *mid, const float *side, float *left, float *right, size_t count) {
    size_t i = 0;
    while (i < count) {
        const float m = mid[i];
        const float s = side[i];
        const float sum = m + s;
        const float diff = m - s;
        left[i] = FCLAMP(sum, -1.0f, 1.0f);
        right[i] = FCLAMP(diff, -1.0f, 1.0f);
        i++;
    }
}
// Sign of x as a float: 1 for positive, -1 for negative, 0 otherwise
// (zero and NaN both give 0).
static float signum(float x) {
    return (x > 0.0f) ? 1.0f : ((x < 0.0f) ? -1.0f : 0.0f);
}
// Expand the dynamic range of both channels in place using a sign-preserving
// power law: v -> sign(v) * |v|^1.4142.
// NOTE(review): the earlier comment described the exponent as 1/gamma with
// gamma = 0.5 (which would be 2), but the code uses 1.4142 - confirm which
// one matches the encoder.
static void expand_gamma(float *left, float *right, size_t count) {
    const float exponent = 1.4142f;

    for (size_t n = 0; n < count; n++) {
        const float l = left[n];
        left[n] = signum(l) * powf(fabsf(l), exponent);

        const float r = right[n];
        right[n] = signum(r) * powf(fabsf(r), exponent);
    }
}
// Expand both channels in place with the inverse mu-law curve (mu = 255):
//   v -> sign(v) * ((1 + mu)^|v| - 1) / mu
static void expand_mu_law(float *left, float *right, size_t count) {
    const float mu = 255.0f;

    for (size_t n = 0; n < count; n++) {
        const float l = left[n];
        left[n] = signum(l) * (powf(1.0f + mu, fabsf(l)) - 1.0f) / mu;

        const float r = right[n];
        right[n] = signum(r) * (powf(1.0f + mu, fabsf(r)) - 1.0f) / mu;
    }
}
// Quantise one float sample to unsigned 8-bit with second-order error
// feedback and TPDF dither. `history` holds the channel's two most recent
// quantisation errors (newest first) and is updated in place.
static inline uint8_t pcm8_quantise_sample(float sample, float history[2]) {
    const float b1 = 1.5f;    // 1st feedback coefficient
    const float b2 = -0.75f;  // 2nd feedback coefficient
    const float scale = 127.5f;
    const float bias = 128.0f;
    // Dither is kept at +/-0.2 LSB (down from +/-0.5) because dither is also
    // added in the coefficient domain; this avoids doubling the noise floor.
    const float dither_scale = 0.2f;

    const float feedback = b1 * history[0] + b2 * history[1];
    const float dither = dither_scale * tpdf1();
    float shaped = sample + feedback + dither / scale;
    shaped = FCLAMP(shaped, -1.0f, 1.0f);

    // Round to the nearest step and keep it inside the signed 8-bit range
    int q = (int)lrintf(shaped * scale);
    if (q < -128) {
        q = -128;
    } else if (q > 127) {
        q = 127;
    }

    // Shift the error history and record the new quantisation error
    history[1] = history[0];
    history[0] = shaped - (float)q / scale;

    // Re-centre on 128 to obtain the unsigned sample (0..255)
    return (uint8_t)(q + bias);
}

// Convert `count` float stereo samples to unsigned 8-bit PCM.
//
// fleft / fright : input samples
// left / right   : output samples
// dither_error   : per-channel error history ([0] = left, [1] = right),
//                  read and updated so shaping can continue across calls
//
// For each index the left sample is processed before the right one, so the
// dither draws are consumed in that order.
static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) {
    for (size_t n = 0; n < count; n++) {
        left[n] = pcm8_quantise_sample(fleft[n], dither_error[0]);
        right[n] = pcm8_quantise_sample(fright[n], dither_error[1]);
    }
}
//=============================================================================
// TAD (Terrarum Advanced Audio) Decoder - Constants and Helpers
//=============================================================================
// Coefficient scalars for each subband (CDF 9/7 with 9 decomposition levels)
// 10 entries, index 0 = LL; each entry is the previous one divided by ~sqrt(2).
static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
// Base quantiser weight table (10 subbands: LL + 9 H bands)
// Indexed by the same subband number as TAD32_COEFF_SCALARS.
static const float BASE_QUANTISER_WEIGHTS[] = {
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f
};
//=============================================================================
// Spectral Interpolation for Coefficient Reconstruction (TAD)
//=============================================================================
// Fast PRNG for light dithering (xorshift32 with shifts 13, 17, 5).
// Advances the state at *s and returns the new state. A zero state stays
// zero forever.
static inline uint32_t xorshift32(uint32_t *s) {
    uint32_t v = *s;
    v = v ^ (v << 13);
    v = v ^ (v >> 17);
    v = v ^ (v << 5);
    *s = v;
    return v;
}
// Uniform value in [0, 1): the low 24 bits of the next xorshift32 output
// divided by 2^24.
static inline float urand(uint32_t *s) {
    const uint32_t low24 = xorshift32(s) & 0xFFFFFF;
    return low24 / 16777216.0f;
}
// Triangular (TPDF) noise for the TAD path: the difference of two urand()
// draws from the same generator state, which lies strictly inside (-1, 1).
// NOTE(review): both calls mutate *s and C does not specify which operand is
// evaluated first, so which draw is the minuend may vary between compilers;
// the expression is kept as written for that reason.
static inline float tpdf_tad(uint32_t *s) {
return urand(s) - urand(s);
}
// Compute RMS energy of a coefficient band
static float compute_band_rms(const float *c, size_t len) {
if (len == 0) return 0.0f;
double sumsq = 0.0;
for (size_t i = 0; i < len; i++) {
sumsq += (double)c[i] * c[i];
}
return sqrtf((float)(sumsq / (double)len));
}
// Simplified spectral reconstruction for wavelet coefficients: adds TPDF
// dither of amplitude 0.05 * Q to every coefficient of the band.
//
// Bands shorter than 4 coefficients are left untouched. The dither generator
// is seeded from `len` and `Q`, so the same band length and step always get
// the same noise. `lower_band_rms` is accepted but not used.
static void spectral_interpolate_band(float *c, size_t len, float Q, float lower_band_rms) {
    (void)lower_band_rms;

    if (len < 4) {
        return;
    }

    uint32_t rng_state = 0x9E3779B9u ^ (uint32_t)len ^ (uint32_t)(Q * 65536.0f);
    const float amplitude = 0.05f * Q;

    float *const end = c + len;
    for (float *p = c; p != end; ++p) {
        *p += tpdf_tad(&rng_state) * amplitude;
    }
}
//=============================================================================
// Dequantization (inverse of quantization)
//=============================================================================
#define LAMBDA_FIXED 6.0f
// Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder)
// Converts quantized index back to normalized float in [-1, 1]
static float lambda_decompanding(int8_t quant_val, int max_index) {
// Handle zero
if (quant_val == 0) {
return 0.0f;
}
int sign = (quant_val < 0) ? -1 : 1;
int abs_index = abs(quant_val);
// Clamp to valid range
if (abs_index > max_index) abs_index = max_index;
// Map index back to normalized CDF [0, 1]
float normalized_cdf = (float)abs_index / max_index;
// Map from [0, 1] back to [0.5, 1.0] (CDF range for positive half)
float cdf = 0.5f + normalized_cdf * 0.5f;
// Inverse Laplacian CDF for x >= 0: x = -(1/λ) * ln(2*(1-F))
// For F in [0.5, 1.0]: x = -(1/λ) * ln(2*(1-F))
float abs_val = -(1.0f / LAMBDA_FIXED) * logf(2.0f * (1.0f - cdf));
// Clamp to [0, 1]
if (abs_val > 1.0f) abs_val = 1.0f;
if (abs_val < 0.0f) abs_val = 0.0f;
return sign * abs_val;
}
// Dequantise one channel of TAD coefficients and apply per-band dithering.
//
// quantized       : `count` quantised indices
// coeffs          : output, `count` floats
// chunk_size      : chunk length used to derive the band boundaries
// dwt_levels      : number of DWT levels; bands are 0 (LL) .. dwt_levels
// max_index       : largest quantisation index magnitude
// quantiser_scale : extra scale applied on top of the base band weights
//
// Band b covers [start[b], start[b+1]) where start[1] = chunk_size >> dwt_levels
// and each following band doubles in size. Coefficients past the last
// boundary are treated as belonging to the last band. Band ranges are clipped
// to `count` so the output buffer is never overrun.
//
// If the parameters cannot be decoded (negative sizes, more bands than the
// scalar/weight tables hold, or max_index <= 0) an error is printed and the
// output is zero-filled.
static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) {
    enum {
        WEIGHT_ENTRIES = sizeof(BASE_QUANTISER_WEIGHTS) / sizeof(BASE_QUANTISER_WEIGHTS[0]),
        SCALAR_ENTRIES = sizeof(TAD32_COEFF_SCALARS) / sizeof(TAD32_COEFF_SCALARS[0]),
        MAX_BANDS = WEIGHT_ENTRIES < SCALAR_ENTRIES ? WEIGHT_ENTRIES : SCALAR_ENTRIES
    };
    size_t band_starts[MAX_BANDS + 1];

    if (chunk_size < 0 || dwt_levels < 0 || dwt_levels >= MAX_BANDS || max_index <= 0) {
        fprintf(stderr, "Error: invalid TAD dequantisation parameters (chunk_size=%d, levels=%d, max_index=%d)\n",
                chunk_size, dwt_levels, max_index);
        memset(coeffs, 0, count * sizeof(float));
        return;
    }

    // Band boundaries: LL first, then detail bands doubling in size
    const size_t first_band_size = (size_t)(chunk_size >> dwt_levels);
    band_starts[0] = 0;
    band_starts[1] = first_band_size;
    for (int i = 2; i <= dwt_levels + 1; i++) {
        band_starts[i] = band_starts[i - 1] + (first_band_size << (i - 2));
    }

    // Step 1: dequantise every coefficient, band by band (no dithering yet)
    for (int band = 0; band <= dwt_levels; band++) {
        size_t begin = band_starts[band] < count ? band_starts[band] : count;
        size_t end = band_starts[band + 1] < count ? band_starts[band + 1] : count;
        if (band == dwt_levels) {
            end = count; // anything past the last boundary belongs to the last band
        }

        const float weight = BASE_QUANTISER_WEIGHTS[band] * quantiser_scale;
        for (size_t i = begin; i < end; i++) {
            const float normalized_val = lambda_decompanding(quantized[i], max_index);
            coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[band] * weight;
        }
    }

    // Step 2: per-band spectral interpolation, highest band index first, so
    // each band's RMS is available as a reference for the next one processed
    float prev_band_rms = 0.0f;
    for (int band = dwt_levels; band >= 0; band--) {
        const size_t begin = band_starts[band] < count ? band_starts[band] : count;
        const size_t end = band_starts[band + 1] < count ? band_starts[band + 1] : count;
        const size_t band_len = end - begin;

        // Quantisation step Q for this band
        const float weight = BASE_QUANTISER_WEIGHTS[band] * quantiser_scale;
        const float scalar = TAD32_COEFF_SCALARS[band] * weight;
        const float Q = scalar / max_index;

        spectral_interpolate_band(&coeffs[begin], band_len, Q, prev_band_rms);
        prev_band_rms = compute_band_rms(&coeffs[begin], band_len);
    }
}
//=============================================================================
// Chunk Decoding
//=============================================================================
// Decode one TAD audio chunk to interleaved unsigned 8-bit stereo PCM.
//
// Chunk layout: [sample_count : u16][max_index : u8][payload_size : u32]
// followed by payload_size bytes of Zstd data. The decompressed payload holds
// sample_count mid indices followed by sample_count side indices. Header
// fields are read in host byte order.
//
// input           : start of the chunk
// input_size      : bytes available at `input` (assumed to be the bytes
//                   remaining in the caller's buffer); used to reject
//                   truncated chunks
// pcmu8_stereo    : output, must hold 2 * sample_count bytes (L, R interleaved)
// bytes_consumed  : set to header + payload size once decompression succeeded
// samples_decoded : set to the chunk's sample count once decompression succeeded
//
// Returns 0 on success, -1 on malformed input, decompression failure or
// allocation failure. The noise-shaping history starts from zero for every chunk.
static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
                        size_t *bytes_consumed, size_t *samples_decoded) {
    enum { CHUNK_HEADER_SIZE = 2 + 1 + 4 };

    int rc = -1;
    uint8_t *decompressed = NULL;
    float *dwt_mid = NULL;
    float *dwt_side = NULL;
    float *pcm32_left = NULL;
    float *pcm32_right = NULL;
    uint8_t *pcm8_left = NULL;
    uint8_t *pcm8_right = NULL;

    if (input_size < CHUNK_HEADER_SIZE) {
        fprintf(stderr, "Error: TAD chunk header truncated (%zu bytes available)\n", input_size);
        return -1;
    }

    // Header fields sit at unaligned offsets, so copy them out byte-wise
    uint16_t sample_count;
    uint32_t payload_size;
    memcpy(&sample_count, input, sizeof sample_count);
    const uint8_t max_index = input[2];
    memcpy(&payload_size, input + 3, sizeof payload_size);
    const uint8_t *payload = input + CHUNK_HEADER_SIZE;

    if (payload_size > input_size - CHUNK_HEADER_SIZE) {
        fprintf(stderr, "Error: TAD chunk payload (%u bytes) exceeds available input\n", (unsigned)payload_size);
        return -1;
    }

    const int dwt_levels = calculate_dwt_levels(sample_count);
    if (dwt_levels < 0) {
        fprintf(stderr, "Error: Invalid sample count %u\n", (unsigned)sample_count);
        return -1;
    }

    const size_t n = sample_count;
    if (n == 0) {
        // Nothing to decode; just step over the chunk
        *bytes_consumed = CHUNK_HEADER_SIZE + (size_t)payload_size;
        *samples_decoded = 0;
        return 0;
    }

    // Generous upper bound on the decompressed size (only 2 * n bytes are used)
    const size_t decompressed_size = n * 4 * sizeof(int8_t);
    decompressed = malloc(decompressed_size);
    if (decompressed == NULL) {
        fprintf(stderr, "Error: out of memory decoding TAD chunk\n");
        goto cleanup;
    }

    const size_t actual_size = ZSTD_decompress(decompressed, decompressed_size, payload, payload_size);
    if (ZSTD_isError(actual_size)) {
        fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(actual_size));
        goto cleanup;
    }

    *bytes_consumed = CHUNK_HEADER_SIZE + (size_t)payload_size;
    *samples_decoded = n;

    // Both the mid and side planes must be fully present
    if (actual_size < 2 * n) {
        fprintf(stderr, "Error: TAD chunk payload too short (%zu bytes for %zu samples)\n", actual_size, n);
        goto cleanup;
    }

    dwt_mid = malloc(n * sizeof *dwt_mid);
    dwt_side = malloc(n * sizeof *dwt_side);
    pcm32_left = malloc(n * sizeof *pcm32_left);
    pcm32_right = malloc(n * sizeof *pcm32_right);
    pcm8_left = malloc(n * sizeof *pcm8_left);
    pcm8_right = malloc(n * sizeof *pcm8_right);
    if (!dwt_mid || !dwt_side || !pcm32_left || !pcm32_right || !pcm8_left || !pcm8_right) {
        fprintf(stderr, "Error: out of memory decoding TAD chunk\n");
        goto cleanup;
    }

    // Mid plane first, side plane directly after it
    const int8_t *quant_mid = (const int8_t *)decompressed;
    const int8_t *quant_side = quant_mid + n;

    // Dequantise with quantiser scaling and spectral interpolation.
    // quantiser_scale = 1.0f is the baseline (must match the encoder).
    const float quantiser_scale = 1.0f;
    dequantize_dwt_coefficients(quant_mid, dwt_mid, n, sample_count, dwt_levels, max_index, quantiser_scale);
    dequantize_dwt_coefficients(quant_side, dwt_side, n, sample_count, dwt_levels, max_index, quantiser_scale);

    // Inverse DWT
    dwt_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
    dwt_inverse_multilevel(dwt_side, sample_count, dwt_levels);

    // M/S to L/R, expand dynamic range, then dither down to 8 bits
    float err[2][2] = {{0, 0}, {0, 0}};
    ms_correlate(dwt_mid, dwt_side, pcm32_left, pcm32_right, n);
    expand_gamma(pcm32_left, pcm32_right, n);
    pcm32f_to_pcm8(pcm32_left, pcm32_right, pcm8_left, pcm8_right, n, err);

    // Interleave stereo output (PCMu8)
    for (size_t i = 0; i < n; i++) {
        pcmu8_stereo[i * 2] = pcm8_left[i];
        pcmu8_stereo[i * 2 + 1] = pcm8_right[i];
    }

    rc = 0;

cleanup:
    free(pcm8_right);
    free(pcm8_left);
    free(pcm32_right);
    free(pcm32_left);
    free(dwt_side);
    free(dwt_mid);
    free(decompressed);
    return rc;
}
//=============================================================================
// Significance Map Postprocessing (matches TSVM exactly)
//=============================================================================
// Helper: extract the 2-bit code for coefficient `coeff_idx` from a
// bit-packed map. Four codes are packed per byte, lowest bits first, so a
// code never straddles a byte boundary.
//
// map_data  : packed map
// map_bytes : size of the map in bytes
// Returns the code (0..3), or 0 when coeff_idx lies outside the map.
static inline int get_twobit_code(const uint8_t *map_data, int map_bytes, int coeff_idx) {
    if (coeff_idx < 0) {
        return 0;
    }

    const int byte_idx = coeff_idx / 4;
    if (byte_idx >= map_bytes) {
        return 0;
    }

    const int shift = (coeff_idx % 4) * 2;
    return (map_data[byte_idx] >> shift) & 0x03;
}
// Decoder: reconstruct coefficients from the 2-bit map format (entropyCoder=0).
// Layout: [Y_map_2bit][Co_map_2bit][Cg_map_2bit][Y_others][Co_others][Cg_others]
// 2-bit encoding: 00=0, 01=+1, 10=-1, 11=other (stored in the value array)
//
// compressed_data : packed maps followed by the 16-bit "other" values
// coeff_count     : coefficients per channel
// output_y/co/cg  : receive coeff_count values each
//
// The "other" values are stored channel after channel in coefficient order,
// so decoding Y, then Co, then Cg consumes them with one running cursor.
// They follow the bit-packed maps at an arbitrary byte offset and are
// therefore copied out with memcpy (host byte order) instead of being read
// through an int16_t pointer, which could be misaligned.
static void postprocess_coefficients_twobit(uint8_t *compressed_data, int coeff_count,
                                            int16_t *output_y, int16_t *output_co, int16_t *output_cg) {
    const int map_bytes = (coeff_count * 2 + 7) / 8; // 2 bits per coefficient

    // All three channels are present for the Y-Co-Cg layout
    const uint8_t *maps[3] = {
        compressed_data,
        compressed_data + map_bytes,
        compressed_data + (size_t)map_bytes * 2
    };
    int16_t *outputs[3] = {output_y, output_co, output_cg};

    // Value array starts right after the three maps
    const uint8_t *next_value = compressed_data + (size_t)map_bytes * 3;

    for (int ch = 0; ch < 3; ch++) {
        int16_t *out = outputs[ch];
        for (int i = 0; i < coeff_count; i++) {
            switch (get_twobit_code(maps[ch], map_bytes, i)) {
            case 0:
                out[i] = 0;
                break;
            case 1:
                out[i] = 1;
                break;
            case 2:
                out[i] = -1;
                break;
            default: { // code 3: explicit value
                int16_t value;
                memcpy(&value, next_value, sizeof value);
                next_value += sizeof value;
                out[i] = value;
                break;
            }
            }
        }
    }
}
//=============================================================================
// EZBC (Embedded Zero Block Coding) Decoder
//=============================================================================
// EZBC Block structure for quadtree
// Describes one rectangular region of a channel's coefficient plane.
// The decoder starts from a root block {0, 0, width, height} and splits it
// into up to four children until 1x1 blocks are reached.
typedef struct {
// Top-left corner of the block, in coefficient coordinates
int x, y;
// Extent of the block; a 1x1 block addresses the single coefficient y * plane_width + x
int width, height;
} ezbc_block_t;
// EZBC bitstream reader state
// Cursor over a byte buffer that is consumed one bit at a time by ezbc_read_bits().
typedef struct {
// Base pointer of the buffer being read
const uint8_t *data;
// Exclusive end position: reading stops once byte_pos reaches this value
size_t size;
// Index (relative to data) of the byte currently being consumed
size_t byte_pos;
// Next bit to read inside the current byte, 0..7 (bit 0 is read first)
int bit_pos;
} ezbc_bitreader_t;
// Pull `num_bits` bits out of the EZBC stream and pack them into an int.
// Bits are consumed LSB-first inside each byte, and the first bit consumed
// lands in bit 0 of the result. If the stream runs dry part-way through,
// whatever was collected so far is returned (the missing high bits stay zero).
static int ezbc_read_bits(ezbc_bitreader_t *reader, int num_bits) {
    int value = 0;
    int collected = 0;

    while (collected < num_bits && reader->byte_pos < reader->size) {
        const int bit = (reader->data[reader->byte_pos] >> reader->bit_pos) & 1;
        value |= (bit << collected);
        collected++;

        // Step to the next bit, rolling over into the next byte after bit 7
        if (++reader->bit_pos == 8) {
            reader->bit_pos = 0;
            reader->byte_pos++;
        }
    }

    return value;
}
// EZBC block queues (simple dynamic arrays)
// Append-only list of blocks; ezbc_queue_add() grows the storage when it is full.
typedef struct {
// Heap-allocated storage for the queued blocks
ezbc_block_t *blocks;
// Number of blocks currently stored
int count;
// Number of blocks the current allocation can hold
int capacity;
} ezbc_block_queue_t;
// Initialise an empty block queue with room for 256 blocks.
// The queue API has no way to report failure to its callers, and a NULL
// buffer would later be written through by ezbc_queue_add(), so running out
// of memory here terminates the decoder with a diagnostic instead.
static void ezbc_queue_init(ezbc_block_queue_t *q) {
    q->count = 0;
    q->capacity = 256;
    q->blocks = malloc((size_t)q->capacity * sizeof *q->blocks);
    if (q->blocks == NULL) {
        fprintf(stderr, "Error: out of memory allocating EZBC block queue\n");
        exit(EXIT_FAILURE);
    }
}
// Release a queue's storage and mark it empty.
// The pointer is cleared so a second call is harmless; capacity is left as is.
static void ezbc_queue_free(ezbc_block_queue_t *q) {
    ezbc_block_t *storage = q->blocks;

    q->count = 0;
    q->blocks = NULL;
    free(storage);
}
// Append a block to the queue, doubling the storage when it is full.
// The grown buffer is taken through a temporary so the old allocation is not
// lost if realloc() fails. The function cannot report failure to its callers,
// so running out of memory terminates the decoder with a diagnostic rather
// than writing through a NULL pointer.
static void ezbc_queue_add(ezbc_block_queue_t *q, ezbc_block_t block) {
    if (q->count >= q->capacity) {
        // Fall back to the initial size if the queue has no capacity yet
        const int new_capacity = (q->capacity > 0) ? q->capacity * 2 : 256;
        ezbc_block_t *grown = realloc(q->blocks, (size_t)new_capacity * sizeof *grown);
        if (grown == NULL) {
            fprintf(stderr, "Error: out of memory growing EZBC block queue\n");
            exit(EXIT_FAILURE);
        }
        q->blocks = grown;
        q->capacity = new_capacity;
    }
    q->blocks[q->count++] = block;
}
// Forward declaration (the child helper below and the block decoder call each other)
static int ezbc_process_significant_block_recursive(
    ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold,
    int16_t *output, int width, int8_t *significant, int *first_bitplane,
    ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant);

// Handle one child quadrant: read its significance flag, then either descend
// into it (flag set) or park it on the insignificant queue (flag clear).
// Returns the number of sign bits consumed below this child.
static int ezbc_decode_child_block(
    ezbc_bitreader_t *reader, ezbc_block_t child, int bitplane, int threshold,
    int16_t *output, int width, int8_t *significant, int *first_bitplane,
    ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant) {
    if (!ezbc_read_bits(reader, 1)) {
        ezbc_queue_add(next_insignificant, child);
        return 0;
    }
    return ezbc_process_significant_block_recursive(
        reader, child, bitplane, threshold, output, width, significant, first_bitplane,
        next_significant, next_insignificant);
}

// EZBC recursive block decoder (matches Kotlin implementation)
// `block` has just been flagged significant at `bitplane`.
//  - A 1x1 block reads one sign bit, sets its coefficient to +/-threshold,
//    records it in `significant` / `first_bitplane`, and joins next_significant.
//  - A larger block is split into quadrants visited in the fixed order
//    top-left, top-right, bottom-left, bottom-right, one flag bit each.
// `width` is the row stride of `output`. Returns the number of sign bits read.
static int ezbc_process_significant_block_recursive(
    ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold,
    int16_t *output, int width, int8_t *significant, int *first_bitplane,
    ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant) {
    // Leaf: a single coefficient
    if (block.width == 1 && block.height == 1) {
        const int pos = block.y * width + block.x;
        const int negative = ezbc_read_bits(reader, 1);

        output[pos] = negative ? -threshold : threshold;
        significant[pos] = 1;
        first_bitplane[pos] = bitplane;
        ezbc_queue_add(next_significant, block);
        return 1;
    }

    // Split point: half the extent, but never zero so 1xN / Nx1 blocks still split
    int left_w = block.width / 2;
    int top_h = block.height / 2;
    if (left_w == 0) left_w = 1;
    if (top_h == 0) top_h = 1;

    const int right_w = block.width - left_w;
    const int bottom_h = block.height - top_h;
    const int has_right = block.width > left_w;
    const int has_bottom = block.height > top_h;

    int sign_bits = 0;

    // Top-left quadrant always exists
    {
        const ezbc_block_t child = {block.x, block.y, left_w, top_h};
        sign_bits += ezbc_decode_child_block(
            reader, child, bitplane, threshold, output, width, significant, first_bitplane,
            next_significant, next_insignificant);
    }

    // Top-right quadrant
    if (has_right) {
        const ezbc_block_t child = {block.x + left_w, block.y, right_w, top_h};
        sign_bits += ezbc_decode_child_block(
            reader, child, bitplane, threshold, output, width, significant, first_bitplane,
            next_significant, next_insignificant);
    }

    // Bottom-left quadrant
    if (has_bottom) {
        const ezbc_block_t child = {block.x, block.y + top_h, left_w, bottom_h};
        sign_bits += ezbc_decode_child_block(
            reader, child, bitplane, threshold, output, width, significant, first_bitplane,
            next_significant, next_insignificant);
    }

    // Bottom-right quadrant
    if (has_right && has_bottom) {
        const ezbc_block_t child = {block.x + left_w, block.y + top_h, right_w, bottom_h};
        sign_bits += ezbc_decode_child_block(
            reader, child, bitplane, threshold, output, width, significant, first_bitplane,
            next_significant, next_insignificant);
    }

    return sign_bits;
}
// Decode a single channel with EZBC
//
// ezbc_data:      buffer holding the channel's bitstream
// offset, size:   the bitstream occupies bytes [offset, offset + size) of ezbc_data
// output:         receives expected_count coefficients (row-major)
// expected_count: number of coefficients the caller expects (width * height)
//
// The stream header is: MSB bitplane (8 bits), width (16 bits), height (16 bits).
// On a malformed header or an allocation failure a message is written to
// stderr and the output is left all-zero.
static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t size,
                                int16_t *output, int expected_count) {
    // Largest bitplane for which (1 << bitplane) is still a defined int shift
    enum { EZBC_MAX_BITPLANE = 30 };

    ezbc_bitreader_t reader = {ezbc_data, offset + size, offset, 0};

    // Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
    const int msb_bitplane = ezbc_read_bits(&reader, 8);
    const int width = ezbc_read_bits(&reader, 16);
    const int height = ezbc_read_bits(&reader, 16);

    // Every exit path leaves a fully initialised (zeroed) output
    if (expected_count > 0) {
        memset(output, 0, (size_t)expected_count * sizeof(int16_t));
    }

    // Two 16-bit fields can multiply past INT_MAX, so compare in 64 bits
    if ((long long)width * (long long)height != (long long)expected_count) {
        fprintf(stderr, "EZBC dimension mismatch: %dx%d != %d\n", width, height, expected_count);
        return;
    }
    if (expected_count <= 0) {
        return;  // nothing to decode; a zero-area root block must not be traversed
    }

    // The bitplane comes straight from the stream; reject values that would
    // make the threshold shift undefined.
    if (msb_bitplane > EZBC_MAX_BITPLANE) {
        fprintf(stderr, "EZBC invalid MSB bitplane: %d\n", msb_bitplane);
        return;
    }

    // State tracking filled in by the recursive block decoder
    int8_t *significant = calloc((size_t)expected_count, sizeof *significant);
    int *first_bitplane = calloc((size_t)expected_count, sizeof *first_bitplane);
    if (significant == NULL || first_bitplane == NULL) {
        fprintf(stderr, "Error: out of memory in EZBC channel decode\n");
        free(significant);
        free(first_bitplane);
        return;
    }

    // Initialize queues
    ezbc_block_queue_t insignificant, next_insignificant, significant_queue, next_significant;
    ezbc_queue_init(&insignificant);
    ezbc_queue_init(&next_insignificant);
    ezbc_queue_init(&significant_queue);
    ezbc_queue_init(&next_significant);

    // Start with root block
    const ezbc_block_t root = {0, 0, width, height};
    ezbc_queue_add(&insignificant, root);

    // Process bitplanes from MSB to LSB
    for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
        const int threshold = 1 << bitplane;

        // Insignificant blocks: one flag bit each
        for (int i = 0; i < insignificant.count; i++) {
            if (ezbc_read_bits(&reader, 1) == 0) {
                // Still insignificant
                ezbc_queue_add(&next_insignificant, insignificant.blocks[i]);
            } else {
                // Became significant - use recursive processing
                ezbc_process_significant_block_recursive(
                    &reader, insignificant.blocks[i], bitplane, threshold,
                    output, width, significant, first_bitplane,
                    &next_significant, &next_insignificant);
            }
        }

        // Already-significant 1x1 blocks: one refinement bit each, which
        // extends the magnitude by the current bitplane's value.
        for (int i = 0; i < significant_queue.count; i++) {
            const ezbc_block_t block = significant_queue.blocks[i];
            const int idx = block.y * width + block.x;

            if (ezbc_read_bits(&reader, 1)) {
                if (output[idx] < 0) {
                    output[idx] -= threshold;
                } else {
                    output[idx] += threshold;
                }
            }
            // Keep in significant queue
            ezbc_queue_add(&next_significant, block);
        }

        // Swap queues; the emptied "current" queues become the next pass's scratch
        const ezbc_block_queue_t spent_insig = insignificant;
        insignificant = next_insignificant;
        next_insignificant = spent_insig;
        next_insignificant.count = 0;

        const ezbc_block_queue_t spent_sig = significant_queue;
        significant_queue = next_significant;
        next_significant = spent_sig;
        next_significant.count = 0;
    }

    // Cleanup
    free(significant);
    free(first_bitplane);
    ezbc_queue_free(&insignificant);
    ezbc_queue_free(&next_insignificant);
    ezbc_queue_free(&significant_queue);
    ezbc_queue_free(&next_significant);
}
// EZBC postprocessing for single frames
// The payload holds one section per decoded channel, in Y, Co, Cg order.
// Each section is a 4-byte little-endian size followed by that many bytes of
// EZBC bitstream. A channel is decoded only if channel_layout marks it present
// (bit 2 clear = luma, bit 1 clear = both chroma planes) and its output
// pointer is non-NULL; a skipped channel consumes nothing from the payload.
static void postprocess_coefficients_ezbc(uint8_t *compressed_data, int coeff_count,
                                          int16_t *output_y, int16_t *output_co, int16_t *output_cg,
                                          int channel_layout) {
    const int luma_present = (channel_layout & 0x04) == 0;
    const int chroma_present = (channel_layout & 0x02) == 0;

    int16_t *const targets[3] = {output_y, output_co, output_cg};
    const int enabled[3] = {luma_present, chroma_present, chroma_present};

    int cursor = 0;
    for (int ch = 0; ch < 3; ch++) {
        if (!enabled[ch] || !targets[ch]) {
            continue;
        }

        // Section header: payload size, little-endian
        const uint32_t payload_size = ((uint32_t)compressed_data[cursor + 0]) |
                                      ((uint32_t)compressed_data[cursor + 1] << 8) |
                                      ((uint32_t)compressed_data[cursor + 2] << 16) |
                                      ((uint32_t)compressed_data[cursor + 3] << 24);
        cursor += 4;

        decode_channel_ezbc(compressed_data, cursor, payload_size, targets[ch], coeff_count);
        cursor += payload_size;
    }
}
//=============================================================================
// DWT Inverse Transforms (matches TSVM)
//=============================================================================
// Undo one lifting "update" step on the low-pass half:
//   low[i] -= coeff * (high[i] + high[i-1])
// high[i] counts as 0 past the end of the high band, and high[-1] mirrors high[0].
static void tav_dwt97_undo_update(float *low, const float *high,
                                  int low_count, int high_count, float coeff) {
    for (int i = 0; i < low_count; i++) {
        const float d_curr = (i < high_count) ? high[i] : 0.0f;
        const float d_prev = (i > 0) ? high[i - 1] : d_curr;
        low[i] -= coeff * (d_curr + d_prev);
    }
}

// Undo one lifting "predict" step on the high-pass half:
//   high[i] -= coeff * (low[i] + low[i+1])
// low[i+1] mirrors low[i] at the right edge.
static void tav_dwt97_undo_predict(const float *low, float *high,
                                   int low_count, int high_count, float coeff) {
    for (int i = 0; i < high_count; i++) {
        const float s_curr = low[i];
        const float s_next = (i + 1 < low_count) ? low[i + 1] : s_curr;
        high[i] -= coeff * (s_curr + s_next);
    }
}

// 9/7 inverse DWT (from TSVM Kotlin code)
//
// data:   on entry, `length` coefficients laid out as [low-pass | high-pass],
//         with (length + 1) / 2 low-pass values first; on return, the
//         reconstructed samples (low-pass at even positions, high-pass at odd).
// length: number of samples; lengths below 2 are left untouched.
//
// If the scratch buffer cannot be allocated, a message is written to stderr
// and `data` is left unmodified.
static void dwt_97_inverse_1d(float *data, int length) {
    if (length < 2) return;

    // 9/7 inverse lifting coefficients from TSVM
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;

    const int low_count = (length + 1) / 2;
    const int high_count = length / 2;  // low_count + high_count == length

    float *temp = malloc((size_t)length * sizeof *temp);
    if (temp == NULL) {
        fprintf(stderr, "Error: out of memory in 9/7 inverse DWT (length=%d)\n", length);
        return;
    }
    memcpy(temp, data, (size_t)length * sizeof *temp);

    float *low = temp;
    float *high = temp + low_count;

    // Step 1: Undo scaling
    for (int i = 0; i < low_count; i++) {
        low[i] /= K;
    }
    for (int i = 0; i < high_count; i++) {
        high[i] *= K;
    }

    // Steps 2-5: undo the lifting steps in reverse order of the forward transform
    tav_dwt97_undo_update(low, high, low_count, high_count, delta);   // δ update
    tav_dwt97_undo_predict(low, high, low_count, high_count, gamma);  // γ predict
    tav_dwt97_undo_update(low, high, low_count, high_count, beta);    // β update
    tav_dwt97_undo_predict(low, high, low_count, high_count, alpha);  // α predict

    // Reconstruction - interleave low (even positions) and high (odd positions)
    for (int i = 0; i < low_count; i++) {
        data[2 * i] = low[i];
    }
    for (int i = 0; i < high_count; i++) {
        data[2 * i + 1] = high[i];
    }

    free(temp);
}
// 5/3 inverse DWT (simplified - uses 9/7 for now)
// There is no dedicated 5/3 lifting implementation here: the call is forwarded
// to dwt_97_inverse_1d(), so data is reconstructed with the 9/7 filter.
// data is transformed in place; lengths below 2 are left untouched.
static void dwt_53_inverse_1d(float *data, int length) {
if (length < 2) return;
// TODO: Implement proper 5/3 from TSVM if needed
dwt_97_inverse_1d(data, length);
}
// Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling)
//
// data:        width * height coefficients, row-major, transformed in place
// levels:      number of decomposition levels to undo (values below 0 count as 0)
// filter_type: 0 selects dwt_53_inverse_1d, anything else dwt_97_inverse_1d
//
// The first call prints the per-level dimension sequence to stderr.
// If scratch memory cannot be allocated, a message is written to stderr and
// `data` is left untouched.
static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) {
    // A negative level count would otherwise under-allocate the size tables
    const int level_count = (levels > 0) ? levels : 0;
    const int max_size = (width > height) ? width : height;
    const size_t line_len = (max_size > 0) ? (size_t)max_size : 1;

    float *temp_row = malloc(line_len * sizeof *temp_row);
    float *temp_col = malloc(line_len * sizeof *temp_col);
    int *widths = malloc(((size_t)level_count + 1) * sizeof *widths);
    int *heights = malloc(((size_t)level_count + 1) * sizeof *heights);
    if (temp_row == NULL || temp_col == NULL || widths == NULL || heights == NULL) {
        fprintf(stderr, "Error: out of memory in multi-level inverse DWT\n");
        goto cleanup;
    }

    // Pre-calculate exact sequence of widths/heights from forward transform
    // This is CRITICAL for non-power-of-2 dimensions (e.g., 560, 448)
    // Forward transform uses: width, (width+1)/2, ((width+1)/2+1)/2, ...
    // Inverse MUST use the exact same sequence in reverse
    widths[0] = width;
    heights[0] = height;
    for (int i = 1; i <= level_count; i++) {
        widths[i] = (widths[i - 1] + 1) / 2;
        heights[i] = (heights[i - 1] + 1) / 2;
    }

    // Debug: Print dimension sequence (first call only)
    static int debug_once = 1;
    if (debug_once) {
        fprintf(stderr, "DWT dimension sequence for %dx%d with %d levels:\n", width, height, levels);
        for (int i = 0; i <= levels; i++) {
            fprintf(stderr, "  Level %d: %dx%d\n", i, widths[i], heights[i]);
        }
        debug_once = 0;
    }

    // TSVM: for (level in levels - 1 downTo 0)
    // Apply inverse transforms using pre-calculated dimensions
    for (int level = level_count - 1; level >= 0; level--) {
        const int current_width = widths[level];
        const int current_height = heights[level];
        if (current_width < 1 || current_height < 1) continue;
        if (current_width == 1 && current_height == 1) continue;

        // TSVM: Column inverse transform first (vertical)
        for (int x = 0; x < current_width; x++) {
            for (int y = 0; y < current_height; y++) {
                temp_col[y] = data[y * width + x];
            }
            if (filter_type == 0) {
                dwt_53_inverse_1d(temp_col, current_height);
            } else {
                dwt_97_inverse_1d(temp_col, current_height);
            }
            for (int y = 0; y < current_height; y++) {
                data[y * width + x] = temp_col[y];
            }
        }

        // TSVM: Row inverse transform second (horizontal)
        for (int y = 0; y < current_height; y++) {
            float *row = data + y * width;
            for (int x = 0; x < current_width; x++) {
                temp_row[x] = row[x];
            }
            if (filter_type == 0) {
                dwt_53_inverse_1d(temp_row, current_width);
            } else {
                dwt_97_inverse_1d(temp_row, current_width);
            }
            for (int x = 0; x < current_width; x++) {
                row[x] = temp_row[x];
            }
        }
    }

cleanup:
    free(widths);
    free(heights);
    free(temp_row);
    free(temp_col);
}
//=============================================================================
// Temporal DWT and GOP Decoding (matches TSVM)
//=============================================================================
// Classify a frame slot of a temporally transformed GOP by subband.
// (The original note says this mirrors the encoder's logic exactly.)
//   slot <  num_frames >> temporal_levels  -> 0  (tLL, coarsest)
//   slot <  num_frames / 2                 -> 1  (tLH)
//   otherwise                              -> 2  (tH of the first level, finest)
// e.g. 16 frames with 2 levels: slots 0-3 -> 0, slots 4-7 -> 1, slots 8-15 -> 2.
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    const int coarsest_end = num_frames >> temporal_levels;
    const int first_half_end = num_frames >> 1;

    return (frame_idx < coarsest_end) ? 0
         : (frame_idx < first_half_end) ? 1
         : 2;
}
// Calculate temporal quantizer scale for a given temporal subband level
static float get_temporal_quantizer_scale(int temporal_level) {
// Uses exponential scaling: 2^(BETA × level^KAPPA)
// With BETA=0.6, KAPPA=1.14:
// - Level 0 (tLL): 2^0.0 = 1.00
// - Level 1 (tH): 2^0.68 = 1.61
// - Level 2 (tHH): 2^1.29 = 2.45
const float BETA = 0.6f; // Temporal scaling exponent
const float KAPPA = 1.14f;
return powf(2.0f, BETA * powf(temporal_level, KAPPA));
}
// Inverse Haar 1D DWT
//
// data:   `length` values laid out as [averages | differences], with
//         (length + 1) / 2 averages first; rewritten in place with the
//         reconstructed samples.
// length: number of samples; lengths below 2 are left untouched.
//
// Each (average, difference) pair yields samples avg + diff and avg - diff.
// For an odd length the last sample is the unpaired final average.
//
// Short lines use a stack buffer, so the common case needs no heap allocation
// per call. If a long line cannot get scratch memory, a message is written to
// stderr and `data` is left unmodified.
static void dwt_haar_inverse_1d(float *data, int length) {
    enum { HAAR_STACK_SAMPLES = 64 };

    if (length < 2) return;

    float stack_scratch[HAAR_STACK_SAMPLES];
    float *heap_scratch = NULL;
    float *temp = stack_scratch;

    if (length > HAAR_STACK_SAMPLES) {
        heap_scratch = malloc((size_t)length * sizeof *heap_scratch);
        if (heap_scratch == NULL) {
            fprintf(stderr, "Error: out of memory in inverse Haar DWT (length=%d)\n", length);
            return;
        }
        temp = heap_scratch;
    }

    const int half = (length + 1) / 2;
    const int pairs = length / 2;  // number of complete (average, difference) pairs

    // Reconstruct adjacent pairs from average and difference
    for (int i = 0; i < pairs; i++) {
        temp[2 * i] = data[i] + data[half + i];
        temp[2 * i + 1] = data[i] - data[half + i];
    }
    // Odd length: last sample comes from low-pass only
    if (pairs < half) {
        temp[2 * pairs] = data[pairs];
    }

    // Copy reconstructed data back
    memcpy(data, temp, (size_t)length * sizeof *data);

    free(heap_scratch);
}
// Apply inverse 3D DWT to GOP data (spatial + temporal)
// Order: SPATIAL first (each frame), then TEMPORAL (across frames)
//
// gop_y/gop_co/gop_cg: gop_size frame pointers per channel, each frame holding
//                      width * height floats; all are transformed in place
// spatial_levels:      levels passed to the per-frame 2D inverse
// temporal_levels:     number of temporal Haar levels to undo
// filter_type:         spatial filter selector, forwarded unchanged
//
// If scratch memory for the temporal pass cannot be allocated, a message is
// written to stderr and the frames are left spatially (but not temporally)
// reconstructed.
static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
                                 int width, int height, int gop_size,
                                 int spatial_levels, int temporal_levels, int filter_type) {
    float **const planes[3] = {gop_y, gop_co, gop_cg};

    // Step 1: Apply inverse 2D spatial DWT to each frame (Y, Co, Cg per frame)
    for (int t = 0; t < gop_size; t++) {
        for (int ch = 0; ch < 3; ch++) {
            apply_inverse_dwt_multilevel(planes[ch][t], width, height, spatial_levels, filter_type);
        }
    }

    // Step 2: Apply inverse temporal DWT to each spatial location
    // Only needed for GOPs with multiple frames (skip for I-frames), and only
    // when there is at least one temporal level to undo.
    if (gop_size < 2 || temporal_levels < 1) return;

    // Pre-calculate all intermediate lengths for temporal DWT (same fix as TAD)
    // This ensures correct reconstruction for non-power-of-2 GOP sizes
    int *temporal_lengths = malloc(((size_t)temporal_levels + 1) * sizeof *temporal_lengths);
    float *temporal_line = malloc((size_t)gop_size * sizeof *temporal_line);
    if (temporal_lengths == NULL || temporal_line == NULL) {
        fprintf(stderr, "Error: out of memory in inverse temporal DWT\n");
        free(temporal_lengths);
        free(temporal_line);
        return;
    }

    temporal_lengths[0] = gop_size;
    for (int i = 1; i <= temporal_levels; i++) {
        temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
    }

    const int num_pixels = width * height;
    for (int pixel_idx = 0; pixel_idx < num_pixels; pixel_idx++) {
        for (int ch = 0; ch < 3; ch++) {
            float **frames = planes[ch];

            // Gather this pixel's values across the GOP
            for (int t = 0; t < gop_size; t++) {
                temporal_line[t] = frames[t][pixel_idx];
            }

            // Undo the temporal levels from coarsest to finest
            for (int level = temporal_levels - 1; level >= 0; level--) {
                const int level_frames = temporal_lengths[level];
                if (level_frames >= 2) {
                    dwt_haar_inverse_1d(temporal_line, level_frames);
                }
            }

            // Scatter the reconstructed samples back into the frames
            for (int t = 0; t < gop_size; t++) {
                frames[t][pixel_idx] = temporal_line[t];
            }
        }
    }

    free(temporal_line);
    free(temporal_lengths);
}
// Postprocess GOP unified block to per-frame coefficients (2-bit map format)
//
// Payload layout: the 2-bit maps of every frame for each present channel
// (Y, then Co, then Cg), followed by the escaped-value arrays in the same
// channel order. Map codes: 00=0, 01=+1, 10=-1, 11=next 16-bit little-endian
// entry of that channel's value array.
//
// channel_layout: bit 2 clear = luma present, bit 1 clear = chroma (Co and Cg) present
//
// Returns output[gop_size][3][num_pixels] (Y, Co, Cg); the caller frees every
// plane, every per-frame array and the outer array. Coefficients whose map
// byte or escaped value lies beyond data_size decode as 0. Returns NULL if
// memory cannot be allocated.
static int16_t ***postprocess_gop_unified(const uint8_t *decompressed_data, size_t data_size,
                                          int gop_size, int num_pixels, int channel_layout) {
    // 2 bits per coefficient
    const int map_bytes_per_frame = (num_pixels * 2 + 7) / 8;
    const int limit = (int)data_size;

    // Determine which channels are present (flags are inverted: 0 = present)
    const int has_luma = (channel_layout & 0x04) == 0;
    const int has_chroma = (channel_layout & 0x02) == 0;
    const int present[3] = {has_luma, has_chroma, has_chroma};

    // Calculate buffer positions for maps
    int maps_start[3];
    int read_ptr = 0;
    for (int ch = 0; ch < 3; ch++) {
        maps_start[ch] = present[ch] ? read_ptr : -1;
        if (present[ch]) read_ptr += map_bytes_per_frame * gop_size;
    }

    // Count "other" values (code 11) across ALL frames; the counts fix where
    // each channel's value array starts. A 2-bit code never straddles a byte
    // because every code begins at an even bit offset.
    int other_count[3] = {0, 0, 0};
    for (int ch = 0; ch < 3; ch++) {
        if (!present[ch]) continue;
        for (int frame = 0; frame < gop_size; frame++) {
            const int frame_map = maps_start[ch] + frame * map_bytes_per_frame;
            for (int i = 0; i < num_pixels; i++) {
                const int pos = frame_map + (i * 2) / 8;
                if (pos >= limit) continue;
                if (((decompressed_data[pos] >> ((i * 2) % 8)) & 0x03) == 3) {
                    other_count[ch]++;
                }
            }
        }
    }

    // Value arrays start after all maps
    int values_start[3];
    values_start[0] = read_ptr;
    read_ptr += other_count[0] * 2;
    values_start[1] = read_ptr;
    read_ptr += other_count[1] * 2;
    values_start[2] = read_ptr;

    // Allocate output arrays: [gop_size][3 channels][num_pixels], zero-filled
    int16_t ***output = calloc((size_t)gop_size, sizeof *output);
    if (output == NULL) return NULL;
    for (int t = 0; t < gop_size; t++) {
        output[t] = calloc(3, sizeof *output[t]);
        if (output[t] == NULL) goto alloc_failed;
        for (int ch = 0; ch < 3; ch++) {
            output[t][ch] = calloc((size_t)num_pixels, sizeof(int16_t));
            if (output[t][ch] == NULL && num_pixels > 0) goto alloc_failed;
        }
    }

    // Decode each present channel; escaped values are consumed in frame-major order
    for (int ch = 0; ch < 3; ch++) {
        if (!present[ch]) continue;

        int value_idx = 0;
        for (int frame = 0; frame < gop_size; frame++) {
            const int frame_map = maps_start[ch] + frame * map_bytes_per_frame;
            int16_t *plane = output[frame][ch];

            for (int i = 0; i < num_pixels; i++) {
                const int pos = frame_map + (i * 2) / 8;
                if (pos >= limit) continue;  // map truncated: coefficient stays 0

                const int code = (decompressed_data[pos] >> ((i * 2) % 8)) & 0x03;
                switch (code) {
                    case 0: plane[i] = 0; break;
                    case 1: plane[i] = 1; break;
                    case 2: plane[i] = -1; break;
                    default: {  // code == 3: escaped 16-bit value
                        const int val_offset = values_start[ch] + value_idx * 2;
                        value_idx++;
                        if (val_offset + 1 < limit) {
                            // Assemble unsigned, then reinterpret: shifting a
                            // negative high byte left would be undefined.
                            const uint16_t raw = (uint16_t)(decompressed_data[val_offset] |
                                                            (decompressed_data[val_offset + 1] << 8));
                            plane[i] = (int16_t)raw;
                        } else {
                            plane[i] = 0;
                        }
                        break;
                    }
                }
            }
        }
    }
    return output;

alloc_failed:
    for (int t = 0; t < gop_size; t++) {
        if (output[t] != NULL) {
            free(output[t][0]);
            free(output[t][1]);
            free(output[t][2]);
            free(output[t]);
        }
    }
    free(output);
    return NULL;
}
// Postprocess GOP RAW format to per-frame coefficients (entropyCoder=2)
// Layout: [All_Y_coeffs][All_Co_coeffs][All_Cg_coeffs] (raw int16 arrays)
//
// Each present channel stores gop_size frames of num_pixels int16 values back
// to back; the values are copied as stored (host byte order).
// channel_layout: bit 2 clear = luma present, bit 1 clear = chroma (Co and Cg) present
//
// Returns output[gop_size][3][num_pixels] (Y, Co, Cg; absent channels are
// zero-filled); the caller frees every plane, every per-frame array and the
// outer array. Returns NULL, after a message on stderr, when the payload is
// too short for a present channel, and NULL when memory cannot be allocated.
static int16_t ***postprocess_gop_raw(const uint8_t *decompressed_data, size_t data_size,
                                      int gop_size, int num_pixels, int channel_layout) {
    static const char *const channel_names[3] = {"Y", "Co", "Cg"};

    if (gop_size < 0 || num_pixels < 0) return NULL;

    // Determine which channels are present
    const int has_luma = (channel_layout & 0x04) == 0;
    const int has_chroma = (channel_layout & 0x02) == 0;
    const int present[3] = {has_luma, has_chroma, has_chroma};

    // Sizes are computed in size_t so large GOPs cannot overflow int
    const size_t frame_bytes = (size_t)num_pixels * sizeof(int16_t);
    const size_t channel_bytes = (size_t)gop_size * frame_bytes;

    // Allocate output arrays: [gop_size][3 channels][num_pixels], zero-filled
    int16_t ***output = calloc((size_t)gop_size, sizeof *output);
    if (output == NULL) return NULL;
    for (int t = 0; t < gop_size; t++) {
        output[t] = calloc(3, sizeof *output[t]);
        if (output[t] == NULL) goto error_cleanup;
        for (int ch = 0; ch < 3; ch++) {
            output[t][ch] = calloc((size_t)num_pixels, sizeof(int16_t));
            if (output[t][ch] == NULL && num_pixels > 0) goto error_cleanup;
        }
    }

    // Read each present channel (all frames concatenated)
    size_t offset = 0;  // invariant: offset <= data_size
    for (int ch = 0; ch < 3; ch++) {
        if (!present[ch]) continue;

        if (channel_bytes > data_size - offset) {
            fprintf(stderr, "Error: Not enough data for %s channel in RAW GOP\n", channel_names[ch]);
            goto error_cleanup;
        }

        // Copy by byte address; the source needs no int16 alignment this way
        const uint8_t *channel_data = decompressed_data + offset;
        for (int t = 0; t < gop_size; t++) {
            memcpy(output[t][ch], channel_data + (size_t)t * frame_bytes, frame_bytes);
        }
        offset += channel_bytes;
    }
    return output;

error_cleanup:
    for (int t = 0; t < gop_size; t++) {
        if (output[t] != NULL) {
            free(output[t][0]);
            free(output[t][1]);
            free(output[t][2]);
            free(output[t]);
        }
    }
    free(output);
    return NULL;
}
// Postprocess GOP EZBC format to per-frame coefficients (entropyCoder=1)
// Layout: [frame0_size(4)][frame0_ezbc_data][frame1_size(4)][frame1_ezbc_data]...
//
// Frame sizes are 4-byte little-endian. Each frame's data is handed to
// postprocess_coefficients_ezbc(), which decodes the channels selected by
// channel_layout.
//
// Returns output[gop_size][3][num_pixels] (Y, Co, Cg); the caller frees every
// plane, every per-frame array and the outer array. Returns NULL, after a
// message on stderr, when a frame header or frame body does not fit inside
// data_size, and NULL when memory cannot be allocated.
// NOTE(review): the per-frame size is not forwarded to the channel decoder,
// so channel sizes stored inside a frame are not bounded by it here.
static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t data_size,
                                       int gop_size, int num_pixels, int channel_layout) {
    // Allocate output arrays: [gop_size][3 channels][num_pixels], zero-filled
    int16_t ***output = calloc((size_t)gop_size, sizeof *output);
    if (output == NULL) return NULL;
    for (int t = 0; t < gop_size; t++) {
        output[t] = calloc(3, sizeof *output[t]);
        if (output[t] == NULL) goto error_cleanup;
        for (int ch = 0; ch < 3; ch++) {
            output[t][ch] = calloc((size_t)num_pixels, sizeof(int16_t));
            if (output[t][ch] == NULL && num_pixels > 0) goto error_cleanup;
        }
    }

    size_t offset = 0;  // invariant: offset <= data_size

    // Read each frame
    for (int t = 0; t < gop_size; t++) {
        if (data_size - offset < 4) {
            fprintf(stderr, "Error: Not enough data for frame %d size in EZBC GOP\n", t);
            goto error_cleanup;
        }

        // Read frame size (4 bytes, little-endian)
        const uint8_t *header = decompressed_data + offset;
        const uint32_t frame_size = ((uint32_t)header[0]) |
                                    ((uint32_t)header[1] << 8) |
                                    ((uint32_t)header[2] << 16) |
                                    ((uint32_t)header[3] << 24);
        offset += 4;

        // Compare against the remaining bytes; adding frame_size to the offset
        // could wrap around for a hostile size field.
        if (frame_size > data_size - offset) {
            fprintf(stderr, "Error: Frame %d EZBC data exceeds buffer (size=%u, available=%zu)\n",
                    t, frame_size, data_size - offset);
            goto error_cleanup;
        }

        // Decode EZBC frame using the single-frame EZBC decoder
        postprocess_coefficients_ezbc(
            (uint8_t *)(decompressed_data + offset), num_pixels,
            output[t][0], output[t][1], output[t][2],
            channel_layout);

        offset += frame_size;
    }
    return output;

error_cleanup:
    for (int t = 0; t < gop_size; t++) {
        if (output[t] != NULL) {
            free(output[t][0]);
            free(output[t][1]);
            free(output[t][2]);
            free(output[t]);
        }
    }
    free(output);
    return NULL;
}
//=============================================================================
// YCoCg-R / ICtCp to RGB Conversion (matches TSVM)
//=============================================================================
// Convert one YCoCg-R sample to 8-bit RGB.
// Undoes the lifting steps in float, then adds 0.5, truncates to int and
// clamps each channel into 0..255 before storing through r, g and b.
static void ycocg_r_to_rgb(float y, float co, float cg, uint8_t *r, uint8_t *g, uint8_t *b) {
    // Shared intermediate of the lifting scheme
    const float base = y - cg / 2.0f;

    const float green = cg + base;
    const float blue = base - co / 2.0f;
    const float red = co + blue;

    const int red_i = (int)(red + 0.5f);
    const int green_i = (int)(green + 0.5f);
    const int blue_i = (int)(blue + 0.5f);

    *r = CLAMP(red_i, 0, 255);
    *g = CLAMP(green_i, 0, 255);
    *b = CLAMP(blue_i, 0, 255);
}
// ICtCp to RGB conversion (for even TAV versions)
// Three stages: ICtCp -> nonlinear LMS, a power-law expansion standing in for
// the inverse PQ transfer function (marked "simplified" by the original
// author), then a 3x3 matrix to RGB. The result is scaled by 255, offset by
// 0.5, truncated to int and clamped to 0..255.
static void ictcp_to_rgb(float i, float ct, float cp, uint8_t *r, uint8_t *g, uint8_t *b) {
    // Stage 1: ICtCp -> LMS (nonlinear)
    float lms_l = i + 0.008609f * ct;
    float lms_m = i - 0.008609f * ct;
    float lms_s = i + 0.560031f * cp;

    // Stage 2: LMS (nonlinear) -> LMS (linear); negatives are floored to 0
    // before the power function is applied
    lms_l = powf(fmaxf(lms_l, 0.0f), 1.0f / 0.1593f);
    lms_m = powf(fmaxf(lms_m, 0.0f), 1.0f / 0.1593f);
    lms_s = powf(fmaxf(lms_s, 0.0f), 1.0f / 0.1593f);

    // Stage 3: LMS -> RGB
    const float red = 5.432622f * lms_l - 4.679910f * lms_m + 0.247288f * lms_s;
    const float green = -1.106160f * lms_l + 2.311198f * lms_m - 0.205038f * lms_s;
    const float blue = 0.028262f * lms_l - 0.195689f * lms_m + 1.167427f * lms_s;

    const int red_i = (int)(red * 255.0f + 0.5f);
    const int green_i = (int)(green * 255.0f + 0.5f);
    const int blue_i = (int)(blue * 255.0f + 0.5f);

    *r = CLAMP(red_i, 0, 255);
    *g = CLAMP(green_i, 0, 255);
    *b = CLAMP(blue_i, 0, 255);
}
//=============================================================================
// WAV File Writing
//=============================================================================
// Store a 16-bit value at dst in little-endian byte order.
static void wav_store_le16(uint8_t *dst, uint16_t value) {
    dst[0] = (uint8_t)(value & 0xFF);
    dst[1] = (uint8_t)((value >> 8) & 0xFF);
}

// Store a 32-bit value at dst in little-endian byte order.
static void wav_store_le32(uint8_t *dst, uint32_t value) {
    dst[0] = (uint8_t)(value & 0xFF);
    dst[1] = (uint8_t)((value >> 8) & 0xFF);
    dst[2] = (uint8_t)((value >> 16) & 0xFF);
    dst[3] = (uint8_t)((value >> 24) & 0xFF);
}

// Write a 44-byte RIFF/WAVE header for unsigned 8-bit PCM at the current
// position of fp.
//
// Parameters:
//   fp          - destination stream, positioned where the header belongs
//   sample_rate - samples per second per channel
//   channels    - number of interleaved channels
//   data_size   - size in bytes of the PCM payload that follows the header
//
// RIFF stores every multi-byte field little-endian, so the header is built
// byte by byte instead of dumping host-order integers; that keeps the output
// valid on big-endian hosts as well. A short write is reported on stderr
// because the function has no return value to signal it.
static void write_wav_header(FILE *fp, uint32_t sample_rate, uint16_t channels, uint32_t data_size) {
    const uint32_t byte_rate = sample_rate * channels * 1; // 1 byte per sample (u8)
    const uint16_t block_align = (uint16_t)(channels * 1);
    uint8_t header[44];

    // RIFF header
    memcpy(header + 0, "RIFF", 4);
    wav_store_le32(header + 4, 36 + data_size); // file size minus the 8-byte RIFF preamble
    memcpy(header + 8, "WAVE", 4);

    // fmt chunk
    memcpy(header + 12, "fmt ", 4);
    wav_store_le32(header + 16, 16);            // fmt chunk payload size
    wav_store_le16(header + 20, 1);             // audio format: PCM
    wav_store_le16(header + 22, channels);
    wav_store_le32(header + 24, sample_rate);
    wav_store_le32(header + 28, byte_rate);
    wav_store_le16(header + 32, block_align);
    wav_store_le16(header + 34, 8);             // bits per sample

    // data chunk
    memcpy(header + 36, "data", 4);
    wav_store_le32(header + 40, data_size);

    if (fwrite(header, 1, sizeof header, fp) != sizeof header) {
        fprintf(stderr, "Warning: Failed to write WAV header\n");
    }
}
//=============================================================================
// Decoder State Structure
//=============================================================================
// State for one video decoding session: the input stream, the working frame
// buffers and the FFmpeg child process that receives the decoded frames.
// Created by tav_decoder_init() and released by tav_decoder_free().
typedef struct {
    FILE *input_fp;               // TAV input file, opened for binary reading
    tav_header_t header;          // File header as read from the start of the input
    uint8_t *current_frame_rgb;   // Frame being produced: frame_size * 3 bytes, R,G,B per pixel
    uint8_t *reference_frame_rgb; // Copy of the last decoded frame; reused for skip-mode frames
    float *dwt_buffer_y;          // Per-channel work buffers (frame_size floats each) holding
    float *dwt_buffer_co;         // dequantized coefficients and, after the inverse DWT,
    float *dwt_buffer_cg;         // the spatial-domain samples
    float *reference_ycocg_y; // For P-frame delta accumulation
    float *reference_ycocg_co;
    float *reference_ycocg_cg;
    int frame_count;              // Frames written so far (also advanced by GOP sync packets)
    int frame_size;               // Pixels per frame: header.width * header.height
    int is_monoblock; // True if version 3-6 (single tile mode)
    // FFmpeg pipe for video only (audio from file)
    FILE *video_pipe;             // Write end of the pipe feeding raw RGB frames to FFmpeg
    pid_t ffmpeg_pid;             // PID of the forked FFmpeg process
    // Temporary audio file
    char *audio_file_path;        // Heap copy (strdup) of the audio file path given at init
} tav_decoder_t;
//=============================================================================
// Pass 1: Extract Audio to WAV File
//=============================================================================
static int extract_audio_to_wav(const char *input_file, const char *wav_file, int verbose) {
FILE *input_fp = fopen(input_file, "rb");
if (!input_fp) {
fprintf(stderr, "Failed to open input file for audio extraction\n");
return -1;
}
// Read header
tav_header_t header;
if (fread(&header, sizeof(tav_header_t), 1, input_fp) != 1) {
fclose(input_fp);
return -1;
}
// Open temporary audio file
FILE *wav_fp = fopen(wav_file, "wb");
if (!wav_fp) {
fprintf(stderr, "Failed to create temporary audio file\n");
fclose(input_fp);
return -1;
}
// Write placeholder WAV header (will be updated later)
write_wav_header(wav_fp, 32000, 2, 0);
uint32_t total_audio_bytes = 0;
int packet_count = 0;
if (verbose) {
fprintf(stderr, "[Pass 1] Extracting audio to %s...\n", wav_file);
}
// Read all packets and extract audio
while (1) {
uint8_t packet_type;
if (fread(&packet_type, 1, 1, input_fp) != 1) {
break; // EOF
}
packet_count++;
// Skip non-audio packets
if (packet_type == TAV_PACKET_SYNC || packet_type == TAV_PACKET_SYNC_NTSC) {
continue;
}
if (packet_type == TAV_PACKET_TIMECODE) {
fseek(input_fp, 8, SEEK_CUR); // Skip timecode
continue;
}
if (packet_type == TAV_PACKET_GOP_SYNC) {
fseek(input_fp, 1, SEEK_CUR); // Skip frame count
continue;
}
if (packet_type == TAV_PACKET_GOP_UNIFIED) {
uint8_t gop_size;
uint32_t compressed_size;
fread(&gop_size, 1, 1, input_fp);
fread(&compressed_size, 4, 1, input_fp);
fseek(input_fp, compressed_size, SEEK_CUR); // Skip GOP data
continue;
}
// Handle TAD audio
if (packet_type == TAV_PACKET_AUDIO_TAD) {
uint16_t sample_count_wrapper;
uint32_t payload_size_plus_7;
fread(&sample_count_wrapper, 2, 1, input_fp);
fread(&payload_size_plus_7, 4, 1, input_fp);
uint16_t sample_count_chunk;
uint8_t quantiser_index;
uint32_t compressed_size;
fread(&sample_count_chunk, 2, 1, input_fp);
fread(&quantiser_index, 1, 1, input_fp);
fread(&compressed_size, 4, 1, input_fp);
uint8_t *tad_compressed = malloc(compressed_size);
fread(tad_compressed, 1, compressed_size, input_fp);
// Build TAD chunk
size_t tad_chunk_size = 2 + 1 + 4 + compressed_size;
uint8_t *tad_chunk = malloc(tad_chunk_size);
memcpy(tad_chunk, &sample_count_chunk, 2);
memcpy(tad_chunk + 2, &quantiser_index, 1);
memcpy(tad_chunk + 3, &compressed_size, 4);
memcpy(tad_chunk + 7, tad_compressed, compressed_size);
free(tad_compressed);
// Decode TAD
uint8_t *pcmu8_output = malloc(sample_count_chunk * 2);
size_t bytes_consumed, samples_decoded;
int decode_result = decode_chunk(tad_chunk, tad_chunk_size,
pcmu8_output, &bytes_consumed, &samples_decoded);
if (decode_result >= 0) {
size_t pcm_bytes = samples_decoded * 2;
fwrite(pcmu8_output, 1, pcm_bytes, wav_fp);
total_audio_bytes += pcm_bytes;
}
free(tad_chunk);
free(pcmu8_output);
continue;
}
// Handle PCM8 audio
if (packet_type == TAV_PACKET_AUDIO_PCM8) {
uint32_t packet_size;
fread(&packet_size, 4, 1, input_fp);
uint8_t *compressed_data = malloc(packet_size);
fread(compressed_data, 1, packet_size, input_fp);
// Decompress
size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, packet_size);
uint8_t *pcm_data = malloc(decompressed_bound);
size_t decompressed_size = ZSTD_decompress(pcm_data, decompressed_bound,
compressed_data, packet_size);
free(compressed_data);
if (!ZSTD_isError(decompressed_size)) {
fwrite(pcm_data, 1, decompressed_size, wav_fp);
total_audio_bytes += decompressed_size;
}
free(pcm_data);
continue;
}
// Handle EXTENDED_HDR packet (key-value pairs)
if (packet_type == TAV_PACKET_EXTENDED_HDR) {
uint16_t num_pairs;
fread(&num_pairs, 2, 1, input_fp);
for (int i = 0; i < num_pairs; i++) {
fseek(input_fp, 4, SEEK_CUR); // Skip key (4 bytes)
uint8_t value_type;
fread(&value_type, 1, 1, input_fp);
if (value_type == 0x04) {
fseek(input_fp, 8, SEEK_CUR); // uint64 value
} else if (value_type == 0x10) {
uint16_t str_len;
fread(&str_len, 2, 1, input_fp);
fseek(input_fp, str_len, SEEK_CUR); // string value
}
}
continue;
}
// Read packet size for standard packets
uint32_t packet_size;
if (fread(&packet_size, 4, 1, input_fp) == 1) {
fseek(input_fp, packet_size, SEEK_CUR);
}
}
// Update WAV header with actual data size
fseek(wav_fp, 0, SEEK_SET);
write_wav_header(wav_fp, 32000, 2, total_audio_bytes);
fclose(wav_fp);
fclose(input_fp);
if (verbose) {
fprintf(stderr, "[Pass 1] Extracted %u bytes of audio (%d packets processed)\n",
total_audio_bytes, packet_count);
}
return 0;
}
//=============================================================================
// Decoder Initialization and Cleanup
//=============================================================================
// Create a decoder for input_file and start the FFmpeg child that encodes the
// decoded frames into output_file.
//
// Steps: open the input and read/verify its header, allocate the frame
// buffers, create a pipe, then fork. The child exec's /usr/bin/ffmpeg reading
// raw rgb24 video from fd 3 and audio from audio_file; the parent keeps the
// write end of the pipe as decoder->video_pipe.
//
// Parameters:
//   input_file  - path of the TAV file to decode
//   output_file - path handed to FFmpeg as the output (Matroska, FFV1 + pcm_u8)
//   audio_file  - path of the WAV file handed to FFmpeg as the audio input
//
// Returns the new decoder (release with tav_decoder_free()) or NULL on any
// failure; on failure everything acquired so far is released again.
static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file) {
    int video_pipe_fd[2] = { -1, -1 }; // -1 marks "not open" for the shared cleanup path
    int64_t pixel_count = 0;

    tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
    if (!decoder) return NULL;

    decoder->input_fp = fopen(input_file, "rb");
    if (!decoder->input_fp) goto fail;

    // Read header
    if (fread(&decoder->header, sizeof(tav_header_t), 1, decoder->input_fp) != 1) goto fail;

    // Verify magic
    if (memcmp(decoder->header.magic, TAV_MAGIC, 8) != 0) goto fail;

    // Reject empty frames and sizes whose RGB buffer length would overflow int
    pixel_count = (int64_t)decoder->header.width * (int64_t)decoder->header.height;
    if (pixel_count <= 0 || pixel_count > INT32_MAX / 3) {
        fprintf(stderr, "Invalid frame dimensions %dx%d\n",
                decoder->header.width, decoder->header.height);
        goto fail;
    }
    decoder->frame_size = (int)pixel_count;
    decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6);
    decoder->audio_file_path = strdup(audio_file);

    // Allocate buffers
    decoder->current_frame_rgb = calloc((size_t)decoder->frame_size * 3, 1);
    decoder->reference_frame_rgb = calloc((size_t)decoder->frame_size * 3, 1);
    decoder->dwt_buffer_y = calloc((size_t)decoder->frame_size, sizeof(float));
    decoder->dwt_buffer_co = calloc((size_t)decoder->frame_size, sizeof(float));
    decoder->dwt_buffer_cg = calloc((size_t)decoder->frame_size, sizeof(float));
    decoder->reference_ycocg_y = calloc((size_t)decoder->frame_size, sizeof(float));
    decoder->reference_ycocg_co = calloc((size_t)decoder->frame_size, sizeof(float));
    decoder->reference_ycocg_cg = calloc((size_t)decoder->frame_size, sizeof(float));
    if (!decoder->audio_file_path ||
        !decoder->current_frame_rgb || !decoder->reference_frame_rgb ||
        !decoder->dwt_buffer_y || !decoder->dwt_buffer_co || !decoder->dwt_buffer_cg ||
        !decoder->reference_ycocg_y || !decoder->reference_ycocg_co || !decoder->reference_ycocg_cg) {
        fprintf(stderr, "Failed to allocate decoder buffers\n");
        goto fail;
    }

    // Create FFmpeg process for video encoding (video pipe only, audio from file)
    if (pipe(video_pipe_fd) == -1) {
        fprintf(stderr, "Failed to create video pipe\n");
        video_pipe_fd[0] = video_pipe_fd[1] = -1;
        goto fail;
    }

    decoder->ffmpeg_pid = fork();
    if (decoder->ffmpeg_pid == -1) {
        fprintf(stderr, "Failed to fork FFmpeg process\n");
        goto fail;
    }

    if (decoder->ffmpeg_pid == 0) {
        // Child process - FFmpeg
        close(video_pipe_fd[1]); // Close write end

        char video_size[32];
        char framerate[16];
        snprintf(video_size, sizeof(video_size), "%dx%d", decoder->header.width, decoder->header.height);
        snprintf(framerate, sizeof(framerate), "%d", decoder->header.fps);

        // Redirect video pipe to fd 3. If the read end already is fd 3 there is
        // nothing to duplicate, and closing it would take the pipe away.
        if (video_pipe_fd[0] != 3) {
            if (dup2(video_pipe_fd[0], 3) == -1) {
                fprintf(stderr, "Failed to start FFmpeg\n");
                _exit(1);
            }
            close(video_pipe_fd[0]);
        }

        execl("/usr/bin/ffmpeg", "ffmpeg",
              "-f", "rawvideo",
              "-pixel_format", "rgb24",
              "-video_size", video_size,
              "-framerate", framerate,
              "-i", "pipe:3",              // Video from fd 3
              "-i", audio_file,            // Audio from file
              "-color_range", "2",
              "-c:v", "ffv1",              // FFV1 codec
              "-level", "3",               // FFV1 level 3
              "-coder", "1",               // Range coder
              "-context", "1",             // Large context
              "-g", "1",                   // GOP size 1 (all I-frames)
              "-slices", "24",             // 24 slices for threading
              "-slicecrc", "1",            // CRC per slice
              "-pixel_format", "rgb24",    // make FFmpeg encode to RGB
              "-color_range", "2",
              "-c:a", "pcm_u8",            // Audio codec (PCM unsigned 8-bit)
              "-f", "matroska",            // MKV container
              output_file,
              "-y",                        // Overwrite output
              "-v", "warning",             // Minimal logging
              (char*)NULL);

        // Only reached when exec failed. _exit() rather than exit(): the child
        // must not run the parent's atexit handlers or flush its copy of the
        // parent's stdio buffers.
        fprintf(stderr, "Failed to start FFmpeg\n");
        _exit(1);
    }

    // Parent process
    close(video_pipe_fd[0]); // Close read end
    video_pipe_fd[0] = -1;

    decoder->video_pipe = fdopen(video_pipe_fd[1], "wb");
    if (!decoder->video_pipe) {
        fprintf(stderr, "Failed to open video pipe for writing\n");
        // Stop and reap the child so it does not linger as a zombie
        kill(decoder->ffmpeg_pid, SIGTERM);
        waitpid(decoder->ffmpeg_pid, NULL, 0);
        goto fail;
    }
    return decoder;

fail:
    // Single cleanup path: unset pointers are NULL (calloc) and free(NULL) is a no-op
    if (video_pipe_fd[0] >= 0) close(video_pipe_fd[0]);
    if (video_pipe_fd[1] >= 0) close(video_pipe_fd[1]);
    free(decoder->current_frame_rgb);
    free(decoder->reference_frame_rgb);
    free(decoder->dwt_buffer_y);
    free(decoder->dwt_buffer_co);
    free(decoder->dwt_buffer_cg);
    free(decoder->reference_ycocg_y);
    free(decoder->reference_ycocg_co);
    free(decoder->reference_ycocg_cg);
    free(decoder->audio_file_path);
    if (decoder->input_fp) fclose(decoder->input_fp);
    free(decoder);
    return NULL;
}
// Tear down a decoder created by tav_decoder_init(). Accepts NULL.
// Closes the input file and the video pipe, waits for the FFmpeg child to
// exit, then releases every buffer and the decoder itself.
static void tav_decoder_free(tav_decoder_t *decoder) {
    if (decoder == NULL) {
        return;
    }

    if (decoder->input_fp != NULL) {
        fclose(decoder->input_fp);
    }
    // The pipe is closed before waiting on the child
    if (decoder->video_pipe != NULL) {
        fclose(decoder->video_pipe);
    }

    // Wait for FFmpeg to finish
    if (decoder->ffmpeg_pid > 0) {
        int child_status;
        waitpid(decoder->ffmpeg_pid, &child_status, 0);
    }

    // Heap blocks owned by the decoder, released in declaration order
    void *const owned[] = {
        decoder->current_frame_rgb,
        decoder->reference_frame_rgb,
        decoder->dwt_buffer_y,
        decoder->dwt_buffer_co,
        decoder->dwt_buffer_cg,
        decoder->reference_ycocg_y,
        decoder->reference_ycocg_co,
        decoder->reference_ycocg_cg,
        decoder->audio_file_path,
    };
    for (size_t k = 0; k < sizeof owned / sizeof owned[0]; k++) {
        free(owned[k]);
    }
    free(decoder);
}
//=============================================================================
// Frame Decoding Logic
//=============================================================================
// Decode one I-frame or P-frame packet and write the resulting RGB frame to
// the FFmpeg video pipe.
//
// The packet payload (packet_size bytes at the current input position) is
// Zstd-decompressed. Its first four bytes are: mode, then Y/Co/Cg quantiser
// overrides. Skip mode repeats the reference frame; any other mode runs
// coefficient postprocessing, dequantization, grain removal, inverse DWT,
// optional P-frame delta accumulation and colour conversion.
//
// Exactly one frame is written per call, even when decoding fails (a black
// frame is written instead) so the video stream stays aligned with the packet
// sequence.
//
// Parameters:
//   decoder     - decoder state; its buffers and frame_count are updated
//   packet_type - TAV_PACKET_IFRAME or TAV_PACKET_PFRAME
//   packet_size - size in bytes of the compressed payload to read
//
// Returns 1 after a frame was written (decode errors are non-fatal) and -1
// when writing to or flushing the video pipe fails.
static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint32_t packet_size) {
    // Temporary buffers, declared up front so the write_frame cleanup covers every path
    uint8_t *compressed_data = NULL;
    uint8_t *decompressed_data = NULL;
    int16_t *quantized_y = NULL;
    int16_t *quantized_co = NULL;
    int16_t *quantized_cg = NULL;
    int decode_success = 1; // Assume success, set to 0 on error

    // Read and decompress frame data
    compressed_data = malloc(packet_size);
    if (!compressed_data) {
        fprintf(stderr, "Error: Failed to allocate %u bytes for compressed data\n", packet_size);
        decode_success = 0;
        goto write_frame;
    }
    if (fread(compressed_data, 1, packet_size, decoder->input_fp) != packet_size) {
        fprintf(stderr, "Error: Failed to read %u bytes of compressed frame data\n", packet_size);
        decode_success = 0;
        goto write_frame;
    }

    size_t decompressed_size = ZSTD_getFrameContentSize(compressed_data, packet_size);
    if (decompressed_size == ZSTD_CONTENTSIZE_ERROR || decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) {
        fprintf(stderr, "Warning: Could not determine decompressed size, using estimate\n");
        decompressed_size = decoder->frame_size * 3 * sizeof(int16_t) + 1024;
    }
    decompressed_data = malloc(decompressed_size);
    if (!decompressed_data) {
        fprintf(stderr, "Error: Failed to allocate %zu bytes for decompressed data\n", decompressed_size);
        decode_success = 0;
        goto write_frame;
    }

    size_t actual_size = ZSTD_decompress(decompressed_data, decompressed_size, compressed_data, packet_size);
    if (ZSTD_isError(actual_size)) {
        fprintf(stderr, "Error: ZSTD decompression failed: %s\n", ZSTD_getErrorName(actual_size));
        fprintf(stderr, " Compressed size: %u, Buffer size: %zu\n", packet_size, decompressed_size);
        decode_success = 0;
        goto write_frame;
    }

    // The block header below is 4 bytes; refuse payloads that cannot hold it
    // rather than reading past the end of the buffer.
    if (actual_size < 4) {
        fprintf(stderr, "Error: Decompressed frame data too short (%zu bytes) for block header\n", actual_size);
        decode_success = 0;
        goto write_frame;
    }

    // Parse block data
    uint8_t *ptr = decompressed_data;
    uint8_t mode = *ptr++;
    uint8_t qy_override = *ptr++;
    uint8_t qco_override = *ptr++;
    uint8_t qcg_override = *ptr++;

    // IMPORTANT: Both header and override store QLUT indices, not values!
    // Override of 0 means "use header value"
    int qy = qy_override ? QLUT[qy_override] : QLUT[decoder->header.quantiser_y];
    int qco = qco_override ? QLUT[qco_override] : QLUT[decoder->header.quantiser_co];
    int qcg = qcg_override ? QLUT[qcg_override] : QLUT[decoder->header.quantiser_cg];

    if (mode == TAV_MODE_SKIP) {
        // Copy from reference frame
        memcpy(decoder->current_frame_rgb, decoder->reference_frame_rgb, decoder->frame_size * 3);
    } else {
        // Decode coefficients (use function-level variables for proper cleanup)
        int coeff_count = decoder->frame_size;
        quantized_y = calloc(coeff_count, sizeof(int16_t));
        quantized_co = calloc(coeff_count, sizeof(int16_t));
        quantized_cg = calloc(coeff_count, sizeof(int16_t));
        if (!quantized_y || !quantized_co || !quantized_cg) {
            fprintf(stderr, "Error: Failed to allocate coefficient buffers\n");
            decode_success = 0;
            goto write_frame;
        }

        // Postprocess coefficients based on entropy_coder value
        if (decoder->header.entropy_coder == 1) {
            // EZBC format (stub implementation)
            postprocess_coefficients_ezbc(ptr, coeff_count, quantized_y, quantized_co, quantized_cg,
                                          decoder->header.channel_layout);
        } else {
            // Default: Twobitmap format (entropy_coder=0)
            postprocess_coefficients_twobit(ptr, coeff_count, quantized_y, quantized_co, quantized_cg);
        }

        // Dequantize (perceptual for versions 5-8, uniform for 1-4)
        const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
        const int is_ezbc = (decoder->header.entropy_coder == 1);
        if (is_ezbc) {
            // EZBC mode: coefficients are already denormalized by encoder
            // Just convert int16 to float without multiplying by quantizer
            for (int i = 0; i < coeff_count; i++) {
                decoder->dwt_buffer_y[i] = (float)quantized_y[i];
                decoder->dwt_buffer_co[i] = (float)quantized_co[i];
                decoder->dwt_buffer_cg[i] = (float)quantized_cg[i];
            }
        } else if (is_perceptual) {
            dequantize_dwt_subbands_perceptual(0, qy, quantized_y, decoder->dwt_buffer_y,
                                               decoder->header.width, decoder->header.height,
                                               decoder->header.decomp_levels, qy, 0, decoder->frame_count);
            dequantize_dwt_subbands_perceptual(0, qy, quantized_co, decoder->dwt_buffer_co,
                                               decoder->header.width, decoder->header.height,
                                               decoder->header.decomp_levels, qco, 1, decoder->frame_count);
            dequantize_dwt_subbands_perceptual(0, qy, quantized_cg, decoder->dwt_buffer_cg,
                                               decoder->header.width, decoder->header.height,
                                               decoder->header.decomp_levels, qcg, 1, decoder->frame_count);
        } else {
            for (int i = 0; i < coeff_count; i++) {
                decoder->dwt_buffer_y[i] = quantized_y[i] * qy;
                decoder->dwt_buffer_co[i] = quantized_co[i] * qco;
                decoder->dwt_buffer_cg[i] = quantized_cg[i] * qcg;
            }
        }

        // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
        remove_grain_synthesis_decoder(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
                                       decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);

        // Apply inverse DWT with correct non-power-of-2 dimension handling
        // Note: quantized arrays freed at write_frame label
        apply_inverse_dwt_multilevel(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height,
                                     decoder->header.decomp_levels, decoder->header.wavelet_filter);
        apply_inverse_dwt_multilevel(decoder->dwt_buffer_co, decoder->header.width, decoder->header.height,
                                     decoder->header.decomp_levels, decoder->header.wavelet_filter);
        apply_inverse_dwt_multilevel(decoder->dwt_buffer_cg, decoder->header.width, decoder->header.height,
                                     decoder->header.decomp_levels, decoder->header.wavelet_filter);

        // Handle P-frame delta accumulation (in YCoCg float space)
        if (packet_type == TAV_PACKET_PFRAME && mode == TAV_MODE_DELTA) {
            for (int i = 0; i < decoder->frame_size; i++) {
                decoder->dwt_buffer_y[i] += decoder->reference_ycocg_y[i];
                decoder->dwt_buffer_co[i] += decoder->reference_ycocg_co[i];
                decoder->dwt_buffer_cg[i] += decoder->reference_ycocg_cg[i];
            }
        }

        // Convert YCoCg-R/ICtCp to RGB
        const int is_ictcp = (decoder->header.version % 2 == 0);
        for (int i = 0; i < decoder->frame_size; i++) {
            uint8_t r, g, b;
            if (is_ictcp) {
                ictcp_to_rgb(decoder->dwt_buffer_y[i],
                             decoder->dwt_buffer_co[i],
                             decoder->dwt_buffer_cg[i], &r, &g, &b);
            } else {
                ycocg_r_to_rgb(decoder->dwt_buffer_y[i],
                               decoder->dwt_buffer_co[i],
                               decoder->dwt_buffer_cg[i], &r, &g, &b);
            }
            // RGB byte order for FFmpeg rgb24
            decoder->current_frame_rgb[i * 3 + 0] = r;
            decoder->current_frame_rgb[i * 3 + 1] = g;
            decoder->current_frame_rgb[i * 3 + 2] = b;
        }

        // Update reference YCoCg frame
        memcpy(decoder->reference_ycocg_y, decoder->dwt_buffer_y, decoder->frame_size * sizeof(float));
        memcpy(decoder->reference_ycocg_co, decoder->dwt_buffer_co, decoder->frame_size * sizeof(float));
        memcpy(decoder->reference_ycocg_cg, decoder->dwt_buffer_cg, decoder->frame_size * sizeof(float));
    }

    // Update reference frame
    memcpy(decoder->reference_frame_rgb, decoder->current_frame_rgb, decoder->frame_size * 3);

write_frame:
    // Clean up temporary allocations (free(NULL) is a no-op)
    free(compressed_data);
    free(decompressed_data);
    free(quantized_y);
    free(quantized_co);
    free(quantized_cg);

    // If decoding failed, fill frame with black to maintain stream alignment
    if (!decode_success) {
        memset(decoder->current_frame_rgb, 0, decoder->frame_size * 3);
        fprintf(stderr, "Warning: Writing black frame %d due to decode error\n", decoder->frame_count);
    }

    // Write frame to video pipe with retry on partial writes (ALWAYS write to maintain alignment)
    size_t bytes_to_write = decoder->frame_size * 3;
    size_t total_written = 0;
    const uint8_t *write_ptr = decoder->current_frame_rgb;
    while (total_written < bytes_to_write) {
        size_t bytes_written = fwrite(write_ptr + total_written, 1,
                                      bytes_to_write - total_written,
                                      decoder->video_pipe);
        if (bytes_written == 0) {
            if (ferror(decoder->video_pipe)) {
                fprintf(stderr, "Error: Pipe write error at frame %d (wrote %zu/%zu bytes) - aborting\n",
                        decoder->frame_count, total_written, bytes_to_write);
                // Cannot maintain stream alignment if pipe is broken - this is fatal
                return -1;
            }
            // Pipe might be full, flush and retry
            fflush(decoder->video_pipe);
            usleep(1000); // 1ms delay
        } else {
            total_written += bytes_written;
        }
    }

    // Ensure data is flushed to FFmpeg
    if (fflush(decoder->video_pipe) != 0) {
        fprintf(stderr, "Error: Failed to flush video pipe at frame %d - aborting\n", decoder->frame_count);
        // Cannot maintain stream alignment if pipe is broken - this is fatal
        return -1;
    }

    decoder->frame_count++;

    // Decode errors are non-fatal: a frame was written either way to maintain
    // stream alignment, so always tell the caller to continue.
    return 1;
}
//=============================================================================
// Main Decoding Loop
//=============================================================================
// Print the command-line help to stdout: banner, version string, usage line
// (using prog as the program name), then the option and feature lists.
static void print_usage(const char *prog) {
    // Fixed text that follows the usage line, emitted verbatim in order
    static const char *const help_lines[] = {
        "Options:\n",
        " -i <file> Input TAV file\n",
        " -o <file> Output MKV file (FFV1 video + PCMu8 audio)\n",
        " -v Verbose output\n",
        " -h, --help Show this help\n\n",
        "Supported features (matches TSVM decoder):\n",
        " - I-frames and P-frames (delta mode)\n",
        " - GOP unified 3D DWT (temporal compression)\n",
        " - TAD audio (decoded to PCMu8)\n",
        " - MP2 audio (passed through)\n",
        " - All wavelet types (5/3, 9/7, CDF 13/7, DD-4, Haar)\n",
        " - Perceptual quantization (versions 5-8)\n",
        " - YCoCg-R and ICtCp color spaces\n\n",
        "Unsupported features (not in TSVM decoder):\n",
        " - MC-EZBC motion compensation\n",
        " - MPEG-style residual coding (P/B-frames)\n",
        " - Adaptive block partitioning\n\n",
    };
    const size_t line_total = sizeof help_lines / sizeof help_lines[0];

    fputs("TAV Decoder - Converts TAV video to FFV1+PCMu8 in MKV container\n", stdout);
    printf("Version: %s\n\n", DECODER_VENDOR_STRING);
    printf("Usage: %s -i input.tav -o output.mkv\n\n", prog);

    for (size_t k = 0; k < line_total; k++) {
        fputs(help_lines[k], stdout);
    }
}
int main(int argc, char *argv[]) {
// Ignore SIGPIPE to prevent process termination if FFmpeg exits early
signal(SIGPIPE, SIG_IGN);
char *input_file = NULL;
char *output_file = NULL;
int verbose = 0;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
};
int opt;
while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, NULL)) != -1) {
switch (opt) {
case 'i':
input_file = optarg;
break;
case 'o':
output_file = optarg;
break;
case 'v':
verbose = 1;
break;
case 'h':
print_usage(argv[0]);
return 0;
default:
print_usage(argv[0]);
return 1;
}
}
if (!input_file || !output_file) {
fprintf(stderr, "Error: Both input and output files are required\n\n");
print_usage(argv[0]);
return 1;
}
// Create temporary audio file path
char temp_audio_file[256];
snprintf(temp_audio_file, sizeof(temp_audio_file), "/tmp/tav_audio_%d.wav", getpid());
// Pass 1: Extract audio to WAV file
if (extract_audio_to_wav(input_file, temp_audio_file, verbose) < 0) {
fprintf(stderr, "Failed to extract audio\n");
unlink(temp_audio_file); // Clean up temp file if it exists
return 1;
}
// Pass 2: Decode video with audio file
tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file);
if (!decoder) {
fprintf(stderr, "Failed to initialize decoder\n");
unlink(temp_audio_file); // Clean up temp file
return 1;
}
if (verbose) {
printf("TAV Decoder - %dx%d @ %dfps\n", decoder->header.width, decoder->header.height, decoder->header.fps);
printf("Wavelet: %s, Levels: %d\n",
decoder->header.wavelet_filter == 0 ? "5/3" :
decoder->header.wavelet_filter == 1 ? "9/7" :
decoder->header.wavelet_filter == 2 ? "CDF 13/7" :
decoder->header.wavelet_filter == 16 ? "DD-4" :
decoder->header.wavelet_filter == 255 ? "Haar" : "Unknown",
decoder->header.decomp_levels);
printf("Version: %d (%s, %s)\n", decoder->header.version,
decoder->header.version % 2 == 0 ? "ICtCp" : "YCoCg-R",
decoder->is_monoblock ? "monoblock" : "tiled");
printf("Output: %s (FFV1 level 3 + PCMu8 @ 32 KHz)\n", output_file);
}
// Main decoding loop
int result = 1;
int total_packets = 0;
int iframe_count = 0;
while (result > 0) {
// Check file position before reading packet
long file_pos = ftell(decoder->input_fp);
uint8_t packet_type;
if (fread(&packet_type, 1, 1, decoder->input_fp) != 1) {
if (verbose) {
fprintf(stderr, "Reached EOF at file position %ld after %d packets\n", file_pos, total_packets);
}
result = 0; // EOF
break;
}
total_packets++;
if (verbose && total_packets <= 30) {
fprintf(stderr, "Packet %d at file pos %ld: Type 0x%02X\n", total_packets, file_pos, packet_type);
}
// Handle sync packets (no size field)
if (packet_type == TAV_PACKET_SYNC || packet_type == TAV_PACKET_SYNC_NTSC) {
if (verbose && total_packets < 20) {
fprintf(stderr, "Packet %d: SYNC (0x%02X)\n", total_packets, packet_type);
}
continue;
}
// Handle timecode packets (no size field, just 8 bytes of uint64 timecode)
if (packet_type == TAV_PACKET_TIMECODE) {
uint64_t timecode_ns;
if (fread(&timecode_ns, 8, 1, decoder->input_fp) != 1) {
fprintf(stderr, "Error: Failed to read timecode\n");
result = -1;
break;
}
if (verbose && total_packets < 20) {
double timecode_sec = timecode_ns / 1000000000.0;
fprintf(stderr, "Packet %d: TIMECODE (0x%02X) - %.6f seconds\n",
total_packets, packet_type, timecode_sec);
}
continue;
}
// GOP sync packets have no size field; the type byte is followed by a single
// byte giving the number of frames the preceding GOP produced.
if (packet_type == TAV_PACKET_GOP_SYNC) {
    uint8_t frames_in_gop;
    const size_t items_read = fread(&frames_in_gop, sizeof frames_in_gop, 1, decoder->input_fp);
    if (items_read != 1) {
        fprintf(stderr, "Error: Failed to read GOP sync frame count\n");
        result = -1;
        break;
    }
    if (verbose) {
        fprintf(stderr, "Packet %d: GOP_SYNC (0x%02X) - %u frames from GOP\n",
                total_packets, packet_type, frames_in_gop);
    }
    // The GOP handler writes the frames but leaves frame_count untouched;
    // the running frame counter is advanced here instead.
    decoder->frame_count += frames_in_gop;
    continue;
}
// Handle GOP unified packets (custom format: 1-byte gop_size + 4-byte compressed_size).
//
// The payload is one Zstd frame holding the quantized coefficients of every
// frame in the GOP. It is decompressed, unpacked according to the header's
// entropy_coder, dequantized into float planes, run through the inverse 3D
// DWT, converted to RGB and written frame by frame to the FFmpeg video pipe.
// decoder->frame_count is NOT advanced here; the GOP_SYNC packet does that.
//
// On any failure all buffers owned by this block are released, result is set
// to -1 and the packet loop is left.
if (packet_type == TAV_PACKET_GOP_UNIFIED) {
    uint8_t gop_size;
    uint32_t compressed_size;
    if (fread(&gop_size, 1, 1, decoder->input_fp) != 1 ||
        fread(&compressed_size, 4, 1, decoder->input_fp) != 1) {
        fprintf(stderr, "Error: Failed to read GOP unified packet header\n");
        result = -1;
        break;
    }
    if (verbose) {
        fprintf(stderr, "Packet %d: GOP_UNIFIED (0x%02X), %u frames, %u bytes\n",
                total_packets, packet_type, gop_size, compressed_size);
    }

    // Read compressed GOP data
    uint8_t *compressed_data = malloc(compressed_size);
    if (!compressed_data) {
        fprintf(stderr, "Error: Failed to allocate GOP compressed buffer (%u bytes)\n", compressed_size);
        result = -1;
        break;
    }
    if (fread(compressed_data, 1, compressed_size, decoder->input_fp) != compressed_size) {
        fprintf(stderr, "Error: Failed to read GOP compressed data\n");
        free(compressed_data);
        result = -1;
        break;
    }

    // Decompress with Zstd. The frame header must declare its content size,
    // since that value is used to size the output buffer.
    const size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, compressed_size);
    if (decompressed_bound == ZSTD_CONTENTSIZE_ERROR || decompressed_bound == ZSTD_CONTENTSIZE_UNKNOWN) {
        fprintf(stderr, "Error: Invalid Zstd frame in GOP data\n");
        free(compressed_data);
        result = -1;
        break;
    }
    uint8_t *decompressed_data = malloc(decompressed_bound);
    if (!decompressed_data) {
        fprintf(stderr, "Error: Failed to allocate GOP decompressed buffer (%zu bytes)\n", decompressed_bound);
        free(compressed_data);
        result = -1;
        break;
    }
    const size_t decompressed_size = ZSTD_decompress(decompressed_data, decompressed_bound,
                                                     compressed_data, compressed_size);
    free(compressed_data);
    if (ZSTD_isError(decompressed_size)) {
        fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(decompressed_size));
        free(decompressed_data);
        result = -1;
        break;
    }

    // Postprocess coefficients based on entropy_coder value.
    // The result is indexed as quantized_gop[frame][channel][pixel].
    const int num_pixels = decoder->header.width * decoder->header.height;
    int16_t ***quantized_gop;
    if (decoder->header.entropy_coder == 2) {
        // RAW format: simple concatenated int16 arrays
        if (verbose) {
            fprintf(stderr, "  Using RAW postprocessing (entropy_coder=2)\n");
        }
        quantized_gop = postprocess_gop_raw(decompressed_data, decompressed_size,
                                            gop_size, num_pixels, decoder->header.channel_layout);
    } else if (decoder->header.entropy_coder == 1) {
        // EZBC format: embedded zero-block coding
        if (verbose) {
            fprintf(stderr, "  Using EZBC postprocessing (entropy_coder=1)\n");
        }
        quantized_gop = postprocess_gop_ezbc(decompressed_data, decompressed_size,
                                             gop_size, num_pixels, decoder->header.channel_layout);
    } else {
        // Default: Twobitmap format (entropy_coder=0)
        if (verbose) {
            fprintf(stderr, "  Using Twobitmap postprocessing (entropy_coder=0)\n");
        }
        quantized_gop = postprocess_gop_unified(decompressed_data, decompressed_size,
                                                gop_size, num_pixels, decoder->header.channel_layout);
    }
    free(decompressed_data);
    if (!quantized_gop) {
        fprintf(stderr, "Error: Failed to postprocess GOP data\n");
        result = -1;
        break;
    }

    // Allocate GOP float buffers. The pointer tables are zero-filled so the
    // cleanup paths can free() every slot even after a partial allocation.
    float **gop_y = calloc(gop_size, sizeof *gop_y);
    float **gop_co = calloc(gop_size, sizeof *gop_co);
    float **gop_cg = calloc(gop_size, sizeof *gop_cg);
    // calloc(0, ...) may legitimately return NULL, so an empty GOP is not a failure.
    int gop_alloc_ok = (gop_size == 0) || (gop_y && gop_co && gop_cg);
    for (int t = 0; gop_alloc_ok && t < gop_size; t++) {
        gop_y[t] = calloc(num_pixels, sizeof(float));
        gop_co[t] = calloc(num_pixels, sizeof(float));
        gop_cg[t] = calloc(num_pixels, sizeof(float));
        if (num_pixels > 0 && (!gop_y[t] || !gop_co[t] || !gop_cg[t])) {
            gop_alloc_ok = 0;
        }
    }
    if (!gop_alloc_ok) {
        fprintf(stderr, "Error: Failed to allocate GOP float buffers (%u frames, %d pixels each)\n",
                gop_size, num_pixels);
        for (int t = 0; t < gop_size; t++) {
            free(quantized_gop[t][0]);
            free(quantized_gop[t][1]);
            free(quantized_gop[t][2]);
            free(quantized_gop[t]);
            if (gop_y) free(gop_y[t]);
            if (gop_co) free(gop_co[t]);
            if (gop_cg) free(gop_cg[t]);
        }
        free(quantized_gop);
        free(gop_y);
        free(gop_co);
        free(gop_cg);
        result = -1;
        break;
    }

    // Dequantize with temporal scaling (perceptual quantization for versions 5-8)
    const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
    const int is_ezbc = (decoder->header.entropy_coder == 1);
    const int temporal_levels = 2;  // Fixed for TAV GOP encoding
    for (int t = 0; t < gop_size; t++) {
        if (is_ezbc) {
            // EZBC mode: coefficients are already denormalized by encoder
            // Just convert int16 to float without multiplying by quantizer
            for (int i = 0; i < num_pixels; i++) {
                gop_y[t][i] = (float)quantized_gop[t][0][i];
                gop_co[t][i] = (float)quantized_gop[t][1][i];
                gop_cg[t][i] = (float)quantized_gop[t][2][i];
            }
            // Debug dump of the first frame: verbose only, and only when the
            // plane really holds the five leading coefficients that are printed.
            if (verbose && t == 0 && num_pixels >= 5) {
                int16_t max_y = 0, min_y = 0;
                for (int i = 0; i < num_pixels; i++) {
                    if (quantized_gop[t][0][i] > max_y) max_y = quantized_gop[t][0][i];
                    if (quantized_gop[t][0][i] < min_y) min_y = quantized_gop[t][0][i];
                }
                fprintf(stderr, "[GOP-EZBC] Frame 0 Y coeffs range: [%d, %d], first 5: %d %d %d %d %d\n",
                        min_y, max_y,
                        quantized_gop[t][0][0], quantized_gop[t][0][1], quantized_gop[t][0][2],
                        quantized_gop[t][0][3], quantized_gop[t][0][4]);
            }
        } else {
            // Normal mode: multiply by quantizer
            const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
            const float temporal_scale = get_temporal_quantizer_scale(temporal_level);
            // CRITICAL: Must ROUND temporal quantizer to match encoder's roundf() behavior
            const float base_q_y = roundf(decoder->header.quantiser_y * temporal_scale);
            const float base_q_co = roundf(decoder->header.quantiser_co * temporal_scale);
            const float base_q_cg = roundf(decoder->header.quantiser_cg * temporal_scale);
            if (is_perceptual) {
                dequantize_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
                                                   quantized_gop[t][0], gop_y[t],
                                                   decoder->header.width, decoder->header.height,
                                                   decoder->header.decomp_levels, base_q_y, 0, decoder->frame_count + t);
                dequantize_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
                                                   quantized_gop[t][1], gop_co[t],
                                                   decoder->header.width, decoder->header.height,
                                                   decoder->header.decomp_levels, base_q_co, 1, decoder->frame_count + t);
                dequantize_dwt_subbands_perceptual(0, decoder->header.quantiser_y,
                                                   quantized_gop[t][2], gop_cg[t],
                                                   decoder->header.width, decoder->header.height,
                                                   decoder->header.decomp_levels, base_q_cg, 1, decoder->frame_count + t);
            } else {
                // Uniform quantization for older versions
                for (int i = 0; i < num_pixels; i++) {
                    gop_y[t][i] = quantized_gop[t][0][i] * base_q_y;
                    gop_co[t][i] = quantized_gop[t][1][i] * base_q_co;
                    gop_cg[t][i] = quantized_gop[t][2][i] * base_q_cg;
                }
            }
        }
    }

    // Free quantized coefficients
    for (int t = 0; t < gop_size; t++) {
        free(quantized_gop[t][0]);
        free(quantized_gop[t][1]);
        free(quantized_gop[t][2]);
        free(quantized_gop[t]);
    }
    free(quantized_gop);

    // Remove grain synthesis from Y channel for each GOP frame
    // This must happen after dequantization but before inverse DWT
    for (int t = 0; t < gop_size; t++) {
        remove_grain_synthesis_decoder(gop_y[t], decoder->header.width, decoder->header.height,
                                       decoder->header.decomp_levels, decoder->frame_count + t,
                                       decoder->header.quantiser_y);
    }

    // Apply inverse 3D DWT (spatial + temporal)
    apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, decoder->header.width, decoder->header.height,
                         gop_size, decoder->header.decomp_levels, temporal_levels,
                         decoder->header.wavelet_filter);

    // Convert to RGB and write all GOP frames. Even header versions select the
    // ICtCp conversion, odd versions the YCoCg-R conversion.
    const int is_ictcp = (decoder->header.version % 2 == 0);
    const size_t bytes_to_write = decoder->frame_size * 3;

    // One RGB scratch buffer is reused for every frame of the GOP.
    uint8_t *frame_rgb = NULL;
    if (gop_size > 0) {
        frame_rgb = malloc(bytes_to_write);
        if (!frame_rgb) {
            fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n");
            result = -1;
        }
    }
    if (frame_rgb) {
        for (int t = 0; t < gop_size; t++) {
            // Convert to RGB
            for (int i = 0; i < decoder->frame_size; i++) {
                uint8_t r, g, b;
                if (is_ictcp) {
                    ictcp_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
                } else {
                    ycocg_r_to_rgb(gop_y[t][i], gop_co[t][i], gop_cg[t][i], &r, &g, &b);
                }
                frame_rgb[i * 3 + 0] = r;
                frame_rgb[i * 3 + 1] = g;
                frame_rgb[i * 3 + 2] = b;
            }
            // Write frame to FFmpeg video pipe
            const size_t bytes_written = fwrite(frame_rgb, 1, bytes_to_write, decoder->video_pipe);
            if (bytes_written != bytes_to_write) {
                fprintf(stderr, "Error: Failed to write GOP frame %d to FFmpeg (wrote %zu/%zu bytes)\n",
                        t, bytes_written, bytes_to_write);
                result = -1;
                break;
            }
            fflush(decoder->video_pipe);
        }
    }
    free(frame_rgb);

    // Free GOP buffers
    for (int t = 0; t < gop_size; t++) {
        free(gop_y[t]);
        free(gop_co[t]);
        free(gop_cg[t]);
    }
    free(gop_y);
    free(gop_co);
    free(gop_cg);

    // Only leave the packet loop on error (result < 0), not on success (result = 1)
    if (result < 0) break;

    // GOP decoding doesn't update frame_count here - GOP_SYNC packet will do it
    if (verbose) {
        long pos_after_gop = ftell(decoder->input_fp);
        fprintf(stderr, "[DEBUG] After GOP: file pos = %ld, %d frames written (waiting for GOP_SYNC)\n",
                pos_after_gop, gop_size);
    }
    continue;
}
// Handle TAD audio packets (already extracted in Pass 1, just skip).
// Layout after the type byte: 2-byte sample count, 4-byte size of the
// remaining chunk, then the chunk itself, which is seeked over unread.
if (packet_type == TAV_PACKET_AUDIO_TAD) {
    uint16_t sample_count_wrapper;
    uint32_t payload_size_plus_7;
    // Both header fields must be read successfully; otherwise the skip below
    // would use an indeterminate length.
    if (fread(&sample_count_wrapper, 2, 1, decoder->input_fp) != 1 ||
        fread(&payload_size_plus_7, 4, 1, decoder->input_fp) != 1) {
        fprintf(stderr, "Error: Failed to read TAD audio packet header\n");
        result = -1;
        break;
    }
    // Skip TAD chunk (payload_size_plus_7 includes header and data).
    // A negative offset means the 32-bit size did not fit into long.
    const long tad_skip = (long)payload_size_plus_7;
    if (tad_skip < 0 || fseek(decoder->input_fp, tad_skip, SEEK_CUR) != 0) {
        fprintf(stderr, "Error: Failed to skip TAD audio packet (%u bytes)\n", payload_size_plus_7);
        result = -1;
        break;
    }
    continue;
}
// Handle extended header (has 2-byte count, not 4-byte size).
// The header is a list of key-value pairs that this decoder does not use;
// each pair is parsed just far enough to learn its length and is then skipped.
if (packet_type == TAV_PACKET_EXTENDED_HDR) {
    uint16_t num_pairs;
    if (fread(&num_pairs, 2, 1, decoder->input_fp) != 1) {
        fprintf(stderr, "Error: Failed to read extended header count\n");
        result = -1;
        break;
    }
    if (verbose && total_packets < 20) {
        fprintf(stderr, "Packet %d: EXTENDED_HDR (0x%02X), %u pairs - skipping\n",
                total_packets, packet_type, num_pairs);
    }
    // Skip the key-value pairs
    // Format: each pair is [4-byte key][1-byte type][N-byte value]
    // We need to parse each pair to know its size
    for (int i = 0; i < num_pairs; i++) {
        uint8_t key[4];
        uint8_t value_type;
        if (fread(key, 1, 4, decoder->input_fp) != 4 ||
            fread(&value_type, 1, 1, decoder->input_fp) != 1) {
            fprintf(stderr, "Error: Failed to read extended header pair %d\n", i);
            result = -1;
            break;
        }
        // Determine value size based on type
        size_t value_size = 0;
        switch (value_type) {
            case 0x00: value_size = 2; break;  // Int16
            case 0x01: value_size = 3; break;  // Int24
            case 0x02: value_size = 4; break;  // Int32
            case 0x03: value_size = 6; break;  // Int48
            case 0x04: value_size = 8; break;  // Int64
            case 0x10: {  // Bytes with 2-byte length prefix
                uint16_t str_len;
                if (fread(&str_len, 2, 1, decoder->input_fp) != 1) {
                    fprintf(stderr, "Error: Failed to read string length\n");
                    result = -1;
                    break;  // leaves the switch only; the check below ends the pair loop
                }
                value_size = str_len;
                break;
            }
            default:
                // NOTE(review): the value length of an unknown type cannot be
                // determined, so the following pairs may be misparsed. The
                // original best-effort behaviour (warn and carry on) is kept.
                fprintf(stderr, "Warning: Unknown extended header value type 0x%02X\n", value_type);
                break;
        }
        // A read error inside the switch must stop the pair loop as well.
        if (result < 0) break;
        // Skip the value
        if (value_size > 0 && fseek(decoder->input_fp, (long)value_size, SEEK_CUR) != 0) {
            fprintf(stderr, "Error: Failed to skip value of extended header pair %d\n", i);
            result = -1;
            break;
        }
    }
    if (result < 0) break;
    continue;
}
// Read packet size (for remaining packet types with standard format:
// the type byte is followed by a 4-byte payload size).
uint32_t packet_size;
if (fread(&packet_size, 4, 1, decoder->input_fp) != 1) {
    fprintf(stderr, "Error: Failed to read packet size at packet %d (type 0x%02X)\n",
            total_packets, packet_type);
    result = -1;
    break;
}
if (verbose && total_packets < 20) {
    fprintf(stderr, "Packet %d: Type 0x%02X, Size %u bytes\n", total_packets, packet_type, packet_size);
}

// Dispatch on packet type. Packet types whose payload is not consumed here
// only set skip_payload; the seek is done (and checked) once after the switch.
int skip_payload = 0;
switch (packet_type) {
    case TAV_PACKET_IFRAME:
    case TAV_PACKET_PFRAME:
        // Note: iframe_count counts both I- and P-frames; it only limits logging.
        iframe_count++;
        if (verbose && iframe_count <= 5) {
            fprintf(stderr, "Processing %s (packet %d, size %u bytes)...\n",
                    packet_type == TAV_PACKET_IFRAME ? "I-frame" : "P-frame",
                    total_packets, packet_size);
        }
        result = decode_i_or_p_frame(decoder, packet_type, packet_size);
        if (result < 0) {
            fprintf(stderr, "Error: Frame decoding failed at frame %d\n", decoder->frame_count);
            break;
        }
        if (verbose && decoder->frame_count % 100 == 0) {
            printf("Decoded frame %d\r", decoder->frame_count);
            fflush(stdout);
        }
        break;
    case TAV_PACKET_AUDIO_MP2:
    case TAV_PACKET_AUDIO_TRACK:
        // MP2 audio is not written anywhere yet: FFmpeg cannot decode MP2
        // from a raw stream, so the payload is skipped for now.
        if (verbose && total_packets < 20) {
            fprintf(stderr, "Skipping MP2 audio packet (%u bytes) - not yet supported\n", packet_size);
        }
        skip_payload = 1;
        break;
    case TAV_PACKET_AUDIO_PCM8:
        // PCM8 audio - already extracted in Pass 1, just skip
        skip_payload = 1;
        break;
    case TAV_PACKET_SUBTITLE:
        // Skip subtitle packets
        skip_payload = 1;
        break;
    case TAV_PACKET_PFRAME_RESIDUAL:
    case TAV_PACKET_BFRAME_RESIDUAL:
        fprintf(stderr, "\nError: Unsupported packet type 0x%02X (MPEG-style motion compensation not supported)\n", packet_type);
        result = -1;
        break;
    default:
        fprintf(stderr, "\nWarning: Unknown packet type 0x%02X (skipping)\n", packet_type);
        skip_payload = 1;
        break;
}
if (skip_payload) {
    // A negative offset means the 32-bit size did not fit into long. A failed
    // seek would leave the parser desynchronised, so it ends the loop via result.
    const long payload_skip = (long)packet_size;
    if (payload_skip < 0 || fseek(decoder->input_fp, payload_skip, SEEK_CUR) != 0) {
        fprintf(stderr, "\nError: Failed to skip %u-byte payload of packet type 0x%02X\n",
                packet_size, packet_type);
        result = -1;
    }
}
}
if (verbose) {
printf("\nDecoded %d frames\n", decoder->frame_count);
}
tav_decoder_free(decoder);
if (result < 0) {
fprintf(stderr, "Decoding error occurred\n");
unlink(temp_audio_file); // Clean up temp file
return 1;
}
printf("Successfully decoded to: %s\n", output_file);
// Clean up temporary audio file
if (unlink(temp_audio_file) == 0 && verbose) {
fprintf(stderr, "Cleaned up temporary audio file: %s\n", temp_audio_file);
}
return 0;
}