diff --git a/terranmon.txt b/terranmon.txt index af3bce9..4614761 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -1615,7 +1615,7 @@ start of the next packet # Video Format - Dimension: 720x480 for NTSC, 720x576 for PAL - FPS: arbitrary (defined in packet header) - - Wavelet: 9/7 Spatial, 5/3 Temporal + - Wavelet: 9/7 Spatial, Haar Temporal ("sports" preset always enabled) - Decomposition levels: 4 spatial, 2 temporal - Quantiser and encoder quality level: arbitrary (defined in packet header as quality index) - Extra features: diff --git a/video_encoder/decoder_tav_dt.c b/video_encoder/decoder_tav_dt.c index e68d4a9..212d9b8 100644 --- a/video_encoder/decoder_tav_dt.c +++ b/video_encoder/decoder_tav_dt.c @@ -702,15 +702,15 @@ int main(int argc, char *argv[]) { .decomp_levels = 4, // TAV-DT fixed: 4 spatial levels .temporal_levels = 2, // TAV-DT fixed: 2 temporal levels .wavelet_filter = 1, // TAV-DT fixed: CDF 9/7 - .temporal_wavelet = 1, // TAV-DT fixed: CDF 5/3 (NOT Haar!) + .temporal_wavelet = 0, // TAV-DT fixed: Haar .entropy_coder = 1, // TAV-DT fixed: EZBC .channel_layout = 0, // TAV-DT fixed: YCoCg-R .perceptual_tuning = 1, // TAV-DT fixed: Perceptual .quantiser_y = (uint8_t)quant_y, // From DT quality map .quantiser_co = (uint8_t)quant_co, .quantiser_cg = (uint8_t)quant_cg, - .encoder_preset = 0, // No special presets - .monoblock = 1 // TAV-DT fixed: Single tile + .encoder_preset = 1, // Sports mode + .monoblock = 1 // TAV-DT fixed: Single tile }; decoder.video_ctx = tav_video_create(&video_params); @@ -734,9 +734,7 @@ int main(int argc, char *argv[]) { long start_pos = ftell(decoder.input_fp); // Pass 1: Process all packets for audio only - if (decoder.verbose) { - printf("\n=== Pass 1: Extracting audio ===\n"); - } + printf("\n=== Pass 1: Extracting audio ===\n"); while ((result = process_dt_packet(&decoder)) == 0) { // Continue processing (only audio is written) } @@ -754,9 +752,7 @@ int main(int argc, char *argv[]) { } // Pass 2: Rewind and process
all packets for video - if (decoder.verbose) { - printf("\n=== Pass 2: Decoding video ===\n"); - } + printf("\n=== Pass 2: Decoding video ===\n"); fseek(decoder.input_fp, start_pos, SEEK_SET); decoder.packets_processed = 0; // Reset statistics decoder.frames_decoded = 0; diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 392d751..983edbc 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -12581,7 +12581,8 @@ int main(int argc, char *argv[]) { // Enforce mandatory settings enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; // 9/7 spatial enc->decomp_levels = 4; // 4 spatial levels - enc->temporal_motion_coder = 1; // CDF 5/3 temporal + enc->temporal_motion_coder = 0; // Haar temporal + enc->encoder_preset = 0x01; // Sports mode enc->temporal_decomp_levels = 2; // 2 temporal levels enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Y-Co-Cg only enc->preprocess_mode = PREPROCESS_EZBC; // EZBC entropy coder @@ -12603,7 +12604,7 @@ int main(int argc, char *argv[]) { printf("TAV-DT: Quality level %d -> Y=%d, Co=%d, Cg=%d, TAD_quality=%d\n", enc->quality_level, enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg, enc->quality_level); - printf("TAV-DT: Enforcing format constraints (9/7 spatial, 5/3 temporal, 4+2 levels, EZBC, monoblock)\n"); + printf("TAV-DT: Enforcing format constraints (9/7 spatial, Haar temporal, sports mode, 4+2 levels, EZBC, monoblock)\n"); } // Halve internal height for interlaced mode (FFmpeg will output half-height fields) @@ -12615,7 +12616,7 @@ int main(int argc, char *argv[]) { } // Smart preset for temporal motion coder based on resolution - // For small videos (<500k pixels), use CDF 5/3 (better for fine details) + // For small videos (<500k pixels), use Haar with sports preset (better for fine details) // For larger videos, use Haar (better compression, smoother motion matters less) if (enc->temporal_motion_coder == -1) { int num_pixels = enc->width * enc->height; @@ -12632,9 +12633,10 @@ int 
main(int argc, char *argv[]) { } } else { - enc->temporal_motion_coder = 1; // CDF 5/3 + enc->temporal_motion_coder = 0; // Haar + enc->encoder_preset |= 0x01; // Sports mode if (enc->verbose) { - printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n", + printf("Auto-selected Haar temporal wavelet with sports preset (resolution: %dx%d = %d pixels, quantiser_y = %d)\n", enc->width, enc->height, num_pixels, enc->quantiser_y); } } diff --git a/video_encoder/tav_video_decoder.c b/video_encoder/tav_video_decoder.c new file mode 100644 index 0000000..b1d1b55 --- /dev/null +++ b/video_encoder/tav_video_decoder.c @@ -0,0 +1,1858 @@ +// Created by CuriousTorvald and Claude on 2025-12-02. +// TAV Video Decoder Library - Shared decoding functions for TAV format +// Can be used by both regular TAV decoder and TAV-DT decoder + +#include "tav_video_decoder.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdint.h> + +//============================================================================= +// Internal Constants and Macros +//============================================================================= + +#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > 
(max) : (x))) + +// Perceptual quantisation constants (match TSVM) +static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; +static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f}; +static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}; +static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f}; +static const float FOUR_PIXEL_DETAILER = 0.88f; +static const float TWO_PIXEL_DETAILER = 0.92f; + +// Quantisation Lookup Table (matches TSVM exactly) +static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096}; + +//============================================================================= +// Internal Structures +//============================================================================= + +// DWT subband information +typedef struct { + int level; // Decomposition level (1 to decompLevels) + int subband_type; // 0=LL, 1=LH, 2=HL, 3=HH + int coeff_start; // Starting index in linear coefficient array + int 
coeff_count; // Number of coefficients in this subband +} dwt_subband_info_t; + +// EZBC Block structure for quadtree +typedef struct { + int x, y; + int width, height; +} ezbc_block_t; + +// EZBC bitstream reader state +typedef struct { + const uint8_t *data; + size_t size; + size_t byte_pos; + int bit_pos; +} ezbc_bitreader_t; + +// EZBC block queues (simple dynamic arrays) +typedef struct { + ezbc_block_t *blocks; + int count; + int capacity; +} ezbc_block_queue_t; + +// Video decoder context (opaque to users) +struct tav_video_context { + tav_video_params_t params; + + // Working buffers + float *dwt_buffer_y; + float *dwt_buffer_co; + float *dwt_buffer_cg; + float *reference_ycocg_y; // For P-frame delta accumulation + float *reference_ycocg_co; + float *reference_ycocg_cg; + + // Error message buffer + char error_msg[256]; + + // Debug flag + int verbose; +}; + +//============================================================================= +// DWT Subband Layout Calculation (matches TSVM) +//============================================================================= + +static int calculate_subband_layout(int width, int height, int decomp_levels, dwt_subband_info_t *subbands) { + int subband_count = 0; + + // generate division series + int widths[decomp_levels + 1]; widths[0] = width; + int heights[decomp_levels + 1]; heights[0] = height; + + for (int i = 1; i < decomp_levels + 1; i++) { + widths[i] = (int)roundf(widths[i - 1] / 2.0f); + heights[i] = (int)roundf(heights[i - 1] / 2.0f); + } + + // LL subband at maximum decomposition level + int ll_width = widths[decomp_levels]; + int ll_height = heights[decomp_levels]; + subbands[subband_count++] = (dwt_subband_info_t){decomp_levels, 0, 0, ll_width * ll_height}; + int coeff_offset = ll_width * ll_height; + + // LH, HL, HH subbands for each level from max down to 1 + for (int level = decomp_levels; level >= 1; level--) { + int level_width = widths[decomp_levels - level + 1]; + int level_height = 
heights[decomp_levels - level + 1]; + const int subband_size = level_width * level_height; + + // LH subband + subbands[subband_count++] = (dwt_subband_info_t){level, 1, coeff_offset, subband_size}; + coeff_offset += subband_size; + + // HL subband + subbands[subband_count++] = (dwt_subband_info_t){level, 2, coeff_offset, subband_size}; + coeff_offset += subband_size; + + // HH subband + subbands[subband_count++] = (dwt_subband_info_t){level, 3, coeff_offset, subband_size}; + coeff_offset += subband_size; + } + + return subband_count; +} + +//============================================================================= +// Perceptual Quantisation Model (matches TSVM exactly) +//============================================================================= + +static int tav_derive_encoder_qindex(int q_index, int q_y_global) { + if (q_index > 0) return q_index - 1; + if (q_y_global >= 60) return 0; + else if (q_y_global >= 42) return 1; + else if (q_y_global >= 25) return 2; + else if (q_y_global >= 12) return 3; + else if (q_y_global >= 6) return 4; + else if (q_y_global >= 2) return 5; + else return 5; +} + +static float perceptual_model3_LH(float level) { + const float H4 = 1.2f; + const float K = 2.0f; + const float K12 = K * 12.0f; + const float x = level; + + const float Lx = H4 - ((K + 1.0f) / 15.0f) * (x - 4.0f); + const float C3 = -1.0f / 45.0f * (K12 + 92.0f); + const float G3x = (-x / 180.0f) * (K12 + 5.0f * x * x - 60.0f * x + 252.0f) - C3 + H4; + + return (level >= 4.0f) ? 
Lx : G3x; +} + +static float perceptual_model3_HL(int quality, float LH) { + return LH * ANISOTROPY_MULT[quality] + ANISOTROPY_BIAS[quality]; +} + +static float lerp(float x, float y, float a) { + return x * (1.0f - a) + y * a; +} + +static float perceptual_model3_HH(float LH, float HL, float level) { + const float Kx = (sqrtf(level) - 1.0f) * 0.5f + 0.5f; + return lerp(LH, HL, Kx); +} + +static float perceptual_model3_LL(float level) { + const float n = perceptual_model3_LH(level); + const float m = perceptual_model3_LH(level - 1.0f) / n; + return n / m; +} + +static float perceptual_model3_chroma_basecurve(int quality, float level) { + return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f); +} + +static float get_perceptual_weight(int q_index, int q_y_global, int level0, int subband_type, + int is_chroma, int max_levels) { + // Convert to perceptual level (1-6 scale) + const float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f; + const int quality_level = tav_derive_encoder_qindex(q_index, q_y_global); + + if (!is_chroma) { + // LUMA CHANNEL + if (subband_type == 0) { + return perceptual_model3_LL(level); + } + + const float LH = perceptual_model3_LH(level); + if (subband_type == 1) { + return LH; + } + + const float HL = perceptual_model3_HL(quality_level, LH); + if (subband_type == 2) { + float detailer = 1.0f; + if (level >= 1.8f && level <= 2.2f) detailer = TWO_PIXEL_DETAILER; + else if (level >= 2.8f && level <= 3.2f) detailer = FOUR_PIXEL_DETAILER; + return HL * detailer; + } else { + // HH subband + float detailer = 1.0f; + if (level >= 1.8f && level <= 2.2f) detailer = TWO_PIXEL_DETAILER; + else if (level >= 2.8f && level <= 3.2f) detailer = FOUR_PIXEL_DETAILER; + return perceptual_model3_HH(LH, HL, level) * detailer; + } + } else { + // CHROMA CHANNELS + const float base = perceptual_model3_chroma_basecurve(quality_level, level - 1); + if (subband_type == 0) { + return 1.0f; + } else if (subband_type == 1) { + return 
fmaxf(base, 1.0f); + } else if (subband_type == 2) { + return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level], 1.0f); + } else { + return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level] + ANISOTROPY_BIAS_CHROMA[quality_level], 1.0f); + } + } +} + +static void dequantise_dwt_subbands_perceptual(int q_index, int q_y_global, const int16_t *quantised, + float *dequantised, int width, int height, int decomp_levels, + float base_quantiser, int is_chroma) { + dwt_subband_info_t subbands[32]; // Max possible subbands + const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands); + + const int coeff_count = width * height; + memset(dequantised, 0, coeff_count * sizeof(float)); + + // Apply perceptual weighting to each subband + for (int s = 0; s < subband_count; s++) { + const dwt_subband_info_t *subband = &subbands[s]; + const float weight = get_perceptual_weight(q_index, q_y_global, subband->level, + subband->subband_type, is_chroma, decomp_levels); + const float effective_quantiser = base_quantiser * weight; + + // Apply linear dequantisation with perceptual weights + for (int i = 0; i < subband->coeff_count; i++) { + const int idx = subband->coeff_start + i; + if (idx < coeff_count) { + const float untruncated = quantised[idx] * effective_quantiser; + dequantised[idx] = untruncated; + } + } + } +} + +//============================================================================= +// Grain Synthesis (matches TSVM exactly) +//============================================================================= + +// Deterministic RNG for grain synthesis (matches encoder) +static inline uint32_t tav_grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) { + uint32_t key = frame * 0x9e3779b9u ^ band * 0x7f4a7c15u ^ (y << 16) ^ x; + uint32_t hash = key; + hash = hash ^ (hash >> 16); + hash = hash * 0x7feb352du; + hash = hash ^ (hash >> 15); + hash = hash * 0x846ca68bu; + hash = hash ^ (hash >> 16); + return hash; +} + +// 
Generate triangular noise from uint32 RNG (returns value in range [-1.0, 1.0]) +static inline float tav_grain_triangular_noise(uint32_t rng_val) { + float u1 = (rng_val & 0xFFFFu) / 65535.0f; + float u2 = ((rng_val >> 16) & 0xFFFFu) / 65535.0f; + return (u1 + u2) - 1.0f; +} + +// Apply grain synthesis from DWT coefficients (decoder subtracts noise) +static void apply_grain_synthesis(float *coeffs, int width, int height, + int decomp_levels, int frame_num, int q_y_global, uint8_t encoder_preset) { + // Anime preset: completely disable grain synthesis + if (encoder_preset & 0x02) { + return; + } + + dwt_subband_info_t subbands[32]; + const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands); + + // Noise amplitude (matches Kotlin) + const float noise_amplitude = (q_y_global < 32 ? q_y_global : 32) * 0.4f; + + // Process each subband (skip LL band which is level 0) + for (int s = 0; s < subband_count; s++) { + const dwt_subband_info_t *subband = &subbands[s]; + if (subband->level == 0) continue; + + uint32_t band = subband->level + subband->subband_type * 31 + 16777619; + + for (int i = 0; i < subband->coeff_count; i++) { + const int idx = subband->coeff_start + i; + if (idx < width * height) { + int y = idx / width; + int x = idx % width; + + uint32_t rng_val = tav_grain_synthesis_rng(frame_num, band, x, y); + float noise = tav_grain_triangular_noise(rng_val); + + coeffs[idx] -= noise * noise_amplitude; + } + } + } +} + +//============================================================================= +// Significance Map Postprocessing (2-bit map format) +//============================================================================= + +// Helper: Extract 2-bit code from bit-packed array +static inline int get_twobit_code(const uint8_t *map_data, int map_bytes, int coeff_idx) { + int bit_pos = coeff_idx * 2; + int byte_idx = bit_pos / 8; + int bit_offset = bit_pos % 8; + + uint8_t byte0 = map_data[byte_idx]; + int code = (byte0 >> 
bit_offset) & 0x03; + + // Handle byte boundary crossing + if (bit_offset == 7 && byte_idx + 1 < map_bytes) { + uint8_t byte1 = map_data[byte_idx + 1]; + code = ((byte0 >> 7) & 0x01) | ((byte1 << 1) & 0x02); + } + + return code; +} + +// Decoder: reconstruct coefficients from 2-bit map format (entropyCoder=0) +static void postprocess_coefficients_twobit(uint8_t *compressed_data, int coeff_count, + int16_t *output_y, int16_t *output_co, int16_t *output_cg) { + int map_bytes = (coeff_count * 2 + 7) / 8; + + uint8_t *y_map = compressed_data; + uint8_t *co_map = compressed_data + map_bytes; + uint8_t *cg_map = compressed_data + map_bytes * 2; + + // Count "other" values (code 11) for each channel + int y_others = 0, co_others = 0, cg_others = 0; + for (int i = 0; i < coeff_count; i++) { + if (get_twobit_code(y_map, map_bytes, i) == 3) y_others++; + if (get_twobit_code(co_map, map_bytes, i) == 3) co_others++; + if (get_twobit_code(cg_map, map_bytes, i) == 3) cg_others++; + } + + // Value array offsets (after all maps) + uint8_t *value_ptr = compressed_data + map_bytes * 3; + int16_t *y_values = (int16_t *)value_ptr; + int16_t *co_values = (int16_t *)(value_ptr + y_others * 2); + int16_t *cg_values = (int16_t *)(value_ptr + y_others * 2 + co_others * 2); + + // Reconstruct coefficients + int y_value_idx = 0, co_value_idx = 0, cg_value_idx = 0; + + for (int i = 0; i < coeff_count; i++) { + // Y channel + int y_code = get_twobit_code(y_map, map_bytes, i); + switch (y_code) { + case 0: output_y[i] = 0; break; + case 1: output_y[i] = 1; break; + case 2: output_y[i] = -1; break; + case 3: output_y[i] = y_values[y_value_idx++]; break; + } + + // Co channel + int co_code = get_twobit_code(co_map, map_bytes, i); + switch (co_code) { + case 0: output_co[i] = 0; break; + case 1: output_co[i] = 1; break; + case 2: output_co[i] = -1; break; + case 3: output_co[i] = co_values[co_value_idx++]; break; + } + + // Cg channel + int cg_code = get_twobit_code(cg_map, map_bytes, i); + switch 
(cg_code) { + case 0: output_cg[i] = 0; break; + case 1: output_cg[i] = 1; break; + case 2: output_cg[i] = -1; break; + case 3: output_cg[i] = cg_values[cg_value_idx++]; break; + } + } +} + +//============================================================================= +// EZBC (Embedded Zero Block Coding) Decoder +//============================================================================= + +// Read N bits from EZBC bitstream (LSB-first within each byte) +static int ezbc_read_bits(ezbc_bitreader_t *reader, int num_bits) { + int result = 0; + for (int i = 0; i < num_bits; i++) { + if (reader->byte_pos >= reader->size) { + return result; + } + + const int bit = (reader->data[reader->byte_pos] >> reader->bit_pos) & 1; + result |= (bit << i); + + reader->bit_pos++; + if (reader->bit_pos == 8) { + reader->bit_pos = 0; + reader->byte_pos++; + } + } + return result; +} + +static void ezbc_queue_init(ezbc_block_queue_t *q) { + q->capacity = 256; + q->count = 0; + q->blocks = malloc(q->capacity * sizeof(ezbc_block_t)); +} + +static void ezbc_queue_free(ezbc_block_queue_t *q) { + free(q->blocks); + q->blocks = NULL; + q->count = 0; +} + +static void ezbc_queue_add(ezbc_block_queue_t *q, ezbc_block_t block) { + if (q->count >= q->capacity) { + q->capacity *= 2; + q->blocks = realloc(q->blocks, q->capacity * sizeof(ezbc_block_t)); + } + q->blocks[q->count++] = block; +} + +// Forward declaration +static int ezbc_process_significant_block_recursive( + ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold, + int16_t *output, int width, int8_t *significant, int *first_bitplane, + ezbc_block_queue_t *next_significant, ezbc_block_queue_t *next_insignificant); + +// EZBC recursive block decoder (matches Kotlin implementation) +static int ezbc_process_significant_block_recursive( + ezbc_bitreader_t *reader, ezbc_block_t block, int bitplane, int threshold, + int16_t *output, int width, int8_t *significant, int *first_bitplane, + ezbc_block_queue_t 
*next_significant, ezbc_block_queue_t *next_insignificant) { + + int sign_bits_read = 0; + + // If 1x1 block: read sign bit and add to significant queue + if (block.width == 1 && block.height == 1) { + const int idx = block.y * width + block.x; + const int sign_bit = ezbc_read_bits(reader, 1); + sign_bits_read++; + + output[idx] = sign_bit ? -threshold : threshold; + significant[idx] = 1; + first_bitplane[idx] = bitplane; + ezbc_queue_add(next_significant, block); + return sign_bits_read; + } + + // Block is > 1x1: subdivide and recursively process children + int mid_x = block.width / 2; + int mid_y = block.height / 2; + if (mid_x == 0) mid_x = 1; + if (mid_y == 0) mid_y = 1; + + // Top-left child + ezbc_block_t tl = {block.x, block.y, mid_x, mid_y}; + const int tl_flag = ezbc_read_bits(reader, 1); + if (tl_flag) { + sign_bits_read += ezbc_process_significant_block_recursive( + reader, tl, bitplane, threshold, output, width, significant, first_bitplane, + next_significant, next_insignificant); + } else { + ezbc_queue_add(next_insignificant, tl); + } + + // Top-right child (if exists) + if (block.width > mid_x) { + ezbc_block_t tr = {block.x + mid_x, block.y, block.width - mid_x, mid_y}; + const int tr_flag = ezbc_read_bits(reader, 1); + if (tr_flag) { + sign_bits_read += ezbc_process_significant_block_recursive( + reader, tr, bitplane, threshold, output, width, significant, first_bitplane, + next_significant, next_insignificant); + } else { + ezbc_queue_add(next_insignificant, tr); + } + } + + // Bottom-left child (if exists) + if (block.height > mid_y) { + ezbc_block_t bl = {block.x, block.y + mid_y, mid_x, block.height - mid_y}; + const int bl_flag = ezbc_read_bits(reader, 1); + if (bl_flag) { + sign_bits_read += ezbc_process_significant_block_recursive( + reader, bl, bitplane, threshold, output, width, significant, first_bitplane, + next_significant, next_insignificant); + } else { + ezbc_queue_add(next_insignificant, bl); + } + } + + // Bottom-right child (if 
exists) + if (block.width > mid_x && block.height > mid_y) { + ezbc_block_t br = {block.x + mid_x, block.y + mid_y, block.width - mid_x, block.height - mid_y}; + const int br_flag = ezbc_read_bits(reader, 1); + if (br_flag) { + sign_bits_read += ezbc_process_significant_block_recursive( + reader, br, bitplane, threshold, output, width, significant, first_bitplane, + next_significant, next_insignificant); + } else { + ezbc_queue_add(next_insignificant, br); + } + } + + return sign_bits_read; +} + +// Decode a single channel with EZBC +static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t size, + int16_t *output, int expected_count) { + ezbc_bitreader_t reader = {ezbc_data, offset + size, offset, 0}; + + // Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits) + const int msb_bitplane = ezbc_read_bits(&reader, 8); + const int width = ezbc_read_bits(&reader, 16); + const int height = ezbc_read_bits(&reader, 16); + + const int actual_count = width * height; + if (actual_count > expected_count) { + memset(output, 0, expected_count * sizeof(int16_t)); + return; + } + + expected_count = actual_count; + + // Initialise output and state tracking + memset(output, 0, expected_count * sizeof(int16_t)); + int8_t *significant = calloc(expected_count, sizeof(int8_t)); + int *first_bitplane = calloc(expected_count, sizeof(int)); + + // Initialise queues + ezbc_block_queue_t insignificant, next_insignificant, significant_queue, next_significant; + ezbc_queue_init(&insignificant); + ezbc_queue_init(&next_insignificant); + ezbc_queue_init(&significant_queue); + ezbc_queue_init(&next_significant); + + // Start with root block + ezbc_block_t root = {0, 0, width, height}; + ezbc_queue_add(&insignificant, root); + + // Process bitplanes from MSB to LSB + for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) { + const int threshold = 1 << bitplane; + + // Process insignificant blocks + for (int i = 0; i < insignificant.count; i++) { + const 
int flag = ezbc_read_bits(&reader, 1); + + if (flag == 0) { + ezbc_queue_add(&next_insignificant, insignificant.blocks[i]); + } else { + ezbc_process_significant_block_recursive( + &reader, insignificant.blocks[i], bitplane, threshold, + output, width, significant, first_bitplane, + &next_significant, &next_insignificant); + } + } + + // Process significant 1x1 blocks (refinement) + for (int i = 0; i < significant_queue.count; i++) { + ezbc_block_t block = significant_queue.blocks[i]; + const int idx = block.y * width + block.x; + const int refine_bit = ezbc_read_bits(&reader, 1); + + if (refine_bit) { + const int bit_value = 1 << bitplane; + if (output[idx] < 0) { + output[idx] -= bit_value; + } else { + output[idx] += bit_value; + } + } + + ezbc_queue_add(&next_significant, block); + } + + // Swap queues + ezbc_block_queue_t temp_insig = insignificant; + insignificant = next_insignificant; + next_insignificant = temp_insig; + next_insignificant.count = 0; + + ezbc_block_queue_t temp_sig = significant_queue; + significant_queue = next_significant; + next_significant = temp_sig; + next_significant.count = 0; + } + + // Cleanup + free(significant); + free(first_bitplane); + ezbc_queue_free(&insignificant); + ezbc_queue_free(&next_insignificant); + ezbc_queue_free(&significant_queue); + ezbc_queue_free(&next_significant); +} + +// Helper: peek at EZBC header to get dimensions without decoding +static int ezbc_peek_dimensions(const uint8_t *compressed_data, int channel_layout, + int *out_width, int *out_height) { + const int has_y = (channel_layout & 0x04) == 0; + + if (!has_y) { + return -1; + } + + const uint32_t size = ((uint32_t)compressed_data[0]) | + ((uint32_t)compressed_data[1] << 8) | + ((uint32_t)compressed_data[2] << 16) | + ((uint32_t)compressed_data[3] << 24); + + if (size < 6) { + return -1; + } + + const uint8_t *ezbc_data = compressed_data + 4; + + ezbc_bitreader_t reader; + reader.data = ezbc_data; + reader.size = size; + reader.byte_pos = 0; + 
reader.bit_pos = 0; + + ezbc_read_bits(&reader, 8); // Skip MSB bitplane + *out_width = ezbc_read_bits(&reader, 16); + *out_height = ezbc_read_bits(&reader, 16); + + return 0; +} + +// EZBC postprocessing for single frames +static void postprocess_coefficients_ezbc(uint8_t *compressed_data, int coeff_count, + int16_t *output_y, int16_t *output_co, int16_t *output_cg, + int channel_layout) { + const int has_y = (channel_layout & 0x04) == 0; + const int has_co = (channel_layout & 0x02) == 0; + const int has_cg = (channel_layout & 0x02) == 0; + + int offset = 0; + + // Decode Y channel + if (has_y && output_y) { + const uint32_t size = ((uint32_t)compressed_data[offset + 0]) | + ((uint32_t)compressed_data[offset + 1] << 8) | + ((uint32_t)compressed_data[offset + 2] << 16) | + ((uint32_t)compressed_data[offset + 3] << 24); + offset += 4; + decode_channel_ezbc(compressed_data, offset, size, output_y, coeff_count); + offset += size; + } + + // Decode Co channel + if (has_co && output_co) { + const uint32_t size = ((uint32_t)compressed_data[offset + 0]) | + ((uint32_t)compressed_data[offset + 1] << 8) | + ((uint32_t)compressed_data[offset + 2] << 16) | + ((uint32_t)compressed_data[offset + 3] << 24); + offset += 4; + decode_channel_ezbc(compressed_data, offset, size, output_co, coeff_count); + offset += size; + } + + // Decode Cg channel + if (has_cg && output_cg) { + const uint32_t size = ((uint32_t)compressed_data[offset + 0]) | + ((uint32_t)compressed_data[offset + 1] << 8) | + ((uint32_t)compressed_data[offset + 2] << 16) | + ((uint32_t)compressed_data[offset + 3] << 24); + offset += 4; + decode_channel_ezbc(compressed_data, offset, size, output_cg, coeff_count); + offset += size; + } +} + +//============================================================================= +// DWT Inverse Transforms (matches TSVM) +//============================================================================= + +// 9/7 inverse DWT (from TSVM Kotlin code) +static void 
dwt_97_inverse_1d(float *data, int length) { + if (length < 2) return; + + float *temp = malloc(length * sizeof(float)); + int half = (length + 1) / 2; + + // Split into low and high frequency components + for (int i = 0; i < half; i++) { + temp[i] = data[i]; + } + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + temp[half + i] = data[half + i]; + } + } + + // 9/7 inverse lifting coefficients + const float alpha = -1.586134342f; + const float beta = -0.052980118f; + const float gamma = 0.882911076f; + const float delta = 0.443506852f; + const float K = 1.230174105f; + + // Step 1: Undo scaling + for (int i = 0; i < half; i++) { + temp[i] /= K; + } + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + temp[half + i] *= K; + } + } + + // Step 2: Undo δ update + for (int i = 0; i < half; i++) { + float d_curr = (half + i < length) ? temp[half + i] : 0.0f; + float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; + temp[i] -= delta * (d_curr + d_prev); + } + + // Step 3: Undo γ predict + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + float s_curr = temp[i]; + float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; + temp[half + i] -= gamma * (s_curr + s_next); + } + } + + // Step 4: Undo β update + for (int i = 0; i < half; i++) { + float d_curr = (half + i < length) ? temp[half + i] : 0.0f; + float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; + temp[i] -= beta * (d_curr + d_prev); + } + + // Step 5: Undo α predict + for (int i = 0; i < length / 2; i++) { + if (half + i < length) { + float s_curr = temp[i]; + float s_next = (i + 1 < half) ? 
temp[i + 1] : s_curr; + temp[half + i] -= alpha * (s_curr + s_next); + } + } + + // Reconstruction - interleave low and high pass + for (int i = 0; i < length; i++) { + if (i % 2 == 0) { + data[i] = temp[i / 2]; + } else { + int idx = i / 2; + if (half + idx < length) { + data[i] = temp[half + idx]; + } else { + data[i] = 0.0f; + } + } + } + + free(temp); +} + +// 5/3 inverse DWT using lifting scheme +static void dwt_53_inverse_1d(float *data, int length) { + if (length < 2) return; + + float *temp = malloc(length * sizeof(float)); + int half = (length + 1) / 2; + + memcpy(temp, data, length * sizeof(float)); + + // Undo update step + for (int i = 0; i < half; i++) { + float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) + + (i < half - 1 ? temp[half + i] : 0)); + temp[i] -= update; + } + + // Undo predict step and interleave + for (int i = 0; i < half; i++) { + data[2 * i] = temp[i]; + int idx = 2 * i + 1; + if (idx < length) { + float pred = 0.5f * (temp[i] + (i < half - 1 ? temp[i + 1] : temp[i])); + data[idx] = temp[half + i] + pred; + } + } + + free(temp); +} + +// CDF 13/7 inverse DWT +static void dwt_cdf137_inverse_1d(float *data, int length) { + if (length < 2) return; + + float *temp = malloc(sizeof(float) * length); + int half = (length + 1) / 2; + + int nE = half; + int nO = length / 2; + + float *even = temp; + float *odd = temp + nE; + + // Load L and H + for (int i = 0; i < nE; i++) { + even[i] = data[i]; + } + for (int i = 0; i < nO; i++) { + odd[i] = data[half + i]; + } + + // Inverse update + for (int i = 0; i < nE; i++) { + float d = (i < nO) ? 
odd[i] : 0.0f; + even[i] = even[i] - 0.25f * d; + } + + // Inverse predict + for (int i = 0; i < nO; i++) { + odd[i] = odd[i] + 0.5f * even[i]; + } + + // Interleave + for (int i = 0; i < nO; i++) { + data[2 * i] = even[i]; + data[2 * i + 1] = odd[i]; + } + if (nE > nO) { + data[2 * nO] = even[nO]; + } + + free(temp); +} + +// DD-4 inverse DWT +static void dwt_dd4_inverse_1d(float *data, int length) { + if (length < 2) return; + + float *temp = malloc(length * sizeof(float)); + int half = (length + 1) / 2; + + memcpy(temp, data, length * sizeof(float)); + + // DD-4 inverse lifting + for (int i = 0; i < half; i++) { + float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) + + (i < half - 1 ? temp[half + i] : 0)); + temp[i] -= update; + } + + for (int i = 0; i < half; i++) { + data[2 * i] = temp[i]; + int idx = 2 * i + 1; + if (idx < length) { + float pred = 0.5f * (temp[i] + (i < half - 1 ? temp[i + 1] : temp[i])); + data[idx] = temp[half + i] + pred; + } + } + + free(temp); +} + +// Haar inverse DWT +static void dwt_haar_inverse_1d(float *data, int length) { + if (length < 2) return; + + float *temp = malloc(length * sizeof(float)); + const int half = (length + 1) / 2; + + for (int i = 0; i < half; i++) { + if (2 * i + 1 < length) { + temp[2 * i] = data[i] + data[half + i]; + temp[2 * i + 1] = data[i] - data[half + i]; + } else { + temp[2 * i] = data[i]; + } + } + + for (int i = 0; i < length; i++) { + data[i] = temp[i]; + } + + free(temp); +} + +// Multi-level inverse DWT +static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) { + int max_size = (width > height) ? 
// Dispatch one 1-D inverse transform by filter id.
// Unknown ids fall through as identity, matching the original behaviour.
static void inverse_dwt_1d_dispatch(float *line, int n, int filter_type) {
    if (filter_type == 0)        dwt_53_inverse_1d(line, n);
    else if (filter_type == 1)   dwt_97_inverse_1d(line, n);
    else if (filter_type == 2)   dwt_cdf137_inverse_1d(line, n);
    else if (filter_type == 16)  dwt_dd4_inverse_1d(line, n);
    else if (filter_type == 255) dwt_haar_inverse_1d(line, n);
}

// Multi-level inverse 2D DWT over the top-left pyramid of a width*height plane.
// filter_type: 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar.
static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) {
    const int max_size = (width > height) ? width : height;
    float *row_buf = malloc(max_size * sizeof(float));
    float *col_buf = malloc(max_size * sizeof(float));

    // Pre-compute the exact per-level dimensions (ceil-halved each level),
    // matching the encoder's decomposition sequence.
    int *lvl_w = malloc((levels + 1) * sizeof(int));
    int *lvl_h = malloc((levels + 1) * sizeof(int));
    lvl_w[0] = width;
    lvl_h[0] = height;
    for (int i = 1; i <= levels; i++) {
        lvl_w[i] = (lvl_w[i - 1] + 1) / 2;
        lvl_h[i] = (lvl_h[i - 1] + 1) / 2;
    }

    // Undo levels from coarsest to finest.
    for (int level = levels - 1; level >= 0; level--) {
        const int w = lvl_w[level];
        const int h = lvl_h[level];
        if (w < 1 || h < 1) continue;
        if (w == 1 && h == 1) continue;

        // Columns first (vertical), then rows — reverse of the forward order.
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) col_buf[y] = data[y * width + x];
            inverse_dwt_1d_dispatch(col_buf, h, filter_type);
            for (int y = 0; y < h; y++) data[y * width + x] = col_buf[y];
        }
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++) row_buf[x] = data[y * width + x];
            inverse_dwt_1d_dispatch(row_buf, w, filter_type);
            for (int x = 0; x < w; x++) data[y * width + x] = row_buf[x];
        }
    }

    free(lvl_w);
    free(lvl_h);
    free(row_buf);
    free(col_buf);
}

//=============================================================================
// Temporal DWT Functions
//=============================================================================

// Map a frame index inside a GOP to its temporal subband level: frames
// [0, num_frames >> temporal_levels) are the coarsest band (level 0), and
// each following band spans twice as many frames.
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    for (int level = 0; level < temporal_levels; level++) {
        if (frame_idx < (num_frames >> (temporal_levels - level))) {
            return level;
        }
    }
    return temporal_levels;
}
// Temporal quantiser scale for a temporal subband level.
// With the "sports" preset bit set (encoder_preset & 0x01) the scale is a flat
// 1.0; otherwise it grows as 2^(0.6 * level^1.14).
static float get_temporal_quantiser_scale(uint8_t encoder_preset, int temporal_level) {
    const float BETA = (encoder_preset & 0x01) ? 0.0f : 0.6f;
    const float KAPPA = (encoder_preset & 0x01) ? 1.0f : 1.14f;
    return powf(2.0f, BETA * powf(temporal_level, KAPPA));
}

// Run the multi-level inverse temporal transform on one pixel's time series.
// level_len[level] is the number of frames participating at that level.
// temporal_wavelet: 0 = Haar, otherwise CDF 5/3.
static void inverse_temporal_dwt_line(float *line, const int *level_len,
                                      int temporal_levels, int temporal_wavelet) {
    for (int level = temporal_levels - 1; level >= 0; level--) {
        const int n = level_len[level];
        if (n < 2) continue;
        if (temporal_wavelet == 0) {
            dwt_haar_inverse_1d(line, n);
        } else {
            dwt_53_inverse_1d(line, n);
        }
    }
}

// Apply the inverse 3D DWT to GOP data: per-frame inverse 2D spatial DWT,
// then the inverse temporal DWT along t for every pixel of every channel.
// (Refactor: the temporal pass was previously triplicated verbatim for the
// Y, Co and Cg planes; it is now a single helper driven by a channel loop.)
static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
                                 int width, int height, int gop_size,
                                 int spatial_levels, int temporal_levels, int filter_type,
                                 int temporal_wavelet) {
    // Step 1: inverse 2D spatial DWT on each frame of each channel.
    for (int t = 0; t < gop_size; t++) {
        apply_inverse_dwt_multilevel(gop_y[t], width, height, spatial_levels, filter_type);
        apply_inverse_dwt_multilevel(gop_co[t], width, height, spatial_levels, filter_type);
        apply_inverse_dwt_multilevel(gop_cg[t], width, height, spatial_levels, filter_type);
    }

    // Step 2: inverse temporal DWT (only meaningful with at least 2 frames).
    if (gop_size < 2) return;

    int *level_len = malloc((temporal_levels + 1) * sizeof(int));
    level_len[0] = gop_size;
    for (int i = 1; i <= temporal_levels; i++) {
        level_len[i] = (level_len[i - 1] + 1) / 2;
    }

    float **channels[3] = { gop_y, gop_co, gop_cg };
    float *line = malloc(gop_size * sizeof(float));
    const int num_pixels = width * height;

    for (int pixel = 0; pixel < num_pixels; pixel++) {
        for (int c = 0; c < 3; c++) {
            float **frames = channels[c];
            for (int t = 0; t < gop_size; t++) line[t] = frames[t][pixel];
            inverse_temporal_dwt_line(line, level_len, temporal_levels, temporal_wavelet);
            for (int t = 0; t < gop_size; t++) frames[t][pixel] = line[t];
        }
    }

    free(line);
    free(level_len);
}
// Read the 2-bit significance code for one pixel from a per-frame map.
// Returns 0..3, or -1 when the map byte lies outside the decompressed buffer
// (the caller then leaves the coefficient at 0 and consumes no explicit value).
static int read_2bit_code(const uint8_t *data, size_t data_size,
                          int maps_start, int frame_map_offset, int pixel) {
    const int bit_pos = pixel * 2;
    const int pos = maps_start + frame_map_offset + bit_pos / 8;
    if (pos >= (int)data_size) return -1;
    // The bit offset is always 0, 2, 4 or 6, so a code never straddles two
    // bytes. (The original carried an unreachable bit_offset == 7 byte-merge
    // branch; it has been removed.)
    return (data[pos] >> (bit_pos % 8)) & 0x03;
}

// Read the idx-th explicit little-endian int16 from a channel's value stream.
// Returns 0 when the value would read past the end of the buffer.
static int16_t read_explicit_value(const uint8_t *data, size_t data_size,
                                   int values_start, int idx) {
    const int off = values_start + idx * 2;
    if (off + 1 >= (int)data_size) return 0;
    const int lo = data[off] & 0xFF;
    const int hi = (int8_t)data[off + 1];
    return (int16_t)((hi << 8) | lo);
}

// Postprocess GOP unified block (2-bit map format).
// Stream layout: [Y maps][Co maps][Cg maps][Y values][Co values][Cg values],
// one map per frame per present channel. Code semantics per pixel:
// 0 -> 0, 1 -> +1, 2 -> -1, 3 -> next explicit int16 from that channel's
// value stream. Returns output[frame][channel][pixel] (caller frees every
// level), or NULL on allocation failure.
static int16_t ***postprocess_gop_unified(const uint8_t *decompressed_data, size_t data_size,
                                          int gop_size, int num_pixels, int channel_layout) {
    const int map_bytes_per_frame = (num_pixels * 2 + 7) / 8;

    const int has_y = (channel_layout & 0x04) == 0;
    const int has_co = (channel_layout & 0x02) == 0;
    // NOTE(review): Cg tests the same 0x02 bit as Co, matching the original
    // code (and postprocess_gop_raw) — confirm against the TAV channel-layout
    // spec whether 0x01 was intended.
    const int has_cg = (channel_layout & 0x02) == 0;

    // Map regions are laid out Y, Co, Cg for whichever channels are present.
    int read_ptr = 0;
    const int y_maps_start = has_y ? read_ptr : -1;
    if (has_y) read_ptr += map_bytes_per_frame * gop_size;
    const int co_maps_start = has_co ? read_ptr : -1;
    if (has_co) read_ptr += map_bytes_per_frame * gop_size;
    const int cg_maps_start = has_cg ? read_ptr : -1;
    if (has_cg) read_ptr += map_bytes_per_frame * gop_size;

    const int present[3] = { has_y, has_co, has_cg };
    const int maps_start[3] = { y_maps_start, co_maps_start, cg_maps_start };

    // Pass 1: count explicit ("other") values per channel so the start of each
    // channel's value stream can be located.
    int other_count[3] = { 0, 0, 0 };
    for (int frame = 0; frame < gop_size; frame++) {
        const int fo = frame * map_bytes_per_frame;
        for (int i = 0; i < num_pixels; i++) {
            for (int c = 0; c < 3; c++) {
                if (present[c] &&
                    read_2bit_code(decompressed_data, data_size, maps_start[c], fo, i) == 3) {
                    other_count[c]++;
                }
            }
        }
    }

    int values_start[3];
    values_start[0] = read_ptr;
    values_start[1] = values_start[0] + other_count[0] * 2;
    values_start[2] = values_start[1] + other_count[1] * 2;

    // Allocate output[frame][channel][pixel], zero-initialised.
    int16_t ***output = malloc(gop_size * sizeof(int16_t **));
    if (!output) return NULL;
    for (int t = 0; t < gop_size; t++) {
        output[t] = calloc(3, sizeof(int16_t *));
        if (output[t]) {
            output[t][0] = calloc(num_pixels, sizeof(int16_t));
            output[t][1] = calloc(num_pixels, sizeof(int16_t));
            output[t][2] = calloc(num_pixels, sizeof(int16_t));
        }
        if (!output[t] || !output[t][0] || !output[t][1] || !output[t][2]) {
            // Unwind everything allocated so far.
            for (int u = 0; u <= t; u++) {
                if (output[u]) {
                    free(output[u][0]);
                    free(output[u][1]);
                    free(output[u][2]);
                    free(output[u]);
                }
            }
            free(output);
            return NULL;
        }
    }

    // Pass 2: decode every pixel of every frame.
    int value_idx[3] = { 0, 0, 0 };
    for (int frame = 0; frame < gop_size; frame++) {
        const int fo = frame * map_bytes_per_frame;
        for (int i = 0; i < num_pixels; i++) {
            for (int c = 0; c < 3; c++) {
                if (!present[c]) continue;
                const int code = read_2bit_code(decompressed_data, data_size, maps_start[c], fo, i);
                if (code == 1) {
                    output[frame][c][i] = 1;
                } else if (code == 2) {
                    output[frame][c][i] = -1;
                } else if (code == 3) {
                    output[frame][c][i] =
                        read_explicit_value(decompressed_data, data_size, values_start[c], value_idx[c]);
                    value_idx[c]++;
                }
                // code 0 or out of range: coefficient stays 0, no value consumed
            }
        }
    }

    return output;
}
// Postprocess GOP RAW format: the stream is the planar concatenation
// [Y plane x gop_size][Co plane x gop_size][Cg plane x gop_size] of
// native-endian int16 coefficients, channels present per channel_layout.
// Returns output[frame][channel][pixel] (caller frees), or NULL when the
// buffer is shorter than the required planes.
// Fixes vs. original: size arithmetic done in size_t (the old int products of
// gop_size * num_pixels * sizeof(int16_t) and the (int)data_size cast could
// truncate), and the unaligned/aliasing-unsafe (const int16_t *) cast is
// replaced by memcpy of the same bytes.
static int16_t ***postprocess_gop_raw(const uint8_t *decompressed_data, size_t data_size,
                                      int gop_size, int num_pixels, int channel_layout) {
    const int has_y = (channel_layout & 0x04) == 0;
    const int has_co = (channel_layout & 0x02) == 0;
    // NOTE(review): Cg tests the same 0x02 bit as Co, matching the original —
    // confirm against the TAV channel-layout spec whether 0x01 was intended.
    const int has_cg = (channel_layout & 0x02) == 0;

    int16_t ***output = malloc(gop_size * sizeof(int16_t **));
    for (int t = 0; t < gop_size; t++) {
        output[t] = malloc(3 * sizeof(int16_t *));
        output[t][0] = calloc(num_pixels, sizeof(int16_t));
        output[t][1] = calloc(num_pixels, sizeof(int16_t));
        output[t][2] = calloc(num_pixels, sizeof(int16_t));
    }

    const size_t frame_bytes = (size_t)num_pixels * sizeof(int16_t);
    const size_t channel_bytes = (size_t)gop_size * frame_bytes;
    const int present[3] = { has_y, has_co, has_cg };
    size_t offset = 0;

    for (int c = 0; c < 3; c++) {
        if (!present[c]) continue;
        if (offset + channel_bytes > data_size) {
            goto error_cleanup;
        }
        for (int t = 0; t < gop_size; t++) {
            // memcpy instead of casting the byte buffer to int16_t*:
            // identical bytes, no alignment or strict-aliasing concerns.
            memcpy(output[t][c], decompressed_data + offset + (size_t)t * frame_bytes, frame_bytes);
        }
        offset += channel_bytes;
    }

    return output;

error_cleanup:
    for (int t = 0; t < gop_size; t++) {
        free(output[t][0]);
        free(output[t][1]);
        free(output[t][2]);
        free(output[t]);
    }
    free(output);
    return NULL;
}
// Postprocess GOP EZBC format. Each frame is stored as a little-endian
// uint32 byte count followed by an EZBC-coded payload. Real frame dimensions
// are recovered by peeking the first payload (they may differ from the
// nominal pixel count, e.g. for interlaced content); if peeking fails, a
// square layout is guessed from num_pixels. Returns
// output[frame][channel][pixel], or NULL when a frame header or payload
// would read past the end of the buffer.
static int16_t ***postprocess_gop_ezbc(const uint8_t *decompressed_data, size_t data_size,
                                       int gop_size, int num_pixels, int channel_layout,
                                       int *out_width, int *out_height) {
    int w = 0;
    int h = 0;
    int pixels = num_pixels;

    // Try to learn the true dimensions from the first frame's payload.
    if (data_size >= 8) {
        const uint32_t first_size = ((uint32_t)decompressed_data[0]) |
                                    ((uint32_t)decompressed_data[1] << 8) |
                                    ((uint32_t)decompressed_data[2] << 16) |
                                    ((uint32_t)decompressed_data[3] << 24);
        if (4 + first_size <= data_size &&
            ezbc_peek_dimensions(decompressed_data + 4, channel_layout, &w, &h) == 0) {
            pixels = w * h;
        }
    }

    // Fallback: assume a roughly square frame.
    if (w == 0 || h == 0) {
        w = (int)sqrt(num_pixels);
        h = num_pixels / w;
        pixels = w * h;
    }

    if (out_width) *out_width = w;
    if (out_height) *out_height = h;

    int16_t ***output = malloc(gop_size * sizeof(int16_t **));
    for (int t = 0; t < gop_size; t++) {
        output[t] = malloc(3 * sizeof(int16_t *));
        for (int c = 0; c < 3; c++) {
            output[t][c] = calloc(pixels, sizeof(int16_t));
        }
    }

    int offset = 0;
    for (int t = 0; t < gop_size; t++) {
        if (offset + 4 > (int)data_size) {
            goto error_cleanup;
        }
        const uint32_t frame_size = ((uint32_t)decompressed_data[offset + 0]) |
                                    ((uint32_t)decompressed_data[offset + 1] << 8) |
                                    ((uint32_t)decompressed_data[offset + 2] << 16) |
                                    ((uint32_t)decompressed_data[offset + 3] << 24);
        offset += 4;
        if (offset + frame_size > data_size) {
            goto error_cleanup;
        }

        postprocess_coefficients_ezbc(
            (uint8_t *)(decompressed_data + offset), pixels,
            output[t][0], output[t][1], output[t][2],
            channel_layout);

        offset += frame_size;
    }

    return output;

error_cleanup:
    for (int t = 0; t < gop_size; t++) {
        for (int c = 0; c < 3; c++) {
            free(output[t][c]);
        }
        free(output[t]);
    }
    free(output);
    return NULL;
}

//=============================================================================
// Color Conversion
//=============================================================================
CLAMP(roundf(r_val), 0, 255); + *g = CLAMP(roundf(g_val), 0, 255); + *b = CLAMP(roundf(b_val), 0, 255); +} + +static void ictcp_to_rgb(float i, float ct, float cp, uint8_t *r, uint8_t *g, uint8_t *b) { + float l = i + 0.008609f * ct; + float m = i - 0.008609f * ct; + float s = i + 0.560031f * cp; + + l = powf(fmaxf(l, 0.0f), 1.0f / 0.1593f); + m = powf(fmaxf(m, 0.0f), 1.0f / 0.1593f); + s = powf(fmaxf(s, 0.0f), 1.0f / 0.1593f); + + float r_val = 5.432622f * l - 4.679910f * m + 0.247288f * s; + float g_val = -1.106160f * l + 2.311198f * m - 0.205038f * s; + float b_val = 0.028262f * l - 0.195689f * m + 1.167427f * s; + + *r = CLAMP((int)(r_val * 255.0f + 0.5f), 0, 255); + *g = CLAMP((int)(g_val * 255.0f + 0.5f), 0, 255); + *b = CLAMP((int)(b_val * 255.0f + 0.5f), 0, 255); +} + +//============================================================================= +// Public API Implementation +//============================================================================= + +tav_video_context_t *tav_video_create(const tav_video_params_t *params) { + if (!params) return NULL; + + tav_video_context_t *ctx = calloc(1, sizeof(tav_video_context_t)); + if (!ctx) return NULL; + + ctx->params = *params; + ctx->verbose = 0; + + const int buffer_size = params->width * params->height; + + // Allocate working buffers + ctx->dwt_buffer_y = calloc(buffer_size, sizeof(float)); + ctx->dwt_buffer_co = calloc(buffer_size, sizeof(float)); + ctx->dwt_buffer_cg = calloc(buffer_size, sizeof(float)); + ctx->reference_ycocg_y = calloc(buffer_size, sizeof(float)); + ctx->reference_ycocg_co = calloc(buffer_size, sizeof(float)); + ctx->reference_ycocg_cg = calloc(buffer_size, sizeof(float)); + + if (!ctx->dwt_buffer_y || !ctx->dwt_buffer_co || !ctx->dwt_buffer_cg || + !ctx->reference_ycocg_y || !ctx->reference_ycocg_co || !ctx->reference_ycocg_cg) { + tav_video_free(ctx); + return NULL; + } + + snprintf(ctx->error_msg, sizeof(ctx->error_msg), "No error"); + return ctx; +} + +void 
// Release a decoder context and all of its working buffers. NULL is a no-op.
void tav_video_free(tav_video_context_t *ctx) {
    if (!ctx) return;

    free(ctx->dwt_buffer_y);
    free(ctx->dwt_buffer_co);
    free(ctx->dwt_buffer_cg);
    free(ctx->reference_ycocg_y);
    free(ctx->reference_ycocg_co);
    free(ctx->reference_ycocg_cg);
    free(ctx);
}

// Decode one GOP packet into gop_size RGB frames.
// compressed_data holds a Zstd frame wrapping the entropy-coded coefficient
// planes; rgb_frames[t] must each point at width*height*3 writable bytes.
// Returns 0 on success, -1 on failure (message via tav_video_get_error()).
int tav_video_decode_gop(tav_video_context_t *ctx,
                         const uint8_t *compressed_data, uint32_t compressed_size,
                         uint8_t gop_size, uint8_t **rgb_frames) {
    if (!ctx || !compressed_data || !rgb_frames) {
        if (ctx) snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Invalid parameters");
        return -1;
    }

    const int width = ctx->params.width;
    const int height = ctx->params.height;
    const int num_pixels = width * height;

    // FIX: ZSTD_getFrameContentSize() returns the content size or one of the
    // sentinels ZSTD_CONTENTSIZE_UNKNOWN / ZSTD_CONTENTSIZE_ERROR; it is NOT
    // a size_t error code, so the previous ZSTD_isError() check was invalid.
    const unsigned long long content_size =
        ZSTD_getFrameContentSize(compressed_data, compressed_size);
    if (content_size == ZSTD_CONTENTSIZE_ERROR || content_size == ZSTD_CONTENTSIZE_UNKNOWN) {
        snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
        return -1;
    }

    uint8_t *decompressed_data = malloc((size_t)content_size);
    if (!decompressed_data) {
        snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Memory allocation failed");
        return -1;
    }

    const size_t decompressed_size = ZSTD_decompress(decompressed_data, (size_t)content_size,
                                                     compressed_data, compressed_size);
    if (ZSTD_isError(decompressed_size)) {
        free(decompressed_data);
        snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed");
        return -1;
    }

    // Postprocess based on entropy coder (0 = 2-bit map, 1 = EZBC, 2 = RAW).
    int16_t ***gop_coeffs = NULL;
    int actual_width = width;
    int actual_height = height;

    if (ctx->params.entropy_coder == 0) {
        gop_coeffs = postprocess_gop_unified(decompressed_data, decompressed_size, gop_size,
                                             num_pixels, ctx->params.channel_layout);
    } else if (ctx->params.entropy_coder == 1) {
        gop_coeffs = postprocess_gop_ezbc(decompressed_data, decompressed_size, gop_size,
                                          num_pixels, ctx->params.channel_layout,
                                          &actual_width, &actual_height);
    } else if (ctx->params.entropy_coder == 2) {
        gop_coeffs = postprocess_gop_raw(decompressed_data, decompressed_size, gop_size,
                                         num_pixels, ctx->params.channel_layout);
    }

    free(decompressed_data);

    if (!gop_coeffs) {
        snprintf(ctx->error_msg, sizeof(ctx->error_msg), "GOP postprocessing failed");
        return -1;
    }

    // EZBC may report real dimensions differing from the nominal ones
    // (e.g. for interlaced content); trust the bitstream when it does.
    int final_width = width;
    int final_height = height;
    int final_num_pixels = num_pixels;
    if (actual_width != 0 && actual_height != 0) {
        if ((actual_width != width || actual_height != height) && ctx->verbose) {
            fprintf(stderr, "Warning: EZBC dimensions (%dx%d) differ from params (%dx%d), using EZBC dimensions\n",
                    actual_width, actual_height, width, height);
        }
        final_width = actual_width;
        final_height = actual_height;
        final_num_pixels = actual_width * actual_height;
    }

    // GOP float planes for the inverse 3D DWT.
    // NOTE(review): these allocations are unchecked, as in the original.
    float **gop_y = malloc(gop_size * sizeof(float *));
    float **gop_co = malloc(gop_size * sizeof(float *));
    float **gop_cg = malloc(gop_size * sizeof(float *));
    for (int t = 0; t < gop_size; t++) {
        gop_y[t] = calloc(final_num_pixels, sizeof(float));
        gop_co[t] = calloc(final_num_pixels, sizeof(float));
        gop_cg[t] = calloc(final_num_pixels, sizeof(float));
    }

    // Dequantise each frame, scaling by its temporal subband level.
    for (int t = 0; t < gop_size; t++) {
        const int temporal_level = get_temporal_subband_level(t, gop_size, ctx->params.temporal_levels);
        const float temporal_scale = get_temporal_quantiser_scale(ctx->params.encoder_preset, temporal_level);

        const float base_q_y = roundf(QLUT[ctx->params.quantiser_y] * temporal_scale);
        const float base_q_co = roundf(QLUT[ctx->params.quantiser_co] * temporal_scale);
        const float base_q_cg = roundf(QLUT[ctx->params.quantiser_cg] * temporal_scale);

        if (ctx->params.perceptual_tuning) {
            dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y],
                                               gop_coeffs[t][0], gop_y[t], final_width, final_height,
                                               ctx->params.decomp_levels, base_q_y, 0);
            dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_co],
                                               gop_coeffs[t][1], gop_co[t], final_width, final_height,
                                               ctx->params.decomp_levels, base_q_co, 1);
            dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_cg],
                                               gop_coeffs[t][2], gop_cg[t], final_width, final_height,
                                               ctx->params.decomp_levels, base_q_cg, 1);
        } else {
            // Uniform dequantisation.
            for (int i = 0; i < final_num_pixels; i++) {
                gop_y[t][i] = gop_coeffs[t][0][i] * base_q_y;
                gop_co[t][i] = gop_coeffs[t][1][i] * base_q_co;
                gop_cg[t][i] = gop_coeffs[t][2][i] * base_q_cg;
            }
        }

        apply_grain_synthesis(gop_y[t], final_width, final_height, ctx->params.decomp_levels, t,
                              QLUT[ctx->params.quantiser_y], ctx->params.encoder_preset);
        apply_grain_synthesis(gop_co[t], final_width, final_height, ctx->params.decomp_levels, t,
                              QLUT[ctx->params.quantiser_co], ctx->params.encoder_preset);
        apply_grain_synthesis(gop_cg[t], final_width, final_height, ctx->params.decomp_levels, t,
                              QLUT[ctx->params.quantiser_cg], ctx->params.encoder_preset);
    }

    // Quantised coefficients are no longer needed.
    for (int t = 0; t < gop_size; t++) {
        free(gop_coeffs[t][0]);
        free(gop_coeffs[t][1]);
        free(gop_coeffs[t][2]);
        free(gop_coeffs[t]);
    }
    free(gop_coeffs);

    apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, final_width, final_height, gop_size,
                         ctx->params.decomp_levels, ctx->params.temporal_levels,
                         ctx->params.wavelet_filter, ctx->params.temporal_wavelet);

    // Convert to interleaved RGB.
    for (int t = 0; t < gop_size; t++) {
        for (int idx = 0; idx < final_num_pixels; idx++) {
            const int rgb_idx = idx * 3;
            if (ctx->params.channel_layout == 0) {
                ycocgr_to_rgb(gop_y[t][idx], gop_co[t][idx], gop_cg[t][idx],
                              &rgb_frames[t][rgb_idx], &rgb_frames[t][rgb_idx + 1],
                              &rgb_frames[t][rgb_idx + 2]);
            } else {
                ictcp_to_rgb(gop_y[t][idx], gop_co[t][idx], gop_cg[t][idx],
                             &rgb_frames[t][rgb_idx], &rgb_frames[t][rgb_idx + 1],
                             &rgb_frames[t][rgb_idx + 2]);
            }
        }
    }

    for (int t = 0; t < gop_size; t++) {
        free(gop_y[t]);
        free(gop_co[t]);
        free(gop_cg[t]);
    }
    free(gop_y);
    free(gop_co);
    free(gop_cg);

    return 0;
}
&rgb_frames[t][rgb_idx + 2]); + } else { + ictcp_to_rgb(gop_y[t][idx], gop_co[t][idx], gop_cg[t][idx], + &rgb_frames[t][rgb_idx], &rgb_frames[t][rgb_idx + 1], &rgb_frames[t][rgb_idx + 2]); + } + } + } + } + + // Free GOP buffers + for (int t = 0; t < gop_size; t++) { + free(gop_y[t]); + free(gop_co[t]); + free(gop_cg[t]); + } + free(gop_y); + free(gop_co); + free(gop_cg); + + return 0; +} + +int tav_video_decode_iframe(tav_video_context_t *ctx, + const uint8_t *compressed_data, uint32_t packet_size, + uint8_t *rgb_frame) { + if (!ctx || !compressed_data || !rgb_frame) { + if (ctx) snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Invalid parameters"); + return -1; + } + + const int width = ctx->params.width; + const int height = ctx->params.height; + const int num_pixels = width * height; + + // Decompress + const size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, packet_size); + if (ZSTD_isError(decompressed_bound)) { + snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed"); + return -1; + } + + uint8_t *decompressed_data = malloc(decompressed_bound); + const size_t decompressed_size = ZSTD_decompress(decompressed_data, decompressed_bound, + compressed_data, packet_size); + if (ZSTD_isError(decompressed_size)) { + free(decompressed_data); + snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed"); + return -1; + } + + // Allocate coefficient buffers + int16_t *coeffs_y = calloc(num_pixels, sizeof(int16_t)); + int16_t *coeffs_co = calloc(num_pixels, sizeof(int16_t)); + int16_t *coeffs_cg = calloc(num_pixels, sizeof(int16_t)); + + // Postprocess based on entropy coder + if (ctx->params.entropy_coder == 0) { + postprocess_coefficients_twobit(decompressed_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg); + } else if (ctx->params.entropy_coder == 1) { + postprocess_coefficients_ezbc(decompressed_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg, ctx->params.channel_layout); + } + + free(decompressed_data); 
+ + // Dequantise + const float base_q_y = QLUT[ctx->params.quantiser_y]; + const float base_q_co = QLUT[ctx->params.quantiser_co]; + const float base_q_cg = QLUT[ctx->params.quantiser_cg]; + + if (ctx->params.perceptual_tuning) { + dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y], + coeffs_y, ctx->dwt_buffer_y, width, height, + ctx->params.decomp_levels, base_q_y, 0); + dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_co], + coeffs_co, ctx->dwt_buffer_co, width, height, + ctx->params.decomp_levels, base_q_co, 1); + dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_cg], + coeffs_cg, ctx->dwt_buffer_cg, width, height, + ctx->params.decomp_levels, base_q_cg, 1); + } else { + for (int i = 0; i < num_pixels; i++) { + ctx->dwt_buffer_y[i] = coeffs_y[i] * base_q_y; + ctx->dwt_buffer_co[i] = coeffs_co[i] * base_q_co; + ctx->dwt_buffer_cg[i] = coeffs_cg[i] * base_q_cg; + } + } + + free(coeffs_y); + free(coeffs_co); + free(coeffs_cg); + + // Apply grain synthesis + apply_grain_synthesis(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, 0, + QLUT[ctx->params.quantiser_y], ctx->params.encoder_preset); + apply_grain_synthesis(ctx->dwt_buffer_co, width, height, ctx->params.decomp_levels, 0, + QLUT[ctx->params.quantiser_co], ctx->params.encoder_preset); + apply_grain_synthesis(ctx->dwt_buffer_cg, width, height, ctx->params.decomp_levels, 0, + QLUT[ctx->params.quantiser_cg], ctx->params.encoder_preset); + + // Apply inverse DWT + apply_inverse_dwt_multilevel(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter); + apply_inverse_dwt_multilevel(ctx->dwt_buffer_co, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter); + apply_inverse_dwt_multilevel(ctx->dwt_buffer_cg, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter); + + // Store as reference for P-frames + memcpy(ctx->reference_ycocg_y, ctx->dwt_buffer_y, num_pixels * sizeof(float)); + 
memcpy(ctx->reference_ycocg_co, ctx->dwt_buffer_co, num_pixels * sizeof(float)); + memcpy(ctx->reference_ycocg_cg, ctx->dwt_buffer_cg, num_pixels * sizeof(float)); + + // Convert to RGB + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + const int idx = y * width + x; + const int rgb_idx = (y * width + x) * 3; + + if (ctx->params.channel_layout == 0) { + ycocgr_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx], + &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]); + } else { + ictcp_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx], + &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]); + } + } + } + + return 0; +} + +int tav_video_decode_pframe(tav_video_context_t *ctx, + const uint8_t *compressed_data, uint32_t packet_size, + uint8_t *rgb_frame) { + if (!ctx || !compressed_data || !rgb_frame) { + if (ctx) snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Invalid parameters"); + return -1; + } + + const int width = ctx->params.width; + const int height = ctx->params.height; + const int num_pixels = width * height; + + // Decompress + const size_t decompressed_bound = ZSTD_getFrameContentSize(compressed_data, packet_size); + if (ZSTD_isError(decompressed_bound)) { + snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed"); + return -1; + } + + uint8_t *decompressed_data = malloc(decompressed_bound); + const size_t decompressed_size = ZSTD_decompress(decompressed_data, decompressed_bound, + compressed_data, packet_size); + if (ZSTD_isError(decompressed_size)) { + free(decompressed_data); + snprintf(ctx->error_msg, sizeof(ctx->error_msg), "Zstd decompression failed"); + return -1; + } + + // Allocate coefficient buffers + int16_t *coeffs_y = calloc(num_pixels, sizeof(int16_t)); + int16_t *coeffs_co = calloc(num_pixels, sizeof(int16_t)); + int16_t *coeffs_cg = calloc(num_pixels, sizeof(int16_t)); + + // Postprocess + if 
(ctx->params.entropy_coder == 0) { + postprocess_coefficients_twobit(decompressed_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg); + } else if (ctx->params.entropy_coder == 1) { + postprocess_coefficients_ezbc(decompressed_data, num_pixels, coeffs_y, coeffs_co, coeffs_cg, ctx->params.channel_layout); + } + + free(decompressed_data); + + // Dequantise + const float base_q_y = QLUT[ctx->params.quantiser_y]; + const float base_q_co = QLUT[ctx->params.quantiser_co]; + const float base_q_cg = QLUT[ctx->params.quantiser_cg]; + + if (ctx->params.perceptual_tuning) { + dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_y], + coeffs_y, ctx->dwt_buffer_y, width, height, + ctx->params.decomp_levels, base_q_y, 0); + dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_co], + coeffs_co, ctx->dwt_buffer_co, width, height, + ctx->params.decomp_levels, base_q_co, 1); + dequantise_dwt_subbands_perceptual(0, QLUT[ctx->params.quantiser_cg], + coeffs_cg, ctx->dwt_buffer_cg, width, height, + ctx->params.decomp_levels, base_q_cg, 1); + } else { + for (int i = 0; i < num_pixels; i++) { + ctx->dwt_buffer_y[i] = coeffs_y[i] * base_q_y; + ctx->dwt_buffer_co[i] = coeffs_co[i] * base_q_co; + ctx->dwt_buffer_cg[i] = coeffs_cg[i] * base_q_cg; + } + } + + free(coeffs_y); + free(coeffs_co); + free(coeffs_cg); + + // Apply grain synthesis + apply_grain_synthesis(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, 0, + QLUT[ctx->params.quantiser_y], ctx->params.encoder_preset); + apply_grain_synthesis(ctx->dwt_buffer_co, width, height, ctx->params.decomp_levels, 0, + QLUT[ctx->params.quantiser_co], ctx->params.encoder_preset); + apply_grain_synthesis(ctx->dwt_buffer_cg, width, height, ctx->params.decomp_levels, 0, + QLUT[ctx->params.quantiser_cg], ctx->params.encoder_preset); + + // Apply inverse DWT + apply_inverse_dwt_multilevel(ctx->dwt_buffer_y, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter); + 
apply_inverse_dwt_multilevel(ctx->dwt_buffer_co, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter); + apply_inverse_dwt_multilevel(ctx->dwt_buffer_cg, width, height, ctx->params.decomp_levels, ctx->params.wavelet_filter); + + // Add to reference frame (delta mode) + for (int i = 0; i < num_pixels; i++) { + ctx->dwt_buffer_y[i] += ctx->reference_ycocg_y[i]; + ctx->dwt_buffer_co[i] += ctx->reference_ycocg_co[i]; + ctx->dwt_buffer_cg[i] += ctx->reference_ycocg_cg[i]; + } + + // Store as new reference + memcpy(ctx->reference_ycocg_y, ctx->dwt_buffer_y, num_pixels * sizeof(float)); + memcpy(ctx->reference_ycocg_co, ctx->dwt_buffer_co, num_pixels * sizeof(float)); + memcpy(ctx->reference_ycocg_cg, ctx->dwt_buffer_cg, num_pixels * sizeof(float)); + + // Convert to RGB + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + const int idx = y * width + x; + const int rgb_idx = (y * width + x) * 3; + + if (ctx->params.channel_layout == 0) { + ycocgr_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx], + &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]); + } else { + ictcp_to_rgb(ctx->dwt_buffer_y[idx], ctx->dwt_buffer_co[idx], ctx->dwt_buffer_cg[idx], + &rgb_frame[rgb_idx], &rgb_frame[rgb_idx + 1], &rgb_frame[rgb_idx + 2]); + } + } + } + + return 0; +} + +const char *tav_video_get_error(tav_video_context_t *ctx) { + if (!ctx) return "Invalid context"; + return ctx->error_msg; +} + +void tav_video_set_verbose(tav_video_context_t *ctx, int verbose) { + if (ctx) ctx->verbose = verbose; +} diff --git a/video_encoder/tav_video_decoder.h b/video_encoder/tav_video_decoder.h new file mode 100644 index 0000000..d3f4ed1 --- /dev/null +++ b/video_encoder/tav_video_decoder.h @@ -0,0 +1,77 @@ +// Created by CuriousTorvald and Claude on 2025-12-02. 
+// TAV Video Decoder Library - Shared decoding functions for TAV format +// Can be used by both regular TAV decoder and TAV-DT decoder + +#ifndef TAV_VIDEO_DECODER_H +#define TAV_VIDEO_DECODER_H + +#include <stdint.h> +#include <stddef.h> + +// Video decoder context - opaque to users +typedef struct tav_video_context tav_video_context_t; + +// Video parameters structure +typedef struct { + int width; + int height; + int decomp_levels; // Spatial DWT levels (typically 4) + int temporal_levels; // Temporal DWT levels (typically 2) + int wavelet_filter; // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar + int temporal_wavelet; // Temporal wavelet (0=Haar, 1=CDF 5/3) + int entropy_coder; // 0=Twobitmap, 1=EZBC, 2=RAW + int channel_layout; // 0=YCoCg-R, 1=ICtCp + int perceptual_tuning; // 1=perceptual quantisation, 0=uniform + uint8_t quantiser_y; // Base quantiser index for Y/I + uint8_t quantiser_co; // Base quantiser index for Co/Ct + uint8_t quantiser_cg; // Base quantiser index for Cg/Cp + uint8_t encoder_preset; // Encoder preset flags (sports, anime, etc.)
+ int monoblock; // 1=single tile (monoblock), 0=multi-tile +} tav_video_params_t; + +// Create video decoder context +// Returns NULL on failure +tav_video_context_t *tav_video_create(const tav_video_params_t *params); + +// Free video decoder context +void tav_video_free(tav_video_context_t *ctx); + +// Decode GOP_UNIFIED packet (0x12) to RGB24 frames +// Input: compressed_data - GOP packet data (after packet type byte) +// compressed_size - size of compressed data +// gop_size - number of frames in GOP (read from packet) +// Output: rgb_frames - array of pointers to RGB24 frame buffers (width*height*3 each) +// Must be pre-allocated by caller (gop_size pointers, each pointing to width*height*3 bytes) +// Returns: 0 on success, -1 on error +int tav_video_decode_gop(tav_video_context_t *ctx, + const uint8_t *compressed_data, uint32_t compressed_size, + uint8_t gop_size, uint8_t **rgb_frames); + +// Decode IFRAME packet (0x10) to RGB24 frame +// Input: compressed_data - I-frame packet data (after packet type byte) +// packet_size - size of packet data +// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes) +// Must be pre-allocated by caller +// Returns: 0 on success, -1 on error +int tav_video_decode_iframe(tav_video_context_t *ctx, + const uint8_t *compressed_data, uint32_t packet_size, + uint8_t *rgb_frame); + +// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference) +// Input: compressed_data - P-frame packet data (after packet type byte) +// packet_size - size of packet data +// Output: rgb_frame - pointer to RGB24 frame buffer (width*height*3 bytes) +// Must be pre-allocated by caller +// Returns: 0 on success, -1 on error +// Note: Requires previous frame to be decoded first (stored internally as reference) +int tav_video_decode_pframe(tav_video_context_t *ctx, + const uint8_t *compressed_data, uint32_t packet_size, + uint8_t *rgb_frame); + +// Get last error message +const char *tav_video_get_error(tav_video_context_t *ctx); 
+ +// Enable verbose debug output +void tav_video_set_verbose(tav_video_context_t *ctx, int verbose); + +#endif // TAV_VIDEO_DECODER_H