// Created by CuriousTorvald and Claude on 2025-11-03.
// TAV Decoder - Converts TAV video to FFV1 format with TAD audio to PCMu8
// Based on TSVM decoder implementation (GraphicsJSR223Delegate.kt + playtav.js)
// Only supports features available in TSVM decoder (no MC-EZBC, no MPEG-style motion compensation)

// NOTE(review): the header names of these #include directives were missing in
// this copy of the file. The list below is reconstructed from the APIs the
// visible code calls (plus <fcntl.h> for the FFmpeg fd set-up) - confirm it
// against the upstream source; code outside this view may need more headers.
#include <fcntl.h>
#include <math.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>
#include <zstd.h>

#define DECODER_VENDOR_STRING "Decoder-TAV 20251103 (ffv1+pcmu8)"

// TAV format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
#define TAV_MODE_SKIP  0x00
#define TAV_MODE_INTRA 0x01
#define TAV_MODE_DELTA 0x02

// TAV packet types (only those supported by TSVM decoder)
#define TAV_PACKET_IFRAME        0x10  // Intra frame (keyframe) - SUPPORTED
#define TAV_PACKET_PFRAME        0x11  // Predicted frame - SUPPORTED (delta mode)
#define TAV_PACKET_GOP_UNIFIED   0x12  // Unified 3D DWT GOP - SUPPORTED
#define TAV_PACKET_AUDIO_MP2     0x20  // MP2 audio - SUPPORTED (passthrough)
#define TAV_PACKET_AUDIO_PCM8    0x21  // 8-bit PCM audio - SUPPORTED
#define TAV_PACKET_AUDIO_TAD     0x24  // TAD audio - SUPPORTED (decode to PCMu8)
#define TAV_PACKET_AUDIO_TRACK   0x40  // Bundled audio track - SUPPORTED (passthrough)
#define TAV_PACKET_SUBTITLE      0x30  // Subtitle - SKIPPED
#define TAV_PACKET_EXTENDED_HDR  0xEF  // Extended header - SKIPPED
#define TAV_PACKET_GOP_SYNC      0xFC  // GOP sync packet - SKIPPED
#define TAV_PACKET_TIMECODE      0xFD  // Timecode - SKIPPED
#define TAV_PACKET_SYNC_NTSC     0xFE  // NTSC sync - SKIPPED
#define TAV_PACKET_SYNC          0xFF  // Sync - SKIPPED

// Unsupported packet types (not in TSVM decoder)
#define TAV_PACKET_PFRAME_RESIDUAL 0x14  // P-frame MPEG-style - NOT SUPPORTED
#define TAV_PACKET_BFRAME_RESIDUAL 0x15  // B-frame MPEG-style - NOT SUPPORTED

// Channel layout definitions
#define CHANNEL_LAYOUT_YCOCG    0  // Y-Co-Cg/I-Ct-Cp
#define CHANNEL_LAYOUT_YCOCG_A  1  // Y-Co-Cg-A/I-Ct-Cp-A
#define CHANNEL_LAYOUT_Y_ONLY   2  // Y/I only
#define CHANNEL_LAYOUT_Y_A      3  // Y-A/I-A
#define CHANNEL_LAYOUT_COCG     4  // Co-Cg/Ct-Cp
Co-Cg/Ct-Cp #define CHANNEL_LAYOUT_COCG_A 5 // Co-Cg-A/Ct-Cp-A // Wavelet filter types #define WAVELET_5_3_REVERSIBLE 0 #define WAVELET_9_7_IRREVERSIBLE 1 #define WAVELET_BIORTHOGONAL_13_7 2 #define WAVELET_DD4 16 #define WAVELET_HAAR 255 // Tile sizes (match TSVM) #define TILE_SIZE_X 640 #define TILE_SIZE_Y 540 #define DWT_FILTER_HALF_SUPPORT 4 #define TILE_MARGIN_LEVELS 3 #define TILE_MARGIN (DWT_FILTER_HALF_SUPPORT * (1 << TILE_MARGIN_LEVELS)) #define PADDED_TILE_SIZE_X (TILE_SIZE_X + 2 * TILE_MARGIN) #define PADDED_TILE_SIZE_Y (TILE_SIZE_Y + 2 * TILE_MARGIN) static inline int CLAMP(int x, int min, int max) { return x < min ? min : (x > max ? max : x); } //============================================================================= // TAV Header Structure (32 bytes) //============================================================================= typedef struct { uint8_t magic[8]; uint8_t version; uint16_t width; uint16_t height; uint8_t fps; uint32_t total_frames; uint8_t wavelet_filter; uint8_t decomp_levels; uint8_t quantiser_y; uint8_t quantiser_co; uint8_t quantiser_cg; uint8_t extra_flags; uint8_t video_flags; uint8_t encoder_quality; uint8_t channel_layout; uint8_t entropy_coder; uint8_t reserved[2]; uint8_t device_orientation; uint8_t file_role; } __attribute__((packed)) tav_header_t; //============================================================================= // Quantization Lookup Table (matches TSVM exactly) //============================================================================= static const int QLUT[] = 
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096}; // Perceptual quantization constants (match TSVM) static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f}; static const float ANISOTROPY_MULT_CHROMA[] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f}; static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f}; static const float FOUR_PIXEL_DETAILER = 0.88f; static const float TWO_PIXEL_DETAILER = 0.92f; //============================================================================= // DWT Subband Layout Calculation (matches TSVM) //============================================================================= typedef struct { int level; // Decomposition level (1 to decompLevels) int subband_type; // 0=LL, 1=LH, 2=HL, 3=HH int coeff_start; // Starting index in linear coefficient array int coeff_count; // Number of coefficients in this subband } dwt_subband_info_t; static int calculate_subband_layout(int width, int height, 
int decomp_levels, dwt_subband_info_t *subbands) { int subband_count = 0; // LL subband at maximum decomposition level const int ll_width = width >> decomp_levels; const int ll_height = height >> decomp_levels; subbands[subband_count++] = (dwt_subband_info_t){decomp_levels, 0, 0, ll_width * ll_height}; int coeff_offset = ll_width * ll_height; // LH, HL, HH subbands for each level from max down to 1 for (int level = decomp_levels; level >= 1; level--) { const int level_width = width >> (decomp_levels - level + 1); const int level_height = height >> (decomp_levels - level + 1); const int subband_size = level_width * level_height; // LH subband subbands[subband_count++] = (dwt_subband_info_t){level, 1, coeff_offset, subband_size}; coeff_offset += subband_size; // HL subband subbands[subband_count++] = (dwt_subband_info_t){level, 2, coeff_offset, subband_size}; coeff_offset += subband_size; // HH subband subbands[subband_count++] = (dwt_subband_info_t){level, 3, coeff_offset, subband_size}; coeff_offset += subband_size; } return subband_count; } //============================================================================= // Perceptual Quantization Model (matches TSVM exactly) //============================================================================= static int tav_derive_encoder_qindex(int q_index, int q_y_global) { if (q_index > 0) return q_index - 1; if (q_y_global >= 60) return 0; else if (q_y_global >= 42) return 1; else if (q_y_global >= 25) return 2; else if (q_y_global >= 12) return 3; else if (q_y_global >= 6) return 4; else if (q_y_global >= 2) return 5; else return 5; } static float perceptual_model3_LH(float level) { const float H4 = 1.2f; const float K = 2.0f; // CRITICAL: Fixed value for fixed curve; quantiser will scale it up anyway const float K12 = K * 12.0f; const float x = level; const float Lx = H4 - ((K + 1.0f) / 15.0f) * (x - 4.0f); const float C3 = -1.0f / 45.0f * (K12 + 92.0f); const float G3x = (-x / 180.0f) * (K12 + 5.0f * x * x - 60.0f 
* x + 252.0f) - C3 + H4; return (level >= 4.0f) ? Lx : G3x; } static float perceptual_model3_HL(int quality, float LH) { return LH * ANISOTROPY_MULT[quality] + ANISOTROPY_BIAS[quality]; } static float lerp(float x, float y, float a) { return x * (1.0f - a) + y * a; } static float perceptual_model3_HH(float LH, float HL, float level) { const float Kx = (sqrtf(level) - 1.0f) * 0.5f + 0.5f; return lerp(LH, HL, Kx); } static float perceptual_model3_LL(float level) { const float n = perceptual_model3_LH(level); const float m = perceptual_model3_LH(level - 1.0f) / n; return n / m; } static float perceptual_model3_chroma_basecurve(int quality, float level) { return 1.0f - (1.0f / (0.5f * quality * quality + 1.0f)) * (level - 4.0f); } static float get_perceptual_weight(int q_index, int q_y_global, int level0, int subband_type, int is_chroma, int max_levels) { // Convert to perceptual level (1-6 scale) const float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f; const int quality_level = tav_derive_encoder_qindex(q_index, q_y_global); if (!is_chroma) { // LUMA CHANNEL if (subband_type == 0) { return perceptual_model3_LL(level); } const float LH = perceptual_model3_LH(level); if (subband_type == 1) { return LH; } const float HL = perceptual_model3_HL(quality_level, LH); if (subband_type == 2) { float detailer = 1.0f; if (level >= 1.8f && level <= 2.2f) detailer = TWO_PIXEL_DETAILER; else if (level >= 2.8f && level <= 3.2f) detailer = FOUR_PIXEL_DETAILER; return HL * detailer; } else { // HH subband float detailer = 1.0f; if (level >= 1.8f && level <= 2.2f) detailer = TWO_PIXEL_DETAILER; else if (level >= 2.8f && level <= 3.2f) detailer = FOUR_PIXEL_DETAILER; return perceptual_model3_HH(LH, HL, level) * detailer; } } else { // CHROMA CHANNELS const float base = perceptual_model3_chroma_basecurve(quality_level, level - 1); if (subband_type == 0) { return 1.0f; } else if (subband_type == 1) { return fmaxf(base, 1.0f); } else if (subband_type == 2) { return 
fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level], 1.0f); } else { return fmaxf(base * ANISOTROPY_MULT_CHROMA[quality_level] + ANISOTROPY_BIAS_CHROMA[quality_level], 1.0f); } } } static void dequantize_dwt_subbands_perceptual(int q_index, int q_y_global, const int16_t *quantized, float *dequantized, int width, int height, int decomp_levels, float base_quantizer, int is_chroma, int frame_num) { dwt_subband_info_t subbands[32]; // Max possible subbands const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands); const int coeff_count = width * height; memset(dequantized, 0, coeff_count * sizeof(float)); int is_debug = (frame_num == 32); if (frame_num == 32) { fprintf(stderr, "DEBUG: dequantize called for frame %d, is_chroma=%d\n", frame_num, is_chroma); } // Apply perceptual weighting to each subband for (int s = 0; s < subband_count; s++) { const dwt_subband_info_t *subband = &subbands[s]; const float weight = get_perceptual_weight(q_index, q_y_global, subband->level, subband->subband_type, is_chroma, decomp_levels); const float effective_quantizer = base_quantizer * weight; if (is_debug && !is_chroma) { if (subband->subband_type == 0) { // LL band fprintf(stderr, " Subband level %d (LL): weight=%.6f, base_q=%.1f, effective_q=%.1f, count=%d\n", subband->level, weight, base_quantizer, effective_quantizer, subband->coeff_count); // Print first 5 quantized LL coefficients fprintf(stderr, " First 5 quantized LL: "); for (int k = 0; k < 5 && k < subband->coeff_count; k++) { int idx = subband->coeff_start + k; fprintf(stderr, "%d ", quantized[idx]); } fprintf(stderr, "\n"); // Find max quantized LL coefficient int max_quant_ll = 0; for (int k = 0; k < subband->coeff_count; k++) { int idx = subband->coeff_start + k; int abs_val = quantized[idx] < 0 ? 
-quantized[idx] : quantized[idx]; if (abs_val > max_quant_ll) max_quant_ll = abs_val; } fprintf(stderr, " Max quantized LL coefficient: %d (dequantizes to %.1f)\n", max_quant_ll, max_quant_ll * effective_quantizer); } } for (int i = 0; i < subband->coeff_count; i++) { const int idx = subband->coeff_start + i; if (idx < coeff_count) { // CRITICAL: Must ROUND to match EZBC encoder's roundf() behavior // Without rounding, truncation limits brightness range (e.g., Y maxes at 227 instead of 255) const float untruncated = quantized[idx] * effective_quantizer; dequantized[idx] = roundf(untruncated); } } } // Debug: Verify LL band was dequantized correctly if (is_debug && !is_chroma) { // Find LL band again to verify for (int s = 0; s < subband_count; s++) { const dwt_subband_info_t *subband = &subbands[s]; if (subband->level == decomp_levels && subband->subband_type == 0) { fprintf(stderr, " AFTER all subbands processed - First 5 dequantized LL: "); for (int k = 0; k < 5 && k < subband->coeff_count; k++) { int idx = subband->coeff_start + k; fprintf(stderr, "%.1f ", dequantized[idx]); } fprintf(stderr, "\n"); // Find max dequantized LL float max_dequant_ll = -999.0f; for (int k = 0; k < subband->coeff_count; k++) { int idx = subband->coeff_start + k; float abs_val = dequantized[idx] < 0 ? 
-dequantized[idx] : dequantized[idx]; if (abs_val > max_dequant_ll) max_dequant_ll = abs_val; } fprintf(stderr, " AFTER all subbands - Max dequantized LL: %.1f\n", max_dequant_ll); break; } } } } //============================================================================= // Grain Synthesis Removal (matches TSVM exactly) //============================================================================= // Deterministic RNG for grain synthesis (matches encoder) static inline uint32_t tav_grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) { uint32_t key = frame * 0x9e3779b9u ^ band * 0x7f4a7c15u ^ (y << 16) ^ x; // rng_hash implementation uint32_t hash = key; hash = hash ^ (hash >> 16); hash = hash * 0x7feb352du; hash = hash ^ (hash >> 15); hash = hash * 0x846ca68bu; hash = hash ^ (hash >> 16); return hash; } // Generate triangular noise from uint32 RNG (returns value in range [-1.0, 1.0]) static inline float tav_grain_triangular_noise(uint32_t rng_val) { // Get two uniform random values in [0, 1] float u1 = (rng_val & 0xFFFFu) / 65535.0f; float u2 = ((rng_val >> 16) & 0xFFFFu) / 65535.0f; // Convert to range [-1, 1] and average for triangular distribution return (u1 + u2) - 1.0f; } // Remove grain synthesis from DWT coefficients (decoder subtracts noise) // This must be called AFTER dequantization but BEFORE inverse DWT static void remove_grain_synthesis_decoder(float *coeffs, int width, int height, int decomp_levels, int frame_num, int q_y_global) { dwt_subband_info_t subbands[32]; const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands); // Noise amplitude (matches Kotlin: qYGlobal.coerceAtMost(32) * 0.5f) const float noise_amplitude = (q_y_global < 32 ? 
q_y_global : 32) * 0.5f; // Process each subband (skip LL band which is level 0) for (int s = 0; s < subband_count; s++) { const dwt_subband_info_t *subband = &subbands[s]; if (subband->level == 0) continue; // Skip LL band // Calculate band index for RNG (matches Kotlin: level + subbandType * 31 + 16777619) uint32_t band = subband->level + subband->subband_type * 31 + 16777619; // Remove noise from each coefficient in this subband for (int i = 0; i < subband->coeff_count; i++) { const int idx = subband->coeff_start + i; if (idx < width * height) { // Calculate 2D position from linear index int y = idx / width; int x = idx % width; // Generate same deterministic noise as encoder uint32_t rng_val = tav_grain_synthesis_rng(frame_num, band, x, y); float noise = tav_grain_triangular_noise(rng_val); // Subtract noise from coefficient coeffs[idx] -= noise * noise_amplitude; } } } } //============================================================================= // Significance Map Postprocessing (matches TSVM exactly) //============================================================================= // Helper: Extract 2-bit code from bit-packed array static inline int get_twobit_code(const uint8_t *map_data, int map_bytes, int coeff_idx) { int bit_pos = coeff_idx * 2; int byte_idx = bit_pos / 8; int bit_offset = bit_pos % 8; uint8_t byte0 = map_data[byte_idx]; int code = (byte0 >> bit_offset) & 0x03; // Handle byte boundary crossing if (bit_offset == 7 && byte_idx + 1 < map_bytes) { uint8_t byte1 = map_data[byte_idx + 1]; code = ((byte0 >> 7) & 0x01) | ((byte1 << 1) & 0x02); } return code; } // Decoder: reconstruct coefficients from 2-bit map format (entropyCoder=0) // Layout: [Y_map_2bit][Co_map_2bit][Cg_map_2bit][Y_others][Co_others][Cg_others] // 2-bit encoding: 00=0, 01=+1, 10=-1, 11=other (stored in value array) static void postprocess_coefficients_twobit(uint8_t *compressed_data, int coeff_count, int16_t *output_y, int16_t *output_co, int16_t *output_cg) { int 
map_bytes = (coeff_count * 2 + 7) / 8; // 2 bits per coefficient // (Debug output removed) // Map offsets (all channels present for Y-Co-Cg layout) uint8_t *y_map = compressed_data; uint8_t *co_map = compressed_data + map_bytes; uint8_t *cg_map = compressed_data + map_bytes * 2; // Count "other" values (code 11) for each channel int y_others = 0, co_others = 0, cg_others = 0; for (int i = 0; i < coeff_count; i++) { if (get_twobit_code(y_map, map_bytes, i) == 3) y_others++; if (get_twobit_code(co_map, map_bytes, i) == 3) co_others++; if (get_twobit_code(cg_map, map_bytes, i) == 3) cg_others++; } // (Debug output removed) // Value array offsets (after all maps) uint8_t *value_ptr = compressed_data + map_bytes * 3; int16_t *y_values = (int16_t *)value_ptr; int16_t *co_values = (int16_t *)(value_ptr + y_others * 2); int16_t *cg_values = (int16_t *)(value_ptr + y_others * 2 + co_others * 2); // Reconstruct coefficients int y_value_idx = 0, co_value_idx = 0, cg_value_idx = 0; for (int i = 0; i < coeff_count; i++) { // Y channel int y_code = get_twobit_code(y_map, map_bytes, i); switch (y_code) { case 0: output_y[i] = 0; break; case 1: output_y[i] = 1; break; case 2: output_y[i] = -1; break; case 3: output_y[i] = y_values[y_value_idx++]; break; } // Co channel int co_code = get_twobit_code(co_map, map_bytes, i); switch (co_code) { case 0: output_co[i] = 0; break; case 1: output_co[i] = 1; break; case 2: output_co[i] = -1; break; case 3: output_co[i] = co_values[co_value_idx++]; break; } // Cg channel int cg_code = get_twobit_code(cg_map, map_bytes, i); switch (cg_code) { case 0: output_cg[i] = 0; break; case 1: output_cg[i] = 1; break; case 2: output_cg[i] = -1; break; case 3: output_cg[i] = cg_values[cg_value_idx++]; break; } } } //============================================================================= // DWT Inverse Transforms (matches TSVM) //============================================================================= // 9/7 inverse DWT (from TSVM Kotlin code) 
static void dwt_97_inverse_1d(float *data, int length) { if (length < 2) return; // Debug: Check if input has non-zero values static int call_count = 0; if (call_count < 5) { int nonzero = 0; for (int i = 0; i < length; i++) { if (data[i] != 0.0f) nonzero++; } fprintf(stderr, " dwt_97_inverse_1d call #%d: length=%d, nonzero=%d, first 5: %.1f %.1f %.1f %.1f %.1f\n", call_count, length, nonzero, data[0], length > 1 ? data[1] : 0.0f, length > 2 ? data[2] : 0.0f, length > 3 ? data[3] : 0.0f, length > 4 ? data[4] : 0.0f); call_count++; } float *temp = malloc(length * sizeof(float)); int half = (length + 1) / 2; // Split into low and high frequency components (matching TSVM layout) for (int i = 0; i < half; i++) { temp[i] = data[i]; // Low-pass coefficients (first half) } for (int i = 0; i < length / 2; i++) { if (half + i < length) { temp[half + i] = data[half + i]; // High-pass coefficients (second half) } } // 9/7 inverse lifting coefficients from TSVM const float alpha = -1.586134342f; const float beta = -0.052980118f; const float gamma = 0.882911076f; const float delta = 0.443506852f; const float K = 1.230174105f; // Step 1: Undo scaling for (int i = 0; i < half; i++) { temp[i] /= K; // Low-pass coefficients } for (int i = 0; i < length / 2; i++) { if (half + i < length) { temp[half + i] *= K; // High-pass coefficients } } // Step 2: Undo δ update for (int i = 0; i < half; i++) { float d_curr = (half + i < length) ? temp[half + i] : 0.0f; float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr; temp[i] -= delta * (d_curr + d_prev); } // Step 3: Undo γ predict for (int i = 0; i < length / 2; i++) { if (half + i < length) { float s_curr = temp[i]; float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; temp[half + i] -= gamma * (s_curr + s_next); } } // Step 4: Undo β update for (int i = 0; i < half; i++) { float d_curr = (half + i < length) ? temp[half + i] : 0.0f; float d_prev = (i > 0 && half + i - 1 < length) ? 
temp[half + i - 1] : d_curr; temp[i] -= beta * (d_curr + d_prev); } // Step 5: Undo α predict for (int i = 0; i < length / 2; i++) { if (half + i < length) { float s_curr = temp[i]; float s_next = (i + 1 < half) ? temp[i + 1] : s_curr; temp[half + i] -= alpha * (s_curr + s_next); } } // Reconstruction - interleave low and high pass for (int i = 0; i < length; i++) { if (i % 2 == 0) { // Even positions: low-pass coefficients data[i] = temp[i / 2]; } else { // Odd positions: high-pass coefficients int idx = i / 2; if (half + idx < length) { data[i] = temp[half + idx]; } else { data[i] = 0.0f; } } } // Debug: Check output if (call_count <= 5) { int nonzero_out = 0; for (int i = 0; i < length; i++) { if (data[i] != 0.0f) nonzero_out++; } fprintf(stderr, " -> OUTPUT: nonzero=%d, first 5: %.1f %.1f %.1f %.1f %.1f\n", nonzero_out, data[0], length > 1 ? data[1] : 0.0f, length > 2 ? data[2] : 0.0f, length > 3 ? data[3] : 0.0f, length > 4 ? data[4] : 0.0f); } free(temp); } // 5/3 inverse DWT (simplified - uses 9/7 for now) static void dwt_53_inverse_1d(float *data, int length) { if (length < 2) return; // TODO: Implement proper 5/3 from TSVM if needed dwt_97_inverse_1d(data, length); } // Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling) static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) { int max_size = (width > height) ? width : height; float *temp_row = malloc(max_size * sizeof(float)); float *temp_col = malloc(max_size * sizeof(float)); // Pre-calculate exact sequence of widths/heights from forward transform // This is CRITICAL for non-power-of-2 dimensions (e.g., 560, 448) // Forward transform uses: width, (width+1)/2, ((width+1)/2+1)/2, ... 
// Inverse MUST use the exact same sequence in reverse int *widths = malloc((levels + 1) * sizeof(int)); int *heights = malloc((levels + 1) * sizeof(int)); widths[0] = width; heights[0] = height; for (int i = 1; i <= levels; i++) { widths[i] = (widths[i - 1] + 1) / 2; heights[i] = (heights[i - 1] + 1) / 2; } // Debug: Print dimension sequence static int debug_once = 1; if (debug_once) { fprintf(stderr, "DWT dimension sequence for %dx%d with %d levels:\n", width, height, levels); for (int i = 0; i <= levels; i++) { fprintf(stderr, " Level %d: %dx%d\n", i, widths[i], heights[i]); } debug_once = 0; } // TSVM: for (level in levels - 1 downTo 0) // Apply inverse transforms using pre-calculated dimensions for (int level = levels - 1; level >= 0; level--) { int current_width = widths[level]; int current_height = heights[level]; if (current_width < 1 || current_height < 1) continue; if (current_width == 1 && current_height == 1) continue; // TSVM: Column inverse transform first (vertical) for (int x = 0; x < current_width; x++) { for (int y = 0; y < current_height; y++) { temp_col[y] = data[y * width + x]; } if (filter_type == 0) { dwt_53_inverse_1d(temp_col, current_height); } else { dwt_97_inverse_1d(temp_col, current_height); } for (int y = 0; y < current_height; y++) { data[y * width + x] = temp_col[y]; } } // TSVM: Row inverse transform second (horizontal) for (int y = 0; y < current_height; y++) { for (int x = 0; x < current_width; x++) { temp_row[x] = data[y * width + x]; } if (filter_type == 0) { dwt_53_inverse_1d(temp_row, current_width); } else { dwt_97_inverse_1d(temp_row, current_width); } for (int x = 0; x < current_width; x++) { data[y * width + x] = temp_row[x]; } } // Debug after EVERY level static int first_frame_levels = 1; if (first_frame_levels && level <= 2) { // Only log levels 2, 1, 0 for first frame int nonzero_level = 0; for (int y = 0; y < current_height; y++) { for (int x = 0; x < current_width; x++) { if (fabsf(data[y * width + x]) > 0.001f) { 
// Use fabs for better zero detection nonzero_level++; } } } fprintf(stderr, "After level %d (%dx%d): nonzero=%d/%d, data[0]=%.1f, data[1]=%.1f, data[width]=%.1f\n", level, current_width, current_height, nonzero_level, current_width * current_height, data[0], data[1], data[width]); if (level == 0) first_frame_levels = 0; // Stop after level 0 of first frame } } // Debug: Check buffer after all levels complete static int debug_output_once = 1; if (debug_output_once) { int nonzero_final = 0; for (int i = 0; i < width * height; i++) { if (data[i] != 0.0f) nonzero_final++; } fprintf(stderr, "After ALL IDWT levels complete: nonzero=%d/%d, first 10: ", nonzero_final, width * height); for (int i = 0; i < 10 && i < width * height; i++) { fprintf(stderr, "%.1f ", data[i]); } fprintf(stderr, "\n"); debug_output_once = 0; } free(widths); free(heights); free(temp_row); free(temp_col); } //============================================================================= // YCoCg-R / ICtCp to RGB Conversion (matches TSVM) //============================================================================= static void ycocg_r_to_rgb(float y, float co, float cg, uint8_t *r, uint8_t *g, uint8_t *b) { float tmp = y - cg / 2.0f; float g_val = cg + tmp; float b_val = tmp - co / 2.0f; float r_val = co + b_val; *r = CLAMP((int)(r_val + 0.5f), 0, 255); *g = CLAMP((int)(g_val + 0.5f), 0, 255); *b = CLAMP((int)(b_val + 0.5f), 0, 255); } // ICtCp to RGB conversion (for even TAV versions) static void ictcp_to_rgb(float i, float ct, float cp, uint8_t *r, uint8_t *g, uint8_t *b) { // ICtCp → RGB conversion (inverse of RGB → ICtCp) // Step 1: ICtCp → LMS float l = i + 0.008609f * ct; float m = i - 0.008609f * ct; float s = i + 0.560031f * cp; // Step 2: LMS (nonlinear) → LMS (linear) // Inverse PQ transfer function (simplified) l = powf(fmaxf(l, 0.0f), 1.0f / 0.1593f); m = powf(fmaxf(m, 0.0f), 1.0f / 0.1593f); s = powf(fmaxf(s, 0.0f), 1.0f / 0.1593f); // Step 3: LMS → RGB float r_val = 5.432622f * l - 
4.679910f * m + 0.247288f * s; float g_val = -1.106160f * l + 2.311198f * m - 0.205038f * s; float b_val = 0.028262f * l - 0.195689f * m + 1.167427f * s; *r = CLAMP((int)(r_val * 255.0f + 0.5f), 0, 255); *g = CLAMP((int)(g_val * 255.0f + 0.5f), 0, 255); *b = CLAMP((int)(b_val * 255.0f + 0.5f), 0, 255); } //============================================================================= // Decoder State Structure //============================================================================= typedef struct { FILE *input_fp; tav_header_t header; uint8_t *current_frame_rgb; uint8_t *reference_frame_rgb; float *dwt_buffer_y; float *dwt_buffer_co; float *dwt_buffer_cg; float *reference_ycocg_y; // For P-frame delta accumulation float *reference_ycocg_co; float *reference_ycocg_cg; int frame_count; int frame_size; int is_monoblock; // True if version 3-6 (single tile mode) // FFmpeg pipes for video and audio FILE *video_pipe; FILE *audio_pipe; pid_t ffmpeg_pid; // Audio buffer for TAD → PCMu8 conversion uint8_t *audio_buffer; size_t audio_buffer_size; size_t audio_buffer_used; } tav_decoder_t; //============================================================================= // Decoder Initialization and Cleanup //============================================================================= static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file) { tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t)); if (!decoder) return NULL; decoder->input_fp = fopen(input_file, "rb"); if (!decoder->input_fp) { free(decoder); return NULL; } // Read header if (fread(&decoder->header, sizeof(tav_header_t), 1, decoder->input_fp) != 1) { fclose(decoder->input_fp); free(decoder); return NULL; } // Verify magic if (memcmp(decoder->header.magic, TAV_MAGIC, 8) != 0) { fclose(decoder->input_fp); free(decoder); return NULL; } decoder->frame_size = decoder->header.width * decoder->header.height; decoder->is_monoblock = (decoder->header.version >= 3 && 
decoder->header.version <= 6); // Allocate buffers decoder->current_frame_rgb = calloc(decoder->frame_size * 3, 1); decoder->reference_frame_rgb = calloc(decoder->frame_size * 3, 1); decoder->dwt_buffer_y = calloc(decoder->frame_size, sizeof(float)); decoder->dwt_buffer_co = calloc(decoder->frame_size, sizeof(float)); decoder->dwt_buffer_cg = calloc(decoder->frame_size, sizeof(float)); decoder->reference_ycocg_y = calloc(decoder->frame_size, sizeof(float)); decoder->reference_ycocg_co = calloc(decoder->frame_size, sizeof(float)); decoder->reference_ycocg_cg = calloc(decoder->frame_size, sizeof(float)); // Audio buffer (32 KB should be enough for most audio packets) decoder->audio_buffer_size = 32768; decoder->audio_buffer = malloc(decoder->audio_buffer_size); decoder->audio_buffer_used = 0; // Create FFmpeg process for video encoding int video_pipe_fd[2], audio_pipe_fd[2]; if (pipe(video_pipe_fd) == -1 || pipe(audio_pipe_fd) == -1) { fprintf(stderr, "Failed to create pipes\n"); free(decoder->current_frame_rgb); free(decoder->reference_frame_rgb); free(decoder->dwt_buffer_y); free(decoder->dwt_buffer_co); free(decoder->dwt_buffer_cg); free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); free(decoder->audio_buffer); fclose(decoder->input_fp); free(decoder); return NULL; } decoder->ffmpeg_pid = fork(); if (decoder->ffmpeg_pid == -1) { fprintf(stderr, "Failed to fork FFmpeg process\n"); close(video_pipe_fd[0]); close(video_pipe_fd[1]); close(audio_pipe_fd[0]); close(audio_pipe_fd[1]); free(decoder->current_frame_rgb); free(decoder->reference_frame_rgb); free(decoder->dwt_buffer_y); free(decoder->dwt_buffer_co); free(decoder->dwt_buffer_cg); free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); free(decoder->audio_buffer); fclose(decoder->input_fp); free(decoder); return NULL; } else if (decoder->ffmpeg_pid == 0) { // Child process - FFmpeg close(video_pipe_fd[1]); // 
Close write end close(audio_pipe_fd[1]); char video_size[32]; char framerate[16]; snprintf(video_size, sizeof(video_size), "%dx%d", decoder->header.width, decoder->header.height); snprintf(framerate, sizeof(framerate), "%d", decoder->header.fps); // Redirect pipes to stdin dup2(video_pipe_fd[0], 3); // Video input on fd 3 dup2(audio_pipe_fd[0], 4); // Audio input on fd 4 close(video_pipe_fd[0]); close(audio_pipe_fd[0]); execl("/usr/bin/ffmpeg", "ffmpeg", "-f", "rawvideo", "-pixel_format", "rgb24", "-video_size", video_size, "-framerate", framerate, "-i", "pipe:3", // Video from fd 3 "-color_range", "2", // Note: Audio decoding not yet implemented, so we output video-only MKV "-c:v", "ffv1", // FFV1 codec "-level", "3", // FFV1 level 3 "-coder", "1", // Range coder "-context", "1", // Large context "-g", "1", // GOP size 1 (all I-frames) "-slices", "24", // 24 slices for threading "-slicecrc", "1", // CRC per slice "-pixel_format", "rgb24", // make FFmpeg encode to RGB "-color_range", "2", "-f", "matroska", // MKV container output_file, "-y", // Overwrite output "-v", "warning", // Minimal logging (char*)NULL); fprintf(stderr, "Failed to start FFmpeg\n"); exit(1); } else { // Parent process close(video_pipe_fd[0]); // Close read ends close(audio_pipe_fd[0]); decoder->video_pipe = fdopen(video_pipe_fd[1], "wb"); decoder->audio_pipe = fdopen(audio_pipe_fd[1], "wb"); if (!decoder->video_pipe || !decoder->audio_pipe) { fprintf(stderr, "Failed to open pipes for writing\n"); kill(decoder->ffmpeg_pid, SIGTERM); free(decoder->current_frame_rgb); free(decoder->reference_frame_rgb); free(decoder->dwt_buffer_y); free(decoder->dwt_buffer_co); free(decoder->dwt_buffer_cg); free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); free(decoder->audio_buffer); fclose(decoder->input_fp); free(decoder); return NULL; } } return decoder; } static void tav_decoder_free(tav_decoder_t *decoder) { if (!decoder) return; if (decoder->input_fp) 
fclose(decoder->input_fp); if (decoder->video_pipe) fclose(decoder->video_pipe); if (decoder->audio_pipe) fclose(decoder->audio_pipe); // Wait for FFmpeg to finish if (decoder->ffmpeg_pid > 0) { int status; waitpid(decoder->ffmpeg_pid, &status, 0); } free(decoder->current_frame_rgb); free(decoder->reference_frame_rgb); free(decoder->dwt_buffer_y); free(decoder->dwt_buffer_co); free(decoder->dwt_buffer_cg); free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); free(decoder->audio_buffer); free(decoder); } //============================================================================= // Frame Decoding Logic //============================================================================= static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint32_t packet_size) { // Variable declarations for cleanup uint8_t *compressed_data = NULL; uint8_t *decompressed_data = NULL; int16_t *quantized_y = NULL; int16_t *quantized_co = NULL; int16_t *quantized_cg = NULL; int decode_success = 1; // Assume success, set to 0 on error // Read and decompress frame data compressed_data = malloc(packet_size); if (!compressed_data) { fprintf(stderr, "Error: Failed to allocate %u bytes for compressed data\n", packet_size); decode_success = 0; goto write_frame; } if (fread(compressed_data, 1, packet_size, decoder->input_fp) != packet_size) { fprintf(stderr, "Error: Failed to read %u bytes of compressed frame data\n", packet_size); decode_success = 0; goto write_frame; } size_t decompressed_size = ZSTD_getFrameContentSize(compressed_data, packet_size); if (decompressed_size == ZSTD_CONTENTSIZE_ERROR || decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { fprintf(stderr, "Warning: Could not determine decompressed size, using estimate\n"); decompressed_size = decoder->frame_size * 3 * sizeof(int16_t) + 1024; } decompressed_data = malloc(decompressed_size); if (!decompressed_data) { fprintf(stderr, "Error: Failed to allocate %zu 
bytes for decompressed data\n", decompressed_size); decode_success = 0; goto write_frame; } // Debug first 3 frames compression static int decomp_debug = 0; if (decomp_debug < 3) { fprintf(stderr, " [ZSTD frame %d] Compressed size: %u, buffer size: %zu\n", decomp_debug, packet_size, decompressed_size); fprintf(stderr, " [ZSTD frame %d] First 16 bytes of COMPRESSED data: ", decomp_debug); for (int i = 0; i < 16 && i < (int)packet_size; i++) { fprintf(stderr, "%02X ", compressed_data[i]); } fprintf(stderr, "\n"); } size_t actual_size = ZSTD_decompress(decompressed_data, decompressed_size, compressed_data, packet_size); if (ZSTD_isError(actual_size)) { fprintf(stderr, "Error: ZSTD decompression failed: %s\n", ZSTD_getErrorName(actual_size)); fprintf(stderr, " Compressed size: %u, Buffer size: %zu\n", packet_size, decompressed_size); decode_success = 0; goto write_frame; } if (decomp_debug < 3) { fprintf(stderr, " [ZSTD frame %d] Decompressed size: %zu\n", decomp_debug, actual_size); fprintf(stderr, " [ZSTD frame %d] First 16 bytes of DECOMPRESSED data: ", decomp_debug); for (int i = 0; i < 16 && i < (int)actual_size; i++) { fprintf(stderr, "%02X ", decompressed_data[i]); } fprintf(stderr, "\n"); decomp_debug++; } // Parse block data uint8_t *ptr = decompressed_data; uint8_t mode = *ptr++; uint8_t qy_override = *ptr++; uint8_t qco_override = *ptr++; uint8_t qcg_override = *ptr++; // IMPORTANT: Both header and override store QLUT indices, not values! // Override of 0 means "use header value" int qy = qy_override ? QLUT[qy_override] : QLUT[decoder->header.quantiser_y]; int qco = qco_override ? QLUT[qco_override] : QLUT[decoder->header.quantiser_co]; int qcg = qcg_override ? 
QLUT[qcg_override] : QLUT[decoder->header.quantiser_cg]; // Debug first few frames if (decoder->frame_count < 2) { fprintf(stderr, "Frame %d: mode=%d, Q: Y=%d, Co=%d, Cg=%d, decompressed=%zu bytes\n", decoder->frame_count, mode, qy, qco, qcg, actual_size); } if (mode == TAV_MODE_SKIP) { // Copy from reference frame memcpy(decoder->current_frame_rgb, decoder->reference_frame_rgb, decoder->frame_size * 3); } else { // Decode coefficients (use function-level variables for proper cleanup) int coeff_count = decoder->frame_size; quantized_y = calloc(coeff_count, sizeof(int16_t)); quantized_co = calloc(coeff_count, sizeof(int16_t)); quantized_cg = calloc(coeff_count, sizeof(int16_t)); if (!quantized_y || !quantized_co || !quantized_cg) { fprintf(stderr, "Error: Failed to allocate coefficient buffers\n"); decode_success = 0; goto write_frame; } // Use 2-bit map format (entropyCoder=0 / Twobit-map) postprocess_coefficients_twobit(ptr, coeff_count, quantized_y, quantized_co, quantized_cg); // Debug: Check first few coefficients if (decoder->frame_count == 32) { fprintf(stderr, " First 10 quantized Y coeffs: "); for (int i = 0; i < 10 && i < coeff_count; i++) { fprintf(stderr, "%d ", quantized_y[i]); } fprintf(stderr, "\n"); // Check for any large quantized values that should produce bright pixels int max_quant_y = 0; for (int i = 0; i < coeff_count; i++) { int abs_val = quantized_y[i] < 0 ? 
-quantized_y[i] : quantized_y[i]; if (abs_val > max_quant_y) max_quant_y = abs_val; } fprintf(stderr, " Max quantized Y coefficient: %d\n", max_quant_y); } // Dequantize (perceptual for versions 5-8, uniform for 1-4) const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8); if (is_perceptual) { dequantize_dwt_subbands_perceptual(0, qy, quantized_y, decoder->dwt_buffer_y, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, qy, 0, decoder->frame_count); // Debug: Check if values survived the function call if (decoder->frame_count == 32) { fprintf(stderr, " RIGHT AFTER dequantize_Y returns: first 5 values: %.1f %.1f %.1f %.1f %.1f\n", decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2], decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]); } dequantize_dwt_subbands_perceptual(0, qy, quantized_co, decoder->dwt_buffer_co, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, qco, 1, decoder->frame_count); dequantize_dwt_subbands_perceptual(0, qy, quantized_cg, decoder->dwt_buffer_cg, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, qcg, 1, decoder->frame_count); } else { for (int i = 0; i < coeff_count; i++) { decoder->dwt_buffer_y[i] = quantized_y[i] * qy; decoder->dwt_buffer_co[i] = quantized_co[i] * qco; decoder->dwt_buffer_cg[i] = quantized_cg[i] * qcg; } } // Debug: Check dequantized values using correct subband layout if (decoder->frame_count == 32) { dwt_subband_info_t subbands[32]; const int subband_count = calculate_subband_layout(decoder->header.width, decoder->header.height, decoder->header.decomp_levels, subbands); // Find LL band (highest level, type 0) for (int s = 0; s < subband_count; s++) { if (subbands[s].level == decoder->header.decomp_levels && subbands[s].subband_type == 0) { fprintf(stderr, " LL band: level=%d, start=%d, count=%d\n", subbands[s].level, subbands[s].coeff_start, subbands[s].coeff_count); 
fprintf(stderr, " Reading LL first 5 from dwt_buffer_y[0-4]: %.1f %.1f %.1f %.1f %.1f\n", decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2], decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]); // Find max in CORRECT LL band float max_ll = -999.0f; for (int i = 0; i < subbands[s].coeff_count; i++) { int idx = subbands[s].coeff_start + i; if (decoder->dwt_buffer_y[idx] > max_ll) max_ll = decoder->dwt_buffer_y[idx]; } fprintf(stderr, " Max LL coefficient BEFORE grain removal: %.1f\n", max_ll); break; } } } // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT) remove_grain_synthesis_decoder(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y); // Debug: Check LL band AFTER grain removal if (decoder->frame_count == 32) { int ll_width = decoder->header.width; int ll_height = decoder->header.height; for (int l = 0; l < decoder->header.decomp_levels; l++) { ll_width = (ll_width + 1) / 2; ll_height = (ll_height + 1) / 2; } float max_ll = -999.0f; for (int i = 0; i < ll_width * ll_height; i++) { if (decoder->dwt_buffer_y[i] > max_ll) max_ll = decoder->dwt_buffer_y[i]; } fprintf(stderr, " Max LL coefficient AFTER grain removal: %.1f\n", max_ll); } // Apply inverse DWT with correct non-power-of-2 dimension handling // Note: quantized arrays freed at write_frame label apply_inverse_dwt_multilevel(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, decoder->header.wavelet_filter); apply_inverse_dwt_multilevel(decoder->dwt_buffer_co, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, decoder->header.wavelet_filter); apply_inverse_dwt_multilevel(decoder->dwt_buffer_cg, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, decoder->header.wavelet_filter); // Debug: Check spatial domain values after IDWT if 
(decoder->frame_count == 32) { float max_y_spatial = -999.0f; for (int i = 0; i < decoder->frame_size; i++) { if (decoder->dwt_buffer_y[i] > max_y_spatial) max_y_spatial = decoder->dwt_buffer_y[i]; } fprintf(stderr, " Max Y in spatial domain AFTER IDWT: %.1f\n", max_y_spatial); } // Debug: Check spatial domain values after IDWT (original debug) if (decoder->frame_count < 1) { fprintf(stderr, " After IDWT - First 10 Y values: "); for (int i = 0; i < 10 && i < decoder->frame_size; i++) { fprintf(stderr, "%.1f ", decoder->dwt_buffer_y[i]); } fprintf(stderr, "\n"); fprintf(stderr, " Y range: min=%.1f, max=%.1f\n", decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[decoder->frame_size-1]); } // Handle P-frame delta accumulation (in YCoCg float space) if (packet_type == TAV_PACKET_PFRAME && mode == TAV_MODE_DELTA) { for (int i = 0; i < decoder->frame_size; i++) { decoder->dwt_buffer_y[i] += decoder->reference_ycocg_y[i]; decoder->dwt_buffer_co[i] += decoder->reference_ycocg_co[i]; decoder->dwt_buffer_cg[i] += decoder->reference_ycocg_cg[i]; } } // Convert YCoCg-R/ICtCp to RGB const int is_ictcp = (decoder->header.version % 2 == 0); float max_y = -999, max_co = -999, max_cg = -999; int max_r = 0, max_g = 0, max_b = 0; for (int i = 0; i < decoder->frame_size; i++) { uint8_t r, g, b; if (is_ictcp) { ictcp_to_rgb(decoder->dwt_buffer_y[i], decoder->dwt_buffer_co[i], decoder->dwt_buffer_cg[i], &r, &g, &b); } else { ycocg_r_to_rgb(decoder->dwt_buffer_y[i], decoder->dwt_buffer_co[i], decoder->dwt_buffer_cg[i], &r, &g, &b); } // Track max values for debugging if (decoder->frame_count == 1000) { if (decoder->dwt_buffer_y[i] > max_y) max_y = decoder->dwt_buffer_y[i]; if (decoder->dwt_buffer_co[i] > max_co) max_co = decoder->dwt_buffer_co[i]; if (decoder->dwt_buffer_cg[i] > max_cg) max_cg = decoder->dwt_buffer_cg[i]; if (r > max_r) max_r = r; if (g > max_g) max_g = g; if (b > max_b) max_b = b; } // RGB byte order for FFmpeg rgb24 decoder->current_frame_rgb[i * 3 + 0] = r; 
decoder->current_frame_rgb[i * 3 + 1] = g; decoder->current_frame_rgb[i * 3 + 2] = b; } if (decoder->frame_count == 1000) { fprintf(stderr, "\n=== Frame 1000 Value Analysis ===\n"); fprintf(stderr, "Max YCoCg values: Y=%.1f, Co=%.1f, Cg=%.1f\n", max_y, max_co, max_cg); fprintf(stderr, "Max RGB values: R=%d, G=%d, B=%d\n", max_r, max_g, max_b); } // Debug: Check RGB output if (decoder->frame_count < 1) { fprintf(stderr, " First 5 pixels RGB: "); for (int i = 0; i < 5 && i < decoder->frame_size; i++) { fprintf(stderr, "(%d,%d,%d) ", decoder->current_frame_rgb[i*3], decoder->current_frame_rgb[i*3+1], decoder->current_frame_rgb[i*3+2]); } fprintf(stderr, "\n"); } // Update reference YCoCg frame memcpy(decoder->reference_ycocg_y, decoder->dwt_buffer_y, decoder->frame_size * sizeof(float)); memcpy(decoder->reference_ycocg_co, decoder->dwt_buffer_co, decoder->frame_size * sizeof(float)); memcpy(decoder->reference_ycocg_cg, decoder->dwt_buffer_cg, decoder->frame_size * sizeof(float)); } // Update reference frame memcpy(decoder->reference_frame_rgb, decoder->current_frame_rgb, decoder->frame_size * 3); write_frame: // Clean up temporary allocations if (compressed_data) free(compressed_data); if (decompressed_data) free(decompressed_data); if (quantized_y) free(quantized_y); if (quantized_co) free(quantized_co); if (quantized_cg) free(quantized_cg); // If decoding failed, fill frame with black to maintain stream alignment if (!decode_success) { memset(decoder->current_frame_rgb, 0, decoder->frame_size * 3); fprintf(stderr, "Warning: Writing black frame %d due to decode error\n", decoder->frame_count); } // Write frame to video pipe with retry on partial writes (ALWAYS write to maintain alignment) size_t bytes_to_write = decoder->frame_size * 3; size_t total_written = 0; const uint8_t *write_ptr = decoder->current_frame_rgb; while (total_written < bytes_to_write) { size_t bytes_written = fwrite(write_ptr + total_written, 1, bytes_to_write - total_written, 
decoder->video_pipe); if (bytes_written == 0) { if (ferror(decoder->video_pipe)) { fprintf(stderr, "Error: Pipe write error at frame %d (wrote %zu/%zu bytes) - aborting\n", decoder->frame_count, total_written, bytes_to_write); // Cannot maintain stream alignment if pipe is broken - this is fatal return -1; } // Pipe might be full, flush and retry fflush(decoder->video_pipe); usleep(1000); // 1ms delay } else { total_written += bytes_written; } } // Ensure data is flushed to FFmpeg if (fflush(decoder->video_pipe) != 0) { fprintf(stderr, "Error: Failed to flush video pipe at frame %d - aborting\n", decoder->frame_count); // Cannot maintain stream alignment if pipe is broken - this is fatal return -1; } decoder->frame_count++; // Return success only if decoding succeeded; still return 1 to continue processing // (we wrote a frame either way to maintain stream alignment) return decode_success ? 1 : 1; // Always return 1 to continue, errors are non-fatal now } //============================================================================= // Main Decoding Loop //============================================================================= static void print_usage(const char *prog) { printf("TAV Decoder - Converts TAV video to FFV1+PCMu8 in MKV container\n"); printf("Version: %s\n\n", DECODER_VENDOR_STRING); printf("Usage: %s -i input.tav -o output.mkv\n\n", prog); printf("Options:\n"); printf(" -i Input TAV file\n"); printf(" -o Output MKV file (FFV1 video + PCMu8 audio)\n"); printf(" -v Verbose output\n"); printf(" -h, --help Show this help\n\n"); printf("Supported features (matches TSVM decoder):\n"); printf(" - I-frames and P-frames (delta mode)\n"); printf(" - GOP unified 3D DWT (temporal compression)\n"); printf(" - TAD audio (decoded to PCMu8)\n"); printf(" - MP2 audio (passed through)\n"); printf(" - All wavelet types (5/3, 9/7, CDF 13/7, DD-4, Haar)\n"); printf(" - Perceptual quantization (versions 5-8)\n"); printf(" - YCoCg-R and ICtCp color spaces\n\n"); 
printf("Unsupported features (not in TSVM decoder):\n"); printf(" - MC-EZBC motion compensation\n"); printf(" - MPEG-style residual coding (P/B-frames)\n"); printf(" - Adaptive block partitioning\n\n"); } int main(int argc, char *argv[]) { char *input_file = NULL; char *output_file = NULL; int verbose = 0; static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; int opt; while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, NULL)) != -1) { switch (opt) { case 'i': input_file = optarg; break; case 'o': output_file = optarg; break; case 'v': verbose = 1; break; case 'h': print_usage(argv[0]); return 0; default: print_usage(argv[0]); return 1; } } if (!input_file || !output_file) { fprintf(stderr, "Error: Both input and output files are required\n\n"); print_usage(argv[0]); return 1; } tav_decoder_t *decoder = tav_decoder_init(input_file, output_file); if (!decoder) { fprintf(stderr, "Failed to initialize decoder\n"); return 1; } if (verbose) { printf("TAV Decoder - %dx%d @ %dfps\n", decoder->header.width, decoder->header.height, decoder->header.fps); printf("Wavelet: %s, Levels: %d\n", decoder->header.wavelet_filter == 0 ? "5/3" : decoder->header.wavelet_filter == 1 ? "9/7" : decoder->header.wavelet_filter == 2 ? "CDF 13/7" : decoder->header.wavelet_filter == 16 ? "DD-4" : decoder->header.wavelet_filter == 255 ? "Haar" : "Unknown", decoder->header.decomp_levels); printf("Version: %d (%s, %s)\n", decoder->header.version, decoder->header.version % 2 == 0 ? "ICtCp" : "YCoCg-R", decoder->is_monoblock ? 
"monoblock" : "tiled"); printf("Output: %s (FFV1 level 3 + PCMu8 @ 32 KHz)\n", output_file); } // Main decoding loop int result = 1; int total_packets = 0; int iframe_count = 0; while (result > 0) { uint8_t packet_type; if (fread(&packet_type, 1, 1, decoder->input_fp) != 1) { result = 0; // EOF break; } total_packets++; // Handle sync packets (no size field) if (packet_type == TAV_PACKET_SYNC || packet_type == TAV_PACKET_SYNC_NTSC) { if (verbose && total_packets < 20) { fprintf(stderr, "Packet %d: SYNC (0x%02X)\n", total_packets, packet_type); } continue; } // Handle timecode packets (no size field, just 8 bytes of uint64 timecode) if (packet_type == TAV_PACKET_TIMECODE) { uint64_t timecode_ns; if (fread(&timecode_ns, 8, 1, decoder->input_fp) != 1) { fprintf(stderr, "Error: Failed to read timecode\n"); result = -1; break; } if (verbose && total_packets < 20) { double timecode_sec = timecode_ns / 1000000000.0; fprintf(stderr, "Packet %d: TIMECODE (0x%02X) - %.6f seconds\n", total_packets, packet_type, timecode_sec); } continue; } // Handle GOP sync packets (no size field, just 1 byte frame count) if (packet_type == TAV_PACKET_GOP_SYNC) { uint8_t frame_count; if (fread(&frame_count, 1, 1, decoder->input_fp) != 1) { fprintf(stderr, "Error: Failed to read GOP sync frame count\n"); result = -1; break; } if (verbose) { fprintf(stderr, "Packet %d: GOP_SYNC (0x%02X) - %u frames from GOP\n", total_packets, packet_type, frame_count); } // Frame count is informational only for now continue; } // Handle GOP unified packets (custom format: 1-byte gop_size + 4-byte compressed_size) if (packet_type == TAV_PACKET_GOP_UNIFIED) { uint8_t gop_size; uint32_t compressed_size; if (fread(&gop_size, 1, 1, decoder->input_fp) != 1 || fread(&compressed_size, 4, 1, decoder->input_fp) != 1) { fprintf(stderr, "Error: Failed to read GOP unified packet header\n"); result = -1; break; } if (verbose && total_packets < 20) { fprintf(stderr, "Packet %d: GOP_UNIFIED (0x%02X), %u frames, %u bytes - 
skipping\n", total_packets, packet_type, gop_size, compressed_size); } // Skip GOP data for now fseek(decoder->input_fp, compressed_size, SEEK_CUR); fprintf(stderr, "\nWarning: GOP unified packets not yet implemented (skipping %u frames)\n", gop_size); continue; } // Handle TAD audio packets (custom format: 2-byte sample_count + 4-byte payload_size) if (packet_type == TAV_PACKET_AUDIO_TAD) { uint16_t sample_count; uint32_t payload_size; if (fread(&sample_count, 2, 1, decoder->input_fp) != 1 || fread(&payload_size, 4, 1, decoder->input_fp) != 1) { fprintf(stderr, "\nError: Failed to read TAD packet header\n"); result = -1; break; } if (verbose && total_packets < 20) { fprintf(stderr, "Packet %d: TAD (0x%02X), %u samples, %u payload bytes - skipping\n", total_packets, packet_type, sample_count, payload_size); } // Skip TAD data for now fseek(decoder->input_fp, payload_size, SEEK_CUR); fprintf(stderr, "\nWarning: TAD audio decoding not yet fully implemented (skipping %u samples)\n", sample_count); continue; } // Handle extended header (has 2-byte count, not 4-byte size) if (packet_type == TAV_PACKET_EXTENDED_HDR) { uint16_t num_pairs; if (fread(&num_pairs, 2, 1, decoder->input_fp) != 1) { fprintf(stderr, "Error: Failed to read extended header count\n"); result = -1; break; } if (verbose && total_packets < 20) { fprintf(stderr, "Packet %d: EXTENDED_HDR (0x%02X), %u pairs - skipping\n", total_packets, packet_type, num_pairs); } // Skip the key-value pairs // Format: each pair is [4-byte key][1-byte type][N-byte value] // We need to parse each pair to know its size for (int i = 0; i < num_pairs; i++) { uint8_t key[4]; uint8_t value_type; if (fread(key, 1, 4, decoder->input_fp) != 4 || fread(&value_type, 1, 1, decoder->input_fp) != 1) { fprintf(stderr, "Error: Failed to read extended header pair %d\n", i); result = -1; break; } // Determine value size based on type size_t value_size = 0; switch (value_type) { case 0x00: value_size = 2; break; // Int16 case 0x01: 
value_size = 3; break; // Int24 case 0x02: value_size = 4; break; // Int32 case 0x03: value_size = 6; break; // Int48 case 0x04: value_size = 8; break; // Int64 case 0x10: { // Bytes with 2-byte length prefix uint16_t str_len; if (fread(&str_len, 2, 1, decoder->input_fp) != 1) { fprintf(stderr, "Error: Failed to read string length\n"); result = -1; break; } value_size = str_len; break; } default: fprintf(stderr, "Warning: Unknown extended header value type 0x%02X\n", value_type); break; } // Skip the value if (value_size > 0) { fseek(decoder->input_fp, value_size, SEEK_CUR); } } if (result < 0) break; continue; } // Read packet size (for remaining packet types with standard format) uint32_t packet_size; if (fread(&packet_size, 4, 1, decoder->input_fp) != 1) { fprintf(stderr, "Error: Failed to read packet size at packet %d (type 0x%02X)\n", total_packets, packet_type); result = -1; break; } if (verbose && total_packets < 20) { fprintf(stderr, "Packet %d: Type 0x%02X, Size %u bytes\n", total_packets, packet_type, packet_size); } switch (packet_type) { case TAV_PACKET_IFRAME: case TAV_PACKET_PFRAME: iframe_count++; if (verbose && iframe_count <= 5) { fprintf(stderr, "Processing %s (packet %d, size %u bytes)...\n", packet_type == TAV_PACKET_IFRAME ? 
"I-frame" : "P-frame", total_packets, packet_size); } result = decode_i_or_p_frame(decoder, packet_type, packet_size); if (result < 0) { fprintf(stderr, "Error: Frame decoding failed at frame %d\n", decoder->frame_count); break; } if (verbose && decoder->frame_count % 100 == 0) { printf("Decoded frame %d\r", decoder->frame_count); fflush(stdout); } break; case TAV_PACKET_AUDIO_MP2: case TAV_PACKET_AUDIO_PCM8: case TAV_PACKET_AUDIO_TRACK: // Skip audio for now fseek(decoder->input_fp, packet_size, SEEK_CUR); break; case TAV_PACKET_SUBTITLE: // Skip subtitle packets fseek(decoder->input_fp, packet_size, SEEK_CUR); break; case TAV_PACKET_PFRAME_RESIDUAL: case TAV_PACKET_BFRAME_RESIDUAL: fprintf(stderr, "\nError: Unsupported packet type 0x%02X (MPEG-style motion compensation not supported)\n", packet_type); result = -1; break; default: fprintf(stderr, "\nWarning: Unknown packet type 0x%02X (skipping)\n", packet_type); fseek(decoder->input_fp, packet_size, SEEK_CUR); break; } } if (verbose) { printf("\nDecoded %d frames\n", decoder->frame_count); } tav_decoder_free(decoder); if (result < 0) { fprintf(stderr, "Decoding error occurred\n"); return 1; } printf("Successfully decoded to: %s\n", output_file); return 0; }