diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c
index 012520c..7f30cde 100644
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include

 #define DECODER_VENDOR_STRING "Decoder-TAV 20251103 (ffv1+pcmu8)"

@@ -255,10 +256,10 @@ static void dequantize_dwt_subbands_perceptual(int q_index, int q_y_global, cons
     const int coeff_count = width * height;
     memset(dequantized, 0, coeff_count * sizeof(float));

-    int is_debug = (frame_num == 32);
-    if (frame_num == 32) {
-        fprintf(stderr, "DEBUG: dequantize called for frame %d, is_chroma=%d\n", frame_num, is_chroma);
-    }
+    int is_debug = 0;//(frame_num == 32);
+//    if (frame_num == 32) {
+//        fprintf(stderr, "DEBUG: dequantize called for frame %d, is_chroma=%d\n", frame_num, is_chroma);
+//    }

     // Apply perceptual weighting to each subband
     for (int s = 0; s < subband_count; s++) {
@@ -394,6 +395,412 @@ static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
     }
 }

+//=============================================================================
+// TAD Decoder - DWT level selection
+//=============================================================================
+
+// Number of DWT decomposition levels for a TAD chunk of chunk_size samples.
+// The count is pinned to 9 for every chunk size (the earlier log2-based
+// derivation from chunk_size is disabled). 9 levels give LL + 9 H bands, which
+// is what the 10-entry subband tables further down are sized for.
+static int calculate_dwt_levels(int chunk_size) {
+    (void)chunk_size;  // unused while the level count is fixed
+    return 9;
+}
+
+//=============================================================================
+// Inverse multi-level DWT for TAD audio (uses the CDF 9/7 inverse)
+//=============================================================================
+
+// Forward declaration (defined later in TAV decoder section)
+static void dwt_97_inverse_1d(float *data, int length);
+
+// Undo a `levels`-deep 1D DWT of data[0..length-1] in place.
+//
+// The forward transform worked on prefixes of length
+//   length, (length+1)/2, ((length+1)/2+1)/2, ...
+// so the inverse must replay exactly those lengths in reverse order. The
+// length for each level is recomputed from `length` instead of being stored in
+// a heap table: `levels` is tiny, and this removes an allocation that was
+// previously dereferenced without a NULL check.
+static void dwt_inverse_multilevel(float *data, int length, int levels) {
+    for (int level = levels - 1; level >= 0; level--) {
+        int current_length = length;
+        for (int i = 0; i < level; i++) {
+            current_length = (current_length + 1) / 2;
+        }
+        dwt_97_inverse_1d(data, current_length);
+    }
+}
+
+//=============================================================================
+// Helper Functions for TAD Decoder
+//=============================================================================
+
+// Clamp x to [min, max].
+static inline float FCLAMP(float x, float min, float max) {
+    return x < min ? min : (x > max ? max : x);
+}
+
+// Uniform random in [0, 1)
+static inline float frand01(void) {
+    return (float)rand() / ((float)RAND_MAX + 1.0f);
+}
+
+// TPDF noise in (-1, +1): difference of two uniform draws
+static inline float tpdf1(void) {
+    return (frand01() - frand01());
+}
+
+// Sign of x as a float: +1, -1, or 0 for zero.
+static float signum(float x) {
+    if (x > 0.0f) return 1.0f;
+    if (x < 0.0f) return -1.0f;
+    return 0.0f;
+}
+
+//=============================================================================
+// M/S Stereo Correlation (inverse of decorrelation)
+//=============================================================================
+
+// Decode M/S to L/R: L = M + S, R = M - S, each clamped to [-1, 1].
+static void ms_correlate(const float *mid, const float *side, float *left, float *right, size_t count) {
+    for (size_t i = 0; i < count; i++) {
+        float m = mid[i];
+        float s = side[i];
+        left[i] = FCLAMP((m + s), -1.0f, 1.0f);
+        right[i] = FCLAMP((m - s), -1.0f, 1.0f);
+    }
+}
+
+//=============================================================================
+// Dynamic range expansion
+//=============================================================================
+
+// Power-law expansion, in place on both channels:
+//   decode(y) = sign(y) * |y|^1.4142
+// NOTE(review): the earlier comment described this as |y|^(1/gamma) with
+// gamma = 0.5 (exponent 2), which does not match the 1.4142 exponent used
+// here; the code is left as is - confirm the exponent against the encoder.
+static void expand_gamma(float *left, float *right, size_t count) {
+    for (size_t i = 0; i < count; i++) {
+        float x = left[i];
+        left[i] = signum(x) * powf(fabsf(x), 1.4142f);
+        float y = right[i];
+        right[i] = signum(y) * powf(fabsf(y), 1.4142f);
+    }
+}
+
+// Mu-law expansion (mu = 255), in place on both channels:
+//   decode(y) = sign(y) * ((1 + mu)^|y| - 1) / mu
+static void expand_mu_law(float *left, float *right, size_t count) {
+    const float mu = 255.0f;
+
+    for (size_t i = 0; i < count; i++) {
+        float x = left[i];
+        left[i] = signum(x) * (powf(1.0f + mu, fabsf(x)) - 1.0f) / mu;
+        float y = right[i];
+        right[i] = signum(y) * (powf(1.0f + mu, fabsf(y)) - 1.0f) / mu;
+    }
+}
+
+//=============================================================================
+// Float -> unsigned 8-bit PCM with dither and error feedback
+//=============================================================================
+
+// Quantise one float sample to unsigned 8-bit.
+// history[0] / history[1] are the channel's two most recent quantisation
+// errors; they are fed back with a 2-tap filter and updated on return.
+static inline uint8_t pcm32f_sample_to_u8(float sample, float history[2]) {
+    const float b1 = 1.5f;     // 1st feedback coefficient
+    const float b2 = -0.75f;   // 2nd feedback coefficient
+    const float scale = 127.5f;
+    const float bias = 128.0f;
+
+    // Reduced dither amplitude to coordinate with coefficient-domain dithering:
+    // TPDF dither is already added in the coefficient domain, so the
+    // sample-domain dither is scaled down (0.2 instead of 0.5 LSB) to avoid
+    // doubling the noise floor.
+    const float dither_scale = 0.2f;
+
+    float feedback = b1 * history[0] + b2 * history[1];
+    float dither = dither_scale * tpdf1();
+    float shaped = FCLAMP(sample + feedback + dither / scale, -1.0f, 1.0f);
+
+    int q = (int)lrintf(shaped * scale);
+    if (q < -128) q = -128;
+    else if (q > 127) q = 127;
+
+    history[1] = history[0];  // shift history
+    history[0] = shaped - (float)q / scale;
+
+    return (uint8_t)(q + bias);
+}
+
+// Convert `count` float stereo samples to unsigned 8-bit PCM.
+// dither_error[0] is the left channel's error history, dither_error[1] the
+// right channel's; both are updated. Left is processed before right for every
+// sample, so the order of rand() draws matches the previous implementation.
+static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *left, uint8_t *right, size_t count, float dither_error[2][2]) {
+    for (size_t i = 0; i < count; i++) {
+        left[i] = pcm32f_sample_to_u8(fleft[i], dither_error[0]);
+        right[i] = pcm32f_sample_to_u8(fright[i], dither_error[1]);
+    }
+}
+
+//=============================================================================
+// TAD (Terrarum Advanced Audio) Decoder - Constants and Helpers
+//=============================================================================
+
+// Coefficient scalars for each subband (CDF 9/7 with 9 decomposition levels)
+static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
+
+// Base quantiser weight table (10 subbands: LL + 9 H bands)
+static const float BASE_QUANTISER_WEIGHTS[] = {
+    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f
+};
+
+//=============================================================================
+// 
Spectral Interpolation for Coefficient Reconstruction (TAD)
+//=============================================================================
+
+// Fast PRNG for light dithering (xorshift32). The state must be non-zero:
+// a zero state maps to zero forever.
+static inline uint32_t xorshift32(uint32_t *s) {
+    uint32_t x = *s;
+    x ^= x << 13;
+    x ^= x >> 17;
+    x ^= x << 5;
+    return *s = x;
+}
+
+// Uniform random in [0, 1) from the low 24 bits of the generator
+static inline float urand(uint32_t *s) {
+    return (xorshift32(s) & 0xFFFFFF) / 16777216.0f;
+}
+
+// TPDF noise in (-1, +1): difference of two uniform draws
+static inline float tpdf_tad(uint32_t *s) {
+    return urand(s) - urand(s);
+}
+
+// Compute RMS energy of a coefficient band (0 for an empty band)
+static float compute_band_rms(const float *c, size_t len) {
+    if (len == 0) return 0.0f;
+    double sumsq = 0.0;
+    for (size_t i = 0; i < len; i++) {
+        sumsq += (double)c[i] * c[i];
+    }
+    return sqrtf((float)(sumsq / (double)len));
+}
+
+// Simplified spectral reconstruction for wavelet coefficients: adds TPDF
+// dither of amplitude 0.05 * Q to every coefficient of the band. Bands shorter
+// than 4 coefficients are left untouched. lower_band_rms is accepted but not
+// used. Q must be finite and non-negative (it seeds the generator).
+static void spectral_interpolate_band(float *c, size_t len, float Q, float lower_band_rms) {
+    if (len < 4) return;
+
+    uint32_t seed = 0x9E3779B9u ^ (uint32_t)len ^ (uint32_t)(Q * 65536.0f);
+    if (seed == 0) seed = 0x9E3779B9u;  // a zero state would lock xorshift32 at 0
+    const float dither_amp = 0.05f * Q;
+
+    for (size_t i = 0; i < len; i++) {
+        c[i] += tpdf_tad(&seed) * dither_amp;
+    }
+
+    (void)lower_band_rms;
+}
+
+//=============================================================================
+// Dequantization (inverse of quantization)
+//=============================================================================
+
+
+#define LAMBDA_FIXED 6.0f
+
+// Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder)
+// Converts quantized index back to normalized float in [-1, 1].
+// Returns 0 when max_index is not positive (no valid index range).
+static float lambda_decompanding(int8_t quant_val, int max_index) {
+    if (quant_val == 0 || max_index <= 0) {
+        return 0.0f;
+    }
+
+    int sign = (quant_val < 0) ? -1 : 1;
+    int abs_index = abs(quant_val);
+
+    // The top index maps to CDF = 1, whose inverse is unbounded; the result is
+    // clamped to 1 anyway, so return it directly instead of going through
+    // logf(0).
+    if (abs_index >= max_index) {
+        return (float)sign;
+    }
+
+    // Map index back to normalized CDF [0, 1)
+    float normalized_cdf = (float)abs_index / max_index;
+
+    // Map from [0, 1) back to [0.5, 1.0) (CDF range for positive half)
+    float cdf = 0.5f + normalized_cdf * 0.5f;
+
+    // Inverse Laplacian CDF for x >= 0: x = -(1/lambda) * ln(2*(1-F))
+    float abs_val = -(1.0f / LAMBDA_FIXED) * logf(2.0f * (1.0f - cdf));
+
+    // Clamp to [0, 1]
+    if (abs_val > 1.0f) abs_val = 1.0f;
+    if (abs_val < 0.0f) abs_val = 0.0f;
+
+    return sign * abs_val;
+}
+
+// Dequantise `count` coefficients of one channel into coeffs[].
+//
+// The coefficient array is laid out as LL band followed by the H bands from
+// lowest to highest frequency; band 0 and band 1 each hold
+// chunk_size >> dwt_levels coefficients and every later band doubles in size.
+// Coefficients past the last boundary are treated as part of the highest band.
+// On invalid parameters (dwt_levels outside the subband tables, max_index <= 0,
+// negative chunk_size) an error is printed and coeffs[] is zero-filled.
+static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) {
+    enum { TAD_BAND_TABLE_SIZE = sizeof(TAD32_COEFF_SCALARS) / sizeof(TAD32_COEFF_SCALARS[0]) };
+
+    if (dwt_levels < 0 || dwt_levels >= TAD_BAND_TABLE_SIZE || max_index <= 0 || chunk_size < 0) {
+        fprintf(stderr, "Error: Invalid TAD dequantisation parameters (levels=%d, max_index=%d, chunk_size=%d)\n",
+                dwt_levels, max_index, chunk_size);
+        memset(coeffs, 0, count * sizeof(float));
+        return;
+    }
+
+    // Calculate sideband boundaries; band b covers [band_starts[b], band_starts[b+1])
+    size_t band_starts[TAD_BAND_TABLE_SIZE + 1];
+    const size_t first_band_size = (size_t)chunk_size >> dwt_levels;
+    band_starts[0] = 0;
+    band_starts[1] = first_band_size;
+    for (int i = 2; i <= dwt_levels + 1; i++) {
+        band_starts[i] = band_starts[i - 1] + (first_band_size << (i - 2));
+    }
+
+    // Step 1: Dequantize all coefficients (no dithering yet), band by band
+    size_t i = 0;
+    for (int band = 0; band <= dwt_levels; band++) {
+        // The highest band also absorbs everything past the final boundary
+        size_t end = (band == dwt_levels) ? count : band_starts[band + 1];
+        if (end > count) end = count;
+
+        // Denormalize using the subband scalar and apply base weight + quantiser scaling
+        const float scalar = TAD32_COEFF_SCALARS[band];
+        const float weight = BASE_QUANTISER_WEIGHTS[band] * quantiser_scale;
+        for (; i < end; i++) {
+            coeffs[i] = lambda_decompanding(quantized[i], max_index) * scalar * weight;
+        }
+    }
+
+    // Step 2: Apply spectral interpolation per band
+    // Process bands from high to low frequency (dwt_levels down to 0)
+    // so the previous band's RMS is available as a reference
+    float prev_band_rms = 0.0f;
+
+    for (int band = dwt_levels; band >= 0; band--) {
+        size_t band_start = band_starts[band];
+        size_t band_end = band_starts[band + 1];
+        if (band_start > count) band_start = count;  // never run past coeffs[]
+        if (band_end > count) band_end = count;
+        size_t band_len = band_end - band_start;
+
+        // Calculate quantization step Q for this band
+        float weight = BASE_QUANTISER_WEIGHTS[band] * quantiser_scale;
+        float scalar = TAD32_COEFF_SCALARS[band] * weight;
+        float Q = scalar / max_index;
+
+        // Apply spectral interpolation to this band
+        spectral_interpolate_band(&coeffs[band_start], band_len, Q, prev_band_rms);
+
+        // Compute RMS for this band to use as reference for next (lower frequency) band
+        prev_band_rms = compute_band_rms(&coeffs[band_start], band_len);
+    }
+}
+
+//=============================================================================
+// Chunk Decoding
+//=============================================================================
+
+// Decode one TAD chunk into interleaved stereo PCMu8.
+//
+// Chunk layout (host byte order, as written by the caller):
+//   uint16 sample_count | uint8 max_index | uint32 payload_size | Zstd payload
+// The decompressed payload holds sample_count quantised Mid coefficients
+// followed by sample_count quantised Side coefficients.
+//
+// input/input_size : chunk bytes available to read
+// pcmu8_stereo     : output, must hold sample_count * 2 bytes
+// bytes_consumed   : set to header + payload size on success
+// samples_decoded  : set to sample_count on success
+// Returns 0 on success, -1 on malformed input or allocation failure (the two
+// output counters are left untouched in that case).
+static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
+                        size_t *bytes_consumed, size_t *samples_decoded) {
+    enum { CHUNK_HEADER_SIZE = 2 + 1 + 4 };
+
+    if (input_size < CHUNK_HEADER_SIZE) {
+        fprintf(stderr, "Error: TAD chunk too short for header (%zu bytes)\n", input_size);
+        return -1;
+    }
+
+    // Read chunk header. memcpy instead of casted pointer loads: the 32-bit
+    // field sits at offset 3 and is not aligned.
+    uint16_t sample_count;
+    uint32_t payload_size;
+    memcpy(&sample_count, input, sizeof(sample_count));
+    const uint8_t max_index = input[2];
+    memcpy(&payload_size, input + 3, sizeof(payload_size));
+    const uint8_t *payload = input + CHUNK_HEADER_SIZE;
+
+    if (payload_size > input_size - CHUNK_HEADER_SIZE) {
+        fprintf(stderr, "Error: TAD payload size %u exceeds chunk size %zu\n",
+                (unsigned)payload_size, input_size);
+        return -1;
+    }
+
+    // Calculate DWT levels from sample count
+    int dwt_levels = calculate_dwt_levels(sample_count);
+    if (dwt_levels < 0) {
+        fprintf(stderr, "Error: Invalid sample count %u\n", sample_count);
+        return -1;
+    }
+
+    const size_t n = sample_count;
+    if (n == 0) {
+        // Nothing to decode; just step over the chunk
+        *bytes_consumed = CHUNK_HEADER_SIZE + (size_t)payload_size;
+        *samples_decoded = 0;
+        return 0;
+    }
+
+    if (max_index == 0) {
+        // A zero index range would divide by zero during dequantisation
+        fprintf(stderr, "Error: Invalid TAD max index 0\n");
+        return -1;
+    }
+
+    int result = -1;
+
+    // Decompression buffer (generous upper bound; only the first 2*n bytes are used)
+    const size_t decompressed_size = n * 4 * sizeof(int8_t);
+    uint8_t *decompressed = malloc(decompressed_size);
+
+    // Working buffers
+    float *dwt_mid = malloc(n * sizeof(float));
+    float *dwt_side = malloc(n * sizeof(float));
+    float *pcm32_left = malloc(n * sizeof(float));
+    float *pcm32_right = malloc(n * sizeof(float));
+    uint8_t *pcm8_left = malloc(n * sizeof(uint8_t));
+    uint8_t *pcm8_right = malloc(n * sizeof(uint8_t));
+
+    if (!decompressed || !dwt_mid || !dwt_side || !pcm32_left || !pcm32_right ||
+        !pcm8_left || !pcm8_right) {
+        fprintf(stderr, "Error: Out of memory decoding TAD chunk\n");
+        goto cleanup;
+    }
+
+    size_t actual_size = ZSTD_decompress(decompressed, decompressed_size, payload, payload_size);
+
+    if (ZSTD_isError(actual_size)) {
+        fprintf(stderr, "Error: Zstd decompression failed: %s\n", ZSTD_getErrorName(actual_size));
+        goto cleanup;
+    }
+
+    if (actual_size < 2 * n) {
+        // Mid + Side need n bytes each; anything shorter would read
+        // uninitialised buffer contents
+        fprintf(stderr, "Error: TAD payload decompressed to %zu bytes, expected at least %zu\n",
+                actual_size, 2 * n);
+        goto cleanup;
+    }
+
+    // Separate Mid/Side (read in place from the decompressed buffer)
+    const int8_t *quant_mid = (const int8_t *)decompressed;
+    const int8_t *quant_side = quant_mid + n;
+
+    // Dequantize with quantiser scaling and spectral interpolation
+    // Use quantiser_scale = 1.0f for baseline (must match encoder)
+    const float quantiser_scale = 1.0f;
+    dequantize_dwt_coefficients(quant_mid, dwt_mid, n, sample_count, dwt_levels, max_index, quantiser_scale);
+    dequantize_dwt_coefficients(quant_side, dwt_side, n, sample_count, dwt_levels, max_index, quantiser_scale);
+
+    // Inverse DWT
+    dwt_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
+    dwt_inverse_multilevel(dwt_side, sample_count, dwt_levels);
+
+    // M/S to L/R correlation
+    ms_correlate(dwt_mid, dwt_side, pcm32_left, pcm32_right, n);
+
+    // expand dynamic range
+    expand_gamma(pcm32_left, pcm32_right, n);
+
+    // dither to 8-bit (error-feedback history starts from zero for every chunk)
+    float err[2][2] = {{0,0},{0,0}};
+    pcm32f_to_pcm8(pcm32_left, pcm32_right, pcm8_left, pcm8_right, n, err);
+
+    // Interleave stereo output (PCMu8)
+    for (size_t i = 0; i < n; i++) {
+        pcmu8_stereo[i * 2] = pcm8_left[i];
+        pcmu8_stereo[i * 2 + 1] = pcm8_right[i];
+    }
+
+    *bytes_consumed = CHUNK_HEADER_SIZE + (size_t)payload_size;
+    *samples_decoded = n;
+    result = 0;
+
+cleanup:
+    free(dwt_mid); free(dwt_side);
+    free(pcm32_left); free(pcm32_right); free(pcm8_left); free(pcm8_right);
+    free(decompressed);
+
+    return result;
+}
+
 //=============================================================================
 // Significance Map Postprocessing (matches TSVM exactly)
 //=============================================================================
@@ -637,18 +1044,18 @@ static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t
     ezbc_bitreader_t reader = {ezbc_data, offset + size, offset, 0};

     // Debug: Print first few bytes
-    fprintf(stderr, "[EZBC] Channel decode: offset=%zu, size=%zu, first 5 bytes: %02X %02X %02X %02X %02X\n",
-            offset, size,
-            ezbc_data[offset], ezbc_data[offset+1], ezbc_data[offset+2],
-            ezbc_data[offset+3], ezbc_data[offset+4]);
+//    fprintf(stderr, "[EZBC] Channel decode: offset=%zu, size=%zu, first 5 bytes: %02X %02X %02X %02X %02X\n",
+//            offset, size,
+//            ezbc_data[offset], ezbc_data[offset+1], ezbc_data[offset+2],
+//            ezbc_data[offset+3], ezbc_data[offset+4]);

     // Read header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
     const int msb_bitplane = ezbc_read_bits(&reader, 8);
     const int width = ezbc_read_bits(&reader, 16);
     const int height = ezbc_read_bits(&reader, 16);

-    fprintf(stderr, "[EZBC] Decoded header: MSB=%d, width=%d, height=%d (expected pixels=%d)\n",
-            msb_bitplane, width, height, expected_count);
+//    fprintf(stderr, "[EZBC] Decoded header: MSB=%d, width=%d, height=%d (expected pixels=%d)\n",
+//            msb_bitplane, width, height, expected_count);

     if (width * height != expected_count) {
         fprintf(stderr, "EZBC dimension mismatch: %dx%d != %d\n", width, height, expected_count);
@@ -742,8 +1149,8 @@ 
static void decode_channel_ezbc(const uint8_t *ezbc_data, size_t offset, size_t if (output[i] < min_val) min_val = output[i]; } } - fprintf(stderr, "[EZBC] Decoded %d non-zero coeffs (%.1f%%), range: [%d, %d]\n", - nonzero_count, 100.0 * nonzero_count / expected_count, min_val, max_val); +// fprintf(stderr, "[EZBC] Decoded %d non-zero coeffs (%.1f%%), range: [%d, %d]\n", +// nonzero_count, 100.0 * nonzero_count / expected_count, min_val, max_val); } // EZBC postprocessing for single frames @@ -799,18 +1206,19 @@ static void dwt_97_inverse_1d(float *data, int length) { if (length < 2) return; // Debug: Check if input has non-zero values - static int call_count = 0; - if (call_count < 5) { - int nonzero = 0; - for (int i = 0; i < length; i++) { - if (data[i] != 0.0f) nonzero++; - } - fprintf(stderr, " dwt_97_inverse_1d call #%d: length=%d, nonzero=%d, first 5: %.1f %.1f %.1f %.1f %.1f\n", - call_count, length, nonzero, - data[0], length > 1 ? data[1] : 0.0f, length > 2 ? data[2] : 0.0f, - length > 3 ? data[3] : 0.0f, length > 4 ? data[4] : 0.0f); - call_count++; - } +// static int call_count = 0; +// if (call_count < 5) { +// Debug: count non-zero coefficients (disabled to reduce stderr output) +// int nonzero = 0; +// for (int i = 0; i < length; i++) { +// if (data[i] != 0.0f) nonzero++; +// } +// fprintf(stderr, " dwt_97_inverse_1d call #%d: length=%d, nonzero=%d, first 5: %.1f %.1f %.1f %.1f %.1f\n", +// call_count, length, nonzero, +// data[0], length > 1 ? data[1] : 0.0f, length > 2 ? data[2] : 0.0f, +// length > 3 ? data[3] : 0.0f, length > 4 ? 
data[4] : 0.0f); +// call_count++; +// } float *temp = malloc(length * sizeof(float)); int half = (length + 1) / 2; @@ -890,17 +1298,17 @@ static void dwt_97_inverse_1d(float *data, int length) { } } - // Debug: Check output - if (call_count <= 5) { - int nonzero_out = 0; - for (int i = 0; i < length; i++) { - if (data[i] != 0.0f) nonzero_out++; - } - fprintf(stderr, " -> OUTPUT: nonzero=%d, first 5: %.1f %.1f %.1f %.1f %.1f\n", - nonzero_out, - data[0], length > 1 ? data[1] : 0.0f, length > 2 ? data[2] : 0.0f, - length > 3 ? data[3] : 0.0f, length > 4 ? data[4] : 0.0f); - } + // Debug: Check output (disabled to reduce stderr output) + // if (call_count <= 5) { + // int nonzero_out = 0; + // for (int i = 0; i < length; i++) { + // if (data[i] != 0.0f) nonzero_out++; + // } + // fprintf(stderr, " -> OUTPUT: nonzero=%d, first 5: %.1f %.1f %.1f %.1f %.1f\n", + // nonzero_out, + // data[0], length > 1 ? data[1] : 0.0f, length > 2 ? data[2] : 0.0f, + // length > 3 ? data[3] : 0.0f, length > 4 ? 
data[4] : 0.0f); + // } free(temp); } @@ -996,28 +1404,28 @@ static void apply_inverse_dwt_multilevel(float *data, int width, int height, int } } } - fprintf(stderr, "After level %d (%dx%d): nonzero=%d/%d, data[0]=%.1f, data[1]=%.1f, data[width]=%.1f\n", - level, current_width, current_height, nonzero_level, current_width * current_height, - data[0], data[1], data[width]); + // fprintf(stderr, "After level %d (%dx%d): nonzero=%d/%d, data[0]=%.1f, data[1]=%.1f, data[width]=%.1f\n", + // level, current_width, current_height, nonzero_level, current_width * current_height, + // data[0], data[1], data[width]); if (level == 0) first_frame_levels = 0; // Stop after level 0 of first frame } } - // Debug: Check buffer after all levels complete - static int debug_output_once = 1; - if (debug_output_once) { - int nonzero_final = 0; - for (int i = 0; i < width * height; i++) { - if (data[i] != 0.0f) nonzero_final++; - } - fprintf(stderr, "After ALL IDWT levels complete: nonzero=%d/%d, first 10: ", nonzero_final, width * height); - for (int i = 0; i < 10 && i < width * height; i++) { - fprintf(stderr, "%.1f ", data[i]); - } - fprintf(stderr, "\n"); - debug_output_once = 0; - } + // Debug: Check buffer after all levels complete (disabled to reduce stderr output) + // static int debug_output_once = 1; + // if (debug_output_once) { + // int nonzero_final = 0; + // for (int i = 0; i < width * height; i++) { + // if (data[i] != 0.0f) nonzero_final++; + // } + // fprintf(stderr, "After ALL IDWT levels complete: nonzero=%d/%d, first 10: ", nonzero_final, width * height); + // for (int i = 0; i < 10 && i < width * height; i++) { + // fprintf(stderr, "%.1f ", data[i]); + // } + // fprintf(stderr, "\n"); + // debug_output_once = 0; + // } free(widths); free(heights); @@ -1511,6 +1919,37 @@ static void ictcp_to_rgb(float i, float ct, float cp, uint8_t *r, uint8_t *g, ui *b = CLAMP((int)(b_val * 255.0f + 0.5f), 0, 255); } 
+//=============================================================================
+// WAV File Writing
+//=============================================================================
+
+// Store a 16-bit value little-endian at p
+static void wav_put_le16(uint8_t *p, uint16_t v) {
+    p[0] = (uint8_t)(v & 0xFF);
+    p[1] = (uint8_t)(v >> 8);
+}
+
+// Store a 32-bit value little-endian at p
+static void wav_put_le32(uint8_t *p, uint32_t v) {
+    p[0] = (uint8_t)(v & 0xFF);
+    p[1] = (uint8_t)((v >> 8) & 0xFF);
+    p[2] = (uint8_t)((v >> 16) & 0xFF);
+    p[3] = (uint8_t)(v >> 24);
+}
+
+// Write a 44-byte canonical WAV header for unsigned 8-bit PCM at the current
+// position of fp. data_size is the byte length of the sample data that
+// follows. Fields are serialised little-endian explicitly (the RIFF/WAVE byte
+// order) instead of dumping host-order integers. A failed write is reported on
+// stderr.
+static void write_wav_header(FILE *fp, uint32_t sample_rate, uint16_t channels, uint32_t data_size) {
+    uint8_t hdr[44];
+    const uint32_t byte_rate = sample_rate * channels * 1;  // 1 byte per sample (u8)
+    const uint16_t block_align = channels * 1;
+
+    // RIFF header
+    memcpy(hdr, "RIFF", 4);
+    wav_put_le32(hdr + 4, 36 + data_size);
+    memcpy(hdr + 8, "WAVE", 4);
+
+    // fmt chunk
+    memcpy(hdr + 12, "fmt ", 4);
+    wav_put_le32(hdr + 16, 16);           // fmt chunk size
+    wav_put_le16(hdr + 20, 1);            // audio format: PCM
+    wav_put_le16(hdr + 22, channels);
+    wav_put_le32(hdr + 24, sample_rate);
+    wav_put_le32(hdr + 28, byte_rate);
+    wav_put_le16(hdr + 32, block_align);
+    wav_put_le16(hdr + 34, 8);            // bits per sample
+
+    // data chunk
+    memcpy(hdr + 36, "data", 4);
+    wav_put_le32(hdr + 40, data_size);
+
+    if (fwrite(hdr, 1, sizeof(hdr), fp) != sizeof(hdr)) {
+        fprintf(stderr, "Failed to write WAV header\n");
+    }
+}
+
 //=============================================================================
 // Decoder State Structure
 //=============================================================================
@@ -1530,22 +1969,196 @@ typedef struct {
     int frame_size;
     int is_monoblock;  // True if version 3-6 (single tile mode)

-    // FFmpeg pipes for video and audio
+    // FFmpeg pipe for video only (audio from file)
     FILE *video_pipe;
-    FILE *audio_pipe;
     pid_t ffmpeg_pid;

-    // Audio buffer for TAD → PCMu8 conversion
-    uint8_t *audio_buffer;
-    size_t audio_buffer_size;
-    size_t audio_buffer_used;
+    // Temporary audio file
+    char *audio_file_path;
 } tav_decoder_t;

+//=============================================================================
+// Pass 1: Extract Audio to WAV File
+//=============================================================================
+
+// Read exactly `size` bytes into dst. Returns 1 on success, 0 on EOF or a
+// short read (the stream is then treated as truncated).
+static int pass1_read(FILE *fp, void *dst, size_t size) {
+    return size == 0 || fread(dst, 1, size, fp) == size;
+}
+
+// Scan every packet of input_file and write all decoded audio (TAD and
+// Zstd-compressed PCM8 packets) to wav_file as 32 kHz stereo unsigned 8-bit
+// PCM. Non-audio packets are skipped.
+//
+// A truncated input ends the scan early and keeps what was decoded so far.
+// Returns 0 on success, -1 if a file cannot be opened, the TAV header cannot
+// be read, memory runs out, or writing the WAV file fails.
+static int extract_audio_to_wav(const char *input_file, const char *wav_file, int verbose) {
+    FILE *input_fp = fopen(input_file, "rb");
+    if (!input_fp) {
+        fprintf(stderr, "Failed to open input file for audio extraction\n");
+        return -1;
+    }
+
+    // Read header
+    tav_header_t header;
+    if (fread(&header, sizeof(tav_header_t), 1, input_fp) != 1) {
+        fclose(input_fp);
+        return -1;
+    }
+
+    // Open temporary audio file
+    FILE *wav_fp = fopen(wav_file, "wb");
+    if (!wav_fp) {
+        fprintf(stderr, "Failed to create temporary audio file\n");
+        fclose(input_fp);
+        return -1;
+    }
+
+    // Write placeholder WAV header (will be updated later)
+    write_wav_header(wav_fp, 32000, 2, 0);
+
+    uint32_t total_audio_bytes = 0;
+    int packet_count = 0;
+    int failed = 0;  // set on out-of-memory or WAV write errors
+
+    if (verbose) {
+        fprintf(stderr, "[Pass 1] Extracting audio to %s...\n", wav_file);
+    }
+
+    // Read all packets and extract audio
+    while (!failed) {
+        uint8_t packet_type;
+        if (fread(&packet_type, 1, 1, input_fp) != 1) {
+            break;  // EOF
+        }
+
+        packet_count++;
+
+        // Skip non-audio packets
+        if (packet_type == TAV_PACKET_SYNC || packet_type == TAV_PACKET_SYNC_NTSC) {
+            continue;
+        }
+
+        if (packet_type == TAV_PACKET_TIMECODE) {
+            fseek(input_fp, 8, SEEK_CUR);  // Skip timecode
+            continue;
+        }
+
+        if (packet_type == TAV_PACKET_GOP_SYNC) {
+            fseek(input_fp, 1, SEEK_CUR);  // Skip frame count
+            continue;
+        }
+
+        if (packet_type == TAV_PACKET_GOP_UNIFIED) {
+            uint8_t gop_size;
+            uint32_t compressed_size;
+            if (!pass1_read(input_fp, &gop_size, 1) ||
+                !pass1_read(input_fp, &compressed_size, 4)) {
+                break;  // truncated
+            }
+            fseek(input_fp, (long)compressed_size, SEEK_CUR);  // Skip GOP data
+            continue;
+        }
+
+        // Handle TAD audio
+        if (packet_type == TAV_PACKET_AUDIO_TAD) {
+            // Packet wrapper (sample count, payload size + 7): only stepped over
+            uint16_t sample_count_wrapper;
+            uint32_t payload_size_plus_7;
+            // Embedded chunk header: uint16 sample count, uint8 quantiser
+            // index, uint32 compressed size - passed on to decode_chunk as is
+            uint8_t chunk_header[2 + 1 + 4];
+            if (!pass1_read(input_fp, &sample_count_wrapper, 2) ||
+                !pass1_read(input_fp, &payload_size_plus_7, 4) ||
+                !pass1_read(input_fp, chunk_header, sizeof(chunk_header))) {
+                break;  // truncated
+            }
+
+            uint16_t sample_count_chunk;
+            uint32_t compressed_size;
+            memcpy(&sample_count_chunk, chunk_header, 2);
+            memcpy(&compressed_size, chunk_header + 3, 4);
+
+            // Build TAD chunk: header followed by the compressed payload
+            size_t tad_chunk_size = sizeof(chunk_header) + (size_t)compressed_size;
+            size_t pcm_capacity = (size_t)sample_count_chunk * 2;
+            uint8_t *tad_chunk = malloc(tad_chunk_size);
+            uint8_t *pcmu8_output = malloc(pcm_capacity ? pcm_capacity : 1);
+            if (!tad_chunk || !pcmu8_output) {
+                fprintf(stderr, "Out of memory while extracting TAD audio\n");
+                free(tad_chunk);
+                free(pcmu8_output);
+                failed = 1;
+                break;
+            }
+
+            memcpy(tad_chunk, chunk_header, sizeof(chunk_header));
+            if (!pass1_read(input_fp, tad_chunk + sizeof(chunk_header), compressed_size)) {
+                free(tad_chunk);
+                free(pcmu8_output);
+                break;  // truncated
+            }
+
+            // Decode TAD
+            size_t bytes_consumed = 0, samples_decoded = 0;
+            int decode_result = decode_chunk(tad_chunk, tad_chunk_size,
+                                             pcmu8_output, &bytes_consumed, &samples_decoded);
+
+            if (decode_result >= 0) {
+                size_t pcm_bytes = samples_decoded * 2;
+                size_t written = fwrite(pcmu8_output, 1, pcm_bytes, wav_fp);
+                total_audio_bytes += (uint32_t)written;
+                if (written != pcm_bytes) {
+                    fprintf(stderr, "Failed to write audio data\n");
+                    failed = 1;
+                }
+            }
+
+            free(tad_chunk);
+            free(pcmu8_output);
+            continue;
+        }
+
+        // Handle PCM8 audio
+        if (packet_type == TAV_PACKET_AUDIO_PCM8) {
+            uint32_t packet_size;
+            if (!pass1_read(input_fp, &packet_size, 4)) {
+                break;  // truncated
+            }
+
+            uint8_t *compressed_data = malloc(packet_size ? packet_size : 1);
+            if (!compressed_data) {
+                fprintf(stderr, "Out of memory while extracting PCM8 audio\n");
+                failed = 1;
+                break;
+            }
+            if (!pass1_read(input_fp, compressed_data, packet_size)) {
+                free(compressed_data);
+                break;  // truncated
+            }
+
+            // Decompress. The frame header may not carry a usable size; the
+            // sentinel values must not be used as an allocation size.
+            unsigned long long content_size = ZSTD_getFrameContentSize(compressed_data, packet_size);
+            if (content_size == ZSTD_CONTENTSIZE_UNKNOWN || content_size == ZSTD_CONTENTSIZE_ERROR) {
+                fprintf(stderr, "Skipping PCM8 packet with unknown decompressed size\n");
+                free(compressed_data);
+                continue;
+            }
+
+            size_t decompressed_bound = (size_t)content_size;
+            uint8_t *pcm_data = malloc(decompressed_bound ? decompressed_bound : 1);
+            if (!pcm_data) {
+                fprintf(stderr, "Out of memory while extracting PCM8 audio\n");
+                free(compressed_data);
+                failed = 1;
+                break;
+            }
+
+            size_t decompressed_size = ZSTD_decompress(pcm_data, decompressed_bound,
+                                                       compressed_data, packet_size);
+            free(compressed_data);
+
+            if (!ZSTD_isError(decompressed_size)) {
+                size_t written = fwrite(pcm_data, 1, decompressed_size, wav_fp);
+                total_audio_bytes += (uint32_t)written;
+                if (written != decompressed_size) {
+                    fprintf(stderr, "Failed to write audio data\n");
+                    failed = 1;
+                }
+            }
+
+            free(pcm_data);
+            continue;
+        }
+
+        // Handle EXTENDED_HDR packet (key-value pairs)
+        if (packet_type == TAV_PACKET_EXTENDED_HDR) {
+            uint16_t num_pairs;
+            if (!pass1_read(input_fp, &num_pairs, 2)) {
+                break;  // truncated
+            }
+
+            int complete = 1;
+            for (int i = 0; i < num_pairs; i++) {
+                fseek(input_fp, 4, SEEK_CUR);  // Skip key (4 bytes)
+                uint8_t value_type;
+                if (!pass1_read(input_fp, &value_type, 1)) {
+                    complete = 0;
+                    break;
+                }
+                if (value_type == 0x04) {
+                    fseek(input_fp, 8, SEEK_CUR);  // uint64 value
+                } else if (value_type == 0x10) {
+                    uint16_t str_len;
+                    if (!pass1_read(input_fp, &str_len, 2)) {
+                        complete = 0;
+                        break;
+                    }
+                    fseek(input_fp, str_len, SEEK_CUR);  // string value
+                }
+                // Other value types: no value bytes are skipped (unchanged)
+            }
+            if (!complete) {
+                break;  // truncated
+            }
+            continue;
+        }
+
+        // Read packet size for standard packets
+        uint32_t packet_size;
+        if (!pass1_read(input_fp, &packet_size, 4)) {
+            break;  // truncated
+        }
+        fseek(input_fp, (long)packet_size, SEEK_CUR);
+    }
+
+    // Update WAV header with actual data size
+    if (fseek(wav_fp, 0, SEEK_SET) != 0) {
+        failed = 1;
+    } else {
+        write_wav_header(wav_fp, 32000, 2, total_audio_bytes);
+    }
+
+    // fclose flushes buffered audio; a failure here means the WAV is incomplete
+    if (fclose(wav_fp) != 0) {
+        fprintf(stderr, "Failed to finalize temporary audio file\n");
+        failed = 1;
+    }
+    fclose(input_fp);
+
+    if (verbose) {
+        fprintf(stderr, "[Pass 1] Extracted %u bytes of audio (%d packets processed)\n",
+                total_audio_bytes, packet_count);
+    }
+
+    return failed ? -1 : 0;
+}
+
 //=============================================================================
 // Decoder Initialization and Cleanup
 //=============================================================================

-static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file) {
+static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file) {
     tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
     if (!decoder) return NULL;

@@ -1571,6 +2184,7 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu

     decoder->frame_size = decoder->header.width * decoder->header.height;
     decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6);
+    decoder->audio_file_path = strdup(audio_file);

     // Allocate buffers
     decoder->current_frame_rgb = calloc(decoder->frame_size * 3, 1);
@@ -1582,15 +2196,10 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu
     decoder->reference_ycocg_co = calloc(decoder->frame_size, sizeof(float));
     decoder->reference_ycocg_cg = calloc(decoder->frame_size, sizeof(float));

-    // Audio buffer (32 KB should be enough for most audio packets)
-    decoder->audio_buffer_size = 32768;
-    decoder->audio_buffer = malloc(decoder->audio_buffer_size);
-    decoder->audio_buffer_used = 0;
-
-    // Create FFmpeg process for video encoding
-    int video_pipe_fd[2], 
audio_pipe_fd[2]; - if (pipe(video_pipe_fd) == -1 || pipe(audio_pipe_fd) == -1) { - fprintf(stderr, "Failed to create pipes\n"); + // Create FFmpeg process for video encoding (video pipe only, audio from file) + int video_pipe_fd[2]; + if (pipe(video_pipe_fd) == -1) { + fprintf(stderr, "Failed to create video pipe\n"); free(decoder->current_frame_rgb); free(decoder->reference_frame_rgb); free(decoder->dwt_buffer_y); @@ -1599,7 +2208,7 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); - free(decoder->audio_buffer); + free(decoder->audio_file_path); fclose(decoder->input_fp); free(decoder); return NULL; @@ -1609,7 +2218,6 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu if (decoder->ffmpeg_pid == -1) { fprintf(stderr, "Failed to fork FFmpeg process\n"); close(video_pipe_fd[0]); close(video_pipe_fd[1]); - close(audio_pipe_fd[0]); close(audio_pipe_fd[1]); free(decoder->current_frame_rgb); free(decoder->reference_frame_rgb); free(decoder->dwt_buffer_y); @@ -1618,25 +2226,22 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); - free(decoder->audio_buffer); + free(decoder->audio_file_path); fclose(decoder->input_fp); free(decoder); return NULL; } else if (decoder->ffmpeg_pid == 0) { // Child process - FFmpeg close(video_pipe_fd[1]); // Close write end - close(audio_pipe_fd[1]); char video_size[32]; char framerate[16]; snprintf(video_size, sizeof(video_size), "%dx%d", decoder->header.width, decoder->header.height); snprintf(framerate, sizeof(framerate), "%d", decoder->header.fps); - // Redirect pipes to stdin + // Redirect video pipe to fd 3 dup2(video_pipe_fd[0], 3); // Video input on fd 3 - dup2(audio_pipe_fd[0], 4); // Audio input on fd 4 close(video_pipe_fd[0]); - 
close(audio_pipe_fd[0]); execl("/usr/bin/ffmpeg", "ffmpeg", "-f", "rawvideo", @@ -1644,8 +2249,8 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu "-video_size", video_size, "-framerate", framerate, "-i", "pipe:3", // Video from fd 3 + "-i", audio_file, // Audio from file "-color_range", "2", - // Note: Audio decoding not yet implemented, so we output video-only MKV "-c:v", "ffv1", // FFV1 codec "-level", "3", // FFV1 level 3 "-coder", "1", // Range coder @@ -1653,8 +2258,9 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu "-g", "1", // GOP size 1 (all I-frames) "-slices", "24", // 24 slices for threading "-slicecrc", "1", // CRC per slice - "-pixel_format", "rgb24", // make FFmpeg encode to RGB + "-pixel_format", "rgb24", // make FFmpeg encode to RGB "-color_range", "2", + "-c:a", "pcm_u8", // Audio codec (PCM unsigned 8-bit) "-f", "matroska", // MKV container output_file, "-y", // Overwrite output @@ -1665,14 +2271,12 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu exit(1); } else { // Parent process - close(video_pipe_fd[0]); // Close read ends - close(audio_pipe_fd[0]); + close(video_pipe_fd[0]); // Close read end decoder->video_pipe = fdopen(video_pipe_fd[1], "wb"); - decoder->audio_pipe = fdopen(audio_pipe_fd[1], "wb"); - if (!decoder->video_pipe || !decoder->audio_pipe) { - fprintf(stderr, "Failed to open pipes for writing\n"); + if (!decoder->video_pipe) { + fprintf(stderr, "Failed to open video pipe for writing\n"); kill(decoder->ffmpeg_pid, SIGTERM); free(decoder->current_frame_rgb); free(decoder->reference_frame_rgb); @@ -1682,7 +2286,7 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); - free(decoder->audio_buffer); + free(decoder->audio_file_path); fclose(decoder->input_fp); free(decoder); return NULL; @@ -1697,7 
+2301,6 @@ static void tav_decoder_free(tav_decoder_t *decoder) { if (decoder->input_fp) fclose(decoder->input_fp); if (decoder->video_pipe) fclose(decoder->video_pipe); - if (decoder->audio_pipe) fclose(decoder->audio_pipe); // Wait for FFmpeg to finish if (decoder->ffmpeg_pid > 0) { @@ -1713,7 +2316,7 @@ static void tav_decoder_free(tav_decoder_t *decoder) { free(decoder->reference_ycocg_y); free(decoder->reference_ycocg_co); free(decoder->reference_ycocg_cg); - free(decoder->audio_buffer); + free(decoder->audio_file_path); free(decoder); } @@ -1758,15 +2361,15 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint } // Debug first 3 frames compression - static int decomp_debug = 0; - if (decomp_debug < 3) { - fprintf(stderr, " [ZSTD frame %d] Compressed size: %u, buffer size: %zu\n", decomp_debug, packet_size, decompressed_size); - fprintf(stderr, " [ZSTD frame %d] First 16 bytes of COMPRESSED data: ", decomp_debug); - for (int i = 0; i < 16 && i < (int)packet_size; i++) { - fprintf(stderr, "%02X ", compressed_data[i]); - } - fprintf(stderr, "\n"); - } +// static int decomp_debug = 0; +// if (decomp_debug < 3) { +// fprintf(stderr, " [ZSTD frame %d] Compressed size: %u, buffer size: %zu\n", decomp_debug, packet_size, decompressed_size); +// fprintf(stderr, " [ZSTD frame %d] First 16 bytes of COMPRESSED data: ", decomp_debug); +// for (int i = 0; i < 16 && i < (int)packet_size; i++) { +// fprintf(stderr, "%02X ", compressed_data[i]); +// } +// fprintf(stderr, "\n"); +// } size_t actual_size = ZSTD_decompress(decompressed_data, decompressed_size, compressed_data, packet_size); @@ -1777,15 +2380,15 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint goto write_frame; } - if (decomp_debug < 3) { - fprintf(stderr, " [ZSTD frame %d] Decompressed size: %zu\n", decomp_debug, actual_size); - fprintf(stderr, " [ZSTD frame %d] First 16 bytes of DECOMPRESSED data: ", decomp_debug); - for (int i = 0; i < 16 && i < 
(int)actual_size; i++) { - fprintf(stderr, "%02X ", decompressed_data[i]); - } - fprintf(stderr, "\n"); - decomp_debug++; - } +// if (decomp_debug < 3) { +// fprintf(stderr, " [ZSTD frame %d] Decompressed size: %zu\n", decomp_debug, actual_size); +// fprintf(stderr, " [ZSTD frame %d] First 16 bytes of DECOMPRESSED data: ", decomp_debug); +// for (int i = 0; i < 16 && i < (int)actual_size; i++) { +// fprintf(stderr, "%02X ", decompressed_data[i]); +// } +// fprintf(stderr, "\n"); +// decomp_debug++; +// } // Parse block data uint8_t *ptr = decompressed_data; @@ -1801,10 +2404,10 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint int qcg = qcg_override ? QLUT[qcg_override] : QLUT[decoder->header.quantiser_cg]; // Debug first few frames - if (decoder->frame_count < 2) { - fprintf(stderr, "Frame %d: mode=%d, Q: Y=%d, Co=%d, Cg=%d, decompressed=%zu bytes\n", - decoder->frame_count, mode, qy, qco, qcg, actual_size); - } +// if (decoder->frame_count < 2) { +// fprintf(stderr, "Frame %d: mode=%d, Q: Y=%d, Co=%d, Cg=%d, decompressed=%zu bytes\n", +// decoder->frame_count, mode, qy, qco, qcg, actual_size); +// } if (mode == TAV_MODE_SKIP) { // Copy from reference frame @@ -1833,21 +2436,21 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint } // Debug: Check first few coefficients - if (decoder->frame_count == 32) { - fprintf(stderr, " First 10 quantized Y coeffs: "); - for (int i = 0; i < 10 && i < coeff_count; i++) { - fprintf(stderr, "%d ", quantized_y[i]); - } - fprintf(stderr, "\n"); - - // Check for any large quantized values that should produce bright pixels - int max_quant_y = 0; - for (int i = 0; i < coeff_count; i++) { - int abs_val = quantized_y[i] < 0 ? 
-quantized_y[i] : quantized_y[i]; - if (abs_val > max_quant_y) max_quant_y = abs_val; - } - fprintf(stderr, " Max quantized Y coefficient: %d\n", max_quant_y); - } +// if (decoder->frame_count == 32) { +// fprintf(stderr, " First 10 quantized Y coeffs: "); +// for (int i = 0; i < 10 && i < coeff_count; i++) { +// fprintf(stderr, "%d ", quantized_y[i]); +// } +// fprintf(stderr, "\n"); +// + // Check for any large quantized values that should produce bright pixels +// int max_quant_y = 0; +// for (int i = 0; i < coeff_count; i++) { +// int abs_val = quantized_y[i] < 0 ? -quantized_y[i] : quantized_y[i]; +// if (abs_val > max_quant_y) max_quant_y = abs_val; +// } +// fprintf(stderr, " Max quantized Y coefficient: %d\n", max_quant_y); +// } // Dequantize (perceptual for versions 5-8, uniform for 1-4) const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8); @@ -1867,11 +2470,11 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint decoder->header.decomp_levels, qy, 0, decoder->frame_count); // Debug: Check if values survived the function call - if (decoder->frame_count == 32) { - fprintf(stderr, " RIGHT AFTER dequantize_Y returns: first 5 values: %.1f %.1f %.1f %.1f %.1f\n", - decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2], - decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]); - } +// if (decoder->frame_count == 32) { +// fprintf(stderr, " RIGHT AFTER dequantize_Y returns: first 5 values: %.1f %.1f %.1f %.1f %.1f\n", +// decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2], +// decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]); +// } dequantize_dwt_subbands_perceptual(0, qy, quantized_co, decoder->dwt_buffer_co, decoder->header.width, decoder->header.height, @@ -1888,50 +2491,50 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint } // Debug: Check dequantized values using correct subband layout - if 
(decoder->frame_count == 32) { - dwt_subband_info_t subbands[32]; - const int subband_count = calculate_subband_layout(decoder->header.width, decoder->header.height, - decoder->header.decomp_levels, subbands); - - // Find LL band (highest level, type 0) - for (int s = 0; s < subband_count; s++) { - if (subbands[s].level == decoder->header.decomp_levels && subbands[s].subband_type == 0) { - fprintf(stderr, " LL band: level=%d, start=%d, count=%d\n", - subbands[s].level, subbands[s].coeff_start, subbands[s].coeff_count); - fprintf(stderr, " Reading LL first 5 from dwt_buffer_y[0-4]: %.1f %.1f %.1f %.1f %.1f\n", - decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2], - decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]); - - // Find max in CORRECT LL band - float max_ll = -999.0f; - for (int i = 0; i < subbands[s].coeff_count; i++) { - int idx = subbands[s].coeff_start + i; - if (decoder->dwt_buffer_y[idx] > max_ll) max_ll = decoder->dwt_buffer_y[idx]; - } - fprintf(stderr, " Max LL coefficient BEFORE grain removal: %.1f\n", max_ll); - break; - } - } - } +// if (decoder->frame_count == 32) { +// dwt_subband_info_t subbands[32]; +// const int subband_count = calculate_subband_layout(decoder->header.width, decoder->header.height, +// decoder->header.decomp_levels, subbands); +// + // Find LL band (highest level, type 0) +// for (int s = 0; s < subband_count; s++) { +// if (subbands[s].level == decoder->header.decomp_levels && subbands[s].subband_type == 0) { +// fprintf(stderr, " LL band: level=%d, start=%d, count=%d\n", +// subbands[s].level, subbands[s].coeff_start, subbands[s].coeff_count); +// fprintf(stderr, " Reading LL first 5 from dwt_buffer_y[0-4]: %.1f %.1f %.1f %.1f %.1f\n", +// decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[1], decoder->dwt_buffer_y[2], +// decoder->dwt_buffer_y[3], decoder->dwt_buffer_y[4]); +// + // Find max in CORRECT LL band +// float max_ll = -999.0f; +// for (int i = 0; i < subbands[s].coeff_count; i++) { +// 
int idx = subbands[s].coeff_start + i; +// if (decoder->dwt_buffer_y[idx] > max_ll) max_ll = decoder->dwt_buffer_y[idx]; +// } +// fprintf(stderr, " Max LL coefficient BEFORE grain removal: %.1f\n", max_ll); +// break; +// } +// } +// } // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT) remove_grain_synthesis_decoder(decoder->dwt_buffer_y, decoder->header.width, decoder->header.height, decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y); // Debug: Check LL band AFTER grain removal - if (decoder->frame_count == 32) { - int ll_width = decoder->header.width; - int ll_height = decoder->header.height; - for (int l = 0; l < decoder->header.decomp_levels; l++) { - ll_width = (ll_width + 1) / 2; - ll_height = (ll_height + 1) / 2; - } - float max_ll = -999.0f; - for (int i = 0; i < ll_width * ll_height; i++) { - if (decoder->dwt_buffer_y[i] > max_ll) max_ll = decoder->dwt_buffer_y[i]; - } - fprintf(stderr, " Max LL coefficient AFTER grain removal: %.1f\n", max_ll); - } +// if (decoder->frame_count == 32) { +// int ll_width = decoder->header.width; +// int ll_height = decoder->header.height; +// for (int l = 0; l < decoder->header.decomp_levels; l++) { +// ll_width = (ll_width + 1) / 2; +// ll_height = (ll_height + 1) / 2; +// } +// float max_ll = -999.0f; +// for (int i = 0; i < ll_width * ll_height; i++) { +// if (decoder->dwt_buffer_y[i] > max_ll) max_ll = decoder->dwt_buffer_y[i]; +// } +// fprintf(stderr, " Max LL coefficient AFTER grain removal: %.1f\n", max_ll); +// } // Apply inverse DWT with correct non-power-of-2 dimension handling // Note: quantized arrays freed at write_frame label @@ -1943,24 +2546,24 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint decoder->header.decomp_levels, decoder->header.wavelet_filter); // Debug: Check spatial domain values after IDWT - if (decoder->frame_count == 32) { - float max_y_spatial = -999.0f; - for (int i = 0; i < 
decoder->frame_size; i++) { - if (decoder->dwt_buffer_y[i] > max_y_spatial) max_y_spatial = decoder->dwt_buffer_y[i]; - } - fprintf(stderr, " Max Y in spatial domain AFTER IDWT: %.1f\n", max_y_spatial); - } +// if (decoder->frame_count == 32) { +// float max_y_spatial = -999.0f; +// for (int i = 0; i < decoder->frame_size; i++) { +// if (decoder->dwt_buffer_y[i] > max_y_spatial) max_y_spatial = decoder->dwt_buffer_y[i]; +// } +// fprintf(stderr, " Max Y in spatial domain AFTER IDWT: %.1f\n", max_y_spatial); +// } // Debug: Check spatial domain values after IDWT (original debug) - if (decoder->frame_count < 1) { - fprintf(stderr, " After IDWT - First 10 Y values: "); - for (int i = 0; i < 10 && i < decoder->frame_size; i++) { - fprintf(stderr, "%.1f ", decoder->dwt_buffer_y[i]); - } - fprintf(stderr, "\n"); - fprintf(stderr, " Y range: min=%.1f, max=%.1f\n", - decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[decoder->frame_size-1]); - } +// if (decoder->frame_count < 1) { +// fprintf(stderr, " After IDWT - First 10 Y values: "); +// for (int i = 0; i < 10 && i < decoder->frame_size; i++) { +// fprintf(stderr, "%.1f ", decoder->dwt_buffer_y[i]); +// } +// fprintf(stderr, "\n"); +// fprintf(stderr, " Y range: min=%.1f, max=%.1f\n", +// decoder->dwt_buffer_y[0], decoder->dwt_buffer_y[decoder->frame_size-1]); +// } // Handle P-frame delta accumulation (in YCoCg float space) if (packet_type == TAV_PACKET_PFRAME && mode == TAV_MODE_DELTA) { @@ -1989,14 +2592,14 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint } // Track max values for debugging - if (decoder->frame_count == 1000) { - if (decoder->dwt_buffer_y[i] > max_y) max_y = decoder->dwt_buffer_y[i]; - if (decoder->dwt_buffer_co[i] > max_co) max_co = decoder->dwt_buffer_co[i]; - if (decoder->dwt_buffer_cg[i] > max_cg) max_cg = decoder->dwt_buffer_cg[i]; - if (r > max_r) max_r = r; - if (g > max_g) max_g = g; - if (b > max_b) max_b = b; - } +// if (decoder->frame_count == 1000) { +// 
if (decoder->dwt_buffer_y[i] > max_y) max_y = decoder->dwt_buffer_y[i]; +// if (decoder->dwt_buffer_co[i] > max_co) max_co = decoder->dwt_buffer_co[i]; +// if (decoder->dwt_buffer_cg[i] > max_cg) max_cg = decoder->dwt_buffer_cg[i]; +// if (r > max_r) max_r = r; +// if (g > max_g) max_g = g; +// if (b > max_b) max_b = b; +// } // RGB byte order for FFmpeg rgb24 decoder->current_frame_rgb[i * 3 + 0] = r; @@ -2004,23 +2607,23 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint decoder->current_frame_rgb[i * 3 + 2] = b; } - if (decoder->frame_count == 1000) { - fprintf(stderr, "\n=== Frame 1000 Value Analysis ===\n"); - fprintf(stderr, "Max YCoCg values: Y=%.1f, Co=%.1f, Cg=%.1f\n", max_y, max_co, max_cg); - fprintf(stderr, "Max RGB values: R=%d, G=%d, B=%d\n", max_r, max_g, max_b); - } +// if (decoder->frame_count == 1000) { +// fprintf(stderr, "\n=== Frame 1000 Value Analysis ===\n"); +// fprintf(stderr, "Max YCoCg values: Y=%.1f, Co=%.1f, Cg=%.1f\n", max_y, max_co, max_cg); +// fprintf(stderr, "Max RGB values: R=%d, G=%d, B=%d\n", max_r, max_g, max_b); +// } // Debug: Check RGB output - if (decoder->frame_count < 1) { - fprintf(stderr, " First 5 pixels RGB: "); - for (int i = 0; i < 5 && i < decoder->frame_size; i++) { - fprintf(stderr, "(%d,%d,%d) ", - decoder->current_frame_rgb[i*3], - decoder->current_frame_rgb[i*3+1], - decoder->current_frame_rgb[i*3+2]); - } - fprintf(stderr, "\n"); - } +// if (decoder->frame_count < 1) { +// fprintf(stderr, " First 5 pixels RGB: "); +// for (int i = 0; i < 5 && i < decoder->frame_size; i++) { +// fprintf(stderr, "(%d,%d,%d) ", +// decoder->current_frame_rgb[i*3], +// decoder->current_frame_rgb[i*3+1], +// decoder->current_frame_rgb[i*3+2]); +// } +// fprintf(stderr, "\n"); +// } // Update reference YCoCg frame memcpy(decoder->reference_ycocg_y, decoder->dwt_buffer_y, decoder->frame_size * sizeof(float)); @@ -2110,6 +2713,9 @@ static void print_usage(const char *prog) { } int main(int argc, char 
*argv[]) { + // Ignore SIGPIPE to prevent process termination if FFmpeg exits early + signal(SIGPIPE, SIG_IGN); + char *input_file = NULL; char *output_file = NULL; int verbose = 0; @@ -2146,9 +2752,22 @@ int main(int argc, char *argv[]) { return 1; } - tav_decoder_t *decoder = tav_decoder_init(input_file, output_file); + // Create temporary audio file path + char temp_audio_file[256]; + snprintf(temp_audio_file, sizeof(temp_audio_file), "/tmp/tav_audio_%d.wav", getpid()); + + // Pass 1: Extract audio to WAV file + if (extract_audio_to_wav(input_file, temp_audio_file, verbose) < 0) { + fprintf(stderr, "Failed to extract audio\n"); + unlink(temp_audio_file); // Clean up temp file if it exists + return 1; + } + + // Pass 2: Decode video with audio file + tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file); if (!decoder) { fprintf(stderr, "Failed to initialize decoder\n"); + unlink(temp_audio_file); // Clean up temp file return 1; } @@ -2420,20 +3039,28 @@ int main(int argc, char *argv[]) { decoder->header.wavelet_filter); // Debug: Check spatial coefficients after inverse temporal DWT (before inverse spatial DWT) - if (is_ezbc) { - float max_y = 0.0f, min_y = 0.0f; - for (int i = 0; i < num_pixels; i++) { - if (gop_y[0][i] > max_y) max_y = gop_y[0][i]; - if (gop_y[0][i] < min_y) min_y = gop_y[0][i]; - } - fprintf(stderr, "[GOP-EZBC] After inverse temporal DWT, Frame 0 Y spatial coeffs range: [%.1f, %.1f], first 5: %.1f %.1f %.1f %.1f %.1f\n", - min_y, max_y, - gop_y[0][0], gop_y[0][1], gop_y[0][2], gop_y[0][3], gop_y[0][4]); - } +// if (is_ezbc) { +// float max_y = 0.0f, min_y = 0.0f; +// for (int i = 0; i < num_pixels; i++) { +// if (gop_y[0][i] > max_y) max_y = gop_y[0][i]; +// if (gop_y[0][i] < min_y) min_y = gop_y[0][i]; +// } +// fprintf(stderr, "[GOP-EZBC] After inverse temporal DWT, Frame 0 Y spatial coeffs range: [%.1f, %.1f], first 5: %.1f %.1f %.1f %.1f %.1f\n", +// min_y, max_y, +// gop_y[0][0], gop_y[0][1], gop_y[0][2], 
gop_y[0][3], gop_y[0][4]); +// } // Convert YCoCg→RGB and write all GOP frames const int is_ictcp = (decoder->header.version % 2 == 0); + // DEBUG: Print frame size calculation +// if (decoder->frame_count == 0) { +// fprintf(stderr, "[DEBUG] decoder->frame_size=%d, decoder->header.width=%d, decoder->header.height=%d\n", +// decoder->frame_size, decoder->header.width, decoder->header.height); +// fprintf(stderr, "[DEBUG] bytes_to_write=%zu (should be %d)\n", +// (size_t)decoder->frame_size * 3, decoder->header.width * decoder->header.height * 3); +// } + for (int t = 0; t < gop_size; t++) { // Allocate frame buffer uint8_t *frame_rgb = malloc(decoder->frame_size * 3); @@ -2458,6 +3085,16 @@ int main(int argc, char *argv[]) { // Write frame to FFmpeg video pipe const size_t bytes_to_write = decoder->frame_size * 3; + + // DEBUG: Verify we're writing to correct pipe +// if (decoder->frame_count == 0 && t == 0) { +// fprintf(stderr, "[DEBUG] Writing frame to video_pipe=%p, bytes_to_write=%zu\n", +// (void*)decoder->video_pipe, bytes_to_write); +// fprintf(stderr, "[DEBUG] First 10 RGB bytes: %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n", +// frame_rgb[0], frame_rgb[1], frame_rgb[2], frame_rgb[3], frame_rgb[4], +// frame_rgb[5], frame_rgb[6], frame_rgb[7], frame_rgb[8], frame_rgb[9]); +// } + const size_t bytes_written = fwrite(frame_rgb, 1, bytes_to_write, decoder->video_pipe); if (bytes_written != bytes_to_write) { fprintf(stderr, "Error: Failed to write GOP frame %d to FFmpeg (wrote %zu/%zu bytes)\n", @@ -2494,23 +3131,15 @@ int main(int argc, char *argv[]) { continue; } - // Handle TAD audio packets (custom format: 2-byte sample_count + 4-byte payload_size) + // Handle TAD audio packets (already extracted in Pass 1, just skip) if (packet_type == TAV_PACKET_AUDIO_TAD) { - uint16_t sample_count; - uint32_t payload_size; - if (fread(&sample_count, 2, 1, decoder->input_fp) != 1 || - fread(&payload_size, 4, 1, decoder->input_fp) != 1) { - fprintf(stderr, 
"\nError: Failed to read TAD packet header\n"); - result = -1; - break; - } - if (verbose && total_packets < 20) { - fprintf(stderr, "Packet %d: TAD (0x%02X), %u samples, %u payload bytes - skipping\n", - total_packets, packet_type, sample_count, payload_size); - } - // Skip TAD data for now - fseek(decoder->input_fp, payload_size, SEEK_CUR); - fprintf(stderr, "\nWarning: TAD audio decoding not yet fully implemented (skipping %u samples)\n", sample_count); + uint16_t sample_count_wrapper; + uint32_t payload_size_plus_7; + fread(&sample_count_wrapper, 2, 1, decoder->input_fp); + fread(&payload_size_plus_7, 4, 1, decoder->input_fp); + + // Skip TAD chunk (payload_size_plus_7 includes header and data) + fseek(decoder->input_fp, payload_size_plus_7, SEEK_CUR); continue; } @@ -2603,9 +3232,17 @@ int main(int argc, char *argv[]) { break; case TAV_PACKET_AUDIO_MP2: - case TAV_PACKET_AUDIO_PCM8: case TAV_PACKET_AUDIO_TRACK: - // Skip audio for now + // MP2 audio - write directly to audio pipe + // Note: FFmpeg cannot decode MP2 from raw stream, so we skip for now + if (verbose && total_packets < 20) { + fprintf(stderr, "Skipping MP2 audio packet (%u bytes) - not yet supported\n", packet_size); + } + fseek(decoder->input_fp, packet_size, SEEK_CUR); + break; + + case TAV_PACKET_AUDIO_PCM8: + // PCM8 audio - already extracted in Pass 1, just skip fseek(decoder->input_fp, packet_size, SEEK_CUR); break; @@ -2635,9 +3272,16 @@ int main(int argc, char *argv[]) { if (result < 0) { fprintf(stderr, "Decoding error occurred\n"); + unlink(temp_audio_file); // Clean up temp file return 1; } printf("Successfully decoded to: %s\n", output_file); + + // Clean up temporary audio file + if (unlink(temp_audio_file) == 0 && verbose) { + fprintf(stderr, "Cleaned up temporary audio file: %s\n", temp_audio_file); + } + return 0; }