diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index caad91a..0058f65 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -422,8 +422,12 @@ seqread.skip(3) header.fileRole = seqread.readOneByte() -if (header.version < 1 || header.version > 8) { - printerrln(`Error: Unsupported TAV version ${header.version}`) +// Extract temporal motion coder from version (versions 9-16 use CDF 5/3, 1-8 use Haar) +const baseVersion = (header.version > 8) ? (header.version - 8) : header.version +header.temporalMotionCoder = (header.version > 8) ? 1 : 0 + +if (baseVersion < 1 || baseVersion > 8) { + printerrln(`Error: Unsupported TAV base version ${baseVersion}`) errorlevel = 1 return } @@ -1339,7 +1343,8 @@ try { header.channelLayout, header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, - bufferOffset + bufferOffset, + header.temporalMotionCoder ) asyncDecodeInProgress = true @@ -1412,7 +1417,8 @@ try { header.channelLayout, header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, - nextOffset + nextOffset, + header.temporalMotionCoder ) // Set async decode tracking variables @@ -1454,7 +1460,8 @@ try { header.channelLayout, header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, - decodingOffset + decodingOffset, + header.temporalMotionCoder ) // Set async decode tracking variables @@ -1821,7 +1828,8 @@ try { header.channelLayout, header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, - readyGopData.slot * SLOT_SIZE + readyGopData.slot * SLOT_SIZE, + header.temporalMotionCoder ) // CRITICAL FIX: Set async decode tracking variables so decode is properly tracked @@ -1998,7 +2006,8 @@ try { header.channelLayout, header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, - decodingGopData.slot * SLOT_SIZE + decodingGopData.slot * SLOT_SIZE, + header.temporalMotionCoder ) // CRITICAL FIX: Set async decode tracking variables so decode is properly tracked @@ -2038,7 +2047,8 @@ try { header.channelLayout, header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, - readyGopData.slot * SLOT_SIZE + readyGopData.slot * SLOT_SIZE, + header.temporalMotionCoder ) readyGopData.needsDecode = false readyGopData.startTime = sys.nanoTime() @@ -2115,7 +2125,8 @@ try { header.channelLayout, header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, - targetOffset + targetOffset, + header.temporalMotionCoder ) asyncDecodeInProgress = true @@ -2211,7 +2222,8 @@ try { } catch (e) { serial.printerr(`TAV decode error: ${e}`) - e.printStackTrace() + if (e.printStackTrace) + e.printStackTrace() errorlevel = 1 } finally { diff --git a/terranmon.txt b/terranmon.txt index 446d01f..d613362 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -905,6 +905,7 @@ transmission capability, and region-of-interest coding. ## Header (32 bytes) uint8 Magic[8]: "\x1F TSVM TAV" or "\x1F TSVM TAP" uint8 Version: + Base version number: - 1 = YCoCg-R multi-tile uniform - 2 = ICtCp multi-tile uniform - 3 = YCoCg-R monoblock uniform @@ -913,6 +914,8 @@ transmission capability, and region-of-interest coding. - 6 = ICtCp monoblock perceptual - 7 = YCoCg-R multi-tile perceptual - 8 = ICtCp multi-tile perceptual + When motion coder is Haar, take base version number. + When motion coder is CDF 5/3, add 8 to the base version number. uint16 Width: picture width in pixels. Columns count for Videotex-only file. uint16 Height: picture height in pixels. Rows count for Videotex-only file. uint8 FPS: frames per second. Use 0x00 for still pictures diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 892d94b..edde5d9 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -6297,65 +6297,25 @@ class GraphicsJSR223Delegate(private val vm: VM) { if (length < 2) return val temp = FloatArray(length) - val half = (length + 1) / 2 // Handle odd lengths properly + val half = (length + 1) / 2 - // Split into low and high frequency components (matching encoder layout) + // Copy low-pass and high-pass subbands to temp + System.arraycopy(data, 0, temp, 0, length) + + // Undo update step (low-pass) for (i in 0 until half) { - temp[i] = data[i] // Low-pass coefficients (first half) - } - for (i in 0 until length / 2) { - if (half + i < length && half + i < data.size) { - temp[half + i] = data[half + i] // High-pass coefficients (second half) - } + val update = 0.25f * ((if (i > 0) temp[half + i - 1] else 0.0f) + + (if (i < half - 1) temp[half + i] else 0.0f)) + temp[i] -= update } - // 5/3 inverse lifting (undo forward steps in reverse order) - - // Step 2: Undo update step (1/4 coefficient) - JPEG2000 symmetric extension + // Undo predict step (high-pass) and interleave samples for (i in 0 until half) { - val leftIdx = half + i - 1 - val centerIdx = half + i - - // Symmetric extension for boundary handling - val left = when { - leftIdx >= 0 && leftIdx < length -> temp[leftIdx] - centerIdx < length && centerIdx + 1 < length -> temp[centerIdx + 1] // Mirror - centerIdx < length -> temp[centerIdx] - else -> 0.0f - } - val right = if (centerIdx < length) temp[centerIdx] else 0.0f - temp[i] -= 0.25f * (left + right) - } - - // Step 1: Undo predict step (1/2 coefficient) - JPEG2000 symmetric extension - for (i in 0 until length / 2) { - if (half + i < length) { - val left = temp[i] - // Symmetric extension for right boundary - val right = if (i < half - 1) temp[i + 1] else if (half > 2) temp[half - 2] else temp[half - 1] - temp[half + i] += 0.5f * (left + right) // ADD to undo the subtraction in encoder - } - } - - // Simple reconstruction (revert to working version) - for (i in 0 until length) { - if (i % 2 == 0) { - // Even positions: low-pass coefficients - data[i] = temp[i / 2] - } else { - // Odd positions: high-pass coefficients - val idx = i / 2 - if (half + idx < length) { - data[i] = temp[half + idx] - } else { - // Symmetric extension: mirror the last available high-pass coefficient - val lastHighIdx = (length / 2) - 1 - if (lastHighIdx >= 0 && half + lastHighIdx < length) { - data[i] = temp[half + lastHighIdx] - } else { - data[i] = 0.0f - } - } + data[2 * i] = temp[i] // Even samples (low-pass) + val idx = 2 * i + 1 + if (idx < length) { + val pred = 0.5f * (temp[i] + (if (i < half - 1) temp[i + 1] else temp[i])) + data[idx] = temp[half + i] + pred // Odd samples (high-pass) } } } @@ -6514,7 +6474,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { spatialLevels: Int = 6, temporalLevels: Int = 2, entropyCoder: Int = 0, - bufferOffset: Long = 0 + bufferOffset: Long = 0, + temporalMotionCoder: Int = 0 ): Array { val dbgOut = HashMap() dbgOut["qY"] = qYGlobal @@ -6634,9 +6595,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Step 6: Apply inverse 3D DWT using GOP dimensions (may be cropped) - tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter) - tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter) - tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter) + tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder) + tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder) + tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder) // Step 8: Convert to RGB and composite to full frame // With crop encoding, center the cropped frame and fill letterbox areas with black @@ -6780,7 +6741,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { spatialLevels: Int = 6, temporalLevels: Int = 3, entropyCoder: Int = 0, - bufferOffset: Long = 0 + bufferOffset: Long = 0, + temporalMotionCoder: Int = 0 ) { // Cancel any existing decode thread asyncDecodeThread?.interrupt() @@ -6798,7 +6760,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout, spatialFilter, spatialLevels, temporalLevels, - entropyCoder, bufferOffset + entropyCoder, bufferOffset, temporalMotionCoder ) asyncDecodeResult = result asyncDecodeComplete.set(true) @@ -6943,12 +6905,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { // ============================================================================= /** - * Inverse 1D temporal DWT (Haar) along time axis - * Reuses existing Haar inverse implementation + * Inverse 1D temporal DWT along time axis + * Supports both Haar and CDF 5/3 wavelets + * @param temporalMotionCoder 0=Haar, 1=CDF 5/3 */ - private fun tavApplyTemporalDWTInverse1D(data: FloatArray, numFrames: Int) { + private fun tavApplyTemporalDWTInverse1D(data: FloatArray, numFrames: Int, temporalMotionCoder: Int = 0) { if (numFrames < 2) return - tavApplyDWTHaarInverse1D(data, numFrames) + if (temporalMotionCoder == 0) { + tavApplyDWTHaarInverse1D(data, numFrames) + } else { + tavApplyDWT53Inverse1D(data, numFrames) + } } /** @@ -6962,6 +6929,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { * @param spatialLevels Spatial decomposition levels (typically 6) * @param temporalLevels Temporal decomposition levels (typically 2) * @param spatialFilter Spatial wavelet filter type (0=5/3, 1=9/7, 255=Haar) + * @param temporalMotionCoder Temporal wavelet type (0=Haar, 1=CDF 5/3) */ private fun tavApplyInverse3DDWT( gopData: Array, @@ -6970,7 +6938,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { numFrames: Int, spatialLevels: Int, temporalLevels: Int, - spatialFilter: Int + spatialFilter: Int, + temporalMotionCoder: Int = 0 ) { // Step 1: Apply inverse 2D spatial DWT to each temporal subband (each frame) // This is required even for single frames (I-frames) to convert from DWT coefficients to pixel space @@ -7008,7 +6977,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (level in temporalLevels - 1 downTo 0) { val levelFrames = temporalLengths[level] if (levelFrames >= 2) { - tavApplyTemporalDWTInverse1D(temporalLine, levelFrames) + tavApplyTemporalDWTInverse1D(temporalLine, levelFrames, temporalMotionCoder) } } diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c index fc4b557..bfff3f5 100644 --- a/video_encoder/decoder_tav.c +++ b/video_encoder/decoder_tav.c @@ -993,11 +993,34 @@ static void dwt_97_inverse_1d(float *data, int length) { free(temp); } -// 5/3 inverse DWT (simplified - uses 9/7 for now) +// 5/3 inverse DWT using lifting scheme (JPEG 2000 reversible filter) static void dwt_53_inverse_1d(float *data, int length) { if (length < 2) return; - // TODO: Implement proper 5/3 from TSVM if needed - dwt_97_inverse_1d(data, length); + + float *temp = malloc(length * sizeof(float)); + int half = (length + 1) / 2; + + // Copy low-pass and high-pass subbands to temp + memcpy(temp, data, length * sizeof(float)); + + // Undo update step (low-pass) + for (int i = 0; i < half; i++) { + float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) + + (i < half - 1 ? temp[half + i] : 0)); + temp[i] -= update; + } + + // Undo predict step (high-pass) and interleave samples + for (int i = 0; i < half; i++) { + data[2 * i] = temp[i]; // Even samples (low-pass) + int idx = 2 * i + 1; + if (idx < length) { + float pred = 0.5f * (temp[i] + (i < half - 1 ? temp[i + 1] : temp[i])); + data[idx] = temp[half + i] + pred; // Odd samples (high-pass) + } + } + + free(temp); } // Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling) @@ -1180,7 +1203,8 @@ static void dwt_haar_inverse_1d(float *data, int length) { // Order: SPATIAL first (each frame), then TEMPORAL (across frames) static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg, int width, int height, int gop_size, - int spatial_levels, int temporal_levels, int filter_type) { + int spatial_levels, int temporal_levels, int filter_type, + int temporal_motion_coder) { // Step 1: Apply inverse 2D spatial DWT to each frame for (int t = 0; t < gop_size; t++) { apply_inverse_dwt_multilevel(gop_y[t], width, height, spatial_levels, filter_type); @@ -1212,7 +1236,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg, for (int level = temporal_levels - 1; level >= 0; level--) { const int level_frames = temporal_lengths[level]; if (level_frames >= 2) { - dwt_haar_inverse_1d(temporal_line, level_frames); + // Use selected temporal wavelet (0=Haar, 1=CDF 5/3) + if (temporal_motion_coder == 0) { + dwt_haar_inverse_1d(temporal_line, level_frames); + } else { + dwt_53_inverse_1d(temporal_line, level_frames); + } } } for (int t = 0; t < gop_size; t++) { @@ -1226,7 +1255,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg, for (int level = temporal_levels - 1; level >= 0; level--) { const int level_frames = temporal_lengths[level]; if (level_frames >= 2) { - dwt_haar_inverse_1d(temporal_line, level_frames); + // Use selected temporal wavelet (0=Haar, 1=CDF 5/3) + if (temporal_motion_coder == 0) { + dwt_haar_inverse_1d(temporal_line, level_frames); + } else { + dwt_53_inverse_1d(temporal_line, level_frames); + } } } for (int t = 0; t < gop_size; t++) { @@ -1240,7 +1274,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg, for (int level = temporal_levels - 1; level >= 0; level--) { const int level_frames = temporal_lengths[level]; if (level_frames >= 2) { - dwt_haar_inverse_1d(temporal_line, level_frames); + // Use selected temporal wavelet (0=Haar, 1=CDF 5/3) + if (temporal_motion_coder == 0) { + dwt_haar_inverse_1d(temporal_line, level_frames); + } else { + dwt_53_inverse_1d(temporal_line, level_frames); + } } } for (int t = 0; t < gop_size; t++) { @@ -1706,6 +1745,7 @@ typedef struct { int frame_count; int frame_size; int is_monoblock; // True if version 3-6 (single tile mode) + int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version) // Screen masking (letterbox/pillarbox) - array of geometry changes screen_mask_entry_t *screen_masks; @@ -1942,7 +1982,11 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu } decoder->frame_size = decoder->header.width * decoder->header.height; - decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6); + // Extract temporal motion coder from version (versions 9-16 use CDF 5/3, 1-8 use Haar) + decoder->temporal_motion_coder = (decoder->header.version > 8) ? 1 : 0; + // Extract base version for determining monoblock mode + uint8_t base_version = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version; + decoder->is_monoblock = (base_version >= 3 && base_version <= 6); decoder->audio_file_path = strdup(audio_file); // Phase 2: Initialize decoding dimensions to full frame (will be updated by Screen Mask packets) @@ -2337,7 +2381,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint // Dequantise (perceptual for versions 5-8, uniform for 1-4) // Phase 2: Use decoding dimensions and temporary buffers - const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8); + // Extract base version for perceptual check + uint8_t base_version = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version; + const int is_perceptual = (base_version >= 5 && base_version <= 8); const int is_ezbc = (decoder->header.entropy_coder == 1); if (is_ezbc && is_perceptual) { @@ -2472,7 +2518,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint } // Convert YCoCg-R/ICtCp to RGB for cropped region - const int is_ictcp = (decoder->header.version % 2 == 0); + // Extract base version for ICtCp check (even versions use ICtCp) + uint8_t base_version_rgb = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version; + const int is_ictcp = (base_version_rgb % 2 == 0); for (int i = 0; i < decoding_pixels; i++) { uint8_t r, g, b; @@ -2936,7 +2984,9 @@ int main(int argc, char *argv[]) { } // Dequantise with temporal scaling (perceptual quantisation for versions 5-8) - const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8); + // Extract base version for perceptual check + uint8_t base_version_gop = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version; + const int is_perceptual = (base_version_gop >= 5 && base_version_gop <= 8); const int is_ezbc = (decoder->header.entropy_coder == 1); const int temporal_levels = 2; // Fixed for TAV GOP encoding @@ -3034,7 +3084,7 @@ int main(int argc, char *argv[]) { // Phase 2: Use GOP dimensions (may be cropped) for inverse DWT apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, gop_width, gop_height, gop_size, decoder->header.decomp_levels, temporal_levels, - decoder->header.wavelet_filter); + decoder->header.wavelet_filter, decoder->temporal_motion_coder); // Debug: Check Y values after inverse DWT if (verbose && decoder->frame_count == 0) { diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 74bd9b2..7269d8f 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -18,7 +18,7 @@ #include #include -#define ENCODER_VENDOR_STRING "Encoder-TAV 20251122 (3d-dwt,tad,ssf-tc)" +#define ENCODER_VENDOR_STRING "Encoder-TAV 20251123 (3d-dwt,tad,ssf-tc,cdf53-motion)" // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" @@ -1867,6 +1867,7 @@ typedef struct tav_encoder_s { float **temporal_gop_co_frames; // [frame][pixel] - Co channel for each GOP frame float **temporal_gop_cg_frames; // [frame][pixel] - Cg channel for each GOP frame int temporal_decomp_levels; // Number of temporal DWT levels (default: 2) + int temporal_motion_coder; // Temporal wavelet type: 0=Haar, 1=CDF 5/3 (default: 1) // MC-EZBC block-based motion compensation for temporal 3D DWT (0x13 packets) int temporal_enable_mcezbc; // Flag to enable MC-EZBC block compensation (default: 0, uses translation if temporal_dwt enabled) @@ -2412,6 +2413,7 @@ static void show_usage(const char *program_name) { printf(" --enable-delta Enable delta encoding\n"); printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n"); printf(" --3d-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform; the default encoding mode)\n"); + printf(" --motion-coder N Temporal wavelet: 0=Haar, 1=CDF 5/3 (default: auto-select based on resolution; use 0 for older version compatibility)\n"); printf(" --single-pass Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n"); // printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n"); printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) entropy coding. May help reducing file size on high-quality videos\n"); @@ -2514,6 +2516,7 @@ static tav_encoder_t* create_encoder(void) { enc->temporal_gop_width = 0; // Will be set when first frame is added to GOP enc->temporal_gop_height = 0; // Will be set when first frame is added to GOP enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 3 levels of temporal DWT (24 -> 12 -> 6 -> 3 temporal subbands) + enc->temporal_motion_coder = -1; // Will be set automatically based on resolution (unless overridden) enc->temporal_gop_rgb_frames = NULL; enc->temporal_gop_y_frames = NULL; enc->temporal_gop_co_frames = NULL; @@ -2836,7 +2839,7 @@ static int initialise_encoder(tav_encoder_t *enc) { static void dwt_53_forward_1d(float *data, int length) { if (length < 2) return; - float *temp = malloc(length * sizeof(float)); + float *temp = calloc(length, sizeof(float)); // Use calloc to zero-initialize for odd-length arrays int half = (length + 1) / 2; // Handle odd lengths properly // Predict step (high-pass) @@ -2846,6 +2849,7 @@ static void dwt_53_forward_1d(float *data, int length) { float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i])); temp[half + i] = data[idx] - pred; } + // Note: For odd lengths, last high-pass position remains zero (from calloc) } // Update step (low-pass) @@ -5612,7 +5616,12 @@ static void dwt_3d_forward(tav_encoder_t *enc, float **gop_data, int width, int for (int level = 0; level < temporal_levels; level++) { int level_frames = temporal_lengths[level]; if (level_frames >= 2) { - dwt_haar_forward_1d(temporal_line, level_frames); // Haar better for imperfect alignment + // Use selected temporal wavelet (0=Haar, 1=CDF 5/3) + if (enc->temporal_motion_coder == 0) { + dwt_haar_forward_1d(temporal_line, level_frames); + } else { + dwt_53_forward_1d(temporal_line, level_frames); + } } } @@ -7425,7 +7434,8 @@ static int write_tav_header(tav_encoder_t *enc) { // Magic number fwrite(TAV_MAGIC, 1, 8, enc->output_fp); - // Version (dynamic based on colour space, monoblock mode, and perceptual tuning) + // Version (dynamic based on colour space, monoblock mode, perceptual tuning, and motion coder) + // Base versions 1-8, add 8 if temporal_motion_coder == 1 (CDF 5/3) uint8_t version; if (enc->monoblock) { if (enc->perceptual_tuning) { @@ -7440,6 +7450,10 @@ static int write_tav_header(tav_encoder_t *enc) { version = enc->ictcp_mode ? 2 : 1; } } + // Add 8 if using CDF 5/3 temporal wavelet (motion_coder == 1) + if (enc->temporal_motion_coder == 1) { + version += 8; + } fputc(version, enc->output_fp); // Video parameters @@ -10705,6 +10719,7 @@ int main(int argc, char *argv[]) { {"temporal-3d", no_argument, 0, 1019}, {"dwt-3d", no_argument, 0, 1019}, {"3d-dwt", no_argument, 0, 1019}, + {"motion-coder", required_argument, 0, 1030}, {"mc-ezbc", no_argument, 0, 1020}, {"residual-coding", no_argument, 0, 1021}, {"adaptive-blocks", no_argument, 0, 1022}, @@ -10946,6 +10961,12 @@ int main(int argc, char *argv[]) { enc->preprocess_mode = PREPROCESS_RAW; printf("Raw coefficient mode enabled (no significance map preprocessing)\n"); break; + case 1030: // --motion-coder + enc->temporal_motion_coder = CLAMP(atoi(optarg), 0, 1); + printf("Temporal motion coder set to: %d (%s)\n", + enc->temporal_motion_coder, + enc->temporal_motion_coder == 0 ? "Haar" : "CDF 5/3"); + break; case 1050: // --single-pass enc->two_pass_mode = 0; printf("Two-pass wavelet-based scene change detection disabled\n"); @@ -10987,6 +11008,26 @@ int main(int argc, char *argv[]) { } } + // Smart preset for temporal motion coder based on resolution + // For small videos (<500k pixels), use CDF 5/3 (better for fine details) + // For larger videos, use Haar (better compression, smoother motion matters less) + if (enc->temporal_motion_coder == -1) { + int num_pixels = enc->width * enc->height; + if (num_pixels >= 500000) { + enc->temporal_motion_coder = 0; // Haar + if (enc->verbose) { + printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels)\n", + enc->width, enc->height, num_pixels); + } + } else { + enc->temporal_motion_coder = 1; // CDF 5/3 + if (enc->verbose) { + printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels)\n", + enc->width, enc->height, num_pixels); + } + } + } + // generate division series enc->widths = malloc((enc->decomp_levels + 2) * sizeof(int)); enc->heights = malloc((enc->decomp_levels + 2) * sizeof(int)); diff --git a/video_encoder/tav_inspector.c b/video_encoder/tav_inspector.c index 12029b2..4c680d8 100644 --- a/video_encoder/tav_inspector.c +++ b/video_encoder/tav_inspector.c @@ -498,6 +498,8 @@ int main(int argc, char *argv[]) { if (!opts.summary_only) { // Parse header fields uint8_t version = header[8]; + uint8_t base_version = (version > 8) ? (version - 8) : version; + uint8_t temporal_motion_coder = (version > 8) ? 1 : 0; uint16_t width = *((uint16_t*)&header[9]); uint16_t height = *((uint16_t*)&header[11]); uint8_t fps = header[13]; @@ -516,13 +518,15 @@ int main(int argc, char *argv[]) { static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096}; static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"}; - int is_monoblock = (3 <= version && version <= 6); - int is_perceptual = (5 <= version && version <= 8); + int is_monoblock = (3 <= base_version && base_version <= 6); + int is_perceptual = (5 <= base_version && base_version <= 8); static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, uniform", "YCoCg monoblock, uniform", "ICtCp monoblock, uniform", "YCoCg monoblock, perceptual", "ICtCp monoblock, perceptual", "YCoCg tiled, perceptual", "ICtCp tiled, perceptual"}; +static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"}; printf("TAV Header:\n"); - printf(" Version: %d (%s)\n", version, VERDESC[version]); + printf(" Version: %d (base: %d - %s, temporal: %s)\n", + version, base_version, VERDESC[base_version], TEMPORAL_WAVELET[temporal_motion_coder]); printf(" Resolution: %dx%d\n", width, height); printf(" Frame rate: %d fps", fps); if (video_flags & 0x02) printf(" (NTSC)");