TAV update: CDF 5/3 for motion coder

This commit is contained in:
minjaesong
2025-11-23 18:16:12 +09:00
parent e928d2d3ec
commit 1c7ab17b1c
6 changed files with 174 additions and 95 deletions

View File

@@ -422,8 +422,12 @@ seqread.skip(3)
header.fileRole = seqread.readOneByte()
if (header.version < 1 || header.version > 8) {
printerrln(`Error: Unsupported TAV version ${header.version}`)
// Extract temporal motion coder from version (versions 9-16 use CDF 5/3, 1-8 use Haar)
const baseVersion = (header.version > 8) ? (header.version - 8) : header.version
header.temporalMotionCoder = (header.version > 8) ? 1 : 0
if (baseVersion < 1 || baseVersion > 8) {
printerrln(`Error: Unsupported TAV base version ${baseVersion}`)
errorlevel = 1
return
}
@@ -1339,7 +1343,8 @@ try {
header.channelLayout,
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
bufferOffset
bufferOffset,
header.temporalMotionCoder
)
asyncDecodeInProgress = true
@@ -1412,7 +1417,8 @@ try {
header.channelLayout,
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
nextOffset
nextOffset,
header.temporalMotionCoder
)
// Set async decode tracking variables
@@ -1454,7 +1460,8 @@ try {
header.channelLayout,
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
decodingOffset
decodingOffset,
header.temporalMotionCoder
)
// Set async decode tracking variables
@@ -1821,7 +1828,8 @@ try {
header.channelLayout,
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
readyGopData.slot * SLOT_SIZE
readyGopData.slot * SLOT_SIZE,
header.temporalMotionCoder
)
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
@@ -1998,7 +2006,8 @@ try {
header.channelLayout,
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
decodingGopData.slot * SLOT_SIZE
decodingGopData.slot * SLOT_SIZE,
header.temporalMotionCoder
)
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
@@ -2038,7 +2047,8 @@ try {
header.channelLayout,
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
readyGopData.slot * SLOT_SIZE
readyGopData.slot * SLOT_SIZE,
header.temporalMotionCoder
)
readyGopData.needsDecode = false
readyGopData.startTime = sys.nanoTime()
@@ -2115,7 +2125,8 @@ try {
header.channelLayout,
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
targetOffset
targetOffset,
header.temporalMotionCoder
)
asyncDecodeInProgress = true
@@ -2211,6 +2222,7 @@ try {
}
catch (e) {
serial.printerr(`TAV decode error: ${e}`)
if (e.printStackTrace)
e.printStackTrace()
errorlevel = 1
}

View File

@@ -905,6 +905,7 @@ transmission capability, and region-of-interest coding.
## Header (32 bytes)
uint8 Magic[8]: "\x1F TSVM TAV" or "\x1F TSVM TAP"
uint8 Version:
Base version number:
- 1 = YCoCg-R multi-tile uniform
- 2 = ICtCp multi-tile uniform
- 3 = YCoCg-R monoblock uniform
@@ -913,6 +914,8 @@ transmission capability, and region-of-interest coding.
- 6 = ICtCp monoblock perceptual
- 7 = YCoCg-R multi-tile perceptual
- 8 = ICtCp multi-tile perceptual
When motion coder is Haar, take base version number.
When motion coder is CDF 5/3, add 8 to the base version number.
uint16 Width: picture width in pixels. Columns count for Videotex-only file.
uint16 Height: picture height in pixels. Rows count for Videotex-only file.
uint8 FPS: frames per second. Use 0x00 for still pictures

View File

@@ -6297,65 +6297,25 @@ class GraphicsJSR223Delegate(private val vm: VM) {
if (length < 2) return
val temp = FloatArray(length)
val half = (length + 1) / 2 // Handle odd lengths properly
val half = (length + 1) / 2
// Split into low and high frequency components (matching encoder layout)
// Copy low-pass and high-pass subbands to temp
System.arraycopy(data, 0, temp, 0, length)
// Undo update step (low-pass)
for (i in 0 until half) {
temp[i] = data[i] // Low-pass coefficients (first half)
}
for (i in 0 until length / 2) {
if (half + i < length && half + i < data.size) {
temp[half + i] = data[half + i] // High-pass coefficients (second half)
}
val update = 0.25f * ((if (i > 0) temp[half + i - 1] else 0.0f) +
(if (i < half - 1) temp[half + i] else 0.0f))
temp[i] -= update
}
// 5/3 inverse lifting (undo forward steps in reverse order)
// Step 2: Undo update step (1/4 coefficient) - JPEG2000 symmetric extension
// Undo predict step (high-pass) and interleave samples
for (i in 0 until half) {
val leftIdx = half + i - 1
val centerIdx = half + i
// Symmetric extension for boundary handling
val left = when {
leftIdx >= 0 && leftIdx < length -> temp[leftIdx]
centerIdx < length && centerIdx + 1 < length -> temp[centerIdx + 1] // Mirror
centerIdx < length -> temp[centerIdx]
else -> 0.0f
}
val right = if (centerIdx < length) temp[centerIdx] else 0.0f
temp[i] -= 0.25f * (left + right)
}
// Step 1: Undo predict step (1/2 coefficient) - JPEG2000 symmetric extension
for (i in 0 until length / 2) {
if (half + i < length) {
val left = temp[i]
// Symmetric extension for right boundary
val right = if (i < half - 1) temp[i + 1] else if (half > 2) temp[half - 2] else temp[half - 1]
temp[half + i] += 0.5f * (left + right) // ADD to undo the subtraction in encoder
}
}
// Simple reconstruction (revert to working version)
for (i in 0 until length) {
if (i % 2 == 0) {
// Even positions: low-pass coefficients
data[i] = temp[i / 2]
} else {
// Odd positions: high-pass coefficients
val idx = i / 2
if (half + idx < length) {
data[i] = temp[half + idx]
} else {
// Symmetric extension: mirror the last available high-pass coefficient
val lastHighIdx = (length / 2) - 1
if (lastHighIdx >= 0 && half + lastHighIdx < length) {
data[i] = temp[half + lastHighIdx]
} else {
data[i] = 0.0f
}
}
data[2 * i] = temp[i] // Even samples (low-pass)
val idx = 2 * i + 1
if (idx < length) {
val pred = 0.5f * (temp[i] + (if (i < half - 1) temp[i + 1] else temp[i]))
data[idx] = temp[half + i] + pred // Odd samples (high-pass)
}
}
}
@@ -6514,7 +6474,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
spatialLevels: Int = 6,
temporalLevels: Int = 2,
entropyCoder: Int = 0,
bufferOffset: Long = 0
bufferOffset: Long = 0,
temporalMotionCoder: Int = 0
): Array<Any> {
val dbgOut = HashMap<String, Any>()
dbgOut["qY"] = qYGlobal
@@ -6634,9 +6595,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
// Step 6: Apply inverse 3D DWT using GOP dimensions (may be cropped)
tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder)
tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder)
tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder)
// Step 8: Convert to RGB and composite to full frame
// With crop encoding, center the cropped frame and fill letterbox areas with black
@@ -6780,7 +6741,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
spatialLevels: Int = 6,
temporalLevels: Int = 3,
entropyCoder: Int = 0,
bufferOffset: Long = 0
bufferOffset: Long = 0,
temporalMotionCoder: Int = 0
) {
// Cancel any existing decode thread
asyncDecodeThread?.interrupt()
@@ -6798,7 +6760,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
width, height,
qIndex, qYGlobal, qCoGlobal, qCgGlobal,
channelLayout, spatialFilter, spatialLevels, temporalLevels,
entropyCoder, bufferOffset
entropyCoder, bufferOffset, temporalMotionCoder
)
asyncDecodeResult = result
asyncDecodeComplete.set(true)
@@ -6943,12 +6905,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// =============================================================================
/**
 * Inverse 1D temporal DWT along the time axis, applied in place.
 * Dispatches to the Haar or CDF 5/3 inverse depending on the selected
 * temporal motion coder (TAV versions 1-8 use Haar, 9-16 use CDF 5/3).
 *
 * @param data                temporal coefficient line, transformed in place
 * @param numFrames           number of temporal samples in [data]
 * @param temporalMotionCoder 0 = Haar, 1 = CDF 5/3 (defaults to Haar for
 *                            backward compatibility with pre-CDF callers)
 */
private fun tavApplyTemporalDWTInverse1D(data: FloatArray, numFrames: Int, temporalMotionCoder: Int = 0) {
    if (numFrames < 2) return // a single frame has no temporal subbands to invert
    if (temporalMotionCoder == 0) {
        tavApplyDWTHaarInverse1D(data, numFrames)
    } else {
        tavApplyDWT53Inverse1D(data, numFrames)
    }
}
/**
@@ -6962,6 +6929,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
* @param spatialLevels Spatial decomposition levels (typically 6)
* @param temporalLevels Temporal decomposition levels (typically 2)
* @param spatialFilter Spatial wavelet filter type (0=5/3, 1=9/7, 255=Haar)
* @param temporalMotionCoder Temporal wavelet type (0=Haar, 1=CDF 5/3)
*/
private fun tavApplyInverse3DDWT(
gopData: Array<FloatArray>,
@@ -6970,7 +6938,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
numFrames: Int,
spatialLevels: Int,
temporalLevels: Int,
spatialFilter: Int
spatialFilter: Int,
temporalMotionCoder: Int = 0
) {
// Step 1: Apply inverse 2D spatial DWT to each temporal subband (each frame)
// This is required even for single frames (I-frames) to convert from DWT coefficients to pixel space
@@ -7008,7 +6977,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
for (level in temporalLevels - 1 downTo 0) {
val levelFrames = temporalLengths[level]
if (levelFrames >= 2) {
tavApplyTemporalDWTInverse1D(temporalLine, levelFrames)
tavApplyTemporalDWTInverse1D(temporalLine, levelFrames, temporalMotionCoder)
}
}

View File

@@ -993,11 +993,34 @@ static void dwt_97_inverse_1d(float *data, int length) {
free(temp);
}
// 5/3 inverse DWT using lifting scheme (CDF 5/3 reversible filter).
// Expects subband layout: data[0..half-1] = low-pass, data[half..length-1] = high-pass,
// where half = (length + 1) / 2 (odd lengths give one extra low-pass sample).
// Undoes the forward lifting steps in reverse order (update first, then predict)
// and interleaves the result back to even/odd sample positions, in place.
// Out-of-range neighbours are treated as zero, matching the forward transform.
static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = malloc(length * sizeof(float));
    if (!temp) return; // allocation failure: leave input untouched

    int half = (length + 1) / 2;

    // Copy low-pass and high-pass subbands to temp
    memcpy(temp, data, length * sizeof(float));

    // Undo update step (low-pass): s[i] -= 1/4 * (d[i-1] + d[i])
    for (int i = 0; i < half; i++) {
        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                (i < half - 1 ? temp[half + i] : 0));
        temp[i] -= update;
    }

    // Undo predict step (high-pass) and interleave samples back into data
    for (int i = 0; i < half; i++) {
        data[2 * i] = temp[i]; // Even samples (low-pass)

        int idx = 2 * i + 1;
        if (idx < length) {
            // Right neighbour is mirrored at the boundary (reuse temp[i])
            float pred = 0.5f * (temp[i] + (i < half - 1 ? temp[i + 1] : temp[i]));
            data[idx] = temp[half + i] + pred; // Odd samples (high-pass)
        }
    }

    free(temp);
}
// Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling)
@@ -1180,7 +1203,8 @@ static void dwt_haar_inverse_1d(float *data, int length) {
// Order: SPATIAL first (each frame), then TEMPORAL (across frames)
static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
int width, int height, int gop_size,
int spatial_levels, int temporal_levels, int filter_type) {
int spatial_levels, int temporal_levels, int filter_type,
int temporal_motion_coder) {
// Step 1: Apply inverse 2D spatial DWT to each frame
for (int t = 0; t < gop_size; t++) {
apply_inverse_dwt_multilevel(gop_y[t], width, height, spatial_levels, filter_type);
@@ -1212,7 +1236,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
for (int level = temporal_levels - 1; level >= 0; level--) {
const int level_frames = temporal_lengths[level];
if (level_frames >= 2) {
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
if (temporal_motion_coder == 0) {
dwt_haar_inverse_1d(temporal_line, level_frames);
} else {
dwt_53_inverse_1d(temporal_line, level_frames);
}
}
}
for (int t = 0; t < gop_size; t++) {
@@ -1226,7 +1255,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
for (int level = temporal_levels - 1; level >= 0; level--) {
const int level_frames = temporal_lengths[level];
if (level_frames >= 2) {
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
if (temporal_motion_coder == 0) {
dwt_haar_inverse_1d(temporal_line, level_frames);
} else {
dwt_53_inverse_1d(temporal_line, level_frames);
}
}
}
for (int t = 0; t < gop_size; t++) {
@@ -1240,7 +1274,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
for (int level = temporal_levels - 1; level >= 0; level--) {
const int level_frames = temporal_lengths[level];
if (level_frames >= 2) {
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
if (temporal_motion_coder == 0) {
dwt_haar_inverse_1d(temporal_line, level_frames);
} else {
dwt_53_inverse_1d(temporal_line, level_frames);
}
}
}
for (int t = 0; t < gop_size; t++) {
@@ -1706,6 +1745,7 @@ typedef struct {
int frame_count;
int frame_size;
int is_monoblock; // True if version 3-6 (single tile mode)
int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version)
// Screen masking (letterbox/pillarbox) - array of geometry changes
screen_mask_entry_t *screen_masks;
@@ -1942,7 +1982,11 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu
}
decoder->frame_size = decoder->header.width * decoder->header.height;
decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6);
// Extract temporal motion coder from version (versions 9-16 use CDF 5/3, 1-8 use Haar)
decoder->temporal_motion_coder = (decoder->header.version > 8) ? 1 : 0;
// Extract base version for determining monoblock mode
uint8_t base_version = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
decoder->is_monoblock = (base_version >= 3 && base_version <= 6);
decoder->audio_file_path = strdup(audio_file);
// Phase 2: Initialize decoding dimensions to full frame (will be updated by Screen Mask packets)
@@ -2337,7 +2381,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// Dequantise (perceptual for versions 5-8, uniform for 1-4)
// Phase 2: Use decoding dimensions and temporary buffers
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
// Extract base version for perceptual check
uint8_t base_version = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
const int is_perceptual = (base_version >= 5 && base_version <= 8);
const int is_ezbc = (decoder->header.entropy_coder == 1);
if (is_ezbc && is_perceptual) {
@@ -2472,7 +2518,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
}
// Convert YCoCg-R/ICtCp to RGB for cropped region
const int is_ictcp = (decoder->header.version % 2 == 0);
// Extract base version for ICtCp check (even versions use ICtCp)
uint8_t base_version_rgb = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
const int is_ictcp = (base_version_rgb % 2 == 0);
for (int i = 0; i < decoding_pixels; i++) {
uint8_t r, g, b;
@@ -2936,7 +2984,9 @@ int main(int argc, char *argv[]) {
}
// Dequantise with temporal scaling (perceptual quantisation for versions 5-8)
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
// Extract base version for perceptual check
uint8_t base_version_gop = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
const int is_perceptual = (base_version_gop >= 5 && base_version_gop <= 8);
const int is_ezbc = (decoder->header.entropy_coder == 1);
const int temporal_levels = 2; // Fixed for TAV GOP encoding
@@ -3034,7 +3084,7 @@ int main(int argc, char *argv[]) {
// Phase 2: Use GOP dimensions (may be cropped) for inverse DWT
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, gop_width, gop_height,
gop_size, decoder->header.decomp_levels, temporal_levels,
decoder->header.wavelet_filter);
decoder->header.wavelet_filter, decoder->temporal_motion_coder);
// Debug: Check Y values after inverse DWT
if (verbose && decoder->frame_count == 0) {

View File

@@ -18,7 +18,7 @@
#include <limits.h>
#include <float.h>
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251122 (3d-dwt,tad,ssf-tc)"
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251123 (3d-dwt,tad,ssf-tc,cdf53-motion)"
// TSVM Advanced Video (TAV) format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
@@ -1867,6 +1867,7 @@ typedef struct tav_encoder_s {
float **temporal_gop_co_frames; // [frame][pixel] - Co channel for each GOP frame
float **temporal_gop_cg_frames; // [frame][pixel] - Cg channel for each GOP frame
int temporal_decomp_levels; // Number of temporal DWT levels (default: 2)
int temporal_motion_coder; // Temporal wavelet type: 0=Haar, 1=CDF 5/3 (default: 1)
// MC-EZBC block-based motion compensation for temporal 3D DWT (0x13 packets)
int temporal_enable_mcezbc; // Flag to enable MC-EZBC block compensation (default: 0, uses translation if temporal_dwt enabled)
@@ -2412,6 +2413,7 @@ static void show_usage(const char *program_name) {
printf(" --enable-delta Enable delta encoding\n");
printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
printf(" --3d-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform; the default encoding mode)\n");
printf(" --motion-coder N Temporal wavelet: 0=Haar, 1=CDF 5/3 (default: auto-select based on resolution; use 0 for older version compatibility)\n");
printf(" --single-pass Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n");
// printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) entropy coding. May help reducing file size on high-quality videos\n");
@@ -2514,6 +2516,7 @@ static tav_encoder_t* create_encoder(void) {
enc->temporal_gop_width = 0; // Will be set when first frame is added to GOP
enc->temporal_gop_height = 0; // Will be set when first frame is added to GOP
enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 3 levels of temporal DWT (24 -> 12 -> 6 -> 3 temporal subbands)
enc->temporal_motion_coder = -1; // Will be set automatically based on resolution (unless overridden)
enc->temporal_gop_rgb_frames = NULL;
enc->temporal_gop_y_frames = NULL;
enc->temporal_gop_co_frames = NULL;
@@ -2836,7 +2839,7 @@ static int initialise_encoder(tav_encoder_t *enc) {
static void dwt_53_forward_1d(float *data, int length) {
if (length < 2) return;
float *temp = malloc(length * sizeof(float));
float *temp = calloc(length, sizeof(float)); // Use calloc to zero-initialize for odd-length arrays
int half = (length + 1) / 2; // Handle odd lengths properly
// Predict step (high-pass)
@@ -2846,6 +2849,7 @@ static void dwt_53_forward_1d(float *data, int length) {
float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
temp[half + i] = data[idx] - pred;
}
// Note: For odd lengths, last high-pass position remains zero (from calloc)
}
// Update step (low-pass)
@@ -5612,7 +5616,12 @@ static void dwt_3d_forward(tav_encoder_t *enc, float **gop_data, int width, int
for (int level = 0; level < temporal_levels; level++) {
int level_frames = temporal_lengths[level];
if (level_frames >= 2) {
dwt_haar_forward_1d(temporal_line, level_frames); // Haar better for imperfect alignment
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
if (enc->temporal_motion_coder == 0) {
dwt_haar_forward_1d(temporal_line, level_frames);
} else {
dwt_53_forward_1d(temporal_line, level_frames);
}
}
}
@@ -7425,7 +7434,8 @@ static int write_tav_header(tav_encoder_t *enc) {
// Magic number
fwrite(TAV_MAGIC, 1, 8, enc->output_fp);
// Version (dynamic based on colour space, monoblock mode, and perceptual tuning)
// Version (dynamic based on colour space, monoblock mode, perceptual tuning, and motion coder)
// Base versions 1-8, add 8 if temporal_motion_coder == 1 (CDF 5/3)
uint8_t version;
if (enc->monoblock) {
if (enc->perceptual_tuning) {
@@ -7440,6 +7450,10 @@ static int write_tav_header(tav_encoder_t *enc) {
version = enc->ictcp_mode ? 2 : 1;
}
}
// Add 8 if using CDF 5/3 temporal wavelet (motion_coder == 1)
if (enc->temporal_motion_coder == 1) {
version += 8;
}
fputc(version, enc->output_fp);
// Video parameters
@@ -10705,6 +10719,7 @@ int main(int argc, char *argv[]) {
{"temporal-3d", no_argument, 0, 1019},
{"dwt-3d", no_argument, 0, 1019},
{"3d-dwt", no_argument, 0, 1019},
{"motion-coder", required_argument, 0, 1030},
{"mc-ezbc", no_argument, 0, 1020},
{"residual-coding", no_argument, 0, 1021},
{"adaptive-blocks", no_argument, 0, 1022},
@@ -10946,6 +10961,12 @@ int main(int argc, char *argv[]) {
enc->preprocess_mode = PREPROCESS_RAW;
printf("Raw coefficient mode enabled (no significance map preprocessing)\n");
break;
case 1030: // --motion-coder
enc->temporal_motion_coder = CLAMP(atoi(optarg), 0, 1);
printf("Temporal motion coder set to: %d (%s)\n",
enc->temporal_motion_coder,
enc->temporal_motion_coder == 0 ? "Haar" : "CDF 5/3");
break;
case 1050: // --single-pass
enc->two_pass_mode = 0;
printf("Two-pass wavelet-based scene change detection disabled\n");
@@ -10987,6 +11008,26 @@ int main(int argc, char *argv[]) {
}
}
// Smart preset for temporal motion coder based on resolution
// For small videos (<500k pixels), use CDF 5/3 (better for fine details)
// For larger videos, use Haar (better compression, smoother motion matters less)
if (enc->temporal_motion_coder == -1) {
int num_pixels = enc->width * enc->height;
if (num_pixels >= 500000) {
enc->temporal_motion_coder = 0; // Haar
if (enc->verbose) {
printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels)\n",
enc->width, enc->height, num_pixels);
}
} else {
enc->temporal_motion_coder = 1; // CDF 5/3
if (enc->verbose) {
printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels)\n",
enc->width, enc->height, num_pixels);
}
}
}
// generate division series
enc->widths = malloc((enc->decomp_levels + 2) * sizeof(int));
enc->heights = malloc((enc->decomp_levels + 2) * sizeof(int));

View File

@@ -498,6 +498,8 @@ int main(int argc, char *argv[]) {
if (!opts.summary_only) {
// Parse header fields
uint8_t version = header[8];
uint8_t base_version = (version > 8) ? (version - 8) : version;
uint8_t temporal_motion_coder = (version > 8) ? 1 : 0;
uint16_t width = *((uint16_t*)&header[9]);
uint16_t height = *((uint16_t*)&header[11]);
uint8_t fps = header[13];
@@ -516,13 +518,15 @@ int main(int argc, char *argv[]) {
static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"};
int is_monoblock = (3 <= version && version <= 6);
int is_perceptual = (5 <= version && version <= 8);
int is_monoblock = (3 <= base_version && base_version <= 6);
int is_perceptual = (5 <= base_version && base_version <= 8);
static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, uniform", "YCoCg monoblock, uniform", "ICtCp monoblock, uniform", "YCoCg monoblock, perceptual", "ICtCp monoblock, perceptual", "YCoCg tiled, perceptual", "ICtCp tiled, perceptual"};
static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"};
printf("TAV Header:\n");
printf(" Version: %d (%s)\n", version, VERDESC[version]);
printf(" Version: %d (base: %d - %s, temporal: %s)\n",
version, base_version, VERDESC[base_version], TEMPORAL_WAVELET[temporal_motion_coder]);
printf(" Resolution: %dx%d\n", width, height);
printf(" Frame rate: %d fps", fps);
if (video_flags & 0x02) printf(" (NTSC)");