mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV update: CDF 5/3 for motion coder
This commit is contained in:
@@ -422,8 +422,12 @@ seqread.skip(3)
|
||||
|
||||
header.fileRole = seqread.readOneByte()
|
||||
|
||||
if (header.version < 1 || header.version > 8) {
|
||||
printerrln(`Error: Unsupported TAV version ${header.version}`)
|
||||
// Extract temporal motion coder from version (versions 9-16 use CDF 5/3, 1-8 use Haar)
|
||||
const baseVersion = (header.version > 8) ? (header.version - 8) : header.version
|
||||
header.temporalMotionCoder = (header.version > 8) ? 1 : 0
|
||||
|
||||
if (baseVersion < 1 || baseVersion > 8) {
|
||||
printerrln(`Error: Unsupported TAV base version ${baseVersion}`)
|
||||
errorlevel = 1
|
||||
return
|
||||
}
|
||||
@@ -1339,7 +1343,8 @@ try {
|
||||
header.channelLayout,
|
||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||
header.entropyCoder,
|
||||
bufferOffset
|
||||
bufferOffset,
|
||||
header.temporalMotionCoder
|
||||
)
|
||||
|
||||
asyncDecodeInProgress = true
|
||||
@@ -1412,7 +1417,8 @@ try {
|
||||
header.channelLayout,
|
||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||
header.entropyCoder,
|
||||
nextOffset
|
||||
nextOffset,
|
||||
header.temporalMotionCoder
|
||||
)
|
||||
|
||||
// Set async decode tracking variables
|
||||
@@ -1454,7 +1460,8 @@ try {
|
||||
header.channelLayout,
|
||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||
header.entropyCoder,
|
||||
decodingOffset
|
||||
decodingOffset,
|
||||
header.temporalMotionCoder
|
||||
)
|
||||
|
||||
// Set async decode tracking variables
|
||||
@@ -1821,7 +1828,8 @@ try {
|
||||
header.channelLayout,
|
||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||
header.entropyCoder,
|
||||
readyGopData.slot * SLOT_SIZE
|
||||
readyGopData.slot * SLOT_SIZE,
|
||||
header.temporalMotionCoder
|
||||
)
|
||||
|
||||
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
||||
@@ -1998,7 +2006,8 @@ try {
|
||||
header.channelLayout,
|
||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||
header.entropyCoder,
|
||||
decodingGopData.slot * SLOT_SIZE
|
||||
decodingGopData.slot * SLOT_SIZE,
|
||||
header.temporalMotionCoder
|
||||
)
|
||||
|
||||
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
||||
@@ -2038,7 +2047,8 @@ try {
|
||||
header.channelLayout,
|
||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||
header.entropyCoder,
|
||||
readyGopData.slot * SLOT_SIZE
|
||||
readyGopData.slot * SLOT_SIZE,
|
||||
header.temporalMotionCoder
|
||||
)
|
||||
readyGopData.needsDecode = false
|
||||
readyGopData.startTime = sys.nanoTime()
|
||||
@@ -2115,7 +2125,8 @@ try {
|
||||
header.channelLayout,
|
||||
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
|
||||
header.entropyCoder,
|
||||
targetOffset
|
||||
targetOffset,
|
||||
header.temporalMotionCoder
|
||||
)
|
||||
|
||||
asyncDecodeInProgress = true
|
||||
@@ -2211,7 +2222,8 @@ try {
|
||||
}
|
||||
catch (e) {
|
||||
serial.printerr(`TAV decode error: ${e}`)
|
||||
e.printStackTrace()
|
||||
if (e.printStackTrace)
|
||||
e.printStackTrace()
|
||||
errorlevel = 1
|
||||
}
|
||||
finally {
|
||||
|
||||
@@ -905,6 +905,7 @@ transmission capability, and region-of-interest coding.
|
||||
## Header (32 bytes)
|
||||
uint8 Magic[8]: "\x1F TSVM TAV" or "\x1F TSVM TAP"
|
||||
uint8 Version:
|
||||
Base version number:
|
||||
- 1 = YCoCg-R multi-tile uniform
|
||||
- 2 = ICtCp multi-tile uniform
|
||||
- 3 = YCoCg-R monoblock uniform
|
||||
@@ -913,6 +914,8 @@ transmission capability, and region-of-interest coding.
|
||||
- 6 = ICtCp monoblock perceptual
|
||||
- 7 = YCoCg-R multi-tile perceptual
|
||||
- 8 = ICtCp multi-tile perceptual
|
||||
When the temporal motion coder is Haar, the Version field holds the base version number unchanged (1-8).
|
||||
When the temporal motion coder is CDF 5/3, the Version field holds the base version number plus 8 (9-16).
|
||||
uint16 Width: picture width in pixels. Columns count for Videotex-only file.
|
||||
uint16 Height: picture height in pixels. Rows count for Videotex-only file.
|
||||
uint8 FPS: frames per second. Use 0x00 for still pictures
|
||||
|
||||
@@ -6297,65 +6297,25 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
if (length < 2) return
|
||||
|
||||
val temp = FloatArray(length)
|
||||
val half = (length + 1) / 2 // Handle odd lengths properly
|
||||
val half = (length + 1) / 2
|
||||
|
||||
// Split into low and high frequency components (matching encoder layout)
|
||||
// Copy low-pass and high-pass subbands to temp
|
||||
System.arraycopy(data, 0, temp, 0, length)
|
||||
|
||||
// Undo update step (low-pass)
|
||||
for (i in 0 until half) {
|
||||
temp[i] = data[i] // Low-pass coefficients (first half)
|
||||
}
|
||||
for (i in 0 until length / 2) {
|
||||
if (half + i < length && half + i < data.size) {
|
||||
temp[half + i] = data[half + i] // High-pass coefficients (second half)
|
||||
}
|
||||
val update = 0.25f * ((if (i > 0) temp[half + i - 1] else 0.0f) +
|
||||
(if (i < half - 1) temp[half + i] else 0.0f))
|
||||
temp[i] -= update
|
||||
}
|
||||
|
||||
// 5/3 inverse lifting (undo forward steps in reverse order)
|
||||
|
||||
// Step 2: Undo update step (1/4 coefficient) - JPEG2000 symmetric extension
|
||||
// Undo predict step (high-pass) and interleave samples
|
||||
for (i in 0 until half) {
|
||||
val leftIdx = half + i - 1
|
||||
val centerIdx = half + i
|
||||
|
||||
// Symmetric extension for boundary handling
|
||||
val left = when {
|
||||
leftIdx >= 0 && leftIdx < length -> temp[leftIdx]
|
||||
centerIdx < length && centerIdx + 1 < length -> temp[centerIdx + 1] // Mirror
|
||||
centerIdx < length -> temp[centerIdx]
|
||||
else -> 0.0f
|
||||
}
|
||||
val right = if (centerIdx < length) temp[centerIdx] else 0.0f
|
||||
temp[i] -= 0.25f * (left + right)
|
||||
}
|
||||
|
||||
// Step 1: Undo predict step (1/2 coefficient) - JPEG2000 symmetric extension
|
||||
for (i in 0 until length / 2) {
|
||||
if (half + i < length) {
|
||||
val left = temp[i]
|
||||
// Symmetric extension for right boundary
|
||||
val right = if (i < half - 1) temp[i + 1] else if (half > 2) temp[half - 2] else temp[half - 1]
|
||||
temp[half + i] += 0.5f * (left + right) // ADD to undo the subtraction in encoder
|
||||
}
|
||||
}
|
||||
|
||||
// Simple reconstruction (revert to working version)
|
||||
for (i in 0 until length) {
|
||||
if (i % 2 == 0) {
|
||||
// Even positions: low-pass coefficients
|
||||
data[i] = temp[i / 2]
|
||||
} else {
|
||||
// Odd positions: high-pass coefficients
|
||||
val idx = i / 2
|
||||
if (half + idx < length) {
|
||||
data[i] = temp[half + idx]
|
||||
} else {
|
||||
// Symmetric extension: mirror the last available high-pass coefficient
|
||||
val lastHighIdx = (length / 2) - 1
|
||||
if (lastHighIdx >= 0 && half + lastHighIdx < length) {
|
||||
data[i] = temp[half + lastHighIdx]
|
||||
} else {
|
||||
data[i] = 0.0f
|
||||
}
|
||||
}
|
||||
data[2 * i] = temp[i] // Even samples (low-pass)
|
||||
val idx = 2 * i + 1
|
||||
if (idx < length) {
|
||||
val pred = 0.5f * (temp[i] + (if (i < half - 1) temp[i + 1] else temp[i]))
|
||||
data[idx] = temp[half + i] + pred // Odd samples (high-pass)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6514,7 +6474,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
spatialLevels: Int = 6,
|
||||
temporalLevels: Int = 2,
|
||||
entropyCoder: Int = 0,
|
||||
bufferOffset: Long = 0
|
||||
bufferOffset: Long = 0,
|
||||
temporalMotionCoder: Int = 0
|
||||
): Array<Any> {
|
||||
val dbgOut = HashMap<String, Any>()
|
||||
dbgOut["qY"] = qYGlobal
|
||||
@@ -6634,9 +6595,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
|
||||
// Step 6: Apply inverse 3D DWT using GOP dimensions (may be cropped)
|
||||
tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||
tavApplyInverse3DDWT(gopY, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder)
|
||||
tavApplyInverse3DDWT(gopCo, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder)
|
||||
tavApplyInverse3DDWT(gopCg, gopWidth, gopHeight, gopSize, spatialLevels, temporalLevels, spatialFilter, temporalMotionCoder)
|
||||
|
||||
// Step 8: Convert to RGB and composite to full frame
|
||||
// With crop encoding, center the cropped frame and fill letterbox areas with black
|
||||
@@ -6780,7 +6741,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
spatialLevels: Int = 6,
|
||||
temporalLevels: Int = 3,
|
||||
entropyCoder: Int = 0,
|
||||
bufferOffset: Long = 0
|
||||
bufferOffset: Long = 0,
|
||||
temporalMotionCoder: Int = 0
|
||||
) {
|
||||
// Cancel any existing decode thread
|
||||
asyncDecodeThread?.interrupt()
|
||||
@@ -6798,7 +6760,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
width, height,
|
||||
qIndex, qYGlobal, qCoGlobal, qCgGlobal,
|
||||
channelLayout, spatialFilter, spatialLevels, temporalLevels,
|
||||
entropyCoder, bufferOffset
|
||||
entropyCoder, bufferOffset, temporalMotionCoder
|
||||
)
|
||||
asyncDecodeResult = result
|
||||
asyncDecodeComplete.set(true)
|
||||
@@ -6943,12 +6905,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Inverse 1D temporal DWT (Haar) along time axis
|
||||
* Reuses existing Haar inverse implementation
|
||||
* Inverse 1D temporal DWT along time axis
|
||||
* Supports both Haar and CDF 5/3 wavelets
|
||||
* @param temporalMotionCoder 0=Haar, 1=CDF 5/3
|
||||
*/
|
||||
private fun tavApplyTemporalDWTInverse1D(data: FloatArray, numFrames: Int) {
|
||||
private fun tavApplyTemporalDWTInverse1D(data: FloatArray, numFrames: Int, temporalMotionCoder: Int = 0) {
|
||||
if (numFrames < 2) return
|
||||
tavApplyDWTHaarInverse1D(data, numFrames)
|
||||
if (temporalMotionCoder == 0) {
|
||||
tavApplyDWTHaarInverse1D(data, numFrames)
|
||||
} else {
|
||||
tavApplyDWT53Inverse1D(data, numFrames)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -6962,6 +6929,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
* @param spatialLevels Spatial decomposition levels (typically 6)
|
||||
* @param temporalLevels Temporal decomposition levels (typically 2)
|
||||
* @param spatialFilter Spatial wavelet filter type (0=5/3, 1=9/7, 255=Haar)
|
||||
* @param temporalMotionCoder Temporal wavelet type (0=Haar, 1=CDF 5/3)
|
||||
*/
|
||||
private fun tavApplyInverse3DDWT(
|
||||
gopData: Array<FloatArray>,
|
||||
@@ -6970,7 +6938,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
numFrames: Int,
|
||||
spatialLevels: Int,
|
||||
temporalLevels: Int,
|
||||
spatialFilter: Int
|
||||
spatialFilter: Int,
|
||||
temporalMotionCoder: Int = 0
|
||||
) {
|
||||
// Step 1: Apply inverse 2D spatial DWT to each temporal subband (each frame)
|
||||
// This is required even for single frames (I-frames) to convert from DWT coefficients to pixel space
|
||||
@@ -7008,7 +6977,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
for (level in temporalLevels - 1 downTo 0) {
|
||||
val levelFrames = temporalLengths[level]
|
||||
if (levelFrames >= 2) {
|
||||
tavApplyTemporalDWTInverse1D(temporalLine, levelFrames)
|
||||
tavApplyTemporalDWTInverse1D(temporalLine, levelFrames, temporalMotionCoder)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -993,11 +993,34 @@ static void dwt_97_inverse_1d(float *data, int length) {
|
||||
free(temp);
|
||||
}
|
||||
|
||||
// 5/3 inverse DWT (simplified - uses 9/7 for now)
|
||||
// 5/3 inverse DWT using lifting scheme (JPEG 2000 reversible filter)
|
||||
static void dwt_53_inverse_1d(float *data, int length) {
|
||||
if (length < 2) return;
|
||||
// TODO: Implement proper 5/3 from TSVM if needed
|
||||
dwt_97_inverse_1d(data, length);
|
||||
|
||||
float *temp = malloc(length * sizeof(float));
|
||||
int half = (length + 1) / 2;
|
||||
|
||||
// Copy low-pass and high-pass subbands to temp
|
||||
memcpy(temp, data, length * sizeof(float));
|
||||
|
||||
// Undo update step (low-pass)
|
||||
for (int i = 0; i < half; i++) {
|
||||
float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
|
||||
(i < half - 1 ? temp[half + i] : 0));
|
||||
temp[i] -= update;
|
||||
}
|
||||
|
||||
// Undo predict step (high-pass) and interleave samples
|
||||
for (int i = 0; i < half; i++) {
|
||||
data[2 * i] = temp[i]; // Even samples (low-pass)
|
||||
int idx = 2 * i + 1;
|
||||
if (idx < length) {
|
||||
float pred = 0.5f * (temp[i] + (i < half - 1 ? temp[i + 1] : temp[i]));
|
||||
data[idx] = temp[half + i] + pred; // Odd samples (high-pass)
|
||||
}
|
||||
}
|
||||
|
||||
free(temp);
|
||||
}
|
||||
|
||||
// Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling)
|
||||
@@ -1180,7 +1203,8 @@ static void dwt_haar_inverse_1d(float *data, int length) {
|
||||
// Order: SPATIAL first (each frame), then TEMPORAL (across frames)
|
||||
static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
|
||||
int width, int height, int gop_size,
|
||||
int spatial_levels, int temporal_levels, int filter_type) {
|
||||
int spatial_levels, int temporal_levels, int filter_type,
|
||||
int temporal_motion_coder) {
|
||||
// Step 1: Apply inverse 2D spatial DWT to each frame
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
apply_inverse_dwt_multilevel(gop_y[t], width, height, spatial_levels, filter_type);
|
||||
@@ -1212,7 +1236,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
|
||||
for (int level = temporal_levels - 1; level >= 0; level--) {
|
||||
const int level_frames = temporal_lengths[level];
|
||||
if (level_frames >= 2) {
|
||||
dwt_haar_inverse_1d(temporal_line, level_frames);
|
||||
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
|
||||
if (temporal_motion_coder == 0) {
|
||||
dwt_haar_inverse_1d(temporal_line, level_frames);
|
||||
} else {
|
||||
dwt_53_inverse_1d(temporal_line, level_frames);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
@@ -1226,7 +1255,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
|
||||
for (int level = temporal_levels - 1; level >= 0; level--) {
|
||||
const int level_frames = temporal_lengths[level];
|
||||
if (level_frames >= 2) {
|
||||
dwt_haar_inverse_1d(temporal_line, level_frames);
|
||||
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
|
||||
if (temporal_motion_coder == 0) {
|
||||
dwt_haar_inverse_1d(temporal_line, level_frames);
|
||||
} else {
|
||||
dwt_53_inverse_1d(temporal_line, level_frames);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
@@ -1240,7 +1274,12 @@ static void apply_inverse_3d_dwt(float **gop_y, float **gop_co, float **gop_cg,
|
||||
for (int level = temporal_levels - 1; level >= 0; level--) {
|
||||
const int level_frames = temporal_lengths[level];
|
||||
if (level_frames >= 2) {
|
||||
dwt_haar_inverse_1d(temporal_line, level_frames);
|
||||
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
|
||||
if (temporal_motion_coder == 0) {
|
||||
dwt_haar_inverse_1d(temporal_line, level_frames);
|
||||
} else {
|
||||
dwt_53_inverse_1d(temporal_line, level_frames);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int t = 0; t < gop_size; t++) {
|
||||
@@ -1706,6 +1745,7 @@ typedef struct {
|
||||
int frame_count;
|
||||
int frame_size;
|
||||
int is_monoblock; // True if version 3-6 (single tile mode)
|
||||
int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version)
|
||||
|
||||
// Screen masking (letterbox/pillarbox) - array of geometry changes
|
||||
screen_mask_entry_t *screen_masks;
|
||||
@@ -1942,7 +1982,11 @@ static tav_decoder_t* tav_decoder_init(const char *input_file, const char *outpu
|
||||
}
|
||||
|
||||
decoder->frame_size = decoder->header.width * decoder->header.height;
|
||||
decoder->is_monoblock = (decoder->header.version >= 3 && decoder->header.version <= 6);
|
||||
// Extract temporal motion coder from version (versions 9-16 use CDF 5/3, 1-8 use Haar)
|
||||
decoder->temporal_motion_coder = (decoder->header.version > 8) ? 1 : 0;
|
||||
// Extract base version for determining monoblock mode
|
||||
uint8_t base_version = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
|
||||
decoder->is_monoblock = (base_version >= 3 && base_version <= 6);
|
||||
decoder->audio_file_path = strdup(audio_file);
|
||||
|
||||
// Phase 2: Initialize decoding dimensions to full frame (will be updated by Screen Mask packets)
|
||||
@@ -2337,7 +2381,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
|
||||
// Dequantise (perceptual for base versions 5-8, uniform for base versions 1-4; base version = version - 8 when version > 8)
|
||||
// Phase 2: Use decoding dimensions and temporary buffers
|
||||
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
|
||||
// Extract base version for perceptual check
|
||||
uint8_t base_version = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
|
||||
const int is_perceptual = (base_version >= 5 && base_version <= 8);
|
||||
const int is_ezbc = (decoder->header.entropy_coder == 1);
|
||||
|
||||
if (is_ezbc && is_perceptual) {
|
||||
@@ -2472,7 +2518,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
|
||||
}
|
||||
|
||||
// Convert YCoCg-R/ICtCp to RGB for cropped region
|
||||
const int is_ictcp = (decoder->header.version % 2 == 0);
|
||||
// Extract base version for ICtCp check (even versions use ICtCp)
|
||||
uint8_t base_version_rgb = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
|
||||
const int is_ictcp = (base_version_rgb % 2 == 0);
|
||||
|
||||
for (int i = 0; i < decoding_pixels; i++) {
|
||||
uint8_t r, g, b;
|
||||
@@ -2936,7 +2984,9 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Dequantise with temporal scaling (perceptual quantisation for base versions 5-8, i.e. header versions 5-8 and 13-16)
|
||||
const int is_perceptual = (decoder->header.version >= 5 && decoder->header.version <= 8);
|
||||
// Extract base version for perceptual check
|
||||
uint8_t base_version_gop = (decoder->header.version > 8) ? (decoder->header.version - 8) : decoder->header.version;
|
||||
const int is_perceptual = (base_version_gop >= 5 && base_version_gop <= 8);
|
||||
const int is_ezbc = (decoder->header.entropy_coder == 1);
|
||||
const int temporal_levels = 2; // Fixed for TAV GOP encoding
|
||||
|
||||
@@ -3034,7 +3084,7 @@ int main(int argc, char *argv[]) {
|
||||
// Phase 2: Use GOP dimensions (may be cropped) for inverse DWT
|
||||
apply_inverse_3d_dwt(gop_y, gop_co, gop_cg, gop_width, gop_height,
|
||||
gop_size, decoder->header.decomp_levels, temporal_levels,
|
||||
decoder->header.wavelet_filter);
|
||||
decoder->header.wavelet_filter, decoder->temporal_motion_coder);
|
||||
|
||||
// Debug: Check Y values after inverse DWT
|
||||
if (verbose && decoder->frame_count == 0) {
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
#include <limits.h>
|
||||
#include <float.h>
|
||||
|
||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251122 (3d-dwt,tad,ssf-tc)"
|
||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251123 (3d-dwt,tad,ssf-tc,cdf53-motion)"
|
||||
|
||||
// TSVM Advanced Video (TAV) format constants
|
||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
||||
@@ -1867,6 +1867,7 @@ typedef struct tav_encoder_s {
|
||||
float **temporal_gop_co_frames; // [frame][pixel] - Co channel for each GOP frame
|
||||
float **temporal_gop_cg_frames; // [frame][pixel] - Cg channel for each GOP frame
|
||||
int temporal_decomp_levels; // Number of temporal DWT levels (default: 2)
|
||||
int temporal_motion_coder; // Temporal wavelet type: 0=Haar, 1=CDF 5/3 (default: 1)
|
||||
|
||||
// MC-EZBC block-based motion compensation for temporal 3D DWT (0x13 packets)
|
||||
int temporal_enable_mcezbc; // Flag to enable MC-EZBC block compensation (default: 0, uses translation if temporal_dwt enabled)
|
||||
@@ -2412,6 +2413,7 @@ static void show_usage(const char *program_name) {
|
||||
printf(" --enable-delta Enable delta encoding\n");
|
||||
printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
|
||||
printf(" --3d-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform; the default encoding mode)\n");
|
||||
printf(" --motion-coder N Temporal wavelet: 0=Haar, 1=CDF 5/3 (default: auto-select based on resolution; use 0 for older version compatibility)\n");
|
||||
printf(" --single-pass Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n");
|
||||
// printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
|
||||
printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) entropy coding. May help reducing file size on high-quality videos\n");
|
||||
@@ -2514,6 +2516,7 @@ static tav_encoder_t* create_encoder(void) {
|
||||
enc->temporal_gop_width = 0; // Will be set when first frame is added to GOP
|
||||
enc->temporal_gop_height = 0; // Will be set when first frame is added to GOP
|
||||
enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 3 levels of temporal DWT (24 -> 12 -> 6 -> 3 temporal subbands)
|
||||
enc->temporal_motion_coder = -1; // Will be set automatically based on resolution (unless overridden)
|
||||
enc->temporal_gop_rgb_frames = NULL;
|
||||
enc->temporal_gop_y_frames = NULL;
|
||||
enc->temporal_gop_co_frames = NULL;
|
||||
@@ -2836,7 +2839,7 @@ static int initialise_encoder(tav_encoder_t *enc) {
|
||||
static void dwt_53_forward_1d(float *data, int length) {
|
||||
if (length < 2) return;
|
||||
|
||||
float *temp = malloc(length * sizeof(float));
|
||||
float *temp = calloc(length, sizeof(float)); // Use calloc to zero-initialize for odd-length arrays
|
||||
int half = (length + 1) / 2; // Handle odd lengths properly
|
||||
|
||||
// Predict step (high-pass)
|
||||
@@ -2846,6 +2849,7 @@ static void dwt_53_forward_1d(float *data, int length) {
|
||||
float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
|
||||
temp[half + i] = data[idx] - pred;
|
||||
}
|
||||
// Note: For odd lengths, last high-pass position remains zero (from calloc)
|
||||
}
|
||||
|
||||
// Update step (low-pass)
|
||||
@@ -5612,7 +5616,12 @@ static void dwt_3d_forward(tav_encoder_t *enc, float **gop_data, int width, int
|
||||
for (int level = 0; level < temporal_levels; level++) {
|
||||
int level_frames = temporal_lengths[level];
|
||||
if (level_frames >= 2) {
|
||||
dwt_haar_forward_1d(temporal_line, level_frames); // Haar better for imperfect alignment
|
||||
// Use selected temporal wavelet (0=Haar, 1=CDF 5/3)
|
||||
if (enc->temporal_motion_coder == 0) {
|
||||
dwt_haar_forward_1d(temporal_line, level_frames);
|
||||
} else {
|
||||
dwt_53_forward_1d(temporal_line, level_frames);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7425,7 +7434,8 @@ static int write_tav_header(tav_encoder_t *enc) {
|
||||
// Magic number
|
||||
fwrite(TAV_MAGIC, 1, 8, enc->output_fp);
|
||||
|
||||
// Version (dynamic based on colour space, monoblock mode, and perceptual tuning)
|
||||
// Version (dynamic based on colour space, monoblock mode, perceptual tuning, and motion coder)
|
||||
// Base versions 1-8, add 8 if temporal_motion_coder == 1 (CDF 5/3)
|
||||
uint8_t version;
|
||||
if (enc->monoblock) {
|
||||
if (enc->perceptual_tuning) {
|
||||
@@ -7440,6 +7450,10 @@ static int write_tav_header(tav_encoder_t *enc) {
|
||||
version = enc->ictcp_mode ? 2 : 1;
|
||||
}
|
||||
}
|
||||
// Add 8 if using CDF 5/3 temporal wavelet (motion_coder == 1)
|
||||
if (enc->temporal_motion_coder == 1) {
|
||||
version += 8;
|
||||
}
|
||||
fputc(version, enc->output_fp);
|
||||
|
||||
// Video parameters
|
||||
@@ -10705,6 +10719,7 @@ int main(int argc, char *argv[]) {
|
||||
{"temporal-3d", no_argument, 0, 1019},
|
||||
{"dwt-3d", no_argument, 0, 1019},
|
||||
{"3d-dwt", no_argument, 0, 1019},
|
||||
{"motion-coder", required_argument, 0, 1030},
|
||||
{"mc-ezbc", no_argument, 0, 1020},
|
||||
{"residual-coding", no_argument, 0, 1021},
|
||||
{"adaptive-blocks", no_argument, 0, 1022},
|
||||
@@ -10946,6 +10961,12 @@ int main(int argc, char *argv[]) {
|
||||
enc->preprocess_mode = PREPROCESS_RAW;
|
||||
printf("Raw coefficient mode enabled (no significance map preprocessing)\n");
|
||||
break;
|
||||
case 1030: // --motion-coder
|
||||
enc->temporal_motion_coder = CLAMP(atoi(optarg), 0, 1);
|
||||
printf("Temporal motion coder set to: %d (%s)\n",
|
||||
enc->temporal_motion_coder,
|
||||
enc->temporal_motion_coder == 0 ? "Haar" : "CDF 5/3");
|
||||
break;
|
||||
case 1050: // --single-pass
|
||||
enc->two_pass_mode = 0;
|
||||
printf("Two-pass wavelet-based scene change detection disabled\n");
|
||||
@@ -10987,6 +11008,26 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
|
||||
// Smart preset for temporal motion coder based on resolution
|
||||
// For small videos (<500k pixels), use CDF 5/3 (better for fine details)
|
||||
// For larger videos, use Haar (better compression, smoother motion matters less)
|
||||
if (enc->temporal_motion_coder == -1) {
|
||||
int num_pixels = enc->width * enc->height;
|
||||
if (num_pixels >= 500000) {
|
||||
enc->temporal_motion_coder = 0; // Haar
|
||||
if (enc->verbose) {
|
||||
printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels)\n",
|
||||
enc->width, enc->height, num_pixels);
|
||||
}
|
||||
} else {
|
||||
enc->temporal_motion_coder = 1; // CDF 5/3
|
||||
if (enc->verbose) {
|
||||
printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels)\n",
|
||||
enc->width, enc->height, num_pixels);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// generate division series
|
||||
enc->widths = malloc((enc->decomp_levels + 2) * sizeof(int));
|
||||
enc->heights = malloc((enc->decomp_levels + 2) * sizeof(int));
|
||||
|
||||
@@ -498,6 +498,8 @@ int main(int argc, char *argv[]) {
|
||||
if (!opts.summary_only) {
|
||||
// Parse header fields
|
||||
uint8_t version = header[8];
|
||||
uint8_t base_version = (version > 8) ? (version - 8) : version;
|
||||
uint8_t temporal_motion_coder = (version > 8) ? 1 : 0;
|
||||
uint16_t width = *((uint16_t*)&header[9]);
|
||||
uint16_t height = *((uint16_t*)&header[11]);
|
||||
uint8_t fps = header[13];
|
||||
@@ -516,13 +518,15 @@ int main(int argc, char *argv[]) {
|
||||
static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
|
||||
static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"};
|
||||
|
||||
int is_monoblock = (3 <= version && version <= 6);
|
||||
int is_perceptual = (5 <= version && version <= 8);
|
||||
int is_monoblock = (3 <= base_version && base_version <= 6);
|
||||
int is_perceptual = (5 <= base_version && base_version <= 8);
|
||||
|
||||
static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, uniform", "YCoCg monoblock, uniform", "ICtCp monoblock, uniform", "YCoCg monoblock, perceptual", "ICtCp monoblock, perceptual", "YCoCg tiled, perceptual", "ICtCp tiled, perceptual"};
|
||||
static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"};
|
||||
|
||||
printf("TAV Header:\n");
|
||||
printf(" Version: %d (%s)\n", version, VERDESC[version]);
|
||||
printf(" Version: %d (base: %d - %s, temporal: %s)\n",
|
||||
version, base_version, VERDESC[base_version], TEMPORAL_WAVELET[temporal_motion_coder]);
|
||||
printf(" Resolution: %dx%d\n", width, height);
|
||||
printf(" Frame rate: %d fps", fps);
|
||||
if (video_flags & 0x02) printf(" (NTSC)");
|
||||
|
||||
Reference in New Issue
Block a user