TAV: preset implementation

This commit is contained in:
minjaesong
2025-11-24 17:40:45 +09:00
parent 6132012e74
commit 08bb33bf27
6 changed files with 152 additions and 66 deletions

View File

@@ -416,9 +416,10 @@ header.videoFlags = seqread.readOneByte()
header.qualityLevel = seqread.readOneByte() // the decoder expects biased value
header.channelLayout = seqread.readOneByte()
header.entropyCoder = seqread.readOneByte()
header.encoderPreset = seqread.readOneByte() // Byte 28: bit 0 = sports, bit 1 = anime
// Skip reserved bytes (2) and device orientation (1)
seqread.skip(3)
// Skip reserved byte (1) and device orientation (1)
seqread.skip(2)
header.fileRole = seqread.readOneByte()
@@ -1248,7 +1249,8 @@ try {
header.decompLevels, // TAV-specific parameter
isLossless,
header.version, // TAV version for colour space detection
header.entropyCoder // Entropy coder: 0 = Twobit-map, 1 = EZBC
header.entropyCoder, // Entropy coder: 0 = Twobit-map, 1 = EZBC
header.encoderPreset // Encoder preset: bit 0 = sports, bit 1 = anime
)
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
@@ -1344,7 +1346,8 @@ try {
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
bufferOffset,
header.temporalMotionCoder
header.temporalMotionCoder,
header.encoderPreset // Encoder preset: bit 0 = sports, bit 1 = anime
)
asyncDecodeInProgress = true
@@ -1418,7 +1421,8 @@ try {
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
nextOffset,
header.temporalMotionCoder
header.temporalMotionCoder,
header.encoderPreset
)
// Set async decode tracking variables
@@ -1461,7 +1465,8 @@ try {
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
decodingOffset,
header.temporalMotionCoder
header.temporalMotionCoder,
header.encoderPreset
)
// Set async decode tracking variables
@@ -1829,7 +1834,8 @@ try {
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
readyGopData.slot * SLOT_SIZE,
header.temporalMotionCoder
header.temporalMotionCoder,
header.encoderPreset
)
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
@@ -2021,7 +2027,8 @@ try {
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
decodingGopData.slot * SLOT_SIZE,
header.temporalMotionCoder
header.temporalMotionCoder,
header.encoderPreset
)
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
@@ -2062,7 +2069,8 @@ try {
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
readyGopData.slot * SLOT_SIZE,
header.temporalMotionCoder
header.temporalMotionCoder,
header.encoderPreset
)
readyGopData.needsDecode = false
readyGopData.startTime = sys.nanoTime()
@@ -2140,7 +2148,8 @@ try {
header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
header.entropyCoder,
targetOffset,
header.temporalMotionCoder
header.temporalMotionCoder,
header.encoderPreset
)
asyncDecodeInProgress = true

View File

@@ -47,25 +47,19 @@ import kotlin.collections.component2
import kotlin.collections.component3
import kotlin.collections.component4
import kotlin.collections.copyOf
import kotlin.collections.count
import kotlin.collections.fill
import kotlin.collections.first
import kotlin.collections.forEach
import kotlin.collections.forEachIndexed
import kotlin.collections.indices
import kotlin.collections.isNotEmpty
import kotlin.collections.last
import kotlin.collections.listOf
import kotlin.collections.map
import kotlin.collections.maxOfOrNull
import kotlin.collections.mutableListOf
import kotlin.collections.mutableMapOf
import kotlin.collections.set
import kotlin.collections.sliceArray
import kotlin.collections.sorted
import kotlin.collections.sumOf
import kotlin.collections.toFloatArray
import kotlin.collections.toList
import kotlin.error
import kotlin.floatArrayOf
import kotlin.fromBits
@@ -5039,9 +5033,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
* - Level 1 (tH): 1.0 × 2^0.60 = 1.52 (default BETA=0.6, KAPPA=1.14)
* - Level 2 (tHH): 1.0 × 2^1.32 = 2.50 (default BETA=0.6, KAPPA=1.14)
*/
private fun getTemporalQuantizerScale(temporalLevel: Int): Float {
val BETA = 0.6f // Temporal scaling exponent (aggressive for temporal high-pass)
val KAPPA = 1.14f
private fun getTemporalQuantizerScale(encoderPreset: Int, temporalLevel: Int): Float {
val BETA = if (encoderPreset and 0x01 == 1) 0.0f else 0.6f // Temporal scaling exponent (aggressive for temporal high-pass)
val KAPPA = if (encoderPreset and 0x01 == 1) 1.0f else 1.14f
return 2.0f.pow(BETA * temporalLevel.toFloat().pow(KAPPA))
}
@@ -5177,8 +5171,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
// This must be called AFTER dequantization but BEFORE inverse DWT
private fun removeGrainSynthesisDecoder(coeffs: FloatArray, width: Int, height: Int,
frameNum: Int, subbands: List<DWTSubbandInfo>, qYGlobal: Int) {
private fun tavApplyGrainSynthesis(coeffs: FloatArray, width: Int, height: Int,
frameNum: Int, subbands: List<DWTSubbandInfo>, qYGlobal: Int, encoderPreset: Int = 0) {
// Anime preset: completely disable grain synthesis
if ((encoderPreset and 0x02) != 0) {
return // Skip grain synthesis entirely
}
// Only apply to Y channel, excluding LL band
// Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
@@ -5220,7 +5219,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// New tavDecode function that accepts compressed data and decompresses internally
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap<String, Any> {
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap<String, Any> {
// Read compressed data from VM memory into byte array
val compressedData = ByteArray(compressedSize)
@@ -5250,7 +5249,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Call the existing tavDecode function with decompressed data
tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr,
width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder)
frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder, encoderPreset)
} finally {
// Clean up allocated buffer
@@ -5266,7 +5265,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Original tavDecode function for backward compatibility (now handles decompressed data)
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap<String, Any> {
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap<String, Any> {
val dbgOut = HashMap<String, Any>()
@@ -5328,14 +5327,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
0x01 -> { // TAV_MODE_INTRA
// Decode DWT coefficients directly to RGB buffer
readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, entropyCoder,
width, height, qY, qCo, qCg, entropyCoder, encoderPreset,
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
dbgOut["frameMode"] = "I"
}
0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet)
// Coefficient delta encoding for efficient P-frames
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, entropyCoder,
width, height, qY, qCo, qCg, entropyCoder, encoderPreset,
waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel)
dbgOut["frameMode"] = " "
}
@@ -5351,7 +5350,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int): Long {
// Determine coefficient count based on mode
val coeffCount = if (isMonoblock) {
@@ -5451,7 +5450,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
// Use perceptual weights since this is the perceptual quantization path
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset)
// Apply film grain filter if enabled
// commented; grain synthesis is now a part of the spec
@@ -5476,7 +5475,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset)
// Apply film grain filter if enabled
// commented; grain synthesis is now a part of the spec
@@ -5774,7 +5773,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int,
spatialFilter: Int, decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long {
val tileIdx = if (isMonoblock) {
@@ -5927,7 +5926,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
// Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, frameCount, subbands, qY)
tavApplyGrainSynthesis(currentY, tileWidth, tileHeight, frameCount, subbands, qY, encoderPreset)
// Store current coefficients as previous for next frame
tavPreviousCoeffsY!![tileIdx] = currentY.clone()
@@ -6475,7 +6474,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
temporalLevels: Int = 2,
entropyCoder: Int = 0,
bufferOffset: Long = 0,
temporalMotionCoder: Int = 0
temporalMotionCoder: Int = 0,
encoderPreset: Int = 0
): Array<Any> {
val dbgOut = HashMap<String, Any>()
dbgOut["qY"] = qYGlobal
@@ -6547,9 +6547,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Step 5: Dequantize with temporal-spatial scaling
for (t in 0 until gopSize) {
val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
val temporalScale = getTemporalQuantizerScale(temporalLevel)
val temporalScale = getTemporalQuantizerScale(encoderPreset, temporalLevel)
// CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behavior
// CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behaviour
// Encoder (encoder_tav.c:3189): temporal_base_quantiser = (int)roundf(temporal_quantiser)
// Without rounding, decoder uses float values (e.g., 1.516) while encoder used integers (e.g., 2)
// This causes ~24% under-reconstruction for odd baseQ values in temporal high-pass frames (Frame 5+)
@@ -6587,10 +6587,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// This must happen after dequantization but before inverse DWT
// Use GOP dimensions (may be cropped)
for (t in 0 until gopSize) {
removeGrainSynthesisDecoder(
tavApplyGrainSynthesis(
gopY[t], gopWidth, gopHeight,
rngFrameTick.getAndAdd(1) + t,
subbands, qIndex
subbands, qIndex, encoderPreset
)
}
@@ -6818,7 +6818,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
temporalLevels: Int = 3,
entropyCoder: Int = 0,
bufferOffset: Long = 0,
temporalMotionCoder: Int = 0
temporalMotionCoder: Int = 0,
encoderPreset: Int = 0
) {
// Cancel any existing decode thread
asyncDecodeThread?.interrupt()
@@ -6836,7 +6837,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
width, height,
qIndex, qYGlobal, qCoGlobal, qCgGlobal,
channelLayout, spatialFilter, spatialLevels, temporalLevels,
entropyCoder, bufferOffset, temporalMotionCoder
entropyCoder, bufferOffset, temporalMotionCoder, encoderPreset
)
asyncDecodeResult = result
asyncDecodeComplete.set(true)

View File

@@ -17,7 +17,7 @@
#include "decoder_tad.h" // Shared TAD decoder library
#include "tav_avx512.h" // AVX-512 SIMD optimisations
#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512)"
#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512,presets)"
// TAV format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
@@ -95,7 +95,8 @@ typedef struct {
uint8_t encoder_quality;
uint8_t channel_layout;
uint8_t entropy_coder;
uint8_t reserved[2];
uint8_t encoder_preset; // Byte 28: bit 0 = sports, bit 1 = anime
uint8_t reserved;
uint8_t device_orientation;
uint8_t file_role;
} __attribute__((packed)) tav_header_t;
@@ -394,10 +395,20 @@ static inline float tav_grain_triangular_noise(uint32_t rng_val) {
return (u1 + u2) - 1.0f;
}
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
// Apply grain synthesis to DWT coefficients (decoder subtracts noise)
// This must be called AFTER dequantisation but BEFORE inverse DWT
static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
int decomp_levels, int frame_num, int q_y_global) {
static void apply_grain_synthesis(float *coeffs, int width, int height,
int decomp_levels, int frame_num, int q_y_global, uint8_t encoder_preset, int no_grain_synthesis) {
// Command-line override: disable grain synthesis
if (no_grain_synthesis) {
return; // Skip grain synthesis entirely
}
// Anime preset: completely disable grain synthesis
if (encoder_preset & 0x02) {
return; // Skip grain synthesis entirely
}
dwt_subband_info_t subbands[32];
const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
@@ -412,7 +423,7 @@ static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
// Calculate band index for RNG (matches Kotlin: level + subbandType * 31 + 16777619)
uint32_t band = subband->level + subband->subband_type * 31 + 16777619;
// Remove noise from each coefficient in this subband
// Apply noise to each coefficient in this subband
for (int i = 0; i < subband->coeff_count; i++) {
const int idx = subband->coeff_start + i;
if (idx < width * height) {
@@ -1226,14 +1237,14 @@ static int get_temporal_subband_level(int frame_idx, int num_frames, int tempora
}
// Calculate temporal quantiser scale for a given temporal subband level
static float get_temporal_quantiser_scale(int temporal_level) {
static float get_temporal_quantiser_scale(uint8_t encoder_preset, int temporal_level) {
// Uses exponential scaling: 2^(BETA × level^KAPPA)
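// With the default BETA=0.6, KAPPA=1.14:
// - Level 0 (tLL): 2^0.00 = 1.00
// - Level 1 (tH): 2^0.60 = 1.52
// - Level 2 (tHH): 2^1.32 = 2.50
// With the sports preset (encoder_preset bit 0) BETA=0.0, so the scale is 1.00 at every level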
const float BETA = 0.6f; // Temporal scaling exponent
const float KAPPA = 1.14f;
const float BETA = (encoder_preset & 0x01) ? 0.0f : 0.6f;
const float KAPPA = (encoder_preset & 0x01) ? 1.0f : 1.14f;
return powf(2.0f, BETA * powf(temporal_level, KAPPA));
}
@@ -1812,6 +1823,7 @@ typedef struct {
int frame_size;
int is_monoblock; // True if version 3-6 (single tile mode)
int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version)
int no_grain_synthesis; // Command-line flag: disable grain synthesis
// Screen masking (letterbox/pillarbox) - array of geometry changes
screen_mask_entry_t *screen_masks;
@@ -2023,10 +2035,11 @@ static int extract_audio_to_wav(const char *input_file, const char *wav_file, in
// Decoder Initialisation and Cleanup
//=============================================================================
static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file) {
static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file, int no_grain_synthesis) {
tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
if (!decoder) return NULL;
decoder->no_grain_synthesis = no_grain_synthesis;
decoder->input_fp = fopen(input_file, "rb");
if (!decoder->input_fp) {
free(decoder);
@@ -2511,8 +2524,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
// Phase 2: Use decoding dimensions and temporary buffer
remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);
apply_grain_synthesis(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y,
decoder->header.encoder_preset, decoder->no_grain_synthesis);
// Debug: Check LL band AFTER grain removal
// if (decoder->frame_count == 32) {
@@ -2712,10 +2726,11 @@ static void print_usage(const char *prog) {
printf("Version: %s\n\n", DECODER_VENDOR_STRING);
printf("Usage: %s -i input.tav -o output.mkv\n\n", prog);
printf("Options:\n");
printf(" -i <file> Input TAV file\n");
printf(" -o <file> Output MKV file (optional, auto-generated from input)\n");
printf(" -v Verbose output\n");
printf(" -h, --help Show this help\n\n");
printf(" -i <file> Input TAV file\n");
printf(" -o <file> Output MKV file (optional, auto-generated from input)\n");
printf(" -v Verbose output\n");
printf(" --no-grain-synthesis Disable grain synthesis (override encoder preset)\n");
printf(" -h, --help Show this help\n\n");
printf("Supported features (matches TSVM decoder):\n");
printf(" - I-frames and P-frames (delta mode)\n");
printf(" - GOP unified 3D DWT (temporal compression)\n");
@@ -2740,9 +2755,11 @@ int main(int argc, char *argv[]) {
char *input_file = NULL;
char *output_file = NULL;
int verbose = 0;
int no_grain_synthesis = 0;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"no-grain-synthesis", no_argument, 0, 1000},
{0, 0, 0, 0}
};
@@ -2761,6 +2778,12 @@ int main(int argc, char *argv[]) {
case 'h':
print_usage(argv[0]);
return 0;
case 1000: // --no-grain-synthesis
no_grain_synthesis = 1;
if (verbose) {
printf("Grain synthesis disabled\n");
}
break;
default:
print_usage(argv[0]);
return 1;
@@ -2819,7 +2842,7 @@ int main(int argc, char *argv[]) {
}
// Pass 2: Decode video with audio file
tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file);
tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file, no_grain_synthesis);
if (!decoder) {
fprintf(stderr, "Failed to initialise decoder\n");
unlink(temp_audio_file); // Clean up temp file
@@ -3126,7 +3149,7 @@ int main(int argc, char *argv[]) {
// EZBC mode with perceptual quantisation: coefficients are normalised
// Need to dequantise using perceptual weights (same as twobit-map mode)
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
// FIX: Use QLUT to convert header quantiser indices to actual values
const float base_q_y = roundf(QLUT[decoder->header.quantiser_y] * temporal_scale);
@@ -3160,7 +3183,7 @@ int main(int argc, char *argv[]) {
} else if (!is_ezbc) {
// Normal mode: multiply by quantiser
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
// CRITICAL: Must ROUND temporal quantiser to match encoder's roundf() behavior
// FIX: Use QLUT to convert header quantiser indices to actual values
@@ -3206,9 +3229,10 @@ int main(int argc, char *argv[]) {
// Phase 2: Use GOP dimensions (may be cropped) for grain removal
for (int t = 0; t < gop_size; t++) {
remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height,
apply_grain_synthesis(gop_y[t], gop_width, gop_height,
decoder->header.decomp_levels, decoder->frame_count + t,
decoder->header.quantiser_y);
decoder->header.quantiser_y, decoder->header.encoder_preset,
decoder->no_grain_synthesis);
}
// Apply inverse 3D DWT (spatial + temporal)

View File

@@ -19,7 +19,7 @@
#include <float.h>
#include "tav_avx512.h" // AVX-512 SIMD optimisations
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512)"
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512,presets)"
// TSVM Advanced Video (TAV) format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
@@ -1835,6 +1835,7 @@ typedef struct tav_encoder_s {
int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
int enable_crop_encoding; // 1 = encode cropped active region only (Phase 2), 0 = encode full frame (default)
uint8_t encoder_preset; // Encoder preset flags: bit 0 = sports (finer temporal quantisation), bit 1 = anime (no grain)
// Active region tracking (for Phase 2 crop encoding)
uint16_t active_mask_top, active_mask_right, active_mask_bottom, active_mask_left;
@@ -2432,6 +2433,9 @@ static void show_usage(const char *program_name) {
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
printf(" --preset PRESET Encoder presets (comma-separated, e.g., 'sports,anime'):\n");
printf(" sports (or sport): Finer temporal quantisation for better motion detail\n");
printf(" anime (or animation): Disable grain synthesis for cleaner animated content\n");
printf(" --help Show this help\n\n");
printf("Audio Rate by Quality:\n ");
@@ -3355,8 +3359,9 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
int spatial_size,
int base_quantiser,
int is_chroma) {
const float BETA = 0.6f; // Temporal scaling exponent (aggressive for temporal high-pass)
const float KAPPA = 1.14f;
// Sports preset: use finer temporal quantisation (less aggressive)
const float BETA = (enc->encoder_preset & 0x01) ? 0.0f : 0.6f;
const float KAPPA = (enc->encoder_preset & 0x01) ? 1.0f : 1.14f;
// Process each temporal subband independently (separable approach)
for (int t = 0; t < num_frames; t++) {
@@ -7528,8 +7533,10 @@ static int write_tav_header(tav_encoder_t *enc) {
// Entropy Coder (0 = Twobit-map, 1 = EZBC, 2 = Raw)
fputc(enc->preprocess_mode, enc->output_fp);
// Reserved bytes (2 bytes)
fputc(0, enc->output_fp);
// Encoder Preset (byte 28): bit 0 = sports, bit 1 = anime
fputc(enc->encoder_preset, enc->output_fp);
// Reserved byte (1 byte)
fputc(0, enc->output_fp);
// Device Orientation (default: 0 = no rotation)
@@ -10775,6 +10782,7 @@ int main(int argc, char *argv[]) {
{"tad-audio", no_argument, 0, 1028},
{"raw-coeffs", no_argument, 0, 1029},
{"single-pass", no_argument, 0, 1050}, // disable two-pass encoding with wavelet-based scene detection
{"preset", required_argument, 0, 1051}, // Encoder presets: sports, anime (comma-separated)
{"enable-crop-encoding", no_argument, 0, 1052}, // Phase 2: encode cropped active region only (experimental)
{"help", no_argument, 0, '?'},
{0, 0, 0, 0}
@@ -11012,6 +11020,34 @@ int main(int argc, char *argv[]) {
enc->two_pass_mode = 0;
printf("Two-pass wavelet-based scene change detection disabled\n");
break;
case 1051: { // --preset
    // Parse a comma-separated preset list (e.g. "sports,anime") and OR the
    // matching flag bits into enc->encoder_preset (bit 0 = sports, bit 1 = anime).
    // Unknown names only produce a warning; they do not abort option parsing.
    //
    // strtok() writes NUL bytes into its input, so tokenise a private copy of optarg.
    char *preset_str = strdup(optarg);
    if (preset_str == NULL) {
        // strdup() failed; handing NULL to strtok() here would be undefined behaviour.
        fprintf(stderr, "Warning: out of memory while parsing --preset; presets ignored\n");
        break;
    }

    for (char *token = strtok(preset_str, ","); token != NULL; token = strtok(NULL, ",")) {
        // Trim leading whitespace by advancing the token start.
        while (*token == ' ' || *token == '\t') token++;

        // Trim trailing whitespace by overwriting it with terminators.
        char *end = token + strlen(token) - 1;
        while (end > token && (*end == ' ' || *end == '\t')) {
            *end = '\0';
            end--;
        }

        // Check for presets and aliases
        if (strcmp(token, "sports") == 0 || strcmp(token, "sport") == 0) {
            enc->encoder_preset |= 0x01;
            // The reported constants must match what the quantiser actually uses
            // for the sports preset (BETA = 0.0f, KAPPA = 1.0f).
            printf("Preset 'sports' enabled: finer temporal quantisation (BETA=0.0, KAPPA=1.0)\n");
        } else if (strcmp(token, "anime") == 0 || strcmp(token, "animation") == 0) {
            enc->encoder_preset |= 0x02;
            printf("Preset 'anime' enabled: grain synthesis disabled\n");
        } else {
            fprintf(stderr, "Warning: Unknown preset '%s' (valid: sports, anime)\n", token);
        }
    }

    free(preset_str);
    break;
}
case 1052: // --enable-crop-encoding
enc->enable_crop_encoding = 1;
printf("Phase 2 crop encoding enabled (experimental)\n");

View File

@@ -456,7 +456,7 @@ static inline void quantise_dwt_coefficients_avx512(
quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
}
// Manual rounding to match scalar behavior (round away from zero)
// Manual rounding to match scalar behaviour (round away from zero)
// First add 0.5 or -0.5 based on sign
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
@@ -510,7 +510,7 @@ static inline void quantise_dwt_coefficients_perceptual_avx512(
__m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
__m512 quant = _mm512_div_ps(coeff, effective_q);
// Manual rounding to match scalar behavior
// Manual rounding to match scalar behaviour
__mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
__m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
quant = _mm512_add_ps(quant, round_val);

View File

@@ -514,6 +514,7 @@ int main(int argc, char *argv[]) {
uint8_t quality = header[25];
uint8_t channel_layout = header[26];
uint8_t entropy_coder = header[27];
uint8_t encoder_preset = header[28];
static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"};
@@ -548,6 +549,21 @@ static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"};
printf(" Quality: n/a\n");
printf(" Channel layout: %s\n", CLAYOUT[channel_layout]);
printf(" Entropy coder: %s\n", entropy_coder == 0 ? "Twobit-map" : "EZBC");
printf(" Encoder preset: ");
if (encoder_preset == 0) {
printf("Default\n");
} else {
int first = 1;
if (encoder_preset & 0x01) {
printf("%sSports", first ? "" : ", ");
first = 0;
}
if (encoder_preset & 0x02) {
printf("%sAnime", first ? "" : ", ");
first = 0;
}
printf("\n");
}
printf(" Flags:\n");
printf(" Has audio: %s\n", (extra_flags & 0x01) ? "Yes" : "No");
printf(" Has subtitles: %s\n", (extra_flags & 0x02) ? "Yes" : "No");