From 08bb33bf2740cba4b8a669940d02e340ab82e01c Mon Sep 17 00:00:00 2001 From: minjaesong Date: Mon, 24 Nov 2025 17:40:45 +0900 Subject: [PATCH] TAV: preset implementation --- assets/disk0/tvdos/bin/playtav.js | 29 +++++--- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 57 ++++++++-------- video_encoder/decoder_tav.c | 66 +++++++++++++------ video_encoder/encoder_tav.c | 46 +++++++++++-- video_encoder/tav_avx512.h | 4 +- video_encoder/tav_inspector.c | 16 +++++ 6 files changed, 152 insertions(+), 66 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 6e76d0c..0c7f5c1 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -416,9 +416,10 @@ header.videoFlags = seqread.readOneByte() header.qualityLevel = seqread.readOneByte() // the decoder expects biased value header.channelLayout = seqread.readOneByte() header.entropyCoder = seqread.readOneByte() +header.encoderPreset = seqread.readOneByte() // Byte 28: bit 0 = sports, bit 1 = anime -// Skip reserved bytes (2) and device orientation (1) -seqread.skip(3) +// Skip reserved byte (1) and device orientation (1) +seqread.skip(2) header.fileRole = seqread.readOneByte() @@ -1248,7 +1249,8 @@ try { header.decompLevels, // TAV-specific parameter isLossless, header.version, // TAV version for colour space detection - header.entropyCoder // Entropy coder: 0 = Twobit-map, 1 = EZBC + header.entropyCoder, // Entropy coder: 0 = Twobit-map, 1 = EZBC + header.encoderPreset // Encoder preset: bit 0 = sports, bit 1 = anime ) decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 @@ -1344,7 +1346,8 @@ try { header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, bufferOffset, - header.temporalMotionCoder + header.temporalMotionCoder, + header.encoderPreset // Encoder preset: bit 0 = sports, bit 1 = anime ) asyncDecodeInProgress = true @@ -1418,7 +1421,8 @@ try { header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, 
header.entropyCoder, nextOffset, - header.temporalMotionCoder + header.temporalMotionCoder, + header.encoderPreset ) // Set async decode tracking variables @@ -1461,7 +1465,8 @@ try { header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, decodingOffset, - header.temporalMotionCoder + header.temporalMotionCoder, + header.encoderPreset ) // Set async decode tracking variables @@ -1829,7 +1834,8 @@ try { header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, readyGopData.slot * SLOT_SIZE, - header.temporalMotionCoder + header.temporalMotionCoder, + header.encoderPreset ) // CRITICAL FIX: Set async decode tracking variables so decode is properly tracked @@ -2021,7 +2027,8 @@ try { header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, decodingGopData.slot * SLOT_SIZE, - header.temporalMotionCoder + header.temporalMotionCoder, + header.encoderPreset ) // CRITICAL FIX: Set async decode tracking variables so decode is properly tracked @@ -2062,7 +2069,8 @@ try { header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, readyGopData.slot * SLOT_SIZE, - header.temporalMotionCoder + header.temporalMotionCoder, + header.encoderPreset ) readyGopData.needsDecode = false readyGopData.startTime = sys.nanoTime() @@ -2140,7 +2148,8 @@ try { header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS, header.entropyCoder, targetOffset, - header.temporalMotionCoder + header.temporalMotionCoder, + header.encoderPreset ) asyncDecodeInProgress = true diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index b70b9a9..f3269cd 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -47,25 +47,19 @@ import kotlin.collections.component2 import kotlin.collections.component3 import kotlin.collections.component4 import 
kotlin.collections.copyOf -import kotlin.collections.count import kotlin.collections.fill -import kotlin.collections.first import kotlin.collections.forEach import kotlin.collections.forEachIndexed import kotlin.collections.indices import kotlin.collections.isNotEmpty -import kotlin.collections.last import kotlin.collections.listOf import kotlin.collections.map -import kotlin.collections.maxOfOrNull import kotlin.collections.mutableListOf import kotlin.collections.mutableMapOf import kotlin.collections.set import kotlin.collections.sliceArray import kotlin.collections.sorted -import kotlin.collections.sumOf import kotlin.collections.toFloatArray -import kotlin.collections.toList import kotlin.error import kotlin.floatArrayOf import kotlin.fromBits @@ -5039,9 +5033,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { * - Level 1 (tH): 1.0 × 2^0.8 = 1.74 * - Level 2 (tHH): 1.0 × 2^1.6 = 3.03 */ - private fun getTemporalQuantizerScale(temporalLevel: Int): Float { - val BETA = 0.6f // Temporal scaling exponent (aggressive for temporal high-pass) - val KAPPA = 1.14f + private fun getTemporalQuantizerScale(encoderPreset: Int, temporalLevel: Int): Float { + val BETA = if (encoderPreset and 0x01 == 1) 0.0f else 0.6f // Temporal scaling exponent (aggressive for temporal high-pass) + val KAPPA = if (encoderPreset and 0x01 == 1) 1.0f else 1.14f return 2.0f.pow(BETA * temporalLevel.toFloat().pow(KAPPA)) } @@ -5177,8 +5171,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Remove grain synthesis from DWT coefficients (decoder subtracts noise) // This must be called AFTER dequantization but BEFORE inverse DWT - private fun removeGrainSynthesisDecoder(coeffs: FloatArray, width: Int, height: Int, - frameNum: Int, subbands: List, qYGlobal: Int) { + private fun tavApplyGrainSynthesis(coeffs: FloatArray, width: Int, height: Int, + frameNum: Int, subbands: List, qYGlobal: Int, encoderPreset: Int = 0) { + // Anime preset: completely disable grain synthesis + if 
((encoderPreset and 0x02) != 0) { + return // Skip grain synthesis entirely + } + // Only apply to Y channel, excluding LL band // Noise amplitude = half of quantization step (scaled by perceptual weight if enabled) @@ -5220,7 +5219,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // New tavDecode function that accepts compressed data and decompresses internally fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, - frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap { + frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap { // Read compressed data from VM memory into byte array val compressedData = ByteArray(compressedSize) @@ -5250,7 +5249,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Call the existing tavDecode function with decompressed data tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr, width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout, - frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder) + frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder, encoderPreset) } finally { // Clean up allocated buffer @@ -5266,7 +5265,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Original tavDecode function for backward compatibility (now handles decompressed data) fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, - frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap { 
+ frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap { val dbgOut = HashMap() @@ -5328,14 +5327,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { 0x01 -> { // TAV_MODE_INTRA // Decode DWT coefficients directly to RGB buffer readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr, - width, height, qY, qCo, qCg, entropyCoder, + width, height, qY, qCo, qCg, entropyCoder, encoderPreset, waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount) dbgOut["frameMode"] = "I" } 0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet) // Coefficient delta encoding for efficient P-frames readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr, - width, height, qY, qCo, qCg, entropyCoder, + width, height, qY, qCo, qCg, entropyCoder, encoderPreset, waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel) dbgOut["frameMode"] = " " } @@ -5351,7 +5350,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, - width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int, waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int): Long { // Determine coefficient count based on mode val coeffCount = if (isMonoblock) { @@ -5451,7 +5450,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT) // Use perceptual weights since this is the perceptual quantization path - removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, 
subbands, qYGlobal) + tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset) // Apply film grain filter if enabled // commented; grain synthesis is now a part of the spec @@ -5476,7 +5475,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels) - removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal) + tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset) // Apply film grain filter if enabled // commented; grain synthesis is now a part of the spec @@ -5774,7 +5773,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long, - width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int, spatialFilter: Int, decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long { val tileIdx = if (isMonoblock) { @@ -5927,7 +5926,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT) val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels) // Delta frames use uniform quantization for the deltas themselves, so no perceptual weights - removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, frameCount, subbands, qY) + tavApplyGrainSynthesis(currentY, tileWidth, tileHeight, frameCount, subbands, qY, encoderPreset) // Store current coefficients as previous for next frame tavPreviousCoeffsY!![tileIdx] = currentY.clone() @@ -6475,7 +6474,8 @@ class 
GraphicsJSR223Delegate(private val vm: VM) { temporalLevels: Int = 2, entropyCoder: Int = 0, bufferOffset: Long = 0, - temporalMotionCoder: Int = 0 + temporalMotionCoder: Int = 0, + encoderPreset: Int = 0 ): Array { val dbgOut = HashMap() dbgOut["qY"] = qYGlobal @@ -6547,9 +6547,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Step 5: Dequantize with temporal-spatial scaling for (t in 0 until gopSize) { val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels) - val temporalScale = getTemporalQuantizerScale(temporalLevel) + val temporalScale = getTemporalQuantizerScale(encoderPreset, temporalLevel) - // CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behavior + // CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behaviour // Encoder (encoder_tav.c:3189): temporal_base_quantiser = (int)roundf(temporal_quantiser) // Without rounding, decoder uses float values (e.g., 1.516) while encoder used integers (e.g., 2) // This causes ~24% under-reconstruction for odd baseQ values in temporal high-pass frames (Frame 5+) @@ -6587,10 +6587,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { // This must happen after dequantization but before inverse DWT // Use GOP dimensions (may be cropped) for (t in 0 until gopSize) { - removeGrainSynthesisDecoder( + tavApplyGrainSynthesis( gopY[t], gopWidth, gopHeight, rngFrameTick.getAndAdd(1) + t, - subbands, qIndex + subbands, qIndex, encoderPreset ) } @@ -6818,7 +6818,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { temporalLevels: Int = 3, entropyCoder: Int = 0, bufferOffset: Long = 0, - temporalMotionCoder: Int = 0 + temporalMotionCoder: Int = 0, + encoderPreset: Int = 0 ) { // Cancel any existing decode thread asyncDecodeThread?.interrupt() @@ -6836,7 +6837,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout, spatialFilter, spatialLevels, temporalLevels, - entropyCoder, 
bufferOffset, temporalMotionCoder + entropyCoder, bufferOffset, temporalMotionCoder, encoderPreset ) asyncDecodeResult = result asyncDecodeComplete.set(true) diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c index f0f86de..00bb616 100644 --- a/video_encoder/decoder_tav.c +++ b/video_encoder/decoder_tav.c @@ -17,7 +17,7 @@ #include "decoder_tad.h" // Shared TAD decoder library #include "tav_avx512.h" // AVX-512 SIMD optimisations -#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512)" +#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512,presets)" // TAV format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" @@ -95,7 +95,8 @@ typedef struct { uint8_t encoder_quality; uint8_t channel_layout; uint8_t entropy_coder; - uint8_t reserved[2]; + uint8_t encoder_preset; // Byte 28: bit 0 = sports, bit 1 = anime + uint8_t reserved; uint8_t device_orientation; uint8_t file_role; } __attribute__((packed)) tav_header_t; @@ -394,10 +395,20 @@ static inline float tav_grain_triangular_noise(uint32_t rng_val) { return (u1 + u2) - 1.0f; } -// Remove grain synthesis from DWT coefficients (decoder subtracts noise) +// Apply grain synthesis to DWT coefficients (decoder subtracts noise) // This must be called AFTER dequantisation but BEFORE inverse DWT -static void remove_grain_synthesis_decoder(float *coeffs, int width, int height, - int decomp_levels, int frame_num, int q_y_global) { +static void apply_grain_synthesis(float *coeffs, int width, int height, + int decomp_levels, int frame_num, int q_y_global, uint8_t encoder_preset, int no_grain_synthesis) { + // Command-line override: disable grain synthesis + if (no_grain_synthesis) { + return; // Skip grain synthesis entirely + } + + // Anime preset: completely disable grain synthesis + if (encoder_preset & 0x02) { + return; // Skip grain synthesis entirely + } + dwt_subband_info_t subbands[32]; const int subband_count = calculate_subband_layout(width, height, decomp_levels,
subbands); @@ -412,7 +423,7 @@ static void remove_grain_synthesis_decoder(float *coeffs, int width, int height, // Calculate band index for RNG (matches Kotlin: level + subbandType * 31 + 16777619) uint32_t band = subband->level + subband->subband_type * 31 + 16777619; - // Remove noise from each coefficient in this subband + // Apply noise to each coefficient in this subband for (int i = 0; i < subband->coeff_count; i++) { const int idx = subband->coeff_start + i; if (idx < width * height) { @@ -1226,14 +1237,14 @@ static int get_temporal_subband_level(int frame_idx, int num_frames, int tempora } // Calculate temporal quantiser scale for a given temporal subband level -static float get_temporal_quantiser_scale(int temporal_level) { +static float get_temporal_quantiser_scale(uint8_t encoder_preset, int temporal_level) { // Uses exponential scaling: 2^(BETA × level^KAPPA) // With BETA=0.6, KAPPA=1.14: // - Level 0 (tLL): 2^0.0 = 1.00 // - Level 1 (tH): 2^0.68 = 1.61 // - Level 2 (tHH): 2^1.29 = 2.45 - const float BETA = 0.6f; // Temporal scaling exponent - const float KAPPA = 1.14f; + const float BETA = (encoder_preset & 0x01) ? 0.0f : 0.6f; + const float KAPPA = (encoder_preset & 0x01) ?
1.0f : 1.14f; return powf(2.0f, BETA * powf(temporal_level, KAPPA)); } @@ -1812,6 +1823,7 @@ typedef struct { int frame_size; int is_monoblock; // True if version 3-6 (single tile mode) int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version) + int no_grain_synthesis; // Command-line flag: disable grain synthesis // Screen masking (letterbox/pillarbox) - array of geometry changes screen_mask_entry_t *screen_masks; @@ -2023,10 +2035,11 @@ static int extract_audio_to_wav(const char *input_file, const char *wav_file, in // Decoder Initialisation and Cleanup //============================================================================= -static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file) { +static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file, int no_grain_synthesis) { tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t)); if (!decoder) return NULL; + decoder->no_grain_synthesis = no_grain_synthesis; decoder->input_fp = fopen(input_file, "rb"); if (!decoder->input_fp) { free(decoder); @@ -2511,8 +2524,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint // Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT) // Phase 2: Use decoding dimensions and temporary buffer - remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height, - decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y); + apply_grain_synthesis(temp_dwt_y, decoder->decoding_width, decoder->decoding_height, + decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y, + decoder->header.encoder_preset, decoder->no_grain_synthesis); // Debug: Check LL band AFTER grain removal // if (decoder->frame_count == 32) { @@ -2712,10 +2726,11 @@ static void print_usage(const char *prog) { printf("Version: %s\n\n", 
DECODER_VENDOR_STRING); printf("Usage: %s -i input.tav -o output.mkv\n\n", prog); printf("Options:\n"); - printf(" -i Input TAV file\n"); - printf(" -o Output MKV file (optional, auto-generated from input)\n"); - printf(" -v Verbose output\n"); - printf(" -h, --help Show this help\n\n"); + printf(" -i Input TAV file\n"); + printf(" -o Output MKV file (optional, auto-generated from input)\n"); + printf(" -v Verbose output\n"); + printf(" --no-grain-synthesis Disable grain synthesis (override encoder preset)\n"); + printf(" -h, --help Show this help\n\n"); printf("Supported features (matches TSVM decoder):\n"); printf(" - I-frames and P-frames (delta mode)\n"); printf(" - GOP unified 3D DWT (temporal compression)\n"); @@ -2740,9 +2755,11 @@ int main(int argc, char *argv[]) { char *input_file = NULL; char *output_file = NULL; int verbose = 0; + int no_grain_synthesis = 0; static struct option long_options[] = { {"help", no_argument, 0, 'h'}, + {"no-grain-synthesis", no_argument, 0, 1000}, {0, 0, 0, 0} }; @@ -2761,6 +2778,12 @@ int main(int argc, char *argv[]) { case 'h': print_usage(argv[0]); return 0; + case 1000: // --no-grain-synthesis + no_grain_synthesis = 1; + if (verbose) { + printf("Grain synthesis disabled\n"); + } + break; default: print_usage(argv[0]); return 1; @@ -2819,7 +2842,7 @@ int main(int argc, char *argv[]) { } // Pass 2: Decode video with audio file - tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file); + tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file, no_grain_synthesis); if (!decoder) { fprintf(stderr, "Failed to initialise decoder\n"); unlink(temp_audio_file); // Clean up temp file @@ -3126,7 +3149,7 @@ int main(int argc, char *argv[]) { // EZBC mode with perceptual quantisation: coefficients are normalised // Need to dequantise using perceptual weights (same as twobit-map mode) const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels); - const float 
temporal_scale = get_temporal_quantiser_scale(temporal_level); + const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level); // FIX: Use QLUT to convert header quantiser indices to actual values const float base_q_y = roundf(QLUT[decoder->header.quantiser_y] * temporal_scale); @@ -3160,7 +3183,7 @@ int main(int argc, char *argv[]) { } else if (!is_ezbc) { // Normal mode: multiply by quantiser const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels); - const float temporal_scale = get_temporal_quantiser_scale(temporal_level); + const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level); // CRITICAL: Must ROUND temporal quantiser to match encoder's roundf() behavior // FIX: Use QLUT to convert header quantiser indices to actual values @@ -3206,9 +3229,10 @@ int main(int argc, char *argv[]) { // Phase 2: Use GOP dimensions (may be cropped) for grain removal for (int t = 0; t < gop_size; t++) { - remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height, + apply_grain_synthesis(gop_y[t], gop_width, gop_height, decoder->header.decomp_levels, decoder->frame_count + t, - decoder->header.quantiser_y); + decoder->header.quantiser_y, decoder->header.encoder_preset, + decoder->no_grain_synthesis); } // Apply inverse 3D DWT (spatial + temporal) diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 59ea2f6..ec66460 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -19,7 +19,7 @@ #include #include "tav_avx512.h" // AVX-512 SIMD optimisations -#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512)" +#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512,presets)" // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" @@ -1835,6 +1835,7 @@ typedef struct tav_encoder_s { int 
pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default) int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level) int enable_crop_encoding; // 1 = encode cropped active region only (Phase 2), 0 = encode full frame (default) + uint8_t encoder_preset; // Encoder preset flags: bit 0 = sports (finer temporal quantisation), bit 1 = anime (no grain) // Active region tracking (for Phase 2 crop encoding) uint16_t active_mask_top, active_mask_right, active_mask_bottom, active_mask_left; @@ -2432,6 +2433,9 @@ static void show_usage(const char *program_name) { printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n"); printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n"); printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL); + printf(" --preset PRESET Encoder presets (comma-separated, e.g., 'sports,anime'):\n"); + printf(" sports (or sport): Finer temporal quantisation for better motion detail\n"); + printf(" anime (or animation): Disable grain synthesis for cleaner animated content\n"); printf(" --help Show this help\n\n"); printf("Audio Rate by Quality:\n "); @@ -3355,8 +3359,9 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc, int spatial_size, int base_quantiser, int is_chroma) { - const float BETA = 0.6f; // Temporal scaling exponent (aggressive for temporal high-pass) - const float KAPPA = 1.14f; + // Sports preset: use finer temporal quantisation (less aggressive) + const float BETA = (enc->encoder_preset & 0x01) ? 0.0f : 0.6f; + const float KAPPA = (enc->encoder_preset & 0x01) ? 
1.0f : 1.14f; // Process each temporal subband independently (separable approach) for (int t = 0; t < num_frames; t++) { @@ -7528,8 +7533,10 @@ static int write_tav_header(tav_encoder_t *enc) { // Entropy Coder (0 = Twobit-map, 1 = EZBC, 2 = Raw) fputc(enc->preprocess_mode, enc->output_fp); - // Reserved bytes (2 bytes) - fputc(0, enc->output_fp); + // Encoder Preset (byte 28): bit 0 = sports, bit 1 = anime + fputc(enc->encoder_preset, enc->output_fp); + + // Reserved byte (1 byte) fputc(0, enc->output_fp); // Device Orientation (default: 0 = no rotation) @@ -10775,6 +10782,7 @@ int main(int argc, char *argv[]) { {"tad-audio", no_argument, 0, 1028}, {"raw-coeffs", no_argument, 0, 1029}, {"single-pass", no_argument, 0, 1050}, // disable two-pass encoding with wavelet-based scene detection + {"preset", required_argument, 0, 1051}, // Encoder presets: sports, anime (comma-separated) {"enable-crop-encoding", no_argument, 0, 1052}, // Phase 2: encode cropped active region only (experimental) {"help", no_argument, 0, '?'}, {0, 0, 0, 0} @@ -11012,6 +11020,34 @@ int main(int argc, char *argv[]) { enc->two_pass_mode = 0; printf("Two-pass wavelet-based scene change detection disabled\n"); break; + case 1051: { // --preset + char *preset_str = strdup(optarg); + char *token = strtok(preset_str, ","); + while (token != NULL) { + // Trim leading/trailing whitespace + while (*token == ' ' || *token == '\t') token++; + char *end = token + strlen(token) - 1; + while (end > token && (*end == ' ' || *end == '\t')) { + *end = '\0'; + end--; + } + + // Check for presets and aliases + if (strcmp(token, "sports") == 0 || strcmp(token, "sport") == 0) { + enc->encoder_preset |= 0x01; + printf("Preset 'sports' enabled: finer temporal quantisation (BETA=0.0, KAPPA=1.0)\n"); + } else if (strcmp(token, "anime") == 0 || strcmp(token, "animation") == 0) { + enc->encoder_preset |= 0x02; + printf("Preset 'anime' enabled: grain synthesis disabled\n"); + } else { + fprintf(stderr, "Warning: Unknown
preset '%s' (valid: sports, anime)\n", token); + } + + token = strtok(NULL, ","); + } + free(preset_str); + break; + } case 1052: // --enable-crop-encoding enc->enable_crop_encoding = 1; printf("Phase 2 crop encoding enabled (experimental)\n"); diff --git a/video_encoder/tav_avx512.h b/video_encoder/tav_avx512.h index 6a66157..2d6736c 100644 --- a/video_encoder/tav_avx512.h +++ b/video_encoder/tav_avx512.h @@ -456,7 +456,7 @@ static inline void quantise_dwt_coefficients_avx512( quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec); } - // Manual rounding to match scalar behavior (round away from zero) + // Manual rounding to match scalar behaviour (round away from zero) // First add 0.5 or -0.5 based on sign __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ); __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec); @@ -510,7 +510,7 @@ static inline void quantise_dwt_coefficients_perceptual_avx512( __m512 effective_q = _mm512_mul_ps(base_q_vec, weight); __m512 quant = _mm512_div_ps(coeff, effective_q); - // Manual rounding to match scalar behavior + // Manual rounding to match scalar behaviour __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ); __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec); quant = _mm512_add_ps(quant, round_val); diff --git a/video_encoder/tav_inspector.c b/video_encoder/tav_inspector.c index 4c680d8..cdbdf3a 100644 --- a/video_encoder/tav_inspector.c +++ b/video_encoder/tav_inspector.c @@ -514,6 +514,7 @@ int main(int argc, char *argv[]) { uint8_t quality = header[25]; uint8_t channel_layout = header[26]; uint8_t entropy_coder = header[27]; + uint8_t encoder_preset = header[28]; static const int QLUT[] = 
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096}; static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"}; @@ -548,6 +549,21 @@ static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"}; printf(" Quality: n/a\n"); printf(" Channel layout: %s\n", CLAYOUT[channel_layout]); printf(" Entropy coder: %s\n", entropy_coder == 0 ? "Twobit-map" : "EZBC"); + printf(" Encoder preset: "); + if (encoder_preset == 0) { + printf("Default\n"); + } else { + int first = 1; + if (encoder_preset & 0x01) { + printf("%sSports", first ? "" : ", "); + first = 0; + } + if (encoder_preset & 0x02) { + printf("%sAnime", first ? "" : ", "); + first = 0; + } + printf("\n"); + } printf(" Flags:\n"); printf(" Has audio: %s\n", (extra_flags & 0x01) ? "Yes" : "No"); printf(" Has subtitles: %s\n", (extra_flags & 0x02) ? "Yes" : "No");