TAV: preset implementation

2026-06-08 22:34:03 +09:00 · 2025-11-24 17:40:45 +09:00
parent 6132012e74
commit 08bb33bf27
6 changed files with 152 additions and 66 deletions
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -416,9 +416,10 @@ header.videoFlags = seqread.readOneByte()
 header.qualityLevel = seqread.readOneByte() // the decoder expects biased value
 header.channelLayout = seqread.readOneByte()
 header.entropyCoder = seqread.readOneByte()
 header.encoderPreset = seqread.readOneByte()  // Byte 28: bit 0 = sports, bit 1 = anime
-// Skip reserved bytes (2) and device orientation (1)
+// Skip reserved byte (1) and device orientation (1)
-seqread.skip(3)
+seqread.skip(2)
 header.fileRole = seqread.readOneByte()
@@ -1248,7 +1249,8 @@ try {
                        header.decompLevels,       // TAV-specific parameter
                        isLossless,
                        header.version,            // TAV version for colour space detection
-                        header.entropyCoder        // Entropy coder: 0 = Twobit-map, 1 = EZBC
+                        header.entropyCoder,       // Entropy coder: 0 = Twobit-map, 1 = EZBC
                        header.encoderPreset       // Encoder preset: bit 0 = sports, bit 1 = anime
                    )
                    decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
@@ -1344,7 +1346,8 @@ try {
                        header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
                        header.entropyCoder,
                        bufferOffset,
-                        header.temporalMotionCoder
+                        header.temporalMotionCoder,
                        header.encoderPreset       // Encoder preset: bit 0 = sports, bit 1 = anime
                    )
                    asyncDecodeInProgress = true
@@ -1418,7 +1421,8 @@ try {
                        header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
                        header.entropyCoder,
                        nextOffset,
-                        header.temporalMotionCoder
+                        header.temporalMotionCoder,
                        header.encoderPreset
                    )
                    // Set async decode tracking variables
@@ -1461,7 +1465,8 @@ try {
                        header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
                        header.entropyCoder,
                        decodingOffset,
-                        header.temporalMotionCoder
+                        header.temporalMotionCoder,
                        header.encoderPreset
                    )
                    // Set async decode tracking variables
@@ -1829,7 +1834,8 @@ try {
                        header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
                        header.entropyCoder,
                        readyGopData.slot * SLOT_SIZE,
-                        header.temporalMotionCoder
+                        header.temporalMotionCoder,
                        header.encoderPreset
                    )
                    // CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
@@ -2021,7 +2027,8 @@ try {
                        header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
                        header.entropyCoder,
                        decodingGopData.slot * SLOT_SIZE,
-                        header.temporalMotionCoder
+                        header.temporalMotionCoder,
                        header.encoderPreset
                    )
                    // CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
@@ -2062,7 +2069,8 @@ try {
                        header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
                        header.entropyCoder,
                        readyGopData.slot * SLOT_SIZE,
-                        header.temporalMotionCoder
+                        header.temporalMotionCoder,
                        header.encoderPreset
                    )
                    readyGopData.needsDecode = false
                    readyGopData.startTime = sys.nanoTime()
@@ -2140,7 +2148,8 @@ try {
                            header.waveletFilter, header.decompLevels, TAV_TEMPORAL_LEVELS,
                            header.entropyCoder,
                            targetOffset,
-                            header.temporalMotionCoder
+                            header.temporalMotionCoder,
                            header.encoderPreset
                        )
                        asyncDecodeInProgress = true
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -47,25 +47,19 @@ import kotlin.collections.component2
 import kotlin.collections.component3
 import kotlin.collections.component4
 import kotlin.collections.copyOf
 import kotlin.collections.count
 import kotlin.collections.fill
 import kotlin.collections.first
 import kotlin.collections.forEach
 import kotlin.collections.forEachIndexed
 import kotlin.collections.indices
 import kotlin.collections.isNotEmpty
 import kotlin.collections.last
 import kotlin.collections.listOf
 import kotlin.collections.map
 import kotlin.collections.maxOfOrNull
 import kotlin.collections.mutableListOf
 import kotlin.collections.mutableMapOf
 import kotlin.collections.set
 import kotlin.collections.sliceArray
 import kotlin.collections.sorted
 import kotlin.collections.sumOf
 import kotlin.collections.toFloatArray
 import kotlin.collections.toList
 import kotlin.error
 import kotlin.floatArrayOf
 import kotlin.fromBits
@@ -5039,9 +5033,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     *   - Level 1 (tH):   1.0 × 2^0.8 = 1.74
     *   - Level 2 (tHH):  1.0 × 2^1.6 = 3.03
     */
-    private fun getTemporalQuantizerScale(temporalLevel: Int): Float {
+    private fun getTemporalQuantizerScale(encoderPreset: Int, temporalLevel: Int): Float {
-        val BETA = 0.6f // Temporal scaling exponent (aggressive for temporal high-pass)
+        val BETA = if (encoderPreset and 0x01 == 1) 0.0f else 0.6f // Temporal scaling exponent (aggressive for temporal high-pass)
-        val KAPPA = 1.14f
+        val KAPPA = if (encoderPreset and 0x01 == 1) 1.0f else 1.14f
        return 2.0f.pow(BETA * temporalLevel.toFloat().pow(KAPPA))
    }
@@ -5177,8 +5171,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
    // Remove grain synthesis from DWT coefficients (decoder subtracts noise)
    // This must be called AFTER dequantization but BEFORE inverse DWT
-    private fun removeGrainSynthesisDecoder(coeffs: FloatArray, width: Int, height: Int,
+    private fun tavApplyGrainSynthesis(coeffs: FloatArray, width: Int, height: Int,
-                                           frameNum: Int, subbands: List<DWTSubbandInfo>, qYGlobal: Int) {
+                                       frameNum: Int, subbands: List<DWTSubbandInfo>, qYGlobal: Int, encoderPreset: Int = 0) {
        // Anime preset: completely disable grain synthesis
        if ((encoderPreset and 0x02) != 0) {
            return  // Skip grain synthesis entirely
        }
        // Only apply to Y channel, excluding LL band
        // Noise amplitude = half of quantization step (scaled by perceptual weight if enabled)
@@ -5220,7 +5219,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
    // New tavDecode function that accepts compressed data and decompresses internally
    fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
                            width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
-                            frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap<String, Any> {
+                            frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap<String, Any> {
        // Read compressed data from VM memory into byte array
        val compressedData = ByteArray(compressedSize)
@@ -5250,7 +5249,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                // Call the existing tavDecode function with decompressed data
                tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr,
                    width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
-                    frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder)
+                    frameCount, waveletFilter, decompLevels, isLossless, tavVersion, entropyCoder, encoderPreset)
            } finally {
                // Clean up allocated buffer
@@ -5266,7 +5265,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
    // Original tavDecode function for backward compatibility (now handles decompressed data)
    fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
                  width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
-                  frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0): HashMap<String, Any> {
+                  frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1, entropyCoder: Int = 0, encoderPreset: Int = 0): HashMap<String, Any> {
        val dbgOut = HashMap<String, Any>()
@@ -5328,14 +5327,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                        0x01 -> { // TAV_MODE_INTRA
                            // Decode DWT coefficients directly to RGB buffer
                            readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
-                                                          width, height, qY, qCo, qCg, entropyCoder,
+                                                          width, height, qY, qCo, qCg, entropyCoder, encoderPreset,
                                                          waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
                            dbgOut["frameMode"] = "I"
                        }
                        0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet)
                            // Coefficient delta encoding for efficient P-frames
                            readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
-                                                      width, height, qY, qCo, qCg, entropyCoder,
+                                                      width, height, qY, qCo, qCg, entropyCoder, encoderPreset,
                                                      waveletFilter, decompLevels, tavVersion, isMonoblock, frameCount, haarLevel)
                            dbgOut["frameMode"] = " "
                        }
@@ -5351,7 +5350,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
    }
    private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
-                                         width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int,
+                                         width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int,
                                         waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int): Long {
        // Determine coefficient count based on mode
        val coeffCount = if (isMonoblock) {
@@ -5451,7 +5450,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
            // Use perceptual weights since this is the perceptual quantization path
-            removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
+            tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset)
            // Apply film grain filter if enabled
            // commented; grain synthesis is now a part of the spec
@@ -5476,7 +5475,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
            val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
            val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
-            removeGrainSynthesisDecoder(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal)
+            tavApplyGrainSynthesis(yTile, tileWidth, tileHeight, frameCount, subbands, qYGlobal, encoderPreset)
            // Apply film grain filter if enabled
            // commented; grain synthesis is now a part of the spec
@@ -5774,7 +5773,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
    }
    private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
-                                      width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int,
+                                      width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, entropyCoder: Int, encoderPreset: Int,
                                      spatialFilter: Int, decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0, haarLevel: Int = 0): Long {
        val tileIdx = if (isMonoblock) {
@@ -5927,7 +5926,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        // Remove grain synthesis from Y channel (must happen after dequantization, before inverse DWT)
        val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
        // Delta frames use uniform quantization for the deltas themselves, so no perceptual weights
-        removeGrainSynthesisDecoder(currentY, tileWidth, tileHeight, frameCount, subbands, qY)
+        tavApplyGrainSynthesis(currentY, tileWidth, tileHeight, frameCount, subbands, qY, encoderPreset)
        // Store current coefficients as previous for next frame
        tavPreviousCoeffsY!![tileIdx] = currentY.clone()
@@ -6475,7 +6474,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        temporalLevels: Int = 2,
        entropyCoder: Int = 0,
        bufferOffset: Long = 0,
-        temporalMotionCoder: Int = 0
+        temporalMotionCoder: Int = 0,
        encoderPreset: Int = 0
    ): Array<Any> {
        val dbgOut = HashMap<String, Any>()
        dbgOut["qY"] = qYGlobal
@@ -6547,9 +6547,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        // Step 5: Dequantize with temporal-spatial scaling
        for (t in 0 until gopSize) {
            val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
-            val temporalScale = getTemporalQuantizerScale(temporalLevel)
+            val temporalScale = getTemporalQuantizerScale(encoderPreset, temporalLevel)
-            // CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behavior
+            // CRITICAL FIX: Must ROUND temporal quantizer to match encoder's roundf() behaviour
            // Encoder (encoder_tav.c:3189): temporal_base_quantiser = (int)roundf(temporal_quantiser)
            // Without rounding, decoder uses float values (e.g., 1.516) while encoder used integers (e.g., 2)
            // This causes ~24% under-reconstruction for odd baseQ values in temporal high-pass frames (Frame 5+)
@@ -6587,10 +6587,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        // This must happen after dequantization but before inverse DWT
        // Use GOP dimensions (may be cropped)
        for (t in 0 until gopSize) {
-            removeGrainSynthesisDecoder(
+            tavApplyGrainSynthesis(
                gopY[t], gopWidth, gopHeight,
                rngFrameTick.getAndAdd(1) + t,
-                subbands, qIndex
+                subbands, qIndex, encoderPreset
            )
        }
@@ -6818,7 +6818,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        temporalLevels: Int = 3,
        entropyCoder: Int = 0,
        bufferOffset: Long = 0,
-        temporalMotionCoder: Int = 0
+        temporalMotionCoder: Int = 0,
        encoderPreset: Int = 0
    ) {
        // Cancel any existing decode thread
        asyncDecodeThread?.interrupt()
@@ -6836,7 +6837,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                    width, height,
                    qIndex, qYGlobal, qCoGlobal, qCgGlobal,
                    channelLayout, spatialFilter, spatialLevels, temporalLevels,
-                    entropyCoder, bufferOffset, temporalMotionCoder
+                    entropyCoder, bufferOffset, temporalMotionCoder, encoderPreset
                )
                asyncDecodeResult = result
                asyncDecodeComplete.set(true)
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -17,7 +17,7 @@
 #include "decoder_tad.h"  // Shared TAD decoder library
 #include "tav_avx512.h"  // AVX-512 SIMD optimisations
-#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512)"
+#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512,presets)"
 // TAV format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
@@ -95,7 +95,8 @@ typedef struct {
    uint8_t encoder_quality;
    uint8_t channel_layout;
    uint8_t entropy_coder;
-    uint8_t reserved[2];
+    uint8_t encoder_preset;  // Byte 28: bit 0 = sports, bit 1 = anime
    uint8_t reserved;
    uint8_t device_orientation;
    uint8_t file_role;
 } __attribute__((packed)) tav_header_t;
@@ -394,10 +395,20 @@ static inline float tav_grain_triangular_noise(uint32_t rng_val) {
    return (u1 + u2) - 1.0f;
 }
-// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
+// Apply grain synthesis to DWT coefficients (decoder subtracts noise)
 // This must be called AFTER dequantisation but BEFORE inverse DWT
-static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
+static void apply_grain_synthesis(float *coeffs, int width, int height,
-                                          int decomp_levels, int frame_num, int q_y_global) {
+                                          int decomp_levels, int frame_num, int q_y_global, uint8_t encoder_preset, int no_grain_synthesis) {
    // Command-line override: disable grain synthesis
    if (no_grain_synthesis) {
        return;  // Skip grain synthesis entirely
    }
    // Anime preset: completely disable grain synthesis
    if (encoder_preset & 0x02) {
        return;  // Skip grain synthesis entirely
    }
    dwt_subband_info_t subbands[32];
    const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
@@ -412,7 +423,7 @@ static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
        // Calculate band index for RNG (matches Kotlin: level + subbandType * 31 + 16777619)
        uint32_t band = subband->level + subband->subband_type * 31 + 16777619;
-        // Remove noise from each coefficient in this subband
+        // Apply noise to each coefficient in this subband
        for (int i = 0; i < subband->coeff_count; i++) {
            const int idx = subband->coeff_start + i;
            if (idx < width * height) {
@@ -1226,14 +1237,14 @@ static int get_temporal_subband_level(int frame_idx, int num_frames, int tempora
 }
 // Calculate temporal quantiser scale for a given temporal subband level
-static float get_temporal_quantiser_scale(int temporal_level) {
+static float get_temporal_quantiser_scale(uint8_t encoder_preset, int temporal_level) {
    // Uses exponential scaling: 2^(BETA × level^KAPPA)
    // With BETA=0.6, KAPPA=1.14:
    //   - Level 0 (tLL):  2^0.0 = 1.00
    //   - Level 1 (tH):   2^0.68 = 1.61
    //   - Level 2 (tHH):  2^1.29 = 2.45
-    const float BETA = 0.6f;  // Temporal scaling exponent
+    const float BETA = (encoder_preset & 0x01) ? 0.0f : 0.6f;
-    const float KAPPA = 1.14f;
+    const float KAPPA = (encoder_preset & 0x01) ? 1.0f : 1.14f;
    return powf(2.0f, BETA * powf(temporal_level, KAPPA));
 }
@@ -1812,6 +1823,7 @@ typedef struct {
    int frame_size;
    int is_monoblock;           // True if version 3-6 (single tile mode)
    int temporal_motion_coder;  // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version)
    int no_grain_synthesis;     // Command-line flag: disable grain synthesis
    // Screen masking (letterbox/pillarbox) - array of geometry changes
    screen_mask_entry_t *screen_masks;
@@ -2023,10 +2035,11 @@ static int extract_audio_to_wav(const char *input_file, const char *wav_file, in
 // Decoder Initialisation and Cleanup
 //=============================================================================
-static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file) {
+static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file, int no_grain_synthesis) {
    tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
    if (!decoder) return NULL;
    decoder->no_grain_synthesis = no_grain_synthesis;
    decoder->input_fp = fopen(input_file, "rb");
    if (!decoder->input_fp) {
        free(decoder);
@@ -2511,8 +2524,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
        // Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
        // Phase 2: Use decoding dimensions and temporary buffer
-        remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
+        apply_grain_synthesis(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
-                                      decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);
+                                      decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y,
                                      decoder->header.encoder_preset, decoder->no_grain_synthesis);
        // Debug: Check LL band AFTER grain removal
 //        if (decoder->frame_count == 32) {
@@ -2712,10 +2726,11 @@ static void print_usage(const char *prog) {
    printf("Version: %s\n\n", DECODER_VENDOR_STRING);
    printf("Usage: %s -i input.tav -o output.mkv\n\n", prog);
    printf("Options:\n");
-    printf("  -i <file>    Input TAV file\n");
+    printf("  -i <file>              Input TAV file\n");
-    printf("  -o <file>    Output MKV file (optional, auto-generated from input)\n");
+    printf("  -o <file>              Output MKV file (optional, auto-generated from input)\n");
-    printf("  -v           Verbose output\n");
+    printf("  -v                     Verbose output\n");
-    printf("  -h, --help   Show this help\n\n");
+    printf("  --no-grain-synthesis   Disable grain synthesis (override encoder preset)\n");
    printf("  -h, --help             Show this help\n\n");
    printf("Supported features (matches TSVM decoder):\n");
    printf("  - I-frames and P-frames (delta mode)\n");
    printf("  - GOP unified 3D DWT (temporal compression)\n");
@@ -2740,9 +2755,11 @@ int main(int argc, char *argv[]) {
    char *input_file = NULL;
    char *output_file = NULL;
    int verbose = 0;
    int no_grain_synthesis = 0;
    static struct option long_options[] = {
        {"help", no_argument, 0, 'h'},
        {"no-grain-synthesis", no_argument, 0, 1000},
        {0, 0, 0, 0}
    };
@@ -2761,6 +2778,12 @@ int main(int argc, char *argv[]) {
            case 'h':
                print_usage(argv[0]);
                return 0;
            case 1000:  // --no-grain-synthesis
                no_grain_synthesis = 1;
                if (verbose) {
                    printf("Grain synthesis disabled\n");
                }
                break;
            default:
                print_usage(argv[0]);
                return 1;
@@ -2819,7 +2842,7 @@ int main(int argc, char *argv[]) {
    }
    // Pass 2: Decode video with audio file
-    tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file);
+    tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file, no_grain_synthesis);
    if (!decoder) {
        fprintf(stderr, "Failed to initialise decoder\n");
        unlink(temp_audio_file);  // Clean up temp file
@@ -3126,7 +3149,7 @@ int main(int argc, char *argv[]) {
                    // EZBC mode with perceptual quantisation: coefficients are normalised
                    // Need to dequantise using perceptual weights (same as twobit-map mode)
                    const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
-                    const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
+                    const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
                    // FIX: Use QLUT to convert header quantiser indices to actual values
                    const float base_q_y = roundf(QLUT[decoder->header.quantiser_y] * temporal_scale);
@@ -3160,7 +3183,7 @@ int main(int argc, char *argv[]) {
                } else if (!is_ezbc) {
                    // Normal mode: multiply by quantiser
                    const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
-                    const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
+                    const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
                    // CRITICAL: Must ROUND temporal quantiser to match encoder's roundf() behavior
                    // FIX: Use QLUT to convert header quantiser indices to actual values
@@ -3206,9 +3229,10 @@ int main(int argc, char *argv[]) {
            // Phase 2: Use GOP dimensions (may be cropped) for grain removal
            for (int t = 0; t < gop_size; t++) {
-                remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height,
+                apply_grain_synthesis(gop_y[t], gop_width, gop_height,
                                              decoder->header.decomp_levels, decoder->frame_count + t,
-                                              decoder->header.quantiser_y);
+                                              decoder->header.quantiser_y, decoder->header.encoder_preset,
                                              decoder->no_grain_synthesis);
            }
            // Apply inverse 3D DWT (spatial + temporal)
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -19,7 +19,7 @@
 #include <float.h>
 #include "tav_avx512.h"  // AVX-512 SIMD optimisations
-#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512)"
+#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512,presets)"
 // TSVM Advanced Video (TAV) format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVM TAV"
@@ -1835,6 +1835,7 @@ typedef struct tav_encoder_s {
    int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
    int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
    int enable_crop_encoding;    // 1 = encode cropped active region only (Phase 2), 0 = encode full frame (default)
    uint8_t encoder_preset;      // Encoder preset flags: bit 0 = sports (finer temporal quantisation), bit 1 = anime (no grain)
    // Active region tracking (for Phase 2 crop encoding)
    uint16_t active_mask_top, active_mask_right, active_mask_bottom, active_mask_left;
@@ -2432,6 +2433,9 @@ static void show_usage(const char *program_name) {
    printf("  --dump-frame N          Dump quantised coefficients for frame N (creates .bin files)\n");
    printf("  --wavelet N             Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
    printf("  --zstd-level N          Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
    printf("  --preset PRESET         Encoder presets (comma-separated, e.g., 'sports,anime'):\n");
    printf("                            sports (or sport): Finer temporal quantisation for better motion detail\n");
    printf("                            anime (or animation): Disable grain synthesis for cleaner animated content\n");
    printf("  --help                  Show this help\n\n");
    printf("Audio Rate by Quality:\n  ");
@@ -3355,8 +3359,9 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
                                        int spatial_size,
                                        int base_quantiser,
                                        int is_chroma) {
-    const float BETA = 0.6f;  // Temporal scaling exponent (aggressive for temporal high-pass)
+    // Sports preset: use finer temporal quantisation (less aggressive)
-    const float KAPPA = 1.14f;
+    const float BETA = (enc->encoder_preset & 0x01) ? 0.0f : 0.6f;
    const float KAPPA = (enc->encoder_preset & 0x01) ? 1.0f : 1.14f;
    // Process each temporal subband independently (separable approach)
    for (int t = 0; t < num_frames; t++) {
@@ -7528,8 +7533,10 @@ static int write_tav_header(tav_encoder_t *enc) {
    // Entropy Coder (0 = Twobit-map, 1 = EZBC, 2 = Raw)
    fputc(enc->preprocess_mode, enc->output_fp);
-    // Reserved bytes (2 bytes)
+    // Encoder Preset (byte 28): bit 0 = sports, bit 1 = anime
-    fputc(0, enc->output_fp);
+    fputc(enc->encoder_preset, enc->output_fp);
    // Reserved byte (1 byte)
    fputc(0, enc->output_fp);
    // Device Orientation (default: 0 = no rotation)
@@ -10775,6 +10782,7 @@ int main(int argc, char *argv[]) {
        {"tad-audio", no_argument, 0, 1028},
        {"raw-coeffs", no_argument, 0, 1029},
        {"single-pass", no_argument, 0, 1050},  // disable two-pass encoding with wavelet-based scene detection
        {"preset", required_argument, 0, 1051},  // Encoder presets: sports, anime (comma-separated)
        {"enable-crop-encoding", no_argument, 0, 1052},  // Phase 2: encode cropped active region only (experimental)
        {"help", no_argument, 0, '?'},
        {0, 0, 0, 0}
@@ -11012,6 +11020,34 @@ int main(int argc, char *argv[]) {
                enc->two_pass_mode = 0;
                printf("Two-pass wavelet-based scene change detection disabled\n");
                break;
            case 1051: { // --preset
                char *preset_str = strdup(optarg);
                char *token = strtok(preset_str, ",");
                while (token != NULL) {
                    // Trim leading/trailing whitespace
                    while (*token == ' ' || *token == '\t') token++;
                    char *end = token + strlen(token) - 1;
                    while (end > token && (*end == ' ' || *end == '\t')) {
                        *end = '\0';
                        end--;
                    }
                    // Check for presets and aliases
                    if (strcmp(token, "sports") == 0 || strcmp(token, "sport") == 0) {
                        enc->encoder_preset |= 0x01;
                        printf("Preset 'sports' enabled: finer temporal quantisation (BETA=0.25, KAPPA=1.0)\n");
                    } else if (strcmp(token, "anime") == 0 || strcmp(token, "animation") == 0) {
                        enc->encoder_preset |= 0x02;
                        printf("Preset 'anime' enabled: grain synthesis disabled\n");
                    } else {
                        fprintf(stderr, "Warning: Unknown preset '%s' (valid: sports, anime)\n", token);
                    }
                    token = strtok(NULL, ",");
                }
                free(preset_str);
                break;
            }
            case 1052: // --enable-crop-encoding
                enc->enable_crop_encoding = 1;
                printf("Phase 2 crop encoding enabled (experimental)\n");
--- a/video_encoder/tav_avx512.h
+++ b/video_encoder/tav_avx512.h
@@ -456,7 +456,7 @@ static inline void quantise_dwt_coefficients_avx512(
            quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
        }
-        // Manual rounding to match scalar behavior (round away from zero)
+        // Manual rounding to match scalar behaviour (round away from zero)
        // First add 0.5 or -0.5 based on sign
        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
@@ -510,7 +510,7 @@ static inline void quantise_dwt_coefficients_perceptual_avx512(
        __m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
        __m512 quant = _mm512_div_ps(coeff, effective_q);
-        // Manual rounding to match scalar behavior
+        // Manual rounding to match scalar behaviour
        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
        quant = _mm512_add_ps(quant, round_val);
--- a/video_encoder/tav_inspector.c
+++ b/video_encoder/tav_inspector.c
@@ -514,6 +514,7 @@ int main(int argc, char *argv[]) {
        uint8_t quality = header[25];
        uint8_t channel_layout = header[26];
        uint8_t entropy_coder = header[27];
        uint8_t encoder_preset = header[28];
 static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
 static const char* CLAYOUT[] = {"Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"};
@@ -548,6 +549,21 @@ static const char* TEMPORAL_WAVELET[] = {"Haar", "CDF 5/3"};
            printf("  Quality:          n/a\n");
        printf("  Channel layout:   %s\n", CLAYOUT[channel_layout]);
        printf("  Entropy coder:    %s\n", entropy_coder == 0 ? "Twobit-map" : "EZBC");
        printf("  Encoder preset:   ");
        if (encoder_preset == 0) {
            printf("Default\n");
        } else {
            int first = 1;
            if (encoder_preset & 0x01) {
                printf("%sSports", first ? "" : ", ");
                first = 0;
            }
            if (encoder_preset & 0x02) {
                printf("%sAnime", first ? "" : ", ");
                first = 0;
            }
            printf("\n");
        }
        printf("  Flags:\n");
        printf("    Has audio:      %s\n", (extra_flags & 0x01) ? "Yes" : "No");
        printf("    Has subtitles:  %s\n", (extra_flags & 0x02) ? "Yes" : "No");