TAV: code cleanup, better preset selection

2026-06-06 13:38:30 +09:00 · 2025-11-24 11:16:19 +09:00
parent dbbb471a11
commit 6132012e74
5 changed files with 93 additions and 152 deletions
--- a/video_encoder/decoder_tad.c
+++ b/video_encoder/decoder_tad.c
@@ -1065,12 +1065,6 @@ int main(int argc, char *argv[]) {
            size_t name_len = ext - basename_start;
            strncpy(output_file + dir_len, basename_start, name_len);
            output_file[dir_len + name_len] = '\0';
-
-            // Replace last dot with underscore (for .qNN pattern)
-            /*char *last_dot = strrchr(output_file, '.');
-            if (last_dot && last_dot > output_file + dir_len) {
-                *last_dot = '_';
-            }*/
        } else {
            // No .tad extension, copy entire basename
            strcpy(output_file + dir_len, basename_start);
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -15,7 +15,7 @@
 #include <getopt.h>
 #include <signal.h>
 #include "decoder_tad.h"  // Shared TAD decoder library
-#include "tav_avx512.h"  // AVX-512 SIMD optimizations
+#include "tav_avx512.h"  // AVX-512 SIMD optimisations

 #define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512)"

@@ -315,7 +315,7 @@ static void dequantise_dwt_subbands_perceptual(int q_index, int q_y_global, cons
        //                   for bright pixels, creating dark DWT-pattern blemishes

 #ifdef __AVX512F__
-        // Use AVX-512 optimized dequantization if available (1.1x speedup against -Ofast)
+        // Use AVX-512 optimised dequantisation if available (1.1x speedup against -Ofast)
        // Check: subband has >=16 elements AND won't exceed buffer bounds
        const int subband_end = subband->coeff_start + subband->coeff_count;
        if (g_simd_level >= SIMD_AVX512F && subband->coeff_count >= 16 && subband_end <= coeff_count) {
@@ -2713,7 +2713,7 @@ static void print_usage(const char *prog) {
    printf("Usage: %s -i input.tav -o output.mkv\n\n", prog);
    printf("Options:\n");
    printf("  -i <file>    Input TAV file\n");
-    printf("  -o <file>    Output MKV file (FFV1 video + PCMu8 audio)\n");
+    printf("  -o <file>    Output MKV file (optional, auto-generated from input)\n");
    printf("  -v           Verbose output\n");
    printf("  -h, --help   Show this help\n\n");
    printf("Supported features (matches TSVM decoder):\n");
@@ -2767,12 +2767,46 @@ int main(int argc, char *argv[]) {
        }
    }

-    if (!input_file || !output_file) {
-        fprintf(stderr, "Error: Both input and output files are required\n\n");
+    if (!input_file) {
+        fprintf(stderr, "Error: Input file is required\n\n");
        print_usage(argv[0]);
        return 1;
    }

+    // Generate output filename if not provided
+    if (!output_file) {
+        size_t input_len = strlen(input_file);
+        output_file = malloc(input_len + 32);  // Extra space for extension
+
+        // Find the last directory separator
+        const char *basename_start = strrchr(input_file, '/');
+        if (!basename_start) basename_start = strrchr(input_file, '\\');
+        basename_start = basename_start ? basename_start + 1 : input_file;
+
+        // Copy directory part
+        size_t dir_len = basename_start - input_file;
+        strncpy(output_file, input_file, dir_len);
+
+        // Find the .tav or .mv3 extension
+        const char *ext = strrchr(basename_start, '.');
+        if (ext && (strcmp(ext, ".tav") == 0 || strcmp(ext, ".mv3") == 0)) {
+            // Copy basename without .tav or .mv3
+            size_t name_len = ext - basename_start;
+            strncpy(output_file + dir_len, basename_start, name_len);
+            output_file[dir_len + name_len] = '\0';
+        } else {
+            // No .tav/.mv3 extension, copy entire basename
+            strcpy(output_file + dir_len, basename_start);
+        }
+
+        // Append appropriate extension
+        strcat(output_file, ".mkv");
+
+        if (verbose) {
+            printf("Auto-generated output path: %s\n", output_file);
+        }
+    }
+
    // Create temporary audio file path
    char temp_audio_file[256];
    snprintf(temp_audio_file, sizeof(temp_audio_file), "/tmp/tav_audio_%d.wav", getpid());
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -17,7 +17,7 @@
 #include <time.h>
 #include <limits.h>
 #include <float.h>
-#include "tav_avx512.h"  // AVX-512 SIMD optimizations
+#include "tav_avx512.h"  // AVX-512 SIMD optimisations

 #define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512)"

@@ -2383,10 +2383,6 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
                                                           float *coeffs, int16_t *quantised, int size,
                                                           int base_quantiser, int width, int height,
                                                           int decomp_levels, int is_chroma, int frame_count);
-static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_encoder_t *enc,
-                                                           float *coeffs, int16_t *quantised, int size,
-                                                           int base_quantiser, int width, int height,
-                                                           int decomp_levels, int is_chroma, int frame_count);
 static size_t preprocess_coefficients_variable_layout(preprocess_mode_t preprocess_mode, int width, int height,
                                                       int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
                                                       int coeff_count, int channel_layout, uint8_t *output_buffer);
@@ -2424,7 +2420,7 @@ static void show_usage(const char *program_name) {
    printf("  --enable-delta          Enable delta encoding\n");
    printf("  --delta-haar N          Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
    printf("  --3d-dwt                Enable temporal 3D DWT (GOP-based encoding with temporal transform; the default encoding mode)\n");
-    printf("  --motion-coder N        Temporal wavelet: 0=Haar, 1=CDF 5/3 (default: auto-select based on resolution; use 0 for older version compatibility)\n");
+    printf("  --motion-coder N        Temporal wavelet: 0=Haar, 1=CDF 5/3 (default: auto-select based on resolution; use 0 for older version compatibility; use 1 for smoother motion)\n");
    printf("  --single-pass           Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n");
 //    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) entropy coding. May help reducing file size on high-quality videos\n");
@@ -3388,37 +3384,18 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
        //   Q_effective = tH_base × spatial_weight
        // Where spatial_weight depends on spatial frequency (LL, LH, HL, HH subbands)
        // This reuses all existing perceptual weighting and dead-zone logic
-        //
-        // CRITICAL: Use no_normalisation variant when EZBC is enabled
-        // - EZBC mode: coefficients must be denormalised (quantise + multiply back)
-        // - Twobit-map/raw mode: coefficients stay normalised (quantise only)
-        if (enc->preprocess_mode == PREPROCESS_EZBC) {
-            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(
-                enc,
-                gop_coeffs[t],           // Input: spatial coefficients for this temporal subband
-                quantised[t],            // Output: quantised spatial coefficients (denormalised for EZBC)
-                spatial_size,            // Number of spatial coefficients
-                temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base)
-                enc->width,              // Frame width
-                enc->height,             // Frame height
-                enc->decomp_levels,      // Spatial decomposition levels (typically 6)
-                is_chroma,               // Is chroma channel (gets additional quantisation)
-                enc->frame_count + t     // Frame number (for any frame-dependent logic)
-            );
-        } else {
-            quantise_dwt_coefficients_perceptual_per_coeff(
-                enc,
-                gop_coeffs[t],           // Input: spatial coefficients for this temporal subband
-                quantised[t],            // Output: quantised spatial coefficients (normalised for twobit-map)
-                spatial_size,            // Number of spatial coefficients
-                temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base)
-                enc->width,              // Frame width
-                enc->height,             // Frame height
-                enc->decomp_levels,      // Spatial decomposition levels (typically 6)
-                is_chroma,               // Is chroma channel (gets additional quantisation)
-                enc->frame_count + t     // Frame number (for any frame-dependent logic)
-            );
-        }
+        quantise_dwt_coefficients_perceptual_per_coeff(
+            enc,
+            gop_coeffs[t],           // Input: spatial coefficients for this temporal subband
+            quantised[t],            // Output: quantised spatial coefficients (normalised; same path for every preprocess mode)
+            spatial_size,            // Number of spatial coefficients
+            temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base)
+            enc->width,              // Frame width
+            enc->height,             // Frame height
+            enc->decomp_levels,      // Spatial decomposition levels (typically 6)
+            is_chroma,               // Is chroma channel (gets additional quantisation)
+            enc->frame_count + t     // Frame number (for any frame-dependent logic)
+        );

        if (enc->verbose && (t == 0 || t == num_frames - 1)) {
            printf("  Temporal subband %d: level=%d, tH_base=%d\n",
@@ -4092,13 +4069,13 @@ static size_t encode_pframe_residual(tav_encoder_t *enc, int qY) {
        // EZBC mode: Quantise with perceptual weighting but no normalisation (division by quantiser)
        // EZBC will compress by encoding only significant bitplanes
 //        fprintf(stderr, "[EZBC-QUANT-PFRAME] Using perceptual quantisation without normalisation\n");
-        quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_y_dwt, quantised_y, frame_size,
+        quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_y_dwt, quantised_y, frame_size,
                                                      qY, enc->width, enc->height,
                                                      enc->decomp_levels, 0, 0);
-        quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_co_dwt, quantised_co, frame_size,
+        quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size,
                                                      enc->quantiser_co, enc->width, enc->height,
                                                      enc->decomp_levels, 1, 0);
-        quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_cg_dwt, quantised_cg, frame_size,
+        quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, quantised_cg, frame_size,
                                                      enc->quantiser_cg, enc->width, enc->height,
                                                      enc->decomp_levels, 1, 0);

@@ -6431,7 +6408,7 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz
    effective_q = FCLAMP(effective_q, 1.0f, 4096.0f);

 #ifdef __AVX512F__
-    // Use AVX-512 optimized version if available (2x speedup against -Ofast)
+    // Use AVX-512 optimised version if available (2x speedup against -Ofast)
    if (g_simd_level >= SIMD_AVX512F) {
        quantise_dwt_coefficients_avx512(coeffs, quantised, size, effective_q, dead_zone_threshold,
                                         width, height, decomp_levels, is_chroma,
@@ -6739,76 +6716,6 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
    }
 }

-// Quantisation for EZBC mode: quantises to discrete levels but doesn't normalise (shrink) values
-// This reduces coefficient precision while preserving magnitude for EZBC's bitplane encoding
-static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_encoder_t *enc,
-                                                          float *coeffs, int16_t *quantised, int size,
-                                                          int base_quantiser, int width, int height,
-                                                          int decomp_levels, int is_chroma, int frame_count) {
-    (void)frame_count;  // Unused parameter
-
-    float effective_base_q = base_quantiser;
-    effective_base_q = FCLAMP(effective_base_q, 1.0f, 4096.0f);
-
-    for (int i = 0; i < size; i++) {
-        // Apply perceptual weight based on coefficient's position in DWT layout
-        float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
-        float effective_q = effective_base_q * weight;
-
-        // Step 1: Quantise - divide by quantiser to get normalised value
-        float quantised_val = coeffs[i] / effective_q;
-
-        // Step 2: Apply dead-zone quantisation to normalised value
-        if (enc->dead_zone_threshold > 0.0f && !is_chroma) {
-            int level = get_subband_level(i, width, height, decomp_levels);
-            int subband_type = get_subband_type(i, width, height, decomp_levels);
-            float level_threshold = 0.0f;
-
-            if (level == 1) {
-                // Finest level (level 1: 280x224)
-                if (subband_type == 3) {
-                    // HH1: full dead-zone
-                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
-                } else if (subband_type == 1 || subband_type == 2) {
-                    // LH1, HL1: half dead-zone
-                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
-                }
-            } else if (level == 2) {
-                // Second-finest level (level 2: 140x112)
-                if (subband_type == 3) {
-                    // HH2: half dead-zone
-                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
-                }
-                // LH2, HL2: no dead-zone
-            }
-            // Coarser levels (3-6): no dead-zone to preserve structural information
-
-            if (fabsf(quantised_val) <= level_threshold) {
-                quantised_val = 0.0f;
-            }
-        }
-
-        // Step 3: Round to discrete quantisation levels
-        quantised_val = roundf(quantised_val); // file size explodes without rounding
-
-        // FIX: Store normalised values (not denormalised) to avoid int16_t overflow
-        // EZBC bitplane encoding works fine with normalised coefficients
-        // Denormalisation was causing bright pixels to clip at 32767
-        quantised[i] = (int16_t)CLAMP((int)quantised_val, -32768, 32767);
-
-        // Debug: Print LL subband coefficients (9×7 at top-left for 560×448)
-        /*static int debug_once = 1;
-        if (debug_once && i < 63 && width == 560 && !is_chroma) {
-            int x = i % width;
-            int y = i / width;
-            if (x < 9 && y < 7) {
-                fprintf(stderr, "[EZBC-QUANT-DEBUG] LL coeff[%d,%d] (idx=%d): coeff=%.1f, weight=%.3f, effective_q=%.1f, quantised_val=%.1f, stored=%d\n",
-                        x, y, i, coeffs[i], weight, effective_q, quantised_val, quantised[i]);
-                if (i == 62) debug_once = 0;
-            }
-        }*/
-    }
-}

 // Serialise tile data for compression
 static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
@@ -6874,9 +6781,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
        if (enc->preprocess_mode == PREPROCESS_EZBC) {
            // EZBC mode: Quantise with perceptual weighting but no normalisation (division by quantiser)
 //            fprintf(stderr, "[EZBC-QUANT-INTRA] Using perceptual quantisation without normalisation\n");
-            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 0, enc->frame_count);
-            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count);
-            quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count);
+            quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 0, enc->frame_count);
+            quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count);
+            quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count);

            // Print max abs for debug
            int max_y = 0, max_co = 0, max_cg = 0;
@@ -8725,7 +8632,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc,
        for (int x = 0; x < width; x += SAMPLE_RATE_HORZ) {
            int idx = y * width + x;

-            // Use pre-converted YCoCg values (optimization: avoid RGB→YCoCg conversion in loop)
+            // Use pre-converted YCoCg values (optimisation: avoid RGB→YCoCg conversion in loop)
            float yval = enc->current_frame_y[idx];
             float co = enc->current_frame_co[idx];
             float cg = enc->current_frame_cg[idx];
@@ -8764,7 +8671,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc,
        for (int x = 0; x < width; x += SAMPLE_RATE_HORZ) {
            int idx = y * width + x;

-            // Use pre-converted YCoCg values (optimization)
+            // Use pre-converted YCoCg values (optimisation)
            float yval = enc->current_frame_y[idx];
             float co = enc->current_frame_co[idx];
             float cg = enc->current_frame_cg[idx];
@@ -8800,7 +8707,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc,
        for (int y = 0; y < height; y += SAMPLE_RATE_VERT) {
            int idx = y * width + x;

-            // Use pre-converted YCoCg values (optimization)
+            // Use pre-converted YCoCg values (optimisation)
            float yval = enc->current_frame_y[idx];
             float co = enc->current_frame_co[idx];
             float cg = enc->current_frame_cg[idx];
@@ -8836,7 +8743,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc,
        for (int y = 0; y < height; y += SAMPLE_RATE_VERT) {
            int idx = y * width + x;

-            // Use pre-converted YCoCg values (optimization)
+            // Use pre-converted YCoCg values (optimisation)
            float yval = enc->current_frame_y[idx];
             float co = enc->current_frame_co[idx];
             float cg = enc->current_frame_cg[idx];
@@ -11147,17 +11054,23 @@ int main(int argc, char *argv[]) {
    // For larger videos, use Haar (better compression, smoother motion matters less)
    if (enc->temporal_motion_coder == -1) {
        int num_pixels = enc->width * enc->height;
-        if (num_pixels >= 500000) {
+        if (
+            num_pixels >= 820000 && enc->quantiser_y <= 29 ||
+            num_pixels >= 500000 && enc->quantiser_y <= 14 ||
+            num_pixels >= 340000 && enc->quantiser_y <= 7 ||
+            num_pixels >= 260000 && enc->quantiser_y <= 3
+                ) {
            enc->temporal_motion_coder = 0;  // Haar
            if (enc->verbose) {
-                printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels)\n",
-                       enc->width, enc->height, num_pixels);
+                printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
+                       enc->width, enc->height, num_pixels, enc->quantiser_y);
            }
-        } else {
+        }
+        else {
            enc->temporal_motion_coder = 1;  // CDF 5/3
            if (enc->verbose) {
-                printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels)\n",
-                       enc->width, enc->height, num_pixels);
+                printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
+                       enc->width, enc->height, num_pixels, enc->quantiser_y);
            }
        }
    }
--- a/video_encoder/tav_avx512.h
+++ b/video_encoder/tav_avx512.h
@@ -1,13 +1,13 @@
 /*
- * TAV AVX-512 Optimizations
+ * TAV AVX-512 Optimisations
 *
- * This file contains AVX-512 optimized versions of performance-critical functions
+ * This file contains AVX-512 optimised versions of performance-critical functions
 * in the TAV encoder. Runtime CPU detection ensures fallback to scalar versions
 * on non-AVX-512 systems.
 *
- * Optimized functions:
+ * Optimised functions:
 * - 1D DWT transforms (5/3, 9/7, Haar, Bior13/7, DD4)
- * - Quantization functions
+ * - Quantisation functions
 * - RGB to YCoCg color conversion
 * - 2D DWT gather/scatter operations
 *
@@ -51,7 +51,7 @@ static inline void tav_simd_init(void) {
 #ifdef __AVX512F__
    if (cpu_has_avx512f()) {
        g_simd_level = SIMD_AVX512F;
-        fprintf(stderr, "[TAV] AVX-512 optimizations enabled\n");
+        fprintf(stderr, "[TAV] AVX-512 optimisations enabled\n");
    } else {
        g_simd_level = SIMD_NONE;
        fprintf(stderr, "[TAV] AVX-512 not available, using scalar fallback\n");
@@ -85,7 +85,7 @@ static inline __m512 _mm512_clamp_ps(__m512 v, __m512 min_val, __m512 max_val) {
 }

 // =============================================================================
-// AVX-512 Optimized 1D DWT Forward Transforms
+// AVX-512 Optimised 1D DWT Forward Transforms
 // =============================================================================

 // 5/3 Reversible Forward DWT with AVX-512
@@ -425,7 +425,7 @@ static inline void dwt_haar_forward_1d_avx512(float *data, int length) {
 }

 // =============================================================================
-// AVX-512 Optimized Quantization Functions
+// AVX-512 Optimised Quantisation Functions
 // =============================================================================

 static inline void quantise_dwt_coefficients_avx512(
@@ -488,7 +488,7 @@ static inline void quantise_dwt_coefficients_avx512(
    }
 }

-// Perceptual quantization with per-coefficient weighting
+// Perceptual quantisation with per-coefficient weighting
 static inline void quantise_dwt_coefficients_perceptual_avx512(
    float *coeffs, int16_t *quantised, int size,
    float *weights,  // Pre-computed per-coefficient weights
@@ -534,10 +534,10 @@ static inline void quantise_dwt_coefficients_perceptual_avx512(
 }

 // =============================================================================
-// AVX-512 Optimized Dequantization Functions
+// AVX-512 Optimised Dequantisation Functions
 // =============================================================================

-// Basic dequantization: quantised[i] * effective_q
+// Basic dequantisation: quantised[i] * effective_q
 static inline void dequantise_dwt_coefficients_avx512(
    const int16_t *quantised, float *coeffs, int size,
    float effective_q
@@ -555,7 +555,7 @@ static inline void dequantise_dwt_coefficients_avx512(
        // Convert int32 to float
        __m512 quant_f32 = _mm512_cvtepi32_ps(quant_i32);

-        // Multiply by quantizer
+        // Multiply by quantiser
        __m512 dequant = _mm512_mul_ps(quant_f32, q_vec);

        _mm512_storeu_ps(&coeffs[i], dequant);
@@ -567,7 +567,7 @@ static inline void dequantise_dwt_coefficients_avx512(
    }
 }

-// Perceptual dequantization with per-coefficient weights
+// Perceptual dequantisation with per-coefficient weights
 static inline void dequantise_dwt_coefficients_perceptual_avx512(
    const int16_t *quantised, float *coeffs, int size,
    const float *weights, float base_quantiser
@@ -603,7 +603,7 @@ static inline void dequantise_dwt_coefficients_perceptual_avx512(
 }

 // =============================================================================
-// AVX-512 Optimized RGB to YCoCg Conversion
+// AVX-512 Optimised RGB to YCoCg Conversion
 // =============================================================================

 static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) {
@@ -658,10 +658,10 @@ static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co,
 }

 // =============================================================================
-// AVX-512 Optimized 2D DWT with Gather/Scatter
+// AVX-512 Optimised 2D DWT with Gather/Scatter
 // =============================================================================

-// Optimized column extraction using gather
+// Optimised column extraction using gather
 static inline void dwt_2d_extract_column_avx512(
    const float *tile_data, float *column,
    int x, int width, int height
@@ -688,7 +688,7 @@ static inline void dwt_2d_extract_column_avx512(
    }
 }

-// Optimized column insertion using scatter
+// Optimised column insertion using scatter
 static inline void dwt_2d_insert_column_avx512(
    float *tile_data, const float *column,
    int x, int width, int height