diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index a5137fb..b70b9a9 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -5874,7 +5874,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val currentCo = FloatArray(coeffCount) val currentCg = FloatArray(coeffCount) - // Delta-specific perceptual reconstruction using motion-optimized coefficients + // Delta-specific perceptual reconstruction using motion-optimised coefficients // Estimate quality level from quantisation parameters for perceptual weighting val estimatedQualityY = when { qY <= 6 -> 4 // High quality diff --git a/video_encoder/decoder_tad.c b/video_encoder/decoder_tad.c index 471cf79..983d9f1 100644 --- a/video_encoder/decoder_tad.c +++ b/video_encoder/decoder_tad.c @@ -1065,12 +1065,6 @@ int main(int argc, char *argv[]) { size_t name_len = ext - basename_start; strncpy(output_file + dir_len, basename_start, name_len); output_file[dir_len + name_len] = '\0'; - - // Replace last dot with underscore (for .qNN pattern) - /*char *last_dot = strrchr(output_file, '.'); - if (last_dot && last_dot > output_file + dir_len) { - *last_dot = '_'; - }*/ } else { // No .tad extension, copy entire basename strcpy(output_file + dir_len, basename_start); diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c index 0be6689..f0f86de 100644 --- a/video_encoder/decoder_tav.c +++ b/video_encoder/decoder_tav.c @@ -15,7 +15,7 @@ #include #include #include "decoder_tad.h" // Shared TAD decoder library -#include "tav_avx512.h" // AVX-512 SIMD optimizations +#include "tav_avx512.h" // AVX-512 SIMD optimisations #define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512)" @@ -315,7 +315,7 @@ static void dequantise_dwt_subbands_perceptual(int q_index, int q_y_global, cons // for bright pixels, creating dark DWT-pattern blemishes #ifdef __AVX512F__ 
- // Use AVX-512 optimized dequantization if available (1.1x speedup against -Ofast) + // Use AVX-512 optimised dequantisation if available (1.1x speedup against -Ofast) // Check: subband has >=16 elements AND won't exceed buffer bounds const int subband_end = subband->coeff_start + subband->coeff_count; if (g_simd_level >= SIMD_AVX512F && subband->coeff_count >= 16 && subband_end <= coeff_count) { @@ -2713,7 +2713,7 @@ static void print_usage(const char *prog) { printf("Usage: %s -i input.tav -o output.mkv\n\n", prog); printf("Options:\n"); printf(" -i Input TAV file\n"); - printf(" -o Output MKV file (FFV1 video + PCMu8 audio)\n"); + printf(" -o Output MKV file (optional, auto-generated from input)\n"); printf(" -v Verbose output\n"); printf(" -h, --help Show this help\n\n"); printf("Supported features (matches TSVM decoder):\n"); @@ -2767,12 +2767,46 @@ int main(int argc, char *argv[]) { } } - if (!input_file || !output_file) { - fprintf(stderr, "Error: Both input and output files are required\n\n"); + if (!input_file) { + fprintf(stderr, "Error: Input file is required\n\n"); print_usage(argv[0]); return 1; } + // Generate output filename if not provided + if (!output_file) { + size_t input_len = strlen(input_file); + output_file = malloc(input_len + 32); // Extra space for extension + + // Find the last directory separator + const char *basename_start = strrchr(input_file, '/'); + if (!basename_start) basename_start = strrchr(input_file, '\\'); + basename_start = basename_start ? 
basename_start + 1 : input_file; + + // Copy directory part + size_t dir_len = basename_start - input_file; + strncpy(output_file, input_file, dir_len); + + // Find the .tav or .mv3 extension + const char *ext = strrchr(basename_start, '.'); + if (ext && (strcmp(ext, ".tav") == 0 || strcmp(ext, ".mv3") == 0)) { + // Copy basename without .tav or .mv3 + size_t name_len = ext - basename_start; + strncpy(output_file + dir_len, basename_start, name_len); + output_file[dir_len + name_len] = '\0'; + } else { + // No .tav or .mv3 extension, copy entire basename + strcpy(output_file + dir_len, basename_start); + } + + // Append appropriate extension + strcat(output_file, ".mkv"); + + if (verbose) { + printf("Auto-generated output path: %s\n", output_file); + } + } + // Create temporary audio file path char temp_audio_file[256]; snprintf(temp_audio_file, sizeof(temp_audio_file), "/tmp/tav_audio_%d.wav", getpid()); diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index e0c49b0..59ea2f6 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -17,7 +17,7 @@ #include #include #include -#include "tav_avx512.h" // AVX-512 SIMD optimizations +#include "tav_avx512.h" // AVX-512 SIMD optimisations #define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512)" @@ -2383,10 +2383,6 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, float *coeffs, int16_t *quantised, int size, int base_quantiser, int width, int height, int decomp_levels, int is_chroma, int frame_count); -static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_encoder_t *enc, - float *coeffs, int16_t *quantised, int size, - int base_quantiser, int width, int height, - int decomp_levels, int is_chroma, int frame_count); static size_t preprocess_coefficients_variable_layout(preprocess_mode_t preprocess_mode, int width, int height, int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha, 
int coeff_count, int channel_layout, uint8_t *output_buffer); @@ -2424,7 +2420,7 @@ static void show_usage(const char *program_name) { printf(" --enable-delta Enable delta encoding\n"); printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n"); printf(" --3d-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform; the default encoding mode)\n"); - printf(" --motion-coder N Temporal wavelet: 0=Haar, 1=CDF 5/3 (default: auto-select based on resolution; use 0 for older version compatibility)\n"); + printf(" --motion-coder N Temporal wavelet: 0=Haar, 1=CDF 5/3 (default: auto-select based on resolution; use 0 for older version compatibility; use 1 for smoother motion)\n"); printf(" --single-pass Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n"); // printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n"); printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) entropy coding. 
May help reducing file size on high-quality videos\n"); @@ -3388,37 +3384,18 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc, // Q_effective = tH_base × spatial_weight // Where spatial_weight depends on spatial frequency (LL, LH, HL, HH subbands) // This reuses all existing perceptual weighting and dead-zone logic - // - // CRITICAL: Use no_normalisation variant when EZBC is enabled - // - EZBC mode: coefficients must be denormalised (quantise + multiply back) - // - Twobit-map/raw mode: coefficients stay normalised (quantise only) - if (enc->preprocess_mode == PREPROCESS_EZBC) { - quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation( - enc, - gop_coeffs[t], // Input: spatial coefficients for this temporal subband - quantised[t], // Output: quantised spatial coefficients (denormalised for EZBC) - spatial_size, // Number of spatial coefficients - temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base) - enc->width, // Frame width - enc->height, // Frame height - enc->decomp_levels, // Spatial decomposition levels (typically 6) - is_chroma, // Is chroma channel (gets additional quantisation) - enc->frame_count + t // Frame number (for any frame-dependent logic) - ); - } else { - quantise_dwt_coefficients_perceptual_per_coeff( - enc, - gop_coeffs[t], // Input: spatial coefficients for this temporal subband - quantised[t], // Output: quantised spatial coefficients (normalised for twobit-map) - spatial_size, // Number of spatial coefficients - temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base) - enc->width, // Frame width - enc->height, // Frame height - enc->decomp_levels, // Spatial decomposition levels (typically 6) - is_chroma, // Is chroma channel (gets additional quantisation) - enc->frame_count + t // Frame number (for any frame-dependent logic) - ); - } + quantise_dwt_coefficients_perceptual_per_coeff( + enc, + gop_coeffs[t], // Input: spatial coefficients for this temporal subband + quantised[t], // 
Output: quantised spatial coefficients (normalised for twobit-map) + spatial_size, // Number of spatial coefficients + temporal_base_quantiser, // Temporally-scaled base quantiser (tH_base) + enc->width, // Frame width + enc->height, // Frame height + enc->decomp_levels, // Spatial decomposition levels (typically 6) + is_chroma, // Is chroma channel (gets additional quantisation) + enc->frame_count + t // Frame number (for any frame-dependent logic) + ); if (enc->verbose && (t == 0 || t == num_frames - 1)) { printf(" Temporal subband %d: level=%d, tH_base=%d\n", @@ -4092,13 +4069,13 @@ static size_t encode_pframe_residual(tav_encoder_t *enc, int qY) { // EZBC mode: Quantise with perceptual weighting but no normalisation (division by quantiser) // EZBC will compress by encoding only significant bitplanes // fprintf(stderr, "[EZBC-QUANT-PFRAME] Using perceptual quantisation without normalisation\n"); - quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_y_dwt, quantised_y, frame_size, + quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_y_dwt, quantised_y, frame_size, qY, enc->width, enc->height, enc->decomp_levels, 0, 0); - quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_co_dwt, quantised_co, frame_size, + quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_co_dwt, quantised_co, frame_size, enc->quantiser_co, enc->width, enc->height, enc->decomp_levels, 1, 0); - quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_cg_dwt, quantised_cg, frame_size, + quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_cg_dwt, quantised_cg, frame_size, enc->quantiser_cg, enc->width, enc->height, enc->decomp_levels, 1, 0); @@ -6431,7 +6408,7 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz effective_q = FCLAMP(effective_q, 1.0f, 4096.0f); #ifdef __AVX512F__ - // Use AVX-512 optimized version if available (2x speedup against -Ofast) + // Use 
AVX-512 optimised version if available (2x speedup against -Ofast) if (g_simd_level >= SIMD_AVX512F) { quantise_dwt_coefficients_avx512(coeffs, quantised, size, effective_q, dead_zone_threshold, width, height, decomp_levels, is_chroma, @@ -6739,76 +6716,6 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, } } -// Quantisation for EZBC mode: quantises to discrete levels but doesn't normalise (shrink) values -// This reduces coefficient precision while preserving magnitude for EZBC's bitplane encoding -static void quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(tav_encoder_t *enc, - float *coeffs, int16_t *quantised, int size, - int base_quantiser, int width, int height, - int decomp_levels, int is_chroma, int frame_count) { - (void)frame_count; // Unused parameter - - float effective_base_q = base_quantiser; - effective_base_q = FCLAMP(effective_base_q, 1.0f, 4096.0f); - - for (int i = 0; i < size; i++) { - // Apply perceptual weight based on coefficient's position in DWT layout - float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma); - float effective_q = effective_base_q * weight; - - // Step 1: Quantise - divide by quantiser to get normalised value - float quantised_val = coeffs[i] / effective_q; - - // Step 2: Apply dead-zone quantisation to normalised value - if (enc->dead_zone_threshold > 0.0f && !is_chroma) { - int level = get_subband_level(i, width, height, decomp_levels); - int subband_type = get_subband_type(i, width, height, decomp_levels); - float level_threshold = 0.0f; - - if (level == 1) { - // Finest level (level 1: 280x224) - if (subband_type == 3) { - // HH1: full dead-zone - level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINEST_SCALE; - } else if (subband_type == 1 || subband_type == 2) { - // LH1, HL1: half dead-zone - level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE; - } - } else if (level == 2) { - // Second-finest level (level 
2: 140x112) - if (subband_type == 3) { - // HH2: half dead-zone - level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE; - } - // LH2, HL2: no dead-zone - } - // Coarser levels (3-6): no dead-zone to preserve structural information - - if (fabsf(quantised_val) <= level_threshold) { - quantised_val = 0.0f; - } - } - - // Step 3: Round to discrete quantisation levels - quantised_val = roundf(quantised_val); // file size explodes without rounding - - // FIX: Store normalised values (not denormalised) to avoid int16_t overflow - // EZBC bitplane encoding works fine with normalised coefficients - // Denormalisation was causing bright pixels to clip at 32767 - quantised[i] = (int16_t)CLAMP((int)quantised_val, -32768, 32767); - - // Debug: Print LL subband coefficients (9×7 at top-left for 560×448) - /*static int debug_once = 1; - if (debug_once && i < 63 && width == 560 && !is_chroma) { - int x = i % width; - int y = i / width; - if (x < 9 && y < 7) { - fprintf(stderr, "[EZBC-QUANT-DEBUG] LL coeff[%d,%d] (idx=%d): coeff=%.1f, weight=%.3f, effective_q=%.1f, quantised_val=%.1f, stored=%d\n", - x, y, i, coeffs[i], weight, effective_q, quantised_val, quantised[i]); - if (i == 62) debug_once = 0; - } - }*/ - } -} // Serialise tile data for compression static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, @@ -6874,9 +6781,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, if (enc->preprocess_mode == PREPROCESS_EZBC) { // EZBC mode: Quantise with perceptual weighting but no normalisation (division by quantiser) // fprintf(stderr, "[EZBC-QUANT-INTRA] Using perceptual quantisation without normalisation\n"); - quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 0, enc->frame_count); - quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, 
(float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count); - quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count); + quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 0, enc->frame_count); + quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count); + quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->temporal_gop_width, enc->temporal_gop_height, enc->decomp_levels, 1, enc->frame_count); // Print max abs for debug int max_y = 0, max_co = 0, max_cg = 0; @@ -8725,7 +8632,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc, for (int x = 0; x < width; x += SAMPLE_RATE_HORZ) { int idx = y * width + x; - // Use pre-converted YCoCg values (optimization: avoid RGB→YCoCg conversion in loop) + // Use pre-converted YCoCg values (optimisation: avoid RGB→YCoCg conversion in loop) float yval = enc->current_frame_y[idx]; float co = enc->current_frame_co[idx]; float cg = enc->current_frame_cg[idx]; @@ -8764,7 +8671,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc, for (int x = 0; x < width; x += SAMPLE_RATE_HORZ) { int idx = y * width + x; - // Use pre-converted YCoCg values (optimization) + // Use pre-converted YCoCg values (optimisation) float yval = enc->current_frame_y[idx]; float co = enc->current_frame_co[idx]; float cg = enc->current_frame_cg[idx]; @@ -8800,7 +8707,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc, for (int y = 0; y 
< height; y += SAMPLE_RATE_VERT) { int idx = y * width + x; - // Use pre-converted YCoCg values (optimization) + // Use pre-converted YCoCg values (optimisation) float yval = enc->current_frame_y[idx]; float co = enc->current_frame_co[idx]; float cg = enc->current_frame_cg[idx]; @@ -8836,7 +8743,7 @@ static int detect_letterbox_pillarbox(tav_encoder_t *enc, for (int y = 0; y < height; y += SAMPLE_RATE_VERT) { int idx = y * width + x; - // Use pre-converted YCoCg values (optimization) + // Use pre-converted YCoCg values (optimisation) float yval = enc->current_frame_y[idx]; float co = enc->current_frame_co[idx]; float cg = enc->current_frame_cg[idx]; @@ -11147,17 +11054,23 @@ int main(int argc, char *argv[]) { // For larger videos, use Haar (better compression, smoother motion matters less) if (enc->temporal_motion_coder == -1) { int num_pixels = enc->width * enc->height; - if (num_pixels >= 500000) { + if ( + num_pixels >= 820000 && enc->quantiser_y <= 29 || + num_pixels >= 500000 && enc->quantiser_y <= 14 || + num_pixels >= 340000 && enc->quantiser_y <= 7 || + num_pixels >= 260000 && enc->quantiser_y <= 3 + ) { enc->temporal_motion_coder = 0; // Haar if (enc->verbose) { - printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels)\n", - enc->width, enc->height, num_pixels); + printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n", + enc->width, enc->height, num_pixels, enc->quantiser_y); } - } else { + } + else { enc->temporal_motion_coder = 1; // CDF 5/3 if (enc->verbose) { - printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels)\n", - enc->width, enc->height, num_pixels); + printf("Auto-selected CDF 5/3 temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n", + enc->width, enc->height, num_pixels, enc->quantiser_y); } } } diff --git a/video_encoder/tav_avx512.h b/video_encoder/tav_avx512.h index 614c3e8..6a66157 100644 --- a/video_encoder/tav_avx512.h +++ 
b/video_encoder/tav_avx512.h @@ -1,13 +1,13 @@ /* - * TAV AVX-512 Optimizations + * TAV AVX-512 Optimisations * - * This file contains AVX-512 optimized versions of performance-critical functions + * This file contains AVX-512 optimised versions of performance-critical functions * in the TAV encoder. Runtime CPU detection ensures fallback to scalar versions * on non-AVX-512 systems. * - * Optimized functions: + * Optimised functions: * - 1D DWT transforms (5/3, 9/7, Haar, Bior13/7, DD4) - * - Quantization functions + * - Quantisation functions * - RGB to YCoCg color conversion * - 2D DWT gather/scatter operations * @@ -51,7 +51,7 @@ static inline void tav_simd_init(void) { #ifdef __AVX512F__ if (cpu_has_avx512f()) { g_simd_level = SIMD_AVX512F; - fprintf(stderr, "[TAV] AVX-512 optimizations enabled\n"); + fprintf(stderr, "[TAV] AVX-512 optimisations enabled\n"); } else { g_simd_level = SIMD_NONE; fprintf(stderr, "[TAV] AVX-512 not available, using scalar fallback\n"); @@ -85,7 +85,7 @@ static inline __m512 _mm512_clamp_ps(__m512 v, __m512 min_val, __m512 max_val) { } // ============================================================================= -// AVX-512 Optimized 1D DWT Forward Transforms +// AVX-512 Optimised 1D DWT Forward Transforms // ============================================================================= // 5/3 Reversible Forward DWT with AVX-512 @@ -425,7 +425,7 @@ static inline void dwt_haar_forward_1d_avx512(float *data, int length) { } // ============================================================================= -// AVX-512 Optimized Quantization Functions +// AVX-512 Optimised Quantisation Functions // ============================================================================= static inline void quantise_dwt_coefficients_avx512( @@ -488,7 +488,7 @@ static inline void quantise_dwt_coefficients_avx512( } } -// Perceptual quantization with per-coefficient weighting +// Perceptual quantisation with per-coefficient weighting static inline void 
quantise_dwt_coefficients_perceptual_avx512( float *coeffs, int16_t *quantised, int size, float *weights, // Pre-computed per-coefficient weights @@ -534,10 +534,10 @@ static inline void quantise_dwt_coefficients_perceptual_avx512( } // ============================================================================= -// AVX-512 Optimized Dequantization Functions +// AVX-512 Optimised Dequantisation Functions // ============================================================================= -// Basic dequantization: quantised[i] * effective_q +// Basic dequantisation: quantised[i] * effective_q static inline void dequantise_dwt_coefficients_avx512( const int16_t *quantised, float *coeffs, int size, float effective_q @@ -555,7 +555,7 @@ static inline void dequantise_dwt_coefficients_avx512( // Convert int32 to float __m512 quant_f32 = _mm512_cvtepi32_ps(quant_i32); - // Multiply by quantizer + // Multiply by quantiser __m512 dequant = _mm512_mul_ps(quant_f32, q_vec); _mm512_storeu_ps(&coeffs[i], dequant); @@ -567,7 +567,7 @@ static inline void dequantise_dwt_coefficients_avx512( } } -// Perceptual dequantization with per-coefficient weights +// Perceptual dequantisation with per-coefficient weights static inline void dequantise_dwt_coefficients_perceptual_avx512( const int16_t *quantised, float *coeffs, int size, const float *weights, float base_quantiser @@ -603,7 +603,7 @@ static inline void dequantise_dwt_coefficients_perceptual_avx512( } // ============================================================================= -// AVX-512 Optimized RGB to YCoCg Conversion +// AVX-512 Optimised RGB to YCoCg Conversion // ============================================================================= static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) { @@ -658,10 +658,10 @@ static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, } // 
============================================================================= -// AVX-512 Optimized 2D DWT with Gather/Scatter +// AVX-512 Optimised 2D DWT with Gather/Scatter // ============================================================================= -// Optimized column extraction using gather +// Optimised column extraction using gather static inline void dwt_2d_extract_column_avx512( const float *tile_data, float *column, int x, int width, int height @@ -688,7 +688,7 @@ static inline void dwt_2d_extract_column_avx512( } } -// Optimized column insertion using scatter +// Optimised column insertion using scatter static inline void dwt_2d_insert_column_avx512( float *tile_data, const float *column, int x, int width, int height