tav: encoder ENDT fix

2026-06-06 05:28:31 +09:00 · 2025-11-29 01:58:54 +09:00
parent 3b401139e9
commit 9edeca929d
6 changed files with 52 additions and 39 deletions
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -17,7 +17,7 @@
 #include "decoder_tad.h"  // Shared TAD decoder library
 #include "tav_avx512.h"  // AVX-512 SIMD optimisations

-#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512,presets)"
+#define DECODER_VENDOR_STRING "Decoder-TAV 20251126 (presets)"

 // TAV format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
@@ -315,7 +315,7 @@ static void dequantise_dwt_subbands_perceptual(int q_index, int q_y_global, cons
        //                   Previous denormalization in EZBC caused int16_t overflow (clipping at 32767)
        //                   for bright pixels, creating dark DWT-pattern blemishes

-#ifdef __AVX512F__
+/*#ifdef __AVX512F__
        // Use AVX-512 optimised dequantization if available (1.1x speedup against -Ofast)
        // Check: subband has >=16 elements AND won't exceed buffer bounds
        const int subband_end = subband->coeff_start + subband->coeff_count;
@@ -327,7 +327,7 @@ static void dequantise_dwt_subbands_perceptual(int q_index, int q_y_global, cons
                effective_quantiser
            );
        } else {
-#endif
+#endif*/
            // Scalar fallback or small subbands
            for (int i = 0; i < subband->coeff_count; i++) {
                const int idx = subband->coeff_start + i;
@@ -336,9 +336,9 @@ static void dequantise_dwt_subbands_perceptual(int q_index, int q_y_global, cons
                    dequantised[idx] = untruncated;
                }
            }
-#ifdef __AVX512F__
+/*#ifdef __AVX512F__
        }
-#endif
+#endif*/
    }

    // Debug: Verify LL band was dequantised correctly
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -19,7 +19,7 @@
 #include <float.h>
 #include "tav_avx512.h"  // AVX-512 SIMD optimisations

-#define ENCODER_VENDOR_STRING "Encoder-TAV 20251124 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512,presets)"
+#define ENCODER_VENDOR_STRING "Encoder-TAV 20251128 (3d-dwt,tad,ssf-tc,cdf53-motion,avx512,presets)"

 // TSVM Advanced Video (TAV) format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVM TAV"
@@ -12158,13 +12158,13 @@ int main(int argc, char *argv[]) {
            printf("Updated header with actual frame count: %d\n", frame_count);
        }

-        // Update ENDT in extended header (calculate end time for last frame)
+        // Update ENDT in extended header (end time of video = total duration of all frame_count frames, in nanoseconds)
        uint64_t endt_ns;
        if (enc->is_ntsc_framerate) {
            // NTSC framerates use denominator 1001 (e.g., 24000/1001, 30000/1001, 60000/1001)
-            endt_ns = ((uint64_t)(frame_count - 1) * 1001ULL * 1000000000ULL) / ((uint64_t)enc->output_fps * 1000ULL);
+            endt_ns = ((uint64_t)frame_count * 1001ULL * 1000000000ULL) / ((uint64_t)enc->output_fps * 1000ULL);
        } else {
-            endt_ns = ((uint64_t)(frame_count - 1) * 1000000000ULL) / (uint64_t)enc->output_fps;
+            endt_ns = ((uint64_t)frame_count * 1000000000ULL) / (uint64_t)enc->output_fps;
        }
        fseek(enc->output_fp, enc->extended_header_offset, SEEK_SET);
        fwrite(&endt_ns, sizeof(uint64_t), 1, enc->output_fp);
--- a/video_encoder/encoder_tav_text.c
+++ b/video_encoder/encoder_tav_text.c
@@ -24,8 +24,8 @@ Audio: MP2 encoding at 96 kbps, 32 KHz stereo (packet 0x20)
 Each text frame is treated as an I-frame with sync packet

 Usage:
-  gcc -O3 -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
-  ./encoder_tav_text -i video.mp4 -f font.chr -o output.vtx
+  gcc -Ofast -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
+  ./encoder_tav_text -i video.mp4 -f font.chr -o output.mv3 [--no-invert-char]
 */

 #define _POSIX_C_SOURCE 200809L
@@ -93,6 +93,9 @@ static void generate_random_filename(char *filename) {

 char TEMP_AUDIO_FILE[42];

+// Set by --no-invert-char: when non-zero, find_best_glyph() skips inverted (white bg / black fg) matching
+int g_no_invert_char = 0;
+
 typedef struct {
    uint8_t *data;     // Binary glyph data (PATCH_SZ bytes per glyph)
    int count;         // Number of glyphs
@@ -193,7 +196,7 @@ FontROM *load_font_rom(const char *path) {

 // Find best matching glyph for a grayscale patch
 int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
-    // Try both normal and inverted matching
+    // Try both normal and inverted matching (unless --no-invert-char is set)
    int best_glyph = 0;
    float best_error = INFINITY;
    uint8_t best_bg = COLOR_BLACK, best_fg = COLOR_WHITE;
@@ -209,25 +212,28 @@ int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, u
            err_normal += diff * diff;
        }

-        // Try inverted: glyph 0 = fg, glyph 1 = bg
-        float err_inverted = 0;
-        for (int i = 0; i < PATCH_SZ; i++) {
-            int expected = glyph[i] ? 0 : 255;
-            int diff = patch[i] - expected;
-            err_inverted += diff * diff;
-        }
-
        if (err_normal < best_error) {
            best_error = err_normal;
            best_glyph = g;
            best_bg = COLOR_BLACK;
            best_fg = COLOR_WHITE;
        }
-        if (err_inverted < best_error) {
-            best_error = err_inverted;
-            best_glyph = g;
-            best_bg = COLOR_WHITE;
-            best_fg = COLOR_BLACK;
+
+        // Try inverted: glyph 0 = fg, glyph 1 = bg (skip if --no-invert-char)
+        if (!g_no_invert_char) {
+            float err_inverted = 0;
+            for (int i = 0; i < PATCH_SZ; i++) {
+                int expected = glyph[i] ? 0 : 255;
+                int diff = patch[i] - expected;
+                err_inverted += diff * diff;
+            }
+
+            if (err_inverted < best_error) {
+                best_error = err_inverted;
+                best_glyph = g;
+                best_bg = COLOR_WHITE;
+                best_fg = COLOR_BLACK;
+            }
        }
    }

@@ -479,7 +485,7 @@ void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,

 int main(int argc, char **argv) {
    if (argc < 7) {
-        fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav>\n", argv[0]);
+        fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]\n", argv[0]);
        return 1;
    }

@@ -491,6 +497,7 @@ int main(int argc, char **argv) {
        if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
        else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
        else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
+        else if (strcmp(argv[i], "--no-invert-char") == 0) g_no_invert_char = 1;
    }

    if (!input_video || !font_path || !output_path) {
@@ -498,6 +505,10 @@ int main(int argc, char **argv) {
        return 1;
    }

+    if (g_no_invert_char) {
+        fprintf(stderr, "Inverted character matching disabled\n");
+    }
+
    // Generate random temp filename for audio
    generate_random_filename(TEMP_AUDIO_FILE);