TAV: videotex mode; TSVM documentation fix

2026-06-06 05:28:31 +09:00 · 2025-11-21 16:53:35 +09:00
parent a61a21d28b
commit 2533b2dc19
7 changed files with 794 additions and 22 deletions
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -37,6 +37,7 @@ const TAV_PACKET_AUDIO_ADPCM = 0x23
 const TAV_PACKET_AUDIO_TAD = 0x24
 const TAV_PACKET_SUBTITLE = 0x30       // Legacy SSF (frame-locked)
 const TAV_PACKET_SUBTITLE_TC = 0x31    // SSF-TC (timecode-based)
+const TAV_PACKET_VIDEOTEX = 0x3F       // Videotex (text-mode video)
 const TAV_PACKET_AUDIO_BUNDLED = 0x40  // Entire MP2 audio file in single packet
 const TAV_PACKET_EXTENDED_HDR = 0xEF
 const TAV_PACKET_SCREEN_MASK = 0xF2  // Screen masking (letterbox/pillarbox)
@@ -1614,6 +1615,38 @@ try {
                let packetSize = seqread.readInt()
                parseSubtitlePacketTC(packetSize)
            }
+            else if (packetType === TAV_PACKET_VIDEOTEX) {
+                // Videotex packet (0x3F) - text-mode video
+                let compressedSize = seqread.readInt()
+
+                // Read compressed data
+                let compressedPtr = seqread.readBytes(compressedSize)
+
+                // Decompress with Zstd
+                // Allocate buffer for decompressed data (max: 2 + 80*32*3 = 7682 bytes)
+                let decompressedPtr = sys.malloc(8192)
+                let decompressedSize = gzip.decompFromTo(compressedPtr, compressedSize, decompressedPtr)
+
+                // Read grid dimensions from first 2 bytes
+                let rows = sys.peek(decompressedPtr)
+                let cols = sys.peek(decompressedPtr + 1)
+                let gridSize = rows * cols
+
+                // Calculate array offsets within decompressed data
+                let dataOffset = decompressedPtr + 2
+
+                // Copy arrays directly to graphics adapter memory
+                // Format: [fg-array][bg-array][char-array]
+                // Each array is gridSize bytes (typically 2560 for 80×32)
+                sys.memcpy(dataOffset, -1302529, gridSize * 3)
+
+                // Free buffers
+                sys.free(compressedPtr)
+                sys.free(decompressedPtr)
+
+                // Mark frame as ready
+                iframeReady = true
+            }
            else if (packetType === TAV_PACKET_EXTENDED_HDR) {
                // Extended header packet - metadata key-value pairs
                let numPairs = seqread.readShort()
@@ -2178,6 +2211,7 @@ try {
 }
 catch (e) {
    serial.printerr(`TAV decode error: ${e}`)
+    e.printStackTrace()
    errorlevel = 1
 }
 finally {
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -219,7 +219,9 @@ Memory Space
 12 bytes
    argument for "command" (arg1: Byte, arg2: Byte)
    write to this address FIRST and then write to "command" to execute the command
-1134 bytes
+1008 bytes
+    reserved
+2046 bytes
    unused
 2 bytes
    Cursor position in: (y*80 + x)
@@ -911,8 +913,8 @@ transmission capability, and region-of-interest coding.
            - 6 = ICtCp monoblock perceptual
            - 7 = YCoCg-R multi-tile perceptual
            - 8 = ICtCp multi-tile perceptual
-    uint16 Width: picture width in pixels
-    uint16 Height: picture height in pixels
+    uint16 Width: picture width in pixels. For Videotex-only files, the column count.
+    uint16 Height: picture height in pixels. For Videotex-only files, the row count.
    uint8  FPS: frames per second. Use 0x00 for still pictures
    uint32 Total Frames: number of video frames
            - use 0 to denote not-finalised video stream
@@ -923,7 +925,7 @@ transmission capability, and region-of-interest coding.
            - 2 = CDF 13/7 (experimental)
            - 16 = DD-4 (Four-point interpolating Deslauriers-Dubuc; experimental)
            - 255 = Haar (demonstration purpose only)
-    uint8  Decomposition Levels: number of DWT levels (1-6+)
+    uint8  Decomposition Levels: number of DWT levels (1-6+; use 0 if the file has no video or is Videotex-only)
    uint8  Quantiser Index for Y channel (uses exponential numeric system; 0: lossless, 255: potato)
    uint8  Quantiser Index for Co channel (uses exponential numeric system; 0: lossless, 255: potato)
    uint8  Quantiser Index for Cg channel (uses exponential numeric system; 0: lossless, 255: potato)
@@ -938,6 +940,8 @@ transmission capability, and region-of-interest coding.
            - bit 2 = is lossless mode
                (shorthand for `-q 6 -Q0,0,0 -w 0 --intra-only --no-perceptual-tuning --arate 384`)
            - bit 3 = has region-of-interest coding (for still pictures only)
+            - bit 4 = reserved (crop encoding?)
+            - bit 7 = has no video
    uint8  Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value)
    uint8  Channel layout (bit-field: bit 0=has alpha, bit 1=has chroma inverted, bit 2=has luma inverted)
            * Luma-only videos must be decoded with fixed Chroma=0
@@ -954,7 +958,13 @@ transmission capability, and region-of-interest coding.
            - 0 = Twobit-plane significance map (deprecated)
            - 1 = Embedded Zero Block Coding
            - 2 = Raw coefficients (debugging purpose only)
-    uint8  Reserved[2]: fill with zeros
+    uint8  Encoder Preset
+            - Bit 0 = use finer motion (finer temporal quantisation)
+            - Bit 1 = reduce grain synthesis
+            Preset "Default" -> 0x00
+            Preset "Sports" -> 0x01
+            Preset "Anime" -> 0x02
+    uint8  Reserved[1]: fill with zeros
    uint8  Device Orientation
            - 0 = No rotation
            - 1 = Clockwise 90 deg
@@ -992,6 +1002,7 @@ transmission capability, and region-of-interest coding.
    0x31: Subtitle in "Simple" format with timecodes
    0x32: Subtitle in "Karaoke" format
    0x33: Subtitle in "Karaoke" format with timecodes
+    0x3F: Videotex (full-frame text buffer image)
    <synchronised tracks>
    0x40: MP2 audio track (32 KHz)
    0x41: Zstd-compressed 8-bit PCM (32 KHz, audio hardware's native format)
@@ -1128,6 +1139,18 @@ transmission capability, and region-of-interest coding.
    uint32 Compressed Size
    *      Zstd-compressed TAD

+## Videotex Packet Structure
+    uint8  Packet Type (0x3F)
+    uint32 Compressed Size
+    *      Zstd-compressed payload, where:
+        uint8  Rows
+        uint8  Columns
+        *      Foreground colours (Rows * Columns bytes)
+        *      Background colours (Rows * Columns bytes)
+        *      Characters (Rows * Columns bytes, glyph indices 0-255)
+
+
+
 ## GOP Unified Packet Structure (0x12)
 Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing.

--- a/tsvm_core/src/net/torvald/tsvm/VM.kt
+++ b/tsvm_core/src/net/torvald/tsvm/VM.kt
@@ -667,8 +667,8 @@ class VM(
        val fromDev = getDev(from, len, false)
        val toDev = getDev(to, len, true)

-//        println("from = $from, to = $to")
-//        println("fromDev = $fromDev, toDev = $toDev")
+//        System.err.println("[VM.memcpy] from = $from, to = $to")
+//        System.err.println("[VM.memcpy] fromDev = $fromDev, toDev = $toDev")

        if (fromDev != null && toDev != null)
            UnsafeHelper.memcpy(fromDev, toDev, len)
--- a/tsvm_core/src/net/torvald/tsvm/VMJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/VMJSR223Delegate.kt
@@ -4,6 +4,7 @@ import net.torvald.UnsafeHelper
 import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
 import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUlong
 import net.torvald.tsvm.peripheral.*
+import kotlin.math.absoluteValue

 /**
 * Pass the instance of the class to the ScriptEngine's binding, preferably under the namespace of "vm"
@@ -14,14 +15,22 @@ class VMJSR223Delegate(private val vm: VM) {
        (from in start..end && (from + len) in start..end)

    private fun getDev(from: Long, len: Long, isDest: Boolean): Long? {
-        return if (from >= 0) vm.usermem.ptr + from
+//        System.err.print("getDev(from=$from, len=$len, isDest=$isDest) -> ")
+
+        return if (from >= 0) {
+//            System.err.println("USERMEM offset=$from")
+
+            vm.usermem.ptr + from
+        }
        // MMIO area
        else if (from in -1048576..-1 && (from - len) in -1048577..-1) {
-            val fromIndex = (-from-1) / 131072
+            val fromIndex = ((-from-1) / 131072).absoluteValue
            val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null
            val fromRel = (-from-1) % 131072
            if (fromRel + len > 131072) return null

+//            System.err.println("MMIO dev=${dev.typestring}, fromIndex=$fromIndex, fromRel=$fromRel")
+
            return if (dev is IOSpace) {
                if (relPtrInDev(fromRel, len, 1024, 2047)) dev.peripheralFast.ptr + fromRel - 1024
                else if (relPtrInDev(fromRel, len, 4096, 8191)) (if (isDest) dev.blockTransferTx[0] else dev.blockTransferRx[0]).ptr + fromRel - 4096
@@ -50,6 +59,8 @@ class VMJSR223Delegate(private val vm: VM) {
            val fromRel = (-from-1) % 1048576
            if (fromRel + len > 1048576) return null

+//            System.err.println("MEMORY dev=${dev.typestring}, fromIndex=$fromIndex, fromRel=$fromRel")
+
            return if (dev is AudioAdapter) {
                if (relPtrInDev(fromRel, len, 0, 114687)) dev.sampleBin.ptr + fromRel - 0
                else null
@@ -111,8 +122,8 @@ class VMJSR223Delegate(private val vm: VM) {
        val fromDev = getDev(from, len, false)
        val toDev = getDev(to, len, true)

-//        println("from = $from, to = $to")
-//        println("fromDev = $fromDev, toDev = $toDev")
+//        System.err.println("[sys.memcpy] from = $from, to = $to")
+//        System.err.println("[sys.memcpy] fromDev = $fromDev, toDev = $toDev")

        if (fromDev != null && toDev != null)
            UnsafeHelper.memcpy(fromDev, toDev, len)
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -1826,7 +1826,6 @@ typedef struct tav_encoder_s {
    int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
    int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
    int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
-    int enable_letterbox_detect; // 1 = detect and emit letterbox/pillarbox packets (default), 0 = disable
    int enable_crop_encoding;    // 1 = encode cropped active region only (Phase 2), 0 = encode full frame (default)

    // Active region tracking (for Phase 2 crop encoding)
@@ -2454,7 +2453,6 @@ static tav_encoder_t* create_encoder(void) {
    enc->separate_audio_track = 0;  // Default: interleave audio packets
    enc->pcm8_audio = 0;  // Default: use MP2 audio
    enc->tad_audio = 0;  // Default: use MP2 audio (TAD quality follows quality_level)
-    enc->enable_letterbox_detect = 1;  // Default: enable letterbox/pillarbox detection
    enc->enable_crop_encoding = 0;  // Default: disabled (Phase 2 experimental)

    // Active region tracking (initialized to full frame, updated when crop encoding enabled)
@@ -8736,7 +8734,7 @@ static void normalize_dimension_clusters(uint16_t *values, int count) {
 // Write all screen masking packets before first frame (similar to SSF-TC subtitles)
 // Uses median filtering + clustering to normalize geometry to predominant aspect ratios
 static void write_all_screen_mask_packets(tav_encoder_t *enc, FILE *output) {
-    if (!enc->enable_letterbox_detect || !enc->two_pass_mode) {
+    if (!enc->enable_crop_encoding || !enc->two_pass_mode) {
        return;  // Letterbox detection requires two-pass mode
    }

@@ -10412,7 +10410,7 @@ static int two_pass_first_pass(tav_encoder_t *enc, const char *input_file) {
        }

        // Detect letterbox/pillarbox if enabled
-        if (enc->enable_letterbox_detect) {
+        if (enc->enable_crop_encoding) {
            // Set current_frame_rgb temporarily for detection
            uint8_t *saved_current = enc->current_frame_rgb;
            enc->current_frame_rgb = frame_rgb;
@@ -10666,7 +10664,6 @@ int main(int argc, char *argv[]) {
        {"tad-audio", no_argument, 0, 1028},
        {"raw-coeffs", no_argument, 0, 1029},
        {"single-pass", no_argument, 0, 1050},  // disable two-pass encoding with wavelet-based scene detection
-        {"no-letterbox-detect", no_argument, 0, 1051},  // disable letterbox/pillarbox detection
        {"enable-crop-encoding", no_argument, 0, 1052},  // Phase 2: encode cropped active region only (experimental)
        {"help", no_argument, 0, '?'},
        {0, 0, 0, 0}
@@ -10898,10 +10895,6 @@ int main(int argc, char *argv[]) {
                enc->two_pass_mode = 0;
                printf("Two-pass wavelet-based scene change detection disabled\n");
                break;
-            case 1051: // --no-letterbox-detect
-                enc->enable_letterbox_detect = 0;
-                printf("Letterbox/pillarbox detection disabled\n");
-                break;
            case 1052: // --enable-crop-encoding
                enc->enable_crop_encoding = 1;
                printf("Phase 2 crop encoding enabled (experimental)\n");
@@ -11380,7 +11373,7 @@ int main(int argc, char *argv[]) {
        enc->encoding_width = enc->width;
        enc->encoding_height = enc->height;

-        if (enc->enable_crop_encoding && enc->enable_letterbox_detect && enc->two_pass_mode) {
+        if (enc->enable_crop_encoding && enc->two_pass_mode) {
            // Phase 2: Use GOP-level dimensions for temporal DWT (3D-DWT mode)
            // This ensures all frames in a GOP have the same encoding dimensions
            // IMPORTANT: Always use GOP-level dimensions in temporal DWT mode, even if there's no cropping benefit,
--- a/video_encoder/encoder_tav_text.c
+++ b/video_encoder/encoder_tav_text.c
@@ -0,0 +1,662 @@
+/*
+encoder_tav_text.c
+Text-based video encoder for TSVM using custom font ROMs
+
+Outputs Videotex files with a TAV-format header and packet type 0x3F (text mode)
+
+File structure:
+  - Videotex header (32 bytes, TAV layout): magic "\x1FTSVMTAV", version, grid dims (uint16 each), fps, total_frames, remaining TAV fields
+  - Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
+  - Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
+  - Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
+
+Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
+  - rows: uint8 (32)
+  - cols: uint8 (80)
+  - fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
+  - bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
+  - char-array: rows*cols bytes (glyph indices 0-255)
+
+Total uncompressed size: 2 + (80*32*3) = 7682 bytes
+Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
+Video size: 80×32 characters (560×448 pixels with 7×14 font)
+Audio: MP2 encoding at 96 kbps, 32 KHz stereo (packet 0x20)
+Each text frame is treated as an I-frame with sync packet
+
+Usage:
+  gcc -O3 -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
+  ./encoder_tav_text -i video.mp4 -f font.chr -o output.vtx
+*/
+
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include <zstd.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+
+#define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
+
+#define CHAR_W 7
+#define CHAR_H 14
+#define GRID_W 80
+#define GRID_H 32
+#define PIXEL_W (GRID_W * CHAR_W)  // 560
+#define PIXEL_H (GRID_H * CHAR_H)  // 448
+#define PATCH_SZ (CHAR_W * CHAR_H)
+#define SAMPLE_RATE 32000
+
+// TAV packet types
+#define PACKET_TIMECODE 0xFD
+#define PACKET_SYNC 0xFF
+#define PACKET_AUDIO_MP2 0x20
+#define PACKET_SSF 0x30
+#define PACKET_TEXT 0x3F
+#define PACKET_EXTENDED_HDR 0xEF
+
+// SSF opcodes for font ROM
+#define SSF_OPCODE_LOWROM 0x80
+#define SSF_OPCODE_HIGHROM 0x81
+
+// Font ROM size constants
+#define FONTROM_PADDED_SIZE 1920
+#define GLYPHS_PER_ROM 128
+
+// Color mapping (4-bit RGB to TSVM palette)
+#define COLOR_BLACK 0xF0
+#define COLOR_WHITE 0xFE
+
+typedef struct {
+    uint8_t *data;     // Unpacked glyph pixels: PATCH_SZ bytes per glyph, one byte (0 or 1) per pixel, row-major
+    int count;         // Number of glyphs stored in data
+} FontROM;
+
+// Return the first line of `ffmpeg -version` in a malloc'd buffer (caller frees), or NULL on any failure
+char *get_ffmpeg_version(void) {
+    FILE *pipe = popen("ffmpeg -version 2>&1 | head -1", "r");
+    if (!pipe) return NULL;
+
+    char *version = malloc(256);  // fgets below keeps at most 255 characters of the line
+    if (!version) {
+        pclose(pipe);
+        return NULL;
+    }
+
+    if (fgets(version, 256, pipe)) {
+        // Strip the trailing newline so the string can be stored as-is
+        size_t len = strlen(version);
+        if (len > 0 && version[len - 1] == '\n') {
+            version[len - 1] = '\0';
+        }
+        pclose(pipe);
+        return version;
+    }
+
+    free(version);  // nothing could be read from the pipe
+    pclose(pipe);
+    return NULL;
+}
+
+// Detect video FPS by parsing ffprobe's r_frame_rate ("num/den"); returns 30.0f when it cannot be read
+float detect_fps(const char *video_path) {
+    char cmd[1024];  // NOTE(review): video_path is embedded in the shell command unescaped
+    snprintf(cmd, sizeof(cmd),
+             "ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
+             "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
+             video_path);
+
+    FILE *pipe = popen(cmd, "r");
+    if (!pipe) return 30.0f; // fallback: could not start ffprobe
+
+    char fps_str[64] = {0};
+    if (fgets(fps_str, sizeof(fps_str), pipe)) {
+        // Parse fraction like "30/1" or "24000/1001"; a non-positive denominator is rejected
+        int num = 0, den = 1;
+        if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && den > 0) {
+            pclose(pipe);
+            return (float)num / (float)den;
+        }
+    }
+    pclose(pipe);
+    return 30.0f; // fallback: no output or not a valid "num/den" fraction
+}
+
+// Load a headerless font ROM (14 bytes per glyph, one byte per pixel row) and unpack it to one byte per pixel; NULL on open/read failure
+FontROM *load_font_rom(const char *path) {
+    FILE *f = fopen(path, "rb");
+    if (!f) return NULL;
+
+    fseek(f, 0, SEEK_END);
+    long size = ftell(f);
+    fseek(f, 0, SEEK_SET);
+
+    if (size % 14 != 0) {
+        fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
+    }
+
+    int glyph_count = size / 14;  // any trailing partial glyph is ignored
+    FontROM *rom = malloc(sizeof(FontROM));  // NOTE(review): malloc results are not checked here
+    rom->count = glyph_count;
+    rom->data = malloc(glyph_count * PATCH_SZ);
+
+    // Read each packed glyph and expand it into PATCH_SZ one-byte pixels
+    for (int g = 0; g < glyph_count; g++) {
+        uint8_t row_bytes[14];
+        if (fread(row_bytes, 14, 1, f) != 1) {
+            free(rom->data);
+            free(rom);
+            fclose(f);
+            return NULL;
+        }
+
+        // Unpack bits to binary pixels (each stored as 0 or 1)
+        for (int row = 0; row < CHAR_H; row++) {
+            for (int col = 0; col < CHAR_W; col++) {
+                // Bit 6 = leftmost, bit 0 = rightmost (bit 7 of each row byte is unused)
+                int bit = (row_bytes[row] >> (6 - col)) & 1;
+                rom->data[g * PATCH_SZ + row * CHAR_W + col] = bit;
+            }
+        }
+    }
+
+    fclose(f);
+    fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
+    return rom;
+}
+
+// Find the glyph and polarity with the lowest sum of squared errors against a PATCH_SZ-pixel grayscale patch; returns the glyph index
+int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
+    // Try both normal and inverted matching; defaults below are returned if the ROM has no glyphs
+    int best_glyph = 0;
+    float best_error = INFINITY;
+    uint8_t best_bg = COLOR_BLACK, best_fg = COLOR_WHITE;
+
+    for (int g = 0; g < rom->count; g++) {
+        const uint8_t *glyph = &rom->data[g * PATCH_SZ];
+
+        // Try normal: glyph 1 = fg, glyph 0 = bg (set pixels expected at 255, clear pixels at 0)
+        float err_normal = 0;
+        for (int i = 0; i < PATCH_SZ; i++) {
+            int expected = glyph[i] ? 255 : 0;
+            int diff = patch[i] - expected;
+            err_normal += diff * diff;
+        }
+
+        // Try inverted: glyph 0 = fg, glyph 1 = bg (set pixels expected at 0, clear pixels at 255)
+        float err_inverted = 0;
+        for (int i = 0; i < PATCH_SZ; i++) {
+            int expected = glyph[i] ? 0 : 255;
+            int diff = patch[i] - expected;
+            err_inverted += diff * diff;
+        }
+
+        if (err_normal < best_error) {  // strict '<': on ties the earlier candidate is kept
+            best_error = err_normal;
+            best_glyph = g;
+            best_bg = COLOR_BLACK;
+            best_fg = COLOR_WHITE;
+        }
+        if (err_inverted < best_error) {  // inverted polarity: swap the two colours
+            best_error = err_inverted;
+            best_glyph = g;
+            best_bg = COLOR_WHITE;
+            best_fg = COLOR_BLACK;
+        }
+    }
+
+    *out_bg = best_bg;
+    *out_fg = best_fg;
+    return best_glyph;
+}
+
+// Convert a PIXEL_W x PIXEL_H grayscale frame (row-major) into GRID_W x GRID_H cells of bg colour, fg colour and glyph index
+void frame_to_text(const uint8_t *pixels, const FontROM *rom,
+                   uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
+    uint8_t patch[PATCH_SZ];
+
+    for (int gr = 0; gr < GRID_H; gr++) {
+        for (int gc = 0; gc < GRID_W; gc++) {
+            int idx = gr * GRID_W + gc;  // row-major cell index into the three output arrays
+
+            // Copy this cell's CHAR_W x CHAR_H pixels out of the frame into a contiguous patch
+            for (int y = 0; y < CHAR_H; y++) {
+                for (int x = 0; x < CHAR_W; x++) {
+                    int px = gc * CHAR_W + x;
+                    int py = gr * CHAR_H + y;
+                    patch[y * CHAR_W + x] = pixels[py * PIXEL_W + px];
+                }
+            }
+
+            // Find best match; the int glyph index is stored in a uint8_t, so indices above 255 would wrap
+            chars[idx] = find_best_glyph(patch, rom, &bg_col[idx], &fg_col[idx]);
+        }
+    }
+}
+
+// Get current time in nanoseconds since UNIX epoch (microsecond resolution, from gettimeofday)
+uint64_t get_current_time_ns(void) {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return (uint64_t)tv.tv_sec * 1000000000ULL + (uint64_t)tv.tv_usec * 1000ULL;
+}
+
+// Write the 32-byte file header: TAV magic and field layout, with the video-codec fields zeroed
+void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
+    fwrite("\x1FTSVMTAV", 8, 1, f);
+
+    // Version: 1 (uint8)
+    fputc(1, f);
+
+    // Grid dimensions in the Width/Height fields (two bytes each: low byte, then a zero high byte)
+    fputc(GRID_W, f);  // cols = 80
+    fputc(0, f);
+    fputc(GRID_H, f);  // rows = 32
+    fputc(0, f);
+
+    // FPS (uint8)
+    fputc(fps, f);
+
+    // Total frames (uint32, written in host byte order; a little-endian host is assumed)
+    fwrite(&total_frames, sizeof(uint32_t), 1, f);
+
+    fputc(0, f); // wavelet filter type
+    fputc(0, f); // decomposition levels
+    fputc(0, f); // quantiser Y
+    fputc(0, f); // quantiser Co
+    fputc(0, f); // quantiser Cg
+
+    // Feature Flags
+    fputc(0x03, f);  // bit 0 = has audio; bit 1 = has subtitle (Videotex is classified as subtitles)
+
+    // Video Flags
+    fputc(0x80, f); // bit 7 = has no video (Videotex is classified as subtitles)
+
+
+    fputc(0, f); // encoder quality level
+    fputc(0x02, f); // channel layout: Y only
+    fputc(0, f); // entropy coder
+
+    fputc(0, f); // encoder preset: 0x00 = "Default"
+    fputc(0, f); // reserved
+
+    fputc(0, f); // device orientation: no rotation
+    fputc(0, f); // file role: generic
+}
+
+// Write extended header packet (0xEF): a uint16 pair count, then 4-byte-key / type-byte / value pairs (host byte order)
+// Returns the file offset where ENDT value is written (for later update)
+long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
+    fputc(PACKET_EXTENDED_HDR, f);
+
+    // Helper macros for key-value pairs
+    #define WRITE_KV_UINT64(key_str, value) do { \
+        fwrite(key_str, 1, 4, f); \
+        uint8_t value_type = 0x04; /* Uint64 */ \
+        fwrite(&value_type, 1, 1, f); \
+        uint64_t val = (value); \
+        fwrite(&val, sizeof(uint64_t), 1, f); \
+    } while(0)
+
+    #define WRITE_KV_BYTES(key_str, data, len) do { \
+        fwrite(key_str, 1, 4, f); \
+        uint8_t value_type = 0x10; /* Bytes */ \
+        fwrite(&value_type, 1, 1, f); \
+        uint16_t length = (len); \
+        fwrite(&length, sizeof(uint16_t), 1, f); \
+        fwrite((data), 1, (len), f); \
+    } while(0)
+
+    // Count key-value pairs (BGNT, ENDT, CDAT, VNDR, FMPG)
+    uint16_t num_pairs = ffmpeg_version ? 5 : 4;  // FMPG is optional
+    fwrite(&num_pairs, sizeof(uint16_t), 1, f);
+
+    // BGNT: Video begin time (0 for frame 0)
+    WRITE_KV_UINT64("BGNT", 0ULL);
+
+    // ENDT: Video end time (placeholder, will be updated at end); remember where this pair starts
+    long endt_offset = ftell(f);
+    WRITE_KV_UINT64("ENDT", 0ULL);
+
+    // CDAT: Creation time in nanoseconds since UNIX epoch
+    WRITE_KV_UINT64("CDAT", creation_time_ns);
+
+    // VNDR: Encoder name and version (written without a NUL terminator)
+    const char *vendor_str = ENCODER_VENDOR_STRING;
+    WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));
+
+    // FMPG: FFmpeg version (if available)
+    if (ffmpeg_version) {
+        WRITE_KV_BYTES("FMPG", ffmpeg_version, strlen(ffmpeg_version));
+    }
+
+    #undef WRITE_KV_UINT64
+    #undef WRITE_KV_BYTES
+
+    // Return offset of ENDT value (skip key, type byte)
+    return endt_offset + 4 + 1;  // 4 bytes for "ENDT", 1 byte for type
+}
+
+// Write one font ROM half as an SSF packet (type 0x30), zero-padded to FONTROM_PADDED_SIZE bytes
+void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
+    // Prepare padded ROM data (pad with zeros); data_size must not exceed FONTROM_PADDED_SIZE (not checked here)
+    uint8_t *padded_data = calloc(1, FONTROM_PADDED_SIZE);
+    memcpy(padded_data, rom_data, data_size);
+
+    // Packet structure:
+    // [type:0x30][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
+    uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;  // everything after the size field
+
+    // Write packet type and size
+    fputc(PACKET_SSF, f);
+    fwrite(&packet_size, sizeof(uint32_t), 1, f);
+
+    // Write SSF payload
+    // Index (3 bytes, always 0 for font ROM)
+    fputc(0, f);
+    fputc(0, f);
+    fputc(0, f);
+
+    // Opcode (0x80=lowrom, 0x81=highrom)
+    fputc(opcode, f);
+
+    // Payload length (uint16, host byte order; a little-endian host is assumed)
+    uint16_t payload_len = FONTROM_PADDED_SIZE;
+    fwrite(&payload_len, sizeof(uint16_t), 1, f);
+
+    // Font data (padded to 1920 bytes)
+    fwrite(padded_data, 1, FONTROM_PADDED_SIZE, f);
+
+    // Terminator
+    fputc(0x00, f);
+
+    free(padded_data);
+
+    fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
+            data_size, FONTROM_PADDED_SIZE, opcode);
+}
+
+// Write timecode packet: type byte 0xFD followed by the uint64 timecode in nanoseconds (host byte order)
+void write_timecode(FILE *f, uint64_t timecode_ns) {
+    fputc(PACKET_TIMECODE, f);
+    fwrite(&timecode_ns, sizeof(uint64_t), 1, f);
+}
+
+// Write sync packet: the single type byte 0xFF with no payload
+void write_sync(FILE *f) {
+    fputc(PACKET_SYNC, f);
+}
+
+// Write MP2 audio packet: type byte 0x20, uint32 payload size (host byte order), then `size` bytes of data
+void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
+    fputc(PACKET_AUDIO_MP2, f);
+    fwrite(&size, sizeof(uint32_t), 1, f);
+    fwrite(data, 1, size, f);
+}
+
+// Write a Videotex packet (0x3F): type byte, uint32 compressed size, Zstd-compressed grid; exits the process on compression failure
+void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
+                       const uint8_t *chars, int rows, int cols) {
+    int grid_size = rows * cols;
+
+    // Prepare uncompressed data: [rows][cols][fg-array][bg-array][char-array]
+    // Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
+    size_t uncompressed_size = 2 + grid_size * 3;
+    uint8_t *uncompressed = malloc(uncompressed_size);  // NOTE(review): malloc results are not checked here
+
+    uncompressed[0] = rows;  // stored as a single byte each, so values above 255 would wrap
+    uncompressed[1] = cols;
+
+    // Copy arrays in order: foreground, background, characters
+    memcpy(&uncompressed[2], fg_col, grid_size);                    // Foreground first
+    memcpy(&uncompressed[2 + grid_size], bg_col, grid_size);        // Background second
+    memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size);     // Characters third
+
+    // Compress with Zstd (compression level 3) into a worst-case-sized buffer
+    size_t max_compressed = ZSTD_compressBound(uncompressed_size);
+    uint8_t *compressed = malloc(max_compressed);
+    size_t compressed_size = ZSTD_compress(compressed, max_compressed,
+                                           uncompressed, uncompressed_size, 3);
+
+    if (ZSTD_isError(compressed_size)) {
+        fprintf(stderr, "Zstd compression error\n");
+        exit(1);
+    }
+
+    // Write packet: [type][size][data] (size is a uint32 in host byte order)
+    fputc(PACKET_TEXT, f);
+    uint32_t size32 = compressed_size;
+    fwrite(&size32, 4, 1, f);
+    fwrite(compressed, compressed_size, 1, f);
+
+    free(compressed);
+    free(uncompressed);
+}
+
+int main(int argc, char **argv) {
+    if (argc < 7) {
+        fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav>\n", argv[0]);
+        return 1;
+    }
+
+    const char *input_video = NULL;
+    const char *font_path = NULL;
+    const char *output_path = NULL;
+
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
+        else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
+        else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
+    }
+
+    if (!input_video || !font_path || !output_path) {
+        fprintf(stderr, "Missing required arguments\n");
+        return 1;
+    }
+
+    // Capture creation time and FFmpeg version for extended header
+    uint64_t creation_time_ns = get_current_time_ns();
+    char *ffmpeg_version = get_ffmpeg_version();
+
+    // Detect video FPS
+    float fps_float = detect_fps(input_video);
+    uint8_t fps = (uint8_t)(fps_float + 0.5f); // Round to nearest integer
+    fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);
+
+    // Load font ROM
+    FontROM *rom = load_font_rom(font_path);
+    if (!rom) {
+        fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
+        return 1;
+    }
+
+    // Open FFmpeg pipe for grayscale frames at 560×448
+    char ffmpeg_cmd[1024];
+    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
+             "ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
+             "-f rawvideo -pix_fmt gray - 2>/dev/null",
+             input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
+
+    fprintf(stderr, "Opening video stream...\n");
+    FILE *video_pipe = popen(ffmpeg_cmd, "r");
+    if (!video_pipe) {
+        fprintf(stderr, "Failed to open FFmpeg pipe\n");
+        return 1;
+    }
+
+    // Open FFmpeg pipe for MP2 audio (32 KHz stereo)
+    char audio_cmd[1024];
+    snprintf(audio_cmd, sizeof(audio_cmd),
+             "ffmpeg -i \"%s\" -vn -ar %d -ac 2 -f mp2 -b:a 96k - 2>/dev/null",
+             input_video, SAMPLE_RATE);
+
+    fprintf(stderr, "Opening audio stream...\n");
+    FILE *audio_pipe = popen(audio_cmd, "r");
+    if (!audio_pipe) {
+        fprintf(stderr, "Failed to open audio FFmpeg pipe\n");
+        pclose(video_pipe);
+        return 1;
+    }
+
+    // Open output file
+    FILE *out = fopen(output_path, "wb");
+    if (!out) {
+        fprintf(stderr, "Failed to open output file\n");
+        pclose(video_pipe);
+        pclose(audio_pipe);
+        return 1;
+    }
+
+    // Write Videotex header with placeholder total_frames (will update at end)
+    long header_offset = ftell(out);
+    write_videotex_header(out, fps, 0);
+
+    // Write extended header packet (before first timecode)
+    long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);
+
+    // Upload font ROM to TSVM (split into lowrom and highrom)
+    fprintf(stderr, "Uploading font ROM to TSVM...\n");
+    FILE *rom_file = fopen(font_path, "rb");
+    if (rom_file) {
+        fseek(rom_file, 0, SEEK_END);
+        long rom_size = ftell(rom_file);
+        fseek(rom_file, 0, SEEK_SET);
+
+        uint8_t *raw_rom = malloc(rom_size);
+        if (raw_rom && fread(raw_rom, 1, rom_size, rom_file) == rom_size) {
+            // Split into lowrom and highrom
+            size_t bytes_per_half = (GLYPHS_PER_ROM * 14); // 128 glyphs × 14 bytes = 1792
+
+            // Write lowrom (first 128 glyphs)
+            if (rom_size >= bytes_per_half) {
+                write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
+            }
+
+            // Write highrom (second 128 glyphs)
+            if (rom_size >= bytes_per_half * 2) {
+                write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
+            } else if (rom_size > bytes_per_half) {
+                // Partial highrom
+                write_fontrom_packet(out, raw_rom + bytes_per_half, rom_size - bytes_per_half, SSF_OPCODE_HIGHROM);
+            }
+
+            free(raw_rom);
+        }
+        fclose(rom_file);
+    }
+
+    // Allocate buffers
+    size_t frame_size = PIXEL_W * PIXEL_H;
+    uint8_t *gray_pixels = malloc(frame_size);
+    uint8_t *bg_col = malloc(GRID_W * GRID_H);
+    uint8_t *fg_col = malloc(GRID_W * GRID_H);
+    uint8_t *chars = malloc(GRID_W * GRID_H);
+
+    // Audio buffer (MP2 stream is read in fixed AUDIO_CHUNK_SIZE-byte chunks, not per MP2 frame)
+    #define AUDIO_CHUNK_SIZE 8192  // Arbitrary MP2 frame buffer size
+    uint8_t *audio_buffer = malloc(AUDIO_CHUNK_SIZE);
+    size_t audio_available = 0;
+
+    uint32_t frame_num = 0;
+    uint64_t total_audio_bytes = 0;
+
+    fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
+            GRID_W, GRID_H, PIXEL_W, PIXEL_H);
+
+    // Track encoding start time
+    struct timeval start_time, now;
+    gettimeofday(&start_time, NULL);
+
+    // Read and process frames
+    while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
+        // Calculate timecode in nanoseconds
+        uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);
+
+        // Write audio packet first (if available)
+        // Read up to AUDIO_CHUNK_SIZE bytes of audio alongside each video frame
+        audio_available = fread(audio_buffer, 1, AUDIO_CHUNK_SIZE, audio_pipe);
+        if (audio_available > 0) {
+            write_audio_mp2(out, audio_buffer, audio_available);
+            total_audio_bytes += audio_available;
+        }
+
+        // Write timecode
+        write_timecode(out, timecode_ns);
+
+        // Convert to text mode
+        frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);
+
+        // Write text packet (treated as I-frame)
+        write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);
+
+        // Write sync packet after each frame
+        write_sync(out);
+
+        frame_num++;
+        if (frame_num % 30 == 0) {
+            // Calculate encoding speed
+            gettimeofday(&now, NULL);
+            double elapsed = (now.tv_sec - start_time.tv_sec) +
+                           (now.tv_usec - start_time.tv_usec) / 1000000.0;
+            double encoding_fps = frame_num / elapsed;
+
+            fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
+            fflush(stderr);
+        }
+    }
+
+    // Read any remaining audio
+    while ((audio_available = fread(audio_buffer, 1, AUDIO_CHUNK_SIZE, audio_pipe)) > 0) {
+        write_audio_mp2(out, audio_buffer, audio_available);
+        total_audio_bytes += audio_available;
+    }
+
+    // Final timing
+    gettimeofday(&now, NULL);
+    double total_time = (now.tv_sec - start_time.tv_sec) +
+                       (now.tv_usec - start_time.tv_usec) / 1000000.0;
+    double final_fps = frame_num / total_time;
+
+    fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
+            frame_num, total_time, final_fps);
+    fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
+            (unsigned long long)total_audio_bytes,
+            total_audio_bytes / 1024.0 / 1024.0);
+
+    // Update total_frames in header
+    if (frame_num > 0) {
+        fseek(out, header_offset + 12, SEEK_SET);  // Offset to total_frames field
+        fwrite(&frame_num, sizeof(uint32_t), 1, out);
+        fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
+    }
+
+    // Update ENDT in extended header (calculate end time for last frame)
+    if (frame_num > 0) {
+        // Calculate duration: (frame_num - 1) frames * (1/fps) seconds in nanoseconds
+        uint64_t duration_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
+        uint64_t endt_ns = duration_ns;
+
+        fseek(out, endt_offset, SEEK_SET);
+        fwrite(&endt_ns, sizeof(uint64_t), 1, out);
+        fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
+                (unsigned long long)endt_ns, endt_ns / 1000000000.0);
+    }
+
+    // Cleanup
+    pclose(video_pipe);
+    pclose(audio_pipe);
+    fclose(out);
+    free(gray_pixels);
+    free(bg_col);
+    free(fg_col);
+    free(chars);
+    free(audio_buffer);
+    free(rom->data);
+    free(rom);
+    if (ffmpeg_version) free(ffmpeg_version);
+
+    return 0;
+}
--- a/video_encoder/tav_inspector.c
+++ b/video_encoder/tav_inspector.c
@@ -26,8 +26,9 @@
 #define TAV_PACKET_AUDIO_MP2      0x20
 #define TAV_PACKET_AUDIO_PCM8     0x21
 #define TAV_PACKET_AUDIO_TAD      0x24
-#define TAV_PACKET_SUBTITLE       0x30  // Legacy SSF (frame-locked)
+#define TAV_PACKET_SUBTITLE       0x30  // Legacy SSF (frame-locked), also used for Font ROM upload
 #define TAV_PACKET_SUBTITLE_TC    0x31  // SSF-TC (timecode-based)
+#define TAV_PACKET_VIDEOTEX       0x3F  // Videotex (text-mode video)
 #define TAV_PACKET_AUDIO_TRACK    0x40
 #define TAV_PACKET_VIDEO_CH2_I    0x70
 #define TAV_PACKET_VIDEO_CH2_P    0x71
@@ -77,6 +78,7 @@ typedef struct {
    int audio_tad_count;
    int audio_track_count;
    int subtitle_count;
+    int videotex_count;
    int timecode_count;
    int sync_count;
    int sync_ntsc_count;
@@ -91,6 +93,7 @@ typedef struct {
    uint64_t audio_pcm8_bytes;
    uint64_t audio_tad_bytes;
    uint64_t audio_track_bytes;
+    uint64_t videotex_bytes;
 } packet_stats_t;

 // Display options
@@ -122,6 +125,7 @@ const char* get_packet_type_name(uint8_t type) {
        case TAV_PACKET_AUDIO_TAD: return "AUDIO TAD (zstd)";
        case TAV_PACKET_SUBTITLE: return "SUBTITLE (SSF frame-locked)";
        case TAV_PACKET_SUBTITLE_TC: return "SUBTITLE (SSF-TC timecoded)";
+        case TAV_PACKET_VIDEOTEX: return "VIDEOTEX (text-mode video)";
        case TAV_PACKET_AUDIO_TRACK: return "AUDIO TRACK (Separate MP2)";
        case TAV_PACKET_EXIF: return "METADATA (EXIF)";
        case TAV_PACKET_ID3V1: return "METADATA (ID3v1)";
@@ -820,6 +824,45 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un
                break;
            }

+            case TAV_PACKET_VIDEOTEX: {  // uint32 compressed size, then a Zstd-compressed payload
+                stats.videotex_count++;
+                uint32_t size;
+                if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
+                stats.videotex_bytes += size;
+
+                if (!opts.summary_only && display) {
+                    // Read compressed data; the buffer is released after the if/else on every path
+                    uint8_t *compressed_data = malloc(size);
+                    if (compressed_data && fread(compressed_data, 1, size, fp) == size) {
+                        // Allocate decompression buffer (max 2 + 80*32*3 = 7682 bytes)
+                        size_t const decompress_size = 8192;
+                        uint8_t *decompressed_data = malloc(decompress_size);
+                        if (decompressed_data) {
+                            size_t actual_size = ZSTD_decompress(decompressed_data, decompress_size,
+                                                                compressed_data, size);
+                            if (!ZSTD_isError(actual_size) && actual_size >= 2) {
+                                uint8_t rows = decompressed_data[0];  // payload byte 0
+                                uint8_t cols = decompressed_data[1];  // payload byte 1
+                                printf(" - size=%u bytes (decompressed: %zu bytes, grid: %ux%u, ratio: %.2f:1)",
+                                       size, actual_size, cols, rows, (double)actual_size / size);
+                            } else {
+                                printf(" - size=%u bytes (decompression failed)", size);
+                            }
+                            free(decompressed_data);
+                        } else {
+                            printf(" - size=%u bytes", size);
+                        }
+                    } else {
+                        printf(" - size=%u bytes", size);
+                        fseek(fp, size, SEEK_CUR);
+                    }
+                    free(compressed_data);  // free(NULL) is a no-op; previously leaked when fread came up short
+                } else {
+                    fseek(fp, size, SEEK_CUR);
+                }
+                break;
+            }
+
            case TAV_PACKET_EXIF:
            case TAV_PACKET_ID3V1:
            case TAV_PACKET_ID3V2:
@@ -950,6 +993,12 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un
    printf("\nOther:\n");
    printf("  Timecodes:          %d\n", stats.timecode_count);
    printf("  Subtitles:          %d\n", stats.subtitle_count);
+    if (stats.videotex_count > 0) {
+        printf("  Videotex frames:    %d (%llu bytes, %.2f MB)\n",
+               stats.videotex_count,
+               (unsigned long long)stats.videotex_bytes,
+               stats.videotex_bytes / 1024.0 / 1024.0);
+    }
    printf("  Extended headers:   %d\n", stats.extended_header_count);
    printf("  Metadata packets:   %d\n", stats.metadata_count);
    printf("  Loop points:        %d\n", stats.loop_point_count);