TAV: videotex mode; TSVM documentation fix

This commit is contained in:
minjaesong
2025-11-21 16:53:35 +09:00
parent a61a21d28b
commit 2533b2dc19
7 changed files with 794 additions and 22 deletions

View File

@@ -37,6 +37,7 @@ const TAV_PACKET_AUDIO_ADPCM = 0x23
const TAV_PACKET_AUDIO_TAD = 0x24
const TAV_PACKET_SUBTITLE = 0x30 // Legacy SSF (frame-locked)
const TAV_PACKET_SUBTITLE_TC = 0x31 // SSF-TC (timecode-based)
const TAV_PACKET_VIDEOTEX = 0x3F // Videotex (text-mode video)
const TAV_PACKET_AUDIO_BUNDLED = 0x40 // Entire MP2 audio file in single packet
const TAV_PACKET_EXTENDED_HDR = 0xEF
const TAV_PACKET_SCREEN_MASK = 0xF2 // Screen masking (letterbox/pillarbox)
@@ -1614,6 +1615,38 @@ try {
let packetSize = seqread.readInt()
parseSubtitlePacketTC(packetSize)
}
else if (packetType === TAV_PACKET_VIDEOTEX) {
// Videotex packet (0x3F) - text-mode video
let compressedSize = seqread.readInt()
// Read compressed data
let compressedPtr = seqread.readBytes(compressedSize)
// Decompress with Zstd
// Allocate buffer for decompressed data (max: 2 + 80*32*3 = 7682 bytes)
let decompressedPtr = sys.malloc(8192)
let decompressedSize = gzip.decompFromTo(compressedPtr, compressedSize, decompressedPtr)
// Read grid dimensions from first 2 bytes
let rows = sys.peek(decompressedPtr)
let cols = sys.peek(decompressedPtr + 1)
let gridSize = rows * cols
// Calculate array offsets within decompressed data
let dataOffset = decompressedPtr + 2
// Copy arrays directly to graphics adapter memory
// Format: [fg-array][bg-array][char-array]
// Each array is gridSize bytes (typically 2560 for 80×32)
sys.memcpy(dataOffset, -1302529, gridSize * 3)
// Free buffers
sys.free(compressedPtr)
sys.free(decompressedPtr)
// Mark frame as ready
iframeReady = true
}
else if (packetType === TAV_PACKET_EXTENDED_HDR) {
// Extended header packet - metadata key-value pairs
let numPairs = seqread.readShort()
@@ -2178,6 +2211,7 @@ try {
}
catch (e) {
serial.printerr(`TAV decode error: ${e}`)
e.printStackTrace()
errorlevel = 1
}
finally {

View File

@@ -219,7 +219,9 @@ Memory Space
12 bytes
argument for "command" (arg1: Byte, arg2: Byte)
write to this address FIRST and then write to "command" to execute the command
1134 bytes
1008 bytes
reserved
2046 bytes
unused
2 bytes
Cursor position in: (y*80 + x)
@@ -911,8 +913,8 @@ transmission capability, and region-of-interest coding.
- 6 = ICtCp monoblock perceptual
- 7 = YCoCg-R multi-tile perceptual
- 8 = ICtCp multi-tile perceptual
uint16 Width: picture width in pixels
uint16 Height: picture height in pixels
uint16 Width: picture width in pixels. Columns count for Videotex-only file.
uint16 Height: picture height in pixels. Rows count for Videotex-only file.
uint8 FPS: frames per second. Use 0x00 for still pictures
uint32 Total Frames: number of video frames
- use 0 to denote not-finalised video stream
@@ -923,7 +925,7 @@ transmission capability, and region-of-interest coding.
- 2 = CDF 13/7 (experimental)
- 16 = DD-4 (Four-point interpolating Deslauriers-Dubuc; experimental)
- 255 = Haar (demonstration purpose only)
uint8 Decomposition Levels: number of DWT levels (1-6+)
uint8 Decomposition Levels: number of DWT levels (1-6+; use 0 if it has no video or Videotex only)
uint8 Quantiser Index for Y channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Quantiser Index for Co channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Quantiser Index for Cg channel (uses exponential numeric system; 0: lossless, 255: potato)
@@ -938,6 +940,8 @@ transmission capability, and region-of-interest coding.
- bit 2 = is lossless mode
(shorthand for `-q 6 -Q0,0,0 -w 0 --intra-only --no-perceptual-tuning --arate 384`)
- bit 3 = has region-of-interest coding (for still pictures only)
- bit 4 = reserved (crop encoding?)
- bit 7 = has no video
uint8 Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value)
uint8 Channel layout (bit-field: bit 0=has alpha, bit 1=has chroma inverted, bit 2=has luma inverted)
* Luma-only videos must be decoded with fixed Chroma=0
@@ -954,7 +958,13 @@ transmission capability, and region-of-interest coding.
- 0 = Twobit-plane significance map (deprecated)
- 1 = Embedded Zero Block Coding
- 2 = Raw coefficients (debugging purpose only)
uint8 Reserved[2]: fill with zeros
uint8 Encoder Preset
- Bit 0 = use finer motion (finer temporal quantisation)
- Bit 1 = reduce grain synthesis
Preset "Default" -> 0x00
Preset "Sports" -> 0x01
Preset "Anime" -> 0x02
uint8 Reserved[1]: fill with zeros
uint8 Device Orientation
- 0 = No rotation
- 1 = Clockwise 90 deg
@@ -992,6 +1002,7 @@ transmission capability, and region-of-interest coding.
0x31: Subtitle in "Simple" format with timecodes
0x32: Subtitle in "Karaoke" format
0x33: Subtitle in "Karaoke" format with timecodes
0x3F: Videotex (full-frame text buffer image)
<synchronised tracks>
0x40: MP2 audio track (32 KHz)
0x41: Zstd-compressed 8-bit PCM (32 KHz, audio hardware's native format)
@@ -1128,6 +1139,18 @@ transmission capability, and region-of-interest coding.
uint32 Compressed Size
* Zstd-compressed TAD
## Videotex Packet Structure
uint8 Packet Type (0x3F)
uint32 Compressed Size
* Zstd-compressed payload, where:
uint8 Rows
uint8 Columns
* Foreground colours
* Background colours
* Characters
## GOP Unified Packet Structure (0x12)
Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing.

View File

@@ -667,8 +667,8 @@ class VM(
val fromDev = getDev(from, len, false)
val toDev = getDev(to, len, true)
// println("from = $from, to = $to")
// println("fromDev = $fromDev, toDev = $toDev")
// System.err.println("[VM.memcpy] from = $from, to = $to")
// System.err.println("[VM.memcpy] fromDev = $fromDev, toDev = $toDev")
if (fromDev != null && toDev != null)
UnsafeHelper.memcpy(fromDev, toDev, len)

View File

@@ -4,6 +4,7 @@ import net.torvald.UnsafeHelper
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUlong
import net.torvald.tsvm.peripheral.*
import kotlin.math.absoluteValue
/**
* Pass the instance of the class to the ScriptEngine's binding, preferably under the namespace of "vm"
@@ -14,14 +15,22 @@ class VMJSR223Delegate(private val vm: VM) {
(from in start..end && (from + len) in start..end)
private fun getDev(from: Long, len: Long, isDest: Boolean): Long? {
return if (from >= 0) vm.usermem.ptr + from
// System.err.print("getDev(from=$from, len=$len, isDest=$isDest) -> ")
return if (from >= 0) {
// System.err.println("USERMEM offset=$from")
vm.usermem.ptr + from
}
// MMIO area
else if (from in -1048576..-1 && (from - len) in -1048577..-1) {
val fromIndex = (-from-1) / 131072
val fromIndex = ((-from-1) / 131072).absoluteValue
val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null
val fromRel = (-from-1) % 131072
if (fromRel + len > 131072) return null
// System.err.println("MMIO dev=${dev.typestring}, fromIndex=$fromIndex, fromRel=$fromRel")
return if (dev is IOSpace) {
if (relPtrInDev(fromRel, len, 1024, 2047)) dev.peripheralFast.ptr + fromRel - 1024
else if (relPtrInDev(fromRel, len, 4096, 8191)) (if (isDest) dev.blockTransferTx[0] else dev.blockTransferRx[0]).ptr + fromRel - 4096
@@ -50,6 +59,8 @@ class VMJSR223Delegate(private val vm: VM) {
val fromRel = (-from-1) % 1048576
if (fromRel + len > 1048576) return null
// System.err.println("MEMORY dev=${dev.typestring}, fromIndex=$fromIndex, fromRel=$fromRel")
return if (dev is AudioAdapter) {
if (relPtrInDev(fromRel, len, 0, 114687)) dev.sampleBin.ptr + fromRel - 0
else null
@@ -111,8 +122,8 @@ class VMJSR223Delegate(private val vm: VM) {
val fromDev = getDev(from, len, false)
val toDev = getDev(to, len, true)
// println("from = $from, to = $to")
// println("fromDev = $fromDev, toDev = $toDev")
// System.err.println("[sys.memcpy] from = $from, to = $to")
// System.err.println("[sys.memcpy] fromDev = $fromDev, toDev = $toDev")
if (fromDev != null && toDev != null)
UnsafeHelper.memcpy(fromDev, toDev, len)

View File

@@ -1826,7 +1826,6 @@ typedef struct tav_encoder_s {
int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
int enable_letterbox_detect; // 1 = detect and emit letterbox/pillarbox packets (default), 0 = disable
int enable_crop_encoding; // 1 = encode cropped active region only (Phase 2), 0 = encode full frame (default)
// Active region tracking (for Phase 2 crop encoding)
@@ -2454,7 +2453,6 @@ static tav_encoder_t* create_encoder(void) {
enc->separate_audio_track = 0; // Default: interleave audio packets
enc->pcm8_audio = 0; // Default: use MP2 audio
enc->tad_audio = 0; // Default: use MP2 audio (TAD quality follows quality_level)
enc->enable_letterbox_detect = 1; // Default: enable letterbox/pillarbox detection
enc->enable_crop_encoding = 0; // Default: disabled (Phase 2 experimental)
// Active region tracking (initialized to full frame, updated when crop encoding enabled)
@@ -8736,7 +8734,7 @@ static void normalize_dimension_clusters(uint16_t *values, int count) {
// Write all screen masking packets before first frame (similar to SSF-TC subtitles)
// Uses median filtering + clustering to normalize geometry to predominant aspect ratios
static void write_all_screen_mask_packets(tav_encoder_t *enc, FILE *output) {
if (!enc->enable_letterbox_detect || !enc->two_pass_mode) {
if (!enc->enable_crop_encoding || !enc->two_pass_mode) {
return; // Letterbox detection requires two-pass mode
}
@@ -10412,7 +10410,7 @@ static int two_pass_first_pass(tav_encoder_t *enc, const char *input_file) {
}
// Detect letterbox/pillarbox if enabled
if (enc->enable_letterbox_detect) {
if (enc->enable_crop_encoding) {
// Set current_frame_rgb temporarily for detection
uint8_t *saved_current = enc->current_frame_rgb;
enc->current_frame_rgb = frame_rgb;
@@ -10666,7 +10664,6 @@ int main(int argc, char *argv[]) {
{"tad-audio", no_argument, 0, 1028},
{"raw-coeffs", no_argument, 0, 1029},
{"single-pass", no_argument, 0, 1050}, // disable two-pass encoding with wavelet-based scene detection
{"no-letterbox-detect", no_argument, 0, 1051}, // disable letterbox/pillarbox detection
{"enable-crop-encoding", no_argument, 0, 1052}, // Phase 2: encode cropped active region only (experimental)
{"help", no_argument, 0, '?'},
{0, 0, 0, 0}
@@ -10898,10 +10895,6 @@ int main(int argc, char *argv[]) {
enc->two_pass_mode = 0;
printf("Two-pass wavelet-based scene change detection disabled\n");
break;
case 1051: // --no-letterbox-detect
enc->enable_letterbox_detect = 0;
printf("Letterbox/pillarbox detection disabled\n");
break;
case 1052: // --enable-crop-encoding
enc->enable_crop_encoding = 1;
printf("Phase 2 crop encoding enabled (experimental)\n");
@@ -11380,7 +11373,7 @@ int main(int argc, char *argv[]) {
enc->encoding_width = enc->width;
enc->encoding_height = enc->height;
if (enc->enable_crop_encoding && enc->enable_letterbox_detect && enc->two_pass_mode) {
if (enc->enable_crop_encoding && enc->two_pass_mode) {
// Phase 2: Use GOP-level dimensions for temporal DWT (3D-DWT mode)
// This ensures all frames in a GOP have the same encoding dimensions
// IMPORTANT: Always use GOP-level dimensions in temporal DWT mode, even if there's no cropping benefit,

View File

@@ -0,0 +1,662 @@
/*
encoder_tav_text.c
Text-based video encoder for TSVM using custom font ROMs
Outputs Videotex files with custom header and packet type 0x3F (text mode)
File structure:
- Videotex header (32 bytes): magic "\x1FTSVM-VT", version, grid dims, fps, total_frames
- Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
- Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
- Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
- rows: uint8 (32)
- cols: uint8 (80)
- fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
- bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
- char-array: rows*cols bytes (glyph indices 0-255)
Total uncompressed size: 2 + (80*32*3) = 7682 bytes
Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
Video size: 80×32 characters (560×448 pixels with 7×14 font)
Audio: MP2 encoding at 96 kbps, 32 KHz stereo (packet 0x20)
Each text frame is treated as an I-frame with sync packet
Usage:
gcc -O3 -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
./encoder_tav_text -i video.mp4 -f font.chr -o output.vtx
*/
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
#define CHAR_W 7
#define CHAR_H 14
#define GRID_W 80
#define GRID_H 32
#define PIXEL_W (GRID_W * CHAR_W) // 560
#define PIXEL_H (GRID_H * CHAR_H) // 448
#define PATCH_SZ (CHAR_W * CHAR_H)
#define SAMPLE_RATE 32000
// TAV packet types
#define PACKET_TIMECODE 0xFD
#define PACKET_SYNC 0xFF
#define PACKET_AUDIO_MP2 0x20
#define PACKET_SSF 0x30
#define PACKET_TEXT 0x3F
#define PACKET_EXTENDED_HDR 0xEF
// SSF opcodes for font ROM
#define SSF_OPCODE_LOWROM 0x80
#define SSF_OPCODE_HIGHROM 0x81
// Font ROM size constants
#define FONTROM_PADDED_SIZE 1920
#define GLYPHS_PER_ROM 128
// Color mapping (4-bit RGB to TSVM palette)
#define COLOR_BLACK 0xF0
#define COLOR_WHITE 0xFE
typedef struct {
uint8_t *data; // Binary glyph data (PATCH_SZ bytes per glyph)
int count; // Number of glyphs
} FontROM;
// Get FFmpeg version string
char *get_ffmpeg_version(void) {
FILE *pipe = popen("ffmpeg -version 2>&1 | head -1", "r");
if (!pipe) return NULL;
char *version = malloc(256);
if (!version) {
pclose(pipe);
return NULL;
}
if (fgets(version, 256, pipe)) {
// Remove trailing newline
size_t len = strlen(version);
if (len > 0 && version[len - 1] == '\n') {
version[len - 1] = '\0';
}
pclose(pipe);
return version;
}
free(version);
pclose(pipe);
return NULL;
}
// Detect video FPS using ffprobe
float detect_fps(const char *video_path) {
char cmd[1024];
snprintf(cmd, sizeof(cmd),
"ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
"-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
video_path);
FILE *pipe = popen(cmd, "r");
if (!pipe) return 30.0f; // fallback
char fps_str[64] = {0};
if (fgets(fps_str, sizeof(fps_str), pipe)) {
// Parse fraction like "30/1" or "24000/1001"
int num = 0, den = 1;
if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && den > 0) {
pclose(pipe);
return (float)num / (float)den;
}
}
pclose(pipe);
return 30.0f; // fallback
}
// Load font ROM (14 bytes per glyph, no header)
FontROM *load_font_rom(const char *path) {
FILE *f = fopen(path, "rb");
if (!f) return NULL;
fseek(f, 0, SEEK_END);
long size = ftell(f);
fseek(f, 0, SEEK_SET);
if (size % 14 != 0) {
fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
}
int glyph_count = size / 14;
FontROM *rom = malloc(sizeof(FontROM));
rom->count = glyph_count;
rom->data = malloc(glyph_count * PATCH_SZ);
// Read and unpack glyphs
for (int g = 0; g < glyph_count; g++) {
uint8_t row_bytes[14];
if (fread(row_bytes, 14, 1, f) != 1) {
free(rom->data);
free(rom);
fclose(f);
return NULL;
}
// Unpack bits to binary pixels
for (int row = 0; row < CHAR_H; row++) {
for (int col = 0; col < CHAR_W; col++) {
// Bit 6 = leftmost, bit 0 = rightmost
int bit = (row_bytes[row] >> (6 - col)) & 1;
rom->data[g * PATCH_SZ + row * CHAR_W + col] = bit;
}
}
}
fclose(f);
fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
return rom;
}
// Find best matching glyph for a grayscale patch
int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
// Try both normal and inverted matching
int best_glyph = 0;
float best_error = INFINITY;
uint8_t best_bg = COLOR_BLACK, best_fg = COLOR_WHITE;
for (int g = 0; g < rom->count; g++) {
const uint8_t *glyph = &rom->data[g * PATCH_SZ];
// Try normal: glyph 1 = fg, glyph 0 = bg
float err_normal = 0;
for (int i = 0; i < PATCH_SZ; i++) {
int expected = glyph[i] ? 255 : 0;
int diff = patch[i] - expected;
err_normal += diff * diff;
}
// Try inverted: glyph 0 = fg, glyph 1 = bg
float err_inverted = 0;
for (int i = 0; i < PATCH_SZ; i++) {
int expected = glyph[i] ? 0 : 255;
int diff = patch[i] - expected;
err_inverted += diff * diff;
}
if (err_normal < best_error) {
best_error = err_normal;
best_glyph = g;
best_bg = COLOR_BLACK;
best_fg = COLOR_WHITE;
}
if (err_inverted < best_error) {
best_error = err_inverted;
best_glyph = g;
best_bg = COLOR_WHITE;
best_fg = COLOR_BLACK;
}
}
*out_bg = best_bg;
*out_fg = best_fg;
return best_glyph;
}
// Convert frame to text mode
void frame_to_text(const uint8_t *pixels, const FontROM *rom,
uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
uint8_t patch[PATCH_SZ];
for (int gr = 0; gr < GRID_H; gr++) {
for (int gc = 0; gc < GRID_W; gc++) {
int idx = gr * GRID_W + gc;
// Extract patch
for (int y = 0; y < CHAR_H; y++) {
for (int x = 0; x < CHAR_W; x++) {
int px = gc * CHAR_W + x;
int py = gr * CHAR_H + y;
patch[y * CHAR_W + x] = pixels[py * PIXEL_W + px];
}
}
// Find best match
chars[idx] = find_best_glyph(patch, rom, &bg_col[idx], &fg_col[idx]);
}
}
}
// Get current time in nanoseconds since UNIX epoch
uint64_t get_current_time_ns(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return (uint64_t)tv.tv_sec * 1000000000ULL + (uint64_t)tv.tv_usec * 1000ULL;
}
// Write Videotex header (32 bytes, similar to TAV but simpler)
void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
fwrite("\x1FTSVMTAV", 8, 1, f);
// Version: 1 (uint8)
fputc(1, f);
// Grid dimensions (uint8 each)
fputc(GRID_W, f); // cols = 80
fputc(0, f);
fputc(GRID_H, f); // rows = 32
fputc(0, f);
// FPS (uint8)
fputc(fps, f);
// Total frames (uint32, little-endian)
fwrite(&total_frames, sizeof(uint32_t), 1, f);
fputc(0, f); // wavelet filter type
fputc(0, f); // decomposition levels
fputc(0, f); // quantiser Y
fputc(0, f); // quantiser Co
fputc(0, f); // quantiser Cg
// Feature Flags
fputc(0x03, f); // bit 0 = has audio; bit 1 = has subtitle (Videotex is classified as subtitles)
// Video Flags
fputc(0x80, f); // bit 7 = has no video (Videotex is classified as subtitles)
fputc(0, f); // encoder quality level
fputc(0x02, f); // channel layout: Y only
fputc(0, f); // entropy coder
fputc(0, f); // reserved
fputc(0, f); // reserved
fputc(0, f); // device orientation: no rotation
fputc(0, f); // file role: generic
}
// Write extended header packet with metadata
// Returns the file offset where ENDT value is written (for later update)
long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
fputc(PACKET_EXTENDED_HDR, f);
// Helper macros for key-value pairs
#define WRITE_KV_UINT64(key_str, value) do { \
fwrite(key_str, 1, 4, f); \
uint8_t value_type = 0x04; /* Uint64 */ \
fwrite(&value_type, 1, 1, f); \
uint64_t val = (value); \
fwrite(&val, sizeof(uint64_t), 1, f); \
} while(0)
#define WRITE_KV_BYTES(key_str, data, len) do { \
fwrite(key_str, 1, 4, f); \
uint8_t value_type = 0x10; /* Bytes */ \
fwrite(&value_type, 1, 1, f); \
uint16_t length = (len); \
fwrite(&length, sizeof(uint16_t), 1, f); \
fwrite((data), 1, (len), f); \
} while(0)
// Count key-value pairs (BGNT, ENDT, CDAT, VNDR, FMPG)
uint16_t num_pairs = ffmpeg_version ? 5 : 4; // FMPG is optional
fwrite(&num_pairs, sizeof(uint16_t), 1, f);
// BGNT: Video begin time (0 for frame 0)
WRITE_KV_UINT64("BGNT", 0ULL);
// ENDT: Video end time (placeholder, will be updated at end)
long endt_offset = ftell(f);
WRITE_KV_UINT64("ENDT", 0ULL);
// CDAT: Creation time in nanoseconds since UNIX epoch
WRITE_KV_UINT64("CDAT", creation_time_ns);
// VNDR: Encoder name and version
const char *vendor_str = ENCODER_VENDOR_STRING;
WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));
// FMPG: FFmpeg version (if available)
if (ffmpeg_version) {
WRITE_KV_BYTES("FMPG", ffmpeg_version, strlen(ffmpeg_version));
}
#undef WRITE_KV_UINT64
#undef WRITE_KV_BYTES
// Return offset of ENDT value (skip key, type byte)
return endt_offset + 4 + 1; // 4 bytes for "ENDT", 1 byte for type
}
// Write font ROM packet (SSF packet type 0x30)
void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
// Prepare padded ROM data (pad to FONTROM_PADDED_SIZE with zeros)
uint8_t *padded_data = calloc(1, FONTROM_PADDED_SIZE);
memcpy(padded_data, rom_data, data_size);
// Packet structure:
// [type:0x30][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;
// Write packet type and size
fputc(PACKET_SSF, f);
fwrite(&packet_size, sizeof(uint32_t), 1, f);
// Write SSF payload
// Index (3 bytes, always 0 for font ROM)
fputc(0, f);
fputc(0, f);
fputc(0, f);
// Opcode (0x80=lowrom, 0x81=highrom)
fputc(opcode, f);
// Payload length (uint16, little-endian)
uint16_t payload_len = FONTROM_PADDED_SIZE;
fwrite(&payload_len, sizeof(uint16_t), 1, f);
// Font data (padded to 1920 bytes)
fwrite(padded_data, 1, FONTROM_PADDED_SIZE, f);
// Terminator
fputc(0x00, f);
free(padded_data);
fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
data_size, FONTROM_PADDED_SIZE, opcode);
}
// Write timecode packet (nanoseconds)
void write_timecode(FILE *f, uint64_t timecode_ns) {
fputc(PACKET_TIMECODE, f);
fwrite(&timecode_ns, sizeof(uint64_t), 1, f);
}
// Write sync packet
void write_sync(FILE *f) {
fputc(PACKET_SYNC, f);
}
// Write MP2 audio packet
void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
fputc(PACKET_AUDIO_MP2, f);
fwrite(&size, sizeof(uint32_t), 1, f);
fwrite(data, 1, size, f);
}
// Write text packet with separated arrays (better compression)
void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
const uint8_t *chars, int rows, int cols) {
int grid_size = rows * cols;
// Prepare uncompressed data: [rows][cols][fg-array][bg-array][char-array]
// Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
size_t uncompressed_size = 2 + grid_size * 3;
uint8_t *uncompressed = malloc(uncompressed_size);
uncompressed[0] = rows;
uncompressed[1] = cols;
// Copy arrays in order: foreground, background, characters
memcpy(&uncompressed[2], fg_col, grid_size); // Foreground first
memcpy(&uncompressed[2 + grid_size], bg_col, grid_size); // Background second
memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size); // Characters third
// Compress with Zstd
size_t max_compressed = ZSTD_compressBound(uncompressed_size);
uint8_t *compressed = malloc(max_compressed);
size_t compressed_size = ZSTD_compress(compressed, max_compressed,
uncompressed, uncompressed_size, 3);
if (ZSTD_isError(compressed_size)) {
fprintf(stderr, "Zstd compression error\n");
exit(1);
}
// Write packet: [type][size][data]
fputc(PACKET_TEXT, f);
uint32_t size32 = compressed_size;
fwrite(&size32, 4, 1, f);
fwrite(compressed, compressed_size, 1, f);
free(compressed);
free(uncompressed);
}
int main(int argc, char **argv) {
if (argc < 7) {
fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav>\n", argv[0]);
return 1;
}
const char *input_video = NULL;
const char *font_path = NULL;
const char *output_path = NULL;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
}
if (!input_video || !font_path || !output_path) {
fprintf(stderr, "Missing required arguments\n");
return 1;
}
// Capture creation time and FFmpeg version for extended header
uint64_t creation_time_ns = get_current_time_ns();
char *ffmpeg_version = get_ffmpeg_version();
// Detect video FPS
float fps_float = detect_fps(input_video);
uint8_t fps = (uint8_t)(fps_float + 0.5f); // Round to nearest integer
fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);
// Load font ROM
FontROM *rom = load_font_rom(font_path);
if (!rom) {
fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
return 1;
}
// Open FFmpeg pipe for grayscale frames at 560×448
char ffmpeg_cmd[1024];
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
"ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
"-f rawvideo -pix_fmt gray - 2>/dev/null",
input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
fprintf(stderr, "Opening video stream...\n");
FILE *video_pipe = popen(ffmpeg_cmd, "r");
if (!video_pipe) {
fprintf(stderr, "Failed to open FFmpeg pipe\n");
return 1;
}
// Open FFmpeg pipe for MP2 audio (32 KHz stereo)
char audio_cmd[1024];
snprintf(audio_cmd, sizeof(audio_cmd),
"ffmpeg -i \"%s\" -vn -ar %d -ac 2 -f mp2 -b:a 96k - 2>/dev/null",
input_video, SAMPLE_RATE);
fprintf(stderr, "Opening audio stream...\n");
FILE *audio_pipe = popen(audio_cmd, "r");
if (!audio_pipe) {
fprintf(stderr, "Failed to open audio FFmpeg pipe\n");
pclose(video_pipe);
return 1;
}
// Open output file
FILE *out = fopen(output_path, "wb");
if (!out) {
fprintf(stderr, "Failed to open output file\n");
pclose(video_pipe);
pclose(audio_pipe);
return 1;
}
// Write Videotex header with placeholder total_frames (will update at end)
long header_offset = ftell(out);
write_videotex_header(out, fps, 0);
// Write extended header packet (before first timecode)
long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);
// Upload font ROM to TSVM (split into lowrom and highrom)
fprintf(stderr, "Uploading font ROM to TSVM...\n");
FILE *rom_file = fopen(font_path, "rb");
if (rom_file) {
fseek(rom_file, 0, SEEK_END);
long rom_size = ftell(rom_file);
fseek(rom_file, 0, SEEK_SET);
uint8_t *raw_rom = malloc(rom_size);
if (raw_rom && fread(raw_rom, 1, rom_size, rom_file) == rom_size) {
// Split into lowrom and highrom
size_t bytes_per_half = (GLYPHS_PER_ROM * 14); // 128 glyphs × 14 bytes = 1792
// Write lowrom (first 128 glyphs)
if (rom_size >= bytes_per_half) {
write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
}
// Write highrom (second 128 glyphs)
if (rom_size >= bytes_per_half * 2) {
write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
} else if (rom_size > bytes_per_half) {
// Partial highrom
write_fontrom_packet(out, raw_rom + bytes_per_half, rom_size - bytes_per_half, SSF_OPCODE_HIGHROM);
}
free(raw_rom);
}
fclose(rom_file);
}
// Allocate buffers
size_t frame_size = PIXEL_W * PIXEL_H;
uint8_t *gray_pixels = malloc(frame_size);
uint8_t *bg_col = malloc(GRID_W * GRID_H);
uint8_t *fg_col = malloc(GRID_W * GRID_H);
uint8_t *chars = malloc(GRID_W * GRID_H);
// Audio buffer (read MP2 frames in 1152-sample chunks, ~36ms at 32 KHz)
#define AUDIO_CHUNK_SIZE 8192 // Arbitrary MP2 frame buffer size
uint8_t *audio_buffer = malloc(AUDIO_CHUNK_SIZE);
size_t audio_available = 0;
uint32_t frame_num = 0;
uint64_t total_audio_bytes = 0;
fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
GRID_W, GRID_H, PIXEL_W, PIXEL_H);
// Track encoding start time
struct timeval start_time, now;
gettimeofday(&start_time, NULL);
// Read and process frames
while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
// Calculate timecode in nanoseconds
uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);
// Write audio packet first (if available)
// Try to read ~1 frame worth of audio
audio_available = fread(audio_buffer, 1, AUDIO_CHUNK_SIZE, audio_pipe);
if (audio_available > 0) {
write_audio_mp2(out, audio_buffer, audio_available);
total_audio_bytes += audio_available;
}
// Write timecode
write_timecode(out, timecode_ns);
// Convert to text mode
frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);
// Write text packet (treated as I-frame)
write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);
// Write sync packet after each frame
write_sync(out);
frame_num++;
if (frame_num % 30 == 0) {
// Calculate encoding speed
gettimeofday(&now, NULL);
double elapsed = (now.tv_sec - start_time.tv_sec) +
(now.tv_usec - start_time.tv_usec) / 1000000.0;
double encoding_fps = frame_num / elapsed;
fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
fflush(stderr);
}
}
// Read any remaining audio
while ((audio_available = fread(audio_buffer, 1, AUDIO_CHUNK_SIZE, audio_pipe)) > 0) {
write_audio_mp2(out, audio_buffer, audio_available);
total_audio_bytes += audio_available;
}
// Final timing
gettimeofday(&now, NULL);
double total_time = (now.tv_sec - start_time.tv_sec) +
(now.tv_usec - start_time.tv_usec) / 1000000.0;
double final_fps = frame_num / total_time;
fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
frame_num, total_time, final_fps);
fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
(unsigned long long)total_audio_bytes,
total_audio_bytes / 1024.0 / 1024.0);
// Update total_frames in header
if (frame_num > 0) {
fseek(out, header_offset + 12, SEEK_SET); // Offset to total_frames field
fwrite(&frame_num, sizeof(uint32_t), 1, out);
fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
}
// Update ENDT in extended header (calculate end time for last frame)
if (frame_num > 0) {
// Calculate duration: (frame_num - 1) frames * (1/fps) seconds in nanoseconds
uint64_t duration_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
uint64_t endt_ns = duration_ns;
fseek(out, endt_offset, SEEK_SET);
fwrite(&endt_ns, sizeof(uint64_t), 1, out);
fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
(unsigned long long)endt_ns, endt_ns / 1000000000.0);
}
// Cleanup
pclose(video_pipe);
pclose(audio_pipe);
fclose(out);
free(gray_pixels);
free(bg_col);
free(fg_col);
free(chars);
free(audio_buffer);
free(rom->data);
free(rom);
if (ffmpeg_version) free(ffmpeg_version);
return 0;
}

View File

@@ -26,8 +26,9 @@
#define TAV_PACKET_AUDIO_MP2 0x20
#define TAV_PACKET_AUDIO_PCM8 0x21
#define TAV_PACKET_AUDIO_TAD 0x24
#define TAV_PACKET_SUBTITLE 0x30 // Legacy SSF (frame-locked)
#define TAV_PACKET_SUBTITLE 0x30 // Legacy SSF (frame-locked), also used for Font ROM upload
#define TAV_PACKET_SUBTITLE_TC 0x31 // SSF-TC (timecode-based)
#define TAV_PACKET_VIDEOTEX 0x3F // Videotex (text-mode video)
#define TAV_PACKET_AUDIO_TRACK 0x40
#define TAV_PACKET_VIDEO_CH2_I 0x70
#define TAV_PACKET_VIDEO_CH2_P 0x71
@@ -77,6 +78,7 @@ typedef struct {
int audio_tad_count;
int audio_track_count;
int subtitle_count;
int videotex_count;
int timecode_count;
int sync_count;
int sync_ntsc_count;
@@ -91,6 +93,7 @@ typedef struct {
uint64_t audio_pcm8_bytes;
uint64_t audio_tad_bytes;
uint64_t audio_track_bytes;
uint64_t videotex_bytes;
} packet_stats_t;
// Display options
@@ -122,6 +125,7 @@ const char* get_packet_type_name(uint8_t type) {
case TAV_PACKET_AUDIO_TAD: return "AUDIO TAD (zstd)";
case TAV_PACKET_SUBTITLE: return "SUBTITLE (SSF frame-locked)";
case TAV_PACKET_SUBTITLE_TC: return "SUBTITLE (SSF-TC timecoded)";
case TAV_PACKET_VIDEOTEX: return "VIDEOTEX (text-mode video)";
case TAV_PACKET_AUDIO_TRACK: return "AUDIO TRACK (Separate MP2)";
case TAV_PACKET_EXIF: return "METADATA (EXIF)";
case TAV_PACKET_ID3V1: return "METADATA (ID3v1)";
@@ -820,6 +824,45 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un
break;
}
case TAV_PACKET_VIDEOTEX: {
stats.videotex_count++;
uint32_t size;
if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
stats.videotex_bytes += size;
if (!opts.summary_only && display) {
// Read compressed data
uint8_t *compressed_data = malloc(size);
if (compressed_data && fread(compressed_data, 1, size, fp) == size) {
// Allocate decompression buffer (max 2 + 80*32*3 = 7682 bytes)
size_t const decompress_size = 8192;
uint8_t *decompressed_data = malloc(decompress_size);
if (decompressed_data) {
size_t actual_size = ZSTD_decompress(decompressed_data, decompress_size,
compressed_data, size);
if (!ZSTD_isError(actual_size) && actual_size >= 2) {
uint8_t rows = decompressed_data[0];
uint8_t cols = decompressed_data[1];
printf(" - size=%u bytes (decompressed: %zu bytes, grid: %ux%u, ratio: %.2f:1)",
size, actual_size, cols, rows, (double)actual_size / size);
} else {
printf(" - size=%u bytes (decompression failed)", size);
}
free(decompressed_data);
} else {
printf(" - size=%u bytes", size);
}
free(compressed_data);
} else {
printf(" - size=%u bytes", size);
fseek(fp, size, SEEK_CUR);
}
} else {
fseek(fp, size, SEEK_CUR);
}
break;
}
case TAV_PACKET_EXIF:
case TAV_PACKET_ID3V1:
case TAV_PACKET_ID3V2:
@@ -950,6 +993,12 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un
printf("\nOther:\n");
printf(" Timecodes: %d\n", stats.timecode_count);
printf(" Subtitles: %d\n", stats.subtitle_count);
if (stats.videotex_count > 0) {
printf(" Videotex frames: %d (%llu bytes, %.2f MB)\n",
stats.videotex_count,
(unsigned long long)stats.videotex_bytes,
stats.videotex_bytes / 1024.0 / 1024.0);
}
printf(" Extended headers: %d\n", stats.extended_header_count);
printf(" Metadata packets: %d\n", stats.metadata_count);
printf(" Loop points: %d\n", stats.loop_point_count);