Files
tsvm/video_encoder/encoder_tav_text.c

785 lines
26 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
encoder_tav_text.c
Text-based video encoder for TSVM using custom font ROMs
Outputs Videotex files with custom header and packet type 0x3F (text mode)
File structure:
- Videotex header (32 bytes): magic "\x1FTSVM-VT", version, grid dims, fps, total_frames
- Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
- Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
- Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
- rows: uint8 (32)
- cols: uint8 (80)
- fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
- bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
- char-array: rows*cols bytes (glyph indices 0-255)
Total uncompressed size: 2 + (80*32*3) = 7682 bytes
Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
Video size: 80×32 characters (560×448 pixels with 7×14 font)
Audio: MP2 encoding at 96 kbps, 32 KHz stereo (packet 0x20)
Each text frame is treated as an I-frame with sync packet
Usage:
gcc -O3 -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
./encoder_tav_text -i video.mp4 -f font.chr -o output.vtx
*/
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
#define CHAR_W 7
#define CHAR_H 14
#define GRID_W 80
#define GRID_H 32
#define PIXEL_W (GRID_W * CHAR_W) // 560
#define PIXEL_H (GRID_H * CHAR_H) // 448
#define PATCH_SZ (CHAR_W * CHAR_H)
#define SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 1152
// TAV packet types
#define PACKET_TIMECODE 0xFD
#define PACKET_SYNC 0xFF
#define PACKET_AUDIO_MP2 0x20
#define PACKET_SSF 0x30
#define PACKET_TEXT 0x3F
#define PACKET_EXTENDED_HDR 0xEF
// SSF opcodes for font ROM
#define SSF_OPCODE_LOWROM 0x80
#define SSF_OPCODE_HIGHROM 0x81
// Font ROM size constants
#define FONTROM_PADDED_SIZE 1920
#define GLYPHS_PER_ROM 128
// Color mapping (4-bit RGB to TSVM palette)
#define COLOR_BLACK 0xF0
#define COLOR_WHITE 0xFE
// Generate random filename for temporary audio file
static void generate_random_filename(char *filename) {
srand(time(NULL));
const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const int charset_size = sizeof(charset) - 1;
// Start with the prefix
strcpy(filename, "/tmp/");
// Generate 32 random characters
for (int i = 0; i < 32; i++) {
filename[5 + i] = charset[rand() % charset_size];
}
// Add the .mp2 extension
strcpy(filename + 37, ".mp2");
filename[41] = '\0'; // Null terminate
}
char TEMP_AUDIO_FILE[42];
typedef struct {
uint8_t *data; // Binary glyph data (PATCH_SZ bytes per glyph)
int count; // Number of glyphs
} FontROM;
// Get FFmpeg version string
char *get_ffmpeg_version(void) {
FILE *pipe = popen("ffmpeg -version 2>&1 | head -1", "r");
if (!pipe) return NULL;
char *version = malloc(256);
if (!version) {
pclose(pipe);
return NULL;
}
if (fgets(version, 256, pipe)) {
// Remove trailing newline
size_t len = strlen(version);
if (len > 0 && version[len - 1] == '\n') {
version[len - 1] = '\0';
}
pclose(pipe);
return version;
}
free(version);
pclose(pipe);
return NULL;
}
// Detect video FPS using ffprobe
float detect_fps(const char *video_path) {
char cmd[1024];
snprintf(cmd, sizeof(cmd),
"ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
"-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
video_path);
FILE *pipe = popen(cmd, "r");
if (!pipe) return 30.0f; // fallback
char fps_str[64] = {0};
if (fgets(fps_str, sizeof(fps_str), pipe)) {
// Parse fraction like "30/1" or "24000/1001"
int num = 0, den = 1;
if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && den > 0) {
pclose(pipe);
return (float)num / (float)den;
}
}
pclose(pipe);
return 30.0f; // fallback
}
// Load font ROM (14 bytes per glyph, no header)
FontROM *load_font_rom(const char *path) {
FILE *f = fopen(path, "rb");
if (!f) return NULL;
fseek(f, 0, SEEK_END);
long size = ftell(f);
fseek(f, 0, SEEK_SET);
if (size % 14 != 0) {
fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
}
int glyph_count = size / 14;
FontROM *rom = malloc(sizeof(FontROM));
rom->count = glyph_count;
rom->data = malloc(glyph_count * PATCH_SZ);
// Read and unpack glyphs
for (int g = 0; g < glyph_count; g++) {
uint8_t row_bytes[14];
if (fread(row_bytes, 14, 1, f) != 1) {
free(rom->data);
free(rom);
fclose(f);
return NULL;
}
// Unpack bits to binary pixels
for (int row = 0; row < CHAR_H; row++) {
for (int col = 0; col < CHAR_W; col++) {
// Bit 6 = leftmost, bit 0 = rightmost
int bit = (row_bytes[row] >> (6 - col)) & 1;
rom->data[g * PATCH_SZ + row * CHAR_W + col] = bit;
}
}
}
fclose(f);
fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
return rom;
}
// Find best matching glyph for a grayscale patch
int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
// Try both normal and inverted matching
int best_glyph = 0;
float best_error = INFINITY;
uint8_t best_bg = COLOR_BLACK, best_fg = COLOR_WHITE;
for (int g = 0; g < rom->count; g++) {
const uint8_t *glyph = &rom->data[g * PATCH_SZ];
// Try normal: glyph 1 = fg, glyph 0 = bg
float err_normal = 0;
for (int i = 0; i < PATCH_SZ; i++) {
int expected = glyph[i] ? 255 : 0;
int diff = patch[i] - expected;
err_normal += diff * diff;
}
// Try inverted: glyph 0 = fg, glyph 1 = bg
float err_inverted = 0;
for (int i = 0; i < PATCH_SZ; i++) {
int expected = glyph[i] ? 0 : 255;
int diff = patch[i] - expected;
err_inverted += diff * diff;
}
if (err_normal < best_error) {
best_error = err_normal;
best_glyph = g;
best_bg = COLOR_BLACK;
best_fg = COLOR_WHITE;
}
if (err_inverted < best_error) {
best_error = err_inverted;
best_glyph = g;
best_bg = COLOR_WHITE;
best_fg = COLOR_BLACK;
}
}
*out_bg = best_bg;
*out_fg = best_fg;
return best_glyph;
}
// Convert frame to text mode
void frame_to_text(const uint8_t *pixels, const FontROM *rom,
uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
uint8_t patch[PATCH_SZ];
for (int gr = 0; gr < GRID_H; gr++) {
for (int gc = 0; gc < GRID_W; gc++) {
int idx = gr * GRID_W + gc;
// Extract patch
for (int y = 0; y < CHAR_H; y++) {
for (int x = 0; x < CHAR_W; x++) {
int px = gc * CHAR_W + x;
int py = gr * CHAR_H + y;
patch[y * CHAR_W + x] = pixels[py * PIXEL_W + px];
}
}
// Find best match
chars[idx] = find_best_glyph(patch, rom, &bg_col[idx], &fg_col[idx]);
}
}
}
// Get current time in nanoseconds since UNIX epoch
uint64_t get_current_time_ns(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return (uint64_t)tv.tv_sec * 1000000000ULL + (uint64_t)tv.tv_usec * 1000ULL;
}
// Parse MP2 packet header to get accurate packet size
int get_mp2_packet_size(uint8_t *header) {
int bitrate_index = (header[2] >> 4) & 0x0F;
int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;
int bitrate = bitrates[bitrate_index];
if (bitrate == 0) return MP2_DEFAULT_PACKET_SIZE;
int sampling_freq_index = (header[2] >> 2) & 0x03;
int sampling_freqs[] = {44100, 48000, 32000, 0};
int sampling_freq = sampling_freqs[sampling_freq_index];
if (sampling_freq == 0) return MP2_DEFAULT_PACKET_SIZE;
int padding = (header[2] >> 1) & 0x01;
return (144 * bitrate * 1000) / sampling_freq + padding;
}
// Write Videotex header (32 bytes, similar to TAV but simpler)
void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
fwrite("\x1FTSVMTAV", 8, 1, f);
// Version: 1 (uint8)
fputc(1, f);
// Grid dimensions (uint8 each)
uint16_t width = GRID_W;
uint16_t height = GRID_H;
fwrite(&width, sizeof(uint16_t), 1, f); // cols = 80
fwrite(&height, sizeof(uint16_t), 1, f); // rows = 32
// FPS (uint8)
fputc(fps, f);
// Total frames (uint32, little-endian)
fwrite(&total_frames, sizeof(uint32_t), 1, f);
fputc(0, f); // wavelet filter type
fputc(0, f); // decomposition levels
fputc(0, f); // quantiser Y
fputc(0, f); // quantiser Co
fputc(0, f); // quantiser Cg
// Feature Flags
fputc(0x03, f); // bit 0 = has audio; bit 1 = has subtitle (Videotex is classified as subtitles)
// Video Flags
fputc(0x80, f); // bit 7 = has no video (Videotex is classified as subtitles)
fputc(0, f); // encoder quality level
fputc(0x02, f); // channel layout: Y only
fputc(0, f); // entropy coder
fputc(0, f); // reserved
fputc(0, f); // reserved
fputc(0, f); // device orientation: no rotation
fputc(0, f); // file role: generic
}
// Write extended header packet with metadata
// Returns the file offset where ENDT value is written (for later update)
long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
fputc(PACKET_EXTENDED_HDR, f);
// Helper macros for key-value pairs
#define WRITE_KV_UINT64(key_str, value) do { \
fwrite(key_str, 1, 4, f); \
uint8_t value_type = 0x04; /* Uint64 */ \
fwrite(&value_type, 1, 1, f); \
uint64_t val = (value); \
fwrite(&val, sizeof(uint64_t), 1, f); \
} while(0)
#define WRITE_KV_BYTES(key_str, data, len) do { \
fwrite(key_str, 1, 4, f); \
uint8_t value_type = 0x10; /* Bytes */ \
fwrite(&value_type, 1, 1, f); \
uint16_t length = (len); \
fwrite(&length, sizeof(uint16_t), 1, f); \
fwrite((data), 1, (len), f); \
} while(0)
// Count key-value pairs (BGNT, ENDT, CDAT, VNDR, FMPG)
uint16_t num_pairs = ffmpeg_version ? 5 : 4; // FMPG is optional
fwrite(&num_pairs, sizeof(uint16_t), 1, f);
// BGNT: Video begin time (0 for frame 0)
WRITE_KV_UINT64("BGNT", 0ULL);
// ENDT: Video end time (placeholder, will be updated at end)
long endt_offset = ftell(f);
WRITE_KV_UINT64("ENDT", 0ULL);
// CDAT: Creation time in nanoseconds since UNIX epoch
WRITE_KV_UINT64("CDAT", creation_time_ns);
// VNDR: Encoder name and version
const char *vendor_str = ENCODER_VENDOR_STRING;
WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));
// FMPG: FFmpeg version (if available)
if (ffmpeg_version) {
WRITE_KV_BYTES("FMPG", ffmpeg_version, strlen(ffmpeg_version));
}
#undef WRITE_KV_UINT64
#undef WRITE_KV_BYTES
// Return offset of ENDT value (skip key, type byte)
return endt_offset + 4 + 1; // 4 bytes for "ENDT", 1 byte for type
}
// Write font ROM packet (SSF packet type 0x30)
void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
// Prepare padded ROM data (pad to FONTROM_PADDED_SIZE with zeros)
uint8_t *padded_data = calloc(1, FONTROM_PADDED_SIZE);
memcpy(padded_data, rom_data, data_size);
// Packet structure:
// [type:0x30][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;
// Write packet type and size
fputc(PACKET_SSF, f);
fwrite(&packet_size, sizeof(uint32_t), 1, f);
// Write SSF payload
// Index (3 bytes, always 0 for font ROM)
fputc(0, f);
fputc(0, f);
fputc(0, f);
// Opcode (0x80=lowrom, 0x81=highrom)
fputc(opcode, f);
// Payload length (uint16, little-endian)
uint16_t payload_len = FONTROM_PADDED_SIZE;
fwrite(&payload_len, sizeof(uint16_t), 1, f);
// Font data (padded to 1920 bytes)
fwrite(padded_data, 1, FONTROM_PADDED_SIZE, f);
// Terminator
fputc(0x00, f);
free(padded_data);
fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
data_size, FONTROM_PADDED_SIZE, opcode);
}
// Write timecode packet (nanoseconds)
void write_timecode(FILE *f, uint64_t timecode_ns) {
fputc(PACKET_TIMECODE, f);
fwrite(&timecode_ns, sizeof(uint64_t), 1, f);
}
// Write sync packet
void write_sync(FILE *f) {
fputc(PACKET_SYNC, f);
}
// Write MP2 audio packet
void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
fputc(PACKET_AUDIO_MP2, f);
fwrite(&size, sizeof(uint32_t), 1, f);
fwrite(data, 1, size, f);
}
// Write text packet with separated arrays (better compression)
void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
const uint8_t *chars, int rows, int cols) {
int grid_size = rows * cols;
// Prepare uncompressed data: [rows][cols][fg-array][bg-array][char-array]
// Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
size_t uncompressed_size = 2 + grid_size * 3;
uint8_t *uncompressed = malloc(uncompressed_size);
uncompressed[0] = rows;
uncompressed[1] = cols;
// Copy arrays in order: foreground, background, characters
memcpy(&uncompressed[2], fg_col, grid_size); // Foreground first
memcpy(&uncompressed[2 + grid_size], bg_col, grid_size); // Background second
memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size); // Characters third
// Compress with Zstd
size_t max_compressed = ZSTD_compressBound(uncompressed_size);
uint8_t *compressed = malloc(max_compressed);
size_t compressed_size = ZSTD_compress(compressed, max_compressed,
uncompressed, uncompressed_size, 3);
if (ZSTD_isError(compressed_size)) {
fprintf(stderr, "Zstd compression error\n");
exit(1);
}
// Write packet: [type][size][data]
fputc(PACKET_TEXT, f);
uint32_t size32 = compressed_size;
fwrite(&size32, 4, 1, f);
fwrite(compressed, compressed_size, 1, f);
free(compressed);
free(uncompressed);
}
int main(int argc, char **argv) {
if (argc < 7) {
fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav>\n", argv[0]);
return 1;
}
const char *input_video = NULL;
const char *font_path = NULL;
const char *output_path = NULL;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
}
if (!input_video || !font_path || !output_path) {
fprintf(stderr, "Missing required arguments\n");
return 1;
}
// Generate random temp filename for audio
generate_random_filename(TEMP_AUDIO_FILE);
// Capture creation time and FFmpeg version for extended header
uint64_t creation_time_ns = get_current_time_ns();
char *ffmpeg_version = get_ffmpeg_version();
// Detect video FPS
float fps_float = detect_fps(input_video);
uint8_t fps = (uint8_t)(fps_float + 0.5f); // Round to nearest integer
fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);
// Load font ROM
FontROM *rom = load_font_rom(font_path);
if (!rom) {
fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
return 1;
}
// Open FFmpeg pipe for grayscale frames at 560×448
char ffmpeg_cmd[1024];
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
"ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
"-f rawvideo -pix_fmt gray - 2>/dev/null",
input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
fprintf(stderr, "Opening video stream...\n");
FILE *video_pipe = popen(ffmpeg_cmd, "r");
if (!video_pipe) {
fprintf(stderr, "Failed to open FFmpeg pipe\n");
return 1;
}
// Extract MP2 audio to temporary file using libtwolame
fprintf(stderr, "Extracting MP2 audio...\n");
char audio_cmd[1024];
snprintf(audio_cmd, sizeof(audio_cmd),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 224k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
input_video, SAMPLE_RATE, TEMP_AUDIO_FILE);
int audio_result = system(audio_cmd);
if (audio_result != 0) {
fprintf(stderr, "Warning: Audio extraction failed, continuing without audio\n");
}
// Open MP2 file for reading
FILE *mp2_file = NULL;
long audio_remaining = 0;
if (audio_result == 0) {
mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
if (mp2_file) {
fseek(mp2_file, 0, SEEK_END);
audio_remaining = ftell(mp2_file);
fseek(mp2_file, 0, SEEK_SET);
fprintf(stderr, "Audio ready: %ld bytes\n", audio_remaining);
}
}
// Open output file
FILE *out = fopen(output_path, "wb");
if (!out) {
fprintf(stderr, "Failed to open output file\n");
pclose(video_pipe);
if (mp2_file) fclose(mp2_file);
return 1;
}
// Write Videotex header with placeholder total_frames (will update at end)
long header_offset = ftell(out);
write_videotex_header(out, fps, 0);
// Write extended header packet (before first timecode)
long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);
// Upload font ROM to TSVM (split into lowrom and highrom)
fprintf(stderr, "Uploading font ROM to TSVM...\n");
FILE *rom_file = fopen(font_path, "rb");
if (rom_file) {
fseek(rom_file, 0, SEEK_END);
long rom_size = ftell(rom_file);
fseek(rom_file, 0, SEEK_SET);
uint8_t *raw_rom = malloc(rom_size);
if (raw_rom && fread(raw_rom, 1, rom_size, rom_file) == rom_size) {
// Split into lowrom and highrom
size_t bytes_per_half = (GLYPHS_PER_ROM * 14); // 128 glyphs × 14 bytes = 1792
// Write lowrom (first 128 glyphs)
if (rom_size >= bytes_per_half) {
write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
}
// Write highrom (second 128 glyphs)
if (rom_size >= bytes_per_half * 2) {
write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
} else if (rom_size > bytes_per_half) {
// Partial highrom
write_fontrom_packet(out, raw_rom + bytes_per_half, rom_size - bytes_per_half, SSF_OPCODE_HIGHROM);
}
free(raw_rom);
}
fclose(rom_file);
}
// Allocate buffers
size_t frame_size = PIXEL_W * PIXEL_H;
uint8_t *gray_pixels = malloc(frame_size);
uint8_t *bg_col = malloc(GRID_W * GRID_H);
uint8_t *fg_col = malloc(GRID_W * GRID_H);
uint8_t *chars = malloc(GRID_W * GRID_H);
// Audio buffer for MP2 packets
#define MP2_BUFFER_SIZE 2048
uint8_t *audio_buffer = malloc(MP2_BUFFER_SIZE);
uint32_t frame_num = 0;
uint64_t total_audio_bytes = 0;
// Audio timing calculation
double frame_audio_time = 1.0 / fps_float; // Time per video frame
double packet_audio_time = (double)MP2_DEFAULT_PACKET_SIZE / SAMPLE_RATE; // Time per audio packet
double packets_per_frame = frame_audio_time / packet_audio_time;
double audio_frames_in_buffer = 0.0; // Simulated audio buffer level
fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
GRID_W, GRID_H, PIXEL_W, PIXEL_H);
// Track encoding start time
struct timeval start_time, now;
gettimeofday(&start_time, NULL);
// Read and process frames
while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
// Calculate timecode in nanoseconds
uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);
// Write audio packets for this frame (based on timing)
if (mp2_file && audio_remaining > 0) {
// Simulate buffer consumption
audio_frames_in_buffer -= packets_per_frame;
// Calculate how many packets we need to maintain buffer
double target_level = fmax(packets_per_frame, 2.0);
int packets_to_insert = 0;
if (audio_frames_in_buffer < target_level) {
double deficit = target_level - audio_frames_in_buffer;
packets_to_insert = (int)ceil(deficit);
}
// Insert the calculated number of audio packets
for (int q = 0; q < packets_to_insert; q++) {
// Peek at header to get actual packet size
long pos = ftell(mp2_file);
uint8_t header[4];
if (fread(header, 1, 4, mp2_file) != 4) break;
fseek(mp2_file, pos, SEEK_SET); // Rewind to re-read with full packet
int actual_packet_size = get_mp2_packet_size(header);
size_t bytes_to_read = actual_packet_size;
// Clamp to remaining audio
if (bytes_to_read > audio_remaining) {
bytes_to_read = audio_remaining;
}
// Sanity check
if (bytes_to_read > MP2_BUFFER_SIZE) {
fprintf(stderr, "ERROR: MP2 packet size %zu exceeds buffer\n", bytes_to_read);
break;
}
// Read full packet
size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
if (bytes_read == 0) break;
// Write MP2 audio packet
write_audio_mp2(out, audio_buffer, bytes_read);
// Track audio
audio_remaining -= bytes_read;
audio_frames_in_buffer++;
total_audio_bytes += bytes_read;
}
}
// Write timecode
write_timecode(out, timecode_ns);
// Convert to text mode
frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);
// Write text packet (treated as I-frame)
write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);
// Write sync packet after each frame
write_sync(out);
frame_num++;
if (frame_num % 30 == 0) {
// Calculate encoding speed
gettimeofday(&now, NULL);
double elapsed = (now.tv_sec - start_time.tv_sec) +
(now.tv_usec - start_time.tv_usec) / 1000000.0;
double encoding_fps = frame_num / elapsed;
fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
fflush(stderr);
}
}
// Write any remaining audio
if (mp2_file && audio_remaining > 0) {
while (audio_remaining > 0) {
// Peek at header to get actual packet size
long pos = ftell(mp2_file);
uint8_t header[4];
if (fread(header, 1, 4, mp2_file) != 4) break;
fseek(mp2_file, pos, SEEK_SET);
int actual_packet_size = get_mp2_packet_size(header);
size_t bytes_to_read = (actual_packet_size < audio_remaining) ? actual_packet_size : audio_remaining;
if (bytes_to_read > MP2_BUFFER_SIZE) break;
size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
if (bytes_read == 0) break;
write_audio_mp2(out, audio_buffer, bytes_read);
audio_remaining -= bytes_read;
total_audio_bytes += bytes_read;
}
}
// Final timing
gettimeofday(&now, NULL);
double total_time = (now.tv_sec - start_time.tv_sec) +
(now.tv_usec - start_time.tv_usec) / 1000000.0;
double final_fps = frame_num / total_time;
fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
frame_num, total_time, final_fps);
fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
(unsigned long long)total_audio_bytes,
total_audio_bytes / 1024.0 / 1024.0);
// Update total_frames in header
if (frame_num > 0) {
fseek(out, header_offset + 14, SEEK_SET); // Offset to total_frames field
fwrite(&frame_num, sizeof(uint32_t), 1, out);
fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
}
// Update ENDT in extended header (calculate end time for last frame)
if (frame_num > 0) {
// Calculate duration: (frame_num - 1) frames * (1/fps) seconds in nanoseconds
uint64_t duration_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
uint64_t endt_ns = duration_ns;
fseek(out, endt_offset, SEEK_SET);
fwrite(&endt_ns, sizeof(uint64_t), 1, out);
fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
(unsigned long long)endt_ns, endt_ns / 1000000000.0);
}
// Cleanup
pclose(video_pipe);
if (mp2_file) {
fclose(mp2_file);
unlink(TEMP_AUDIO_FILE); // Remove temporary audio file
}
fclose(out);
free(gray_pixels);
free(bg_col);
free(fg_col);
free(chars);
free(audio_buffer);
free(rom->data);
free(rom);
if (ffmpeg_version) free(ffmpeg_version);
return 0;
}