From 94749a3ad600c6932c1b626062d6414daa45a912 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Tue, 14 Oct 2025 00:27:26 +0900 Subject: [PATCH] TAV: timecode packets --- terranmon.txt | 77 +++++++++++++++++++++++++++++-------- video_encoder/encoder_tav.c | 35 ++++++++++++++++- 2 files changed, 95 insertions(+), 17 deletions(-) diff --git a/terranmon.txt b/terranmon.txt index 283e47d..7ca3b46 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -930,14 +930,23 @@ transmission capability, and region-of-interest coding. - 4 = Co-Cg/Ct-Cp (100: no alpha, has chroma, no luma) - 5 = Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma) - 6-7 = Reserved/invalid (would indicate no luma and no chroma) - uint8 Reserved[4]: fill with zeros + uint8 Device Orientation + - 0 = No rotation + - 1 = Clockwise 90 deg + - 2 = 180 deg + - 3 = Clockwise 270 deg + - 4 = Mirrored, No rotation + - 5 = Mirrored, Clockwise 90 deg + - 6 = Mirrored, 180 deg + - 7 = Mirrored, Clockwise 270 deg + uint8 Reserved[3]: fill with zeros uint8 File Role - 0 = generic - 1 = this file is header-only, and UCF payload will be followed (used by seekable movie file) When header-only file contain video packets, they should be presented as an Intro Movie before the user-interactable selector (served by the UCF payoad) -## Packet Structure (all packets EXCEPT sync packets follow this structure; sync packets are one-byte packet) +## Packet Structure (some special packets have no payload. See Packet Types for details) uint8 Packet Type uint32 Payload Size * Payload @@ -967,30 +976,62 @@ transmission capability, and region-of-interest coding. 
0xE3: Vorbis Comment packet 0xE4: CD-text packet - 0x00: No-op - 0xF0: Loop point start - 0xF1: Loop point end - 0xFE: NTSC sync packet (used by player to calculate exact framerate-wise performance) - 0xFF: Sync packet + 0x00: No-op (no payload) + 0xEF: TAV Extended Header + 0xF0: Loop point start (insert right AFTER the TC packet; no payload) + 0xF1: Loop point end (insert right AFTER the TC packet; no payload) + 0xFD: Timecode (TC) Packet [for frame 0, insert at the beginning; otherwise, insert right AFTER the sync] + 0xFE: NTSC sync packet (used by player to calculate exact framerate-wise performance; no payload) + 0xFF: Sync packet (no payload) ### Packet Precedence Before the first frame group: - 1. Standard metadata payloads + 1. TAV Extended header (if any) + 2. Standard metadata payloads (if any) Frame group: - 1. File packet (0x1F) - 2. Audio packets - 3. Subtitle packets - 4. Main video packets (0x10-0x1E) - 5. Multiplexed video packets (0x70-7F) - 6. Loop point packets + 1. TC Packet (0xFD) or File packet (0x1F) [mutually exclusive!] + 2. Loop point packets + 3. Audio packets + 4. Subtitle packets + 5. Main video packets (0x10-0x1E) + 6. Multiplexed video packets (0x70-7F) After a frame group: 1.
Sync packet -## Standard metadata payload packet structure - uint8 0xE0/0xE1/0xE2/.../0xEF (see Packet Types section) + +## TAV Extended Header Specification and Structure + uint8 0xEF + uint16 Number of Key-Value pairs + * Key-Value pairs + + ### Key-Value Pair + uint8 Key[4] + uint8 Value Type + - 0x00: (U)Int16 + - 0x01: (U)Int24 + - 0x02: (U)Int32 + - 0x03: (U)Int48 + - 0x04: (U)Int64 + - 0x10: Bytes + + uint16 Length of bytes + * Bytes + + type_t Value + + ### List of Keys + - Uint64 BGNT: Video begin time (must be equal to the value of the first Timecode packet) + - Uint64 ENDT: Video end time (must be equal to the value of the last Timecode packet) + - Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014") + - Bytes FMPG: FFmpeg version (typically "ffmpeg version 6.1.2"; the first line of text FFmpeg emits right before the copyright text) + - Uint64 CDAT: Creation time in nanoseconds since UNIX Epoch + + +## Standard Metadata Payload Packet Structure + uint8 0xE0/0xE1/0xE2/.../0xEE (see Packet Types section) uint32 Length of the payload * Standard payload @@ -1000,6 +1041,10 @@ transmission capability, and region-of-interest coding. which gets precedence is implementation-dependent. ONE EXCEPTION is ID3v1 and ID3v2 where ID3v2 gets precedence. 
// Packet type identifiers (the first byte of a TAV packet)
#define TAV_PACKET_IFRAME      0x10  // Intra frame (keyframe)
#define TAV_PACKET_PFRAME      0x11  // Predicted frame
#define TAV_PACKET_AUDIO_MP2   0x20  // MP2 audio
#define TAV_PACKET_SUBTITLE    0x30  // Subtitle packet
#define TAV_PACKET_TIMECODE    0xFD  // Timecode packet
#define TAV_PACKET_SYNC_NTSC   0xFE  // NTSC Sync packet
#define TAV_PACKET_SYNC        0xFF  // Sync packet

static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate);

/*
 * Write the Timecode (TC) packet for one frame.
 *
 * Timecode Packet Structure (9 bytes, no payload-size field):
 *   uint8   Packet Type (TAV_PACKET_TIMECODE, 0xFD)
 *   uint64  Time since stream start in nanoseconds, little-endian
 *
 * The timecode is derived purely from the frame number, so the value written
 * for frame 0 is always zero here.
 *
 * Placement in the stream: the TC packet for frame 0 goes at the beginning of
 * the packet stream; for every later frame it goes right after the sync packet
 * of the previous frame.
 *
 * Parameters:
 *   output            - destination stream; nothing happens when NULL
 *   frame_num         - zero-based frame index; must not be negative
 *   fps               - integer frame rate; must be positive unless
 *                       is_ntsc_framerate is set (it is not read in that case)
 *   is_ntsc_framerate - non-zero selects the 30000/1001 fps time base
 *
 * Invalid arguments (negative frame number, non-positive fps on the non-NTSC
 * path) would produce a division by zero or a wrapped-around timecode, so the
 * packet is skipped with a warning on stderr instead of being emitted.
 */
static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate) {
    if (output == NULL) {
        return;
    }
    if (frame_num < 0 || (!is_ntsc_framerate && fps <= 0)) {
        fprintf(stderr, "Warning: skipping timecode packet (frame %d, fps %d)\n", frame_num, fps);
        return;
    }

    // Integer-only arithmetic; the product fits in 64 bits for any int frame_num.
    uint64_t timecode_ns;
    if (is_ntsc_framerate) {
        // 30000/1001 fps (29.97...): time_ns = frame_num * 1001000000 / 30000
        // NOTE(review): the time base is fixed at 30000/1001 no matter what fps
        // says; confirm the flag is never set for other 1001-based rates.
        timecode_ns = ((uint64_t)frame_num * 1001000000ULL) / 30000ULL;
    } else {
        // Standard framerate: time_ns = frame_num * 1000000000 / fps
        timecode_ns = ((uint64_t)frame_num * 1000000000ULL) / (uint64_t)fps;
    }

    // Serialise byte by byte so the on-disk value is little-endian on every
    // host, and emit type + timecode in a single write so a failure cannot
    // leave a type byte without its timecode between two calls.
    uint8_t packet[1 + sizeof(uint64_t)];
    packet[0] = TAV_PACKET_TIMECODE;
    for (size_t i = 0; i < sizeof(uint64_t); i++) {
        packet[1 + i] = (uint8_t)(timecode_ns >> (8 * i));
    }

    if (fwrite(packet, 1, sizeof packet, output) != sizeof packet) {
        fprintf(stderr, "Warning: failed to write timecode packet for frame %d\n", frame_num);
    }
}