TAV: timecode packets

This commit is contained in:
minjaesong
2025-10-14 00:27:26 +09:00
parent e705d274de
commit 94749a3ad6
2 changed files with 95 additions and 17 deletions

View File

@@ -930,14 +930,23 @@ transmission capability, and region-of-interest coding.
- 4 = Co-Cg/Ct-Cp (100: no alpha, has chroma, no luma)
- 5 = Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma)
- 6-7 = Reserved/invalid (would indicate no luma and no chroma)
uint8 Reserved[4]: fill with zeros
uint8 Device Orientation
- 0 = No rotation
- 1 = Clockwise 90 deg
- 2 = 180 deg
- 3 = Clockwise 270 deg
- 4 = Mirrored, No rotation
- 5 = Mirrored, Clockwise 90 deg
- 6 = Mirrored, 180 deg
- 7 = Mirrored, Clockwise 270 deg
uint8 Reserved[3]: fill with zeros
uint8 File Role
- 0 = generic
- 1 = this file is header-only, and a UCF payload follows it (used by seekable movie files)
When a header-only file contains video packets, they should be presented as an Intro Movie
before the user-interactable selector (served by the UCF payload)
## Packet Structure (all packets EXCEPT sync packets follow this structure; sync packets are one-byte packet)
## Packet Structure (some special packets have no payload. See Packet Types for details)
uint8 Packet Type
uint32 Payload Size
* Payload
@@ -967,30 +976,62 @@ transmission capability, and region-of-interest coding.
0xE3: Vorbis Comment packet
0xE4: CD-text packet
<Special packets>
0x00: No-op
0xF0: Loop point start
0xF1: Loop point end
0xFE: NTSC sync packet (used by player to calculate exact framerate-wise performance)
0xFF: Sync packet
0x00: No-op (no payload)
0xEF: TAV Extended Header
0xF0: Loop point start (insert right AFTER the TC packet; no payload)
0xF1: Loop point end (insert right AFTER the TC packet; no payload)
0xFD: Timecode (TC) Packet [for frame 0, insert at the beginning; otherwise, insert right AFTER the sync]
0xFE: NTSC sync packet (used by player to calculate exact framerate-wise performance; no payload)
0xFF: Sync packet (no payload)
### Packet Precedence
Before the first frame group:
1. Standard metadata payloads
1. TAV Extended header (if any)
2. Standard metadata payloads (if any)
Frame group:
1. File packet (0x1F)
2. Audio packets
3. Subtitle packets
4. Main video packets (0x10-0x1E)
5. Multiplexed video packets (0x70-7F)
6. Loop point packets
1. TC Packet (0xFD) or File packet (0x1F) [mutually exclusive!]
2. Loop point packets
3. Audio packets
4. Subtitle packets
5. Main video packets (0x10-0x1E)
6. Multiplexed video packets (0x70-7F)
After a frame group:
1. Sync packet
## Standard metadata payload packet structure
uint8 0xE0/0xE1/0xE2/.../0xEF (see Packet Types section)
## TAV Extended Header Specification and Structure
uint8 0xEF
uint16 Number of Key-Value pairs
* Key-Value pairs
### Key-Value Pair
uint8 Key[4]
uint8 Value Type
- 0x00: (U)Int16
- 0x01: (U)Int24
- 0x02: (U)Int32
- 0x03: (U)Int48
- 0x04: (U)Int64
- 0x10: Bytes
<if Value Type is Bytes>
uint16 Length of bytes
* Bytes
<otherwise>
type_t Value
### List of Keys
- Uint64 BGNT: Video begin time (must be equal to the value of the first Timecode packet)
- Uint64 ENDT: Video end time (must be equal to the value of the last Timecode packet)
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014")
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 6.1.2"; the first line of text FFmpeg emits right before the copyright text)
- Uint64 CDAT: Creation time in nanoseconds since UNIX Epoch
## Standard Metadata Payload Packet Structure
uint8 0xE0/0xE1/0xE2/.../0xEE (see Packet Types section)
uint32 Length of the payload
* Standard payload
@@ -1000,6 +1041,10 @@ transmission capability, and region-of-interest coding.
which gets precedence is implementation-dependent. ONE EXCEPTION is ID3v1 and ID3v2 where ID3v2 gets
precedence.
## Timecode Packet Structure
uint8 Packet Type (0xFD)
uint64 Time since stream start in nanoseconds (this may NOT start from zero if the video is coming from a livestream)
## Video Packet Structure
uint8 Packet Type
uint32 Compressed Size

View File

@@ -41,6 +41,7 @@
#define TAV_PACKET_PFRAME 0x11 // Predicted frame
#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio
#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet
#define TAV_PACKET_TIMECODE 0xFD // Timecode packet (type byte followed by a uint64: nanoseconds since stream start)
#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet
#define TAV_PACKET_SYNC 0xFF // Sync packet
@@ -607,6 +608,7 @@ static int calculate_max_decomp_levels(int width, int height);
static int start_audio_conversion(tav_encoder_t *enc);
static int get_mp2_packet_size(uint8_t *header);
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono);
// Writes one timecode packet (type byte + uint64 nanosecond timestamp) for frame_num to output
static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate);
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output);
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps);
static subtitle_entry_t* parse_srt_file(const char *filename, int fps);
@@ -3285,6 +3287,29 @@ static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, c
return 1 + 4 + packet_size; // Total bytes written
}
// Write the timecode packet for one frame.
//
// Packet layout (9 bytes, no payload-size field):
//   uint8  TAV_PACKET_TIMECODE
//   uint64 time since stream start in nanoseconds, little-endian
//
// Parameters:
//   output            - stream the packet is appended to
//   frame_num         - zero-based index of the frame the timecode belongs to
//   fps               - integer frame rate; only used when is_ntsc_framerate is zero
//   is_ntsc_framerate - non-zero selects the 30000/1001 fps (29.97...) time base
//
// A non-positive fps on the non-NTSC path would divide by zero, so the timecode
// falls back to 0 in that case. A short write is reported on stderr because the
// void return type cannot carry an error to the caller.
//
// NOTE(review): the NTSC branch is hard-wired to 30000/1001 regardless of fps;
// confirm whether other x/1001 rates (e.g. 24000/1001) can reach this function.
static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate) {
    enum { TIMECODE_BYTES = 8, TIMECODE_PACKET_BYTES = 1 + TIMECODE_BYTES };

    // Integer-only arithmetic keeps the result exact and free of float rounding.
    uint64_t timecode_ns;
    if (is_ntsc_framerate) {
        // frame_num / (30000 / 1001) seconds == frame_num * 1001000000 / 30000 ns
        timecode_ns = ((uint64_t)frame_num * 1001000000ULL) / 30000ULL;
    } else if (fps > 0) {
        timecode_ns = ((uint64_t)frame_num * 1000000000ULL) / (uint64_t)fps;
    } else {
        // Guard against division by zero on an invalid frame rate.
        timecode_ns = 0;
    }

    // Serialise byte by byte so the on-disk order is little-endian on every
    // host, instead of dumping the in-memory (host-endian) representation.
    uint8_t packet[TIMECODE_PACKET_BYTES];
    packet[0] = TAV_PACKET_TIMECODE;
    for (int i = 0; i < TIMECODE_BYTES; i++) {
        packet[1 + i] = (uint8_t)(timecode_ns >> (8 * i));
    }

    if (fwrite(packet, 1, sizeof packet, output) != sizeof packet) {
        fprintf(stderr, "Warning: failed to write timecode packet for frame %d\n", frame_num);
    }
}
// Process audio for current frame (copied and adapted from TEV)
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
@@ -3991,6 +4016,9 @@ int main(int argc, char *argv[]) {
KEYFRAME_INTERVAL = CLAMP(enc->output_fps >> 4, 2, 4); // refresh often because deltas in DWT are more visible than DCT
// how in the world GOP of 2 produces smallest file??? I refuse to believe it but that's the test result.
// Write timecode packet for frame 0 (before the first frame)
write_timecode_packet(enc->output_fp, 0, enc->output_fps, enc->is_ntsc_framerate);
while (continue_encoding) {
// Check encode limit if specified
if (enc->encode_limit > 0 && frame_count >= enc->encode_limit) {
@@ -3999,6 +4027,11 @@ int main(int argc, char *argv[]) {
break;
}
// Write timecode packet for frames 1+ (right after sync packet from previous frame)
if (frame_count > 0) {
write_timecode_packet(enc->output_fp, frame_count, enc->output_fps, enc->is_ntsc_framerate);
}
if (enc->test_mode) {
// Test mode has a fixed frame count
if (frame_count >= enc->total_frames) {