fix: wrong timecode calculation on NTSC framerates

This commit is contained in:
minjaesong
2025-11-16 02:49:03 +09:00
parent 5c87325366
commit aa7e20695d
4 changed files with 74 additions and 27 deletions

View File

@@ -902,13 +902,21 @@ transmission capability, and region-of-interest coding.
## Header (32 bytes)
uint8 Magic[8]: "\x1F TSVM TAV" or "\x1F TSVM TAP"
uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual)
uint16 Width: video width in pixels
uint16 Height: video height in pixels
uint8 FPS: frames per second. Use 0x00 for still images
uint8 Version:
- 1 = YCoCg-R multi-tile uniform
- 2 = ICtCp multi-tile uniform
- 3 = YCoCg-R monoblock uniform
- 4 = ICtCp monoblock uniform
- 5 = YCoCg-R monoblock perceptual
- 6 = ICtCp monoblock perceptual
- 7 = YCoCg-R multi-tile perceptual
- 8 = ICtCp multi-tile perceptual
uint16 Width: picture width in pixels
uint16 Height: picture height in pixels
uint8 FPS: frames per second. Use 0x00 for still pictures
uint32 Total Frames: number of video frames
- use 0 to denote not-finalised video stream
- use 0xFFFFFFFF to denote still image (.im3 file)
- use 0xFFFFFFFF to denote still picture (.im3 file)
uint8 Wavelet Filter Type:
- 0 = 5/3 reversible (LGT 5/3, JPEG 2000 standard)
- 1 = 9/7 irreversible (CDF 9/7, slight modification of JPEG 2000, default choice)
@@ -919,19 +927,22 @@ transmission capability, and region-of-interest coding.
uint8 Quantiser Index for Y channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Quantiser Index for Co channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Quantiser Index for Cg channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Extra Feature Flags (must be ignored for still images)
- bit 0 = has audio
- bit 1 = has subtitle
- bit 2 = infinite loop (must be ignored when File Role is 1)
uint8 Extra Feature Flags
- bit 0 = has audio (for still pictures: has background music)
- bit 1 = has subtitle (for still pictures: has timed captions)
- bit 2 = infinite loop (has no effect for still pictures)
- bit 7 = has no actual packets, this file is header-only without an Intro Movie
uint8 Video Flags
- bit 0 = interlaced
- bit 1 = is NTSC framerate
- bit 2 = is lossless mode
(shorthand for `-q 6 -Q0,0,0 -w 0 --intra-only --no-perceptual-tuning --arate 384`)
- bit 3 = has region-of-interest coding (for still images only)
- bit 3 = has region-of-interest coding (for still pictures only)
uint8 Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value)
uint8 Channel layout (bit-field: bit 0=has alpha, bit 1=has chroma inverted, bit 2=has luma inverted)
* Luma-only videos must be decoded with fixed Chroma=0
* Chroma-only videos must be decoded with fixed Luma=127
* No-alpha videos must be decoded with fixed Alpha=255
- 0 = Y-Co-Cg/I-Ct-Cp (000: no alpha, has chroma, has luma)
- 1 = Y-Co-Cg-A/I-Ct-Cp-A (001: has alpha, has chroma, has luma)
- 2 = Y/I only (010: no alpha, no chroma, has luma)
@@ -940,9 +951,9 @@ transmission capability, and region-of-interest coding.
- 5 = Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma)
- 6-7 = Reserved/invalid (would indicate no luma and no chroma)
uint8 Entropy Coder
- 0 = Twobit-plane significance map
- 0 = Twobit-plane significance map (deprecated)
- 1 = Embedded Zero Block Coding
- 2 = Raw coefficients
- 2 = Raw coefficients (debugging purpose only)
uint8 Reserved[2]: fill with zeros
uint8 Device Orientation
- 0 = No rotation
@@ -1001,6 +1012,7 @@ transmission capability, and region-of-interest coding.
0xEF: TAV Extended Header
0xF0: Loop point start (insert right AFTER the TC packet; no payload)
0xF1: Loop point end (insert right AFTER the TC packet; no payload)
0xF2: Screen masking info
0xFC: GOP Sync packet (indicates N frames decoded from GOP block)
0xFD: Timecode (TC) Packet [for frame 0, insert at the beginning; otherwise, insert right AFTER the sync]
0xFE: NTSC sync packet (used by player to calculate exact framerate-wise performance; no payload)
@@ -1012,11 +1024,12 @@ transmission capability, and region-of-interest coding.
1. TAV Extended header (if any)
2. Standard metadata payloads (if any)
3. SSF-TC/KSF-TC packets (if any)
When time-coded subtitles are used, the entire subtitle bytes must precede the first video frame.
When time-coded subtitles are used, the entire subtitle data must precede the first video frame.
Think of it as tacking the whole subtitle file before the actual video.
4. Screen Masking packets (if any)
Frame group:
1. TC Packet (0xFD) or Next TAV File (0x1F) [mutually exclusive!]
1. Timecode Packet (0xFD) or Next TAV File (0x1F) [mutually exclusive!]
2. Loop point packet (if any)
3. Audio packets (if any)
4. Subtitle packets (if any) [mutually exclusive with SSF-TC/KSF-TC packets]
@@ -1024,11 +1037,12 @@ transmission capability, and region-of-interest coding.
6. Multiplexed video packets (0x70-7F; if any)
After a frame group:
1. Sync packet
1. Sync packet (0xFC or 0xFF)
2. NTSC Sync packet (if required; it will instruct players to duplicate the current frame)
## TAV Extended Header Specification and Structure
uint8 0xEF
uint8 Packet Type (0xEF)
uint16 Number of Key-Value pairs
* Key-Value pairs
@@ -1056,7 +1070,7 @@ transmission capability, and region-of-interest coding.
## Standard Metadata Payload Packet Structure
uint8 0xE0/0xE1/0xE2/.../0xEE (see Packet Types section)
uint8 Packet Type (0xE0/0xE1/0xE2/.../0xEE; see Packet Types section)
uint32 Length of the payload
* Standard payload
@@ -1070,13 +1084,25 @@ transmission capability, and region-of-interest coding.
uint8 Packet Type (0xFE)
uint64 Time since stream start in nanoseconds (this may NOT start from zero if the video is coming from a livestream)
## Video Packet Structure (0x10, 0x11)
uint8 Packet Type
## Screen Masking Packet Structure
When letterbox/pillarbox detection is active, the encoder will only encode pictures in the active area.
Decoders must use these values to derive the size of the active area for decoding, and fill the blank area on playback.
Encoders only need to insert this packet at the start of the video (if necessary) and whenever a geometry change occurs.
uint8 Packet Type (0xF2)
uint32 Starting frame number
uint16 Mask size top in pixels
uint16 Mask size right in pixels
uint16 Mask size bottom in pixels
uint16 Mask size left in pixels
## Video Packet Structure
uint8 Packet Type (0x10/0x11)
uint32 Compressed Size
* Zstd-compressed Block Data
## TAD Packet Structure
uint8 Packet type (0x24)
uint8 Packet Type (0x24)
<header for decoding packet>
uint16 Sample Count
uint32 Compressed Size + 7

View File

@@ -3,8 +3,8 @@
CC = gcc
CXX = g++
CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE
CXXFLAGS = -std=c++11 -Wall -Wextra -O2 -D_GNU_SOURCE
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")

View File

@@ -8108,13 +8108,14 @@ static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_n
fwrite(&packet_type, 1, 1, output);
// Calculate timecode in nanoseconds
// For NTSC (29.97 fps): time = frame_num * 1001000000 / 30000
// For NTSC framerates (X000/1001): time = frame_num * 1001 * 1000000000 / (fps * 1000)
// For other framerates: time = frame_num * 1000000000 / fps
uint64_t timecode_ns;
if (is_ntsc_framerate) {
// NTSC uses 30000/1001 fps (29.97...)
// To avoid floating point: time_ns = frame_num * 1001000000 / 30000
timecode_ns = ((uint64_t)frame_num * 1001000000ULL) / 30000ULL;
// NTSC framerates use denominator 1001 (e.g., 24000/1001, 30000/1001, 60000/1001)
// To avoid floating point: time_ns = frame_num * 1001 * 1e9 / (fps * 1000)
// This works for 24fps NTSC (23.976), 30fps NTSC (29.97), 60fps NTSC (59.94), etc.
timecode_ns = ((uint64_t)frame_num * 1001ULL * 1000000000ULL) / ((uint64_t)fps * 1000ULL);
} else {
// Standard framerate
timecode_ns = ((uint64_t)frame_num * 1000000000ULL) / (uint64_t)fps;
@@ -10779,7 +10780,8 @@ int main(int argc, char *argv[]) {
// Update ENDT in extended header (calculate end time for last frame)
uint64_t endt_ns;
if (enc->is_ntsc_framerate) {
endt_ns = ((uint64_t)(frame_count - 1) * 1001000000ULL) / 30000ULL;
// NTSC framerates use denominator 1001 (e.g., 24000/1001, 30000/1001, 60000/1001)
endt_ns = ((uint64_t)(frame_count - 1) * 1001ULL * 1000000000ULL) / ((uint64_t)enc->output_fps * 1000ULL);
} else {
endt_ns = ((uint64_t)(frame_count - 1) * 1000000000ULL) / (uint64_t)enc->output_fps;
}

View File

@@ -53,6 +53,7 @@
#define TAV_PACKET_EXTENDED_HDR 0xEF
#define TAV_PACKET_LOOP_START 0xF0
#define TAV_PACKET_LOOP_END 0xF1
#define TAV_PACKET_SCREEN_MASK 0xF2
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync packet (N frames decoded)
#define TAV_PACKET_TIMECODE 0xFD
#define TAV_PACKET_SYNC_NTSC 0xFE
@@ -130,6 +131,7 @@ const char* get_packet_type_name(uint8_t type) {
case TAV_PACKET_EXTENDED_HDR: return "EXTENDED HEADER";
case TAV_PACKET_LOOP_START: return "LOOP START";
case TAV_PACKET_LOOP_END: return "LOOP END";
case TAV_PACKET_SCREEN_MASK: return "SCREEN MASK";
case TAV_PACKET_GOP_SYNC: return "GOP SYNC";
case TAV_PACKET_TIMECODE: return "TIMECODE";
case TAV_PACKET_SYNC_NTSC: return "SYNC (NTSC)";
@@ -842,6 +844,23 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un
}
break;
case TAV_PACKET_SCREEN_MASK: {
// Screen masking packet payload, read field by field in stream order:
// uint32 starting frame number, then uint16 mask sizes for
// top, right, bottom and left.
//
// The braces are required: before C23 a case label must be followed by a
// statement, not a declaration, so the declarations get their own scope.
// A `break` inside this compound statement still leaves the switch.
uint32_t frame_number;
if (fread(&frame_number, sizeof(uint32_t), 1, fp) != 1) break;
uint16_t top;
if (fread(&top, sizeof(uint16_t), 1, fp) != 1) break;
uint16_t right;
if (fread(&right, sizeof(uint16_t), 1, fp) != 1) break;
uint16_t bottom;
if (fread(&bottom, sizeof(uint16_t), 1, fp) != 1) break;
uint16_t left;
if (fread(&left, sizeof(uint16_t), 1, fp) != 1) break;
if (!opts.summary_only && display) {
printf(" - Frame=%u [top=%u, right=%u, bottom=%u, left=%u]", frame_number, top, right, bottom, left);
}
break;
}
case TAV_PACKET_SYNC:
stats.sync_count++;
break;