From aa7e20695dedc92e0e0773790464991d9c6a0f99 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Sun, 16 Nov 2025 02:49:03 +0900 Subject: [PATCH] fix: wrong timecode calculation on NTSC framerates --- terranmon.txt | 66 ++++++++++++++++++++++++----------- video_encoder/Makefile | 4 +-- video_encoder/encoder_tav.c | 12 ++++--- video_encoder/tav_inspector.c | 19 ++++++++++ 4 files changed, 74 insertions(+), 27 deletions(-) diff --git a/terranmon.txt b/terranmon.txt index e253d99..2de529c 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -902,13 +902,21 @@ transmission capability, and region-of-interest coding. ## Header (32 bytes) uint8 Magic[8]: "\x1F TSVM TAV" or "\x1F TSVM TAP" - uint8 Version: 3 (YCoCg-R uniform), 4 (ICtCp uniform), 5 (YCoCg-R perceptual), 6 (ICtCp perceptual) - uint16 Width: video width in pixels - uint16 Height: video height in pixels - uint8 FPS: frames per second. Use 0x00 for still images + uint8 Version: + - 1 = YCoCg-R multi-tile uniform + - 2 = ICtCp multi-tile uniform + - 3 = YCoCg-R monoblock uniform + - 4 = ICtCp monoblock uniform + - 5 = YCoCg-R monoblock perceptual + - 6 = ICtCp monoblock perceptual + - 7 = YCoCg-R multi-tile perceptual + - 8 = ICtCp multi-tile perceptual + uint16 Width: picture width in pixels + uint16 Height: picture height in pixels + uint8 FPS: frames per second. Use 0x00 for still pictures uint32 Total Frames: number of video frames - use 0 to denote not-finalised video stream - - use 0xFFFFFFFF to denote still image (.im3 file) + - use 0xFFFFFFFF to denote still picture (.im3 file) uint8 Wavelet Filter Type: - 0 = 5/3 reversible (LGT 5/3, JPEG 2000 standard) - 1 = 9/7 irreversible (CDF 9/7, slight modification of JPEG 2000, default choice) @@ -919,19 +927,22 @@ transmission capability, and region-of-interest coding. 
uint8 Quantiser Index for Y channel (uses exponential numeric system; 0: lossless, 255: potato) uint8 Quantiser Index for Co channel (uses exponential numeric system; 0: lossless, 255: potato) uint8 Quantiser Index for Cg channel (uses exponential numeric system; 0: lossless, 255: potato) - uint8 Extra Feature Flags (must be ignored for still images) - - bit 0 = has audio - - bit 1 = has subtitle - - bit 2 = infinite loop (must be ignored when File Role is 1) + uint8 Extra Feature Flags + - bit 0 = has audio (for still pictures: has background music) + - bit 1 = has subtitle (for still pictures: has timed captions) + - bit 2 = infinite loop (has no effect for still pictures) - bit 7 = has no actual packets, this file is header-only without an Intro Movie uint8 Video Flags - bit 0 = interlaced - bit 1 = is NTSC framerate - bit 2 = is lossless mode (shorthand for `-q 6 -Q0,0,0 -w 0 --intra-only --no-perceptual-tuning --arate 384`) - - bit 3 = has region-of-interest coding (for still images only) + - bit 3 = has region-of-interest coding (for still pictures only) uint8 Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value) uint8 Channel layout (bit-field: bit 0=has alpha, bit 1=has chroma inverted, bit 2=has luma inverted) + * Luma-only videos must be decoded with fixed Chroma=0 + * Chroma-only videos must be decoded with fixed Luma=127 + * No-alpha videos must be decoded with fixed Alpha=255 - 0 = Y-Co-Cg/I-Ct-Cp (000: no alpha, has chroma, has luma) - 1 = Y-Co-Cg-A/I-Ct-Cp-A (001: has alpha, has chroma, has luma) - 2 = Y/I only (010: no alpha, no chroma, has luma) @@ -940,9 +951,9 @@ transmission capability, and region-of-interest coding. 
- 5 = Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma) - 6-7 = Reserved/invalid (would indicate no luma and no chroma) uint8 Entropy Coder - - 0 = Twobit-plane significance map + - 0 = Twobit-plane significance map (deprecated) - 1 = Embedded Zero Block Coding - - 2 = Raw coefficients + - 2 = Raw coefficients (debugging purpose only) uint8 Reserved[2]: fill with zeros uint8 Device Orientation - 0 = No rotation @@ -1001,6 +1012,7 @@ transmission capability, and region-of-interest coding. 0xEF: TAV Extended Header 0xF0: Loop point start (insert right AFTER the TC packet; no payload) 0xF1: Loop point end (insert right AFTER the TC packet; no payload) + 0xF2: Screen masking info 0xFC: GOP Sync packet (indicates N frames decoded from GOP block) 0xFD: Timecode (TC) Packet [for frame 0, insert at the beginning; otherwise, insert right AFTER the sync] 0xFE: NTSC sync packet (used by player to calculate exact framerate-wise performance; no payload) @@ -1012,11 +1024,12 @@ transmission capability, and region-of-interest coding. 1. TAV Extended header (if any) 2. Standard metadata payloads (if any) 3. SSF-TC/KSF-TC packets (if any) - When time-coded subtitles are used, the entire subtitle bytes must precede the first video frame. + When time-coded subtitles are used, the entire subtitles must precede the first video frame. Think of it as tacking the whole subtitle file before the actual video. + 4. Screen Masking packets (if any) Frame group: - 1. TC Packet (0xFD) or Next TAV File (0x1F) [mutually exclusive!] + 1. Timecode Packet (0xFD) or Next TAV File (0x1F) [mutually exclusive!] 2. Loop point packet (if any) 3. Audio packets (if any) 4. Subtitle packets (if any) [mutually exclusive with SSF-TC/KSF-TC packets] @@ -1024,11 +1037,12 @@ transmission capability, and region-of-interest coding. 6. Multiplexed video packets (0x70-7F; if any) After a frame group: - 1. Sync packet + 1. Sync packet (0xFC or 0xFF) + 2. 
NTSC Sync packet (if required; it will instruct players to duplicate the current frame) ## TAV Extended Header Specification and Structure - uint8 0xEF + uint8 Packet Type (0xEF) uint16 Number of Key-Value pairs * Key-Value pairs @@ -1056,7 +1070,7 @@ transmission capability, and region-of-interest coding. ## Standard Metadata Payload Packet Structure - uint8 0xE0/0xE1/0xE2/.../0xEE (see Packet Types section) + uint8 Packet Type (0xE0/0xE1/0xE2/.../0xEE; see Packet Types section) uint32 Length of the payload * Standard payload @@ -1070,13 +1084,25 @@ transmission capability, and region-of-interest coding. uint8 Packet Type (0xFE) uint64 Time since stream start in nanoseconds (this may NOT start from zero if the video is coming from a livestream) -## Video Packet Structure (0x10, 0x11) - uint8 Packet Type +## Screen Masking Packet Structure + When letterbox/pillarbox detection is active, the encoder will only encode pictures in the active area. + Decoders must use these values to derive the size of the active area for decoding, and fill the blank area on playback. + Encoders only need to insert this packet at the start of the video (if necessary) and whenever a geometry change occurs. + + uint8 Packet Type (0xF2) + uint32 Starting frame number + uint16 Mask size top in pixels + uint16 Mask size right in pixels + uint16 Mask size bottom in pixels + uint16 Mask size left in pixels + +## Video Packet Structure + uint8 Packet Type (0x10/0x11) uint32 Compressed Size * Zstd-compressed Block Data ## TAD Packet Structure - uint8 Packet type (0x24) + uint8 Packet Type (0x24)
uint16 Sample Count uint32 Compressed Size + 7 diff --git a/video_encoder/Makefile b/video_encoder/Makefile index 647b6dd..6aaecf4 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -3,8 +3,8 @@ CC = gcc CXX = g++ -CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE -CXXFLAGS = -std=c++11 -Wall -Wextra -O2 -D_GNU_SOURCE +CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE +CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE # Zstd flags (use pkg-config if available, fallback for cross-platform compatibility) ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "") diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index deee447..27de097 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -8108,13 +8108,14 @@ static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_n fwrite(&packet_type, 1, 1, output); // Calculate timecode in nanoseconds - // For NTSC (29.97 fps): time = frame_num * 1001000000 / 30000 + // For NTSC framerates (X000/1001): time = frame_num * 1001 * 1000000000 / (fps * 1000) // For other framerates: time = frame_num * 1000000000 / fps uint64_t timecode_ns; if (is_ntsc_framerate) { - // NTSC uses 30000/1001 fps (29.97...) - // To avoid floating point: time_ns = frame_num * 1001000000 / 30000 - timecode_ns = ((uint64_t)frame_num * 1001000000ULL) / 30000ULL; + // NTSC framerates use denominator 1001 (e.g., 24000/1001, 30000/1001, 60000/1001) + // To avoid floating point: time_ns = frame_num * 1001 * 1e9 / (fps * 1000) + // This works for 24fps NTSC (23.976), 30fps NTSC (29.97), 60fps NTSC (59.94), etc. 
+ timecode_ns = ((uint64_t)frame_num * 1001ULL * 1000000000ULL) / ((uint64_t)fps * 1000ULL); } else { // Standard framerate timecode_ns = ((uint64_t)frame_num * 1000000000ULL) / (uint64_t)fps; @@ -10779,7 +10780,8 @@ int main(int argc, char *argv[]) { // Update ENDT in extended header (calculate end time for last frame) uint64_t endt_ns; if (enc->is_ntsc_framerate) { - endt_ns = ((uint64_t)(frame_count - 1) * 1001000000ULL) / 30000ULL; + // NTSC framerates use denominator 1001 (e.g., 24000/1001, 30000/1001, 60000/1001) + endt_ns = ((uint64_t)(frame_count - 1) * 1001ULL * 1000000000ULL) / ((uint64_t)enc->output_fps * 1000ULL); } else { endt_ns = ((uint64_t)(frame_count - 1) * 1000000000ULL) / (uint64_t)enc->output_fps; } diff --git a/video_encoder/tav_inspector.c b/video_encoder/tav_inspector.c index 99f360f..a299484 100644 --- a/video_encoder/tav_inspector.c +++ b/video_encoder/tav_inspector.c @@ -53,6 +53,7 @@ #define TAV_PACKET_EXTENDED_HDR 0xEF #define TAV_PACKET_LOOP_START 0xF0 #define TAV_PACKET_LOOP_END 0xF1 +#define TAV_PACKET_SCREEN_MASK 0xF2 #define TAV_PACKET_GOP_SYNC 0xFC // GOP sync packet (N frames decoded) #define TAV_PACKET_TIMECODE 0xFD #define TAV_PACKET_SYNC_NTSC 0xFE @@ -130,6 +131,7 @@ const char* get_packet_type_name(uint8_t type) { case TAV_PACKET_EXTENDED_HDR: return "EXTENDED HEADER"; case TAV_PACKET_LOOP_START: return "LOOP START"; case TAV_PACKET_LOOP_END: return "LOOP END"; + case TAV_PACKET_SCREEN_MASK: return "SCREEN MASK"; case TAV_PACKET_GOP_SYNC: return "GOP SYNC"; case TAV_PACKET_TIMECODE: return "TIMECODE"; case TAV_PACKET_SYNC_NTSC: return "SYNC (NTSC)"; @@ -842,6 +844,23 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un } break; + case TAV_PACKET_SCREEN_MASK: + uint32_t frame_number; + if (fread(&frame_number, sizeof(uint32_t), 1, fp) != 1) break; + uint16_t top; + if (fread(&top, sizeof(uint16_t), 1, fp) != 1) break; + uint16_t right; + if (fread(&right, sizeof(uint16_t), 1, fp) != 1) break; + 
uint16_t bottom; + if (fread(&bottom, sizeof(uint16_t), 1, fp) != 1) break; + uint16_t left; + if (fread(&left, sizeof(uint16_t), 1, fp) != 1) break; + + if (!opts.summary_only && display) { + printf(" - Frame=%u [top=%u, right=%u, bottom=%u, left=%u]", frame_number, top, right, bottom, left); + } + break; + case TAV_PACKET_SYNC: stats.sync_count++; break;