mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-10 23:04:04 +09:00
TAV: minimal size for GOP
This commit is contained in:
@@ -1143,6 +1143,9 @@ try {
|
|||||||
startTime: 0,
|
startTime: 0,
|
||||||
timeRemaining: 0
|
timeRemaining: 0
|
||||||
}
|
}
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Buffered GOP ${gopSize} frames to ready slot during first GOP decode`)
|
||||||
|
}
|
||||||
} else if (decodingGopData === null) {
|
} else if (decodingGopData === null) {
|
||||||
// Buffer as decoding GOP (will decode after ready GOP)
|
// Buffer as decoding GOP (will decode after ready GOP)
|
||||||
const decodingSlot = (currentGopBufferSlot + 2) % BUFFER_SLOTS
|
const decodingSlot = (currentGopBufferSlot + 2) % BUFFER_SLOTS
|
||||||
@@ -1155,8 +1158,20 @@ try {
|
|||||||
startTime: 0,
|
startTime: 0,
|
||||||
timeRemaining: 0
|
timeRemaining: 0
|
||||||
}
|
}
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Buffered GOP ${gopSize} frames to decoding slot during first GOP decode`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CRITICAL: Stop reading packets now that all 3 buffers are full
|
||||||
|
shouldReadPackets = false
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] All 3 buffers full during first GOP decode - stopping packet reading`)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// All 3 buffers full - discard this GOP
|
// All 3 buffers full - discard this GOP (shouldn't happen now with gate)
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] WARNING: All 3 buffers full during first GOP decode - discarding GOP ${gopSize} frames`)
|
||||||
|
}
|
||||||
sys.free(compressedPtr)
|
sys.free(compressedPtr)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1180,14 +1195,28 @@ try {
|
|||||||
nextOffset
|
nextOffset
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Set async decode tracking variables
|
||||||
|
asyncDecodeInProgress = true
|
||||||
|
asyncDecodeSlot = nextSlot
|
||||||
|
asyncDecodeGopSize = gopSize
|
||||||
|
asyncDecodePtr = compressedPtr
|
||||||
|
asyncDecodeStartTime = sys.nanoTime()
|
||||||
|
|
||||||
readyGopData = {
|
readyGopData = {
|
||||||
gopSize: gopSize,
|
gopSize: gopSize,
|
||||||
slot: nextSlot,
|
slot: nextSlot,
|
||||||
compressedPtr: compressedPtr,
|
compressedPtr: compressedPtr,
|
||||||
startTime: sys.nanoTime(),
|
startTime: asyncDecodeStartTime,
|
||||||
timeRemaining: timeRemaining
|
timeRemaining: timeRemaining
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CRITICAL: Stop reading packets immediately after starting decode
|
||||||
|
// to prevent next GOP from being discarded in Case 5
|
||||||
|
shouldReadPackets = false
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Case 3: Started decode to ready slot - stopping packet reading`)
|
||||||
|
}
|
||||||
|
|
||||||
} else if (currentGopSize > 0 && readyGopData !== null && decodingGopData === null && !asyncDecodeInProgress && graphics.tavDecodeGopIsComplete()) {
|
} else if (currentGopSize > 0 && readyGopData !== null && decodingGopData === null && !asyncDecodeInProgress && graphics.tavDecodeGopIsComplete()) {
|
||||||
// Case 4: GOP playing, ready GOP exists, no decoding GOP, no decode in progress - decode to decoding slot
|
// Case 4: GOP playing, ready GOP exists, no decoding GOP, no decode in progress - decode to decoding slot
|
||||||
const decodingSlot = (currentGopBufferSlot + 2) % BUFFER_SLOTS
|
const decodingSlot = (currentGopBufferSlot + 2) % BUFFER_SLOTS
|
||||||
@@ -1208,16 +1237,33 @@ try {
|
|||||||
decodingOffset
|
decodingOffset
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Set async decode tracking variables
|
||||||
|
asyncDecodeInProgress = true
|
||||||
|
asyncDecodeSlot = decodingSlot
|
||||||
|
asyncDecodeGopSize = gopSize
|
||||||
|
asyncDecodePtr = compressedPtr
|
||||||
|
asyncDecodeStartTime = sys.nanoTime()
|
||||||
|
|
||||||
decodingGopData = {
|
decodingGopData = {
|
||||||
gopSize: gopSize,
|
gopSize: gopSize,
|
||||||
slot: decodingSlot,
|
slot: decodingSlot,
|
||||||
compressedPtr: compressedPtr,
|
compressedPtr: compressedPtr,
|
||||||
startTime: sys.nanoTime(),
|
startTime: asyncDecodeStartTime,
|
||||||
timeRemaining: timeRemaining
|
timeRemaining: timeRemaining
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CRITICAL: Stop reading packets immediately after starting decode
|
||||||
|
// All 3 buffers are now full (playing + ready + decoding)
|
||||||
|
shouldReadPackets = false
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Case 4: Started decode to decoding slot - all buffers full, stopping packet reading`)
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Case 5: All 3 buffers full (playing + ready + decoding) - ignore packet
|
// Case 5: All 3 buffers full (playing + ready + decoding) - ignore packet
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Case 5: Discarding GOP ${gopSize} frames (current=${currentGopSize}, ready=${readyGopData !== null}, decoding=${decodingGopData !== null}, asyncInProgress=${asyncDecodeInProgress})`)
|
||||||
|
}
|
||||||
sys.free(compressedPtr)
|
sys.free(compressedPtr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1230,6 +1276,9 @@ try {
|
|||||||
// (one GOP playing + ready GOP + decoding GOP)
|
// (one GOP playing + ready GOP + decoding GOP)
|
||||||
if (currentGopSize > 0 && readyGopData !== null && decodingGopData !== null) {
|
if (currentGopSize > 0 && readyGopData !== null && decodingGopData !== null) {
|
||||||
shouldReadPackets = false
|
shouldReadPackets = false
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] All 3 buffers full - stopping packet reading`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (packetType === TAV_PACKET_AUDIO_BUNDLED) {
|
else if (packetType === TAV_PACKET_AUDIO_BUNDLED) {
|
||||||
@@ -1433,8 +1482,18 @@ try {
|
|||||||
// Set first frame time to NOW
|
// Set first frame time to NOW
|
||||||
nextFrameTime = sys.nanoTime()
|
nextFrameTime = sys.nanoTime()
|
||||||
|
|
||||||
// Resume packet reading to get next GOP (only one buffer occupied now)
|
// Resume packet reading only if not all 3 buffers are full
|
||||||
shouldReadPackets = true
|
// (might have buffered GOP 2 and 3 during GOP 1 decode)
|
||||||
|
if (!(currentGopSize > 0 && readyGopData !== null && decodingGopData !== null)) {
|
||||||
|
shouldReadPackets = true
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] First GOP ready - resuming packet reading (ready=${readyGopData !== null}, decoding=${decodingGopData !== null})`)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] First GOP ready - all 3 buffers full, keeping packet reading paused`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// if (interactive) {
|
// if (interactive) {
|
||||||
// console.log(`[GOP] First GOP ready (slot ${asyncDecodeSlot}, ${asyncDecodeGopSize} frames) in ${decodeTime.toFixed(1)}ms - starting playback`)
|
// console.log(`[GOP] First GOP ready (slot ${asyncDecodeSlot}, ${asyncDecodeGopSize} frames) in ${decodeTime.toFixed(1)}ms - starting playback`)
|
||||||
@@ -1461,13 +1520,30 @@ try {
|
|||||||
readyGopData.slot * SLOT_SIZE
|
readyGopData.slot * SLOT_SIZE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
||||||
|
asyncDecodeInProgress = true
|
||||||
|
asyncDecodeSlot = readyGopData.slot
|
||||||
|
asyncDecodeGopSize = readyGopData.gopSize
|
||||||
|
asyncDecodePtr = readyGopData.compressedPtr
|
||||||
|
asyncDecodeStartTime = sys.nanoTime()
|
||||||
|
|
||||||
readyGopData.needsDecode = false
|
readyGopData.needsDecode = false
|
||||||
readyGopData.startTime = sys.nanoTime()
|
readyGopData.startTime = asyncDecodeStartTime
|
||||||
readyGopData.timeRemaining = timeRemaining
|
readyGopData.timeRemaining = timeRemaining
|
||||||
|
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Started decode of buffered GOP ${readyGopData.gopSize} frames (slot ${readyGopData.slot})`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fire audio on first frame
|
||||||
|
if (!audioFired) {
|
||||||
|
audio.play(0)
|
||||||
|
audioFired = true
|
||||||
|
}
|
||||||
|
|
||||||
// Step 2 & 3: Display current GOP frame if it's time
|
// Step 2 & 3: Display current GOP frame if it's time
|
||||||
if (!paused && currentGopSize > 0 && currentGopFrameIndex < currentGopSize) {
|
if (!paused && currentGopSize > 0 && currentGopFrameIndex < currentGopSize) {
|
||||||
// Spin-wait for next frame time
|
// Spin-wait for next frame time
|
||||||
@@ -1483,6 +1559,10 @@ try {
|
|||||||
graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
|
graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
|
||||||
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
|
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
|
||||||
|
|
||||||
|
if (interactive && currentGopFrameIndex === 0) {
|
||||||
|
console.log(`[GOP] Playing GOP: ${currentGopSize} frames from slot ${currentGopBufferSlot}`)
|
||||||
|
}
|
||||||
|
|
||||||
// Apply bias lighting
|
// Apply bias lighting
|
||||||
let biasStart = sys.nanoTime()
|
let biasStart = sys.nanoTime()
|
||||||
if (currentGopFrameIndex === 0 || currentGopFrameIndex === currentGopSize - 1) {
|
if (currentGopFrameIndex === 0 || currentGopFrameIndex === currentGopSize - 1) {
|
||||||
@@ -1531,9 +1611,20 @@ try {
|
|||||||
decodingGopData.slot * SLOT_SIZE
|
decodingGopData.slot * SLOT_SIZE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// CRITICAL FIX: Set async decode tracking variables so decode is properly tracked
|
||||||
|
asyncDecodeInProgress = true
|
||||||
|
asyncDecodeSlot = decodingGopData.slot
|
||||||
|
asyncDecodeGopSize = decodingGopData.gopSize
|
||||||
|
asyncDecodePtr = decodingGopData.compressedPtr
|
||||||
|
asyncDecodeStartTime = sys.nanoTime()
|
||||||
|
|
||||||
decodingGopData.needsDecode = false
|
decodingGopData.needsDecode = false
|
||||||
decodingGopData.startTime = sys.nanoTime()
|
decodingGopData.startTime = asyncDecodeStartTime
|
||||||
decodingGopData.timeRemaining = timeRemaining
|
decodingGopData.timeRemaining = timeRemaining
|
||||||
|
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Started decode of buffered GOP ${decodingGopData.gopSize} frames from decoding slot (slot ${decodingGopData.slot})`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Schedule next frame
|
// Schedule next frame
|
||||||
@@ -1543,6 +1634,9 @@ try {
|
|||||||
|
|
||||||
// Step 4-7: GOP finished? Transition to ready GOP (triple-buffering)
|
// Step 4-7: GOP finished? Transition to ready GOP (triple-buffering)
|
||||||
if (!paused && currentGopSize > 0 && currentGopFrameIndex >= currentGopSize) {
|
if (!paused && currentGopSize > 0 && currentGopFrameIndex >= currentGopSize) {
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] GOP finished: played ${currentGopFrameIndex}/${currentGopSize} frames from slot ${currentGopBufferSlot}`)
|
||||||
|
}
|
||||||
if (readyGopData !== null) {
|
if (readyGopData !== null) {
|
||||||
// If ready GOP still needs decode, start it now (defensive - should already be started)
|
// If ready GOP still needs decode, start it now (defensive - should already be started)
|
||||||
if (readyGopData.needsDecode) {
|
if (readyGopData.needsDecode) {
|
||||||
@@ -1581,8 +1675,19 @@ try {
|
|||||||
readyGopData = decodingGopData
|
readyGopData = decodingGopData
|
||||||
decodingGopData = null
|
decodingGopData = null
|
||||||
|
|
||||||
|
// CRITICAL: Only clear async decode tracking if NO decode is in progress
|
||||||
|
// (the promoted readyGop might be decoding from Case 4)
|
||||||
|
if (graphics.tavDecodeGopIsComplete()) {
|
||||||
|
asyncDecodeInProgress = false
|
||||||
|
asyncDecodePtr = 0
|
||||||
|
asyncDecodeGopSize = 0
|
||||||
|
}
|
||||||
|
|
||||||
// Resume packet reading now that one buffer is free (decoding slot available)
|
// Resume packet reading now that one buffer is free (decoding slot available)
|
||||||
shouldReadPackets = true
|
shouldReadPackets = true
|
||||||
|
if (interactive) {
|
||||||
|
console.log(`[GOP] Transition complete - resuming packet reading (asyncInProgress=${asyncDecodeInProgress})`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// No ready GOP available - hiccup (shouldn't happen with triple-buffering)
|
// No ready GOP available - hiccup (shouldn't happen with triple-buffering)
|
||||||
|
|||||||
@@ -1038,9 +1038,9 @@ transmission capability, and region-of-interest coding.
|
|||||||
type_t Value
|
type_t Value
|
||||||
|
|
||||||
### List of Keys
|
### List of Keys
|
||||||
- Uint64 BGNT: Video begin time (must be equal to the value of the first Timecode packet)
|
- Uint64 BGNT: Video begin time in nanoseconds (must be equal to the value of the first Timecode packet)
|
||||||
- Uint64 ENDT: Video end time (must be equal to the value of the last Timecode packet)
|
- Uint64 ENDT: Video end time in nanoseconds (must be equal to the value of the last Timecode packet)
|
||||||
- Uint64 CDAT: Creation time in nanoseconds since UNIX Epoch (must be in UTC timezone)
|
- Uint64 CDAT: Creation time in microseconds since UNIX Epoch (must be in UTC timezone)
|
||||||
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014 (list,of,features)")
|
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014 (list,of,features)")
|
||||||
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers"; the first line of text FFmpeg emits)
|
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers"; the first line of text FFmpeg emits)
|
||||||
|
|
||||||
@@ -1067,7 +1067,6 @@ transmission capability, and region-of-interest coding.
|
|||||||
|
|
||||||
## GOP Unified Packet Structure (0x12)
|
## GOP Unified Packet Structure (0x12)
|
||||||
Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing.
|
Implemented on 2025-10-15 for temporal 3D DWT with unified preprocessing.
|
||||||
Updated on 2025-10-17 to include canvas expansion margins.
|
|
||||||
|
|
||||||
This packet contains multiple frames encoded as a single spacetime block for optimal
|
This packet contains multiple frames encoded as a single spacetime block for optimal
|
||||||
temporal compression.
|
temporal compression.
|
||||||
@@ -1084,6 +1083,7 @@ temporal compression.
|
|||||||
### Unified Block Data Format
|
### Unified Block Data Format
|
||||||
The entire GOP (width×height×N_frames×3_channels) is preprocessed as a single block:
|
The entire GOP (width×height×N_frames×3_channels) is preprocessed as a single block:
|
||||||
|
|
||||||
|
<if significance maps are used>
|
||||||
uint8 Y Significance Maps[(width*height + 7) / 8 * GOP Size] // All Y frames concatenated
|
uint8 Y Significance Maps[(width*height + 7) / 8 * GOP Size] // All Y frames concatenated
|
||||||
uint8 Co Significance Maps[(width*height + 7) / 8 * GOP Size] // All Co frames concatenated
|
uint8 Co Significance Maps[(width*height + 7) / 8 * GOP Size] // All Co frames concatenated
|
||||||
uint8 Cg Significance Maps[(width*height + 7) / 8 * GOP Size] // All Cg frames concatenated
|
uint8 Cg Significance Maps[(width*height + 7) / 8 * GOP Size] // All Cg frames concatenated
|
||||||
@@ -1091,28 +1091,17 @@ The entire GOP (width×height×N_frames×3_channels) is preprocessed as a single
|
|||||||
int16 Co Non-zero Values[variable length] // All Co non-zero coefficients
|
int16 Co Non-zero Values[variable length] // All Co non-zero coefficients
|
||||||
int16 Cg Non-zero Values[variable length] // All Cg non-zero coefficients
|
int16 Cg Non-zero Values[variable length] // All Cg non-zero coefficients
|
||||||
|
|
||||||
|
<if EZBC is used>
|
||||||
|
uint32 EZBC Size for Y
|
||||||
|
* EZBC Structure for Y
|
||||||
|
uint32 EZBC Size for Co
|
||||||
|
* EZBC Structure for Co
|
||||||
|
uint32 EZBC Size for Cg
|
||||||
|
* EZBC Structure for Cg
|
||||||
|
|
||||||
This layout enables Zstd to find patterns across both spatial and temporal dimensions,
|
This layout enables Zstd to find patterns across both spatial and temporal dimensions,
|
||||||
resulting in superior compression compared to per-frame encoding.
|
resulting in superior compression compared to per-frame encoding.
|
||||||
|
|
||||||
### Canvas Expansion for Motion Compensation
|
|
||||||
When frames in a GOP have camera motion, they must be aligned before temporal DWT.
|
|
||||||
However, alignment creates "gaps" at frame edges. To preserve ALL original pixels:
|
|
||||||
|
|
||||||
1. **Calculate motion range**: Determine the total shift range across all GOP frames
|
|
||||||
- Example: If frames shift by ±3 pixels horizontally, total range = 6 pixels
|
|
||||||
2. **Expand canvas**: Create a larger canvas = original_size + margin
|
|
||||||
- Canvas width = header.width + margin_left + margin_right
|
|
||||||
- Canvas height = header.height + margin_top + margin_bottom
|
|
||||||
3. **Place aligned frames**: Each frame is positioned on the expanded canvas
|
|
||||||
- All original pixels from all frames are preserved
|
|
||||||
- No artificial padding or cropping occurs
|
|
||||||
4. **Encode expanded canvas**: Apply 3D DWT to the larger canvas dimensions
|
|
||||||
5. **Store margins**: 4 bytes (L/R/T/B) tell decoder the canvas expansion
|
|
||||||
6. **Decoder extraction**: Decoder extracts display region for each frame based on
|
|
||||||
motion vectors and margins
|
|
||||||
|
|
||||||
This approach ensures lossless preservation of original video content during GOP encoding.
|
|
||||||
|
|
||||||
### Motion Vectors
|
### Motion Vectors
|
||||||
- Stored in 1/16-pixel units (divide by 16.0 for pixel displacement)
|
- Stored in 1/16-pixel units (divide by 16.0 for pixel displacement)
|
||||||
- Used for global motion compensation (camera movement, scene translation)
|
- Used for global motion compensation (camera movement, scene translation)
|
||||||
|
|||||||
@@ -121,7 +121,11 @@ static int needs_alpha_channel(int channel_layout) {
|
|||||||
#define DEFAULT_ZSTD_LEVEL 3
|
#define DEFAULT_ZSTD_LEVEL 3
|
||||||
#define DEFAULT_PCM_ZSTD_LEVEL 3
|
#define DEFAULT_PCM_ZSTD_LEVEL 3
|
||||||
#define TEMPORAL_GOP_SIZE 20
|
#define TEMPORAL_GOP_SIZE 20
|
||||||
|
#define TEMPORAL_GOP_SIZE_MIN 8 // Minimum GOP size to avoid decoder hiccups
|
||||||
#define TEMPORAL_DECOMP_LEVEL 2
|
#define TEMPORAL_DECOMP_LEVEL 2
|
||||||
|
|
||||||
|
#define SCENE_CHANGE_THRESHOLD_SOFT 0.6
|
||||||
|
#define SCENE_CHANGE_THRESHOLD_HARD 0.8
|
||||||
#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
|
#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
|
||||||
|
|
||||||
// Audio/subtitle constants (reused from TEV)
|
// Audio/subtitle constants (reused from TEV)
|
||||||
@@ -1897,7 +1901,7 @@ typedef struct tav_encoder_s {
|
|||||||
|
|
||||||
// Extended header support
|
// Extended header support
|
||||||
char *ffmpeg_version; // FFmpeg version string
|
char *ffmpeg_version; // FFmpeg version string
|
||||||
uint64_t creation_time_ns; // Creation time in nanoseconds since UNIX epoch
|
uint64_t creation_time_us; // Creation time in microseconds since UNIX epoch
|
||||||
long extended_header_offset; // File offset of extended header for ENDT update
|
long extended_header_offset; // File offset of extended header for ENDT update
|
||||||
|
|
||||||
} tav_encoder_t;
|
} tav_encoder_t;
|
||||||
@@ -2267,7 +2271,7 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" -a, --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
|
printf(" -a, --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
|
||||||
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
|
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
|
||||||
printf(" --separate-audio-track Write entire MP2 file as single packet 0x40 (instead of interleaved)\n");
|
printf(" --separate-audio-track Write entire MP2 file as single packet 0x40 (instead of interleaved)\n");
|
||||||
printf(" --pcm8-audio Use 8-bit PCM audio (packet 0x21, zstd compressed, per-frame packets)\n");
|
printf(" --pcm8-audio Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
|
||||||
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
||||||
printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n");
|
printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n");
|
||||||
printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n");
|
printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n");
|
||||||
@@ -4063,7 +4067,7 @@ static size_t encode_pframe_residual(tav_encoder_t *enc, int qY) {
|
|||||||
if (enc->enable_ezbc) {
|
if (enc->enable_ezbc) {
|
||||||
// EZBC mode: Quantize with perceptual weighting but no normalization (division by quantizer)
|
// EZBC mode: Quantize with perceptual weighting but no normalization (division by quantizer)
|
||||||
// EZBC will compress by encoding only significant bitplanes
|
// EZBC will compress by encoding only significant bitplanes
|
||||||
fprintf(stderr, "[EZBC-QUANT-PFRAME] Using perceptual quantization without normalization\n");
|
// fprintf(stderr, "[EZBC-QUANT-PFRAME] Using perceptual quantization without normalization\n");
|
||||||
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_y_dwt, quantised_y, frame_size,
|
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, residual_y_dwt, quantised_y, frame_size,
|
||||||
qY, enc->width, enc->height,
|
qY, enc->width, enc->height,
|
||||||
enc->decomp_levels, 0, 0);
|
enc->decomp_levels, 0, 0);
|
||||||
@@ -4081,7 +4085,7 @@ static size_t encode_pframe_residual(tav_encoder_t *enc, int qY) {
|
|||||||
if (abs(quantised_co[i]) > max_co) max_co = abs(quantised_co[i]);
|
if (abs(quantised_co[i]) > max_co) max_co = abs(quantised_co[i]);
|
||||||
if (abs(quantised_cg[i]) > max_cg) max_cg = abs(quantised_cg[i]);
|
if (abs(quantised_cg[i]) > max_cg) max_cg = abs(quantised_cg[i]);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "[EZBC-QUANT-PFRAME] Quantized coeff max: Y=%d, Co=%d, Cg=%d\n", max_y, max_co, max_cg);
|
// fprintf(stderr, "[EZBC-QUANT-PFRAME] Quantized coeff max: Y=%d, Co=%d, Cg=%d\n", max_y, max_co, max_cg);
|
||||||
} else {
|
} else {
|
||||||
// Twobit-map mode: Use traditional quantization
|
// Twobit-map mode: Use traditional quantization
|
||||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_y_dwt, quantised_y, frame_size,
|
quantise_dwt_coefficients_perceptual_per_coeff(enc, residual_y_dwt, quantised_y, frame_size,
|
||||||
@@ -5396,9 +5400,84 @@ static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_q
|
|||||||
// Trim GOP if scene change detected
|
// Trim GOP if scene change detected
|
||||||
if (scene_change_frame > 0) {
|
if (scene_change_frame > 0) {
|
||||||
actual_gop_size = scene_change_frame;
|
actual_gop_size = scene_change_frame;
|
||||||
if (enc->verbose) {
|
|
||||||
printf("Trimming GOP from %d to %d frames due to scene change\n",
|
// If trimmed GOP would be too small, encode as separate I-frames instead
|
||||||
enc->temporal_gop_frame_count, actual_gop_size);
|
if (actual_gop_size < TEMPORAL_GOP_SIZE_MIN) {
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("Scene change at frame %d would create GOP of %d frames (< %d), encoding as I-frames instead\n",
|
||||||
|
frame_numbers[scene_change_frame], actual_gop_size, TEMPORAL_GOP_SIZE_MIN);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encode each frame before scene change as separate I-frame
|
||||||
|
size_t total_bytes = 0;
|
||||||
|
int original_gop_frame_count = enc->temporal_gop_frame_count;
|
||||||
|
|
||||||
|
for (int i = 0; i < actual_gop_size; i++) {
|
||||||
|
// Temporarily set up single-frame GOP
|
||||||
|
uint8_t *saved_rgb_frame0 = enc->temporal_gop_rgb_frames[0];
|
||||||
|
float *saved_y_frame0 = enc->temporal_gop_y_frames[0];
|
||||||
|
float *saved_co_frame0 = enc->temporal_gop_co_frames[0];
|
||||||
|
float *saved_cg_frame0 = enc->temporal_gop_cg_frames[0];
|
||||||
|
|
||||||
|
// Set up single-frame GOP by moving frame i to position 0
|
||||||
|
enc->temporal_gop_rgb_frames[0] = enc->temporal_gop_rgb_frames[i];
|
||||||
|
enc->temporal_gop_y_frames[0] = enc->temporal_gop_y_frames[i];
|
||||||
|
enc->temporal_gop_co_frames[0] = enc->temporal_gop_co_frames[i];
|
||||||
|
enc->temporal_gop_cg_frames[0] = enc->temporal_gop_cg_frames[i];
|
||||||
|
enc->temporal_gop_frame_count = 1;
|
||||||
|
|
||||||
|
// Encode as I-frame
|
||||||
|
size_t bytes = gop_flush(enc, output, base_quantiser, &frame_numbers[i], 1);
|
||||||
|
if (bytes == 0) {
|
||||||
|
fprintf(stderr, "Error: Failed to encode I-frame during GOP trimming\n");
|
||||||
|
// NOTE(review): on this error path slot 0 of the rgb/y/co/cg frame arrays still
// holds frame i's pointers (the restore from saved_*_frame0 only runs after a
// successful flush), so for i > 0 the original slot-0 buffers are no longer
// referenced by the arrays. Confirm the caller's cleanup tolerates this aliasing.
enc->temporal_gop_frame_count = original_gop_frame_count;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
total_bytes += bytes;
|
||||||
|
|
||||||
|
// Restore position 0 (but keep frame i in place for the shift operation below)
|
||||||
|
enc->temporal_gop_rgb_frames[0] = saved_rgb_frame0;
|
||||||
|
enc->temporal_gop_y_frames[0] = saved_y_frame0;
|
||||||
|
enc->temporal_gop_co_frames[0] = saved_co_frame0;
|
||||||
|
enc->temporal_gop_cg_frames[0] = saved_cg_frame0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore original frame count
|
||||||
|
enc->temporal_gop_frame_count = original_gop_frame_count;
|
||||||
|
|
||||||
|
// Shift remaining frames (after scene change) to start of buffer
|
||||||
|
int remaining_frames = original_gop_frame_count - scene_change_frame;
|
||||||
|
for (int i = 0; i < remaining_frames; i++) {
|
||||||
|
int src = scene_change_frame + i;
|
||||||
|
// Swap pointers
|
||||||
|
uint8_t *temp_rgb = enc->temporal_gop_rgb_frames[i];
|
||||||
|
float *temp_y = enc->temporal_gop_y_frames[i];
|
||||||
|
float *temp_co = enc->temporal_gop_co_frames[i];
|
||||||
|
float *temp_cg = enc->temporal_gop_cg_frames[i];
|
||||||
|
|
||||||
|
enc->temporal_gop_rgb_frames[i] = enc->temporal_gop_rgb_frames[src];
|
||||||
|
enc->temporal_gop_y_frames[i] = enc->temporal_gop_y_frames[src];
|
||||||
|
enc->temporal_gop_co_frames[i] = enc->temporal_gop_co_frames[src];
|
||||||
|
enc->temporal_gop_cg_frames[i] = enc->temporal_gop_cg_frames[src];
|
||||||
|
|
||||||
|
enc->temporal_gop_rgb_frames[src] = temp_rgb;
|
||||||
|
enc->temporal_gop_y_frames[src] = temp_y;
|
||||||
|
enc->temporal_gop_co_frames[src] = temp_co;
|
||||||
|
enc->temporal_gop_cg_frames[src] = temp_cg;
|
||||||
|
|
||||||
|
enc->temporal_gop_translation_x[i] = enc->temporal_gop_translation_x[src];
|
||||||
|
enc->temporal_gop_translation_y[i] = enc->temporal_gop_translation_y[src];
|
||||||
|
}
|
||||||
|
enc->temporal_gop_frame_count = remaining_frames;
|
||||||
|
|
||||||
|
return total_bytes;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// GOP large enough after trimming - proceed normally
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("Trimming GOP from %d to %d frames due to scene change\n",
|
||||||
|
enc->temporal_gop_frame_count, actual_gop_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7017,7 +7096,7 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
// INTRA mode: quantise coefficients directly and store for future reference
|
// INTRA mode: quantise coefficients directly and store for future reference
|
||||||
if (enc->enable_ezbc) {
|
if (enc->enable_ezbc) {
|
||||||
// EZBC mode: Quantize with perceptual weighting but no normalization (division by quantizer)
|
// EZBC mode: Quantize with perceptual weighting but no normalization (division by quantizer)
|
||||||
fprintf(stderr, "[EZBC-QUANT-INTRA] Using perceptual quantization without normalization\n");
|
// fprintf(stderr, "[EZBC-QUANT-INTRA] Using perceptual quantization without normalization\n");
|
||||||
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
|
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
|
||||||
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||||
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
quantise_dwt_coefficients_perceptual_per_coeff_no_normalisation(enc, (float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||||
@@ -7029,7 +7108,7 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
|||||||
if (abs(quantised_co[i]) > max_co) max_co = abs(quantised_co[i]);
|
if (abs(quantised_co[i]) > max_co) max_co = abs(quantised_co[i]);
|
||||||
if (abs(quantised_cg[i]) > max_cg) max_cg = abs(quantised_cg[i]);
|
if (abs(quantised_cg[i]) > max_cg) max_cg = abs(quantised_cg[i]);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "[EZBC-QUANT-INTRA] Quantized coeff max: Y=%d, Co=%d, Cg=%d\n", max_y, max_co, max_cg);
|
// fprintf(stderr, "[EZBC-QUANT-INTRA] Quantized coeff max: Y=%d, Co=%d, Cg=%d\n", max_y, max_co, max_cg);
|
||||||
} else if (enc->perceptual_tuning) {
|
} else if (enc->perceptual_tuning) {
|
||||||
// Perceptual quantisation: EXACTLY like uniform but with per-coefficient weights
|
// Perceptual quantisation: EXACTLY like uniform but with per-coefficient weights
|
||||||
quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
|
quantise_dwt_coefficients_perceptual_per_coeff(enc, (float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
|
||||||
@@ -8627,7 +8706,7 @@ static long write_extended_header(tav_encoder_t *enc) {
|
|||||||
WRITE_KV_UINT64("ENDT", 0ULL);
|
WRITE_KV_UINT64("ENDT", 0ULL);
|
||||||
|
|
||||||
// CDAT: Creation time in nanoseconds since UNIX epoch
|
// CDAT: Creation time in microseconds since UNIX epoch
|
||||||
WRITE_KV_UINT64("CDAT", enc->creation_time_ns);
|
WRITE_KV_UINT64("CDAT", enc->creation_time_us);
|
||||||
|
|
||||||
// VNDR: Encoder name and version
|
// VNDR: Encoder name and version
|
||||||
const char *vendor_str = ENCODER_VENDOR_STRING;
|
const char *vendor_str = ENCODER_VENDOR_STRING;
|
||||||
@@ -9157,15 +9236,13 @@ static int detect_scene_change_between_frames(
|
|||||||
if (out_avg_diff) *out_avg_diff = avg_diff;
|
if (out_avg_diff) *out_avg_diff = avg_diff;
|
||||||
if (out_changed_ratio) *out_changed_ratio = changed_ratio;
|
if (out_changed_ratio) *out_changed_ratio = changed_ratio;
|
||||||
|
|
||||||
// Scene change threshold
|
return changed_ratio > SCENE_CHANGE_THRESHOLD_SOFT;
|
||||||
double threshold = 0.50;
|
|
||||||
|
|
||||||
return changed_ratio > threshold;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wrapper for normal mode: compare current frame with previous frame
|
// Wrapper for normal mode: compare current frame with previous frame
|
||||||
static int detect_scene_change(tav_encoder_t *enc) {
|
static int detect_scene_change(tav_encoder_t *enc, double *out_changed_ratio) {
|
||||||
if (!enc->current_frame_rgb || enc->intra_only) {
|
if (!enc->current_frame_rgb || enc->intra_only) {
|
||||||
|
if (out_changed_ratio) *out_changed_ratio = 0.0;
|
||||||
return 0; // No current frame to compare
|
return 0; // No current frame to compare
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -9179,6 +9256,8 @@ static int detect_scene_change(tav_encoder_t *enc) {
|
|||||||
&changed_ratio
|
&changed_ratio
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (out_changed_ratio) *out_changed_ratio = changed_ratio;
|
||||||
|
|
||||||
if (is_scene_change) {
|
if (is_scene_change) {
|
||||||
printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio);
|
printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio);
|
||||||
}
|
}
|
||||||
@@ -9364,6 +9443,9 @@ int main(int argc, char *argv[]) {
|
|||||||
{"ezbc", no_argument, 0, 1025},
|
{"ezbc", no_argument, 0, 1025},
|
||||||
{"separate-audio-track", no_argument, 0, 1026},
|
{"separate-audio-track", no_argument, 0, 1026},
|
||||||
{"pcm8-audio", no_argument, 0, 1027},
|
{"pcm8-audio", no_argument, 0, 1027},
|
||||||
|
{"pcm-audio", no_argument, 0, 1027},
|
||||||
|
{"native-audio", no_argument, 0, 1027},
|
||||||
|
{"native-audio-format", no_argument, 0, 1027},
|
||||||
{"help", no_argument, 0, '?'},
|
{"help", no_argument, 0, '?'},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
@@ -9478,6 +9560,7 @@ int main(int argc, char *argv[]) {
|
|||||||
break;
|
break;
|
||||||
case 1006: // --intra-only
|
case 1006: // --intra-only
|
||||||
enc->intra_only = 1;
|
enc->intra_only = 1;
|
||||||
|
enc->enable_temporal_dwt = 0;
|
||||||
break;
|
break;
|
||||||
case 1007: // --no-perceptual-tuning
|
case 1007: // --no-perceptual-tuning
|
||||||
enc->perceptual_tuning = 0;
|
enc->perceptual_tuning = 0;
|
||||||
@@ -9518,6 +9601,7 @@ int main(int argc, char *argv[]) {
|
|||||||
break;
|
break;
|
||||||
case 1017: // --enable-delta
|
case 1017: // --enable-delta
|
||||||
enc->use_delta_encoding = 1;
|
enc->use_delta_encoding = 1;
|
||||||
|
enc->enable_temporal_dwt = 0;
|
||||||
break;
|
break;
|
||||||
case 1018: // --delta-haar
|
case 1018: // --delta-haar
|
||||||
enc->delta_haar_levels = CLAMP(atoi(optarg), 0, 6);
|
enc->delta_haar_levels = CLAMP(atoi(optarg), 0, 6);
|
||||||
@@ -9697,7 +9781,7 @@ int main(int argc, char *argv[]) {
|
|||||||
enc->ffmpeg_version = get_ffmpeg_version();
|
enc->ffmpeg_version = get_ffmpeg_version();
|
||||||
struct timeval tv;
|
struct timeval tv;
|
||||||
gettimeofday(&tv, NULL);
|
gettimeofday(&tv, NULL);
|
||||||
enc->creation_time_ns = (uint64_t)tv.tv_sec * 1000000000ULL + (uint64_t)tv.tv_usec * 1000ULL;
|
enc->creation_time_us = (uint64_t)tv.tv_sec * 1000000ULL + (uint64_t)tv.tv_usec * 1ULL;
|
||||||
|
|
||||||
// Start FFmpeg process for video input (using TEV-compatible filtergraphs)
|
// Start FFmpeg process for video input (using TEV-compatible filtergraphs)
|
||||||
if (enc->test_mode) {
|
if (enc->test_mode) {
|
||||||
@@ -9862,7 +9946,8 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Determine frame type
|
// Determine frame type
|
||||||
int is_scene_change = detect_scene_change(enc);
|
double scene_change_ratio = 0.0;
|
||||||
|
int is_scene_change = detect_scene_change(enc, &scene_change_ratio);
|
||||||
int is_time_keyframe = (frame_count % TEMPORAL_GOP_SIZE) == 0;
|
int is_time_keyframe = (frame_count % TEMPORAL_GOP_SIZE) == 0;
|
||||||
|
|
||||||
// Check if we can use SKIP mode (DWT coefficient-based detection)
|
// Check if we can use SKIP mode (DWT coefficient-based detection)
|
||||||
@@ -9926,6 +10011,109 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
if (enc->enable_temporal_dwt) {
|
if (enc->enable_temporal_dwt) {
|
||||||
// GOP-based temporal 3D DWT encoding path
|
// GOP-based temporal 3D DWT encoding path
|
||||||
|
|
||||||
|
// Two-tier scene change handling:
|
||||||
|
// - Hard scene change (ratio >= 0.7): Force I-frames for current GOP, then flush
|
||||||
|
// - Soft scene change (0.5 <= ratio < 0.7): Only flush if GOP >= 10 frames (enforce minimum GOP size)
|
||||||
|
// - No scene change (ratio < 0.5): Don't flush
|
||||||
|
|
||||||
|
int should_flush_scene_change = 0;
|
||||||
|
int force_iframes_for_scene_change = 0;
|
||||||
|
|
||||||
|
if (is_scene_change && enc->temporal_gop_frame_count > 0) {
|
||||||
|
|
||||||
|
if (scene_change_ratio >= SCENE_CHANGE_THRESHOLD_HARD) {
|
||||||
|
// Hard scene change: Force current GOP to be I-frames, then flush immediately
|
||||||
|
should_flush_scene_change = 1;
|
||||||
|
force_iframes_for_scene_change = 1;
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("Hard scene change (ratio=%.4f) at frame %d, forcing I-frames and flushing GOP...\n",
|
||||||
|
scene_change_ratio, frame_count);
|
||||||
|
}
|
||||||
|
} else if (enc->temporal_gop_frame_count >= TEMPORAL_GOP_SIZE_MIN) {
|
||||||
|
// Soft scene change with sufficient GOP size: Flush normally
|
||||||
|
should_flush_scene_change = 1;
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("Soft scene change (ratio=%.4f) at frame %d with GOP size %d >= %d, flushing GOP...\n",
|
||||||
|
scene_change_ratio, frame_count, enc->temporal_gop_frame_count, TEMPORAL_GOP_SIZE_MIN);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Soft scene change with small GOP: Ignore to enforce minimum GOP size
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("Soft scene change (ratio=%.4f) at frame %d ignored (GOP size %d < %d)\n",
|
||||||
|
scene_change_ratio, frame_count, enc->temporal_gop_frame_count, TEMPORAL_GOP_SIZE_MIN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (should_flush_scene_change) {
|
||||||
|
// Get quantiser
|
||||||
|
int qY = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
|
||||||
|
|
||||||
|
if (force_iframes_for_scene_change) {
|
||||||
|
// Hard scene change: Encode each frame in GOP as separate I-frame (GOP size = 1)
|
||||||
|
// This ensures clean cut at major scene transitions
|
||||||
|
size_t total_bytes = 0;
|
||||||
|
int original_gop_frame_count = enc->temporal_gop_frame_count;
|
||||||
|
|
||||||
|
for (int i = 0; i < original_gop_frame_count; i++) {
|
||||||
|
// Temporarily set up GOP to contain only this single frame
|
||||||
|
// Save position 0 pointers
|
||||||
|
uint8_t *saved_rgb_frame0 = enc->temporal_gop_rgb_frames[0];
|
||||||
|
float *saved_y_frame0 = enc->temporal_gop_y_frames[0];
|
||||||
|
float *saved_co_frame0 = enc->temporal_gop_co_frames[0];
|
||||||
|
float *saved_cg_frame0 = enc->temporal_gop_cg_frames[0];
|
||||||
|
|
||||||
|
// Set up single-frame GOP by moving frame i to position 0
|
||||||
|
enc->temporal_gop_rgb_frames[0] = enc->temporal_gop_rgb_frames[i];
|
||||||
|
enc->temporal_gop_y_frames[0] = enc->temporal_gop_y_frames[i];
|
||||||
|
enc->temporal_gop_co_frames[0] = enc->temporal_gop_co_frames[i];
|
||||||
|
enc->temporal_gop_cg_frames[0] = enc->temporal_gop_cg_frames[i];
|
||||||
|
enc->temporal_gop_frame_count = 1;
|
||||||
|
|
||||||
|
// Encode single frame as I-frame (GOP size 1)
|
||||||
|
int frame_num = frame_count - original_gop_frame_count + i;
|
||||||
|
size_t bytes = gop_flush(enc, enc->output_fp, qY, &frame_num, 1);
|
||||||
|
|
||||||
|
if (bytes == 0) {
|
||||||
|
fprintf(stderr, "Error: Failed to encode I-frame %d during hard scene change\n", frame_num);
|
||||||
|
enc->temporal_gop_frame_count = original_gop_frame_count;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
total_bytes += bytes;
|
||||||
|
|
||||||
|
// Restore position 0 pointers
|
||||||
|
enc->temporal_gop_rgb_frames[0] = saved_rgb_frame0;
|
||||||
|
enc->temporal_gop_y_frames[0] = saved_y_frame0;
|
||||||
|
enc->temporal_gop_co_frames[0] = saved_co_frame0;
|
||||||
|
enc->temporal_gop_cg_frames[0] = saved_cg_frame0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore original frame count
|
||||||
|
enc->temporal_gop_frame_count = original_gop_frame_count;
|
||||||
|
packet_size = total_bytes;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Soft scene change: Flush GOP normally as temporal GOP
|
||||||
|
int *gop_frame_numbers = malloc(enc->temporal_gop_frame_count * sizeof(int));
|
||||||
|
for (int i = 0; i < enc->temporal_gop_frame_count; i++) {
|
||||||
|
gop_frame_numbers[i] = frame_count - enc->temporal_gop_frame_count + i;
|
||||||
|
}
|
||||||
|
|
||||||
|
packet_size = gop_process_and_flush(enc, enc->output_fp, qY,
|
||||||
|
gop_frame_numbers, 1);
|
||||||
|
free(gop_frame_numbers);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (packet_size == 0) {
|
||||||
|
fprintf(stderr, "Error: Failed to flush GOP before scene change at frame %d\n", frame_count);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
gop_reset(enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now add current frame to GOP (will be first frame of new GOP if scene change)
|
||||||
int add_result = temporal_gop_add_frame(enc, enc->current_frame_rgb,
|
int add_result = temporal_gop_add_frame(enc, enc->current_frame_rgb,
|
||||||
enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg);
|
enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg);
|
||||||
|
|
||||||
@@ -9934,7 +10122,7 @@ int main(int argc, char *argv[]) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if GOP should be flushed
|
// Check if GOP should be flushed (after adding frame)
|
||||||
int should_flush = 0;
|
int should_flush = 0;
|
||||||
int force_flush = 0;
|
int force_flush = 0;
|
||||||
|
|
||||||
@@ -9945,23 +10133,24 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("GOP buffer full (%d frames), flushing...\n", enc->temporal_gop_frame_count);
|
printf("GOP buffer full (%d frames), flushing...\n", enc->temporal_gop_frame_count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Flush if large motion detected (breaks temporal coherence)
|
// Flush if large motion detected (breaks temporal coherence) AND GOP is large enough
|
||||||
else if (gop_should_flush_motion(enc)) {
|
else if (gop_should_flush_motion(enc) && enc->temporal_gop_frame_count >= TEMPORAL_GOP_SIZE_MIN) {
|
||||||
should_flush = 1;
|
should_flush = 1;
|
||||||
if (enc->verbose) {
|
if (enc->verbose) {
|
||||||
printf("Large motion detected (>24 pixels), flushing GOP early...\n");
|
printf("Large motion detected (>24 pixels) with GOP size %d >= %d, flushing GOP early...\n",
|
||||||
|
enc->temporal_gop_frame_count, TEMPORAL_GOP_SIZE_MIN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Flush if scene change detected
|
else if (gop_should_flush_motion(enc) && enc->temporal_gop_frame_count < TEMPORAL_GOP_SIZE_MIN) {
|
||||||
else if (is_scene_change && enc->temporal_gop_frame_count > 1) {
|
// Large motion but GOP too small - keep accumulating
|
||||||
should_flush = 1;
|
|
||||||
force_flush = 1; // Skip internal scene change detection (already detected)
|
|
||||||
if (enc->verbose) {
|
if (enc->verbose) {
|
||||||
printf("Scene change detected, flushing GOP early...\n");
|
printf("Large motion detected but GOP size %d < %d, continuing to accumulate...\n",
|
||||||
|
enc->temporal_gop_frame_count, TEMPORAL_GOP_SIZE_MIN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Note: Scene change flush is now handled BEFORE adding frame (above)
|
||||||
|
|
||||||
// Flush GOP if needed
|
// Flush GOP if needed (for reasons other than scene change)
|
||||||
if (should_flush) {
|
if (should_flush) {
|
||||||
// Build frame number array for this GOP
|
// Build frame number array for this GOP
|
||||||
int *gop_frame_numbers = malloc(enc->temporal_gop_frame_count * sizeof(int));
|
int *gop_frame_numbers = malloc(enc->temporal_gop_frame_count * sizeof(int));
|
||||||
@@ -9982,9 +10171,10 @@ int main(int argc, char *argv[]) {
|
|||||||
fprintf(stderr, "Error: Failed to flush GOP at frame %d\n", frame_count);
|
fprintf(stderr, "Error: Failed to flush GOP at frame %d\n", frame_count);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else if (packet_size == 0) {
|
||||||
// Frame added to GOP buffer but not flushed yet
|
// Frame added to GOP buffer but not flushed yet
|
||||||
// Skip normal packet processing (no packet written yet)
|
// Skip normal packet processing (no packet written yet)
|
||||||
|
// Note: packet_size might already be > 0 from scene change flush above
|
||||||
packet_size = 0;
|
packet_size = 0;
|
||||||
}
|
}
|
||||||
} else if (enc->enable_residual_coding) {
|
} else if (enc->enable_residual_coding) {
|
||||||
|
|||||||
@@ -260,7 +260,7 @@ void print_extended_header(FILE *fp, int verbose) {
|
|||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
if (strcmp(key, "CDAT") == 0) {
|
if (strcmp(key, "CDAT") == 0) {
|
||||||
time_t time_sec = value / 1000000000ULL;
|
time_t time_sec = value / 1000000ULL; // microseconds
|
||||||
struct tm *time_info = gmtime(&time_sec);
|
struct tm *time_info = gmtime(&time_sec);
|
||||||
if (time_info) {
|
if (time_info) {
|
||||||
char time_str[64];
|
char time_str[64];
|
||||||
@@ -268,7 +268,7 @@ void print_extended_header(FILE *fp, int verbose) {
|
|||||||
printf("%s", time_str);
|
printf("%s", time_str);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
printf("%.6f seconds", value / 1000000000.0);
|
printf("%.6f seconds", value / 1000000000.0); // nanoseconds
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (value_type == 0x10) { // Bytes
|
} else if (value_type == 0x10) { // Bytes
|
||||||
|
|||||||
Reference in New Issue
Block a user