mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV: half-fixed 3d dwt playback
This commit is contained in:
@@ -355,11 +355,12 @@ let decodeHeight = isInterlaced ? (header.height >> 1) : header.height
|
|||||||
const FRAME_PIXELS = header.width * header.height
|
const FRAME_PIXELS = header.width * header.height
|
||||||
const FRAME_SIZE = FRAME_PIXELS * 3 // RGB buffer size
|
const FRAME_SIZE = FRAME_PIXELS * 3 // RGB buffer size
|
||||||
|
|
||||||
// Double-buffering: Fixed slot sizes in videoBuffer (32 MB total)
|
// Triple-buffering: Fixed slot sizes in videoBuffer (48 MB total)
|
||||||
const MAX_GOP_SIZE = 21 // Maximum frames per slot (21 * 752KB = ~15MB per slot)
|
const BUFFER_SLOTS = 3 // Three slots: playing, ready, decoding
|
||||||
|
const MAX_GOP_SIZE = 21 // Maximum frames per slot (21 * 752KB = ~15.8MB per slot)
|
||||||
const SLOT_SIZE = MAX_GOP_SIZE * FRAME_SIZE // Fixed slot size regardless of actual GOP size
|
const SLOT_SIZE = MAX_GOP_SIZE * FRAME_SIZE // Fixed slot size regardless of actual GOP size
|
||||||
|
|
||||||
console.log(`Double-buffering: Max ${MAX_GOP_SIZE} frames/slot, ${(SLOT_SIZE / 1048576).toFixed(1)}MB per slot`)
|
console.log(`Triple-buffering: ${BUFFER_SLOTS} slots, max ${MAX_GOP_SIZE} frames/slot, ${(SLOT_SIZE / 1048576).toFixed(1)}MB per slot`)
|
||||||
|
|
||||||
const RGB_BUFFER_A = sys.malloc(FRAME_SIZE)
|
const RGB_BUFFER_A = sys.malloc(FRAME_SIZE)
|
||||||
const RGB_BUFFER_B = sys.malloc(FRAME_SIZE)
|
const RGB_BUFFER_B = sys.malloc(FRAME_SIZE)
|
||||||
@@ -484,17 +485,18 @@ let currentFileIndex = 1 // Track which file we're playing in concatenated stre
|
|||||||
let totalFilesProcessed = 0
|
let totalFilesProcessed = 0
|
||||||
let decoderDbgInfo = {}
|
let decoderDbgInfo = {}
|
||||||
|
|
||||||
// GOP double-buffering state
|
// GOP triple-buffering state (3 slots: playing, ready, decoding)
|
||||||
let currentGopBufferSlot = 0 // Which buffer slot is currently being displayed (0 or 1)
|
let currentGopBufferSlot = 0 // Which buffer slot is currently being displayed (0, 1, or 2)
|
||||||
let currentGopSize = 0 // Number of frames in current GOP being displayed
|
let currentGopSize = 0 // Number of frames in current GOP being displayed
|
||||||
let currentGopFrameIndex = 0 // Which frame of current GOP we're displaying
|
let currentGopFrameIndex = 0 // Which frame of current GOP we're displaying
|
||||||
let nextGopData = null // Buffered next GOP packet data for background decode
|
let readyGopData = null // GOP that's already decoded and ready to play (next in line)
|
||||||
|
let decodingGopData = null // GOP currently being decoded in background
|
||||||
let asyncDecodeInProgress = false // Track if async decode is running
|
let asyncDecodeInProgress = false // Track if async decode is running
|
||||||
let asyncDecodeSlot = 0 // Which slot the async decode is targeting
|
let asyncDecodeSlot = 0 // Which slot the async decode is targeting
|
||||||
let asyncDecodeGopSize = 0 // Size of GOP being decoded async
|
let asyncDecodeGopSize = 0 // Size of GOP being decoded async
|
||||||
let asyncDecodePtr = 0 // Compressed data pointer to free after decode
|
let asyncDecodePtr = 0 // Compressed data pointer to free after decode
|
||||||
let asyncDecodeStartTime = 0 // When async decode started (for diagnostics)
|
let asyncDecodeStartTime = 0 // When async decode started (for diagnostics)
|
||||||
let shouldReadPackets = true // Gate packet reading: false when both buffers are full
|
let shouldReadPackets = true // Gate packet reading: false when all 3 buffers are full
|
||||||
|
|
||||||
let cueElements = []
|
let cueElements = []
|
||||||
let currentCueIndex = -1 // Track current cue position
|
let currentCueIndex = -1 // Track current cue position
|
||||||
@@ -510,12 +512,19 @@ function cleanupAsyncDecode() {
|
|||||||
asyncDecodeGopSize = 0
|
asyncDecodeGopSize = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Free background GOP decode memory if in progress
|
// Free ready GOP memory if present
|
||||||
if (nextGopData !== null && nextGopData.compressedPtr && nextGopData.compressedPtr !== 0) {
|
if (readyGopData !== null && readyGopData.compressedPtr && readyGopData.compressedPtr !== 0) {
|
||||||
sys.free(nextGopData.compressedPtr)
|
sys.free(readyGopData.compressedPtr)
|
||||||
nextGopData.compressedPtr = 0
|
readyGopData.compressedPtr = 0
|
||||||
}
|
}
|
||||||
nextGopData = null
|
readyGopData = null
|
||||||
|
|
||||||
|
// Free decoding GOP memory if present
|
||||||
|
if (decodingGopData !== null && decodingGopData.compressedPtr && decodingGopData.compressedPtr !== 0) {
|
||||||
|
sys.free(decodingGopData.compressedPtr)
|
||||||
|
decodingGopData.compressedPtr = 0
|
||||||
|
}
|
||||||
|
decodingGopData = null
|
||||||
|
|
||||||
// Reset GOP playback state
|
// Reset GOP playback state
|
||||||
currentGopSize = 0
|
currentGopSize = 0
|
||||||
@@ -751,7 +760,10 @@ let paused = false
|
|||||||
try {
|
try {
|
||||||
let t1 = sys.nanoTime()
|
let t1 = sys.nanoTime()
|
||||||
|
|
||||||
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) {
|
// Continue loop while:
|
||||||
|
// 1. Reading packets (not EOF yet), OR
|
||||||
|
// 2. There are buffered GOPs to play (after EOF)
|
||||||
|
while (!stopPlay && (seqread.getReadCount() < FILE_LENGTH || currentGopSize > 0 || readyGopData !== null || decodingGopData !== null || asyncDecodeInProgress)) {
|
||||||
|
|
||||||
|
|
||||||
// Handle interactive controls
|
// Handle interactive controls
|
||||||
@@ -866,9 +878,10 @@ try {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GATED PACKET READING
|
// GATED PACKET READING
|
||||||
// Stop reading when both buffers are full (GOP playing + GOP decoding/ready)
|
// Stop reading when all 3 buffers are full (GOP playing + ready GOP + decoding GOP)
|
||||||
// Resume reading when GOP finishes (one buffer becomes free)
|
// Resume reading when GOP finishes (one buffer becomes free)
|
||||||
if (shouldReadPackets && !paused) {
|
// Also stop reading at EOF
|
||||||
|
if (shouldReadPackets && !paused && seqread.getReadCount() < FILE_LENGTH) {
|
||||||
// Read packet header (record position before reading for I-frame tracking)
|
// Read packet header (record position before reading for I-frame tracking)
|
||||||
let packetOffset = seqread.getReadCount()
|
let packetOffset = seqread.getReadCount()
|
||||||
var packetType = seqread.readOneByte()
|
var packetType = seqread.readOneByte()
|
||||||
@@ -1051,32 +1064,15 @@ try {
|
|||||||
|
|
||||||
// Read GOP packet data
|
// Read GOP packet data
|
||||||
const gopSize = seqread.readOneByte()
|
const gopSize = seqread.readOneByte()
|
||||||
const marginLeft = seqread.readOneByte()
|
|
||||||
const marginRight = seqread.readOneByte()
|
|
||||||
const marginTop = seqread.readOneByte()
|
|
||||||
const marginBottom = seqread.readOneByte()
|
|
||||||
|
|
||||||
const canvasWidth = header.width + marginLeft + marginRight
|
|
||||||
const canvasHeight = header.height + marginTop + marginBottom
|
|
||||||
|
|
||||||
// Read motion vectors (1/16-pixel units, int16)
|
|
||||||
let motionX = new Array(gopSize)
|
|
||||||
let motionY = new Array(gopSize)
|
|
||||||
|
|
||||||
for (let i = 0; i < gopSize; i++) {
|
|
||||||
let mx = seqread.readShort()
|
|
||||||
let my = seqread.readShort()
|
|
||||||
motionX[i] = (mx > 32767) ? (mx - 65536) : mx
|
|
||||||
motionY[i] = (my > 32767) ? (my - 65536) : my
|
|
||||||
}
|
|
||||||
|
|
||||||
const compressedSize = seqread.readInt()
|
const compressedSize = seqread.readInt()
|
||||||
let compressedPtr = seqread.readBytes(compressedSize)
|
let compressedPtr = seqread.readBytes(compressedSize)
|
||||||
updateDataRateBin(compressedSize)
|
updateDataRateBin(compressedSize)
|
||||||
|
|
||||||
// DOUBLE-BUFFERING LOGIC:
|
// TRIPLE-BUFFERING LOGIC (3 slots: playing, ready, decoding):
|
||||||
// - If no GOP is currently playing: decode immediately to current slot
|
// - If no GOP playing: decode first GOP to slot 0
|
||||||
// - Otherwise: buffer this GOP for decode during next GOP's playback
|
// - If GOP playing but no ready GOP: decode to ready slot (next in rotation)
|
||||||
|
// - If GOP playing and ready GOP exists but no decoding: decode to decoding slot
|
||||||
|
// - Otherwise: all 3 buffers full, ignore packet
|
||||||
|
|
||||||
// Check GOP size fits in slot
|
// Check GOP size fits in slot
|
||||||
if (gopSize > MAX_GOP_SIZE) {
|
if (gopSize > MAX_GOP_SIZE) {
|
||||||
@@ -1086,11 +1082,11 @@ try {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (currentGopSize === 0 && !asyncDecodeInProgress) {
|
if (currentGopSize === 0 && !asyncDecodeInProgress) {
|
||||||
// No active GOP and no decode in progress: decode asynchronously and start playback when ready
|
// Case 1: No active GOP and no decode in progress - decode first GOP
|
||||||
const bufferSlot = currentGopBufferSlot
|
const bufferSlot = currentGopBufferSlot
|
||||||
const bufferOffset = bufferSlot * SLOT_SIZE
|
const bufferOffset = bufferSlot * SLOT_SIZE
|
||||||
|
|
||||||
// Defensive: free any old async decode memory (shouldn't happen but be safe)
|
// Defensive: free any old async decode memory
|
||||||
if (asyncDecodePtr !== 0) {
|
if (asyncDecodePtr !== 0) {
|
||||||
sys.free(asyncDecodePtr)
|
sys.free(asyncDecodePtr)
|
||||||
asyncDecodePtr = 0
|
asyncDecodePtr = 0
|
||||||
@@ -1099,10 +1095,7 @@ try {
|
|||||||
// Start async decode
|
// Start async decode
|
||||||
graphics.tavDecodeGopToVideoBufferAsync(
|
graphics.tavDecodeGopToVideoBufferAsync(
|
||||||
compressedPtr, compressedSize, gopSize,
|
compressedPtr, compressedSize, gopSize,
|
||||||
motionX, motionY,
|
|
||||||
header.width, header.height,
|
header.width, header.height,
|
||||||
canvasWidth, canvasHeight,
|
|
||||||
marginLeft, marginTop,
|
|
||||||
header.qualityLevel,
|
header.qualityLevel,
|
||||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||||
header.channelLayout,
|
header.channelLayout,
|
||||||
@@ -1114,49 +1107,25 @@ try {
|
|||||||
asyncDecodeInProgress = true
|
asyncDecodeInProgress = true
|
||||||
asyncDecodeSlot = bufferSlot
|
asyncDecodeSlot = bufferSlot
|
||||||
asyncDecodeGopSize = gopSize
|
asyncDecodeGopSize = gopSize
|
||||||
asyncDecodePtr = compressedPtr // Will free after decode completes
|
asyncDecodePtr = compressedPtr
|
||||||
asyncDecodeStartTime = sys.nanoTime()
|
asyncDecodeStartTime = sys.nanoTime()
|
||||||
|
|
||||||
// Note: compressedPtr will be freed after decode completes
|
|
||||||
// We'll check for completion in main loop and start playback then
|
|
||||||
if (interactive) {
|
|
||||||
console.log(`[GOP] Started async decode of first GOP (slot ${bufferSlot}, ${gopSize} frames)`)
|
|
||||||
}
|
|
||||||
} else if (currentGopSize === 0 && asyncDecodeInProgress) {
|
} else if (currentGopSize === 0 && asyncDecodeInProgress) {
|
||||||
// First GOP still decoding but another arrived - ignore it to avoid cancelling first GOP
|
// Case 2: First GOP still decoding - ignore to avoid cancellation
|
||||||
if (interactive) {
|
|
||||||
console.log(`[GOP] Warning: GOP arrived while first GOP still decoding - ignoring to avoid cancellation`)
|
|
||||||
}
|
|
||||||
sys.free(compressedPtr)
|
sys.free(compressedPtr)
|
||||||
} else if (currentGopSize > 0 && !asyncDecodeInProgress) {
|
|
||||||
// GOP is playing and first GOP decode is done: decode this one to other slot in background (async)
|
} else if (currentGopSize > 0 && readyGopData === null && !asyncDecodeInProgress && graphics.tavDecodeGopIsComplete()) {
|
||||||
const nextSlot = 1 - currentGopBufferSlot
|
// Case 3: GOP playing, no ready GOP, no decode in progress - decode to ready slot
|
||||||
|
const nextSlot = (currentGopBufferSlot + 1) % BUFFER_SLOTS
|
||||||
const nextOffset = nextSlot * SLOT_SIZE
|
const nextOffset = nextSlot * SLOT_SIZE
|
||||||
|
|
||||||
// DIAGNOSTIC: Measure background decode timing
|
|
||||||
const framesRemaining = currentGopSize - currentGopFrameIndex
|
const framesRemaining = currentGopSize - currentGopFrameIndex
|
||||||
const timeRemaining = framesRemaining * FRAME_TIME * 1000.0 // milliseconds
|
const timeRemaining = framesRemaining * FRAME_TIME * 1000.0
|
||||||
|
|
||||||
// If previous GOP still decoding, free its memory (will be overwritten)
|
// Start async decode to ready slot
|
||||||
if (nextGopData !== null && !nextGopData.decoded && nextGopData.compressedPtr && nextGopData.compressedPtr !== 0) {
|
|
||||||
if (interactive) {
|
|
||||||
console.log(`[GOP] Warning: New GOP arrived before previous decode completed - freeing old data`)
|
|
||||||
}
|
|
||||||
sys.free(nextGopData.compressedPtr)
|
|
||||||
nextGopData.compressedPtr = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
if (interactive) {
|
|
||||||
console.log(`[GOP] Background decode started: frame ${currentGopFrameIndex}/${currentGopSize}, ${framesRemaining} frames (${timeRemaining.toFixed(0)}ms) remaining`)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start async background decode
|
|
||||||
graphics.tavDecodeGopToVideoBufferAsync(
|
graphics.tavDecodeGopToVideoBufferAsync(
|
||||||
compressedPtr, compressedSize, gopSize,
|
compressedPtr, compressedSize, gopSize,
|
||||||
motionX, motionY,
|
|
||||||
header.width, header.height,
|
header.width, header.height,
|
||||||
canvasWidth, canvasHeight,
|
|
||||||
marginLeft, marginTop,
|
|
||||||
header.qualityLevel,
|
header.qualityLevel,
|
||||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||||
header.channelLayout,
|
header.channelLayout,
|
||||||
@@ -1165,20 +1134,44 @@ try {
|
|||||||
nextOffset
|
nextOffset
|
||||||
)
|
)
|
||||||
|
|
||||||
// Mark as decoding (will check completion in main loop)
|
readyGopData = {
|
||||||
nextGopData = {
|
|
||||||
gopSize: gopSize,
|
gopSize: gopSize,
|
||||||
decoded: false, // Will be set to true when async decode completes
|
|
||||||
slot: nextSlot,
|
slot: nextSlot,
|
||||||
compressedPtr: compressedPtr, // Will free after decode completes
|
compressedPtr: compressedPtr,
|
||||||
startTime: sys.nanoTime(),
|
startTime: sys.nanoTime(),
|
||||||
timeRemaining: timeRemaining
|
timeRemaining: timeRemaining
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// Fallback: unexpected state, just free the memory
|
} else if (currentGopSize > 0 && readyGopData !== null && decodingGopData === null && !asyncDecodeInProgress && graphics.tavDecodeGopIsComplete()) {
|
||||||
if (interactive) {
|
// Case 4: GOP playing, ready GOP exists, no decoding GOP, no decode in progress - decode to decoding slot
|
||||||
console.log(`[GOP] Warning: Unexpected state - currentGopSize=${currentGopSize}, asyncDecodeInProgress=${asyncDecodeInProgress} - freeing GOP data`)
|
const decodingSlot = (currentGopBufferSlot + 2) % BUFFER_SLOTS
|
||||||
|
const decodingOffset = decodingSlot * SLOT_SIZE
|
||||||
|
|
||||||
|
const framesRemaining = currentGopSize - currentGopFrameIndex
|
||||||
|
const timeRemaining = framesRemaining * FRAME_TIME * 1000.0
|
||||||
|
|
||||||
|
// Start async decode to decoding slot
|
||||||
|
graphics.tavDecodeGopToVideoBufferAsync(
|
||||||
|
compressedPtr, compressedSize, gopSize,
|
||||||
|
header.width, header.height,
|
||||||
|
header.qualityLevel,
|
||||||
|
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||||
|
header.channelLayout,
|
||||||
|
header.waveletFilter, header.decompLevels, 2,
|
||||||
|
header.entropyCoder,
|
||||||
|
decodingOffset
|
||||||
|
)
|
||||||
|
|
||||||
|
decodingGopData = {
|
||||||
|
gopSize: gopSize,
|
||||||
|
slot: decodingSlot,
|
||||||
|
compressedPtr: compressedPtr,
|
||||||
|
startTime: sys.nanoTime(),
|
||||||
|
timeRemaining: timeRemaining
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
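// Case 5: No slot can accept this GOP - either all 3 buffers are full (playing + ready + decoding) or a previous async decode has not completed yet - so the packet is dropped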
|
||||||
sys.free(compressedPtr)
|
sys.free(compressedPtr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1187,13 +1180,10 @@ try {
|
|||||||
const framesInGOP = seqread.readOneByte()
|
const framesInGOP = seqread.readOneByte()
|
||||||
// Ignore - we display frames based on time accumulator, not this packet
|
// Ignore - we display frames based on time accumulator, not this packet
|
||||||
|
|
||||||
// CRITICAL: Stop reading packets if both buffers are full
|
// CRITICAL: Stop reading packets if all 3 buffers are full
|
||||||
// (one GOP playing + one GOP ready/decoding)
|
// (one GOP playing + ready GOP + decoding GOP)
|
||||||
if (currentGopSize > 0 && nextGopData !== null) {
|
if (currentGopSize > 0 && readyGopData !== null && decodingGopData !== null) {
|
||||||
shouldReadPackets = false
|
shouldReadPackets = false
|
||||||
if (interactive) {
|
|
||||||
console.log(`[GOP] Both buffers full - stopping packet reading until current GOP finishes`)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (packetType === TAV_PACKET_AUDIO_MP2) {
|
else if (packetType === TAV_PACKET_AUDIO_MP2) {
|
||||||
@@ -1326,9 +1316,9 @@ try {
|
|||||||
// Resume packet reading to get next GOP (only one buffer occupied now)
|
// Resume packet reading to get next GOP (only one buffer occupied now)
|
||||||
shouldReadPackets = true
|
shouldReadPackets = true
|
||||||
|
|
||||||
if (interactive) {
|
// if (interactive) {
|
||||||
console.log(`[GOP] First GOP ready (slot ${asyncDecodeSlot}, ${asyncDecodeGopSize} frames) in ${decodeTime.toFixed(1)}ms - starting playback`)
|
// console.log(`[GOP] First GOP ready (slot ${asyncDecodeSlot}, ${asyncDecodeGopSize} frames) in ${decodeTime.toFixed(1)}ms - starting playback`)
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Free compressed data
|
// Free compressed data
|
||||||
sys.free(asyncDecodePtr)
|
sys.free(asyncDecodePtr)
|
||||||
@@ -1374,44 +1364,37 @@ try {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 4 & 7: GOP finished? Wait for background decode, then transition
|
// Step 4-7: GOP finished? Transition to ready GOP (triple-buffering)
|
||||||
if (!paused && currentGopSize > 0 && currentGopFrameIndex >= currentGopSize) {
|
if (!paused && currentGopSize > 0 && currentGopFrameIndex >= currentGopSize) {
|
||||||
if (nextGopData !== null) {
|
if (readyGopData !== null) {
|
||||||
// Wait for background decode to complete
|
// Ready GOP exists - wait for it to finish decoding if still in progress
|
||||||
while (!graphics.tavDecodeGopIsComplete() && !paused) {
|
while (!graphics.tavDecodeGopIsComplete() && !paused) {
|
||||||
sys.sleep(1)
|
sys.sleep(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!paused) {
|
if (!paused) {
|
||||||
const [r1, r2] = graphics.tavDecodeGopGetResult()
|
const [r1, r2] = graphics.tavDecodeGopGetResult()
|
||||||
decodeTime = (sys.nanoTime() - nextGopData.startTime) / 1000000.0
|
decodeTime = (sys.nanoTime() - readyGopData.startTime) / 1000000.0
|
||||||
|
|
||||||
if (interactive) {
|
|
||||||
const margin = nextGopData.timeRemaining - decodeTime
|
|
||||||
const status = margin > 0 ? "✓ ON TIME" : "✗ TOO LATE"
|
|
||||||
console.log(`[GOP] Background decode finished in ${decodeTime.toFixed(1)}ms (margin: ${margin.toFixed(0)}ms) ${status}`)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Free compressed data
|
// Free compressed data
|
||||||
sys.free(nextGopData.compressedPtr)
|
sys.free(readyGopData.compressedPtr)
|
||||||
|
|
||||||
// Transition to next GOP
|
// Transition to ready GOP
|
||||||
currentGopBufferSlot = 1 - currentGopBufferSlot
|
currentGopBufferSlot = readyGopData.slot
|
||||||
currentGopSize = nextGopData.gopSize
|
currentGopSize = readyGopData.gopSize
|
||||||
currentGopFrameIndex = 0
|
currentGopFrameIndex = 0
|
||||||
nextGopData = null
|
|
||||||
|
|
||||||
// Resume packet reading now that one buffer is free
|
// Promote decoding GOP to ready GOP
|
||||||
|
readyGopData = decodingGopData
|
||||||
|
decodingGopData = null
|
||||||
|
|
||||||
|
// Resume packet reading now that one buffer is free (decoding slot available)
|
||||||
shouldReadPackets = true
|
shouldReadPackets = true
|
||||||
|
|
||||||
if (interactive) {
|
|
||||||
console.log(`[GOP] ✓ SEAMLESS TRANSITION to next GOP (slot ${currentGopBufferSlot}, ${currentGopSize} frames)`)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// No next GOP available, pause playback
|
// No ready GOP available - hiccup (shouldn't happen with triple-buffering)
|
||||||
if (interactive) {
|
if (interactive) {
|
||||||
console.log(`[GOP] ✗ HICCUP - next GOP NOT READY! Playback paused.`)
|
console.log(`[GOP] ✗ HICCUP - ready GOP NOT READY! Playback paused.`)
|
||||||
}
|
}
|
||||||
currentGopSize = 0
|
currentGopSize = 0
|
||||||
currentGopFrameIndex = 0
|
currentGopFrameIndex = 0
|
||||||
|
|||||||
@@ -1030,9 +1030,9 @@ transmission capability, and region-of-interest coding.
|
|||||||
### List of Keys
|
### List of Keys
|
||||||
- Uint64 BGNT: Video begin time (must be equal to the value of the first Timecode packet)
|
- Uint64 BGNT: Video begin time (must be equal to the value of the first Timecode packet)
|
||||||
- Uint64 ENDT: Video end time (must be equal to the value of the last Timecode packet)
|
- Uint64 ENDT: Video end time (must be equal to the value of the last Timecode packet)
|
||||||
- Uint64 CDAT: Creation time in nanoseconds since UNIX Epoch
|
- Uint64 CDAT: Creation time in nanoseconds since UNIX Epoch (must be in UTC timezone)
|
||||||
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014")
|
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014 (list,of,features)")
|
||||||
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 6.1.2"; the first line of text FFmpeg emits right before the copyright text)
|
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers"; the first line of text FFmpeg emits)
|
||||||
|
|
||||||
|
|
||||||
## Standard Metadata Payload Packet Structure
|
## Standard Metadata Payload Packet Structure
|
||||||
@@ -1062,10 +1062,12 @@ Updated on 2025-10-17 to include canvas expansion margins.
|
|||||||
This packet contains multiple frames encoded as a single spacetime block for optimal
|
This packet contains multiple frames encoded as a single spacetime block for optimal
|
||||||
temporal compression.
|
temporal compression.
|
||||||
|
|
||||||
uint8 Packet Type (0x12)
|
uint8 Packet Type (0x12/0x13)
|
||||||
uint8 GOP Size (number of frames in this GOP, typically 16)
|
uint8 GOP Size (number of frames in this GOP, typically 16)
|
||||||
int16 Motion Vectors X[GOP Size] (quarter-pixel precision for global motion compensation)
|
<if packet type is 0x13>
|
||||||
int16 Motion Vectors Y[GOP Size] (quarter-pixel precision for global motion compensation)
|
uint32 Compressed Size (of the Motion Data that follows)
|
||||||
|
* Zstd-compressed Motion Data
|
||||||
|
<endif>
|
||||||
uint32 Compressed Size
|
uint32 Compressed Size
|
||||||
* Zstd-compressed Unified Block Data
|
* Zstd-compressed Unified Block Data
|
||||||
|
|
||||||
|
|||||||
@@ -6662,194 +6662,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
System.arraycopy(output, 0, frameData, 0, frameData.size)
|
System.arraycopy(output, 0, frameData, 0, frameData.size)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Main GOP unified decoder function.
|
|
||||||
* Decodes a unified 3D DWT GOP block (temporal + spatial) and outputs RGB frames.
|
|
||||||
*
|
|
||||||
* @param compressedDataPtr Pointer to compressed Zstd data
|
|
||||||
* @param compressedSize Size of compressed data
|
|
||||||
* @param gopSize Number of frames in GOP (1-16)
|
|
||||||
* @param motionVectorsX X motion vectors in 1/16-pixel units
|
|
||||||
* @param motionVectorsY Y motion vectors in 1/16-pixel units
|
|
||||||
* @param outputRGBAddrs Array of output RGB buffer addresses
|
|
||||||
* @param width Original frame width (output dimensions)
|
|
||||||
* @param height Original frame height (output dimensions)
|
|
||||||
* @param canvasWidth Expanded canvas width (for motion compensation)
|
|
||||||
* @param canvasHeight Expanded canvas height (for motion compensation)
|
|
||||||
* @param marginLeft Left margin to crop from expanded canvas
|
|
||||||
* @param marginTop Top margin to crop from expanded canvas
|
|
||||||
* @param qIndex Quality index
|
|
||||||
* @param qYGlobal Global Y quantizer
|
|
||||||
* @param qCoGlobal Global Co quantizer
|
|
||||||
* @param qCgGlobal Global Cg quantizer
|
|
||||||
* @param channelLayout Channel layout flags
|
|
||||||
* @param spatialFilter Wavelet filter type
|
|
||||||
* @param spatialLevels Number of spatial DWT levels (default 6)
|
|
||||||
* @param temporalLevels Number of temporal DWT levels (default 2)
|
|
||||||
* @return Number of frames decoded
|
|
||||||
*/
|
|
||||||
fun tavDecodeGopUnified(
|
|
||||||
compressedDataPtr: Long,
|
|
||||||
compressedSize: Int,
|
|
||||||
gopSize: Int,
|
|
||||||
motionVectorsX: IntArray,
|
|
||||||
motionVectorsY: IntArray,
|
|
||||||
outputRGBAddrs: LongArray,
|
|
||||||
width: Int,
|
|
||||||
height: Int,
|
|
||||||
canvasWidth: Int,
|
|
||||||
canvasHeight: Int,
|
|
||||||
marginLeft: Int,
|
|
||||||
marginTop: Int,
|
|
||||||
qIndex: Int,
|
|
||||||
qYGlobal: Int,
|
|
||||||
qCoGlobal: Int,
|
|
||||||
qCgGlobal: Int,
|
|
||||||
channelLayout: Int,
|
|
||||||
spatialFilter: Int = 1,
|
|
||||||
spatialLevels: Int = 6,
|
|
||||||
temporalLevels: Int = 2,
|
|
||||||
entropyCoder: Int = 0
|
|
||||||
): Array<Any> {
|
|
||||||
val dbgOut = HashMap<String, Any>()
|
|
||||||
dbgOut["qY"] = qYGlobal
|
|
||||||
dbgOut["qCo"] = qCoGlobal
|
|
||||||
dbgOut["qCg"] = qCgGlobal
|
|
||||||
dbgOut["frameMode"] = "G"
|
|
||||||
|
|
||||||
// Use expanded canvas dimensions for DWT processing
|
|
||||||
val canvasPixels = canvasWidth * canvasHeight
|
|
||||||
val outputPixels = width * height
|
|
||||||
|
|
||||||
// Step 1: Decompress unified GOP block
|
|
||||||
val compressedData = ByteArray(compressedSize)
|
|
||||||
UnsafeHelper.memcpyRaw(
|
|
||||||
null,
|
|
||||||
vm.usermem.ptr + compressedDataPtr,
|
|
||||||
compressedData,
|
|
||||||
UnsafeHelper.getArrayOffset(compressedData),
|
|
||||||
compressedSize.toLong()
|
|
||||||
)
|
|
||||||
|
|
||||||
val decompressedData = try {
|
|
||||||
ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
|
|
||||||
zstd.readBytes()
|
|
||||||
}
|
|
||||||
} catch (e: Exception) {
|
|
||||||
println("ERROR: Zstd decompression failed: ${e.message}")
|
|
||||||
return arrayOf(0, dbgOut)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 2: Postprocess unified block to per-frame coefficients (based on header's entropy coder field)
|
|
||||||
val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
|
|
||||||
decompressedData,
|
|
||||||
gopSize,
|
|
||||||
canvasPixels, // Use expanded canvas size
|
|
||||||
channelLayout,
|
|
||||||
entropyCoder
|
|
||||||
)
|
|
||||||
|
|
||||||
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
|
|
||||||
val gopY = Array(gopSize) { FloatArray(canvasPixels) }
|
|
||||||
val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
|
|
||||||
val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
|
|
||||||
|
|
||||||
// Step 4: Calculate subband layout for expanded canvas (needed for perceptual dequantization)
|
|
||||||
val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
|
|
||||||
|
|
||||||
// Step 5: Dequantize with temporal-spatial scaling
|
|
||||||
for (t in 0 until gopSize) {
|
|
||||||
val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
|
|
||||||
val temporalScale = getTemporalQuantizerScale(temporalLevel)
|
|
||||||
|
|
||||||
// Apply temporal scaling to base quantizers for each channel
|
|
||||||
val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
|
|
||||||
val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
|
|
||||||
val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
|
|
||||||
|
|
||||||
// Use existing perceptual dequantization for spatial weighting
|
|
||||||
dequantiseDWTSubbandsPerceptual(
|
|
||||||
qIndex, qYGlobal,
|
|
||||||
quantizedCoeffs[t][0], gopY[t],
|
|
||||||
subbands, baseQY, false, spatialLevels, // isChroma=false
|
|
||||||
isEZBCMode
|
|
||||||
)
|
|
||||||
|
|
||||||
dequantiseDWTSubbandsPerceptual(
|
|
||||||
qIndex, qYGlobal,
|
|
||||||
quantizedCoeffs[t][1], gopCo[t],
|
|
||||||
subbands, baseQCo, true, spatialLevels, // isChroma=true
|
|
||||||
isEZBCMode
|
|
||||||
)
|
|
||||||
|
|
||||||
dequantiseDWTSubbandsPerceptual(
|
|
||||||
qIndex, qYGlobal,
|
|
||||||
quantizedCoeffs[t][2], gopCg[t],
|
|
||||||
subbands, baseQCg, true, spatialLevels, // isChroma=true
|
|
||||||
isEZBCMode
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 6: Apply inverse 3D DWT (spatial first, then temporal) on expanded canvas
|
|
||||||
tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
|
||||||
tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
|
||||||
tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
|
||||||
|
|
||||||
// Step 7: Apply inverse motion compensation (shift frames back) on expanded canvas
|
|
||||||
// Note: Motion vectors are in 1/16-pixel units, cumulative relative to frame 0
|
|
||||||
for (t in 1 until gopSize) { // Skip frame 0 (reference)
|
|
||||||
val dx = motionVectorsX[t] / 16 // Convert to pixel units
|
|
||||||
val dy = motionVectorsY[t] / 16
|
|
||||||
|
|
||||||
if (dx != 0 || dy != 0) {
|
|
||||||
applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
|
|
||||||
applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
|
|
||||||
applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 8: Crop expanded canvas to original dimensions and convert to RGB
|
|
||||||
for (t in 0 until gopSize) {
|
|
||||||
val rgbAddr = outputRGBAddrs[t]
|
|
||||||
|
|
||||||
// Crop from expanded canvas (canvasWidth x canvasHeight) to output (width x height)
|
|
||||||
for (row in 0 until height) {
|
|
||||||
for (col in 0 until width) {
|
|
||||||
// Source pixel in expanded canvas
|
|
||||||
val canvasX = col + marginLeft
|
|
||||||
val canvasY = row + marginTop
|
|
||||||
val canvasIdx = canvasY * canvasWidth + canvasX
|
|
||||||
|
|
||||||
// Destination pixel in output buffer
|
|
||||||
val outIdx = row * width + col
|
|
||||||
|
|
||||||
val yVal = gopY[t][canvasIdx]
|
|
||||||
val co = gopCo[t][canvasIdx]
|
|
||||||
val cg = gopCg[t][canvasIdx]
|
|
||||||
|
|
||||||
// YCoCg-R to RGB conversion
|
|
||||||
val tmp = yVal - (cg / 2.0f)
|
|
||||||
val g = cg + tmp
|
|
||||||
val b = tmp - (co / 2.0f)
|
|
||||||
val r = b + co
|
|
||||||
|
|
||||||
// Clamp to 0-255 range
|
|
||||||
val rClamped = r.toInt().coerceIn(0, 255)
|
|
||||||
val gClamped = g.toInt().coerceIn(0, 255)
|
|
||||||
val bClamped = b.toInt().coerceIn(0, 255)
|
|
||||||
|
|
||||||
// Write RGB24 format (3 bytes per pixel)
|
|
||||||
val offset = rgbAddr + outIdx * 3L
|
|
||||||
vm.usermem[offset] = rClamped.toByte()
|
|
||||||
vm.usermem[offset + 1] = gClamped.toByte()
|
|
||||||
vm.usermem[offset + 2] = bClamped.toByte()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return arrayOf(gopSize, dbgOut)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode GOP frames directly into GraphicsAdapter.videoBuffer (Java heap).
|
* Decode GOP frames directly into GraphicsAdapter.videoBuffer (Java heap).
|
||||||
* This avoids allocating GOP frames in VM user memory, saving ~6 MB for 8-frame GOPs.
|
* This avoids allocating GOP frames in VM user memory, saving ~6 MB for 8-frame GOPs.
|
||||||
@@ -6864,14 +6676,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
compressedDataPtr: Long,
|
compressedDataPtr: Long,
|
||||||
compressedSize: Int,
|
compressedSize: Int,
|
||||||
gopSize: Int,
|
gopSize: Int,
|
||||||
motionVectorsX: IntArray,
|
|
||||||
motionVectorsY: IntArray,
|
|
||||||
width: Int,
|
width: Int,
|
||||||
height: Int,
|
height: Int,
|
||||||
canvasWidth: Int,
|
|
||||||
canvasHeight: Int,
|
|
||||||
marginLeft: Int,
|
|
||||||
marginTop: Int,
|
|
||||||
qIndex: Int,
|
qIndex: Int,
|
||||||
qYGlobal: Int,
|
qYGlobal: Int,
|
||||||
qCoGlobal: Int,
|
qCoGlobal: Int,
|
||||||
@@ -6900,7 +6706,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Use expanded canvas dimensions for DWT processing
|
// Use expanded canvas dimensions for DWT processing
|
||||||
val canvasPixels = canvasWidth * canvasHeight
|
|
||||||
val outputPixels = width * height
|
val outputPixels = width * height
|
||||||
|
|
||||||
// Step 1: Decompress unified GOP block
|
// Step 1: Decompress unified GOP block
|
||||||
@@ -6926,18 +6731,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
|
val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
|
||||||
decompressedData,
|
decompressedData,
|
||||||
gopSize,
|
gopSize,
|
||||||
canvasPixels,
|
outputPixels,
|
||||||
channelLayout,
|
channelLayout,
|
||||||
entropyCoder
|
entropyCoder
|
||||||
)
|
)
|
||||||
|
|
||||||
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
|
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
|
||||||
val gopY = Array(gopSize) { FloatArray(canvasPixels) }
|
val gopY = Array(gopSize) { FloatArray(outputPixels) }
|
||||||
val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
|
val gopCo = Array(gopSize) { FloatArray(outputPixels) }
|
||||||
val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
|
val gopCg = Array(gopSize) { FloatArray(outputPixels) }
|
||||||
|
|
||||||
// Step 4: Calculate subband layout for expanded canvas
|
// Step 4: Calculate subband layout for expanded canvas
|
||||||
val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
|
val subbands = calculateSubbandLayout(width, height, spatialLevels)
|
||||||
|
|
||||||
// Step 5: Dequantize with temporal-spatial scaling
|
// Step 5: Dequantize with temporal-spatial scaling
|
||||||
for (t in 0 until gopSize) {
|
for (t in 0 until gopSize) {
|
||||||
@@ -6971,40 +6776,23 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Step 6: Apply inverse 3D DWT
|
// Step 6: Apply inverse 3D DWT
|
||||||
tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||||
tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||||
tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
|
||||||
|
|
||||||
// Step 7: Apply inverse motion compensation
|
|
||||||
for (t in 1 until gopSize) {
|
|
||||||
val dx = motionVectorsX[t] / 16
|
|
||||||
val dy = motionVectorsY[t] / 16
|
|
||||||
|
|
||||||
if (dx != 0 || dy != 0) {
|
|
||||||
applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
|
|
||||||
applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
|
|
||||||
applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 8: Crop and convert to RGB, write directly to videoBuffer
|
// Step 8: Crop and convert to RGB, write directly to videoBuffer
|
||||||
for (t in 0 until gopSize) {
|
for (t in 0 until gopSize) {
|
||||||
val videoBufferOffset = bufferOffset + (t * frameSize) // Each frame sequentially, starting at bufferOffset
|
val videoBufferOffset = bufferOffset + (t * frameSize) // Each frame sequentially, starting at bufferOffset
|
||||||
|
|
||||||
for (row in 0 until height) {
|
for (py in 0 until height) {
|
||||||
for (col in 0 until width) {
|
for (px in 0 until width) {
|
||||||
// Source pixel in expanded canvas
|
|
||||||
val canvasX = col + marginLeft
|
|
||||||
val canvasY = row + marginTop
|
|
||||||
val canvasIdx = canvasY * canvasWidth + canvasX
|
|
||||||
|
|
||||||
// Destination pixel in videoBuffer
|
// Destination pixel in videoBuffer
|
||||||
val outIdx = row * width + col
|
val outIdx = py * width + px
|
||||||
val offset = videoBufferOffset + outIdx * 3L
|
val offset = videoBufferOffset + outIdx * 3L
|
||||||
|
|
||||||
val yVal = gopY[t][canvasIdx]
|
val yVal = gopY[t][outIdx]
|
||||||
val co = gopCo[t][canvasIdx]
|
val co = gopCo[t][outIdx]
|
||||||
val cg = gopCg[t][canvasIdx]
|
val cg = gopCg[t][outIdx]
|
||||||
|
|
||||||
// YCoCg-R to RGB conversion
|
// YCoCg-R to RGB conversion
|
||||||
val tmp = yVal - (cg / 2.0f)
|
val tmp = yVal - (cg / 2.0f)
|
||||||
@@ -7113,14 +6901,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
compressedDataPtr: Long,
|
compressedDataPtr: Long,
|
||||||
compressedSize: Int,
|
compressedSize: Int,
|
||||||
gopSize: Int,
|
gopSize: Int,
|
||||||
motionVectorsX: IntArray,
|
|
||||||
motionVectorsY: IntArray,
|
|
||||||
width: Int,
|
width: Int,
|
||||||
height: Int,
|
height: Int,
|
||||||
canvasWidth: Int,
|
|
||||||
canvasHeight: Int,
|
|
||||||
marginLeft: Int,
|
|
||||||
marginTop: Int,
|
|
||||||
qIndex: Int,
|
qIndex: Int,
|
||||||
qYGlobal: Int,
|
qYGlobal: Int,
|
||||||
qCoGlobal: Int,
|
qCoGlobal: Int,
|
||||||
@@ -7128,7 +6910,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
channelLayout: Int,
|
channelLayout: Int,
|
||||||
spatialFilter: Int = 1,
|
spatialFilter: Int = 1,
|
||||||
spatialLevels: Int = 6,
|
spatialLevels: Int = 6,
|
||||||
temporalLevels: Int = 2,
|
temporalLevels: Int = 3,
|
||||||
entropyCoder: Int = 0,
|
entropyCoder: Int = 0,
|
||||||
bufferOffset: Long = 0
|
bufferOffset: Long = 0
|
||||||
) {
|
) {
|
||||||
@@ -7144,9 +6926,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
try {
|
try {
|
||||||
val result = tavDecodeGopToVideoBuffer(
|
val result = tavDecodeGopToVideoBuffer(
|
||||||
compressedDataPtr, compressedSize, gopSize,
|
compressedDataPtr, compressedSize, gopSize,
|
||||||
motionVectorsX, motionVectorsY,
|
width, height,
|
||||||
width, height, canvasWidth, canvasHeight,
|
|
||||||
marginLeft, marginTop,
|
|
||||||
qIndex, qYGlobal, qCoGlobal, qCgGlobal,
|
qIndex, qYGlobal, qCoGlobal, qCgGlobal,
|
||||||
channelLayout, spatialFilter, spatialLevels, temporalLevels,
|
channelLayout, spatialFilter, spatialLevels, temporalLevels,
|
||||||
entropyCoder, bufferOffset
|
entropyCoder, bufferOffset
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
|
|||||||
internal val unusedArea = UnsafeHelper.allocate(1024, this)
|
internal val unusedArea = UnsafeHelper.allocate(1024, this)
|
||||||
internal val scanlineOffsets = UnsafeHelper.allocate(1024, this)
|
internal val scanlineOffsets = UnsafeHelper.allocate(1024, this)
|
||||||
|
|
||||||
internal val videoBuffer = UnsafeHelper.allocate(32 * 1024 * 1024, this)
|
internal val videoBuffer = UnsafeHelper.allocate(48 * 1024 * 1024, this) // 48 MB for triple-buffering (3 slots × 21 frames × 752 kB)
|
||||||
|
|
||||||
protected val paletteShader = LoadShader(DRAW_SHADER_VERT, config.paletteShader)
|
protected val paletteShader = LoadShader(DRAW_SHADER_VERT, config.paletteShader)
|
||||||
protected val textShader = LoadShader(DRAW_SHADER_VERT, config.fragShader)
|
protected val textShader = LoadShader(DRAW_SHADER_VERT, config.fragShader)
|
||||||
|
|||||||
@@ -18,7 +18,7 @@
|
|||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <fftw3.h>
|
#include <fftw3.h>
|
||||||
|
|
||||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251019"
|
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251022 (3d-dwt,ezbc)"
|
||||||
|
|
||||||
// TSVM Advanced Video (TAV) format constants
|
// TSVM Advanced Video (TAV) format constants
|
||||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
||||||
@@ -48,7 +48,7 @@
|
|||||||
#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe)
|
#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe)
|
||||||
#define TAV_PACKET_PFRAME 0x11 // Predicted frame (legacy, unused)
|
#define TAV_PACKET_PFRAME 0x11 // Predicted frame (legacy, unused)
|
||||||
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block, translation-based)
|
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block, translation-based)
|
||||||
#define TAV_PACKET_GOP_UNIFIED_MESH 0x13 // Unified 3D DWT GOP with distortion mesh warping
|
#define TAV_PACKET_GOP_UNIFIED_MOTION 0x13 // Unified 3D DWT GOP with motion-compensated lifting
|
||||||
#define TAV_PACKET_PFRAME_RESIDUAL 0x14 // P-frame with MPEG-style residual coding (block motion compensation)
|
#define TAV_PACKET_PFRAME_RESIDUAL 0x14 // P-frame with MPEG-style residual coding (block motion compensation)
|
||||||
#define TAV_PACKET_BFRAME_RESIDUAL 0x15 // B-frame with MPEG-style residual coding (bidirectional prediction)
|
#define TAV_PACKET_BFRAME_RESIDUAL 0x15 // B-frame with MPEG-style residual coding (bidirectional prediction)
|
||||||
#define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning
|
#define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning
|
||||||
@@ -116,13 +116,15 @@ static int needs_alpha_channel(int channel_layout) {
|
|||||||
#define DEFAULT_HEIGHT 448
|
#define DEFAULT_HEIGHT 448
|
||||||
#define DEFAULT_FPS 30
|
#define DEFAULT_FPS 30
|
||||||
#define DEFAULT_QUALITY 3
|
#define DEFAULT_QUALITY 3
|
||||||
#define DEFAULT_ZSTD_LEVEL 9
|
#define DEFAULT_ZSTD_LEVEL 3
|
||||||
#define TEMPORAL_GOP_SIZE 20//8 // ~42 frames fit into 32 MB video buffer
|
#define TEMPORAL_GOP_SIZE 20
|
||||||
#define TEMPORAL_DECOMP_LEVEL 2
|
#define TEMPORAL_DECOMP_LEVEL 2
|
||||||
#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
|
#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
|
||||||
|
|
||||||
// Audio/subtitle constants (reused from TEV)
|
// Audio/subtitle constants (reused from TEV)
|
||||||
|
#define MP2_SAMPLE_RATE 32000
|
||||||
#define MP2_DEFAULT_PACKET_SIZE 1152
|
#define MP2_DEFAULT_PACKET_SIZE 1152
|
||||||
|
#define PACKET_AUDIO_TIME ((double)MP2_DEFAULT_PACKET_SIZE / MP2_SAMPLE_RATE)
|
||||||
#define MAX_SUBTITLE_LENGTH 2048
|
#define MAX_SUBTITLE_LENGTH 2048
|
||||||
|
|
||||||
int debugDumpMade = 0;
|
int debugDumpMade = 0;
|
||||||
@@ -2175,6 +2177,7 @@ static int mp2_packet_size_to_rate_index(int packet_size, int is_mono);
|
|||||||
static long write_extended_header(tav_encoder_t *enc);
|
static long write_extended_header(tav_encoder_t *enc);
|
||||||
static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate);
|
static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate);
|
||||||
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output);
|
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output);
|
||||||
|
static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num_frames, FILE *output);
|
||||||
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps);
|
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps);
|
||||||
static subtitle_entry_t* parse_srt_file(const char *filename, int fps);
|
static subtitle_entry_t* parse_srt_file(const char *filename, int fps);
|
||||||
static subtitle_entry_t* parse_smi_file(const char *filename, int fps);
|
static subtitle_entry_t* parse_smi_file(const char *filename, int fps);
|
||||||
@@ -2269,7 +2272,7 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
|
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
|
||||||
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
|
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
|
||||||
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
|
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
|
||||||
printf(" --no-grain-synthesis Disable grain synthesis (enabled by default)\n");
|
// printf(" --no-grain-synthesis Disable grain synthesis (enabled by default)\n");
|
||||||
printf(" --help Show this help\n\n");
|
printf(" --help Show this help\n\n");
|
||||||
|
|
||||||
printf("Audio Rate by Quality:\n ");
|
printf("Audio Rate by Quality:\n ");
|
||||||
@@ -2328,7 +2331,7 @@ static tav_encoder_t* create_encoder(void) {
|
|||||||
enc->intra_only = 0;
|
enc->intra_only = 0;
|
||||||
enc->monoblock = 1; // Default to monoblock mode
|
enc->monoblock = 1; // Default to monoblock mode
|
||||||
enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6)
|
enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6)
|
||||||
enc->enable_ezbc = 0; // Default to twobit-map (EZBC adds overhead for small files)
|
enc->enable_ezbc = 1; // Default to EZBC over twobit-map
|
||||||
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg
|
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg
|
||||||
enc->audio_bitrate = 0; // 0 = use quality table
|
enc->audio_bitrate = 0; // 0 = use quality table
|
||||||
enc->encode_limit = 0; // Default: no frame limit
|
enc->encode_limit = 0; // Default: no frame limit
|
||||||
@@ -2339,7 +2342,7 @@ static tav_encoder_t* create_encoder(void) {
|
|||||||
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
|
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
|
||||||
|
|
||||||
// GOP / temporal DWT settings
|
// GOP / temporal DWT settings
|
||||||
enc->enable_temporal_dwt = 0; // Default: disabled for backward compatibility. Mutually exclusive with use_delta_encoding
|
enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding
|
||||||
enc->temporal_gop_capacity = TEMPORAL_GOP_SIZE; // 16 frames
|
enc->temporal_gop_capacity = TEMPORAL_GOP_SIZE; // 16 frames
|
||||||
enc->temporal_gop_frame_count = 0;
|
enc->temporal_gop_frame_count = 0;
|
||||||
enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 2 levels of temporal DWT (16 -> 4x4 subbands)
|
enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 2 levels of temporal DWT (16 -> 4x4 subbands)
|
||||||
@@ -4826,16 +4829,6 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
memcpy(gop_cg_coeffs[i], enc->temporal_gop_cg_frames[i], num_pixels * sizeof(float));
|
memcpy(gop_cg_coeffs[i], enc->temporal_gop_cg_frames[i], num_pixels * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debug: Print original frame-to-frame motion vectors
|
|
||||||
if (enc->verbose && actual_gop_size >= 4) {
|
|
||||||
printf("Frame-to-frame motion vectors (before cumulative conversion):\n");
|
|
||||||
for (int i = 0; i < actual_gop_size; i++) {
|
|
||||||
printf(" Frame %d: 1/16px=(%d, %d) pixels=(%.3f, %.3f)\n",
|
|
||||||
i, enc->temporal_gop_translation_x[i], enc->temporal_gop_translation_y[i],
|
|
||||||
enc->temporal_gop_translation_x[i] / 16.0f, enc->temporal_gop_translation_y[i] / 16.0f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 0.5: Convert frame-to-frame motion vectors to cumulative (relative to frame 0)
|
// Step 0.5: Convert frame-to-frame motion vectors to cumulative (relative to frame 0)
|
||||||
// Phase correlation computes motion of frame[i] relative to frame[i-1]
|
// Phase correlation computes motion of frame[i] relative to frame[i-1]
|
||||||
// We need cumulative motion relative to frame 0 for proper alignment
|
// We need cumulative motion relative to frame 0 for proper alignment
|
||||||
@@ -4844,16 +4837,6 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
enc->temporal_gop_translation_y[i] += enc->temporal_gop_translation_y[i-1];
|
enc->temporal_gop_translation_y[i] += enc->temporal_gop_translation_y[i-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debug: Print cumulative motion vectors
|
|
||||||
if (enc->verbose && actual_gop_size >= 4) {
|
|
||||||
printf("Cumulative motion vectors (after conversion):\n");
|
|
||||||
for (int i = 0; i < actual_gop_size; i++) {
|
|
||||||
printf(" Frame %d: 1/16px=(%d, %d) pixels=(%.3f, %.3f)\n",
|
|
||||||
i, enc->temporal_gop_translation_x[i], enc->temporal_gop_translation_y[i],
|
|
||||||
enc->temporal_gop_translation_x[i] / 16.0f, enc->temporal_gop_translation_y[i] / 16.0f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 0.5b: Calculate the valid region after alignment (crop bounds)
|
// Step 0.5b: Calculate the valid region after alignment (crop bounds)
|
||||||
// Find the bounding box that's valid across all aligned frames
|
// Find the bounding box that's valid across all aligned frames
|
||||||
int min_dx = 0, max_dx = 0, min_dy = 0, max_dy = 0;
|
int min_dx = 0, max_dx = 0, min_dy = 0, max_dy = 0;
|
||||||
@@ -5102,6 +5085,9 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
// Write timecode packet for first frame in GOP
|
// Write timecode packet for first frame in GOP
|
||||||
write_timecode_packet(output, frame_numbers[0], enc->output_fps, enc->is_ntsc_framerate);
|
write_timecode_packet(output, frame_numbers[0], enc->output_fps, enc->is_ntsc_framerate);
|
||||||
|
|
||||||
|
// Process audio for this GOP (all frames at once)
|
||||||
|
process_audio_for_gop(enc, frame_numbers, actual_gop_size, output);
|
||||||
|
|
||||||
// Single-frame GOP fallback: use traditional I-frame encoding with serialise_tile_data
|
// Single-frame GOP fallback: use traditional I-frame encoding with serialise_tile_data
|
||||||
if (actual_gop_size == 1) {
|
if (actual_gop_size == 1) {
|
||||||
// Write I-frame packet header (no motion vectors, no GOP overhead)
|
// Write I-frame packet header (no motion vectors, no GOP overhead)
|
||||||
@@ -5171,10 +5157,11 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
printf("Frame %d (single-frame GOP as I-frame): %zu bytes\n",
|
printf("Frame %d (single-frame GOP as I-frame): %zu bytes\n",
|
||||||
frame_numbers[0], compressed_size);
|
frame_numbers[0], compressed_size);
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
// Multi-frame GOP: use unified 3D DWT encoding
|
// Multi-frame GOP: use unified 3D DWT encoding
|
||||||
// Choose packet type based on motion compensation method
|
// Choose packet type based on motion compensation method
|
||||||
uint8_t packet_type = enc->temporal_enable_mcezbc ? TAV_PACKET_GOP_UNIFIED_MESH : TAV_PACKET_GOP_UNIFIED;
|
uint8_t packet_type = enc->temporal_enable_mcezbc ? TAV_PACKET_GOP_UNIFIED_MOTION : TAV_PACKET_GOP_UNIFIED;
|
||||||
fwrite(&packet_type, 1, 1, output);
|
fwrite(&packet_type, 1, 1, output);
|
||||||
total_bytes_written += 1;
|
total_bytes_written += 1;
|
||||||
|
|
||||||
@@ -5263,26 +5250,6 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
|
|
||||||
free(mv_buffer);
|
free(mv_buffer);
|
||||||
free(compressed_mv);
|
free(compressed_mv);
|
||||||
} else {
|
|
||||||
// Packet 0x12: Translation-based alignment
|
|
||||||
// Write canvas expansion information (4 bytes)
|
|
||||||
uint8_t canvas_margins[4] = {
|
|
||||||
(uint8_t)crop_left, // Left margin
|
|
||||||
(uint8_t)crop_right, // Right margin
|
|
||||||
(uint8_t)crop_top, // Top margin
|
|
||||||
(uint8_t)crop_bottom // Bottom margin
|
|
||||||
};
|
|
||||||
fwrite(canvas_margins, 1, 4, output);
|
|
||||||
total_bytes_written += 4;
|
|
||||||
|
|
||||||
// Write all motion vectors (1/16-pixel precision) for the entire GOP
|
|
||||||
for (int t = 0; t < actual_gop_size; t++) {
|
|
||||||
int16_t dx = enc->temporal_gop_translation_x[t];
|
|
||||||
int16_t dy = enc->temporal_gop_translation_y[t];
|
|
||||||
fwrite(&dx, sizeof(int16_t), 1, output);
|
|
||||||
fwrite(&dy, sizeof(int16_t), 1, output);
|
|
||||||
total_bytes_written += 4;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Preprocess ALL frames with unified significance map
|
// Preprocess ALL frames with unified significance map
|
||||||
@@ -8649,13 +8616,8 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
|
|||||||
// Calculate how much audio time each frame represents (in seconds)
|
// Calculate how much audio time each frame represents (in seconds)
|
||||||
double frame_audio_time = 1.0 / enc->output_fps;
|
double frame_audio_time = 1.0 / enc->output_fps;
|
||||||
|
|
||||||
// Calculate how much audio time each MP2 packet represents
|
|
||||||
// MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
|
|
||||||
#define MP2_SAMPLE_RATE 32000
|
|
||||||
double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
|
|
||||||
|
|
||||||
// Estimate how many packets we consume per video frame
|
// Estimate how many packets we consume per video frame
|
||||||
double packets_per_frame = frame_audio_time / packet_audio_time;
|
double packets_per_frame = frame_audio_time / PACKET_AUDIO_TIME;
|
||||||
|
|
||||||
// Allocate MP2 buffer if needed
|
// Allocate MP2 buffer if needed
|
||||||
if (!enc->mp2_buffer) {
|
if (!enc->mp2_buffer) {
|
||||||
@@ -8683,24 +8645,20 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
|
|||||||
|
|
||||||
// Calculate how many packets we need to maintain target buffer level
|
// Calculate how many packets we need to maintain target buffer level
|
||||||
// Only insert when buffer drops below target, and only insert enough to restore target
|
// Only insert when buffer drops below target, and only insert enough to restore target
|
||||||
double target_level = (double)enc->target_audio_buffer_size;
|
double target_level = fmax(packets_per_frame, (double)enc->target_audio_buffer_size);
|
||||||
if (enc->audio_frames_in_buffer < target_level) {
|
// if (enc->audio_frames_in_buffer < target_level) {
|
||||||
double deficit = target_level - enc->audio_frames_in_buffer;
|
double deficit = target_level - enc->audio_frames_in_buffer;
|
||||||
// Insert packets to cover the deficit, but at least maintain minimum flow
|
// Insert packets to cover the deficit, but at least maintain minimum flow
|
||||||
packets_to_insert = (int)ceil(deficit);
|
packets_to_insert = (int)ceil(deficit);
|
||||||
// Cap at reasonable maximum to prevent excessive insertion
|
|
||||||
if (packets_to_insert > enc->target_audio_buffer_size) {
|
|
||||||
packets_to_insert = enc->target_audio_buffer_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (enc->verbose) {
|
if (enc->verbose) {
|
||||||
printf("Frame %d: Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n",
|
printf("Frame %d: Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n",
|
||||||
frame_num, old_buffer, enc->audio_frames_in_buffer, deficit, packets_to_insert);
|
frame_num, old_buffer, enc->audio_frames_in_buffer, deficit, packets_to_insert);
|
||||||
}
|
}
|
||||||
} else if (enc->verbose && old_buffer != enc->audio_frames_in_buffer) {
|
// } else if (enc->verbose && old_buffer != enc->audio_frames_in_buffer) {
|
||||||
printf("Frame %d: Buffer sufficient (%.2f->%.2f), no packets\n",
|
// printf("Frame %d: Buffer sufficient (%.2f->%.2f), no packets\n",
|
||||||
frame_num, old_buffer, enc->audio_frames_in_buffer);
|
// frame_num, old_buffer, enc->audio_frames_in_buffer);
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert the calculated number of audio packets
|
// Insert the calculated number of audio packets
|
||||||
@@ -8737,6 +8695,96 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Process audio for a GOP (multiple frames at once)
|
||||||
|
// Accumulates deficit for N frames and emits all necessary audio packets
|
||||||
|
static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num_frames, FILE *output) {
|
||||||
|
if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0 || num_frames == 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle first frame initialization (same as process_audio)
|
||||||
|
int first_frame_in_gop = frame_numbers[0];
|
||||||
|
if (first_frame_in_gop == 0) {
|
||||||
|
uint8_t header[4];
|
||||||
|
if (fread(header, 1, 4, enc->mp2_file) != 4) return 1;
|
||||||
|
fseek(enc->mp2_file, 0, SEEK_SET);
|
||||||
|
enc->mp2_packet_size = get_mp2_packet_size(header);
|
||||||
|
int is_mono = (header[3] >> 6) == 3;
|
||||||
|
enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
|
||||||
|
enc->target_audio_buffer_size = 4; // 4 audio packets in buffer (does nothing for GOP)
|
||||||
|
enc->audio_frames_in_buffer = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate audio packet consumption per video frame
|
||||||
|
double frame_audio_time = 1.0 / enc->output_fps;
|
||||||
|
double packets_per_frame = frame_audio_time / PACKET_AUDIO_TIME;
|
||||||
|
|
||||||
|
// Allocate MP2 buffer if needed
|
||||||
|
if (!enc->mp2_buffer) {
|
||||||
|
enc->mp2_buffer_size = enc->mp2_packet_size * 2;
|
||||||
|
enc->mp2_buffer = malloc(enc->mp2_buffer_size);
|
||||||
|
if (!enc->mp2_buffer) {
|
||||||
|
fprintf(stderr, "Failed to allocate audio buffer\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate total deficit for all frames in the GOP
|
||||||
|
int total_packets_to_insert = 0;
|
||||||
|
|
||||||
|
// Simulate buffer consumption for all N frames in the GOP
|
||||||
|
double old_buffer = enc->audio_frames_in_buffer;
|
||||||
|
enc->audio_frames_in_buffer -= (packets_per_frame * num_frames);
|
||||||
|
|
||||||
|
// Calculate deficit to restore buffer to target level
|
||||||
|
// double target_level = fmax(packets_per_frame, (double)enc->target_audio_buffer_size);
|
||||||
|
// if (enc->audio_frames_in_buffer < target_level) {
|
||||||
|
double deficit = packets_per_frame * num_frames;
|
||||||
|
total_packets_to_insert = CLAMP((int)round(deficit), enc->target_audio_buffer_size, 9999);
|
||||||
|
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("GOP (%d frames, starting at %d): Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n",
|
||||||
|
num_frames, first_frame_in_gop, old_buffer, enc->audio_frames_in_buffer, deficit, total_packets_to_insert);
|
||||||
|
}
|
||||||
|
// } else if (enc->verbose) {
|
||||||
|
// printf("GOP (%d frames, starting at %d): Buffer sufficient (%.2f->%.2f), no packets\n",
|
||||||
|
// num_frames, first_frame_in_gop, old_buffer, enc->audio_frames_in_buffer);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Emit all audio packets for this GOP
|
||||||
|
for (int q = 0; q < total_packets_to_insert; q++) {
|
||||||
|
size_t bytes_to_read = enc->mp2_packet_size;
|
||||||
|
if (bytes_to_read > enc->audio_remaining) {
|
||||||
|
bytes_to_read = enc->audio_remaining;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file);
|
||||||
|
if (bytes_read == 0) break;
|
||||||
|
|
||||||
|
// Write TAV MP2 audio packet
|
||||||
|
uint8_t audio_packet_type = TAV_PACKET_AUDIO_MP2;
|
||||||
|
uint32_t audio_len = (uint32_t)bytes_read;
|
||||||
|
fwrite(&audio_packet_type, 1, 1, output);
|
||||||
|
fwrite(&audio_len, 4, 1, output);
|
||||||
|
fwrite(enc->mp2_buffer, 1, bytes_read, output);
|
||||||
|
|
||||||
|
// Track audio bytes written
|
||||||
|
enc->audio_remaining -= bytes_read;
|
||||||
|
enc->audio_frames_in_buffer++;
|
||||||
|
|
||||||
|
if (first_frame_in_gop == 0) {
|
||||||
|
enc->audio_frames_in_buffer = enc->target_audio_buffer_size / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("Audio packet %d: %zu bytes (buffer: %.2f packets)\n",
|
||||||
|
q, bytes_read, enc->audio_frames_in_buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
// Process subtitles for current frame (copied and adapted from TEV)
|
// Process subtitles for current frame (copied and adapted from TEV)
|
||||||
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
|
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
|
||||||
if (!enc->subtitles) {
|
if (!enc->subtitles) {
|
||||||
@@ -9834,20 +9882,16 @@ int main(int argc, char *argv[]) {
|
|||||||
adjust_quantiser_for_bitrate(enc);
|
adjust_quantiser_for_bitrate(enc);
|
||||||
}
|
}
|
||||||
|
|
||||||
// For GOP encoding, process audio/subtitles for all frames in the flushed GOP
|
// For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames
|
||||||
// For traditional encoding, process audio/subtitles for this single frame
|
// For traditional encoding, process audio/subtitles for this single frame
|
||||||
if (enc->enable_temporal_dwt) {
|
if (!enc->enable_temporal_dwt) {
|
||||||
// Note: In GOP mode, audio/subtitle sync is approximate since we flush multiple frames at once
|
// Process audio for this frame
|
||||||
// This is acceptable since GOPs are short (16 frames max = ~0.5s at 30fps)
|
process_audio(enc, true_frame_count, enc->output_fp);
|
||||||
// TODO: Consider buffering audio/subtitles for precise sync if needed
|
|
||||||
|
// Process subtitles for this frame
|
||||||
|
process_subtitles(enc, true_frame_count, enc->output_fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process audio for this frame
|
|
||||||
process_audio(enc, true_frame_count, enc->output_fp);
|
|
||||||
|
|
||||||
// Process subtitles for this frame
|
|
||||||
process_subtitles(enc, true_frame_count, enc->output_fp);
|
|
||||||
|
|
||||||
// Write a sync packet only after a video is been coded
|
// Write a sync packet only after a video is been coded
|
||||||
// For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC
|
// For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC
|
||||||
// For B-frame mode, sync packets are already written in the encoding loop
|
// For B-frame mode, sync packets are already written in the encoding loop
|
||||||
@@ -9857,7 +9901,8 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NTSC frame duplication: emit extra sync packet for every 1000n+500 frames
|
// NTSC frame duplication: emit extra sync packet for every 1000n+500 frames
|
||||||
if (enc->is_ntsc_framerate && (frame_count % 1000 == 500)) {
|
// Skip when temporal DWT is enabled (audio handled in GOP flush)
|
||||||
|
if (!enc->enable_temporal_dwt && enc->is_ntsc_framerate && (frame_count % 1000 == 500)) {
|
||||||
true_frame_count++;
|
true_frame_count++;
|
||||||
// Process audio and subtitles for the duplicated frame to maintain sync
|
// Process audio and subtitles for the duplicated frame to maintain sync
|
||||||
process_audio(enc, true_frame_count, enc->output_fp);
|
process_audio(enc, true_frame_count, enc->output_fp);
|
||||||
|
|||||||
@@ -18,6 +18,11 @@
|
|||||||
#define TAV_PACKET_IFRAME 0x10
|
#define TAV_PACKET_IFRAME 0x10
|
||||||
#define TAV_PACKET_PFRAME 0x11
|
#define TAV_PACKET_PFRAME 0x11
|
||||||
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block)
|
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block)
|
||||||
|
#define TAV_PACKET_GOP_UNIFIED_MOTION 0x13
|
||||||
|
#define TAV_PACKET_PFRAME_RESIDUAL 0x14 // P-frame with MPEG-style residual coding (block motion compensation)
|
||||||
|
#define TAV_PACKET_BFRAME_RESIDUAL 0x15 // B-frame with MPEG-style residual coding (bidirectional prediction)
|
||||||
|
#define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning
|
||||||
|
#define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
|
||||||
#define TAV_PACKET_AUDIO_MP2 0x20
|
#define TAV_PACKET_AUDIO_MP2 0x20
|
||||||
#define TAV_PACKET_SUBTITLE 0x30
|
#define TAV_PACKET_SUBTITLE 0x30
|
||||||
#define TAV_PACKET_SUBTITLE_KAR 0x31
|
#define TAV_PACKET_SUBTITLE_KAR 0x31
|
||||||
@@ -59,6 +64,7 @@ typedef struct {
|
|||||||
int pframe_delta_count;
|
int pframe_delta_count;
|
||||||
int pframe_skip_count;
|
int pframe_skip_count;
|
||||||
int gop_unified_count;
|
int gop_unified_count;
|
||||||
|
int gop_unified_motion_count;
|
||||||
int gop_sync_count;
|
int gop_sync_count;
|
||||||
int total_gop_frames;
|
int total_gop_frames;
|
||||||
int audio_count;
|
int audio_count;
|
||||||
@@ -94,6 +100,11 @@ const char* get_packet_type_name(uint8_t type) {
|
|||||||
case TAV_PACKET_IFRAME: return "I-FRAME";
|
case TAV_PACKET_IFRAME: return "I-FRAME";
|
||||||
case TAV_PACKET_PFRAME: return "P-FRAME";
|
case TAV_PACKET_PFRAME: return "P-FRAME";
|
||||||
case TAV_PACKET_GOP_UNIFIED: return "GOP (3D DWT Unified)";
|
case TAV_PACKET_GOP_UNIFIED: return "GOP (3D DWT Unified)";
|
||||||
|
case TAV_PACKET_GOP_UNIFIED_MOTION: return "GOP (3D DWT Unified with Motion Data)";
|
||||||
|
case TAV_PACKET_PFRAME_RESIDUAL: return "P-FRAME (residual)";
|
||||||
|
case TAV_PACKET_BFRAME_RESIDUAL: return "B-FRAME (residual)";
|
||||||
|
case TAV_PACKET_PFRAME_ADAPTIVE: return "P-FRAME (quadtree)";
|
||||||
|
case TAV_PACKET_BFRAME_ADAPTIVE: return "B-FRAME (quadtree)";
|
||||||
case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2";
|
case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2";
|
||||||
case TAV_PACKET_SUBTITLE: return "SUBTITLE (Simple)";
|
case TAV_PACKET_SUBTITLE: return "SUBTITLE (Simple)";
|
||||||
case TAV_PACKET_SUBTITLE_KAR: return "SUBTITLE (Karaoke)";
|
case TAV_PACKET_SUBTITLE_KAR: return "SUBTITLE (Karaoke)";
|
||||||
@@ -246,9 +257,10 @@ void print_extended_header(FILE *fp, int verbose) {
|
|||||||
if (verbose) {
|
if (verbose) {
|
||||||
if (strcmp(key, "CDAT") == 0) {
|
if (strcmp(key, "CDAT") == 0) {
|
||||||
time_t time_sec = value / 1000000000ULL;
|
time_t time_sec = value / 1000000000ULL;
|
||||||
char *time_str = ctime(&time_sec);
|
struct tm *time_info = gmtime(&time_sec);
|
||||||
if (time_str) {
|
if (time_info) {
|
||||||
time_str[strlen(time_str)-1] = '\0'; // Remove newline
|
char time_str[64];
|
||||||
|
strftime(time_str, sizeof(time_str), "%a %b %d %H:%M:%S %Y UTC", time_info);
|
||||||
printf("%s", time_str);
|
printf("%s", time_str);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -484,48 +496,37 @@ int main(int argc, char *argv[]) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case TAV_PACKET_GOP_UNIFIED: {
|
case TAV_PACKET_GOP_UNIFIED: case TAV_PACKET_GOP_UNIFIED_MOTION: {
|
||||||
// Unified GOP packet: [gop_size][motion_vectors...][compressed_size][data]
|
// Unified GOP packet: [gop_size][motion_vectors...][compressed_size][data]
|
||||||
uint8_t gop_size;
|
uint8_t gop_size;
|
||||||
if (fread(&gop_size, 1, 1, fp) != 1) break;
|
if (fread(&gop_size, 1, 1, fp) != 1) break;
|
||||||
|
|
||||||
// Read all motion vectors
|
// Read motion vectors
|
||||||
int16_t *motion_x = malloc(gop_size * sizeof(int16_t));
|
uint32_t size0 = 0;
|
||||||
int16_t *motion_y = malloc(gop_size * sizeof(int16_t));
|
if (packet_type == TAV_PACKET_GOP_UNIFIED_MOTION) {
|
||||||
for (int i = 0; i < gop_size; i++) {
|
if (fread(&size0, sizeof(uint32_t), 1, fp) != 1) { break; }
|
||||||
if (fread(&motion_x[i], sizeof(int16_t), 1, fp) != 1) break;
|
stats.total_video_bytes += size0;
|
||||||
if (fread(&motion_y[i], sizeof(int16_t), 1, fp) != 1) break;
|
stats.gop_unified_motion_count++;
|
||||||
|
fseek(fp, size0, SEEK_CUR);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read compressed data size
|
// Read compressed data size
|
||||||
uint32_t size;
|
uint32_t size1;
|
||||||
if (fread(&size, sizeof(uint32_t), 1, fp) != 1) {
|
if (fread(&size1, sizeof(uint32_t), 1, fp) != 1) { break; }
|
||||||
free(motion_x);
|
stats.total_video_bytes += size1;
|
||||||
free(motion_y);
|
fseek(fp, size1, SEEK_CUR);
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
stats.total_video_bytes += size;
|
|
||||||
stats.gop_unified_count++;
|
|
||||||
stats.total_gop_frames += gop_size;
|
stats.total_gop_frames += gop_size;
|
||||||
|
if (packet_type == TAV_PACKET_GOP_UNIFIED) {
|
||||||
|
stats.gop_unified_count++;
|
||||||
|
}
|
||||||
|
|
||||||
if (!opts.summary_only && display) {
|
if (!opts.summary_only && display) {
|
||||||
printf(" - GOP size=%u, data size=%u bytes (%.2f bytes/frame)",
|
printf(" - GOP size=%u, data size=%u bytes (%.2f bytes/frame)",
|
||||||
gop_size, size, (double)size / gop_size);
|
gop_size, (size0 + size1), (double)(size0 + size1) / gop_size);
|
||||||
|
|
||||||
// Always show motion vectors for GOP packets with absolute frame numbers
|
|
||||||
if (gop_size > 0) {
|
|
||||||
printf("\n Motion vectors (1/16-pixel):");
|
|
||||||
for (int i = 0; i < gop_size; i++) {
|
|
||||||
printf("\n Frame %d (#%d): (%.3f, %.3f) px",
|
|
||||||
current_frame + i, i, motion_x[i] / 16.0, motion_y[i] / 16.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free(motion_x);
|
|
||||||
free(motion_y);
|
|
||||||
fseek(fp, size, SEEK_CUR);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -714,10 +715,10 @@ int main(int argc, char *argv[]) {
|
|||||||
printf(")");
|
printf(")");
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
if (stats.gop_unified_count > 0) {
|
if (stats.gop_unified_count + stats.gop_unified_motion_count > 0) {
|
||||||
printf(" 3D GOP packets: %d (total frames: %d, avg %.1f frames/GOP)\n",
|
printf(" 3D GOP packets: %d (total frames: %d, avg %.1f frames/GOP)\n",
|
||||||
stats.gop_unified_count, stats.total_gop_frames,
|
(stats.gop_unified_count + stats.gop_unified_motion_count), stats.total_gop_frames,
|
||||||
(double)stats.total_gop_frames / stats.gop_unified_count);
|
(double)stats.total_gop_frames / (stats.gop_unified_count + stats.gop_unified_motion_count));
|
||||||
printf(" GOP sync packets: %d\n", stats.gop_sync_count);
|
printf(" GOP sync packets: %d\n", stats.gop_sync_count);
|
||||||
}
|
}
|
||||||
printf(" Mux video: %d\n", stats.mux_video_count);
|
printf(" Mux video: %d\n", stats.mux_video_count);
|
||||||
|
|||||||
Reference in New Issue
Block a user