TAV: half-fixed 3d dwt playback

This commit is contained in:
minjaesong
2025-10-22 01:32:19 +09:00
parent 9ac0424be3
commit 4eec98cdca
6 changed files with 278 additions and 467 deletions

View File

@@ -355,11 +355,12 @@ let decodeHeight = isInterlaced ? (header.height >> 1) : header.height
const FRAME_PIXELS = header.width * header.height const FRAME_PIXELS = header.width * header.height
const FRAME_SIZE = FRAME_PIXELS * 3 // RGB buffer size const FRAME_SIZE = FRAME_PIXELS * 3 // RGB buffer size
// Double-buffering: Fixed slot sizes in videoBuffer (32 MB total) // Triple-buffering: Fixed slot sizes in videoBuffer (48 MB total)
const MAX_GOP_SIZE = 21 // Maximum frames per slot (21 * 752KB = ~15MB per slot) const BUFFER_SLOTS = 3 // Three slots: playing, ready, decoding
const MAX_GOP_SIZE = 21 // Maximum frames per slot (21 * 752KB = ~15.8MB per slot)
const SLOT_SIZE = MAX_GOP_SIZE * FRAME_SIZE // Fixed slot size regardless of actual GOP size const SLOT_SIZE = MAX_GOP_SIZE * FRAME_SIZE // Fixed slot size regardless of actual GOP size
console.log(`Double-buffering: Max ${MAX_GOP_SIZE} frames/slot, ${(SLOT_SIZE / 1048576).toFixed(1)}MB per slot`) console.log(`Triple-buffering: ${BUFFER_SLOTS} slots, max ${MAX_GOP_SIZE} frames/slot, ${(SLOT_SIZE / 1048576).toFixed(1)}MB per slot`)
const RGB_BUFFER_A = sys.malloc(FRAME_SIZE) const RGB_BUFFER_A = sys.malloc(FRAME_SIZE)
const RGB_BUFFER_B = sys.malloc(FRAME_SIZE) const RGB_BUFFER_B = sys.malloc(FRAME_SIZE)
@@ -484,17 +485,18 @@ let currentFileIndex = 1 // Track which file we're playing in concatenated stre
let totalFilesProcessed = 0 let totalFilesProcessed = 0
let decoderDbgInfo = {} let decoderDbgInfo = {}
// GOP double-buffering state // GOP triple-buffering state (3 slots: playing, ready, decoding)
let currentGopBufferSlot = 0 // Which buffer slot is currently being displayed (0 or 1) let currentGopBufferSlot = 0 // Which buffer slot is currently being displayed (0, 1, or 2)
let currentGopSize = 0 // Number of frames in current GOP being displayed let currentGopSize = 0 // Number of frames in current GOP being displayed
let currentGopFrameIndex = 0 // Which frame of current GOP we're displaying let currentGopFrameIndex = 0 // Which frame of current GOP we're displaying
let nextGopData = null // Buffered next GOP packet data for background decode let readyGopData = null // GOP that's already decoded and ready to play (next in line)
let decodingGopData = null // GOP currently being decoded in background
let asyncDecodeInProgress = false // Track if async decode is running let asyncDecodeInProgress = false // Track if async decode is running
let asyncDecodeSlot = 0 // Which slot the async decode is targeting let asyncDecodeSlot = 0 // Which slot the async decode is targeting
let asyncDecodeGopSize = 0 // Size of GOP being decoded async let asyncDecodeGopSize = 0 // Size of GOP being decoded async
let asyncDecodePtr = 0 // Compressed data pointer to free after decode let asyncDecodePtr = 0 // Compressed data pointer to free after decode
let asyncDecodeStartTime = 0 // When async decode started (for diagnostics) let asyncDecodeStartTime = 0 // When async decode started (for diagnostics)
let shouldReadPackets = true // Gate packet reading: false when both buffers are full let shouldReadPackets = true // Gate packet reading: false when all 3 buffers are full
let cueElements = [] let cueElements = []
let currentCueIndex = -1 // Track current cue position let currentCueIndex = -1 // Track current cue position
@@ -510,12 +512,19 @@ function cleanupAsyncDecode() {
asyncDecodeGopSize = 0 asyncDecodeGopSize = 0
} }
// Free background GOP decode memory if in progress // Free ready GOP memory if present
if (nextGopData !== null && nextGopData.compressedPtr && nextGopData.compressedPtr !== 0) { if (readyGopData !== null && readyGopData.compressedPtr && readyGopData.compressedPtr !== 0) {
sys.free(nextGopData.compressedPtr) sys.free(readyGopData.compressedPtr)
nextGopData.compressedPtr = 0 readyGopData.compressedPtr = 0
} }
nextGopData = null readyGopData = null
// Free decoding GOP memory if present
if (decodingGopData !== null && decodingGopData.compressedPtr && decodingGopData.compressedPtr !== 0) {
sys.free(decodingGopData.compressedPtr)
decodingGopData.compressedPtr = 0
}
decodingGopData = null
// Reset GOP playback state // Reset GOP playback state
currentGopSize = 0 currentGopSize = 0
@@ -751,7 +760,10 @@ let paused = false
try { try {
let t1 = sys.nanoTime() let t1 = sys.nanoTime()
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) { // Continue loop while:
// 1. Reading packets (not EOF yet), OR
// 2. There are buffered GOPs to play (after EOF)
while (!stopPlay && (seqread.getReadCount() < FILE_LENGTH || currentGopSize > 0 || readyGopData !== null || decodingGopData !== null || asyncDecodeInProgress)) {
// Handle interactive controls // Handle interactive controls
@@ -866,9 +878,10 @@ try {
} }
// GATED PACKET READING // GATED PACKET READING
// Stop reading when both buffers are full (GOP playing + GOP decoding/ready) // Stop reading when all 3 buffers are full (GOP playing + ready GOP + decoding GOP)
// Resume reading when GOP finishes (one buffer becomes free) // Resume reading when GOP finishes (one buffer becomes free)
if (shouldReadPackets && !paused) { // Also stop reading at EOF
if (shouldReadPackets && !paused && seqread.getReadCount() < FILE_LENGTH) {
// Read packet header (record position before reading for I-frame tracking) // Read packet header (record position before reading for I-frame tracking)
let packetOffset = seqread.getReadCount() let packetOffset = seqread.getReadCount()
var packetType = seqread.readOneByte() var packetType = seqread.readOneByte()
@@ -1051,32 +1064,15 @@ try {
// Read GOP packet data // Read GOP packet data
const gopSize = seqread.readOneByte() const gopSize = seqread.readOneByte()
const marginLeft = seqread.readOneByte()
const marginRight = seqread.readOneByte()
const marginTop = seqread.readOneByte()
const marginBottom = seqread.readOneByte()
const canvasWidth = header.width + marginLeft + marginRight
const canvasHeight = header.height + marginTop + marginBottom
// Read motion vectors (1/16-pixel units, int16)
let motionX = new Array(gopSize)
let motionY = new Array(gopSize)
for (let i = 0; i < gopSize; i++) {
let mx = seqread.readShort()
let my = seqread.readShort()
motionX[i] = (mx > 32767) ? (mx - 65536) : mx
motionY[i] = (my > 32767) ? (my - 65536) : my
}
const compressedSize = seqread.readInt() const compressedSize = seqread.readInt()
let compressedPtr = seqread.readBytes(compressedSize) let compressedPtr = seqread.readBytes(compressedSize)
updateDataRateBin(compressedSize) updateDataRateBin(compressedSize)
// DOUBLE-BUFFERING LOGIC: // TRIPLE-BUFFERING LOGIC (3 slots: playing, ready, decoding):
// - If no GOP is currently playing: decode immediately to current slot // - If no GOP playing: decode first GOP to slot 0
// - Otherwise: buffer this GOP for decode during next GOP's playback // - If GOP playing but no ready GOP: decode to ready slot (next in rotation)
// - If a GOP is playing and a ready GOP exists but nothing is decoding: decode to the decoding slot
// - Otherwise: all 3 buffers full, ignore packet
// Check GOP size fits in slot // Check GOP size fits in slot
if (gopSize > MAX_GOP_SIZE) { if (gopSize > MAX_GOP_SIZE) {
@@ -1086,11 +1082,11 @@ try {
} }
if (currentGopSize === 0 && !asyncDecodeInProgress) { if (currentGopSize === 0 && !asyncDecodeInProgress) {
// No active GOP and no decode in progress: decode asynchronously and start playback when ready // Case 1: No active GOP and no decode in progress - decode first GOP
const bufferSlot = currentGopBufferSlot const bufferSlot = currentGopBufferSlot
const bufferOffset = bufferSlot * SLOT_SIZE const bufferOffset = bufferSlot * SLOT_SIZE
// Defensive: free any old async decode memory (shouldn't happen but be safe) // Defensive: free any old async decode memory
if (asyncDecodePtr !== 0) { if (asyncDecodePtr !== 0) {
sys.free(asyncDecodePtr) sys.free(asyncDecodePtr)
asyncDecodePtr = 0 asyncDecodePtr = 0
@@ -1099,10 +1095,7 @@ try {
// Start async decode // Start async decode
graphics.tavDecodeGopToVideoBufferAsync( graphics.tavDecodeGopToVideoBufferAsync(
compressedPtr, compressedSize, gopSize, compressedPtr, compressedSize, gopSize,
motionX, motionY,
header.width, header.height, header.width, header.height,
canvasWidth, canvasHeight,
marginLeft, marginTop,
header.qualityLevel, header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg], QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout, header.channelLayout,
@@ -1114,49 +1107,25 @@ try {
asyncDecodeInProgress = true asyncDecodeInProgress = true
asyncDecodeSlot = bufferSlot asyncDecodeSlot = bufferSlot
asyncDecodeGopSize = gopSize asyncDecodeGopSize = gopSize
asyncDecodePtr = compressedPtr // Will free after decode completes asyncDecodePtr = compressedPtr
asyncDecodeStartTime = sys.nanoTime() asyncDecodeStartTime = sys.nanoTime()
// Note: compressedPtr will be freed after decode completes
// We'll check for completion in main loop and start playback then
if (interactive) {
console.log(`[GOP] Started async decode of first GOP (slot ${bufferSlot}, ${gopSize} frames)`)
}
} else if (currentGopSize === 0 && asyncDecodeInProgress) { } else if (currentGopSize === 0 && asyncDecodeInProgress) {
// First GOP still decoding but another arrived - ignore it to avoid cancelling first GOP // Case 2: First GOP still decoding - ignore to avoid cancellation
if (interactive) {
console.log(`[GOP] Warning: GOP arrived while first GOP still decoding - ignoring to avoid cancellation`)
}
sys.free(compressedPtr) sys.free(compressedPtr)
} else if (currentGopSize > 0 && !asyncDecodeInProgress) {
// GOP is playing and first GOP decode is done: decode this one to other slot in background (async) } else if (currentGopSize > 0 && readyGopData === null && !asyncDecodeInProgress && graphics.tavDecodeGopIsComplete()) {
const nextSlot = 1 - currentGopBufferSlot // Case 3: GOP playing, no ready GOP, no decode in progress - decode to ready slot
const nextSlot = (currentGopBufferSlot + 1) % BUFFER_SLOTS
const nextOffset = nextSlot * SLOT_SIZE const nextOffset = nextSlot * SLOT_SIZE
// DIAGNOSTIC: Measure background decode timing
const framesRemaining = currentGopSize - currentGopFrameIndex const framesRemaining = currentGopSize - currentGopFrameIndex
const timeRemaining = framesRemaining * FRAME_TIME * 1000.0 // milliseconds const timeRemaining = framesRemaining * FRAME_TIME * 1000.0
// If previous GOP still decoding, free its memory (will be overwritten) // Start async decode to ready slot
if (nextGopData !== null && !nextGopData.decoded && nextGopData.compressedPtr && nextGopData.compressedPtr !== 0) {
if (interactive) {
console.log(`[GOP] Warning: New GOP arrived before previous decode completed - freeing old data`)
}
sys.free(nextGopData.compressedPtr)
nextGopData.compressedPtr = 0
}
if (interactive) {
console.log(`[GOP] Background decode started: frame ${currentGopFrameIndex}/${currentGopSize}, ${framesRemaining} frames (${timeRemaining.toFixed(0)}ms) remaining`)
}
// Start async background decode
graphics.tavDecodeGopToVideoBufferAsync( graphics.tavDecodeGopToVideoBufferAsync(
compressedPtr, compressedSize, gopSize, compressedPtr, compressedSize, gopSize,
motionX, motionY,
header.width, header.height, header.width, header.height,
canvasWidth, canvasHeight,
marginLeft, marginTop,
header.qualityLevel, header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg], QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout, header.channelLayout,
@@ -1165,20 +1134,44 @@ try {
nextOffset nextOffset
) )
// Mark as decoding (will check completion in main loop) readyGopData = {
nextGopData = {
gopSize: gopSize, gopSize: gopSize,
decoded: false, // Will be set to true when async decode completes
slot: nextSlot, slot: nextSlot,
compressedPtr: compressedPtr, // Will free after decode completes compressedPtr: compressedPtr,
startTime: sys.nanoTime(), startTime: sys.nanoTime(),
timeRemaining: timeRemaining timeRemaining: timeRemaining
} }
} else {
// Fallback: unexpected state, just free the memory } else if (currentGopSize > 0 && readyGopData !== null && decodingGopData === null && !asyncDecodeInProgress && graphics.tavDecodeGopIsComplete()) {
if (interactive) { // Case 4: GOP playing, ready GOP exists, no decoding GOP, no decode in progress - decode to decoding slot
console.log(`[GOP] Warning: Unexpected state - currentGopSize=${currentGopSize}, asyncDecodeInProgress=${asyncDecodeInProgress} - freeing GOP data`) const decodingSlot = (currentGopBufferSlot + 2) % BUFFER_SLOTS
const decodingOffset = decodingSlot * SLOT_SIZE
const framesRemaining = currentGopSize - currentGopFrameIndex
const timeRemaining = framesRemaining * FRAME_TIME * 1000.0
// Start async decode to decoding slot
graphics.tavDecodeGopToVideoBufferAsync(
compressedPtr, compressedSize, gopSize,
header.width, header.height,
header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout,
header.waveletFilter, header.decompLevels, 2,
header.entropyCoder,
decodingOffset
)
decodingGopData = {
gopSize: gopSize,
slot: decodingSlot,
compressedPtr: compressedPtr,
startTime: sys.nanoTime(),
timeRemaining: timeRemaining
} }
} else {
// Case 5: All 3 buffers full (playing + ready + decoding) - discard this GOP (its compressed data is freed below, so the frames are dropped)
sys.free(compressedPtr) sys.free(compressedPtr)
} }
} }
@@ -1187,13 +1180,10 @@ try {
const framesInGOP = seqread.readOneByte() const framesInGOP = seqread.readOneByte()
// Ignore - we display frames based on time accumulator, not this packet // Ignore - we display frames based on time accumulator, not this packet
// CRITICAL: Stop reading packets if both buffers are full // CRITICAL: Stop reading packets if all 3 buffers are full
// (one GOP playing + one GOP ready/decoding) // (one GOP playing + ready GOP + decoding GOP)
if (currentGopSize > 0 && nextGopData !== null) { if (currentGopSize > 0 && readyGopData !== null && decodingGopData !== null) {
shouldReadPackets = false shouldReadPackets = false
if (interactive) {
console.log(`[GOP] Both buffers full - stopping packet reading until current GOP finishes`)
}
} }
} }
else if (packetType === TAV_PACKET_AUDIO_MP2) { else if (packetType === TAV_PACKET_AUDIO_MP2) {
@@ -1326,9 +1316,9 @@ try {
// Resume packet reading to get next GOP (only one buffer occupied now) // Resume packet reading to get next GOP (only one buffer occupied now)
shouldReadPackets = true shouldReadPackets = true
if (interactive) { // if (interactive) {
console.log(`[GOP] First GOP ready (slot ${asyncDecodeSlot}, ${asyncDecodeGopSize} frames) in ${decodeTime.toFixed(1)}ms - starting playback`) // console.log(`[GOP] First GOP ready (slot ${asyncDecodeSlot}, ${asyncDecodeGopSize} frames) in ${decodeTime.toFixed(1)}ms - starting playback`)
} // }
// Free compressed data // Free compressed data
sys.free(asyncDecodePtr) sys.free(asyncDecodePtr)
@@ -1374,44 +1364,37 @@ try {
} }
} }
// Step 4 & 7: GOP finished? Wait for background decode, then transition // Step 4-7: GOP finished? Transition to ready GOP (triple-buffering)
if (!paused && currentGopSize > 0 && currentGopFrameIndex >= currentGopSize) { if (!paused && currentGopSize > 0 && currentGopFrameIndex >= currentGopSize) {
if (nextGopData !== null) { if (readyGopData !== null) {
// Wait for background decode to complete // Ready GOP exists - wait for it to finish decoding if still in progress
while (!graphics.tavDecodeGopIsComplete() && !paused) { while (!graphics.tavDecodeGopIsComplete() && !paused) {
sys.sleep(1) sys.sleep(1)
} }
if (!paused) { if (!paused) {
const [r1, r2] = graphics.tavDecodeGopGetResult() const [r1, r2] = graphics.tavDecodeGopGetResult()
decodeTime = (sys.nanoTime() - nextGopData.startTime) / 1000000.0 decodeTime = (sys.nanoTime() - readyGopData.startTime) / 1000000.0
if (interactive) {
const margin = nextGopData.timeRemaining - decodeTime
const status = margin > 0 ? "✓ ON TIME" : "✗ TOO LATE"
console.log(`[GOP] Background decode finished in ${decodeTime.toFixed(1)}ms (margin: ${margin.toFixed(0)}ms) ${status}`)
}
// Free compressed data // Free compressed data
sys.free(nextGopData.compressedPtr) sys.free(readyGopData.compressedPtr)
// Transition to next GOP // Transition to ready GOP
currentGopBufferSlot = 1 - currentGopBufferSlot currentGopBufferSlot = readyGopData.slot
currentGopSize = nextGopData.gopSize currentGopSize = readyGopData.gopSize
currentGopFrameIndex = 0 currentGopFrameIndex = 0
nextGopData = null
// Resume packet reading now that one buffer is free // Promote decoding GOP to ready GOP
readyGopData = decodingGopData
decodingGopData = null
// Resume packet reading now that one buffer is free (decoding slot available)
shouldReadPackets = true shouldReadPackets = true
if (interactive) {
console.log(`[GOP] ✓ SEAMLESS TRANSITION to next GOP (slot ${currentGopBufferSlot}, ${currentGopSize} frames)`)
}
} }
} else { } else {
// No next GOP available, pause playback // No ready GOP available - hiccup (shouldn't happen with triple-buffering)
if (interactive) { if (interactive) {
console.log(`[GOP] ✗ HICCUP - next GOP NOT READY! Playback paused.`) console.log(`[GOP] ✗ HICCUP - ready GOP NOT READY! Playback paused.`)
} }
currentGopSize = 0 currentGopSize = 0
currentGopFrameIndex = 0 currentGopFrameIndex = 0

View File

@@ -1030,9 +1030,9 @@ transmission capability, and region-of-interest coding.
### List of Keys ### List of Keys
- Uint64 BGNT: Video begin time (must be equal to the value of the first Timecode packet) - Uint64 BGNT: Video begin time (must be equal to the value of the first Timecode packet)
- Uint64 ENDT: Video end time (must be equal to the value of the last Timecode packet) - Uint64 ENDT: Video end time (must be equal to the value of the last Timecode packet)
- Uint64 CDAT: Creation time in nanoseconds since UNIX Epoch - Uint64 CDAT: Creation time in nanoseconds since UNIX Epoch (must be in UTC timezone)
- Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014") - Bytes VNDR: Name and version of the encoder (for Reference encoder: "Encoder-TAV 20251014 (list,of,features)")
- Bytes FMPG: FFmpeg version (typically "ffmpeg version 6.1.2"; the first line of text FFmpeg emits right before the copyright text) - Bytes FMPG: FFmpeg version (typically "ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers"; the first line of text FFmpeg emits)
## Standard Metadata Payload Packet Structure ## Standard Metadata Payload Packet Structure
@@ -1062,10 +1062,12 @@ Updated on 2025-10-17 to include canvas expansion margins.
This packet contains multiple frames encoded as a single spacetime block for optimal This packet contains multiple frames encoded as a single spacetime block for optimal
temporal compression. temporal compression.
uint8 Packet Type (0x12) uint8 Packet Type (0x12/0x13)
uint8 GOP Size (number of frames in this GOP, typically 16) uint8 GOP Size (number of frames in this GOP, typically 16)
int16 Motion Vectors X[GOP Size] (quarter-pixel precision for global motion compensation) <if packet type is 0x13>
int16 Motion Vectors Y[GOP Size] (quarter-pixel precision for global motion compensation) uint32 Compressed Size
* Zstd-compressed Motion Data
<endif>
uint32 Compressed Size uint32 Compressed Size
* Zstd-compressed Unified Block Data * Zstd-compressed Unified Block Data

View File

@@ -6662,194 +6662,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
System.arraycopy(output, 0, frameData, 0, frameData.size) System.arraycopy(output, 0, frameData, 0, frameData.size)
} }
/**
* Main GOP unified decoder function.
* Decodes a unified 3D DWT GOP block (temporal + spatial) and outputs RGB frames.
*
* @param compressedDataPtr Pointer to compressed Zstd data
* @param compressedSize Size of compressed data
* @param gopSize Number of frames in GOP (1-16)
* @param motionVectorsX X motion vectors in 1/16-pixel units
* @param motionVectorsY Y motion vectors in 1/16-pixel units
* @param outputRGBAddrs Array of output RGB buffer addresses
* @param width Original frame width (output dimensions)
* @param height Original frame height (output dimensions)
* @param canvasWidth Expanded canvas width (for motion compensation)
* @param canvasHeight Expanded canvas height (for motion compensation)
* @param marginLeft Left margin to crop from expanded canvas
* @param marginTop Top margin to crop from expanded canvas
* @param qIndex Quality index
* @param qYGlobal Global Y quantizer
* @param qCoGlobal Global Co quantizer
* @param qCgGlobal Global Cg quantizer
* @param channelLayout Channel layout flags
* @param spatialFilter Wavelet filter type
* @param spatialLevels Number of spatial DWT levels (default 6)
* @param temporalLevels Number of temporal DWT levels (default 2)
* @return Number of frames decoded
*/
fun tavDecodeGopUnified(
compressedDataPtr: Long,
compressedSize: Int,
gopSize: Int,
motionVectorsX: IntArray,
motionVectorsY: IntArray,
outputRGBAddrs: LongArray,
width: Int,
height: Int,
canvasWidth: Int,
canvasHeight: Int,
marginLeft: Int,
marginTop: Int,
qIndex: Int,
qYGlobal: Int,
qCoGlobal: Int,
qCgGlobal: Int,
channelLayout: Int,
spatialFilter: Int = 1,
spatialLevels: Int = 6,
temporalLevels: Int = 2,
entropyCoder: Int = 0
): Array<Any> {
val dbgOut = HashMap<String, Any>()
dbgOut["qY"] = qYGlobal
dbgOut["qCo"] = qCoGlobal
dbgOut["qCg"] = qCgGlobal
dbgOut["frameMode"] = "G"
// Use expanded canvas dimensions for DWT processing
val canvasPixels = canvasWidth * canvasHeight
val outputPixels = width * height
// Step 1: Decompress unified GOP block
val compressedData = ByteArray(compressedSize)
UnsafeHelper.memcpyRaw(
null,
vm.usermem.ptr + compressedDataPtr,
compressedData,
UnsafeHelper.getArrayOffset(compressedData),
compressedSize.toLong()
)
val decompressedData = try {
ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
zstd.readBytes()
}
} catch (e: Exception) {
println("ERROR: Zstd decompression failed: ${e.message}")
return arrayOf(0, dbgOut)
}
// Step 2: Postprocess unified block to per-frame coefficients (based on header's entropy coder field)
val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
decompressedData,
gopSize,
canvasPixels, // Use expanded canvas size
channelLayout,
entropyCoder
)
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
val gopY = Array(gopSize) { FloatArray(canvasPixels) }
val gopCo = Array(gopSize) { FloatArray(canvasPixels) }
val gopCg = Array(gopSize) { FloatArray(canvasPixels) }
// Step 4: Calculate subband layout for expanded canvas (needed for perceptual dequantization)
val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels)
// Step 5: Dequantize with temporal-spatial scaling
for (t in 0 until gopSize) {
val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
val temporalScale = getTemporalQuantizerScale(temporalLevel)
// Apply temporal scaling to base quantizers for each channel
val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
// Use existing perceptual dequantization for spatial weighting
dequantiseDWTSubbandsPerceptual(
qIndex, qYGlobal,
quantizedCoeffs[t][0], gopY[t],
subbands, baseQY, false, spatialLevels, // isChroma=false
isEZBCMode
)
dequantiseDWTSubbandsPerceptual(
qIndex, qYGlobal,
quantizedCoeffs[t][1], gopCo[t],
subbands, baseQCo, true, spatialLevels, // isChroma=true
isEZBCMode
)
dequantiseDWTSubbandsPerceptual(
qIndex, qYGlobal,
quantizedCoeffs[t][2], gopCg[t],
subbands, baseQCg, true, spatialLevels, // isChroma=true
isEZBCMode
)
}
// Step 6: Apply inverse 3D DWT (spatial first, then temporal) on expanded canvas
tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter)
// Step 7: Apply inverse motion compensation (shift frames back) on expanded canvas
// Note: Motion vectors are in 1/16-pixel units, cumulative relative to frame 0
for (t in 1 until gopSize) { // Skip frame 0 (reference)
val dx = motionVectorsX[t] / 16 // Convert to pixel units
val dy = motionVectorsY[t] / 16
if (dx != 0 || dy != 0) {
applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
}
}
// Step 8: Crop expanded canvas to original dimensions and convert to RGB
for (t in 0 until gopSize) {
val rgbAddr = outputRGBAddrs[t]
// Crop from expanded canvas (canvasWidth x canvasHeight) to output (width x height)
for (row in 0 until height) {
for (col in 0 until width) {
// Source pixel in expanded canvas
val canvasX = col + marginLeft
val canvasY = row + marginTop
val canvasIdx = canvasY * canvasWidth + canvasX
// Destination pixel in output buffer
val outIdx = row * width + col
val yVal = gopY[t][canvasIdx]
val co = gopCo[t][canvasIdx]
val cg = gopCg[t][canvasIdx]
// YCoCg-R to RGB conversion
val tmp = yVal - (cg / 2.0f)
val g = cg + tmp
val b = tmp - (co / 2.0f)
val r = b + co
// Clamp to 0-255 range
val rClamped = r.toInt().coerceIn(0, 255)
val gClamped = g.toInt().coerceIn(0, 255)
val bClamped = b.toInt().coerceIn(0, 255)
// Write RGB24 format (3 bytes per pixel)
val offset = rgbAddr + outIdx * 3L
vm.usermem[offset] = rClamped.toByte()
vm.usermem[offset + 1] = gClamped.toByte()
vm.usermem[offset + 2] = bClamped.toByte()
}
}
}
return arrayOf(gopSize, dbgOut)
}
/** /**
* Decode GOP frames directly into GraphicsAdapter.videoBuffer (Java heap). * Decode GOP frames directly into GraphicsAdapter.videoBuffer (Java heap).
* This avoids allocating GOP frames in VM user memory, saving ~6 MB for 8-frame GOPs. * This avoids allocating GOP frames in VM user memory, saving ~6 MB for 8-frame GOPs.
@@ -6864,14 +6676,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
compressedDataPtr: Long, compressedDataPtr: Long,
compressedSize: Int, compressedSize: Int,
gopSize: Int, gopSize: Int,
motionVectorsX: IntArray,
motionVectorsY: IntArray,
width: Int, width: Int,
height: Int, height: Int,
canvasWidth: Int,
canvasHeight: Int,
marginLeft: Int,
marginTop: Int,
qIndex: Int, qIndex: Int,
qYGlobal: Int, qYGlobal: Int,
qCoGlobal: Int, qCoGlobal: Int,
@@ -6900,7 +6706,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// Use expanded canvas dimensions for DWT processing // Use expanded canvas dimensions for DWT processing
val canvasPixels = canvasWidth * canvasHeight
val outputPixels = width * height val outputPixels = width * height
// Step 1: Decompress unified GOP block // Step 1: Decompress unified GOP block
@@ -6926,18 +6731,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto( val (isEZBCMode, quantizedCoeffs) = tavPostprocessGopAuto(
decompressedData, decompressedData,
gopSize, gopSize,
canvasPixels, outputPixels,
channelLayout, channelLayout,
entropyCoder entropyCoder
) )
// Step 3: Allocate GOP buffers for float coefficients (expanded canvas size) // Step 3: Allocate GOP buffers for float coefficients (expanded canvas size)
val gopY = Array(gopSize) { FloatArray(canvasPixels) } val gopY = Array(gopSize) { FloatArray(outputPixels) }
val gopCo = Array(gopSize) { FloatArray(canvasPixels) } val gopCo = Array(gopSize) { FloatArray(outputPixels) }
val gopCg = Array(gopSize) { FloatArray(canvasPixels) } val gopCg = Array(gopSize) { FloatArray(outputPixels) }
// Step 4: Calculate subband layout for expanded canvas // Step 4: Calculate subband layout for expanded canvas
val subbands = calculateSubbandLayout(canvasWidth, canvasHeight, spatialLevels) val subbands = calculateSubbandLayout(width, height, spatialLevels)
// Step 5: Dequantize with temporal-spatial scaling // Step 5: Dequantize with temporal-spatial scaling
for (t in 0 until gopSize) { for (t in 0 until gopSize) {
@@ -6971,40 +6776,23 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// Step 6: Apply inverse 3D DWT // Step 6: Apply inverse 3D DWT
tavApplyInverse3DDWT(gopY, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter) tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCo, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter) tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
tavApplyInverse3DDWT(gopCg, canvasWidth, canvasHeight, gopSize, spatialLevels, temporalLevels, spatialFilter) tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
// Step 7: Apply inverse motion compensation
for (t in 1 until gopSize) {
val dx = motionVectorsX[t] / 16
val dy = motionVectorsY[t] / 16
if (dx != 0 || dy != 0) {
applyInverseTranslation(gopY[t], canvasWidth, canvasHeight, dx, dy)
applyInverseTranslation(gopCo[t], canvasWidth, canvasHeight, dx, dy)
applyInverseTranslation(gopCg[t], canvasWidth, canvasHeight, dx, dy)
}
}
// Step 8: Crop and convert to RGB, write directly to videoBuffer // Step 8: Crop and convert to RGB, write directly to videoBuffer
for (t in 0 until gopSize) { for (t in 0 until gopSize) {
val videoBufferOffset = bufferOffset + (t * frameSize) // Each frame sequentially, starting at bufferOffset val videoBufferOffset = bufferOffset + (t * frameSize) // Each frame sequentially, starting at bufferOffset
for (row in 0 until height) { for (py in 0 until height) {
for (col in 0 until width) { for (px in 0 until width) {
// Source pixel in expanded canvas
val canvasX = col + marginLeft
val canvasY = row + marginTop
val canvasIdx = canvasY * canvasWidth + canvasX
// Destination pixel in videoBuffer // Destination pixel in videoBuffer
val outIdx = row * width + col val outIdx = py * width + px
val offset = videoBufferOffset + outIdx * 3L val offset = videoBufferOffset + outIdx * 3L
val yVal = gopY[t][canvasIdx] val yVal = gopY[t][outIdx]
val co = gopCo[t][canvasIdx] val co = gopCo[t][outIdx]
val cg = gopCg[t][canvasIdx] val cg = gopCg[t][outIdx]
// YCoCg-R to RGB conversion // YCoCg-R to RGB conversion
val tmp = yVal - (cg / 2.0f) val tmp = yVal - (cg / 2.0f)
@@ -7113,14 +6901,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
compressedDataPtr: Long, compressedDataPtr: Long,
compressedSize: Int, compressedSize: Int,
gopSize: Int, gopSize: Int,
motionVectorsX: IntArray,
motionVectorsY: IntArray,
width: Int, width: Int,
height: Int, height: Int,
canvasWidth: Int,
canvasHeight: Int,
marginLeft: Int,
marginTop: Int,
qIndex: Int, qIndex: Int,
qYGlobal: Int, qYGlobal: Int,
qCoGlobal: Int, qCoGlobal: Int,
@@ -7128,7 +6910,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
channelLayout: Int, channelLayout: Int,
spatialFilter: Int = 1, spatialFilter: Int = 1,
spatialLevels: Int = 6, spatialLevels: Int = 6,
temporalLevels: Int = 2, temporalLevels: Int = 3,
entropyCoder: Int = 0, entropyCoder: Int = 0,
bufferOffset: Long = 0 bufferOffset: Long = 0
) { ) {
@@ -7144,9 +6926,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
try { try {
val result = tavDecodeGopToVideoBuffer( val result = tavDecodeGopToVideoBuffer(
compressedDataPtr, compressedSize, gopSize, compressedDataPtr, compressedSize, gopSize,
motionVectorsX, motionVectorsY, width, height,
width, height, canvasWidth, canvasHeight,
marginLeft, marginTop,
qIndex, qYGlobal, qCoGlobal, qCgGlobal, qIndex, qYGlobal, qCoGlobal, qCgGlobal,
channelLayout, spatialFilter, spatialLevels, temporalLevels, channelLayout, spatialFilter, spatialLevels, temporalLevels,
entropyCoder, bufferOffset entropyCoder, bufferOffset

View File

@@ -107,7 +107,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
internal val unusedArea = UnsafeHelper.allocate(1024, this) internal val unusedArea = UnsafeHelper.allocate(1024, this)
internal val scanlineOffsets = UnsafeHelper.allocate(1024, this) internal val scanlineOffsets = UnsafeHelper.allocate(1024, this)
internal val videoBuffer = UnsafeHelper.allocate(32 * 1024 * 1024, this) internal val videoBuffer = UnsafeHelper.allocate(48 * 1024 * 1024, this) // 48 MB for triple-buffering (3 slots × 21 frames × 752 kB)
protected val paletteShader = LoadShader(DRAW_SHADER_VERT, config.paletteShader) protected val paletteShader = LoadShader(DRAW_SHADER_VERT, config.paletteShader)
protected val textShader = LoadShader(DRAW_SHADER_VERT, config.fragShader) protected val textShader = LoadShader(DRAW_SHADER_VERT, config.fragShader)

View File

@@ -18,7 +18,7 @@
#include <float.h> #include <float.h>
#include <fftw3.h> #include <fftw3.h>
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251019" #define ENCODER_VENDOR_STRING "Encoder-TAV 20251022 (3d-dwt,ezbc)"
// TSVM Advanced Video (TAV) format constants // TSVM Advanced Video (TAV) format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
@@ -48,7 +48,7 @@
#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe) #define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe)
#define TAV_PACKET_PFRAME 0x11 // Predicted frame (legacy, unused) #define TAV_PACKET_PFRAME 0x11 // Predicted frame (legacy, unused)
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block, translation-based) #define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block, translation-based)
#define TAV_PACKET_GOP_UNIFIED_MESH 0x13 // Unified 3D DWT GOP with distortion mesh warping #define TAV_PACKET_GOP_UNIFIED_MOTION 0x13 // Unified 3D DWT GOP with motion-compensated lifting
#define TAV_PACKET_PFRAME_RESIDUAL 0x14 // P-frame with MPEG-style residual coding (block motion compensation) #define TAV_PACKET_PFRAME_RESIDUAL 0x14 // P-frame with MPEG-style residual coding (block motion compensation)
#define TAV_PACKET_BFRAME_RESIDUAL 0x15 // B-frame with MPEG-style residual coding (bidirectional prediction) #define TAV_PACKET_BFRAME_RESIDUAL 0x15 // B-frame with MPEG-style residual coding (bidirectional prediction)
#define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning #define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning
@@ -116,13 +116,15 @@ static int needs_alpha_channel(int channel_layout) {
#define DEFAULT_HEIGHT 448 #define DEFAULT_HEIGHT 448
#define DEFAULT_FPS 30 #define DEFAULT_FPS 30
#define DEFAULT_QUALITY 3 #define DEFAULT_QUALITY 3
#define DEFAULT_ZSTD_LEVEL 9 #define DEFAULT_ZSTD_LEVEL 3
#define TEMPORAL_GOP_SIZE 20//8 // ~42 frames fit into 32 MB video buffer #define TEMPORAL_GOP_SIZE 20
#define TEMPORAL_DECOMP_LEVEL 2 #define TEMPORAL_DECOMP_LEVEL 2
#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction #define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
// Audio/subtitle constants (reused from TEV) // Audio/subtitle constants (reused from TEV)
#define MP2_SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 1152 #define MP2_DEFAULT_PACKET_SIZE 1152
#define PACKET_AUDIO_TIME ((double)MP2_DEFAULT_PACKET_SIZE / MP2_SAMPLE_RATE)
#define MAX_SUBTITLE_LENGTH 2048 #define MAX_SUBTITLE_LENGTH 2048
int debugDumpMade = 0; int debugDumpMade = 0;
@@ -2175,6 +2177,7 @@ static int mp2_packet_size_to_rate_index(int packet_size, int is_mono);
static long write_extended_header(tav_encoder_t *enc); static long write_extended_header(tav_encoder_t *enc);
static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate); static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_ntsc_framerate);
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output); static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output);
static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num_frames, FILE *output);
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps); static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps);
static subtitle_entry_t* parse_srt_file(const char *filename, int fps); static subtitle_entry_t* parse_srt_file(const char *filename, int fps);
static subtitle_entry_t* parse_smi_file(const char *filename, int fps); static subtitle_entry_t* parse_smi_file(const char *filename, int fps);
@@ -2269,7 +2272,7 @@ static void show_usage(const char *program_name) {
printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n"); printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n");
printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n"); printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL); printf(" --zstd-level N Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
printf(" --no-grain-synthesis Disable grain synthesis (enabled by default)\n"); // printf(" --no-grain-synthesis Disable grain synthesis (enabled by default)\n");
printf(" --help Show this help\n\n"); printf(" --help Show this help\n\n");
printf("Audio Rate by Quality:\n "); printf("Audio Rate by Quality:\n ");
@@ -2328,7 +2331,7 @@ static tav_encoder_t* create_encoder(void) {
enc->intra_only = 0; enc->intra_only = 0;
enc->monoblock = 1; // Default to monoblock mode enc->monoblock = 1; // Default to monoblock mode
enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6) enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6)
enc->enable_ezbc = 0; // Default to twobit-map (EZBC adds overhead for small files) enc->enable_ezbc = 1; // Default to EZBC over twobit-map
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg
enc->audio_bitrate = 0; // 0 = use quality table enc->audio_bitrate = 0; // 0 = use quality table
enc->encode_limit = 0; // Default: no frame limit enc->encode_limit = 0; // Default: no frame limit
@@ -2339,7 +2342,7 @@ static tav_encoder_t* create_encoder(void) {
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL; enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
// GOP / temporal DWT settings // GOP / temporal DWT settings
enc->enable_temporal_dwt = 0; // Default: disabled for backward compatibility. Mutually exclusive with use_delta_encoding enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding
enc->temporal_gop_capacity = TEMPORAL_GOP_SIZE; // 16 frames enc->temporal_gop_capacity = TEMPORAL_GOP_SIZE; // 16 frames
enc->temporal_gop_frame_count = 0; enc->temporal_gop_frame_count = 0;
enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 2 levels of temporal DWT (16 -> 4x4 subbands) enc->temporal_decomp_levels = TEMPORAL_DECOMP_LEVEL; // 2 levels of temporal DWT (16 -> 4x4 subbands)
@@ -4826,16 +4829,6 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
memcpy(gop_cg_coeffs[i], enc->temporal_gop_cg_frames[i], num_pixels * sizeof(float)); memcpy(gop_cg_coeffs[i], enc->temporal_gop_cg_frames[i], num_pixels * sizeof(float));
} }
// Debug: Print original frame-to-frame motion vectors
if (enc->verbose && actual_gop_size >= 4) {
printf("Frame-to-frame motion vectors (before cumulative conversion):\n");
for (int i = 0; i < actual_gop_size; i++) {
printf(" Frame %d: 1/16px=(%d, %d) pixels=(%.3f, %.3f)\n",
i, enc->temporal_gop_translation_x[i], enc->temporal_gop_translation_y[i],
enc->temporal_gop_translation_x[i] / 16.0f, enc->temporal_gop_translation_y[i] / 16.0f);
}
}
// Step 0.5: Convert frame-to-frame motion vectors to cumulative (relative to frame 0) // Step 0.5: Convert frame-to-frame motion vectors to cumulative (relative to frame 0)
// Phase correlation computes motion of frame[i] relative to frame[i-1] // Phase correlation computes motion of frame[i] relative to frame[i-1]
// We need cumulative motion relative to frame 0 for proper alignment // We need cumulative motion relative to frame 0 for proper alignment
@@ -4844,16 +4837,6 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
enc->temporal_gop_translation_y[i] += enc->temporal_gop_translation_y[i-1]; enc->temporal_gop_translation_y[i] += enc->temporal_gop_translation_y[i-1];
} }
// Debug: Print cumulative motion vectors
if (enc->verbose && actual_gop_size >= 4) {
printf("Cumulative motion vectors (after conversion):\n");
for (int i = 0; i < actual_gop_size; i++) {
printf(" Frame %d: 1/16px=(%d, %d) pixels=(%.3f, %.3f)\n",
i, enc->temporal_gop_translation_x[i], enc->temporal_gop_translation_y[i],
enc->temporal_gop_translation_x[i] / 16.0f, enc->temporal_gop_translation_y[i] / 16.0f);
}
}
// Step 0.5b: Calculate the valid region after alignment (crop bounds) // Step 0.5b: Calculate the valid region after alignment (crop bounds)
// Find the bounding box that's valid across all aligned frames // Find the bounding box that's valid across all aligned frames
int min_dx = 0, max_dx = 0, min_dy = 0, max_dy = 0; int min_dx = 0, max_dx = 0, min_dy = 0, max_dy = 0;
@@ -5102,6 +5085,9 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
// Write timecode packet for first frame in GOP // Write timecode packet for first frame in GOP
write_timecode_packet(output, frame_numbers[0], enc->output_fps, enc->is_ntsc_framerate); write_timecode_packet(output, frame_numbers[0], enc->output_fps, enc->is_ntsc_framerate);
// Process audio for this GOP (all frames at once)
process_audio_for_gop(enc, frame_numbers, actual_gop_size, output);
// Single-frame GOP fallback: use traditional I-frame encoding with serialise_tile_data // Single-frame GOP fallback: use traditional I-frame encoding with serialise_tile_data
if (actual_gop_size == 1) { if (actual_gop_size == 1) {
// Write I-frame packet header (no motion vectors, no GOP overhead) // Write I-frame packet header (no motion vectors, no GOP overhead)
@@ -5171,10 +5157,11 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
printf("Frame %d (single-frame GOP as I-frame): %zu bytes\n", printf("Frame %d (single-frame GOP as I-frame): %zu bytes\n",
frame_numbers[0], compressed_size); frame_numbers[0], compressed_size);
} }
} else { }
else {
// Multi-frame GOP: use unified 3D DWT encoding // Multi-frame GOP: use unified 3D DWT encoding
// Choose packet type based on motion compensation method // Choose packet type based on motion compensation method
uint8_t packet_type = enc->temporal_enable_mcezbc ? TAV_PACKET_GOP_UNIFIED_MESH : TAV_PACKET_GOP_UNIFIED; uint8_t packet_type = enc->temporal_enable_mcezbc ? TAV_PACKET_GOP_UNIFIED_MOTION : TAV_PACKET_GOP_UNIFIED;
fwrite(&packet_type, 1, 1, output); fwrite(&packet_type, 1, 1, output);
total_bytes_written += 1; total_bytes_written += 1;
@@ -5263,26 +5250,6 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
free(mv_buffer); free(mv_buffer);
free(compressed_mv); free(compressed_mv);
} else {
// Packet 0x12: Translation-based alignment
// Write canvas expansion information (4 bytes)
uint8_t canvas_margins[4] = {
(uint8_t)crop_left, // Left margin
(uint8_t)crop_right, // Right margin
(uint8_t)crop_top, // Top margin
(uint8_t)crop_bottom // Bottom margin
};
fwrite(canvas_margins, 1, 4, output);
total_bytes_written += 4;
// Write all motion vectors (1/16-pixel precision) for the entire GOP
for (int t = 0; t < actual_gop_size; t++) {
int16_t dx = enc->temporal_gop_translation_x[t];
int16_t dy = enc->temporal_gop_translation_y[t];
fwrite(&dx, sizeof(int16_t), 1, output);
fwrite(&dy, sizeof(int16_t), 1, output);
total_bytes_written += 4;
}
} }
// Preprocess ALL frames with unified significance map // Preprocess ALL frames with unified significance map
@@ -8649,13 +8616,8 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
// Calculate how much audio time each frame represents (in seconds) // Calculate how much audio time each frame represents (in seconds)
double frame_audio_time = 1.0 / enc->output_fps; double frame_audio_time = 1.0 / enc->output_fps;
// Calculate how much audio time each MP2 packet represents
// MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
#define MP2_SAMPLE_RATE 32000
double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
// Estimate how many packets we consume per video frame // Estimate how many packets we consume per video frame
double packets_per_frame = frame_audio_time / packet_audio_time; double packets_per_frame = frame_audio_time / PACKET_AUDIO_TIME;
// Allocate MP2 buffer if needed // Allocate MP2 buffer if needed
if (!enc->mp2_buffer) { if (!enc->mp2_buffer) {
@@ -8683,24 +8645,20 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
// Calculate how many packets we need to maintain target buffer level // Calculate how many packets we need to maintain target buffer level
// Only insert when buffer drops below target, and only insert enough to restore target // Only insert when buffer drops below target, and only insert enough to restore target
double target_level = (double)enc->target_audio_buffer_size; double target_level = fmax(packets_per_frame, (double)enc->target_audio_buffer_size);
if (enc->audio_frames_in_buffer < target_level) { // if (enc->audio_frames_in_buffer < target_level) {
double deficit = target_level - enc->audio_frames_in_buffer; double deficit = target_level - enc->audio_frames_in_buffer;
// Insert packets to cover the deficit, but at least maintain minimum flow // Insert packets to cover the deficit, but at least maintain minimum flow
packets_to_insert = (int)ceil(deficit); packets_to_insert = (int)ceil(deficit);
// Cap at reasonable maximum to prevent excessive insertion
if (packets_to_insert > enc->target_audio_buffer_size) {
packets_to_insert = enc->target_audio_buffer_size;
}
if (enc->verbose) { if (enc->verbose) {
printf("Frame %d: Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n", printf("Frame %d: Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n",
frame_num, old_buffer, enc->audio_frames_in_buffer, deficit, packets_to_insert); frame_num, old_buffer, enc->audio_frames_in_buffer, deficit, packets_to_insert);
} }
} else if (enc->verbose && old_buffer != enc->audio_frames_in_buffer) { // } else if (enc->verbose && old_buffer != enc->audio_frames_in_buffer) {
printf("Frame %d: Buffer sufficient (%.2f->%.2f), no packets\n", // printf("Frame %d: Buffer sufficient (%.2f->%.2f), no packets\n",
frame_num, old_buffer, enc->audio_frames_in_buffer); // frame_num, old_buffer, enc->audio_frames_in_buffer);
} // }
} }
// Insert the calculated number of audio packets // Insert the calculated number of audio packets
@@ -8737,6 +8695,96 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
return 1; return 1;
} }
// Process audio for a GOP (multiple frames at once)
// Accumulates deficit for N frames and emits all necessary audio packets
static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num_frames, FILE *output) {
if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0 || num_frames == 0) {
return 1;
}
// Handle first frame initialization (same as process_audio)
int first_frame_in_gop = frame_numbers[0];
if (first_frame_in_gop == 0) {
uint8_t header[4];
if (fread(header, 1, 4, enc->mp2_file) != 4) return 1;
fseek(enc->mp2_file, 0, SEEK_SET);
enc->mp2_packet_size = get_mp2_packet_size(header);
int is_mono = (header[3] >> 6) == 3;
enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
enc->target_audio_buffer_size = 4; // 4 audio packets in buffer (does nothing for GOP)
enc->audio_frames_in_buffer = 0.0;
}
// Calculate audio packet consumption per video frame
double frame_audio_time = 1.0 / enc->output_fps;
double packets_per_frame = frame_audio_time / PACKET_AUDIO_TIME;
// Allocate MP2 buffer if needed
if (!enc->mp2_buffer) {
enc->mp2_buffer_size = enc->mp2_packet_size * 2;
enc->mp2_buffer = malloc(enc->mp2_buffer_size);
if (!enc->mp2_buffer) {
fprintf(stderr, "Failed to allocate audio buffer\n");
return 1;
}
}
// Calculate total deficit for all frames in the GOP
int total_packets_to_insert = 0;
// Simulate buffer consumption for all N frames in the GOP
double old_buffer = enc->audio_frames_in_buffer;
enc->audio_frames_in_buffer -= (packets_per_frame * num_frames);
// Calculate deficit to restore buffer to target level
// double target_level = fmax(packets_per_frame, (double)enc->target_audio_buffer_size);
// if (enc->audio_frames_in_buffer < target_level) {
double deficit = packets_per_frame * num_frames;
total_packets_to_insert = CLAMP((int)round(deficit), enc->target_audio_buffer_size, 9999);
if (enc->verbose) {
printf("GOP (%d frames, starting at %d): Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n",
num_frames, first_frame_in_gop, old_buffer, enc->audio_frames_in_buffer, deficit, total_packets_to_insert);
}
// } else if (enc->verbose) {
// printf("GOP (%d frames, starting at %d): Buffer sufficient (%.2f->%.2f), no packets\n",
// num_frames, first_frame_in_gop, old_buffer, enc->audio_frames_in_buffer);
// }
// Emit all audio packets for this GOP
for (int q = 0; q < total_packets_to_insert; q++) {
size_t bytes_to_read = enc->mp2_packet_size;
if (bytes_to_read > enc->audio_remaining) {
bytes_to_read = enc->audio_remaining;
}
size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file);
if (bytes_read == 0) break;
// Write TAV MP2 audio packet
uint8_t audio_packet_type = TAV_PACKET_AUDIO_MP2;
uint32_t audio_len = (uint32_t)bytes_read;
fwrite(&audio_packet_type, 1, 1, output);
fwrite(&audio_len, 4, 1, output);
fwrite(enc->mp2_buffer, 1, bytes_read, output);
// Track audio bytes written
enc->audio_remaining -= bytes_read;
enc->audio_frames_in_buffer++;
if (first_frame_in_gop == 0) {
enc->audio_frames_in_buffer = enc->target_audio_buffer_size / 2;
}
if (enc->verbose) {
printf("Audio packet %d: %zu bytes (buffer: %.2f packets)\n",
q, bytes_read, enc->audio_frames_in_buffer);
}
}
return 1;
}
// Process subtitles for current frame (copied and adapted from TEV) // Process subtitles for current frame (copied and adapted from TEV)
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) { static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
if (!enc->subtitles) { if (!enc->subtitles) {
@@ -9834,20 +9882,16 @@ int main(int argc, char *argv[]) {
adjust_quantiser_for_bitrate(enc); adjust_quantiser_for_bitrate(enc);
} }
// For GOP encoding, process audio/subtitles for all frames in the flushed GOP // For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames
// For traditional encoding, process audio/subtitles for this single frame // For traditional encoding, process audio/subtitles for this single frame
if (enc->enable_temporal_dwt) { if (!enc->enable_temporal_dwt) {
// Note: In GOP mode, audio/subtitle sync is approximate since we flush multiple frames at once // Process audio for this frame
// This is acceptable since GOPs are short (16 frames max = ~0.5s at 30fps) process_audio(enc, true_frame_count, enc->output_fp);
// TODO: Consider buffering audio/subtitles for precise sync if needed
// Process subtitles for this frame
process_subtitles(enc, true_frame_count, enc->output_fp);
} }
// Process audio for this frame
process_audio(enc, true_frame_count, enc->output_fp);
// Process subtitles for this frame
process_subtitles(enc, true_frame_count, enc->output_fp);
// Write a sync packet only after a video is been coded // Write a sync packet only after a video is been coded
// For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC // For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC
// For B-frame mode, sync packets are already written in the encoding loop // For B-frame mode, sync packets are already written in the encoding loop
@@ -9857,7 +9901,8 @@ int main(int argc, char *argv[]) {
} }
// NTSC frame duplication: emit extra sync packet for every 1000n+500 frames // NTSC frame duplication: emit extra sync packet for every 1000n+500 frames
if (enc->is_ntsc_framerate && (frame_count % 1000 == 500)) { // Skip when temporal DWT is enabled (audio handled in GOP flush)
if (!enc->enable_temporal_dwt && enc->is_ntsc_framerate && (frame_count % 1000 == 500)) {
true_frame_count++; true_frame_count++;
// Process audio and subtitles for the duplicated frame to maintain sync // Process audio and subtitles for the duplicated frame to maintain sync
process_audio(enc, true_frame_count, enc->output_fp); process_audio(enc, true_frame_count, enc->output_fp);

View File

@@ -18,6 +18,11 @@
#define TAV_PACKET_IFRAME 0x10 #define TAV_PACKET_IFRAME 0x10
#define TAV_PACKET_PFRAME 0x11 #define TAV_PACKET_PFRAME 0x11
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block) #define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP (all frames in single block)
#define TAV_PACKET_GOP_UNIFIED_MOTION 0x13
#define TAV_PACKET_PFRAME_RESIDUAL 0x14 // P-frame with MPEG-style residual coding (block motion compensation)
#define TAV_PACKET_BFRAME_RESIDUAL 0x15 // B-frame with MPEG-style residual coding (bidirectional prediction)
#define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning
#define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
#define TAV_PACKET_AUDIO_MP2 0x20 #define TAV_PACKET_AUDIO_MP2 0x20
#define TAV_PACKET_SUBTITLE 0x30 #define TAV_PACKET_SUBTITLE 0x30
#define TAV_PACKET_SUBTITLE_KAR 0x31 #define TAV_PACKET_SUBTITLE_KAR 0x31
@@ -59,6 +64,7 @@ typedef struct {
int pframe_delta_count; int pframe_delta_count;
int pframe_skip_count; int pframe_skip_count;
int gop_unified_count; int gop_unified_count;
int gop_unified_motion_count;
int gop_sync_count; int gop_sync_count;
int total_gop_frames; int total_gop_frames;
int audio_count; int audio_count;
@@ -94,6 +100,11 @@ const char* get_packet_type_name(uint8_t type) {
case TAV_PACKET_IFRAME: return "I-FRAME"; case TAV_PACKET_IFRAME: return "I-FRAME";
case TAV_PACKET_PFRAME: return "P-FRAME"; case TAV_PACKET_PFRAME: return "P-FRAME";
case TAV_PACKET_GOP_UNIFIED: return "GOP (3D DWT Unified)"; case TAV_PACKET_GOP_UNIFIED: return "GOP (3D DWT Unified)";
case TAV_PACKET_GOP_UNIFIED_MOTION: return "GOP (3D DWT Unified with Motion Data)";
case TAV_PACKET_PFRAME_RESIDUAL: return "P-FRAME (residual)";
case TAV_PACKET_BFRAME_RESIDUAL: return "B-FRAME (residual)";
case TAV_PACKET_PFRAME_ADAPTIVE: return "P-FRAME (quadtree)";
case TAV_PACKET_BFRAME_ADAPTIVE: return "B-FRAME (quadtree)";
case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2"; case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2";
case TAV_PACKET_SUBTITLE: return "SUBTITLE (Simple)"; case TAV_PACKET_SUBTITLE: return "SUBTITLE (Simple)";
case TAV_PACKET_SUBTITLE_KAR: return "SUBTITLE (Karaoke)"; case TAV_PACKET_SUBTITLE_KAR: return "SUBTITLE (Karaoke)";
@@ -246,9 +257,10 @@ void print_extended_header(FILE *fp, int verbose) {
if (verbose) { if (verbose) {
if (strcmp(key, "CDAT") == 0) { if (strcmp(key, "CDAT") == 0) {
time_t time_sec = value / 1000000000ULL; time_t time_sec = value / 1000000000ULL;
char *time_str = ctime(&time_sec); struct tm *time_info = gmtime(&time_sec);
if (time_str) { if (time_info) {
time_str[strlen(time_str)-1] = '\0'; // Remove newline char time_str[64];
strftime(time_str, sizeof(time_str), "%a %b %d %H:%M:%S %Y UTC", time_info);
printf("%s", time_str); printf("%s", time_str);
} }
} else { } else {
@@ -484,48 +496,37 @@ int main(int argc, char *argv[]) {
break; break;
} }
case TAV_PACKET_GOP_UNIFIED: { case TAV_PACKET_GOP_UNIFIED: case TAV_PACKET_GOP_UNIFIED_MOTION: {
// Unified GOP packet: [gop_size][motion_vectors...][compressed_size][data] // Unified GOP packet: [gop_size][motion_vectors...][compressed_size][data]
uint8_t gop_size; uint8_t gop_size;
if (fread(&gop_size, 1, 1, fp) != 1) break; if (fread(&gop_size, 1, 1, fp) != 1) break;
// Read all motion vectors // Read motion vectors
int16_t *motion_x = malloc(gop_size * sizeof(int16_t)); uint32_t size0 = 0;
int16_t *motion_y = malloc(gop_size * sizeof(int16_t)); if (packet_type == TAV_PACKET_GOP_UNIFIED_MOTION) {
for (int i = 0; i < gop_size; i++) { if (fread(&size0, sizeof(uint32_t), 1, fp) != 1) { break; }
if (fread(&motion_x[i], sizeof(int16_t), 1, fp) != 1) break; stats.total_video_bytes += size0;
if (fread(&motion_y[i], sizeof(int16_t), 1, fp) != 1) break; stats.gop_unified_motion_count++;
fseek(fp, size0, SEEK_CUR);
} }
// Read compressed data size // Read compressed data size
uint32_t size; uint32_t size1;
if (fread(&size, sizeof(uint32_t), 1, fp) != 1) { if (fread(&size1, sizeof(uint32_t), 1, fp) != 1) { break; }
free(motion_x); stats.total_video_bytes += size1;
free(motion_y); fseek(fp, size1, SEEK_CUR);
break;
}
stats.total_video_bytes += size;
stats.gop_unified_count++;
stats.total_gop_frames += gop_size; stats.total_gop_frames += gop_size;
if (packet_type == TAV_PACKET_GOP_UNIFIED) {
stats.gop_unified_count++;
}
if (!opts.summary_only && display) { if (!opts.summary_only && display) {
printf(" - GOP size=%u, data size=%u bytes (%.2f bytes/frame)", printf(" - GOP size=%u, data size=%u bytes (%.2f bytes/frame)",
gop_size, size, (double)size / gop_size); gop_size, (size0 + size1), (double)(size0 + size1) / gop_size);
// Always show motion vectors for GOP packets with absolute frame numbers
if (gop_size > 0) {
printf("\n Motion vectors (1/16-pixel):");
for (int i = 0; i < gop_size; i++) {
printf("\n Frame %d (#%d): (%.3f, %.3f) px",
current_frame + i, i, motion_x[i] / 16.0, motion_y[i] / 16.0);
}
}
} }
free(motion_x);
free(motion_y);
fseek(fp, size, SEEK_CUR);
break; break;
} }
@@ -714,10 +715,10 @@ int main(int argc, char *argv[]) {
printf(")"); printf(")");
} }
printf("\n"); printf("\n");
if (stats.gop_unified_count > 0) { if (stats.gop_unified_count + stats.gop_unified_motion_count > 0) {
printf(" 3D GOP packets: %d (total frames: %d, avg %.1f frames/GOP)\n", printf(" 3D GOP packets: %d (total frames: %d, avg %.1f frames/GOP)\n",
stats.gop_unified_count, stats.total_gop_frames, (stats.gop_unified_count + stats.gop_unified_motion_count), stats.total_gop_frames,
(double)stats.total_gop_frames / stats.gop_unified_count); (double)stats.total_gop_frames / (stats.gop_unified_count + stats.gop_unified_motion_count));
printf(" GOP sync packets: %d\n", stats.gop_sync_count); printf(" GOP sync packets: %d\n", stats.gop_sync_count);
} }
printf(" Mux video: %d\n", stats.mux_video_count); printf(" Mux video: %d\n", stats.mux_video_count);