mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV: still bugfixing
This commit is contained in:
@@ -25,9 +25,11 @@ const TAV_MODE_MOTION = 0x03
|
|||||||
// Packet types (same as TEV)
|
// Packet types (same as TEV)
|
||||||
const TAV_PACKET_IFRAME = 0x10
|
const TAV_PACKET_IFRAME = 0x10
|
||||||
const TAV_PACKET_PFRAME = 0x11
|
const TAV_PACKET_PFRAME = 0x11
|
||||||
|
const TAV_PACKET_GOP_UNIFIED = 0x12 // Unified 3D DWT GOP (temporal + spatial)
|
||||||
const TAV_PACKET_AUDIO_MP2 = 0x20
|
const TAV_PACKET_AUDIO_MP2 = 0x20
|
||||||
const TAV_PACKET_SUBTITLE = 0x30
|
const TAV_PACKET_SUBTITLE = 0x30
|
||||||
const TAV_PACKET_EXTENDED_HDR = 0xEF
|
const TAV_PACKET_EXTENDED_HDR = 0xEF
|
||||||
|
const TAV_PACKET_GOP_SYNC = 0xFC // GOP sync (N frames decoded from GOP block)
|
||||||
const TAV_PACKET_TIMECODE = 0xFD
|
const TAV_PACKET_TIMECODE = 0xFD
|
||||||
const TAV_PACKET_SYNC_NTSC = 0xFE
|
const TAV_PACKET_SYNC_NTSC = 0xFE
|
||||||
const TAV_PACKET_SYNC = 0xFF
|
const TAV_PACKET_SYNC = 0xFF
|
||||||
@@ -989,6 +991,159 @@ try {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
else if (packetType === TAV_PACKET_GOP_UNIFIED) {
|
||||||
|
// GOP Unified packet (temporal 3D DWT)
|
||||||
|
|
||||||
|
// Read GOP size (number of frames in this GOP, 1-16)
|
||||||
|
const gopSize = seqread.readOneByte()
|
||||||
|
|
||||||
|
// Read motion vectors (quarter-pixel units, int16)
|
||||||
|
// Encoder writes ALL motion vectors including frame 0
|
||||||
|
let motionX = new Array(gopSize)
|
||||||
|
let motionY = new Array(gopSize)
|
||||||
|
|
||||||
|
for (let i = 0; i < gopSize; i++) {
|
||||||
|
motionX[i] = seqread.readShort() // Signed int16
|
||||||
|
motionY[i] = seqread.readShort()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read compressed data size
|
||||||
|
const compressedSize = seqread.readInt()
|
||||||
|
|
||||||
|
// Read compressed data
|
||||||
|
let compressedPtr = seqread.readBytes(compressedSize)
|
||||||
|
updateDataRateBin(compressedSize)
|
||||||
|
|
||||||
|
// Check if GOP fits in VM memory
|
||||||
|
const gopMemoryNeeded = gopSize * FRAME_SIZE
|
||||||
|
if (gopMemoryNeeded > 8 * 1024 * 1024) {
|
||||||
|
throw new Error(`GOP too large: ${gopSize} frames needs ${(gopMemoryNeeded / 1024 / 1024).toFixed(2)}MB, but VM has only 8MB. Max GOP size: 11 frames.`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate GOP buffers outside try block so finally can free them
|
||||||
|
let gopRGBBuffers = new Array(gopSize)
|
||||||
|
for (let i = 0; i < gopSize; i++) {
|
||||||
|
gopRGBBuffers[i] = sys.malloc(FRAME_SIZE)
|
||||||
|
if (gopRGBBuffers[i] === 0) {
|
||||||
|
// Malloc failed - free what we allocated and bail out
|
||||||
|
for (let j = 0; j < i; j++) {
|
||||||
|
sys.free(gopRGBBuffers[j])
|
||||||
|
}
|
||||||
|
throw new Error(`Failed to allocate GOP buffer ${i}/${gopSize}. Out of memory.`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
let decodeStart = sys.nanoTime()
|
||||||
|
|
||||||
|
// Call GOP decoder
|
||||||
|
const framesDecoded = graphics.tavDecodeGopUnified(
|
||||||
|
compressedPtr,
|
||||||
|
compressedSize,
|
||||||
|
gopSize,
|
||||||
|
motionX,
|
||||||
|
motionY,
|
||||||
|
gopRGBBuffers, // Array of output buffer addresses
|
||||||
|
header.width,
|
||||||
|
header.height,
|
||||||
|
header.qualityLevel,
|
||||||
|
QLUT[header.qualityY],
|
||||||
|
QLUT[header.qualityCo],
|
||||||
|
QLUT[header.qualityCg],
|
||||||
|
header.channelLayout,
|
||||||
|
header.waveletFilter,
|
||||||
|
header.decompLevels,
|
||||||
|
2 // temporalLevels (hardcoded for now, could be in header)
|
||||||
|
)
|
||||||
|
|
||||||
|
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
|
||||||
|
decompressTime = 0 // Included in decode time
|
||||||
|
|
||||||
|
// Display each decoded frame
|
||||||
|
for (let i = 0; i < framesDecoded; i++) {
|
||||||
|
let uploadStart = sys.nanoTime()
|
||||||
|
|
||||||
|
// Upload GOP frame directly (no copy needed - already in ARGB format)
|
||||||
|
graphics.uploadRGBToFramebuffer(gopRGBBuffers[i], header.width, header.height, trueFrameCount + i, false)
|
||||||
|
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
|
||||||
|
|
||||||
|
// Apply bias lighting (only for first/last frame to save CPU)
|
||||||
|
let biasStart = sys.nanoTime()
|
||||||
|
if (i === 0 || i === framesDecoded - 1) {
|
||||||
|
setBiasLighting()
|
||||||
|
}
|
||||||
|
biasTime = (sys.nanoTime() - biasStart) / 1000000.0
|
||||||
|
|
||||||
|
// Fire audio on first frame
|
||||||
|
if (!audioFired && (frameCount > 0 || i > 0)) {
|
||||||
|
audio.play(0)
|
||||||
|
audioFired = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for frame timing
|
||||||
|
akku -= FRAME_TIME
|
||||||
|
while (akku < 0 && !stopPlay && !paused) {
|
||||||
|
let t = sys.nanoTime()
|
||||||
|
// Busy wait for accurate timing
|
||||||
|
akku += (sys.nanoTime() - t) / 1000000000.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Swap ping-pong buffers for P-frame reference
|
||||||
|
let temp = CURRENT_RGB_ADDR
|
||||||
|
CURRENT_RGB_ADDR = PREV_RGB_ADDR
|
||||||
|
PREV_RGB_ADDR = temp
|
||||||
|
|
||||||
|
// Log performance for first frame of GOP
|
||||||
|
if (i === 0 && (frameCount % 60 == 0 || frameCount == 0)) {
|
||||||
|
let totalTime = decompressTime + decodeTime + uploadTime + biasTime
|
||||||
|
console.log(`GOP Frame ${frameCount}: Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms (${gopSize} frames)`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: frameCount and trueFrameCount will be incremented by GOP_SYNC packet
|
||||||
|
// Note: GOP buffers will be freed in finally block
|
||||||
|
|
||||||
|
} catch (e) {
|
||||||
|
console.log(`GOP Frame ${frameCount}: decode failed: ${e}`)
|
||||||
|
// Try to get more details from the exception
|
||||||
|
if (e.stack) {
|
||||||
|
console.log(`Stack trace: ${e.stack}`)
|
||||||
|
}
|
||||||
|
if (e.javaException) {
|
||||||
|
console.log(`Java exception: ${e.javaException}`)
|
||||||
|
if (e.javaException.printStackTrace) {
|
||||||
|
serial.println("Java stack trace:")
|
||||||
|
e.javaException.printStackTrace()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Print exception properties
|
||||||
|
try {
|
||||||
|
const props = Object.keys(e)
|
||||||
|
if (props.length > 0) {
|
||||||
|
console.log(`Exception properties: ${props.join(', ')}`)
|
||||||
|
e.printStackTrace()
|
||||||
|
let ee = e.getStackTrace()
|
||||||
|
console.log(ee.length)
|
||||||
|
console.log(ee.join('\n'))
|
||||||
|
}
|
||||||
|
} catch (ex) {}
|
||||||
|
} finally {
|
||||||
|
// Always free GOP buffers even on error
|
||||||
|
for (let i = 0; i < gopSize; i++) {
|
||||||
|
sys.free(gopRGBBuffers[i])
|
||||||
|
}
|
||||||
|
sys.free(compressedPtr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (packetType === TAV_PACKET_GOP_SYNC) {
|
||||||
|
// GOP sync packet - increment frame counters by number of frames decoded
|
||||||
|
const framesInGOP = seqread.readOneByte()
|
||||||
|
|
||||||
|
frameCount += framesInGOP
|
||||||
|
trueFrameCount += framesInGOP
|
||||||
|
|
||||||
|
// Note: Buffer swapping already handled in GOP_UNIFIED handler
|
||||||
|
}
|
||||||
else if (packetType === TAV_PACKET_AUDIO_MP2) {
|
else if (packetType === TAV_PACKET_AUDIO_MP2) {
|
||||||
// MP2 Audio packet
|
// MP2 Audio packet
|
||||||
let audioLen = seqread.readInt()
|
let audioLen = seqread.readInt()
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@@ -4203,6 +4203,184 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reconstruct per-frame coefficients from unified GOP block (2-bit format).
 * Reverse of encoder's preprocess_gop_unified().
 *
 * Layout: [Y_maps_all][Co_maps_all][Cg_maps_all][Y_other_vals][Co_other_vals][Cg_other_vals]
 *
 * Each coefficient has a 2-bit code in its channel's map, packed four per byte
 * starting at the least significant bits: 00=0, 01=+1, 10=-1, 11=other.
 * "Other" coefficients take their value, in map order, from the channel's
 * value array as little-endian signed 16-bit integers.
 *
 * Truncated input is tolerated: coefficients whose map byte or value bytes lie
 * beyond the end of the data are left at 0.
 *
 * @param decompressedData Unified block data (after Zstd decompression)
 * @param numFrames Number of frames in GOP
 * @param numPixels Pixels per frame (width × height)
 * @param channelLayout Channel layout bit field (bit 1 set = no chroma, bit 2 set = no luma)
 * @return Array of [frame][channel] where channel: 0=Y, 1=Co, 2=Cg; absent channels are all zero
 */
private fun tavPostprocessGopUnified(
    decompressedData: ByteArray,
    numFrames: Int,
    numPixels: Int,
    channelLayout: Int
): Array<Array<ShortArray>> {
    val dataSize = decompressedData.size

    // 2 bits per coefficient, each frame's map padded to a whole byte
    val mapBytesPerFrame = (numPixels * 2 + 7) / 8
    val mapBytesPerChannel = mapBytesPerFrame * numFrames

    // Presence flags are inverted in the layout field: a cleared bit means "present".
    // Co and Cg share the chroma bit.
    val hasLuma = (channelLayout and 0x04) == 0
    val hasChroma = (channelLayout and 0x02) == 0
    val channelPresent = booleanArrayOf(hasLuma, hasChroma, hasChroma)

    // Maps of the present channels are stored back to back at the start of the block
    val mapStart = IntArray(3) { -1 }
    var readPtr = 0
    for (ch in 0 until 3) {
        if (channelPresent[ch]) {
            mapStart[ch] = readPtr
            readPtr += mapBytesPerChannel
        }
    }

    // Pass 1: count the "other" codes of each channel across ALL frames. The value
    // arrays follow the maps in channel order, so a channel's value array begins
    // where the previous channel's one ends.
    val valueStart = IntArray(3)
    for (ch in 0 until 3) {
        valueStart[ch] = readPtr
        if (!channelPresent[ch]) continue

        var otherCount = 0
        for (frame in 0 until numFrames) {
            val frameBase = mapStart[ch] + frame * mapBytesPerFrame
            for (i in 0 until numPixels) {
                val byteIdx = frameBase + i / 4
                // Map offsets only grow with i, so the rest of this frame is out of range too
                if (byteIdx >= dataSize) break
                // Bit offset is (i % 4) * 2, i.e. always even: a code never straddles two bytes
                val code = (decompressedData[byteIdx].toInt() shr ((i % 4) * 2)) and 0x03
                if (code == 3) otherCount++
            }
        }
        readPtr += otherCount * 2
    }

    // Pass 2: expand the codes into coefficients (arrays start out zero-filled)
    val output = Array(numFrames) { Array(3) { ShortArray(numPixels) } }
    for (ch in 0 until 3) {
        if (!channelPresent[ch]) continue

        var valuePtr = valueStart[ch]
        for (frame in 0 until numFrames) {
            val plane = output[frame][ch]
            val frameBase = mapStart[ch] + frame * mapBytesPerFrame
            for (i in 0 until numPixels) {
                val byteIdx = frameBase + i / 4
                if (byteIdx >= dataSize) break
                when ((decompressedData[byteIdx].toInt() shr ((i % 4) * 2)) and 0x03) {
                    1 -> plane[i] = 1
                    2 -> plane[i] = -1
                    3 -> {
                        // Little-endian int16; toShort() discards the sign extension of the high byte
                        if (valuePtr + 1 < dataSize) {
                            val lo = decompressedData[valuePtr].toInt() and 0xFF
                            val hi = decompressedData[valuePtr + 1].toInt()
                            plane[i] = ((hi shl 8) or lo).toShort()
                        }
                        // The slot is consumed even when it lies beyond the data
                        valuePtr += 2
                    }
                    // code 0: coefficient stays 0
                }
            }
        }
    }

    return output
}
|
||||||
|
|
||||||
// TAV Simulated overlapping tiles constants (must match encoder)
|
// TAV Simulated overlapping tiles constants (must match encoder)
|
||||||
private val TAV_TILE_SIZE_X = 640
|
private val TAV_TILE_SIZE_X = 640
|
||||||
private val TAV_TILE_SIZE_Y = 540
|
private val TAV_TILE_SIZE_Y = 540
|
||||||
@@ -4348,6 +4526,53 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
else 6
|
else 6
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GOP temporal quantization helpers
|
||||||
|
|
||||||
|
/**
 * Determines the temporal subband level for a given frame in a GOP.
 * Returns 0 for tLL (temporal low-pass), 1+ for temporal high-pass levels.
 *
 * For 2-level Haar decomposition on 16 frames:
 * - Frames 0-3: Level 0 (tLL - lowest frequency)
 * - Frames 4-7: Level 1 (tH - mid frequency)
 * - Frames 8-15: Level 2 (tHH - highest frequency)
 *
 * GOPs shorter than 2^temporalLevels frames are handled with a subband width
 * of one frame instead of dividing by zero.
 * NOTE(review): the level assignment for such short GOPs must agree with the
 * encoder's quantiser choice - confirm against the encoder.
 *
 * @param frameIdx Index of the frame within the GOP
 * @param numFrames Number of frames in the GOP
 * @param temporalLevels Number of temporal decomposition levels
 * @return Temporal subband level of the frame (0 = tLL)
 */
private fun getTemporalSubbandLevel(frameIdx: Int, numFrames: Int, temporalLevels: Int): Int {
    if (temporalLevels == 0) return 0

    // numFrames / 2^temporalLevels. This is 0 when the GOP has fewer than
    // 2^temporalLevels frames, which would make the division below throw.
    val framesPerSubband = (numFrames shr temporalLevels).coerceAtLeast(1)

    // Determine which temporal subband this frame belongs to
    val subbandIdx = frameIdx / framesPerSubband
    if (subbandIdx == 0) return 0

    // Level = position of the highest set bit of subbandIdx, counted from 1
    var level = 1
    var idx = subbandIdx
    while (idx > 1) {
        idx = idx shr 1
        level++
    }
    return level
}
|
||||||
|
|
||||||
|
/**
 * Quantiser multiplier for a temporal subband level.
 *
 * The scale grows exponentially with the level: base × 2^(beta × level).
 * With beta = 0.8 and base = 1.0 this gives
 * - level 0 (tLL): 1.00
 * - level 1 (tH):  1.74
 * - level 2 (tHH): 3.03
 *
 * @param temporalLevel Temporal subband level (0 = tLL)
 * @return Factor to multiply the base quantiser with
 */
private fun getTemporalQuantizerScale(temporalLevel: Int): Float {
    val beta = 0.8f
    val baseScale = 1.0f
    val exponent = (beta * temporalLevel).toDouble()
    return baseScale * Math.pow(2.0, exponent).toFloat()
}
|
||||||
|
|
||||||
// level is one-based index
|
// level is one-based index
|
||||||
private fun getPerceptualWeight(qIndex: Int, qYGlobal: Int, level0: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
|
private fun getPerceptualWeight(qIndex: Int, qYGlobal: Int, level0: Int, subbandType: Int, isChroma: Boolean, maxLevels: Int): Float {
|
||||||
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
// Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
|
||||||
@@ -4644,7 +4869,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Coefficient delta encoding for efficient P-frames
|
// Coefficient delta encoding for efficient P-frames
|
||||||
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
|
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
|
||||||
width, height, qY, qCo, qCg,
|
width, height, qY, qCo, qCg,
|
||||||
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
|
decompLevels, tavVersion, isMonoblock, frameCount)
|
||||||
dbgOut["frameMode"] = " "
|
dbgOut["frameMode"] = " "
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -5262,7 +5487,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
|
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
|
||||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long {
|
decompLevels: Int, tavVersion: Int, isMonoblock: Boolean = false, frameCount: Int = 0): Long {
|
||||||
|
|
||||||
val tileIdx = if (isMonoblock) {
|
val tileIdx = if (isMonoblock) {
|
||||||
0 // Single tile index for monoblock
|
0 // Single tile index for monoblock
|
||||||
@@ -5403,15 +5628,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
|
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
|
||||||
|
|
||||||
// Apply inverse DWT
|
// Apply inverse DWT
|
||||||
if (isLossless) {
|
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 255, TavSharpenLuma)
|
||||||
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenLuma)
|
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 255, TavNullFilter)
|
||||||
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)
|
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 255, TavNullFilter)
|
||||||
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)
|
|
||||||
} else {
|
|
||||||
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenLuma)
|
|
||||||
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter)
|
|
||||||
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Debug: Check coefficient values after inverse DWT
|
// Debug: Check coefficient values after inverse DWT
|
||||||
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
|
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
|
||||||
@@ -5882,6 +6101,194 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Undo the encoder's whole-frame translation, in place.
 *
 * Every output sample (x, y) is fetched from input position (x + dx, y + dy);
 * source coordinates falling outside the frame are clamped to the nearest
 * edge, so border samples are repeated.
 *
 * @param frameData Frame samples in row-major order; overwritten with the shifted frame
 * @param width Frame width
 * @param height Frame height
 * @param dx Translation in X direction (pixels)
 * @param dy Translation in Y direction (pixels)
 */
private fun applyInverseTranslation(frameData: FloatArray, width: Int, height: Int, dx: Int, dy: Int) {
    val shifted = FloatArray(width * height)
    val lastCol = width - 1
    val lastRow = height - 1

    for (row in 0 until height) {
        val dstRowBase = row * width
        for (col in 0 until width) {
            // Source position, clamped to the frame boundaries
            val srcCol = (col + dx).coerceIn(0, lastCol)
            val srcRow = (row + dy).coerceIn(0, lastRow)
            shifted[dstRowBase + col] = frameData[srcRow * width + srcCol]
        }
    }

    // The shift cannot be done in place, so the result is copied back afterwards
    System.arraycopy(shifted, 0, frameData, 0, frameData.size)
}
|
||||||
|
|
||||||
|
/**
 * Main GOP unified decoder function.
 * Decodes a unified 3D DWT GOP block (temporal + spatial) and outputs RGB frames.
 *
 * Pipeline: Zstd decompression, 2-bit coefficient unpacking, dequantisation
 * with per-frame temporal scaling, inverse 3D DWT, inverse motion compensation
 * (whole-pixel shifts), then YCoCg-R to RGB conversion. Each frame is written
 * to its output buffer as packed RGB, 3 bytes per pixel.
 *
 * @param compressedDataPtr Pointer to compressed Zstd data
 * @param compressedSize Size of compressed data
 * @param gopSize Number of frames in GOP (1-16)
 * @param motionVectorsX X motion vectors in quarter-pixel units
 * @param motionVectorsY Y motion vectors in quarter-pixel units
 * @param outputRGBAddrs Array of output RGB buffer addresses
 * @param width Frame width
 * @param height Frame height
 * @param qIndex Quality index
 * @param qYGlobal Global Y quantizer
 * @param qCoGlobal Global Co quantizer
 * @param qCgGlobal Global Cg quantizer
 * @param channelLayout Channel layout flags
 * @param spatialFilter Wavelet filter type
 * @param spatialLevels Number of spatial DWT levels (default 6)
 * @param temporalLevels Number of temporal DWT levels (default 2)
 * @return Number of frames decoded; 0 if the arguments are inconsistent or decompression fails
 */
fun tavDecodeGopUnified(
    compressedDataPtr: Long,
    compressedSize: Int,
    gopSize: Int,
    motionVectorsX: IntArray,
    motionVectorsY: IntArray,
    outputRGBAddrs: LongArray,
    width: Int,
    height: Int,
    qIndex: Int,
    qYGlobal: Int,
    qCoGlobal: Int,
    qCgGlobal: Int,
    channelLayout: Int,
    spatialFilter: Int = 1,
    spatialLevels: Int = 6,
    temporalLevels: Int = 2
): Int {
    // The sizes come from the caller's packet parsing. Reject combinations that
    // would otherwise fail part-way through, after some output was already written.
    // Motion vectors are only read for frames 1 and up.
    val vectorsMissing = gopSize > 1 &&
            (motionVectorsX.size < gopSize || motionVectorsY.size < gopSize)
    if (gopSize < 0 || compressedSize < 0 || outputRGBAddrs.size < gopSize || vectorsMissing) {
        println("ERROR: Invalid GOP parameters: gopSize=$gopSize, compressedSize=$compressedSize, " +
                "motionVectors=${motionVectorsX.size}/${motionVectorsY.size}, outputBuffers=${outputRGBAddrs.size}")
        return 0
    }

    val numPixels = width * height

    // Step 1: Decompress unified GOP block (copied out of VM user memory first)
    val compressedData = ByteArray(compressedSize)
    UnsafeHelper.memcpyRaw(
        null,
        vm.usermem.ptr + compressedDataPtr,
        compressedData,
        UnsafeHelper.getArrayOffset(compressedData),
        compressedSize.toLong()
    )

    val decompressedData = try {
        ZstdInputStream(java.io.ByteArrayInputStream(compressedData)).use { zstd ->
            zstd.readBytes()
        }
    } catch (e: Exception) {
        println("ERROR: Zstd decompression failed: ${e.message}")
        return 0
    }

    // Step 2: Postprocess unified block to per-frame coefficients
    val quantizedCoeffs = tavPostprocessGopUnified(
        decompressedData,
        gopSize,
        numPixels,
        channelLayout
    )

    // Step 3: Allocate GOP buffers for float coefficients
    val gopY = Array(gopSize) { FloatArray(numPixels) }
    val gopCo = Array(gopSize) { FloatArray(numPixels) }
    val gopCg = Array(gopSize) { FloatArray(numPixels) }

    // Step 4: Calculate subband layout (needed for perceptual dequantization)
    val subbands = calculateSubbandLayout(width, height, spatialLevels)

    // Step 5: Dequantize with temporal-spatial scaling
    for (t in 0 until gopSize) {
        val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
        val temporalScale = getTemporalQuantizerScale(temporalLevel)

        // Apply temporal scaling to base quantizers
        val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 255.0f)
        val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 255.0f)
        val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 255.0f)

        // Use existing perceptual dequantization for spatial weighting.
        // All three channels pass qYGlobal as the second argument.
        dequantiseDWTSubbandsPerceptual(
            qIndex, qYGlobal,
            quantizedCoeffs[t][0], gopY[t],
            subbands, baseQY, false, spatialLevels // isChroma=false
        )

        dequantiseDWTSubbandsPerceptual(
            qIndex, qYGlobal,
            quantizedCoeffs[t][1], gopCo[t],
            subbands, baseQCo, true, spatialLevels // isChroma=true
        )

        dequantiseDWTSubbandsPerceptual(
            qIndex, qYGlobal,
            quantizedCoeffs[t][2], gopCg[t],
            subbands, baseQCg, true, spatialLevels // isChroma=true
        )
    }

    // Step 6: Apply inverse 3D DWT (spatial first, then temporal)
    tavApplyInverse3DDWT(gopY, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
    tavApplyInverse3DDWT(gopCo, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)
    tavApplyInverse3DDWT(gopCg, width, height, gopSize, spatialLevels, temporalLevels, spatialFilter)

    // Step 7: Apply inverse motion compensation (shift frames back)
    // Note: Motion vectors are in quarter-pixel units; the fractional part is dropped
    for (t in 1 until gopSize) { // Skip frame 0 (reference)
        val dx = motionVectorsX[t] / 4 // Convert to pixel units
        val dy = motionVectorsY[t] / 4

        if (dx != 0 || dy != 0) {
            applyInverseTranslation(gopY[t], width, height, dx, dy)
            applyInverseTranslation(gopCo[t], width, height, dx, dy)
            applyInverseTranslation(gopCg[t], width, height, dx, dy)
        }
    }

    // Step 8: Convert each frame to RGB and write to output buffers
    for (t in 0 until gopSize) {
        val rgbAddr = outputRGBAddrs[t]
        val planeY = gopY[t]
        val planeCo = gopCo[t]
        val planeCg = gopCg[t]

        for (i in 0 until numPixels) {
            val y = planeY[i]
            val co = planeCo[i]
            val cg = planeCg[i]

            // YCoCg-R to RGB conversion
            val tmp = y - (cg / 2.0f)
            val g = cg + tmp
            val b = tmp - (co / 2.0f)
            val r = b + co

            // Write RGB24 format (3 bytes per pixel), each component clamped to 0-255
            val offset = rgbAddr + i * 3L
            vm.usermem[offset] = r.toInt().coerceIn(0, 255).toByte()
            vm.usermem[offset + 1] = g.toInt().coerceIn(0, 255).toByte()
            vm.usermem[offset + 2] = b.toInt().coerceIn(0, 255).toByte()
        }
    }

    return gopSize
}
|
||||||
|
|
||||||
// Biorthogonal 13/7 wavelet inverse 1D transform
|
// Biorthogonal 13/7 wavelet inverse 1D transform
|
||||||
// Synthesis filters: Low-pass (13 taps), High-pass (7 taps)
|
// Synthesis filters: Low-pass (13 taps), High-pass (7 taps)
|
||||||
private fun tavApplyDWTBior137Inverse1D(data: FloatArray, length: Int) {
|
private fun tavApplyDWTBior137Inverse1D(data: FloatArray, length: Int) {
|
||||||
@@ -5994,4 +6401,78 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Temporal 3D DWT Functions (GOP Decoding)
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/**
 * Inverse 1D temporal DWT along the time axis.
 *
 * Delegates to the Haar inverse transform; a line of fewer than two frames
 * is left untouched.
 *
 * @param data Temporal coefficient line, transformed in place
 * @param numFrames Number of leading entries of [data] to transform
 */
private fun tavApplyTemporalDWTInverse1D(data: FloatArray, numFrames: Int) {
    if (numFrames >= 2) {
        tavApplyDWTHaarInverse1D(data, numFrames)
    }
}
|
||||||
|
|
||||||
|
/**
 * Apply inverse 3D DWT to GOP data (spatial + temporal), in place.
 * Order: SPATIAL first (each frame), then TEMPORAL (across frames).
 *
 * NOTE(review): a GOP of fewer than two frames returns before the spatial
 * inverse transform as well, leaving the data untouched - confirm that the
 * encoder also skips the whole 3D transform for single-frame GOPs.
 *
 * @param gopData Array of frame buffers [frame][pixel]
 * @param width Frame width
 * @param height Frame height
 * @param numFrames Number of frames in GOP
 * @param spatialLevels Spatial decomposition levels (typically 6)
 * @param temporalLevels Temporal decomposition levels (typically 2)
 * @param spatialFilter Spatial wavelet filter type (0=5/3, 1=9/7, 255=Haar)
 */
private fun tavApplyInverse3DDWT(
    gopData: Array<FloatArray>,
    width: Int,
    height: Int,
    numFrames: Int,
    spatialLevels: Int,
    temporalLevels: Int,
    spatialFilter: Int
) {
    if (numFrames < 2) return

    // Step 1: inverse 2D spatial DWT on every frame (i.e. every temporal subband)
    for (frame in 0 until numFrames) {
        tavApplyDWTInverseMultiLevel(
            gopData[frame], width, height,
            spatialLevels, spatialFilter,
            TavNullFilter // No sharpening for GOP frames
        )
    }

    // Lengths of the temporal passes, from the deepest level back to level 0.
    // These are the same for every pixel; passes shorter than two frames are dropped.
    val passLengths = (temporalLevels - 1 downTo 0)
        .map { level -> numFrames shr level }
        .filter { it >= 2 }

    // Step 2: inverse temporal DWT at each spatial location
    val timeLine = FloatArray(numFrames)
    for (row in 0 until height) {
        val rowBase = row * width
        for (col in 0 until width) {
            val pixelIdx = rowBase + col

            // Gather this pixel's coefficients across all frames
            for (frame in 0 until numFrames) {
                timeLine[frame] = gopData[frame][pixelIdx]
            }

            for (length in passLengths) {
                tavApplyTemporalDWTInverse1D(timeLine, length)
            }

            // Scatter the reconstructed samples back into the frames
            for (frame in 0 until numFrames) {
                gopData[frame][pixelIdx] = timeLine[frame]
            }
        }
    }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -15,11 +15,15 @@
|
|||||||
#define TAV_MODE_SKIP 0x00
|
#define TAV_MODE_SKIP 0x00
|
||||||
#define TAV_MODE_INTRA 0x01
|
#define TAV_MODE_INTRA 0x01
|
||||||
#define TAV_MODE_DELTA 0x02
|
#define TAV_MODE_DELTA 0x02
|
||||||
#define TAV_PACKET_IFRAME 0x10
|
#define TAV_PACKET_IFRAME 0x10
|
||||||
#define TAV_PACKET_PFRAME 0x11
|
#define TAV_PACKET_PFRAME 0x11
|
||||||
#define TAV_PACKET_AUDIO_MP2 0x20
|
#define TAV_PACKET_GOP_UNIFIED 0x12 // Unified 3D DWT GOP
|
||||||
#define TAV_PACKET_SUBTITLE 0x30
|
#define TAV_PACKET_AUDIO_MP2 0x20
|
||||||
#define TAV_PACKET_SYNC 0xFF
|
#define TAV_PACKET_SUBTITLE 0x30
|
||||||
|
#define TAV_PACKET_EXTENDED_HDR 0xEF
|
||||||
|
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync (N frames decoded)
|
||||||
|
#define TAV_PACKET_TIMECODE 0xFD
|
||||||
|
#define TAV_PACKET_SYNC 0xFF
|
||||||
|
|
||||||
// Channel layout constants (bit-field design)
|
// Channel layout constants (bit-field design)
|
||||||
#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg (000: no alpha, has chroma, has luma)
|
#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg (000: no alpha, has chroma, has luma)
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ static int needs_alpha_channel(int channel_layout) {
|
|||||||
#define DEFAULT_FPS 30
|
#define DEFAULT_FPS 30
|
||||||
#define DEFAULT_QUALITY 3
|
#define DEFAULT_QUALITY 3
|
||||||
#define DEFAULT_ZSTD_LEVEL 9
|
#define DEFAULT_ZSTD_LEVEL 9
|
||||||
#define GOP_SIZE 16
|
#define GOP_SIZE /*1*/4
|
||||||
|
|
||||||
// Audio/subtitle constants (reused from TEV)
|
// Audio/subtitle constants (reused from TEV)
|
||||||
#define MP2_DEFAULT_PACKET_SIZE 1152
|
#define MP2_DEFAULT_PACKET_SIZE 1152
|
||||||
@@ -1456,8 +1456,8 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
|
|||||||
int spatial_size,
|
int spatial_size,
|
||||||
int base_quantiser,
|
int base_quantiser,
|
||||||
int is_chroma) {
|
int is_chroma) {
|
||||||
const float BETA = 0.8f; // Temporal scaling exponent
|
const float BETA = 0.8f; // Temporal scaling exponent (aggressive for temporal high-pass)
|
||||||
const float TEMPORAL_BASE_SCALE = 0.7f; // Temporal coefficients are typically sparser
|
const float TEMPORAL_BASE_SCALE = 1.0f; // Don't reduce tLL quantization (same as intra)
|
||||||
|
|
||||||
// Process each temporal subband independently (separable approach)
|
// Process each temporal subband independently (separable approach)
|
||||||
for (int t = 0; t < num_frames; t++) {
|
for (int t = 0; t < num_frames; t++) {
|
||||||
@@ -1468,11 +1468,11 @@ static void quantise_3d_dwt_coefficients(tav_encoder_t *enc,
|
|||||||
int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);
|
int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);
|
||||||
|
|
||||||
// Step 2: Compute temporal base quantizer using exponential scaling
|
// Step 2: Compute temporal base quantizer using exponential scaling
|
||||||
// Formula: tH_base = Qbase_t * 0.7 * 2^(0.8 * level)
|
// Formula: tH_base = Qbase_t * TEMPORAL_BASE_SCALE * 2^(BETA * level) = Qbase_t * 1.0 * 2^(0.8 * level)
|
||||||
// Example with Qbase_t=16:
|
// Example with Qbase_t=16:
|
||||||
// - Level 0 (tLL): 16 * 0.7 * 2^0 = 11.2
|
// - Level 0 (tLL): 16 * 1.0 * 2^0 = 16 (same as intra-only)
|
||||||
// - Level 1 (tLH): 16 * 0.7 * 2^0.8 = 19.5
|
// - Level 1 (tH): 16 * 1.0 * 2^0.8 ≈ 27.9 (about 1.74× base)
|
||||||
// - Level 2 (tHH): 16 * 0.7 * 2^1.6 = 33.8
|
// - Level 2 (tHH): 16 * 1.0 * 2^1.6 ≈ 48.5 (about 3.03× base)
|
||||||
float temporal_scale = TEMPORAL_BASE_SCALE * powf(2.0f, BETA * temporal_level);
|
float temporal_scale = TEMPORAL_BASE_SCALE * powf(2.0f, BETA * temporal_level);
|
||||||
float temporal_quantiser = base_quantiser * temporal_scale;
|
float temporal_quantiser = base_quantiser * temporal_scale;
|
||||||
|
|
||||||
@@ -1622,6 +1622,40 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
|
|||||||
memcpy(gop_cg_coeffs[i], enc->gop_cg_frames[i], num_pixels * sizeof(float));
|
memcpy(gop_cg_coeffs[i], enc->gop_cg_frames[i], num_pixels * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 0.5: Apply motion compensation to align frames before temporal DWT
|
||||||
|
// This uses the computed translation vectors to align each frame to the previous one
|
||||||
|
for (int i = 1; i < actual_gop_size; i++) { // Skip frame 0 (reference frame)
|
||||||
|
float *aligned_y = malloc(num_pixels * sizeof(float));
|
||||||
|
float *aligned_co = malloc(num_pixels * sizeof(float));
|
||||||
|
float *aligned_cg = malloc(num_pixels * sizeof(float));
|
||||||
|
|
||||||
|
if (!aligned_y || !aligned_co || !aligned_cg) {
|
||||||
|
fprintf(stderr, "Error: Failed to allocate motion compensation buffers\n");
|
||||||
|
// Cleanup and skip motion compensation for this GOP
|
||||||
|
free(aligned_y);
|
||||||
|
free(aligned_co);
|
||||||
|
free(aligned_cg);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply translation to align this frame
|
||||||
|
apply_translation(gop_y_coeffs[i], enc->width, enc->height,
|
||||||
|
enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_y);
|
||||||
|
apply_translation(gop_co_coeffs[i], enc->width, enc->height,
|
||||||
|
enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_co);
|
||||||
|
apply_translation(gop_cg_coeffs[i], enc->width, enc->height,
|
||||||
|
enc->gop_translation_x[i], enc->gop_translation_y[i], aligned_cg);
|
||||||
|
|
||||||
|
// Copy aligned frames back
|
||||||
|
memcpy(gop_y_coeffs[i], aligned_y, num_pixels * sizeof(float));
|
||||||
|
memcpy(gop_co_coeffs[i], aligned_co, num_pixels * sizeof(float));
|
||||||
|
memcpy(gop_cg_coeffs[i], aligned_cg, num_pixels * sizeof(float));
|
||||||
|
|
||||||
|
free(aligned_y);
|
||||||
|
free(aligned_co);
|
||||||
|
free(aligned_cg);
|
||||||
|
}
|
||||||
|
|
||||||
// Step 1: Apply 3D DWT (temporal + spatial) to each channel
|
// Step 1: Apply 3D DWT (temporal + spatial) to each channel
|
||||||
// Note: This modifies gop_*_coeffs in-place
|
// Note: This modifies gop_*_coeffs in-place
|
||||||
dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,
|
dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,
|
||||||
|
|||||||
Reference in New Issue
Block a user