mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV: fix: iframes not decoding
This commit is contained in:
@@ -497,6 +497,9 @@ let readyGopData = null // GOP that's already decoded and ready to play (
|
||||
let decodingGopData = null // GOP currently being decoded in background
|
||||
let asyncDecodeInProgress = false // Track if async decode is running
|
||||
let asyncDecodeSlot = 0 // Which slot the async decode is targeting
|
||||
|
||||
// I-frame (non-GOP) timing control
|
||||
let iframeReady = false // Track if an I-frame/P-frame is decoded and ready to display
|
||||
let asyncDecodeGopSize = 0 // Size of GOP being decoded async
|
||||
let asyncDecodePtr = 0 // Compressed data pointer to free after decode
|
||||
let asyncDecodeStartTime = 0 // When async decode started (for diagnostics)
|
||||
@@ -773,6 +776,7 @@ function tryReadNextTAVHeader() {
|
||||
let lastKey = 0
|
||||
let skipped = false
|
||||
let paused = false
|
||||
let debugPrintAkku = 0
|
||||
|
||||
// Playback loop - properly adapted from TEV with multi-file support
|
||||
try {
|
||||
@@ -1040,41 +1044,17 @@ try {
|
||||
}
|
||||
}
|
||||
|
||||
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false)
|
||||
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
|
||||
// Don't upload immediately - let timing loop handle it
|
||||
// Mark frame as ready for time-based display
|
||||
iframeReady = true
|
||||
uploadTime = 0 // Upload will happen in timing section below
|
||||
|
||||
// Defer audio playback until the first frame has been sent
|
||||
if (isInterlaced) {
|
||||
// fire audio after frame 1
|
||||
if (!audioFired && frameCount > 0) {
|
||||
audio.play(0)
|
||||
audioFired = true
|
||||
}
|
||||
}
|
||||
else {
|
||||
// fire audio after frame 0
|
||||
if (!audioFired) {
|
||||
audio.play(0)
|
||||
audioFired = true
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.log(`Frame ${frameCount}: decode failed: ${e}`)
|
||||
} finally {
|
||||
sys.free(compressedPtr)
|
||||
}
|
||||
|
||||
|
||||
let biasStart = sys.nanoTime()
|
||||
setBiasLighting()
|
||||
biasTime = (sys.nanoTime() - biasStart) / 1000000.0
|
||||
|
||||
// Log performance data every 60 frames
|
||||
if (frameCount % 60 == 0 || frameCount == 0) {
|
||||
let totalTime = decompressTime + decodeTime + uploadTime + biasTime
|
||||
console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
|
||||
}
|
||||
|
||||
}
|
||||
else if (packetType === TAV_PACKET_GOP_UNIFIED) {
|
||||
// GOP Unified packet (temporal 3D DWT)
|
||||
@@ -1544,6 +1524,53 @@ try {
|
||||
audioFired = true
|
||||
}
|
||||
|
||||
// Step 2a: Display I-frame/P-frame with proper frame timing
|
||||
if (!paused && iframeReady && currentGopSize === 0) {
|
||||
// Initialize timing on first I-frame
|
||||
if (nextFrameTime === 0) {
|
||||
nextFrameTime = sys.nanoTime()
|
||||
}
|
||||
|
||||
// Spin-wait for next frame time
|
||||
while (sys.nanoTime() < nextFrameTime && !paused) {
|
||||
sys.sleep(1)
|
||||
}
|
||||
|
||||
if (!paused) {
|
||||
let uploadStart = sys.nanoTime()
|
||||
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false)
|
||||
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
|
||||
|
||||
// Apply bias lighting
|
||||
let biasStart = sys.nanoTime()
|
||||
setBiasLighting()
|
||||
biasTime = (sys.nanoTime() - biasStart) / 1000000.0
|
||||
|
||||
// Fire audio on first frame
|
||||
if (!audioFired) {
|
||||
audio.play(0)
|
||||
audioFired = true
|
||||
}
|
||||
|
||||
frameCount++
|
||||
trueFrameCount++
|
||||
iframeReady = false
|
||||
|
||||
// Swap ping-pong buffers for next frame
|
||||
let temp = CURRENT_RGB_ADDR
|
||||
CURRENT_RGB_ADDR = PREV_RGB_ADDR
|
||||
PREV_RGB_ADDR = temp
|
||||
|
||||
// Schedule next frame
|
||||
nextFrameTime += (frametime) // frametime is in nanoseconds from header
|
||||
|
||||
// Log performance data every 60 frames
|
||||
if (frameCount % 60 == 0) {
|
||||
console.log(`Frame ${frameCount}: Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2 & 3: Display current GOP frame if it's time
|
||||
if (!paused && currentGopSize > 0 && currentGopFrameIndex < currentGopSize) {
|
||||
// Spin-wait for next frame time
|
||||
@@ -1731,6 +1758,13 @@ try {
|
||||
gui.printTopBar(guiStatus, 1)
|
||||
}
|
||||
|
||||
|
||||
debugPrintAkku += (t2 - t1)
|
||||
if (debugPrintAkku > 5000000000) {
|
||||
debugPrintAkku -= 5000000000
|
||||
serial.println(`[PLAYTAV] decoding time = ${(decodeTime).toFixed(2)} ms`)
|
||||
}
|
||||
|
||||
// Small sleep to prevent 100% CPU and control loop rate
|
||||
// Allows continuous packet reading while maintaining proper frame timing
|
||||
sys.sleep(1)
|
||||
|
||||
@@ -932,7 +932,7 @@ transmission capability, and region-of-interest coding.
|
||||
- 6-7 = Reserved/invalid (would indicate no luma and no chroma)
|
||||
uint8 Entropy Coder
|
||||
- 0 = Twobit-plane significance map
|
||||
- 1 = Embedded Zero Block Coding
|
||||
- 1 = Embedded Zero Block Coding (EZBC, experimental)
|
||||
uint8 Reserved[2]: fill with zeros
|
||||
uint8 Device Orientation
|
||||
- 0 = No rotation
|
||||
|
||||
@@ -4520,15 +4520,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
// Read entropy coder from header: 0 = Twobit-map, 1 = EZBC
|
||||
val isEZBC = (entropyCoder == 1)
|
||||
|
||||
/*if (isEZBC) {
|
||||
println("[AUTO] Using EZBC decoder")
|
||||
if (isEZBC) {
|
||||
postprocessCoefficientsEZBC(compressedData, compressedOffset, coeffCount,
|
||||
channelLayout, outputY, outputCo, outputCg, outputAlpha)
|
||||
} else {
|
||||
println("[AUTO] Using twobit-map decoder")
|
||||
postprocessCoefficientsVariableLayout(compressedData, compressedOffset, coeffCount,
|
||||
channelLayout, outputY, outputCo, outputCg, outputAlpha)
|
||||
}*/
|
||||
}
|
||||
|
||||
return isEZBC
|
||||
}
|
||||
@@ -5323,7 +5321,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
// First, we need to determine the size of compressed data for each channel
|
||||
// Read a large buffer to work with significance map format
|
||||
val maxPossibleSize = coeffCount * 3 * 2 + (coeffCount + 7) / 8 * 3 // Worst case: original size + maps
|
||||
val maxPossibleSize = coeffCount * 4 * 2 + (coeffCount + 7) / 8 * 4 // Worst case: original size + maps
|
||||
val coeffBuffer = ByteArray(maxPossibleSize)
|
||||
UnsafeHelper.memcpyRaw(null, vm.usermem.ptr + ptr, coeffBuffer, UnsafeHelper.getArrayOffset(coeffBuffer), maxPossibleSize.toLong())
|
||||
|
||||
@@ -6214,7 +6212,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
// normal/strong sharpen filters make horizontal/vertical hairline artefacts
|
||||
|
||||
private val TavSharpenLuma = TavSharpenWeak
|
||||
private val TavSharpenLuma = TavNullFilter
|
||||
|
||||
private object TavNullFilter : TavWaveletFilter {
|
||||
override fun getCoeffMultiplier(level: Int): Float = 1.0f
|
||||
@@ -6247,7 +6245,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
if (coeff > maxCoeff) maxCoeff = coeff
|
||||
if (coeff > 0.1f) nonzeroCoeff++
|
||||
}
|
||||
println("[IDWT-LEVEL-$level] BEFORE: ${currentWidth}x${currentHeight}, max=${maxCoeff.toInt()}, nonzero=$nonzeroCoeff/$sampleSize")
|
||||
// println("[IDWT-LEVEL-$level] BEFORE: ${currentWidth}x${currentHeight}, max=${maxCoeff.toInt()}, nonzero=$nonzeroCoeff/$sampleSize")
|
||||
}
|
||||
|
||||
// Apply inverse DWT to current subband region - EXACT match to encoder
|
||||
@@ -7101,12 +7099,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
temporalLevels: Int,
|
||||
spatialFilter: Int
|
||||
) {
|
||||
if (numFrames < 2) return
|
||||
|
||||
val numPixels = width * height
|
||||
val temporalLine = FloatArray(numFrames)
|
||||
|
||||
// Step 1: Apply inverse 2D spatial DWT to each temporal subband (each frame)
|
||||
// This is required even for single frames (I-frames) to convert from DWT coefficients to pixel space
|
||||
for (t in 0 until numFrames) {
|
||||
tavApplyDWTInverseMultiLevel(
|
||||
gopData[t], width, height,
|
||||
@@ -7116,6 +7112,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
|
||||
// Step 2: Apply inverse temporal DWT to each spatial location
|
||||
// Only needed for GOPs with multiple frames (skip for I-frames)
|
||||
if (numFrames < 2) return
|
||||
|
||||
val temporalLine = FloatArray(numFrames)
|
||||
for (y in 0 until height) {
|
||||
for (x in 0 until width) {
|
||||
val pixelIdx = y * width + x
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
#include <float.h>
|
||||
#include <fftw3.h>
|
||||
|
||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251022 (3d-dwt,ezbc)"
|
||||
#define ENCODER_VENDOR_STRING "Encoder-TAV 20251023 (3d-dwt)"
|
||||
|
||||
// TSVM Advanced Video (TAV) format constants
|
||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
||||
@@ -118,7 +118,7 @@ static int needs_alpha_channel(int channel_layout) {
|
||||
#define DEFAULT_HEIGHT 448
|
||||
#define DEFAULT_FPS 30
|
||||
#define DEFAULT_QUALITY 3
|
||||
#define DEFAULT_ZSTD_LEVEL 3
|
||||
#define DEFAULT_ZSTD_LEVEL 15
|
||||
#define DEFAULT_PCM_ZSTD_LEVEL 3
|
||||
#define TEMPORAL_GOP_SIZE 20
|
||||
#define TEMPORAL_GOP_SIZE_MIN 8 // Minimum GOP size to avoid decoder hiccups
|
||||
@@ -2270,7 +2270,7 @@ static void show_usage(const char *program_name) {
|
||||
printf(" -c, --channel-layout N Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A (default: 0)\n");
|
||||
printf(" -a, --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
|
||||
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
|
||||
printf(" --separate-audio-track Write entire MP2 file as single packet 0x40 (instead of interleaved)\n");
|
||||
// printf(" --separate-audio-track Write entire audio track as single packet instead of interleaved\n");
|
||||
printf(" --pcm8-audio Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
|
||||
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
||||
printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n");
|
||||
@@ -2281,9 +2281,9 @@ static void show_usage(const char *program_name) {
|
||||
printf(" --intra-only Disable delta and skip encoding\n");
|
||||
printf(" --enable-delta Enable delta encoding\n");
|
||||
printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
|
||||
printf(" --temporal-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
|
||||
printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt)\n");
|
||||
printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) for significance maps\n");
|
||||
printf(" --3d-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
|
||||
printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
|
||||
printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) entropy coding\n");
|
||||
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
|
||||
printf(" --no-perceptual-tuning Disable perceptual quantisation\n");
|
||||
printf(" --no-dead-zone Disable dead-zone quantisation (for comparison/testing)\n");
|
||||
@@ -2350,7 +2350,7 @@ static tav_encoder_t* create_encoder(void) {
|
||||
enc->intra_only = 0;
|
||||
enc->monoblock = 1; // Default to monoblock mode
|
||||
enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6)
|
||||
enc->enable_ezbc = 1; // Default to EZBC over twobit-map
|
||||
enc->enable_ezbc = 0; // default to twobit-map as EZBC+Zstd 3 = Twobitmap+Zstd 15, and Twobitmap is faster to decode
|
||||
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg
|
||||
enc->audio_bitrate = 0; // 0 = use quality table
|
||||
enc->encode_limit = 0; // Default: no frame limit
|
||||
@@ -9435,6 +9435,8 @@ int main(int argc, char *argv[]) {
|
||||
{"delta-haar", required_argument, 0, 1018},
|
||||
{"temporal-dwt", no_argument, 0, 1019},
|
||||
{"temporal-3d", no_argument, 0, 1019},
|
||||
{"dwt-3d", no_argument, 0, 1019},
|
||||
{"3d-dwt", no_argument, 0, 1019},
|
||||
{"mc-ezbc", no_argument, 0, 1020},
|
||||
{"residual-coding", no_argument, 0, 1021},
|
||||
{"adaptive-blocks", no_argument, 0, 1022},
|
||||
@@ -9616,6 +9618,7 @@ int main(int argc, char *argv[]) {
|
||||
break;
|
||||
case 1020: // --mc-ezbc
|
||||
enc->temporal_enable_mcezbc = 1;
|
||||
enc->enable_ezbc = 1;
|
||||
printf("MC-EZBC block-based motion compensation enabled (requires --temporal-dwt)\n");
|
||||
break;
|
||||
case 1021: // --residual-coding
|
||||
@@ -10009,6 +10012,16 @@ int main(int argc, char *argv[]) {
|
||||
// Choose encoding path based on configuration
|
||||
size_t packet_size = 0;
|
||||
|
||||
// For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames
|
||||
// For traditional encoding, process audio/subtitles for this single frame
|
||||
if (!enc->enable_temporal_dwt) {
|
||||
// Process audio for this frame
|
||||
process_audio(enc, true_frame_count, enc->output_fp);
|
||||
|
||||
// Process subtitles for this frame
|
||||
process_subtitles(enc, true_frame_count, enc->output_fp);
|
||||
}
|
||||
|
||||
if (enc->enable_temporal_dwt) {
|
||||
// GOP-based temporal 3D DWT encoding path
|
||||
|
||||
@@ -10177,7 +10190,8 @@ int main(int argc, char *argv[]) {
|
||||
// Note: packet_size might already be > 0 from scene change flush above
|
||||
packet_size = 0;
|
||||
}
|
||||
} else if (enc->enable_residual_coding) {
|
||||
}
|
||||
else if (enc->enable_residual_coding) {
|
||||
// MPEG-style residual coding path (I/P/B frames with motion compensation)
|
||||
// Get quantiser (use adjusted quantiser from bitrate control if applicable)
|
||||
int qY = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
|
||||
@@ -10344,7 +10358,8 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// Traditional 2D DWT encoding path (no temporal transform, no motion compensation)
|
||||
uint8_t packet_type = is_keyframe ? TAV_PACKET_IFRAME : TAV_PACKET_PFRAME;
|
||||
packet_size = compress_and_write_frame(enc, packet_type);
|
||||
@@ -10368,16 +10383,6 @@ int main(int argc, char *argv[]) {
|
||||
adjust_quantiser_for_bitrate(enc);
|
||||
}
|
||||
|
||||
// For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames
|
||||
// For traditional encoding, process audio/subtitles for this single frame
|
||||
if (!enc->enable_temporal_dwt) {
|
||||
// Process audio for this frame
|
||||
process_audio(enc, true_frame_count, enc->output_fp);
|
||||
|
||||
// Process subtitles for this frame
|
||||
process_subtitles(enc, true_frame_count, enc->output_fp);
|
||||
}
|
||||
|
||||
// Write a sync packet only after a video frame has been coded
|
||||
// For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC
|
||||
// For B-frame mode, sync packets are already written in the encoding loop
|
||||
|
||||
Reference in New Issue
Block a user