From 53da0bfceec3434208d1ce45965d65b743070825 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Thu, 23 Oct 2025 01:29:20 +0900 Subject: [PATCH] TAV: fix: iframes not decoding --- assets/disk0/tvdos/bin/playtav.js | 90 +++++++++++++------ terranmon.txt | 2 +- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 20 ++--- video_encoder/encoder_tav.c | 43 +++++---- 4 files changed, 97 insertions(+), 58 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index e301799..91a66e3 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -497,6 +497,9 @@ let readyGopData = null // GOP that's already decoded and ready to play ( let decodingGopData = null // GOP currently being decoded in background let asyncDecodeInProgress = false // Track if async decode is running let asyncDecodeSlot = 0 // Which slot the async decode is targeting + +// I-frame (non-GOP) timing control +let iframeReady = false // Track if an I-frame/P-frame is decoded and ready to display let asyncDecodeGopSize = 0 // Size of GOP being decoded async let asyncDecodePtr = 0 // Compressed data pointer to free after decode let asyncDecodeStartTime = 0 // When async decode started (for diagnostics) @@ -773,6 +776,7 @@ function tryReadNextTAVHeader() { let lastKey = 0 let skipped = false let paused = false +let debugPrintAkku = 0 // Playback loop - properly adapted from TEV with multi-file support try { @@ -1040,41 +1044,17 @@ try { } } - graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false) - uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 + // Don't upload immediately - let timing loop handle it + // Mark frame as ready for time-based display + iframeReady = true + uploadTime = 0 // Upload will happen in timing section below - // Defer audio playback until a first frame is sent - if (isInterlaced) { - // fire audio after frame 1 - if (!audioFired && frameCount > 0) { - audio.play(0) - audioFired = true - } - } - else { - // fire audio after frame 0 - if (!audioFired) { - audio.play(0) - audioFired = true - } - } } catch (e) { console.log(`Frame ${frameCount}: decode failed: ${e}`) } finally { sys.free(compressedPtr) } - - let biasStart = sys.nanoTime() - setBiasLighting() - biasTime = (sys.nanoTime() - biasStart) / 1000000.0 - - // Log performance data every 60 frames - if (frameCount % 60 == 0 || frameCount == 0) { - let totalTime = decompressTime + decodeTime + uploadTime + biasTime - console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`) - } - } else if (packetType === TAV_PACKET_GOP_UNIFIED) { // GOP Unified packet (temporal 3D DWT) @@ -1544,6 +1524,53 @@ try { audioFired = true } + // Step 2a: Display I-frame/P-frame with proper frame timing + if (!paused && iframeReady && currentGopSize === 0) { + // Initialize timing on first I-frame + if (nextFrameTime === 0) { + nextFrameTime = sys.nanoTime() + } + + // Spin-wait for next frame time + while (sys.nanoTime() < nextFrameTime && !paused) { + sys.sleep(1) + } + + if (!paused) { + let uploadStart = sys.nanoTime() + graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false) + uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 + + // Apply bias lighting + let biasStart = sys.nanoTime() + setBiasLighting() + biasTime = (sys.nanoTime() - biasStart) / 1000000.0 + + // Fire audio on first frame + if (!audioFired) { + audio.play(0) + audioFired = true + } + + frameCount++ + trueFrameCount++ + iframeReady = false + + // Swap ping-pong buffers for next frame + let temp = CURRENT_RGB_ADDR + CURRENT_RGB_ADDR = PREV_RGB_ADDR + PREV_RGB_ADDR = temp + + // Schedule next frame + nextFrameTime += (frametime) // frametime is in nanoseconds from header + + // Log performance data every 60 frames + if (frameCount % 60 == 0) { + console.log(`Frame ${frameCount}: Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms`) + } + } + } + // Step 2 & 3: Display current GOP frame if it's time if (!paused && currentGopSize > 0 && currentGopFrameIndex < currentGopSize) { // Spin-wait for next frame time @@ -1731,6 +1758,13 @@ try { gui.printTopBar(guiStatus, 1) } + + debugPrintAkku += (t2 - t1) + if (debugPrintAkku > 5000000000) { + debugPrintAkku -= 5000000000 + serial.println(`[PLAYTAV] decoding time = ${(decodeTime).toFixed(2)} ms`) + } + // Small sleep to prevent 100% CPU and control loop rate // Allows continuous packet reading while maintaining proper frame timing sys.sleep(1) diff --git a/terranmon.txt b/terranmon.txt index 54ab528..7b2a181 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -932,7 +932,7 @@ transmission capability, and region-of-interest coding. - 6-7 = Reserved/invalid (would indicate no luma and no chroma) uint8 Entropy Coder - 0 = Twobit-plane significance map - - 1 = Embedded Zero Block Coding + - 1 = Embedded Zero Block Coding (EZBC, experimental) uint8 Reserved[2]: fill with zeros uint8 Device Orientation - 0 = No rotation diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 338e381..a86d575 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4520,15 +4520,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Read entropy coder from header: 0 = Twobit-map, 1 = EZBC val isEZBC = (entropyCoder == 1) - /*if (isEZBC) { - println("[AUTO] Using EZBC decoder") + if (isEZBC) { postprocessCoefficientsEZBC(compressedData, compressedOffset, coeffCount, channelLayout, outputY, outputCo, outputCg, outputAlpha) } else { - println("[AUTO] Using twobit-map decoder") postprocessCoefficientsVariableLayout(compressedData, compressedOffset, coeffCount, channelLayout, outputY, outputCo, outputCg, outputAlpha) - }*/ + } return isEZBC } @@ -5323,7 +5321,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // First, we need to determine the size of compressed data for each channel // Read a large buffer to work with significance map format - val maxPossibleSize = coeffCount * 3 * 2 + (coeffCount + 7) / 8 * 3 // Worst case: original size + maps + val maxPossibleSize = coeffCount * 4 * 2 + (coeffCount + 7) / 8 * 4 // Worst case: original size + maps val coeffBuffer = ByteArray(maxPossibleSize) UnsafeHelper.memcpyRaw(null, vm.usermem.ptr + ptr, coeffBuffer, UnsafeHelper.getArrayOffset(coeffBuffer), maxPossibleSize.toLong()) @@ -6214,7 +6212,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // normal/strong sharpen filters make horizontal/vertical hairline artefacts - private val TavSharpenLuma = TavSharpenWeak + private val TavSharpenLuma = TavNullFilter private object TavNullFilter : TavWaveletFilter { override fun getCoeffMultiplier(level: Int): Float = 1.0f @@ -6247,7 +6245,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { if (coeff > maxCoeff) maxCoeff = coeff if (coeff > 0.1f) nonzeroCoeff++ } - println("[IDWT-LEVEL-$level] BEFORE: ${currentWidth}x${currentHeight}, max=${maxCoeff.toInt()}, nonzero=$nonzeroCoeff/$sampleSize") +// println("[IDWT-LEVEL-$level] BEFORE: ${currentWidth}x${currentHeight}, max=${maxCoeff.toInt()}, nonzero=$nonzeroCoeff/$sampleSize") } // Apply inverse DWT to current subband region - EXACT match to encoder @@ -7101,12 +7099,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { temporalLevels: Int, spatialFilter: Int ) { - if (numFrames < 2) return - val numPixels = width * height - val temporalLine = FloatArray(numFrames) // Step 1: Apply inverse 2D spatial DWT to each temporal subband (each frame) + // This is required even for single frames (I-frames) to convert from DWT coefficients to pixel space for (t in 0 until numFrames) { tavApplyDWTInverseMultiLevel( gopData[t], width, height, @@ -7116,6 +7112,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Step 2: Apply inverse temporal DWT to each spatial location + // Only needed for GOPs with multiple frames (skip for I-frames) + if (numFrames < 2) return + + val temporalLine = FloatArray(numFrames) for (y in 0 until height) { for (x in 0 until width) { val pixelIdx = y * width + x diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 92789fc..6ea4d51 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -18,7 +18,7 @@ #include #include -#define ENCODER_VENDOR_STRING "Encoder-TAV 20251022 (3d-dwt,ezbc)" +#define ENCODER_VENDOR_STRING "Encoder-TAV 20251023 (3d-dwt)" // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" @@ -118,7 +118,7 @@ static int needs_alpha_channel(int channel_layout) { #define DEFAULT_HEIGHT 448 #define DEFAULT_FPS 30 #define DEFAULT_QUALITY 3 -#define DEFAULT_ZSTD_LEVEL 3 +#define DEFAULT_ZSTD_LEVEL 15 #define DEFAULT_PCM_ZSTD_LEVEL 3 #define TEMPORAL_GOP_SIZE 20 #define TEMPORAL_GOP_SIZE_MIN 8 // Minimum GOP size to avoid decoder hiccups @@ -2270,7 +2270,7 @@ static void show_usage(const char *program_name) { printf(" -c, --channel-layout N Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A (default: 0)\n"); printf(" -a, --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n"); printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n"); - printf(" --separate-audio-track Write entire MP2 file as single packet 0x40 (instead of interleaved)\n"); +// printf(" --separate-audio-track Write entire audio track as single packet instead of interleaved\n"); printf(" --pcm8-audio Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n"); @@ -2281,9 +2281,9 @@ static void show_usage(const char *program_name) { printf(" --intra-only Disable delta and skip encoding\n"); printf(" --enable-delta Enable delta encoding\n"); printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n"); - printf(" --temporal-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n"); - printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt)\n"); - printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) for significance maps\n"); + printf(" --3d-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n"); + printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n"); + printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) entropy coding\n"); printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n"); printf(" --no-perceptual-tuning Disable perceptual quantisation\n"); printf(" --no-dead-zone Disable dead-zone quantisation (for comparison/testing)\n"); @@ -2350,7 +2350,7 @@ static tav_encoder_t* create_encoder(void) { enc->intra_only = 0; enc->monoblock = 1; // Default to monoblock mode enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6) - enc->enable_ezbc = 1; // Default to EZBC over twobit-map + enc->enable_ezbc = 0; // default to twobit-map as EZBC+Zstd 3 = Twobitmap+Zstd 15, and Twobitmap is faster to decode enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg enc->audio_bitrate = 0; // 0 = use quality table enc->encode_limit = 0; // Default: no frame limit @@ -9435,6 +9435,8 @@ int main(int argc, char *argv[]) { {"delta-haar", required_argument, 0, 1018}, {"temporal-dwt", no_argument, 0, 1019}, {"temporal-3d", no_argument, 0, 1019}, + {"dwt-3d", no_argument, 0, 1019}, + {"3d-dwt", no_argument, 0, 1019}, {"mc-ezbc", no_argument, 0, 1020}, {"residual-coding", no_argument, 0, 1021}, {"adaptive-blocks", no_argument, 0, 1022}, @@ -9616,6 +9618,7 @@ int main(int argc, char *argv[]) { break; case 1020: // --mc-ezbc enc->temporal_enable_mcezbc = 1; + enc->enable_ezbc = 1; printf("MC-EZBC block-based motion compensation enabled (requires --temporal-dwt)\n"); break; case 1021: // --residual-coding @@ -10009,6 +10012,16 @@ int main(int argc, char *argv[]) { // Choose encoding path based on configuration size_t packet_size = 0; + // For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames + // For traditional encoding, process audio/subtitles for this single frame + if (!enc->enable_temporal_dwt) { + // Process audio for this frame + process_audio(enc, true_frame_count, enc->output_fp); + + // Process subtitles for this frame + process_subtitles(enc, true_frame_count, enc->output_fp); + } + if (enc->enable_temporal_dwt) { // GOP-based temporal 3D DWT encoding path @@ -10177,7 +10190,8 @@ int main(int argc, char *argv[]) { // Note: packet_size might already be > 0 from scene change flush above packet_size = 0; } - } else if (enc->enable_residual_coding) { + } + else if (enc->enable_residual_coding) { // MPEG-style residual coding path (I/P/B frames with motion compensation) // Get quantiser (use adjusted quantiser from bitrate control if applicable) int qY = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y; @@ -10344,7 +10358,8 @@ int main(int argc, char *argv[]) { } } } - } else { + } + else { // Traditional 2D DWT encoding path (no temporal transform, no motion compensation) uint8_t packet_type = is_keyframe ? TAV_PACKET_IFRAME : TAV_PACKET_PFRAME; packet_size = compress_and_write_frame(enc, packet_type); @@ -10368,16 +10383,6 @@ int main(int argc, char *argv[]) { adjust_quantiser_for_bitrate(enc); } - // For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames - // For traditional encoding, process audio/subtitles for this single frame - if (!enc->enable_temporal_dwt) { - // Process audio for this frame - process_audio(enc, true_frame_count, enc->output_fp); - - // Process subtitles for this frame - process_subtitles(enc, true_frame_count, enc->output_fp); - } - // Write a sync packet only after a video is been coded // For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC // For B-frame mode, sync packets are already written in the encoding loop