From 53da0bfceec3434208d1ce45965d65b743070825 Mon Sep 17 00:00:00 2001
From: minjaesong <alswo9628@gmail.com>
Date: Thu, 23 Oct 2025 01:29:20 +0900
Subject: [PATCH] TAV: fix: iframes not decoding

---
 assets/disk0/tvdos/bin/playtav.js             | 90 +++++++++++++------
 terranmon.txt                                 |  2 +-
 .../torvald/tsvm/GraphicsJSR223Delegate.kt    | 20 ++---
 video_encoder/encoder_tav.c                   | 43 +++++----
 4 files changed, 97 insertions(+), 58 deletions(-)

diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js
index e301799..91a66e3 100644
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -497,6 +497,9 @@ let readyGopData = null        // GOP that's already decoded and ready to play (
 let decodingGopData = null     // GOP currently being decoded in background
 let asyncDecodeInProgress = false  // Track if async decode is running
 let asyncDecodeSlot = 0        // Which slot the async decode is targeting
+
+// I-frame (non-GOP) timing control
+let iframeReady = false        // Track if an I-frame/P-frame is decoded and ready to display
 let asyncDecodeGopSize = 0     // Size of GOP being decoded async
 let asyncDecodePtr = 0         // Compressed data pointer to free after decode
 let asyncDecodeStartTime = 0   // When async decode started (for diagnostics)
@@ -773,6 +776,7 @@ function tryReadNextTAVHeader() {
 let lastKey = 0
 let skipped = false
 let paused = false
+let debugPrintAkku = 0
 
 // Playback loop - properly adapted from TEV with multi-file support
 try {
@@ -1040,41 +1044,17 @@ try {
                         }
                     }
 
-                    graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false)
-                    uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
+                    // Don't upload immediately - let timing loop handle it
+                    // Mark frame as ready for time-based display
+                    iframeReady = true
+                    uploadTime = 0  // Upload will happen in timing section below
 
-                    // Defer audio playback until a first frame is sent
-                    if (isInterlaced) {
-                        // fire audio after frame 1
-                        if (!audioFired && frameCount > 0) {
-                            audio.play(0)
-                            audioFired = true
-                        }
-                    }
-                    else {
-                        // fire audio after frame 0
-                        if (!audioFired) {
-                            audio.play(0)
-                            audioFired = true
-                        }
-                    }
                 } catch (e) {
                     console.log(`Frame ${frameCount}: decode failed: ${e}`)
                 } finally {
                     sys.free(compressedPtr)
                 }
 
-
-                let biasStart = sys.nanoTime()
-                setBiasLighting()
-                biasTime = (sys.nanoTime() - biasStart) / 1000000.0
-
-                // Log performance data every 60 frames
-                if (frameCount % 60 == 0 || frameCount == 0) {
-                    let totalTime = decompressTime + decodeTime + uploadTime + biasTime
-                    console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
-                }
-
             }
             else if (packetType === TAV_PACKET_GOP_UNIFIED) {
                 // GOP Unified packet (temporal 3D DWT)
@@ -1544,6 +1524,53 @@ try {
             audioFired = true
         }
 
+        // Step 2a: Display I-frame/P-frame with proper frame timing
+        if (!paused && iframeReady && currentGopSize === 0) {
+            // Initialize timing on first I-frame
+            if (nextFrameTime === 0) {
+                nextFrameTime = sys.nanoTime()
+            }
+
+            // Wait until the next frame is due, yielding via sys.sleep(1) between polls
+            while (sys.nanoTime() < nextFrameTime && !paused) {
+                sys.sleep(1)
+            }
+
+            if (!paused) {
+                let uploadStart = sys.nanoTime()
+                graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false)
+                uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
+
+                // Apply bias lighting
+                let biasStart = sys.nanoTime()
+                setBiasLighting()
+                biasTime = (sys.nanoTime() - biasStart) / 1000000.0
+
+                // Fire audio on first frame
+                if (!audioFired) {
+                    audio.play(0)
+                    audioFired = true
+                }
+
+                frameCount++
+                trueFrameCount++
+                iframeReady = false
+
+                // Swap ping-pong buffers for next frame
+                let temp = CURRENT_RGB_ADDR
+                CURRENT_RGB_ADDR = PREV_RGB_ADDR
+                PREV_RGB_ADDR = temp
+
+                // Schedule next frame
+                nextFrameTime += (frametime)  // frametime is in nanoseconds from header
+
+                // Log performance data every 60 frames
+                if (frameCount % 60 == 0) {
+                    console.log(`Frame ${frameCount}: Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms`)
+                }
+            }
+        }
+
         // Step 2 & 3: Display current GOP frame if it's time
         if (!paused && currentGopSize > 0 && currentGopFrameIndex < currentGopSize) {
             // Spin-wait for next frame time
@@ -1731,6 +1758,13 @@ try {
             gui.printTopBar(guiStatus, 1)
         }
 
+
+        debugPrintAkku += (t2 - t1)
+        if (debugPrintAkku > 5000000000) {
+            debugPrintAkku -= 5000000000
+            serial.println(`[PLAYTAV] decoding time = ${(decodeTime).toFixed(2)} ms`)
+        }
+
         // Small sleep to prevent 100% CPU and control loop rate
         // Allows continuous packet reading while maintaining proper frame timing
         sys.sleep(1)
diff --git a/terranmon.txt b/terranmon.txt
index 54ab528..7b2a181 100644
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -932,7 +932,7 @@ transmission capability, and region-of-interest coding.
             - 6-7 = Reserved/invalid (would indicate no luma and no chroma)
     uint8  Entropy Coder
             - 0 = Twobit-plane significance map
-            - 1 = Embedded Zero Block Coding
+            - 1 = Embedded Zero Block Coding (EZBC, experimental)
     uint8  Reserved[2]: fill with zeros
     uint8  Device Orientation
             - 0 = No rotation
diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index 338e381..a86d575 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -4520,15 +4520,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         // Read entropy coder from header: 0 = Twobit-map, 1 = EZBC
         val isEZBC = (entropyCoder == 1)
 
-        /*if (isEZBC) {
-            println("[AUTO] Using EZBC decoder")
+        if (isEZBC) {
             postprocessCoefficientsEZBC(compressedData, compressedOffset, coeffCount,
                                        channelLayout, outputY, outputCo, outputCg, outputAlpha)
         } else {
-            println("[AUTO] Using twobit-map decoder")
             postprocessCoefficientsVariableLayout(compressedData, compressedOffset, coeffCount,
                                                  channelLayout, outputY, outputCo, outputCg, outputAlpha)
-        }*/
+        }
 
         return isEZBC
     }
@@ -5323,7 +5321,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
 
         // First, we need to determine the size of compressed data for each channel
         // Read a large buffer to work with significance map format
-        val maxPossibleSize = coeffCount * 3 * 2 + (coeffCount + 7) / 8 * 3  // Worst case: original size + maps
+        val maxPossibleSize = coeffCount * 4 * 2 + (coeffCount + 7) / 8 * 4  // Worst case: original size + maps
         val coeffBuffer = ByteArray(maxPossibleSize)
         UnsafeHelper.memcpyRaw(null, vm.usermem.ptr + ptr, coeffBuffer, UnsafeHelper.getArrayOffset(coeffBuffer), maxPossibleSize.toLong())
 
@@ -6214,7 +6212,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     }
     // normal/strong sharpen filters make horizontal/vertical hairline artefacts
 
-    private val TavSharpenLuma = TavSharpenWeak
+    private val TavSharpenLuma = TavNullFilter
 
     private object TavNullFilter : TavWaveletFilter {
         override fun getCoeffMultiplier(level: Int): Float = 1.0f
@@ -6247,7 +6245,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                     if (coeff > maxCoeff) maxCoeff = coeff
                     if (coeff > 0.1f) nonzeroCoeff++
                 }
-                println("[IDWT-LEVEL-$level] BEFORE: ${currentWidth}x${currentHeight}, max=${maxCoeff.toInt()}, nonzero=$nonzeroCoeff/$sampleSize")
+//                println("[IDWT-LEVEL-$level] BEFORE: ${currentWidth}x${currentHeight}, max=${maxCoeff.toInt()}, nonzero=$nonzeroCoeff/$sampleSize")
             }
 
             // Apply inverse DWT to current subband region - EXACT match to encoder
@@ -7101,12 +7099,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         temporalLevels: Int,
         spatialFilter: Int
     ) {
-        if (numFrames < 2) return
-
         val numPixels = width * height
-        val temporalLine = FloatArray(numFrames)
 
         // Step 1: Apply inverse 2D spatial DWT to each temporal subband (each frame)
+        // This is required even for single frames (I-frames) to convert from DWT coefficients to pixel space
         for (t in 0 until numFrames) {
             tavApplyDWTInverseMultiLevel(
                 gopData[t], width, height,
@@ -7116,6 +7112,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
 
         // Step 2: Apply inverse temporal DWT to each spatial location
+        // Only needed for GOPs with multiple frames (skip for I-frames)
+        if (numFrames < 2) return
+
+        val temporalLine = FloatArray(numFrames)
         for (y in 0 until height) {
             for (x in 0 until width) {
                 val pixelIdx = y * width + x
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c
index 92789fc..6ea4d51 100644
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -18,7 +18,7 @@
 #include <float.h>
 #include <fftw3.h>
 
-#define ENCODER_VENDOR_STRING "Encoder-TAV 20251022 (3d-dwt,ezbc)"
+#define ENCODER_VENDOR_STRING "Encoder-TAV 20251023 (3d-dwt)"
 
 // TSVM Advanced Video (TAV) format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVM TAV"
@@ -118,7 +118,7 @@ static int needs_alpha_channel(int channel_layout) {
 #define DEFAULT_HEIGHT 448
 #define DEFAULT_FPS 30
 #define DEFAULT_QUALITY 3
-#define DEFAULT_ZSTD_LEVEL 3
+#define DEFAULT_ZSTD_LEVEL 15
 #define DEFAULT_PCM_ZSTD_LEVEL 3
 #define TEMPORAL_GOP_SIZE 20
 #define TEMPORAL_GOP_SIZE_MIN 8 // Minimum GOP size to avoid decoder hiccups
@@ -2270,7 +2270,7 @@ static void show_usage(const char *program_name) {
     printf("  -c, --channel-layout N  Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A (default: 0)\n");
     printf("  -a, --arate N           MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
     printf("                          Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
-    printf("  --separate-audio-track  Write entire MP2 file as single packet 0x40 (instead of interleaved)\n");
+//    printf("  --separate-audio-track  Write entire audio track as single packet instead of interleaved\n");
     printf("  --pcm8-audio            Use 8-bit PCM audio instead of MP2 (TSVM native audio format)\n");
     printf("  -S, --subtitles FILE    SubRip (.srt) or SAMI (.smi) subtitle file\n");
     printf("  --fontrom-lo FILE       Low font ROM file for internationalised subtitles\n");
@@ -2281,9 +2281,9 @@ static void show_usage(const char *program_name) {
     printf("  --intra-only            Disable delta and skip encoding\n");
     printf("  --enable-delta          Enable delta encoding\n");
     printf("  --delta-haar N          Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
-    printf("  --temporal-dwt          Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
-    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt)\n");
-    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) for significance maps\n");
+    printf("  --3d-dwt                Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
+    printf("  --mc-ezbc               Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
+    printf("  --ezbc                  Enable EZBC (Embedded Zero Block Coding) entropy coding\n");
     printf("  --ictcp                 Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
     printf("  --no-perceptual-tuning  Disable perceptual quantisation\n");
     printf("  --no-dead-zone          Disable dead-zone quantisation (for comparison/testing)\n");
@@ -2350,7 +2350,7 @@ static tav_encoder_t* create_encoder(void) {
     enc->intra_only = 0;
     enc->monoblock = 1;  // Default to monoblock mode
     enc->perceptual_tuning = 1;  // Default to perceptual quantisation (versions 5/6)
-    enc->enable_ezbc = 1;  // Default to EZBC over twobit-map
+    enc->enable_ezbc = 0;  // Default to twobit-map: EZBC at Zstd level 3 is equivalent to twobit-map at Zstd level 15, and twobit-map is faster to decode
     enc->channel_layout = CHANNEL_LAYOUT_YCOCG;  // Default to Y-Co-Cg
     enc->audio_bitrate = 0;  // 0 = use quality table
     enc->encode_limit = 0;  // Default: no frame limit
@@ -9435,6 +9435,8 @@ int main(int argc, char *argv[]) {
         {"delta-haar", required_argument, 0, 1018},
         {"temporal-dwt", no_argument, 0, 1019},
         {"temporal-3d", no_argument, 0, 1019},
+        {"dwt-3d", no_argument, 0, 1019},
+        {"3d-dwt", no_argument, 0, 1019},
         {"mc-ezbc", no_argument, 0, 1020},
         {"residual-coding", no_argument, 0, 1021},
         {"adaptive-blocks", no_argument, 0, 1022},
@@ -9616,6 +9618,7 @@ int main(int argc, char *argv[]) {
                 break;
             case 1020: // --mc-ezbc
                 enc->temporal_enable_mcezbc = 1;
+                enc->enable_ezbc = 1;
                 printf("MC-EZBC block-based motion compensation enabled (requires --temporal-dwt)\n");
                 break;
             case 1021: // --residual-coding
@@ -10009,6 +10012,16 @@ int main(int argc, char *argv[]) {
         // Choose encoding path based on configuration
         size_t packet_size = 0;
 
+        // For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames
+        // For traditional encoding, process audio/subtitles for this single frame
+        if (!enc->enable_temporal_dwt) {
+            // Process audio for this frame
+            process_audio(enc, true_frame_count, enc->output_fp);
+
+            // Process subtitles for this frame
+            process_subtitles(enc, true_frame_count, enc->output_fp);
+        }
+
         if (enc->enable_temporal_dwt) {
             // GOP-based temporal 3D DWT encoding path
 
@@ -10177,7 +10190,8 @@ int main(int argc, char *argv[]) {
                 // Note: packet_size might already be > 0 from scene change flush above
                 packet_size = 0;
             }
-        } else if (enc->enable_residual_coding) {
+        }
+        else if (enc->enable_residual_coding) {
             // MPEG-style residual coding path (I/P/B frames with motion compensation)
             // Get quantiser (use adjusted quantiser from bitrate control if applicable)
             int qY = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
@@ -10344,7 +10358,8 @@ int main(int argc, char *argv[]) {
                     }
                 }
             }
-        } else {
+        }
+        else {
             // Traditional 2D DWT encoding path (no temporal transform, no motion compensation)
             uint8_t packet_type = is_keyframe ? TAV_PACKET_IFRAME : TAV_PACKET_PFRAME;
             packet_size = compress_and_write_frame(enc, packet_type);
@@ -10368,16 +10383,6 @@ int main(int argc, char *argv[]) {
                 adjust_quantiser_for_bitrate(enc);
             }
 
-            // For GOP encoding, audio/subtitles are handled in gop_flush() for all GOP frames
-            // For traditional encoding, process audio/subtitles for this single frame
-            if (!enc->enable_temporal_dwt) {
-                // Process audio for this frame
-                process_audio(enc, true_frame_count, enc->output_fp);
-
-                // Process subtitles for this frame
-                process_subtitles(enc, true_frame_count, enc->output_fp);
-            }
-
             // Write a sync packet only after a video is been coded
             // For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC
             // For B-frame mode, sync packets are already written in the encoding loop