tav: skip frame coding

2026-06-21 19:54:04 +09:00 · 2025-10-08 11:09:06 +09:00
parent d08511a39d
commit 67445b040c
6 changed files with 133 additions and 25 deletions
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -890,6 +890,7 @@ try {
                videoRate: getVideoRate(),
                frameCount: frameCount,
                totalFrames: header.totalFrames,
+                frameMode: decoderDbgInfo.frameMode,
                qY: decoderDbgInfo.qY,
                qCo: decoderDbgInfo.qCo,
                qCg: decoderDbgInfo.qCg,
--- a/assets/disk0/tvdos/bin/playtev.js
+++ b/assets/disk0/tvdos/bin/playtev.js
@@ -584,6 +584,7 @@ function rotateFieldBuffers() {
 }

 let frameDuped = false
+let currentFrameType = "I"

 // Main decoding loop - simplified for performance
 try {
@@ -712,6 +713,8 @@ try {
                    serial.println(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
                }

+                currentFrameType = packetType == TEV_PACKET_IFRAME ? "I" : "P"
+
            } else if (packetType == TEV_PACKET_AUDIO_MP2) {
                // MP2 Audio packet
                let audioLen = seqread.readInt()
@@ -756,6 +759,7 @@ try {
                videoRate: getVideoRate(),
                frameCount: frameCount,
                totalFrames: totalFrames,
+                frameMode: currentFrameType,
                qY: qualityY,
                qCo: qualityCo,
                qCg: qualityCg,
--- a/assets/disk0/tvdos/include/playgui.mjs
+++ b/assets/disk0/tvdos/include/playgui.mjs
@@ -204,7 +204,10 @@ status = {
    frameCount: int,
    totalFrames: int,
    fps: int,
+    frameMode: String,
    qY: int,
+    qCo: int,
+    qCg: int,
    akku: float,
    fileName: String,
    fileOrd: int,
@@ -247,7 +250,7 @@ function printTopBar(status, moreInfo) {
    if (moreInfo) {
        let filename = status.fileName.split("\\").pop()

-        let sF = `F ${(''+status.frameCount).padStart((''+status.totalFrames).length, ' ')}/${status.totalFrames}`
+        let sF = `F ${(''+status.frameCount).padStart((''+status.totalFrames).length, ' ')}${status.frameMode}/${status.totalFrames}`
        let sQ = `Q${(''+status.qY).padStart(4,' ')},${(''+status.qCo).padStart(2,' ')},${(''+status.qCg).padStart(2,' ')}`
        let sFPS = `${(status.frameCount / status.akku).toFixed(2)}f`
        let sRes = `${status.resolution}`
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -944,7 +944,7 @@ transmission capability, and region-of-interest coding.

 ## Packet Types
    0x10: I-frame (intra-coded frame)
-    0x11: P-frame (delta-coded frame)
+    0x11: P-frame (delta/skip frame)
    0x1F: (prohibited)
    0x20: MP2 audio packet
    0x30: Subtitle in "Simple" format
@@ -1005,6 +1005,7 @@ transmission capability, and region-of-interest coding.

 ## Block Data (per frame)
    uint8  Mode: encoding mode
+            0x00 = SKIP (just use frame data from previous frame)
            0x01 = INTRA (DWT-coded)
            0x02 = DELTA (DWT delta)
    uint8  Quantiser override Y  (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -4535,6 +4535,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                    dbgOut["qY"] = qY
                    dbgOut["qCo"] = qCo
                    dbgOut["qCg"] = qCg
+                    dbgOut["frameMode"] = ""

                    // debug print: raw decompressed bytes
                    /*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
@@ -4547,18 +4548,21 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                        0x00 -> { // TAV_MODE_SKIP
                            // Copy 280x224 tile from previous frame to current frame
                            tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
+                            dbgOut["frameMode"] = "S"
                        }
                        0x01 -> { // TAV_MODE_INTRA
                            // Decode DWT coefficients directly to RGB buffer
                            readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
                                                          width, height, qY, qCo, qCg,
                                                          waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, filmGrainLevel)
+                            dbgOut["frameMode"] = " "
                        }
                        0x02 -> { // TAV_MODE_DELTA
                            // Coefficient delta encoding for efficient P-frames
                            readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
                                                      width, height, qY, qCo, qCg,
                                                      waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, filmGrainLevel)
+                            dbgOut["frameMode"] = " "
                        }
                    }
                }
@@ -4674,7 +4678,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCo, coTile, subbands, qCo.toFloat(), true, decompLevels)
            dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedCg, cgTile, subbands, qCg.toFloat(), true, decompLevels)

-            // Apply spooky noise filter if enabled
+            // Apply film grain filter if enabled
            if (filmGrainLevel > 0) {
                val random = java.util.Random()
                for (i in 0 until coeffCount) {
@@ -4740,7 +4744,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                cgTile[i] = quantisedCg[i] * qCg.toFloat()
            }

-            // Apply spooky noise filter if enabled
+            // Apply film grain filter if enabled
            if (filmGrainLevel > 0) {
                val random = java.util.Random()
                for (i in 0 until coeffCount) {
@@ -5322,7 +5326,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
        }

-        // Apply spooky noise filter if enabled
+        // Apply film grain filter if enabled
        if (filmGrainLevel > 0) {
            val random = java.util.Random()
            for (i in 0 until coeffCount) {
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -329,6 +329,11 @@ typedef struct tav_encoder_s {
    float *previous_coeffs_alpha;  // Previous frame Alpha coefficients for all tiles
    int previous_coeffs_allocated; // Flag to track allocation

+    // Frame type tracking for SKIP mode
+    uint8_t last_frame_packet_type;  // Last emitted packet type (TAV_PACKET_IFRAME or TAV_PACKET_PFRAME)
+    int is_still_frame_cached;       // Cached result from detect_still_frame() for current frame
+    int used_skip_mode_last_frame;   // Set to 1 when SKIP mode was used (suppresses next keyframe timer)
+
    // Statistics
    size_t total_compressed_size;
    size_t total_uncompressed_size;
@@ -1882,6 +1887,9 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
    uint8_t *uncompressed_buffer = malloc(total_uncompressed_size);
    size_t uncompressed_offset = 0;

+    // Use cached still frame detection result (set in main loop)
+    int is_still_frame = enc->is_still_frame_cached;
+
    // Serialise all tiles
    for (int tile_y = 0; tile_y < enc->tiles_y; tile_y++) {
        for (int tile_x = 0; tile_x < enc->tiles_x; tile_x++) {
@@ -1889,8 +1897,17 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
            // Determine tile mode based on frame type, coefficient availability, and intra_only flag
            uint8_t mode;
            int is_keyframe = (packet_type == TAV_PACKET_IFRAME);
+
+            // SKIP mode condition: still frame with previous coefficients available (the main loop additionally requires the last packet to be an I-frame or an active SKIP run)
+            int can_use_skip = is_still_frame && enc->previous_coeffs_allocated;
+
            if (is_keyframe || !enc->previous_coeffs_allocated) {
                mode = TAV_MODE_INTRA;  // I-frames, first frames, or intra-only mode always use INTRA
+            } else if (can_use_skip) {
+                mode = TAV_MODE_SKIP;   // Still frames in SKIP run use SKIP mode
+                if (enc->verbose && tile_x == 0 && tile_y == 0) {
+                    printf("  → Using SKIP mode (copying from reference I-frame)\n");
+                }
            } else {
                mode = TAV_MODE_DELTA;  // P-frames use coefficient delta encoding
            }
@@ -1909,6 +1926,8 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
            float *tile_co_data = malloc(tile_data_size * sizeof(float));
            float *tile_cg_data = malloc(tile_data_size * sizeof(float));

+            // Skip processing for SKIP mode - decoder will copy from reference
+            if (mode != TAV_MODE_SKIP) {
                if (enc->monoblock) {
                    // Extract entire frame (no padding)
                    memcpy(tile_y_data, enc->current_frame_y, tile_data_size * sizeof(float));
@@ -1918,6 +1937,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                    // Extract padded tiles using context from neighbours
                    extract_padded_tile(enc, tile_x, tile_y, tile_y_data, tile_co_data, tile_cg_data);
                }
+            }

            // Debug: check input data before DWT
            /*if (tile_x == 0 && tile_y == 0) {
@@ -1941,7 +1961,8 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                printf("DEBUG: Y data before DWT: max=%.2f, nonzero=%d/%d\n", max_y_before, nonzero_before, total_pixels);
            }*/

-            // Apply DWT transform to each channel
+            // Apply DWT transform to each channel (skip for SKIP mode)
+            if (mode != TAV_MODE_SKIP) {
                if (enc->monoblock) {
                    // Monoblock mode: transform entire frame
                    dwt_2d_forward_flexible(tile_y_data, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter);
@@ -1953,6 +1974,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                    dwt_2d_forward_padded(tile_co_data, enc->decomp_levels, enc->wavelet_filter);
                    dwt_2d_forward_padded(tile_cg_data, enc->decomp_levels, enc->wavelet_filter);
                }
+            }

            // Debug: Check Y data after DWT transform for high-frequency content
            /*if (enc->frame_count == 120 && enc->verbose) {
@@ -1998,6 +2020,9 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
    enc->total_compressed_size += compressed_size;
    enc->total_uncompressed_size += uncompressed_offset;

+    // Track last frame type for SKIP mode eligibility
+    enc->last_frame_packet_type = packet_type;
+
    // Mark coefficient storage as available after first I-frame
    if (packet_type == TAV_PACKET_IFRAME) {
        enc->previous_coeffs_allocated = 1;
@@ -3352,6 +3377,52 @@ static int detect_scene_change(tav_encoder_t *enc) {
    return changed_ratio > threshold;
 }

+// Detect still frames (identical or nearly identical to previous frame) by comparing sampled RGB pixels
+// Returns 1 if frame is still (suitable for SKIP mode), 0 otherwise (also 0 when nothing can be compared or intra-only is set)
+static int detect_still_frame(tav_encoder_t *enc) {
+    if (!enc->current_frame_rgb || !enc->previous_frame_rgb || enc->intra_only || enc->width <= 0 || enc->height <= 0) {
+        return 0; // No frame to compare, empty frame, or intra-only mode
+    }
+
+    long long total_diff = 0;
+    int changed_pixels = 0;
+
+    // Sample one pixel per 2x2 block (every 2nd row and column, i.e. 1 in 4 pixels) for performance
+    for (int y = 0; y < enc->height; y += 2) {
+        for (int x = 0; x < enc->width; x += 2) {
+            int offset = (y * enc->width + x) * 3; // packed RGB: 3 components per pixel
+
+            // Calculate colour difference
+            int r_diff = abs(enc->current_frame_rgb[offset] - enc->previous_frame_rgb[offset]);
+            int g_diff = abs(enc->current_frame_rgb[offset + 1] - enc->previous_frame_rgb[offset + 1]);
+            int b_diff = abs(enc->current_frame_rgb[offset + 2] - enc->previous_frame_rgb[offset + 2]);
+
+            int pixel_diff = r_diff + g_diff + b_diff;
+            total_diff += pixel_diff;
+
+            // Count changed pixels with very low threshold (summed R+G+B difference above 6, i.e. 2 per channel on average)
+            if (pixel_diff > 6) {
+                changed_pixels++;
+            }
+        }
+    }
+
+    // Calculate metrics; the loops above visit ceil(height/2) x ceil(width/2) pixels, so round up for odd dimensions
+    int sampled_pixels = ((enc->height + 1) / 2) * ((enc->width + 1) / 2);
+    double avg_diff = (double)total_diff / sampled_pixels;
+    double changed_ratio = (double)changed_pixels / sampled_pixels;
+
+    if (enc->verbose) {
+        printf("Still frame detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio);
+    }
+
+    // Extremely tight thresholds for still frame detection
+    // Designed to catch only truly static content (paused video, title cards)
+    // Rejects slow panning, gradual drawing, or any partial motion
+    // Frame is "still" only if less than 0.001% of sampled pixels changed AND average difference < 0.05
+    return (changed_ratio < 0.00001 && avg_diff < 0.05);
+}
+
 // Main function
 int main(int argc, char *argv[]) {
    generate_random_filename(TEMP_AUDIO_FILE);
@@ -3813,7 +3884,31 @@ int main(int argc, char *argv[]) {
        // Determine frame type
        int is_scene_change = detect_scene_change(enc);
        int is_time_keyframe = (frame_count % KEYFRAME_INTERVAL) == 0;
-        int is_keyframe = enc->intra_only || is_time_keyframe || is_scene_change;
+
+        // Check if we can use SKIP mode
+        int is_still = detect_still_frame(enc);
+        enc->is_still_frame_cached = is_still;  // Cache for use in compress_and_write_frame
+
+        // SKIP mode can be used if:
+        // 1. Frame is still AND
+        // 2. Previous coeffs allocated AND
+        // 3. (Last frame was I-frame OR we're continuing a SKIP run)
+        int in_skip_run = enc->used_skip_mode_last_frame;
+        int can_use_skip = is_still &&
+                          enc->previous_coeffs_allocated &&
+                          (enc->last_frame_packet_type == TAV_PACKET_IFRAME || in_skip_run);
+
+        // During a SKIP run, suppress keyframe timer unless content changes enough to un-skip
+        // Un-skip threshold is the negation of SKIP threshold: content must change to break the run
+        int suppress_keyframe_timer = in_skip_run && is_still;
+
+        // Keyframe decision: intra-only mode, time-based (unless suppressed by SKIP run), or scene change
+        int is_keyframe = enc->intra_only ||
+                         (is_time_keyframe && !suppress_keyframe_timer) ||
+                         is_scene_change;
+
+        // Track if we'll use SKIP mode this frame (continues the SKIP run)
+        enc->used_skip_mode_last_frame = can_use_skip && !is_keyframe;

        // Verbose output for keyframe decisions
        /*if (enc->verbose && is_keyframe) {