TAV: iS tHiS aN iMpRoVeMeNt¿

2026-06-06 13:38:30 +09:00 · 2025-10-16 09:24:21 +09:00
parent cc2f3e4d57
commit 0cf1173dd6
3 changed files with 110 additions and 46 deletions
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -4,6 +4,8 @@
 // Usage: playtav moviefile.tav [options]
 // Options: -i (interactive)

+const MAXMEM = sys.maxmem()
+
 const WIDTH = 560
 const HEIGHT = 448
 const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV"
@@ -1016,8 +1018,8 @@ try {

                // Check if GOP fits in VM memory
                const gopMemoryNeeded = gopSize * FRAME_SIZE
-                if (gopMemoryNeeded > 8 * 1024 * 1024) {
-                    throw new Error(`GOP too large: ${gopSize} frames needs ${(gopMemoryNeeded / 1024 / 1024).toFixed(2)}MB, but VM has only 8MB. Max GOP size: 11 frames.`)
+                if (gopMemoryNeeded > MAXMEM) {
+                    throw new Error(`GOP too large: ${gopSize} frames needs ${(gopMemoryNeeded / 1048576).toFixed(2)}MB, but VM has only ${(MAXMEM / 1048576).toFixed(1)}MB. Max GOP size: 11 frames for 8MB system.`)
                }

                // Allocate GOP buffers outside try block so finally can free them
@@ -1037,7 +1039,7 @@ try {
                    let decodeStart = sys.nanoTime()

                    // Call GOP decoder
-                    const framesDecoded = graphics.tavDecodeGopUnified(
+                    const [r1, r2] = graphics.tavDecodeGopUnified(
                        compressedPtr,
                        compressedSize,
                        gopSize,
@@ -1056,14 +1058,18 @@ try {
                        2  // temporalLevels (hardcoded for now, could be in header)
                    )

+                    const framesDecoded = r1
+                    decoderDbgInfo = r2
+
                    decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
                    decompressTime = 0  // Included in decode time

-                    // Display each decoded frame
+                    // Display each decoded frame with proper timing
                    for (let i = 0; i < framesDecoded; i++) {
-                        let uploadStart = sys.nanoTime()
+                        let frameStart = sys.nanoTime()
+                        let uploadStart = frameStart

-                        // Upload GOP frame directly (no copy needed - already in ARGB format)
+                        // Upload GOP frame directly (no copy needed - already in RGB24 format)
                        graphics.uploadRGBToFramebuffer(gopRGBBuffers[i], header.width, header.height, trueFrameCount + i, false)
                        uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0

@@ -1080,14 +1086,27 @@ try {
                            audioFired = true
                        }

-                        // Wait for frame timing
-                        akku -= FRAME_TIME
-                        while (akku < 0 && !stopPlay && !paused) {
-                            let t = sys.nanoTime()
-                            // Busy wait for accurate timing
-                            akku += (sys.nanoTime() - t) / 1000000000.0
+                        // Calculate how much time we've used so far for this frame
+                        let frameElapsed = (sys.nanoTime() - frameStart) / 1000000000.0
+
+                        // Wait for the remainder of FRAME_TIME (busy wait for accurate timing)
+                        let waitNeeded = FRAME_TIME - frameElapsed
+                        if (waitNeeded > 0) {
+                            let waitStart = sys.nanoTime()
+                            while ((sys.nanoTime() - waitStart) / 1000000000.0 < waitNeeded && !stopPlay && !paused) {
+                                sys.sleep(0) // Busy wait
+                            }
                        }

+                        // Update global time tracking to keep main loop synchronized
+                        let frameEnd = sys.nanoTime()
+                        let frameTotalTime = (frameEnd - frameStart) / 1000000000.0
+                        akku2 += frameTotalTime
+                        t1 = frameEnd  // Keep t1 synchronized with actual time
+
+                        frameCount++
+                        trueFrameCount++
+
                        // Swap ping-pong buffers for P-frame reference
                        let temp = CURRENT_RGB_ADDR
                        CURRENT_RGB_ADDR = PREV_RGB_ADDR
@@ -1124,7 +1143,7 @@ try {
                            e.printStackTrace()
                            let ee = e.getStackTrace()
                            console.log(ee.length)
-                            console.log(ee.join('\n'))
+                            console.log(ee.slice(0, 10).join('\n'))
                        }
                    } catch (ex) {}
                } finally {
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -4542,6 +4542,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {

        val framesPerSubband = numFrames shr temporalLevels  // numFrames / 2^temporalLevels

+        // Safety check: ensure we have enough frames for the temporal levels
+        // Minimum frames needed = 2^temporalLevels; fewer makes framesPerSubband 0 (division by zero below)
+        if (framesPerSubband == 0) {
+            // Not enough frames for this many temporal levels - treat all as base level
+            return 0
+        }
+
        // Determine which temporal subband this frame belongs to
        val subbandIdx = frameIdx / framesPerSubband

@@ -4863,6 +4870,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                        0x00 -> { // TAV_MODE_SKIP
                            // Copy 280x224 tile from previous frame to current frame
                            tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
+                            println("SKIP tile at frame $frameCount")
                            dbgOut["frameMode"] = "S"
                        }
                        0x01 -> { // TAV_MODE_INTRA
@@ -4870,7 +4878,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                            readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
                                                          width, height, qY, qCo, qCg,
                                                          waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount)
-                            dbgOut["frameMode"] = " "
+                            dbgOut["frameMode"] = "I"
                        }
                        0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet)
                            // Coefficient delta encoding for efficient P-frames
@@ -6275,7 +6283,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        spatialFilter: Int = 1,
        spatialLevels: Int = 6,
        temporalLevels: Int = 2
-    ): Int {
+    ): Array<Any> {
+        val dbgOut = HashMap<String, Any>()
+        dbgOut["qY"] = qYGlobal
+        dbgOut["qCo"] = qCoGlobal
+        dbgOut["qCg"] = qCgGlobal
+        dbgOut["frameMode"] = "G"
+
        val numPixels = width * height

        // Step 1: Decompress unified GOP block
@@ -6294,7 +6308,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            }
        } catch (e: Exception) {
            println("ERROR: Zstd decompression failed: ${e.message}")
-            return 0
+            return arrayOf(0, dbgOut)
        }

        // Step 2: Postprocess unified block to per-frame coefficients
@@ -6318,10 +6332,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels)
            val temporalScale = getTemporalQuantizerScale(temporalLevel)

-            // Apply temporal scaling to base quantizers
-            val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 255.0f)
-            val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 255.0f)
-            val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 255.0f)
+            // Apply temporal scaling to base quantizers for each channel
+            val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+            val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 4096.0f)
+            val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 4096.0f)

            // Use existing perceptual dequantization for spatial weighting
            dequantiseDWTSubbandsPerceptual(
@@ -6389,7 +6403,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            }
        }

-        return gopSize
+        return arrayOf(gopSize, dbgOut)
    }

    // Biorthogonal 13/7 wavelet inverse 1D transform
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -17,7 +17,7 @@
 #include <float.h>
 #include <fftw3.h>

-#define ENCODER_VENDOR_STRING "Encoder-TAV 20251015"
+#define ENCODER_VENDOR_STRING "Encoder-TAV 20251016"

 // TSVM Advanced Video (TAV) format constants
 #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVM TAV"
@@ -652,6 +652,9 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                       int *frame_numbers, int actual_gop_size);
 static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
                                   int *frame_numbers, int force_flush);
+static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
+                                  const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data,
+                                  uint8_t mode, uint8_t *buffer);
 static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int levels, int filter_type);
 static void dwt_2d_haar_inverse_flexible(float *tile_data, int width, int height, int levels);
 static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc,
@@ -1660,7 +1663,7 @@ static int gop_should_flush_motion(tav_encoder_t *enc) {
    return 0;
 }

-// Flush GOP: apply 3D DWT, quantize, serialize, and write to output
+// Flush GOP: apply 3D DWT, quantise, serialise, and write to output
 // Returns number of bytes written, or 0 on error
 // This function processes the entire GOP and writes all frames with temporal 3D DWT
 static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
@@ -1721,14 +1724,25 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
        free(aligned_cg);
    }

-    // Step 1: Apply 3D DWT (temporal + spatial) to each channel
-    // Note: This modifies gop_*_coeffs in-place
-    dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,
-                   enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
-    dwt_3d_forward(gop_co_coeffs, enc->width, enc->height, actual_gop_size,
-                   enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
-    dwt_3d_forward(gop_cg_coeffs, enc->width, enc->height, actual_gop_size,
-                   enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
+    // Step 1: For single-frame GOP, skip temporal DWT and use traditional I-frame path
+    if (actual_gop_size == 1) {
+        // Apply only 2D spatial DWT (no temporal transform for single frame)
+        dwt_2d_forward_flexible(gop_y_coeffs[0], enc->width, enc->height,
+                              enc->decomp_levels, enc->wavelet_filter);
+        dwt_2d_forward_flexible(gop_co_coeffs[0], enc->width, enc->height,
+                              enc->decomp_levels, enc->wavelet_filter);
+        dwt_2d_forward_flexible(gop_cg_coeffs[0], enc->width, enc->height,
+                              enc->decomp_levels, enc->wavelet_filter);
+    } else {
+        // Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel
+        // Note: This modifies gop_*_coeffs in-place
+        dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size,
+                       enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
+        dwt_3d_forward(gop_co_coeffs, enc->width, enc->height, actual_gop_size,
+                       enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
+        dwt_3d_forward(gop_cg_coeffs, enc->width, enc->height, actual_gop_size,
+                       enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter);
+    }

    // Step 2: Allocate quantized coefficient buffers
    int16_t **quant_y = malloc(actual_gop_size * sizeof(int16_t*));
@@ -1742,12 +1756,17 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
    }

    // Step 3: Quantize 3D DWT coefficients with temporal-spatial quantization
+    // Use channel-specific quantizers from encoder settings
+    int qY = base_quantiser;  // Y quantizer passed as parameter
+    int qCo = QLUT[enc->quantiser_co];  // Co quantizer from encoder
+    int qCg = QLUT[enc->quantiser_cg];  // Cg quantizer from encoder
+
    quantise_3d_dwt_coefficients(enc, gop_y_coeffs, quant_y, actual_gop_size,
-                                 num_pixels, base_quantiser, 0);  // Luma
+                                 num_pixels, qY, 0);  // Luma
    quantise_3d_dwt_coefficients(enc, gop_co_coeffs, quant_co, actual_gop_size,
-                                 num_pixels, base_quantiser, 1);  // Chroma Co
+                                 num_pixels, qCo, 1);  // Chroma Co
    quantise_3d_dwt_coefficients(enc, gop_cg_coeffs, quant_cg, actual_gop_size,
-                                 num_pixels, base_quantiser, 1);  // Chroma Cg
+                                 num_pixels, qCg, 1);  // Chroma Cg

    // Step 4: Preprocessing and compression
    size_t total_bytes_written = 0;
@@ -1755,20 +1774,26 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
    // Write timecode packet for first frame in GOP
    write_timecode_packet(output, frame_numbers[0], enc->output_fps, enc->is_ntsc_framerate);

-    // Single-frame GOP fallback: use traditional I-frame encoding
+    // Single-frame GOP fallback: use traditional I-frame encoding with serialise_tile_data
    if (actual_gop_size == 1) {
        // Write I-frame packet header (no motion vectors, no GOP overhead)
        uint8_t packet_type = TAV_PACKET_IFRAME;
        fwrite(&packet_type, 1, 1, output);
        total_bytes_written += 1;

-        // Preprocess single frame using standard variable layout
-        size_t max_preprocessed_size = (num_pixels * 3 * 2 + 7) / 8 + (num_pixels * 3 * sizeof(int16_t));
-        uint8_t *preprocessed_buffer = malloc(max_preprocessed_size);
+        // Allocate buffer for uncompressed tile data
+        // Use same format as compress_and_write_frame: serialise_tile_data
+        const size_t max_tile_size = 4 + (num_pixels * 3 * sizeof(int16_t));
+        uint8_t *uncompressed_buffer = malloc(max_tile_size);

-        size_t preprocessed_size = preprocess_coefficients_variable_layout(
-            quant_y[0], quant_co[0], quant_cg[0], NULL,
-            num_pixels, enc->channel_layout, preprocessed_buffer);
+        // Use serialise_tile_data with DWT-transformed float coefficients (before quantization)
+        // This matches the traditional I-frame path in compress_and_write_frame
+        size_t tile_size = serialise_tile_data(enc, 0, 0,
+                                               gop_y_coeffs[0], gop_co_coeffs[0], gop_cg_coeffs[0],
+                                               TAV_MODE_INTRA, uncompressed_buffer);
+
+        size_t preprocessed_size = tile_size;
+        uint8_t *preprocessed_buffer = uncompressed_buffer;

        // Compress with Zstd
        size_t max_compressed_size = ZSTD_compressBound(preprocessed_size);
@@ -1809,6 +1834,11 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser,
        free(preprocessed_buffer);
        free(compressed_buffer);

+        // Write SYNC packet after single-frame GOP I-frame
+        uint8_t sync_packet = TAV_PACKET_SYNC;
+        fwrite(&sync_packet, 1, 1, output);
+        total_bytes_written += 1;
+
        if (enc->verbose) {
            printf("Frame %d (single-frame GOP as I-frame): %zu bytes\n",
                   frame_numbers[0], compressed_size);
@@ -5651,8 +5681,11 @@ int main(int argc, char *argv[]) {
            process_subtitles(enc, true_frame_count, enc->output_fp);

            // Write a sync packet only after a video is been coded
-            uint8_t sync_packet = TAV_PACKET_SYNC;
-            fwrite(&sync_packet, 1, 1, enc->output_fp);
+            // For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC
+            if (!enc->enable_temporal_dwt) {
+                uint8_t sync_packet = TAV_PACKET_SYNC;
+                fwrite(&sync_packet, 1, 1, enc->output_fp);
+            }

            // NTSC frame duplication: emit extra sync packet for every 1000n+500 frames
            if (enc->is_ntsc_framerate && (frame_count % 1000 == 500)) {
@@ -5709,9 +5742,7 @@ int main(int argc, char *argv[]) {
        if (final_packet_size == 0) {
            fprintf(stderr, "Warning: Failed to flush final GOP frames\n");
        } else {
-            // Write sync packet after final GOP
-            uint8_t sync_packet = TAV_PACKET_SYNC;
-            fwrite(&sync_packet, 1, 1, enc->output_fp);
+            // GOP_SYNC packet already written by gop_process_and_flush - no additional SYNC needed
            printf("Final GOP flushed successfully (%zu bytes)\n", final_packet_size);
        }
    }