diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 437bb43..1c95e61 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -4,6 +4,8 @@ // Usage: playtav moviefile.tav [options] // Options: -i (interactive) +const MAXMEM = sys.maxmem() + const WIDTH = 560 const HEIGHT = 448 const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV" @@ -1016,8 +1018,8 @@ try { // Check if GOP fits in VM memory const gopMemoryNeeded = gopSize * FRAME_SIZE - if (gopMemoryNeeded > 8 * 1024 * 1024) { - throw new Error(`GOP too large: ${gopSize} frames needs ${(gopMemoryNeeded / 1024 / 1024).toFixed(2)}MB, but VM has only 8MB. Max GOP size: 11 frames.`) + if (gopMemoryNeeded > MAXMEM) { + throw new Error(`GOP too large: ${gopSize} frames needs ${(gopMemoryNeeded / 1048576).toFixed(2)}MB, but VM has only ${(MAXMEM / 1048576).toFixed(1)}MB. Max GOP size: 11 frames for 8MB system.`) } // Allocate GOP buffers outside try block so finally can free them @@ -1037,7 +1039,7 @@ try { let decodeStart = sys.nanoTime() // Call GOP decoder - const framesDecoded = graphics.tavDecodeGopUnified( + const [r1, r2] = graphics.tavDecodeGopUnified( compressedPtr, compressedSize, gopSize, @@ -1056,14 +1058,18 @@ try { 2 // temporalLevels (hardcoded for now, could be in header) ) + const framesDecoded = r1 + decoderDbgInfo = r2 + decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 decompressTime = 0 // Included in decode time - // Display each decoded frame + // Display each decoded frame with proper timing for (let i = 0; i < framesDecoded; i++) { - let uploadStart = sys.nanoTime() + let frameStart = sys.nanoTime() + let uploadStart = frameStart - // Upload GOP frame directly (no copy needed - already in ARGB format) + // Upload GOP frame directly (no copy needed - already in RGB24 format) graphics.uploadRGBToFramebuffer(gopRGBBuffers[i], header.width, header.height, trueFrameCount + i, false) uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 @@ -1080,14 +1086,27 @@ try { audioFired = true } - // Wait for frame timing - akku -= FRAME_TIME - while (akku < 0 && !stopPlay && !paused) { - let t = sys.nanoTime() - // Busy wait for accurate timing - akku += (sys.nanoTime() - t) / 1000000000.0 + // Calculate how much time we've used so far for this frame + let frameElapsed = (sys.nanoTime() - frameStart) / 1000000000.0 + + // Wait for the remainder of FRAME_TIME (busy wait for accurate timing) + let waitNeeded = FRAME_TIME - frameElapsed + if (waitNeeded > 0) { + let waitStart = sys.nanoTime() + while ((sys.nanoTime() - waitStart) / 1000000000.0 < waitNeeded && !stopPlay && !paused) { + sys.sleep(0) // Busy wait + } } + // Update global time tracking to keep main loop synchronized + let frameEnd = sys.nanoTime() + let frameTotalTime = (frameEnd - frameStart) / 1000000000.0 + akku2 += frameTotalTime + t1 = frameEnd // Keep t1 synchronized with actual time + + frameCount++ + trueFrameCount++ + // Swap ping-pong buffers for P-frame reference let temp = CURRENT_RGB_ADDR CURRENT_RGB_ADDR = PREV_RGB_ADDR @@ -1124,7 +1143,7 @@ try { e.printStackTrace() let ee = e.getStackTrace() console.log(ee.length) - console.log(ee.join('\n')) + console.log(ee.slice(0, 10).join('\n')) } } catch (ex) {} } finally { diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index bf0366a..cb96c43 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4542,6 +4542,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { val framesPerSubband = numFrames shr temporalLevels // numFrames / 2^temporalLevels + // Safety check: ensure we have enough frames for the temporal levels + // Minimum frames needed = 2^temporalLevels + if (framesPerSubband == 0) { + // Not enough frames for this many temporal levels - treat all as base level + return 0 + } + // Determine which temporal subband this frame belongs to val subbandIdx = frameIdx / framesPerSubband @@ -4863,6 +4870,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { 0x00 -> { // TAV_MODE_SKIP // Copy 280x224 tile from previous frame to current frame tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) + println("SKIP tile at frame $frameCount") dbgOut["frameMode"] = "S" } 0x01 -> { // TAV_MODE_INTRA @@ -4870,7 +4878,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock, frameCount) - dbgOut["frameMode"] = " " + dbgOut["frameMode"] = "I" } 0x02 -> { // TAV_MODE_DELTA (with optional Haar wavelet) // Coefficient delta encoding for efficient P-frames @@ -6275,7 +6283,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { spatialFilter: Int = 1, spatialLevels: Int = 6, temporalLevels: Int = 2 - ): Int { + ): Array { + val dbgOut = HashMap() + dbgOut["qY"] = qYGlobal + dbgOut["qCo"] = qCoGlobal + dbgOut["qCg"] = qCgGlobal + dbgOut["frameMode"] = "G" + val numPixels = width * height // Step 1: Decompress unified GOP block @@ -6294,7 +6308,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } catch (e: Exception) { println("ERROR: Zstd decompression failed: ${e.message}") - return 0 + return arrayOf(0, dbgOut) } // Step 2: Postprocess unified block to per-frame coefficients @@ -6318,10 +6332,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { val temporalLevel = getTemporalSubbandLevel(t, gopSize, temporalLevels) val temporalScale = getTemporalQuantizerScale(temporalLevel) - // Apply temporal scaling to base quantizers - val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 255.0f) - val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 255.0f) - val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 255.0f) + // Apply temporal scaling to base quantizers for each channel + val baseQY = (qYGlobal * temporalScale).coerceIn(1.0f, 4096.0f) + val baseQCo = (qCoGlobal * temporalScale).coerceIn(1.0f, 4096.0f) + val baseQCg = (qCgGlobal * temporalScale).coerceIn(1.0f, 4096.0f) // Use existing perceptual dequantization for spatial weighting dequantiseDWTSubbandsPerceptual( @@ -6389,7 +6403,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - return gopSize + return arrayOf(gopSize, dbgOut) } // Biorthogonal 13/7 wavelet inverse 1D transform diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 4545e28..cdc5e1c 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -17,7 +17,7 @@ #include #include -#define ENCODER_VENDOR_STRING "Encoder-TAV 20251015" +#define ENCODER_VENDOR_STRING "Encoder-TAV 20251016" // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" @@ -652,6 +652,9 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, int *frame_numbers, int actual_gop_size); static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, int *frame_numbers, int force_flush); +static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, + const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data, + uint8_t mode, uint8_t *buffer); static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int levels, int filter_type); static void dwt_2d_haar_inverse_flexible(float *tile_data, int width, int height, int levels); static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, @@ -1660,7 +1663,7 @@ static int gop_should_flush_motion(tav_encoder_t *enc) { return 0; } -// Flush GOP: apply 3D DWT, quantize, serialize, and write to output +// Flush GOP: apply 3D DWT, quantize, serialise, and write to output // Returns number of bytes written, or 0 on error // This function processes the entire GOP and writes all frames with temporal 3D DWT static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, @@ -1721,14 +1724,25 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, free(aligned_cg); } - // Step 1: Apply 3D DWT (temporal + spatial) to each channel - // Note: This modifies gop_*_coeffs in-place - dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size, - enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); - dwt_3d_forward(gop_co_coeffs, enc->width, enc->height, actual_gop_size, - enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); - dwt_3d_forward(gop_cg_coeffs, enc->width, enc->height, actual_gop_size, - enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + // Step 1: For single-frame GOP, skip temporal DWT and use traditional I-frame path + if (actual_gop_size == 1) { + // Apply only 2D spatial DWT (no temporal transform for single frame) + dwt_2d_forward_flexible(gop_y_coeffs[0], enc->width, enc->height, + enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_flexible(gop_co_coeffs[0], enc->width, enc->height, + enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_flexible(gop_cg_coeffs[0], enc->width, enc->height, + enc->decomp_levels, enc->wavelet_filter); + } else { + // Multi-frame GOP: Apply 3D DWT (temporal + spatial) to each channel + // Note: This modifies gop_*_coeffs in-place + dwt_3d_forward(gop_y_coeffs, enc->width, enc->height, actual_gop_size, + enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + dwt_3d_forward(gop_co_coeffs, enc->width, enc->height, actual_gop_size, + enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + dwt_3d_forward(gop_cg_coeffs, enc->width, enc->height, actual_gop_size, + enc->decomp_levels, enc->temporal_decomp_levels, enc->wavelet_filter); + } // Step 2: Allocate quantized coefficient buffers int16_t **quant_y = malloc(actual_gop_size * sizeof(int16_t*)); @@ -1742,12 +1756,17 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, } // Step 3: Quantize 3D DWT coefficients with temporal-spatial quantization + // Use channel-specific quantizers from encoder settings + int qY = base_quantiser; // Y quantizer passed as parameter + int qCo = QLUT[enc->quantiser_co]; // Co quantizer from encoder + int qCg = QLUT[enc->quantiser_cg]; // Cg quantizer from encoder + quantise_3d_dwt_coefficients(enc, gop_y_coeffs, quant_y, actual_gop_size, - num_pixels, base_quantiser, 0); // Luma + num_pixels, qY, 0); // Luma quantise_3d_dwt_coefficients(enc, gop_co_coeffs, quant_co, actual_gop_size, - num_pixels, base_quantiser, 1); // Chroma Co + num_pixels, qCo, 1); // Chroma Co quantise_3d_dwt_coefficients(enc, gop_cg_coeffs, quant_cg, actual_gop_size, - num_pixels, base_quantiser, 1); // Chroma Cg + num_pixels, qCg, 1); // Chroma Cg // Step 4: Preprocessing and compression size_t total_bytes_written = 0; @@ -1755,20 +1774,26 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, // Write timecode packet for first frame in GOP write_timecode_packet(output, frame_numbers[0], enc->output_fps, enc->is_ntsc_framerate); - // Single-frame GOP fallback: use traditional I-frame encoding + // Single-frame GOP fallback: use traditional I-frame encoding with serialise_tile_data if (actual_gop_size == 1) { // Write I-frame packet header (no motion vectors, no GOP overhead) uint8_t packet_type = TAV_PACKET_IFRAME; fwrite(&packet_type, 1, 1, output); total_bytes_written += 1; - // Preprocess single frame using standard variable layout - size_t max_preprocessed_size = (num_pixels * 3 * 2 + 7) / 8 + (num_pixels * 3 * sizeof(int16_t)); - uint8_t *preprocessed_buffer = malloc(max_preprocessed_size); + // Allocate buffer for uncompressed tile data + // Use same format as compress_and_write_frame: serialise_tile_data + const size_t max_tile_size = 4 + (num_pixels * 3 * sizeof(int16_t)); + uint8_t *uncompressed_buffer = malloc(max_tile_size); - size_t preprocessed_size = preprocess_coefficients_variable_layout( - quant_y[0], quant_co[0], quant_cg[0], NULL, - num_pixels, enc->channel_layout, preprocessed_buffer); + // Use serialise_tile_data with DWT-transformed float coefficients (before quantization) + // This matches the traditional I-frame path in compress_and_write_frame + size_t tile_size = serialise_tile_data(enc, 0, 0, + gop_y_coeffs[0], gop_co_coeffs[0], gop_cg_coeffs[0], + TAV_MODE_INTRA, uncompressed_buffer); + + size_t preprocessed_size = tile_size; + uint8_t *preprocessed_buffer = uncompressed_buffer; // Compress with Zstd size_t max_compressed_size = ZSTD_compressBound(preprocessed_size); @@ -1809,6 +1834,11 @@ static size_t gop_flush(tav_encoder_t *enc, FILE *output, int base_quantiser, free(preprocessed_buffer); free(compressed_buffer); + // Write SYNC packet after single-frame GOP I-frame + uint8_t sync_packet = TAV_PACKET_SYNC; + fwrite(&sync_packet, 1, 1, output); + total_bytes_written += 1; + if (enc->verbose) { printf("Frame %d (single-frame GOP as I-frame): %zu bytes\n", frame_numbers[0], compressed_size); @@ -5651,8 +5681,11 @@ int main(int argc, char *argv[]) { process_subtitles(enc, true_frame_count, enc->output_fp); // Write a sync packet only after a video is been coded - uint8_t sync_packet = TAV_PACKET_SYNC; - fwrite(&sync_packet, 1, 1, enc->output_fp); + // For GOP encoding, GOP_SYNC packet already serves as sync - don't emit extra SYNC + if (!enc->enable_temporal_dwt) { + uint8_t sync_packet = TAV_PACKET_SYNC; + fwrite(&sync_packet, 1, 1, enc->output_fp); + } // NTSC frame duplication: emit extra sync packet for every 1000n+500 frames if (enc->is_ntsc_framerate && (frame_count % 1000 == 500)) { @@ -5709,9 +5742,7 @@ int main(int argc, char *argv[]) { if (final_packet_size == 0) { fprintf(stderr, "Warning: Failed to flush final GOP frames\n"); } else { - // Write sync packet after final GOP - uint8_t sync_packet = TAV_PACKET_SYNC; - fwrite(&sync_packet, 1, 1, enc->output_fp); + // GOP_SYNC packet already written by gop_process_and_flush - no additional SYNC needed printf("Final GOP flushed successfully (%zu bytes)\n", final_packet_size); } }