diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js
index 0058f65..6e76d0c 100644
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -1337,7 +1337,7 @@ try {
                     // Start async decode
                     graphics.tavDecodeGopToVideoBufferAsync(
                         compressedPtr, compressedSize, gopSize,
-                        header.width, header.height,
+                        header.width, decodeHeight,  // Use decodeHeight for interlaced field support
                         header.qualityLevel,
                         QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
                         header.channelLayout,
@@ -1411,7 +1411,7 @@ try {
                     // Start async decode to ready slot
                     graphics.tavDecodeGopToVideoBufferAsync(
                         compressedPtr, compressedSize, gopSize,
-                        header.width, header.height,
+                        header.width, decodeHeight,  // Use decodeHeight for interlaced field support
                         header.qualityLevel,
                         QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
                         header.channelLayout,
@@ -1454,7 +1454,7 @@ try {
                     // Start async decode to decoding slot
                     graphics.tavDecodeGopToVideoBufferAsync(
                         compressedPtr, compressedSize, gopSize,
-                        header.width, header.height,
+                        header.width, decodeHeight,  // Use decodeHeight for interlaced field support
                         header.qualityLevel,
                         QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
                         header.channelLayout,
@@ -1822,7 +1822,7 @@ try {
 
                     graphics.tavDecodeGopToVideoBufferAsync(
                         readyGopData.compressedPtr, readyGopData.compressedSize, readyGopData.gopSize,
-                        header.width, header.height,
+                        header.width, decodeHeight,  // Use decodeHeight for interlaced field support
                         header.qualityLevel,
                         QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
                         header.channelLayout,
@@ -1931,7 +1931,21 @@ try {
                 const bufferOffset = bufferSlot * SLOT_SIZE
 
                 let uploadStart = sys.nanoTime()
-                graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
+
+                // For interlaced: use specialized function that handles field copying and deinterlacing
+                if (isInterlaced) {
+                    graphics.uploadInterlacedGopFrameToFramebuffer(
+                        currentGopFrameIndex, currentGopSize,
+                        header.width, decodeHeight, header.height,
+                        trueFrameCount, bufferOffset,
+                        prevFieldAddr, currentFieldAddr, nextFieldAddr,
+                        CURRENT_RGB_ADDR
+                    )
+                } else {
+                    // Progressive: upload directly from videoBuffer
+                    graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
+                }
+
                 uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
 
                 // Update active screen mask for this GOP frame
@@ -2041,7 +2055,7 @@ try {
                     if (readyGopData.needsDecode) {
                         graphics.tavDecodeGopToVideoBufferAsync(
                             readyGopData.compressedPtr, readyGopData.compressedSize, readyGopData.gopSize,
-                            header.width, header.height,
+                            header.width, decodeHeight,  // Use decodeHeight for interlaced field support
                             header.qualityLevel,
                             QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
                             header.channelLayout,
@@ -2119,7 +2133,7 @@ try {
                     // Start async decode
                     graphics.tavDecodeGopToVideoBufferAsync(
                         overflow.compressedPtr, overflow.compressedSize, overflow.gopSize,
-                        header.width, header.height,
+                        header.width, decodeHeight,  // Use decodeHeight for interlaced field support
                         header.qualityLevel,
                         QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
                         header.channelLayout,
diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index edde5d9..76c271f 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -6712,6 +6712,82 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         }
     }
 
+    /**
+     * Upload interlaced GOP frame from videoBuffer with deinterlacing.
+     * Handles field extraction and temporal deinterlacing for GOP frames.
+     *
+     * @param frameIndex Current frame index in GOP (0-based)
+     * @param gopSize Total number of frames in GOP
+     * @param width Frame width
+     * @param fieldHeight Height of each field (half of display height)
+     * @param fullHeight Full display height (2 * fieldHeight)
+     * @param frameCount Global frame counter for dithering
+     * @param bufferOffset Start offset of GOP in videoBuffer
+     * @param prevFieldAddr Memory address for previous field buffer
+     * @param currentFieldAddr Memory address for current field buffer
+     * @param nextFieldAddr Memory address for next field buffer
+     * @param deinterlaceOutputAddr Memory address for deinterlaced output
+     */
+    fun uploadInterlacedGopFrameToFramebuffer(
+        frameIndex: Int,
+        gopSize: Int,
+        width: Int,
+        fieldHeight: Int,
+        fullHeight: Int,
+        frameCount: Int,
+        bufferOffset: Long,
+        prevFieldAddr: Long,
+        currentFieldAddr: Long,
+        nextFieldAddr: Long,
+        deinterlaceOutputAddr: Long
+    ) {
+        val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
+        val fieldSize = width * fieldHeight * 3L
+
+        // Copy 3 consecutive fields from videoBuffer to field buffers
+        // Previous field (frame N-1, or N if first frame)
+        val prevFrameIdx = if (frameIndex > 0) frameIndex - 1 else 0
+        val prevFieldOffset = bufferOffset + (prevFrameIdx * fieldSize)
+        UnsafeHelper.memcpyRaw(
+            gpu.videoBuffer,
+            UnsafeHelper.getArrayOffset(gpu.videoBuffer) + prevFieldOffset,
+            null,
+            vm.usermem.ptr + prevFieldAddr,
+            fieldSize
+        )
+
+        // Current field (frame N)
+        val currFieldOffset = bufferOffset + (frameIndex * fieldSize)
+        UnsafeHelper.memcpyRaw(
+            gpu.videoBuffer,
+            UnsafeHelper.getArrayOffset(gpu.videoBuffer) + currFieldOffset,
+            null,
+            vm.usermem.ptr + currentFieldAddr,
+            fieldSize
+        )
+
+        // Next field (frame N+1, or N if last frame)
+        val nextFrameIdx = if (frameIndex < gopSize - 1) frameIndex + 1 else frameIndex
+        val nextFieldOffset = bufferOffset + (nextFrameIdx * fieldSize)
+        UnsafeHelper.memcpyRaw(
+            gpu.videoBuffer,
+            UnsafeHelper.getArrayOffset(gpu.videoBuffer) + nextFieldOffset,
+            null,
+            vm.usermem.ptr + nextFieldAddr,
+            fieldSize
+        )
+
+        // Deinterlace fields into full frame
+        tavDeinterlace(
+            frameCount, width, fieldHeight,
+            prevFieldAddr, currentFieldAddr, nextFieldAddr,
+            deinterlaceOutputAddr, "yadif"
+        )
+
+        // Upload deinterlaced full-height frame
+        uploadRGBToFramebuffer(deinterlaceOutputAddr, width, fullHeight, frameCount, false)
+    }
+
     // Async GOP decode state
     private val asyncDecodeComplete = java.util.concurrent.atomic.AtomicBoolean(false)
     private var asyncDecodeResult: Array? = null
diff --git a/video_encoder/Makefile b/video_encoder/Makefile
index f754965..ca109cd 100644
--- a/video_encoder/Makefile
+++ b/video_encoder/Makefile
@@ -3,8 +3,10 @@
 
 CC = gcc
 CXX = g++
-CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE
-CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE
+# CPU tuning; override for portable or cross builds, e.g. `make ARCHFLAGS=`
+ARCHFLAGS ?= -march=native
+CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE $(ARCHFLAGS)
+CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE $(ARCHFLAGS)
 DBGFLAGS =
 
 # Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c
index bfff3f5..e63cf17 100644
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -1023,6 +1023,62 @@ static void dwt_53_inverse_1d(float *data, int length) {
     free(temp);
 }
 
+// Biorthogonal 2,4 INVERSE 1D transform (in place, lifting form).
+// Input:  [low-pass (ceil(length/2)) | high-pass (length/2)] coefficients.
+// Output: interleaved samples; undoes the encoder's dwt_bior24_forward_1d.
+// NOTE(review): predict uses only one even neighbour, which does not look
+// like the textbook bior2.4 / LeGall kernel -- confirm the naming.
+static void dwt_bior24_inverse_1d(float *data, int length) {
+    if (length < 2) return;
+
+    float *temp = malloc(sizeof(float) * length);
+    if (!temp) {
+        // Fail loudly rather than dereference NULL below
+        fprintf(stderr, "dwt_bior24_inverse_1d: out of memory\n");
+        exit(1);
+    }
+    int half = (length + 1) / 2;
+    int i;
+
+    int nE = half;        // number of low-pass (even) coefficients
+    int nO = length / 2;  // number of high-pass (odd) coefficients
+
+    float *even = temp;
+    float *odd = temp + nE;
+
+    // Load L and H
+    for (i = 0; i < nE; i++) {
+        even[i] = data[i];
+    }
+    for (i = 0; i < nO; i++) {
+        odd[i] = data[half + i];
+    }
+
+    // ---- Inverse update: s[i] = s[i] - 0.25*d[i] ----
+    for (i = 0; i < nE; i++) {
+        // Odd length: the last even sample has no paired d, so use 0
+        float d = (i < nO) ? odd[i] : 0.0f;
+        even[i] = even[i] - 0.25f * d;
+    }
+
+    // ---- Inverse predict: o[i] = d[i] + 0.5*s[i] ----
+    for (i = 0; i < nO; i++) {
+        odd[i] = odd[i] + 0.5f * even[i];
+    }
+
+    // Interleave back into output
+    for (i = 0; i < nO; i++) {
+        data[2 * i] = even[i];
+        data[2 * i + 1] = odd[i];
+    }
+    if (nE > nO) {
+        // Trailing even sample for odd length
+        data[2 * nO] = even[nO];
+    }
+
+    free(temp);
+}
+
 // Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling)
 static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) {
     int max_size = (width > height) ? width : height;
@@ -1044,14 +1100,14 @@ static void apply_inverse_dwt_multilevel(float *data, int width, int height, int
     }
 
     // Debug: Print dimension sequence
-    static int debug_once = 1;
+    /*static int debug_once = 1;
     if (debug_once) {
         fprintf(stderr, "DWT dimension sequence for %dx%d with %d levels:\n", width, height, levels);
         for (int i = 0; i <= levels; i++) {
             fprintf(stderr, "  Level %d: %dx%d\n", i, widths[i], heights[i]);
         }
         debug_once = 0;
-    }
+    }*/
 
     // TSVM: for (level in levels - 1 downTo 0)
     // Apply inverse transforms using pre-calculated dimensions
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c
index 7269d8f..307620f 100644
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -2013,13 +2013,13 @@ static int calculate_max_decomp_levels(tav_encoder_t *enc, int width, int height
     int min_size = (!enc->monoblock) ? TILE_SIZE_Y : (width < height ? width : height);
 
-    // Keep halving until we reach a minimum size (at least 4 pixels)
-    while (min_size >= 8) { // Need at least 8 pixels to safely halve to 4
+    // Keep halving while the result stays at least 8 pixels
+    while (min_size >= 16) { // shallower than before: stop at 8 pixels instead of 4
         min_size /= 2;
         levels++;
     }
 
-    // Cap at a reasonable maximum to avoid going too deep
-    return levels > 10 ? 10 : levels;
+    // Cap at a reasonable maximum to avoid going deep
+    return levels > 6 ? 6 : levels;
 }
 
 // Bitrate control functions
@@ -2274,6 +2274,16 @@ static int parse_resolution(const char *res_str, int *width, int *height, const
         *height = 144;
         return 1;
     }
+    if (strcmp(res_str, "d1") == 0 || strcmp(res_str, "D1") == 0) {
+        *width = 720;
+        *height = 486;
+        return 1;
+    }
+    if (strcmp(res_str, "d1pal") == 0 || strcmp(res_str, "D1PAL") == 0) {
+        *width = 720;
+        *height = 576;
+        return 1;
+    }
     if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) {
         *width = DEFAULT_WIDTH >> 1;
         *height = DEFAULT_HEIGHT >> 1;
@@ -2447,6 +2457,8 @@ static void show_usage(const char *program_name) {
     printf("\n\nVideo Size Keywords:");
     printf("\n  -s cif: equal to 352x288");
     printf("\n  -s qcif: equal to 176x144");
+    printf("\n  -s d1: equal to 720x486");
+    printf("\n  -s d1pal: equal to 720x576");
     printf("\n  -s half: equal to %dx%d", DEFAULT_WIDTH >> 1, DEFAULT_HEIGHT >> 1);
     printf("\n  -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
     printf("\n  -s original: use input video's original resolution");
@@ -3131,6 +3143,123 @@ static void dwt_haar_forward_1d(float *data, int length) {
     free(temp);
 }
 
+// Biorthogonal 2,4 FORWARD 1D transform (in place, lifting form).
+// Input:  interleaved samples.
+// Output: [low-pass (ceil(length/2)) | high-pass (length/2)] coefficients.
+// NOTE(review): predict uses only one even neighbour, which does not look
+// like the textbook bior2.4 / LeGall kernel -- confirm the naming.
+static void dwt_bior24_forward_1d(float *data, int length) {
+    if (length < 2) return;
+
+    float *temp = malloc(sizeof(float) * length);
+    if (!temp) {
+        // Fail loudly rather than dereference NULL below
+        fprintf(stderr, "dwt_bior24_forward_1d: out of memory\n");
+        exit(1);
+    }
+    int half = (length + 1) / 2;
+    int i;
+
+    // Lifting scheme: split into even/odd samples, predict (P), then update (U)
+    //   even[k] = data[2k]    -> becomes the low-pass band
+    //   odd[k]  = data[2k+1]  -> becomes the high-pass band
+    int nE = half;        // number of even samples
+    int nO = length / 2;  // number of odd samples
+
+    float *even = temp;      // first nE floats of temp
+    float *odd = temp + nE;  // remaining nO floats of temp
+
+    // Split into even and odd samples
+    for (i = 0; i < nE; i++) {
+        even[i] = data[2 * i];
+    }
+    for (i = 0; i < nO; i++) {
+        odd[i] = data[2 * i + 1];
+    }
+
+    // ---- Predict step: d[i] = odd[i] - 0.5 * even[i] ----
+    for (i = 0; i < nO; i++) {
+        odd[i] = odd[i] - 0.5f * even[i];
+    }
+
+    // ---- Update step: s[i] = even[i] + 0.25 * d[i] ----
+    for (i = 0; i < nE; i++) {
+        // When odd array has fewer samples (odd length case),
+        // treat missing d value as 0.
+        float d = (i < nO) ? odd[i] : 0.0f;
+        even[i] = even[i] + 0.25f * d;
+    }
+
+    // Write back as [LLLL | HHHH]. nE + nO == length, so every slot is
+    // written and no zero padding is needed.
+    for (i = 0; i < nE; i++) {
+        data[i] = even[i];
+    }
+    for (i = 0; i < nO; i++) {
+        data[half + i] = odd[i];
+    }
+
+    free(temp);
+}
+
+
+// Biorthogonal 2,4 INVERSE 1D transform (in place, lifting form).
+// Input:  [low-pass (ceil(length/2)) | high-pass (length/2)] coefficients.
+// Output: interleaved samples; undoes dwt_bior24_forward_1d.
+// NOTE(review): predict uses only one even neighbour, which does not look
+// like the textbook bior2.4 / LeGall kernel -- confirm the naming.
+static void dwt_bior24_inverse_1d(float *data, int length) {
+    if (length < 2) return;
+
+    float *temp = malloc(sizeof(float) * length);
+    if (!temp) {
+        // Fail loudly rather than dereference NULL below
+        fprintf(stderr, "dwt_bior24_inverse_1d: out of memory\n");
+        exit(1);
+    }
+    int half = (length + 1) / 2;
+    int i;
+
+    int nE = half;        // number of low-pass (even) coefficients
+    int nO = length / 2;  // number of high-pass (odd) coefficients
+
+    float *even = temp;
+    float *odd = temp + nE;
+
+    // Load L and H
+    for (i = 0; i < nE; i++) {
+        even[i] = data[i];
+    }
+    for (i = 0; i < nO; i++) {
+        odd[i] = data[half + i];
+    }
+
+    // ---- Inverse update: s[i] = s[i] - 0.25*d[i] ----
+    for (i = 0; i < nE; i++) {
+        // Odd length: the last even sample has no paired d, so use 0
+        float d = (i < nO) ? odd[i] : 0.0f;
+        even[i] = even[i] - 0.25f * d;
+    }
+
+    // ---- Inverse predict: o[i] = d[i] + 0.5*s[i] ----
+    for (i = 0; i < nO; i++) {
+        odd[i] = odd[i] + 0.5f * even[i];
+    }
+
+    // Interleave back into output
+    for (i = 0; i < nO; i++) {
+        data[2 * i] = even[i];
+        data[2 * i + 1] = odd[i];
+    }
+    if (nE > nO) {
+        // Trailing even sample for odd length
+        data[2 * nO] = even[nO];
+    }
+
+    free(temp);
+}
+
+
 // Haar wavelet inverse 1D transform
 // Reconstructs from averages (low-pass) and differences (high-pass)
 static void dwt_haar_inverse_1d(float *data, int length) {
@@ -11048,8 +11177,8 @@ int main(int argc, char *argv[]) {
         enc->perceptual_tuning = 0;
     }
 
-    // disable monoblock mode if either width or height exceeds tie size
-    if (enc->width > TILE_SIZE_X || enc->height > TILE_SIZE_Y) {
+    // disable monoblock mode if either width or height exceeds D1 PAL size
+    if (enc->width > 720 || enc->height > 576) {
         enc->monoblock = 0;
     }
 