From f4b03b55b61a21c868a92ecdf4a3290e5ade4f7a Mon Sep 17 00:00:00 2001 From: minjaesong Date: Wed, 17 Sep 2025 21:49:32 +0900 Subject: [PATCH] monoblock TAV --- assets/disk0/tvdos/bin/playtav.js | 8 +- assets/disk0/tvdos/bin/playtev.js | 2 +- terranmon.txt | 14 +- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 332 +++++++++++++----- video_encoder/encoder_tav.c | 230 ++++++++++-- 5 files changed, 455 insertions(+), 131 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index bd2705e..171b3d5 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -402,7 +402,7 @@ for (let i = 0; i < 8; i++) { } if (!magicValid) { - con.puts("Error: Invalid TAV file format") + printerrln("Error: Invalid TAV file format") errorlevel = 1 return } @@ -425,8 +425,8 @@ for (let i = 0; i < 7; i++) { seqread.readOneByte() } -if (header.version < 1 || header.version > 2) { - con.puts(`Error: Unsupported TAV version ${header.version}`) +if (header.version < 1 || header.version > 4) { + printerrln(`Error: Unsupported TAV version ${header.version}`) errorlevel = 1 return } @@ -637,7 +637,7 @@ try { // Upload RGB buffer to display framebuffer (like TEV) let uploadStart = sys.nanoTime() - graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, true) + graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, false) uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 // Defer audio playback until a first frame is sent diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index eba85ca..ce9dd17 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -673,7 +673,7 @@ try { // Upload RGB buffer to display framebuffer with dithering let uploadStart = sys.nanoTime() - graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, width, height, frameCount, true) + graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, width, height, frameCount, false) uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 // Convert to milliseconds } else { diff --git a/terranmon.txt b/terranmon.txt index 72686c3..28d99c2 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -695,7 +695,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. ## Header (24 bytes) uint8 Magic[8]: "\x1FTSVM TEV" - uint8 Version: 2 or 3 + uint8 Version: 2 (YCoCg-R) or 3 (ICtCp) uint16 Width: video width in pixels uint16 Height: video height in pixels uint8 FPS: frames per second @@ -709,7 +709,6 @@ DCT-based compression, motion compensation, and efficient temporal coding. uint8 Video Flags - bit 0 = is interlaced (should be default for most non-archival TEV videos) - bit 1 = is NTSC framerate (repeat every 1000th frame) - - bit 2 = is lossless mode uint8 Reserved, fill with zero ## Packet Types @@ -823,7 +822,7 @@ transmission capability, and region-of-interest coding. ## Header (32 bytes) uint8 Magic[8]: "\x1FTSVM TAV" - uint8 Version: 1 + uint8 Version: 3 (YCoCg-R) or 4 (ICtCp) uint16 Width: video width in pixels uint16 Height: video height in pixels uint8 FPS: frames per second @@ -854,12 +853,11 @@ transmission capability, and region-of-interest coding. uint32 Compressed Size * Zstd-compressed Block Data -## Block Data (per 280x224 tile) +## Block Data (per frame) uint8 Mode: encoding mode 0x00 = SKIP (copy from previous frame) - 0x01 = INTRA (DWT-coded, no prediction) - 0x02 = INTER (DWT-coded with motion compensation) - 0x03 = MOTION (motion vector only, no residual) + 0x01 = INTRA (DWT-coded) + 0x02 = DELTA (DWT delta) uint8 Quantiser override Y (use 0 to disable overriding) uint8 Quantiser override Co (use 0 to disable overriding) uint8 Quantiser override Cg (use 0 to disable overriding) @@ -900,7 +898,7 @@ TAV operates in YCoCg-R colour space with full resolution channels: - Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default) ## Compression Features -- 280x224 DWT tiles vs 16x16 DCT blocks in TEV +- Single DWT tiles vs 16x16 DCT blocks in TEV - Multi-resolution representation enables scalable decoding - Better frequency localization than DCT - Reduced blocking artifacts due to overlapping basis functions diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 11ab701..4e59bea 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -3822,8 +3822,21 @@ class GraphicsJSR223Delegate(private val vm: VM) { var readPtr = blockDataPtr try { - val tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X // 280x224 tiles - val tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y + // Determine if monoblock mode based on TAV version + val isMonoblock = (tavVersion == 3 || tavVersion == 4) + + val tilesX: Int + val tilesY: Int + + if (isMonoblock) { + // Monoblock mode: single tile covering entire frame + tilesX = 1 + tilesY = 1 + } else { + // Standard mode: multiple 280x224 tiles + tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X + tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y + } // Process each tile for (tileY in 0 until tilesY) { @@ -3847,17 +3860,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Copy 280x224 tile from previous frame to current frame tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) } - 0x01 -> { // TAV_MODE_INTRA + 0x01 -> { // TAV_MODE_INTRA // Decode DWT coefficients directly to RGB buffer - readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, + readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, - waveletFilter, decompLevels, isLossless, tavVersion) + waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock) } 0x02 -> { // TAV_MODE_DELTA // Coefficient delta encoding for efficient P-frames readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, - waveletFilter, decompLevels, isLossless, tavVersion) + waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock) } } } @@ -3870,92 +3883,130 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavDecodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, - waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { - // Now reading padded coefficient tiles (344x288) instead of core tiles (280x224) - val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long { + // Determine coefficient count based on mode + val coeffCount = if (isMonoblock) { + // Monoblock mode: entire frame + width * height + } else { + // Standard mode: padded tiles (344x288) + PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + } + var ptr = readPtr + + // Read quantised DWT coefficients for Y, Co, Cg channels + val quantisedY = ShortArray(coeffCount) + val quantisedCo = ShortArray(coeffCount) + val quantisedCg = ShortArray(coeffCount) - // Read quantised DWT coefficients for padded tile Y, Co, Cg channels (344x288) - val quantisedY = ShortArray(paddedCoeffCount) - val quantisedCo = ShortArray(paddedCoeffCount) - val quantisedCg = ShortArray(paddedCoeffCount) - - // OPTIMIZATION: Bulk read all coefficient data (344x288 * 3 channels * 2 bytes = 594,432 bytes) - val totalCoeffBytes = paddedCoeffCount * 3 * 2L // 3 channels, 2 bytes per short + // OPTIMIZATION: Bulk read all coefficient data + val totalCoeffBytes = coeffCount * 3 * 2L // 3 channels, 2 bytes per short val coeffBuffer = ByteArray(totalCoeffBytes.toInt()) UnsafeHelper.memcpyRaw(null, vm.usermem.ptr + ptr, coeffBuffer, UnsafeHelper.getArrayOffset(coeffBuffer), totalCoeffBytes) // Convert bulk data to coefficient arrays var bufferOffset = 0 - for (i in 0 until paddedCoeffCount) { + for (i in 0 until coeffCount) { quantisedY[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() bufferOffset += 2 } - for (i in 0 until paddedCoeffCount) { + for (i in 0 until coeffCount) { quantisedCo[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() bufferOffset += 2 } - for (i in 0 until paddedCoeffCount) { + for (i in 0 until coeffCount) { quantisedCg[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() bufferOffset += 2 } ptr += totalCoeffBytes.toInt() - // Dequantise padded coefficient tiles (344x288) - val yPaddedTile = FloatArray(paddedCoeffCount) - val coPaddedTile = FloatArray(paddedCoeffCount) - val cgPaddedTile = FloatArray(paddedCoeffCount) - - for (i in 0 until paddedCoeffCount) { - yPaddedTile[i] = quantisedY[i] * qY.toFloat() - coPaddedTile[i] = quantisedCo[i] * qCo.toFloat() - cgPaddedTile[i] = quantisedCg[i] * qCg.toFloat() + // Dequantise coefficient data + val yTile = FloatArray(coeffCount) + val coTile = FloatArray(coeffCount) + val cgTile = FloatArray(coeffCount) + + for (i in 0 until coeffCount) { + yTile[i] = quantisedY[i] * qY.toFloat() + coTile[i] = quantisedCo[i] * qCo.toFloat() + cgTile[i] = quantisedCg[i] * qCg.toFloat() } // Store coefficients for future delta reference (for P-frames) - val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + val tileIdx = if (isMonoblock) { + 0 // Single tile index for monoblock + } else { + tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + } + if (tavPreviousCoeffsY == null) { tavPreviousCoeffsY = mutableMapOf() tavPreviousCoeffsCo = mutableMapOf() tavPreviousCoeffsCg = mutableMapOf() } - tavPreviousCoeffsY!![tileIdx] = yPaddedTile.clone() - tavPreviousCoeffsCo!![tileIdx] = coPaddedTile.clone() - tavPreviousCoeffsCg!![tileIdx] = cgPaddedTile.clone() + tavPreviousCoeffsY!![tileIdx] = yTile.clone() + tavPreviousCoeffsCo!![tileIdx] = coTile.clone() + tavPreviousCoeffsCg!![tileIdx] = cgTile.clone() - // Apply inverse DWT on full padded tiles (344x288) + // Apply inverse DWT + val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X + val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y + if (isLossless) { - tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) - tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) - tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0) + tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, 0) + tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, 0) } else { - tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) - tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) - tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, waveletFilter) } - // Extract core 280x224 pixels from reconstructed padded tiles (344x288) - val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - - for (y in 0 until TILE_SIZE_Y) { - for (x in 0 until TILE_SIZE_X) { - val coreIdx = y * TILE_SIZE_X + x - val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) - - yTile[coreIdx] = yPaddedTile[paddedIdx] - coTile[coreIdx] = coPaddedTile[paddedIdx] - cgTile[coreIdx] = cgPaddedTile[paddedIdx] + // Extract final tile data + val finalYTile: FloatArray + val finalCoTile: FloatArray + val finalCgTile: FloatArray + + if (isMonoblock) { + // Monoblock mode: use full frame data directly (no padding to extract) + finalYTile = yTile + finalCoTile = coTile + finalCgTile = cgTile + } else { + // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288) + finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val coreIdx = y * TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + + finalYTile[coreIdx] = yTile[paddedIdx] + finalCoTile[coreIdx] = coTile[paddedIdx] + finalCgTile[coreIdx] = cgTile[paddedIdx] + } } } - // Convert to RGB based on TAV version (YCoCg-R for v1, ICtCp for v2) - if (tavVersion == 2) { - tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + // Convert to RGB based on TAV version and mode + // v1,v3 = YCoCg-R, v2,v4 = ICtCp + if (tavVersion == 2 || tavVersion == 4) { + // ICtCp color space + if (isMonoblock) { + tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } else { + tavConvertICtCpTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } } else { - tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + // YCoCg-R color space (v1, v3) + if (isMonoblock) { + tavConvertYCoCgMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } else { + tavConvertYCoCgTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } } return ptr @@ -4069,6 +4120,79 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + // Monoblock conversion functions (full frame processing) + private fun tavConvertYCoCgMonoblockToRGB(yData: FloatArray, coData: FloatArray, cgData: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + // Process entire frame at once for monoblock mode + for (y in 0 until height) { + // Create row buffer for bulk RGB data + val rowRgbBuffer = ByteArray(width * 3) + var bufferIdx = 0 + + for (x in 0 until width) { + val idx = y * width + x + + // YCoCg-R to RGB conversion (exact inverse of encoder) + val Y = yData[idx] + val Co = coData[idx] + val Cg = cgData[idx] + + // Inverse of encoder's YCoCg-R transform: + val tmp = Y - Cg / 2.0f + val g = Cg + tmp + val b = tmp - Co / 2.0f + val r = Co + b + + rowRgbBuffer[bufferIdx++] = r.toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = g.toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = b.toInt().coerceIn(0, 255).toByte() + } + + // OPTIMIZATION: Bulk copy entire row at once + val rowStartOffset = y * width * 3L + UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer), + null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong()) + } + } + + private fun tavConvertICtCpMonoblockToRGB(iData: FloatArray, ctData: FloatArray, cpData: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + // Process entire frame at once for monoblock mode + for (y in 0 until height) { + // Create row buffer for bulk RGB data + val rowRgbBuffer = ByteArray(width * 3) + var bufferIdx = 0 + + for (x in 0 until width) { + val idx = y * width + x + + // ICtCp to RGB conversion (BT.2100 -> sRGB) + val I = iData[idx] + val Ct = ctData[idx] + val Cp = cpData[idx] + + // ICtCp to LMS + val L = I + 0.00975f * Ct + 0.20524f * Cp + val M = I - 0.11387f * Ct + 0.13321f * Cp + val S = I + 0.03259f * Ct - 0.67851f * Cp + + // LMS to RGB (simplified conversion) + val r = 3.2406f * L - 1.5372f * M - 0.4986f * S + val g = -0.9689f * L + 1.8758f * M + 0.0415f * S + val b = 0.0557f * L - 0.2040f * M + 1.0570f * S + + rowRgbBuffer[bufferIdx++] = (r * 255f).toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = (g * 255f).toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = (b * 255f).toInt().coerceIn(0, 255).toByte() + } + + // OPTIMIZATION: Bulk copy entire row at once + val rowStartOffset = y * width * 3L + UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer), + null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong()) + } + } + private fun tavAddYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray, rgbAddr: Long, width: Int, height: Int) { val startX = tileX * TILE_SIZE_X @@ -4145,20 +4269,30 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, - waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { + waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long { - val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + val tileIdx = if (isMonoblock) { + 0 // Single tile index for monoblock + } else { + tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + } var ptr = readPtr - + // Initialize coefficient storage if needed if (tavPreviousCoeffsY == null) { tavPreviousCoeffsY = mutableMapOf() tavPreviousCoeffsCo = mutableMapOf() tavPreviousCoeffsCg = mutableMapOf() } - - // Coefficient count for padded tiles: 344x288 = 99,072 coefficients per channel - val coeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + + // Determine coefficient count based on mode + val coeffCount = if (isMonoblock) { + // Monoblock mode: entire frame + width * height + } else { + // Standard mode: padded tiles (344x288) + PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + } // Read delta coefficients (same format as intra: quantised int16 -> float) val deltaY = ShortArray(coeffCount) @@ -4194,37 +4328,63 @@ class GraphicsJSR223Delegate(private val vm: VM) { tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() // Apply inverse DWT + val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X + val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y + if (isLossless) { - tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) - tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) - tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0) + tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0) + tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0) } else { - tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) - tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) - tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter) } - // Extract core 280x224 pixels and convert to RGB (same as intra) - val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - - for (y in 0 until TILE_SIZE_Y) { - for (x in 0 until TILE_SIZE_X) { - val coreIdx = y * TILE_SIZE_X + x - val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) - - yTile[coreIdx] = currentY[paddedIdx] - coTile[coreIdx] = currentCo[paddedIdx] - cgTile[coreIdx] = currentCg[paddedIdx] + // Extract final tile data + val finalYTile: FloatArray + val finalCoTile: FloatArray + val finalCgTile: FloatArray + + if (isMonoblock) { + // Monoblock mode: use full frame data directly (no padding to extract) + finalYTile = currentY + finalCoTile = currentCo + finalCgTile = currentCg + } else { + // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288) + finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val coreIdx = y * TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + + finalYTile[coreIdx] = currentY[paddedIdx] + finalCoTile[coreIdx] = currentCo[paddedIdx] + finalCgTile[coreIdx] = currentCg[paddedIdx] + } } } - - // Convert to RGB based on TAV version - if (tavVersion == 2) { - tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + + // Convert to RGB based on TAV version and mode + // v1,v3 = YCoCg-R, v2,v4 = ICtCp + if (tavVersion == 2 || tavVersion == 4) { + // ICtCp color space + if (isMonoblock) { + tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } else { + tavConvertICtCpTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } } else { - tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + // YCoCg-R color space (v1, v3) + if (isMonoblock) { + tavConvertYCoCgMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } else { + tavConvertYCoCgTileToRGB(tileX, tileY, finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) + } } return ptr diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 805d1fe..960d08d 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -23,8 +23,11 @@ // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" // TAV version - dynamic based on colour space mode -// Version 1: YCoCg-R (default) -// Version 2: ICtCp (--ictcp flag) +// Version 3: YCoCg-R monoblock (default) +// Version 4: ICtCp monoblock (--ictcp flag) +// Legacy versions (4-tile mode, code preserved but not accessible): +// Version 1: YCoCg-R 4-tile +// Version 2: ICtCp 4-tile // Tile encoding modes (280x224 tiles) #define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) @@ -104,6 +107,21 @@ static inline float FCLAMP(float x, float min, float max) { return x < min ? min : (x > max ? max : x); } +// Calculate maximum decomposition levels for a given frame size +static int calculate_max_decomp_levels(int width, int height) { + int levels = 0; + int min_size = width < height ? width : height; + + // Keep halving until we reach a minimum size (at least 4 pixels) + while (min_size >= 8) { // Need at least 8 pixels to safely halve to 4 + min_size /= 2; + levels++; + } + + // Cap at a reasonable maximum to avoid going too deep + return levels > 10 ? 10 : levels; +} + // MP2 audio rate table (same as TEV) static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384}; @@ -164,6 +182,7 @@ typedef struct { int test_mode; int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp colour space int intra_only; // Force all tiles to use INTRA mode (disable delta encoding) + int monoblock; // Single DWT tile mode (encode entire frame as one tile) // Frame buffers uint8_t *current_frame_rgb; @@ -216,12 +235,39 @@ typedef struct { // Wavelet filter constants removed - using lifting scheme implementation instead +// Parse resolution string like "1024x768" with keyword recognition +static int parse_resolution(const char *res_str, int *width, int *height) { + if (!res_str) return 0; + if (strcmp(res_str, "cif") == 0 || strcmp(res_str, "CIF") == 0) { + *width = 352; + *height = 288; + return 1; + } + if (strcmp(res_str, "qcif") == 0 || strcmp(res_str, "QCIF") == 0) { + *width = 176; + *height = 144; + return 1; + } + if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) { + *width = DEFAULT_WIDTH >> 1; + *height = DEFAULT_HEIGHT >> 1; + return 1; + } + if (strcmp(res_str, "default") == 0 || strcmp(res_str, "DEFAULT") == 0) { + *width = DEFAULT_WIDTH; + *height = DEFAULT_HEIGHT; + return 1; + } + return sscanf(res_str, "%dx%d", width, height) == 2; +} + // Function prototypes static void show_usage(const char *program_name); static tav_encoder_t* create_encoder(void); static void cleanup_encoder(tav_encoder_t *enc); static int initialize_encoder(tav_encoder_t *enc); static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height); +static int calculate_max_decomp_levels(int width, int height); // Audio and subtitle processing prototypes (from TEV) static int start_audio_conversion(tav_encoder_t *enc); @@ -277,7 +323,7 @@ static void show_usage(const char *program_name) { } printf("\n\nFeatures:\n"); - printf(" - 280x224 DWT tiles with multi-resolution encoding\n"); + printf(" - Single DWT tile (monoblock) encoding for optimal quality\n"); printf(" - Full resolution YCoCg-R/ICtCp colour space\n"); printf(" - Lossless and lossy compression modes\n"); @@ -305,6 +351,7 @@ static tav_encoder_t* create_encoder(void) { enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY]; enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY]; enc->intra_only = 1; + enc->monoblock = 1; // Default to monoblock mode return enc; } @@ -312,10 +359,22 @@ static tav_encoder_t* create_encoder(void) { // Initialize encoder resources static int initialize_encoder(tav_encoder_t *enc) { if (!enc) return -1; - + + // Automatic decomposition levels for monoblock mode + if (enc->monoblock) { + enc->decomp_levels = calculate_max_decomp_levels(enc->width, enc->height); + } + // Calculate tile dimensions - enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X; - enc->tiles_y = (enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y; + if (enc->monoblock) { + // Monoblock mode: single tile covering entire frame + enc->tiles_x = 1; + enc->tiles_y = 1; + } else { + // Standard mode: multiple 280x224 tiles + enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X; + enc->tiles_y = (enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y; + } int num_tiles = enc->tiles_x * enc->tiles_y; // Allocate frame buffers @@ -334,17 +393,31 @@ static int initialize_encoder(tav_encoder_t *enc) { // Initialize ZSTD compression enc->zstd_ctx = ZSTD_createCCtx(); - enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max + + // Calculate maximum possible frame size for ZSTD buffer + const size_t max_frame_coeff_count = enc->monoblock ? + (enc->width * enc->height) : + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y); + const size_t max_frame_size = num_tiles * (4 + max_frame_coeff_count * 3 * sizeof(int16_t)); + enc->compressed_buffer_size = ZSTD_compressBound(max_frame_size); enc->compressed_buffer = malloc(enc->compressed_buffer_size); - // OPTIMIZATION: Allocate reusable quantisation buffers for padded tiles (344x288) - const int padded_coeff_count = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y; - enc->reusable_quantised_y = malloc(padded_coeff_count * sizeof(int16_t)); - enc->reusable_quantised_co = malloc(padded_coeff_count * sizeof(int16_t)); - enc->reusable_quantised_cg = malloc(padded_coeff_count * sizeof(int16_t)); - + // OPTIMIZATION: Allocate reusable quantisation buffers + int coeff_count_per_tile; + if (enc->monoblock) { + // Monoblock mode: entire frame + coeff_count_per_tile = enc->width * enc->height; + } else { + // Standard mode: padded tiles (344x288) + coeff_count_per_tile = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y; + } + + enc->reusable_quantised_y = malloc(coeff_count_per_tile * sizeof(int16_t)); + enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t)); + enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t)); + // Allocate coefficient delta storage for P-frames (per-tile coefficient storage) - size_t total_coeff_size = num_tiles * padded_coeff_count * sizeof(float); + size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float); enc->previous_coeffs_y = malloc(total_coeff_size); enc->previous_coeffs_co = malloc(total_coeff_size); enc->previous_coeffs_cg = malloc(total_coeff_size); @@ -605,8 +678,55 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) free(temp_col); } +// 2D DWT forward transform for arbitrary dimensions +static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int levels, int filter_type) { + const int max_size = (width > height) ? width : height; + float *temp_row = malloc(max_size * sizeof(float)); + float *temp_col = malloc(max_size * sizeof(float)); + for (int level = 0; level < levels; level++) { + int current_width = width >> level; + int current_height = height >> level; + if (current_width < 1 || current_height < 1) break; + // Row transform (horizontal) + for (int y = 0; y < current_height; y++) { + for (int x = 0; x < current_width; x++) { + temp_row[x] = tile_data[y * width + x]; + } + + if (filter_type == WAVELET_5_3_REVERSIBLE) { + dwt_53_forward_1d(temp_row, current_width); + } else { + dwt_97_forward_1d(temp_row, current_width); + } + + for (int x = 0; x < current_width; x++) { + tile_data[y * width + x] = temp_row[x]; + } + } + + // Column transform (vertical) + for (int x = 0; x < current_width; x++) { + for (int y = 0; y < current_height; y++) { + temp_col[y] = tile_data[y * width + x]; + } + + if (filter_type == WAVELET_5_3_REVERSIBLE) { + dwt_53_forward_1d(temp_col, current_height); + } else { + dwt_97_forward_1d(temp_col, current_height); + } + + for (int y = 0; y < current_height; y++) { + tile_data[y * width + x] = temp_col[y]; + } + } + } + + free(temp_row); + free(temp_col); +} // Quantisation for DWT subbands with rate control static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) { @@ -642,8 +762,10 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, return offset; } - // Quantise and serialise DWT coefficients (full padded tile: 344x288) - const int tile_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y; + // Quantise and serialise DWT coefficients + const int tile_size = enc->monoblock ? + (enc->width * enc->height) : // Monoblock mode: full frame + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y); // Standard mode: padded tiles // OPTIMIZATION: Use pre-allocated buffers instead of malloc/free per tile int16_t *quantised_y = enc->reusable_quantised_y; int16_t *quantised_co = enc->reusable_quantised_co; @@ -735,8 +857,11 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, // Compress and write frame data static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) { - // Calculate total uncompressed size (for padded tile coefficients: 344x288) - const size_t max_tile_size = 4 + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y * 3 * sizeof(int16_t)); // header + 3 channels of coefficients + // Calculate total uncompressed size + const size_t coeff_count = enc->monoblock ? + (enc->width * enc->height) : + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y); + const size_t max_tile_size = 4 + (coeff_count * 3 * sizeof(int16_t)); // header + 3 channels of coefficients const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size; // Allocate buffer for uncompressed tile data @@ -756,13 +881,29 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) mode = TAV_MODE_DELTA; // P-frames use coefficient delta encoding } - // Extract padded tile data (344x288) with neighbour context for overlapping tiles - float tile_y_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y]; - float tile_co_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y]; - float tile_cg_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y]; - - // Extract padded tiles using context from neighbours - extract_padded_tile(enc, tile_x, tile_y, tile_y_data, tile_co_data, tile_cg_data); + // Determine tile data size and allocate buffers + int tile_data_size; + if (enc->monoblock) { + // Monoblock mode: entire frame + tile_data_size = enc->width * enc->height; + } else { + // Standard mode: padded tiles (344x288) + tile_data_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y; + } + + float *tile_y_data = malloc(tile_data_size * sizeof(float)); + float *tile_co_data = malloc(tile_data_size * sizeof(float)); + float *tile_cg_data = malloc(tile_data_size * sizeof(float)); + + if (enc->monoblock) { + // Extract entire frame (no padding) + memcpy(tile_y_data, enc->current_frame_y, tile_data_size * sizeof(float)); + memcpy(tile_co_data, enc->current_frame_co, tile_data_size * sizeof(float)); + memcpy(tile_cg_data, enc->current_frame_cg, tile_data_size * sizeof(float)); + } else { + // Extract padded tiles using context from neighbours + extract_padded_tile(enc, tile_x, tile_y, tile_y_data, tile_co_data, tile_cg_data); + } // Debug: check input data before DWT /*if (tile_x == 0 && tile_y == 0) { @@ -773,16 +914,29 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) printf("\n"); }*/ - // Apply DWT transform to each padded channel (176x176) - dwt_2d_forward_padded(tile_y_data, enc->decomp_levels, enc->wavelet_filter); - dwt_2d_forward_padded(tile_co_data, enc->decomp_levels, enc->wavelet_filter); - dwt_2d_forward_padded(tile_cg_data, enc->decomp_levels, enc->wavelet_filter); + // Apply DWT transform to each channel + if (enc->monoblock) { + // Monoblock mode: transform entire frame + dwt_2d_forward_flexible(tile_y_data, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_flexible(tile_co_data, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_flexible(tile_cg_data, enc->width, enc->height, enc->decomp_levels, enc->wavelet_filter); + } else { + // Standard mode: transform padded tiles (344x288) + dwt_2d_forward_padded(tile_y_data, enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_padded(tile_co_data, enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_padded(tile_cg_data, enc->decomp_levels, enc->wavelet_filter); + } // Serialise tile - size_t tile_size = serialise_tile_data(enc, tile_x, tile_y, + size_t tile_size = serialise_tile_data(enc, tile_x, tile_y, tile_y_data, tile_co_data, tile_cg_data, mode, uncompressed_buffer + uncompressed_offset); uncompressed_offset += tile_size; + + // Free allocated tile data + free(tile_y_data); + free(tile_co_data); + free(tile_cg_data); } } @@ -1055,8 +1209,13 @@ static int write_tav_header(tav_encoder_t *enc) { // Magic number fwrite(TAV_MAGIC, 1, 8, enc->output_fp); - // Version (dynamic based on colour space) - uint8_t version = enc->ictcp_mode ? 2 : 1; // Version 2 for ICtCp, 1 for YCoCg-R + // Version (dynamic based on colour space and monoblock mode) + uint8_t version; + if (enc->monoblock) { + version = enc->ictcp_mode ? 4 : 3; // Version 4 for ICtCp monoblock, 3 for YCoCg-R monoblock + } else { + version = enc->ictcp_mode ? 2 : 1; // Version 2 for ICtCp, 1 for YCoCg-R + } fputc(version, enc->output_fp); // Video parameters @@ -2040,6 +2199,13 @@ int main(int argc, char *argv[]) { case 'o': enc->output_file = strdup(optarg); break; + case 's': + if (!parse_resolution(optarg, &enc->width, &enc->height)) { + fprintf(stderr, "Invalid resolution format: %s\n", optarg); + cleanup_encoder(enc); + return 1; + } + break; case 'q': enc->quality_level = CLAMP(atoi(optarg), 0, 5); enc->quantiser_y = QUALITY_Y[enc->quality_level];