From 3e40b048a741db09b5c377196d314e29ad1af085 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Wed, 1 Oct 2025 09:32:34 +0900 Subject: [PATCH] TAV: allowing multi-tile if video is larger than default size --- assets/disk0/tvdos/bin/playtav.js | 4 +- terranmon.txt | 35 ++---- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 109 ++++++++---------- video_encoder/encoder_tav.c | 50 ++++---- 4 files changed, 88 insertions(+), 110 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 3f430ba..92d6090 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -254,7 +254,7 @@ header.fileRole = seqread.readOneByte() // Skip reserved bytes seqread.skip(4) -if (header.version < 1 || header.version > 6) { +if (header.version < 1 || header.version > 8) { printerrln(`Error: Unsupported TAV version ${header.version}`) errorlevel = 1 return @@ -296,7 +296,7 @@ console.log(`Decomposition levels: ${header.decompLevels}`) console.log(`Quality: Y=${QLUT[header.qualityY]}, Co=${QLUT[header.qualityCo]}, Cg=${QLUT[header.qualityCg]}`) console.log(`Channel layout: ${getChannelLayoutName(header.channelLayout)}`) console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`) -console.log(`Colour space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`) +console.log(`Colour space: ${header.version % 2 == 0 ? "ICtCp" : "YCoCg-R"}`) console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`) // Frame buffer addresses - same as TEV diff --git a/terranmon.txt b/terranmon.txt index 1ad92db..859525c 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -987,13 +987,15 @@ transmission capability, and region-of-interest coding. 
uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding; shared with A channel) uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) - - note: quantiser overrides are always present regardless of the channel layout - ## Coefficient Storage Format (Significance Map Compression) + - note: quantiser overrides are always present regardless of the channel layout + * Tile data (one compressed payload per tile) + + ### Coefficient Storage Format (Significance Map Compression) Starting with encoder version 2025-09-29, DWT coefficients are stored using significance map compression with concatenated maps layout for optimal efficiency: - ### Concatenated Maps Format (Current) + #### Concatenated Maps Format All channels are processed together to maximize Zstd compression: uint8 Y Significance Map[(coeff_count + 7) / 8] // 1 bit per Y coefficient @@ -1005,44 +1007,27 @@ transmission capability, and region-of-interest coding. 
int16 Cg Non-zero Values[variable length] // Only non-zero Cg coefficients int16 A Non-zero Values[variable length] // Only non-zero A coefficients (if alpha present) - ### Significance Map Encoding + #### Significance Map Encoding Each significance map uses 1 bit per coefficient position: - Bit = 1: coefficient is non-zero, read value from corresponding Non-zero Values array - Bit = 0: coefficient is zero - ### Compression Benefits + #### Compression Benefits - **Sparsity exploitation**: Typically 85-95% zeros in quantized DWT coefficients - **Cross-channel patterns**: Concatenated maps allow Zstd to find patterns across similar significance maps - **Overall improvement**: 16-18% compression improvement before Zstd compression - ### Legacy Separate Format (2025-09-29 initial) - Early significance map implementation processed channels separately: - For each channel (Y, Co, Cg, optional A): - uint8 Significance Map[(coeff_count + 7) / 8] // 1 bit per coefficient - int16 Non-zero Values[variable length] // Only non-zero coefficients + ### DWT Coefficient Structure (per tile) - ## Legacy Format (for reference) - int16 Y channel DWT coefficients[width * height + 4] - int16 Co channel DWT coefficients[width * height + 4] - int16 Cg channel DWT coefficients[width * height + 4] - int16 A channel DWT coefficients[width * height + 4] (only when the video has alpha) - - - int16 Y channel DWT coefficients[tile width * tile height + 4] - int16 Co channel DWT coefficients[tile width * tile height + 4] - int16 Cg channel DWT coefficients[tile width * tile height + 4] - ... 
(repeated per tile) - -### DWT Coefficient Structure (per tile) For each decomposition level L (from highest to lowest): uint16 LL_size: size of LL subband coefficients - uint16 LH_size: size of LH subband coefficients + uint16 LH_size: size of LH subband coefficients uint16 HL_size: size of HL subband coefficients uint16 HH_size: size of HH subband coefficients int16[] LL_coeffs: quantized LL subband (low-low frequencies) int16[] LH_coeffs: quantized LH subband (low-high frequencies) - int16[] HL_coeffs: quantized HL subband (high-low frequencies) + int16[] HL_coeffs: quantized HL subband (high-low frequencies) int16[] HH_coeffs: quantized HH subband (high-high frequencies) ## DWT Implementation Details diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index a4060cc..ee31fdc 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -57,10 +57,8 @@ import kotlin.collections.last import kotlin.collections.listOf import kotlin.collections.map import kotlin.collections.maxOfOrNull -import kotlin.collections.minus import kotlin.collections.mutableListOf import kotlin.collections.mutableMapOf -import kotlin.collections.plus import kotlin.collections.set import kotlin.collections.sliceArray import kotlin.collections.sorted @@ -74,14 +72,10 @@ import kotlin.intArrayOf import kotlin.let import kotlin.longArrayOf import kotlin.math.* -import kotlin.plus import kotlin.repeat -import kotlin.sequences.minus -import kotlin.sequences.plus import kotlin.text.format import kotlin.text.lowercase import kotlin.text.toString -import kotlin.times class GraphicsJSR223Delegate(private val vm: VM) { @@ -4059,11 +4053,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // TAV Simulated overlapping tiles constants (must match encoder) - private val TILE_SIZE_X = 280 - private val TILE_SIZE_Y = 224 + private val TAV_TILE_SIZE_X = 
640 + private val TAV_TILE_SIZE_Y = 540 private val TAV_TILE_MARGIN = 32 // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px) - private val PADDED_TILE_SIZE_X = TILE_SIZE_X + 2 * TAV_TILE_MARGIN // 280 + 64 = 344px - private val PADDED_TILE_SIZE_Y = TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // 224 + 64 = 288px + private val TAV_PADDED_TILE_SIZE_X = TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN + private val TAV_PADDED_TILE_SIZE_Y = TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // TAV coefficient delta storage for previous frame (for efficient P-frames) private var tavPreviousCoeffsY: MutableMap? = null @@ -4371,7 +4365,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { try { // Determine if monoblock mode based on TAV version - val isMonoblock = (tavVersion >= 3) + val isMonoblock = (tavVersion in 3..6) val tilesX: Int val tilesY: Int @@ -4381,9 +4375,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { tilesX = 1 tilesY = 1 } else { - // Standard mode: multiple 280x224 tiles (supported for backwards compatibility only) - tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X - tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y + // Standard mode: multiple 640x540 tiles + tilesX = (width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X + tilesY = (height + TAV_TILE_SIZE_Y - 1) / TAV_TILE_SIZE_Y } // Process each tile @@ -4442,7 +4436,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { width * height } else { // Standard mode: padded tiles (344x288) - PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y } var ptr = readPtr @@ -4519,7 +4513,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val cgTile = FloatArray(coeffCount) // Check if perceptual quantisation is used (versions 5 and 6) - val isPerceptual = (tavVersion == 5 || tavVersion == 6) + val isPerceptual = (tavVersion in 5..8) // Debug: Print version detection for frame 120 if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { @@ -4528,8 +4522,8 @@ class GraphicsJSR223Delegate(private val 
vm: VM) { if (isPerceptual) { // Perceptual dequantisation with subband-specific weights - val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X - val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y + val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X + val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels) dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedY, yTile, subbands, qY.toFloat(), false, decompLevels) @@ -4594,8 +4588,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Debug: Uniform quantisation subband analysis for comparison if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { - val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X - val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y + val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X + val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels) // Comprehensive five-number summary for uniform quantisation baseline @@ -4673,7 +4667,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val tileIdx = if (isMonoblock) { 0 // Single tile index for monoblock } else { - tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + tileY * ((width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X) + tileX } if (tavPreviousCoeffsY == null) { @@ -4686,8 +4680,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { tavPreviousCoeffsCg!![tileIdx] = cgTile.clone() // Apply inverse DWT - val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X - val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y + val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X + val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y if (isLossless) { tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0, 
TavSharpenNormal) @@ -4735,14 +4729,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { finalCgTile = cgTile } else { // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288) - finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + finalYTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) + finalCoTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) + finalCgTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) - for (y in 0 until TILE_SIZE_Y) { - for (x in 0 until TILE_SIZE_X) { - val coreIdx = y * TILE_SIZE_X + x - val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + for (y in 0 until TAV_TILE_SIZE_Y) { + for (x in 0 until TAV_TILE_SIZE_X) { + val coreIdx = y * TAV_TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) finalYTile[coreIdx] = yTile[paddedIdx] finalCoTile[coreIdx] = coTile[paddedIdx] @@ -4757,8 +4751,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Convert to RGB based on TAV version and mode - // v1,v3 = YCoCg-R, v2,v4 = ICtCp - if (tavVersion == 2 || tavVersion == 4) { + if (tavVersion % 2 == 0) { // ICtCp color space if (isMonoblock) { tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) @@ -4779,17 +4772,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavConvertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val startX = tileX * TILE_SIZE_X - val startY = tileY * TILE_SIZE_Y + val startX = tileX * TAV_TILE_SIZE_X + val startY = tileY * TAV_TILE_SIZE_Y // OPTIMISATION: Process pixels row by row with bulk copying for better cache locality - for (y in 0 until TILE_SIZE_Y) { + for (y in 0 until TAV_TILE_SIZE_Y) { val frameY = startY + y if (frameY >= height) 
break // Calculate valid pixel range for this row val validStartX = maxOf(0, startX) - val validEndX = minOf(width, startX + TILE_SIZE_X) + val validEndX = minOf(width, startX + TAV_TILE_SIZE_X) val validPixelsInRow = validEndX - validStartX if (validPixelsInRow > 0) { @@ -4798,7 +4791,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var bufferIdx = 0 for (x in validStartX until validEndX) { - val tileIdx = y * TILE_SIZE_X + (x - startX) + val tileIdx = y * TAV_TILE_SIZE_X + (x - startX) // YCoCg-R to RGB conversion (exact inverse of encoder) val Y = yTile[tileIdx] @@ -4826,17 +4819,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tavConvertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val startX = tileX * TILE_SIZE_X - val startY = tileY * TILE_SIZE_Y + val startX = tileX * TAV_TILE_SIZE_X + val startY = tileY * TAV_TILE_SIZE_Y // OPTIMISATION: Process pixels row by row with bulk copying for better cache locality - for (y in 0 until TILE_SIZE_Y) { + for (y in 0 until TAV_TILE_SIZE_Y) { val frameY = startY + y if (frameY >= height) break // Calculate valid pixel range for this row val validStartX = maxOf(0, startX) - val validEndX = minOf(width, startX + TILE_SIZE_X) + val validEndX = minOf(width, startX + TAV_TILE_SIZE_X) val validPixelsInRow = validEndX - validStartX if (validPixelsInRow > 0) { @@ -4845,7 +4838,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var bufferIdx = 0 for (x in validStartX until validEndX) { - val tileIdx = y * TILE_SIZE_X + (x - startX) + val tileIdx = y * TAV_TILE_SIZE_X + (x - startX) // ICtCp to sRGB conversion (adapted from encoder ICtCp functions) val I = iTile[tileIdx].toDouble() / 255.0 @@ -4996,17 +4989,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Helper functions (simplified versions of existing DWT functions) private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, 
prevRGBAddr: Long, width: Int, height: Int) { - val startX = tileX * TILE_SIZE_X - val startY = tileY * TILE_SIZE_Y + val startX = tileX * TAV_TILE_SIZE_X + val startY = tileY * TAV_TILE_SIZE_Y // OPTIMISATION: Copy entire rows at once for maximum performance - for (y in 0 until TILE_SIZE_Y) { + for (y in 0 until TAV_TILE_SIZE_Y) { val frameY = startY + y if (frameY >= height) break // Calculate valid pixel range for this row val validStartX = maxOf(0, startX) - val validEndX = minOf(width, startX + TILE_SIZE_X) + val validEndX = minOf(width, startX + TAV_TILE_SIZE_X) val validPixelsInRow = validEndX - validStartX if (validPixelsInRow > 0) { @@ -5077,7 +5070,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val tileIdx = if (isMonoblock) { 0 // Single tile index for monoblock } else { - tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + tileY * ((width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X) + tileX } var ptr = readPtr @@ -5094,7 +5087,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { width * height } else { // Standard mode: padded tiles (344x288) - PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y } // Read delta coefficients using significance map format (same as intra but with deltas) @@ -5195,8 +5188,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() // Apply inverse DWT - val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X - val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y + val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X + val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y if (isLossless) { tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) @@ -5244,14 +5237,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { finalCgTile = currentCg } else { // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288) 
- finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) - finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + finalYTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) + finalCoTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) + finalCgTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y) - for (y in 0 until TILE_SIZE_Y) { - for (x in 0 until TILE_SIZE_X) { - val coreIdx = y * TILE_SIZE_X + x - val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + for (y in 0 until TAV_TILE_SIZE_Y) { + for (x in 0 until TAV_TILE_SIZE_X) { + val coreIdx = y * TAV_TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) finalYTile[coreIdx] = currentY[paddedIdx] finalCoTile[coreIdx] = currentCo[paddedIdx] @@ -5267,7 +5260,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Convert to RGB based on TAV version and mode // v1,v3 = YCoCg-R, v2,v4 = ICtCp - if (tavVersion == 2 || tavVersion == 4) { + if (tavVersion % 2 == 0) { // ICtCp color space if (isMonoblock) { tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index aa85044..a806e24 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -16,22 +16,19 @@ #include #include -#ifndef PI -#define PI 3.14159265358979323846f -#endif - // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" // TAV version - dynamic based on colour space and perceptual tuning -// Version 5: YCoCg-R monoblock with perceptual quantisation (default) +// Version 8: ICtCp multi-tile with perceptual quantisation (--ictcp flag) +// Version 7: YCoCg-R multi-tile with perceptual quantisation (default if width > 640 or height > 540) // Version 6: ICtCp monoblock with perceptual quantisation (--ictcp flag) -// Legacy versions 
(uniform quantisation): -// Version 3: YCoCg-R monoblock uniform (--no-perceptual-tuning) +// Version 5: YCoCg-R monoblock with perceptual quantisation (default if width <= 640 and height <= 540) // Version 4: ICtCp monoblock uniform (--ictcp --no-perceptual-tuning) -// Version 1: YCoCg-R 4-tile (legacy, code preserved but not accessible) -// Version 2: ICtCp 4-tile (legacy, code preserved but not accessible) +// Version 3: YCoCg-R monoblock uniform (--no-perceptual-tuning) +// Version 2: ICtCp multi-tile uniform (--ictcp --no-perceptual-tuning) +// Version 1: YCoCg-R multi-tile uniform (--no-perceptual-tuning) -// Tile encoding modes (280x224 tiles) +// Tile encoding modes #define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) #define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles) #define TAV_MODE_DELTA 0x02 // Coefficient delta encoding (efficient P-frames) @@ -45,16 +42,15 @@ #define TAV_PACKET_SYNC 0xFF // Sync packet // DWT settings -#define TILE_SIZE_X 280 // 280x224 tiles - better compression efficiency -#define TILE_SIZE_Y 224 // Optimised for TSVM 560x448 (2×2 tiles exactly) -#define MAX_DECOMP_LEVELS 6 // Can go deeper: 280→140→70→35→17→8→4, 224→112→56→28→14→7→3 +#define TILE_SIZE_X 640 +#define TILE_SIZE_Y 540 // Simulated overlapping tiles settings for seamless DWT processing #define DWT_FILTER_HALF_SUPPORT 4 // For 9/7 filter (filter lengths 9,7 → L=4) #define TILE_MARGIN_LEVELS 3 // Use margin for 3 levels: 4 * (2^3) = 4 * 8 = 32px #define TILE_MARGIN (DWT_FILTER_HALF_SUPPORT * (1 << TILE_MARGIN_LEVELS)) // 4 * 8 = 32px -#define PADDED_TILE_SIZE_X (TILE_SIZE_X + 2 * TILE_MARGIN) // 280 + 64 = 344px -#define PADDED_TILE_SIZE_Y (TILE_SIZE_Y + 2 * TILE_MARGIN) // 224 + 64 = 288px +#define PADDED_TILE_SIZE_X (TILE_SIZE_X + 2 * TILE_MARGIN) +#define PADDED_TILE_SIZE_Y (TILE_SIZE_Y + 2 * TILE_MARGIN) // Wavelet filter types #define WAVELET_5_3_REVERSIBLE 0 // Lossless capable @@ -662,7 +658,7 @@ static tav_encoder_t* create_encoder(void) { 
enc->fps = DEFAULT_FPS; enc->quality_level = DEFAULT_QUALITY; enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; - enc->decomp_levels = MAX_DECOMP_LEVELS; + enc->decomp_levels = 6; enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY]; enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY]; enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY]; @@ -681,9 +677,7 @@ static int initialise_encoder(tav_encoder_t *enc) { if (!enc) return -1; // Automatic decomposition levels for monoblock mode - if (enc->monoblock) { - enc->decomp_levels = calculate_max_decomp_levels(enc->width, enc->height); - } + enc->decomp_levels = calculate_max_decomp_levels(enc->width, enc->height); // Calculate tile dimensions if (enc->monoblock) { @@ -691,7 +685,7 @@ static int initialise_encoder(tav_encoder_t *enc) { enc->tiles_x = 1; enc->tiles_y = 1; } else { - // Standard mode: multiple 280x224 tiles + // Standard mode: multiple tiles enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X; enc->tiles_y = (enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y; } @@ -1041,7 +1035,7 @@ static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y, int core_src_end_x = core_start_x + TILE_SIZE_X; if (core_src_start_x >= 0 && core_src_end_x <= enc->width) { - // OPTIMISATION: Bulk copy core region (280 pixels) in one operation + // OPTIMISATION: Bulk copy core region in one operation const int src_core_offset = src_row_offset + core_src_start_x; memcpy(&padded_y[padded_row_offset + core_start_px], @@ -2181,7 +2175,11 @@ static int write_tav_header(tav_encoder_t *enc) { version = enc->ictcp_mode ? 4 : 3; // Version 4 for ICtCp uniform, 3 for YCoCg-R uniform } } else { - version = enc->ictcp_mode ? 2 : 1; // Legacy 4-tile versions + if (enc->perceptual_tuning) { + version = enc->ictcp_mode ? 8 : 7; + } else { + version = enc->ictcp_mode ? 
2 : 1; + } } fputc(version, enc->output_fp); @@ -3260,9 +3258,6 @@ int main(int argc, char *argv[]) { return 1; } break; - /*case 'd': - enc->decomp_levels = CLAMP(atoi(optarg), 1, MAX_DECOMP_LEVELS); - break;*/ case 'v': enc->verbose = 1; break; @@ -3330,6 +3325,11 @@ int main(int argc, char *argv[]) { enc->perceptual_tuning = 0; } + // disable monoblock mode if either width or height exceeds tile size + if (enc->width > TILE_SIZE_X || enc->height > TILE_SIZE_Y) { + enc->monoblock = 0; + } + if (enc->lossless) { enc->perceptual_tuning = 0; enc->quantiser_y = 0; // will be resolved to 1