TAV: allowing multi-tile if video is larger than default size

This commit is contained in:
minjaesong
2025-10-01 09:32:34 +09:00
parent 70dfc7bf13
commit 3e40b048a7
4 changed files with 88 additions and 110 deletions

View File

@@ -254,7 +254,7 @@ header.fileRole = seqread.readOneByte()
// Skip reserved bytes // Skip reserved bytes
seqread.skip(4) seqread.skip(4)
if (header.version < 1 || header.version > 6) { if (header.version < 1 || header.version > 8) {
printerrln(`Error: Unsupported TAV version ${header.version}`) printerrln(`Error: Unsupported TAV version ${header.version}`)
errorlevel = 1 errorlevel = 1
return return
@@ -296,7 +296,7 @@ console.log(`Decomposition levels: ${header.decompLevels}`)
console.log(`Quality: Y=${QLUT[header.qualityY]}, Co=${QLUT[header.qualityCo]}, Cg=${QLUT[header.qualityCg]}`) console.log(`Quality: Y=${QLUT[header.qualityY]}, Co=${QLUT[header.qualityCo]}, Cg=${QLUT[header.qualityCg]}`)
console.log(`Channel layout: ${getChannelLayoutName(header.channelLayout)}`) console.log(`Channel layout: ${getChannelLayoutName(header.channelLayout)}`)
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`) console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
console.log(`Colour space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`) console.log(`Colour space: ${header.version % 2 == 0 ? "ICtCp" : "YCoCg-R"}`)
console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`) console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`)
// Frame buffer addresses - same as TEV // Frame buffer addresses - same as TEV

View File

@@ -987,13 +987,15 @@ transmission capability, and region-of-interest coding.
uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding; shared with A channel) uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding; shared with A channel)
uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
- note: quantiser overrides are always present regardless of the channel layout - note: quantiser overrides are always present regardless of the channel layout
## Coefficient Storage Format (Significance Map Compression) * Tile data (one compressed payload per tile)
### Coefficient Storage Format (Significance Map Compression)
Starting with encoder version 2025-09-29, DWT coefficients are stored using Starting with encoder version 2025-09-29, DWT coefficients are stored using
significance map compression with concatenated maps layout for optimal efficiency: significance map compression with concatenated maps layout for optimal efficiency:
### Concatenated Maps Format (Current) #### Concatenated Maps Format
All channels are processed together to maximize Zstd compression: All channels are processed together to maximize Zstd compression:
uint8 Y Significance Map[(coeff_count + 7) / 8] // 1 bit per Y coefficient uint8 Y Significance Map[(coeff_count + 7) / 8] // 1 bit per Y coefficient
@@ -1005,36 +1007,19 @@ transmission capability, and region-of-interest coding.
int16 Cg Non-zero Values[variable length] // Only non-zero Cg coefficients int16 Cg Non-zero Values[variable length] // Only non-zero Cg coefficients
int16 A Non-zero Values[variable length] // Only non-zero A coefficients (if alpha present) int16 A Non-zero Values[variable length] // Only non-zero A coefficients (if alpha present)
### Significance Map Encoding #### Significance Map Encoding
Each significance map uses 1 bit per coefficient position: Each significance map uses 1 bit per coefficient position:
- Bit = 1: coefficient is non-zero, read value from corresponding Non-zero Values array - Bit = 1: coefficient is non-zero, read value from corresponding Non-zero Values array
- Bit = 0: coefficient is zero - Bit = 0: coefficient is zero
### Compression Benefits #### Compression Benefits
- **Sparsity exploitation**: Typically 85-95% zeros in quantized DWT coefficients - **Sparsity exploitation**: Typically 85-95% zeros in quantized DWT coefficients
- **Cross-channel patterns**: Concatenated maps allow Zstd to find patterns across similar significance maps - **Cross-channel patterns**: Concatenated maps allow Zstd to find patterns across similar significance maps
- **Overall improvement**: 16-18% compression improvement before Zstd compression - **Overall improvement**: 16-18% compression improvement before Zstd compression
### Legacy Separate Format (2025-09-29 initial)
Early significance map implementation processed channels separately:
For each channel (Y, Co, Cg, optional A): ### DWT Coefficient Structure (per tile)
uint8 Significance Map[(coeff_count + 7) / 8] // 1 bit per coefficient
int16 Non-zero Values[variable length] // Only non-zero coefficients
## Legacy Format (for reference)
int16 Y channel DWT coefficients[width * height + 4]
int16 Co channel DWT coefficients[width * height + 4]
int16 Cg channel DWT coefficients[width * height + 4]
int16 A channel DWT coefficients[width * height + 4] (only when the video has alpha)
<for legacy non-monoblock format>
int16 Y channel DWT coefficients[tile width * tile height + 4]
int16 Co channel DWT coefficients[tile width * tile height + 4]
int16 Cg channel DWT coefficients[tile width * tile height + 4]
... (repeated per tile)
### DWT Coefficient Structure (per tile)
For each decomposition level L (from highest to lowest): For each decomposition level L (from highest to lowest):
uint16 LL_size: size of LL subband coefficients uint16 LL_size: size of LL subband coefficients
uint16 LH_size: size of LH subband coefficients uint16 LH_size: size of LH subband coefficients

View File

@@ -57,10 +57,8 @@ import kotlin.collections.last
import kotlin.collections.listOf import kotlin.collections.listOf
import kotlin.collections.map import kotlin.collections.map
import kotlin.collections.maxOfOrNull import kotlin.collections.maxOfOrNull
import kotlin.collections.minus
import kotlin.collections.mutableListOf import kotlin.collections.mutableListOf
import kotlin.collections.mutableMapOf import kotlin.collections.mutableMapOf
import kotlin.collections.plus
import kotlin.collections.set import kotlin.collections.set
import kotlin.collections.sliceArray import kotlin.collections.sliceArray
import kotlin.collections.sorted import kotlin.collections.sorted
@@ -74,14 +72,10 @@ import kotlin.intArrayOf
import kotlin.let import kotlin.let
import kotlin.longArrayOf import kotlin.longArrayOf
import kotlin.math.* import kotlin.math.*
import kotlin.plus
import kotlin.repeat import kotlin.repeat
import kotlin.sequences.minus
import kotlin.sequences.plus
import kotlin.text.format import kotlin.text.format
import kotlin.text.lowercase import kotlin.text.lowercase
import kotlin.text.toString import kotlin.text.toString
import kotlin.times
class GraphicsJSR223Delegate(private val vm: VM) { class GraphicsJSR223Delegate(private val vm: VM) {
@@ -4059,11 +4053,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// TAV Simulated overlapping tiles constants (must match encoder) // TAV Simulated overlapping tiles constants (must match encoder)
private val TILE_SIZE_X = 280 private val TAV_TILE_SIZE_X = 640
private val TILE_SIZE_Y = 224 private val TAV_TILE_SIZE_Y = 540
private val TAV_TILE_MARGIN = 32 // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px) private val TAV_TILE_MARGIN = 32 // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px)
private val PADDED_TILE_SIZE_X = TILE_SIZE_X + 2 * TAV_TILE_MARGIN // 280 + 64 = 344px private val TAV_PADDED_TILE_SIZE_X = TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN
private val PADDED_TILE_SIZE_Y = TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // 224 + 64 = 288px private val TAV_PADDED_TILE_SIZE_Y = TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN
// TAV coefficient delta storage for previous frame (for efficient P-frames) // TAV coefficient delta storage for previous frame (for efficient P-frames)
private var tavPreviousCoeffsY: MutableMap<Int, FloatArray>? = null private var tavPreviousCoeffsY: MutableMap<Int, FloatArray>? = null
@@ -4371,7 +4365,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
try { try {
// Determine if monoblock mode based on TAV version // Determine if monoblock mode based on TAV version
val isMonoblock = (tavVersion >= 3) val isMonoblock = (tavVersion in 3..6)
val tilesX: Int val tilesX: Int
val tilesY: Int val tilesY: Int
@@ -4381,9 +4375,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tilesX = 1 tilesX = 1
tilesY = 1 tilesY = 1
} else { } else {
// Standard mode: multiple 280x224 tiles (supported for backwards compatibility only) // Standard mode: multiple 640x540 tiles
tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X tilesX = (width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X
tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y tilesY = (height + TAV_TILE_SIZE_Y - 1) / TAV_TILE_SIZE_Y
} }
// Process each tile // Process each tile
@@ -4442,7 +4436,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
width * height width * height
} else { } else {
// Standard mode: padded tiles (344x288) // Standard mode: padded tiles (344x288)
PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y
} }
var ptr = readPtr var ptr = readPtr
@@ -4519,7 +4513,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val cgTile = FloatArray(coeffCount) val cgTile = FloatArray(coeffCount)
// Check if perceptual quantisation is used (versions 5 and 6) // Check if perceptual quantisation is used (versions 5 and 6)
val isPerceptual = (tavVersion == 5 || tavVersion == 6) val isPerceptual = (tavVersion in 5..8)
// Debug: Print version detection for frame 120 // Debug: Print version detection for frame 120
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
@@ -4528,8 +4522,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
if (isPerceptual) { if (isPerceptual) {
// Perceptual dequantisation with subband-specific weights // Perceptual dequantisation with subband-specific weights
val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels) val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedY, yTile, subbands, qY.toFloat(), false, decompLevels) dequantiseDWTSubbandsPerceptual(qIndex, qYGlobal, quantisedY, yTile, subbands, qY.toFloat(), false, decompLevels)
@@ -4594,8 +4588,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Debug: Uniform quantisation subband analysis for comparison // Debug: Uniform quantisation subband analysis for comparison
if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) { if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels) val subbands = calculateSubbandLayout(tileWidth, tileHeight, decompLevels)
// Comprehensive five-number summary for uniform quantisation baseline // Comprehensive five-number summary for uniform quantisation baseline
@@ -4673,7 +4667,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val tileIdx = if (isMonoblock) { val tileIdx = if (isMonoblock) {
0 // Single tile index for monoblock 0 // Single tile index for monoblock
} else { } else {
tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX tileY * ((width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X) + tileX
} }
if (tavPreviousCoeffsY == null) { if (tavPreviousCoeffsY == null) {
@@ -4686,8 +4680,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tavPreviousCoeffsCg!![tileIdx] = cgTile.clone() tavPreviousCoeffsCg!![tileIdx] = cgTile.clone()
// Apply inverse DWT // Apply inverse DWT
val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
if (isLossless) { if (isLossless) {
tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal)
@@ -4735,14 +4729,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
finalCgTile = cgTile finalCgTile = cgTile
} else { } else {
// Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288) // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288)
finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) finalYTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) finalCoTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) finalCgTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
for (y in 0 until TILE_SIZE_Y) { for (y in 0 until TAV_TILE_SIZE_Y) {
for (x in 0 until TILE_SIZE_X) { for (x in 0 until TAV_TILE_SIZE_X) {
val coreIdx = y * TILE_SIZE_X + x val coreIdx = y * TAV_TILE_SIZE_X + x
val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) val paddedIdx = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
finalYTile[coreIdx] = yTile[paddedIdx] finalYTile[coreIdx] = yTile[paddedIdx]
finalCoTile[coreIdx] = coTile[paddedIdx] finalCoTile[coreIdx] = coTile[paddedIdx]
@@ -4757,8 +4751,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// Convert to RGB based on TAV version and mode // Convert to RGB based on TAV version and mode
// v1,v3 = YCoCg-R, v2,v4 = ICtCp if (tavVersion % 2 == 0) {
if (tavVersion == 2 || tavVersion == 4) {
// ICtCp color space // ICtCp color space
if (isMonoblock) { if (isMonoblock) {
tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)
@@ -4779,17 +4772,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun tavConvertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray, private fun tavConvertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray,
rgbAddr: Long, width: Int, height: Int) { rgbAddr: Long, width: Int, height: Int) {
val startX = tileX * TILE_SIZE_X val startX = tileX * TAV_TILE_SIZE_X
val startY = tileY * TILE_SIZE_Y val startY = tileY * TAV_TILE_SIZE_Y
// OPTIMISATION: Process pixels row by row with bulk copying for better cache locality // OPTIMISATION: Process pixels row by row with bulk copying for better cache locality
for (y in 0 until TILE_SIZE_Y) { for (y in 0 until TAV_TILE_SIZE_Y) {
val frameY = startY + y val frameY = startY + y
if (frameY >= height) break if (frameY >= height) break
// Calculate valid pixel range for this row // Calculate valid pixel range for this row
val validStartX = maxOf(0, startX) val validStartX = maxOf(0, startX)
val validEndX = minOf(width, startX + TILE_SIZE_X) val validEndX = minOf(width, startX + TAV_TILE_SIZE_X)
val validPixelsInRow = validEndX - validStartX val validPixelsInRow = validEndX - validStartX
if (validPixelsInRow > 0) { if (validPixelsInRow > 0) {
@@ -4798,7 +4791,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
var bufferIdx = 0 var bufferIdx = 0
for (x in validStartX until validEndX) { for (x in validStartX until validEndX) {
val tileIdx = y * TILE_SIZE_X + (x - startX) val tileIdx = y * TAV_TILE_SIZE_X + (x - startX)
// YCoCg-R to RGB conversion (exact inverse of encoder) // YCoCg-R to RGB conversion (exact inverse of encoder)
val Y = yTile[tileIdx] val Y = yTile[tileIdx]
@@ -4826,17 +4819,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun tavConvertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray, private fun tavConvertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray,
rgbAddr: Long, width: Int, height: Int) { rgbAddr: Long, width: Int, height: Int) {
val startX = tileX * TILE_SIZE_X val startX = tileX * TAV_TILE_SIZE_X
val startY = tileY * TILE_SIZE_Y val startY = tileY * TAV_TILE_SIZE_Y
// OPTIMISATION: Process pixels row by row with bulk copying for better cache locality // OPTIMISATION: Process pixels row by row with bulk copying for better cache locality
for (y in 0 until TILE_SIZE_Y) { for (y in 0 until TAV_TILE_SIZE_Y) {
val frameY = startY + y val frameY = startY + y
if (frameY >= height) break if (frameY >= height) break
// Calculate valid pixel range for this row // Calculate valid pixel range for this row
val validStartX = maxOf(0, startX) val validStartX = maxOf(0, startX)
val validEndX = minOf(width, startX + TILE_SIZE_X) val validEndX = minOf(width, startX + TAV_TILE_SIZE_X)
val validPixelsInRow = validEndX - validStartX val validPixelsInRow = validEndX - validStartX
if (validPixelsInRow > 0) { if (validPixelsInRow > 0) {
@@ -4845,7 +4838,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
var bufferIdx = 0 var bufferIdx = 0
for (x in validStartX until validEndX) { for (x in validStartX until validEndX) {
val tileIdx = y * TILE_SIZE_X + (x - startX) val tileIdx = y * TAV_TILE_SIZE_X + (x - startX)
// ICtCp to sRGB conversion (adapted from encoder ICtCp functions) // ICtCp to sRGB conversion (adapted from encoder ICtCp functions)
val I = iTile[tileIdx].toDouble() / 255.0 val I = iTile[tileIdx].toDouble() / 255.0
@@ -4996,17 +4989,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Helper functions (simplified versions of existing DWT functions) // Helper functions (simplified versions of existing DWT functions)
private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
val startX = tileX * TILE_SIZE_X val startX = tileX * TAV_TILE_SIZE_X
val startY = tileY * TILE_SIZE_Y val startY = tileY * TAV_TILE_SIZE_Y
// OPTIMISATION: Copy entire rows at once for maximum performance // OPTIMISATION: Copy entire rows at once for maximum performance
for (y in 0 until TILE_SIZE_Y) { for (y in 0 until TAV_TILE_SIZE_Y) {
val frameY = startY + y val frameY = startY + y
if (frameY >= height) break if (frameY >= height) break
// Calculate valid pixel range for this row // Calculate valid pixel range for this row
val validStartX = maxOf(0, startX) val validStartX = maxOf(0, startX)
val validEndX = minOf(width, startX + TILE_SIZE_X) val validEndX = minOf(width, startX + TAV_TILE_SIZE_X)
val validPixelsInRow = validEndX - validStartX val validPixelsInRow = validEndX - validStartX
if (validPixelsInRow > 0) { if (validPixelsInRow > 0) {
@@ -5077,7 +5070,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val tileIdx = if (isMonoblock) { val tileIdx = if (isMonoblock) {
0 // Single tile index for monoblock 0 // Single tile index for monoblock
} else { } else {
tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX tileY * ((width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X) + tileX
} }
var ptr = readPtr var ptr = readPtr
@@ -5094,7 +5087,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
width * height width * height
} else { } else {
// Standard mode: padded tiles (344x288) // Standard mode: padded tiles (344x288)
PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y
} }
// Read delta coefficients using significance map format (same as intra but with deltas) // Read delta coefficients using significance map format (same as intra but with deltas)
@@ -5195,8 +5188,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() tavPreviousCoeffsCg!![tileIdx] = currentCg.clone()
// Apply inverse DWT // Apply inverse DWT
val tileWidth = if (isMonoblock) width else PADDED_TILE_SIZE_X val tileWidth = if (isMonoblock) width else TAV_PADDED_TILE_SIZE_X
val tileHeight = if (isMonoblock) height else PADDED_TILE_SIZE_Y val tileHeight = if (isMonoblock) height else TAV_PADDED_TILE_SIZE_Y
if (isLossless) { if (isLossless) {
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal)
@@ -5244,14 +5237,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
finalCgTile = currentCg finalCgTile = currentCg
} else { } else {
// Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288) // Standard mode: extract core 280x224 pixels from reconstructed padded tiles (344x288)
finalYTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) finalYTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
finalCoTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) finalCoTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
finalCgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) finalCgTile = FloatArray(TAV_TILE_SIZE_X * TAV_TILE_SIZE_Y)
for (y in 0 until TILE_SIZE_Y) { for (y in 0 until TAV_TILE_SIZE_Y) {
for (x in 0 until TILE_SIZE_X) { for (x in 0 until TAV_TILE_SIZE_X) {
val coreIdx = y * TILE_SIZE_X + x val coreIdx = y * TAV_TILE_SIZE_X + x
val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) val paddedIdx = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN)
finalYTile[coreIdx] = currentY[paddedIdx] finalYTile[coreIdx] = currentY[paddedIdx]
finalCoTile[coreIdx] = currentCo[paddedIdx] finalCoTile[coreIdx] = currentCo[paddedIdx]
@@ -5267,7 +5260,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Convert to RGB based on TAV version and mode // Convert to RGB based on TAV version and mode
// v1,v3 = YCoCg-R, v2,v4 = ICtCp // v1,v3 = YCoCg-R, v2,v4 = ICtCp
if (tavVersion == 2 || tavVersion == 4) { if (tavVersion % 2 == 0) {
// ICtCp color space // ICtCp color space
if (isMonoblock) { if (isMonoblock) {
tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height) tavConvertICtCpMonoblockToRGB(finalYTile, finalCoTile, finalCgTile, currentRGBAddr, width, height)

View File

@@ -16,22 +16,19 @@
#include <limits.h> #include <limits.h>
#include <float.h> #include <float.h>
#ifndef PI
#define PI 3.14159265358979323846f
#endif
// TSVM Advanced Video (TAV) format constants // TSVM Advanced Video (TAV) format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
// TAV version - dynamic based on colour space and perceptual tuning // TAV version - dynamic based on colour space and perceptual tuning
// Version 5: YCoCg-R monoblock with perceptual quantisation (default) // Version 8: ICtCp multi-tile with perceptual quantisation (--ictcp flag)
// Version 7: YCoCg-R multi-tile with perceptual quantisation (default if width > 640 or height > 540)
// Version 6: ICtCp monoblock with perceptual quantisation (--ictcp flag) // Version 6: ICtCp monoblock with perceptual quantisation (--ictcp flag)
// Legacy versions (uniform quantisation): // Version 5: YCoCg-R monoblock with perceptual quantisation (default if width <= 640 and height <= 540)
// Version 3: YCoCg-R monoblock uniform (--no-perceptual-tuning)
// Version 4: ICtCp monoblock uniform (--ictcp --no-perceptual-tuning) // Version 4: ICtCp monoblock uniform (--ictcp --no-perceptual-tuning)
// Version 1: YCoCg-R 4-tile (legacy, code preserved but not accessible) // Version 3: YCoCg-R monoblock uniform (--no-perceptual-tuning)
// Version 2: ICtCp 4-tile (legacy, code preserved but not accessible) // Version 2: ICtCp multi-tile uniform (--ictcp --no-perceptual-tuning)
// Version 1: YCoCg-R multi-tile uniform (--no-perceptual-tuning)
// Tile encoding modes (280x224 tiles) // Tile encoding modes
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) #define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
#define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles) #define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles)
#define TAV_MODE_DELTA 0x02 // Coefficient delta encoding (efficient P-frames) #define TAV_MODE_DELTA 0x02 // Coefficient delta encoding (efficient P-frames)
@@ -45,16 +42,15 @@
#define TAV_PACKET_SYNC 0xFF // Sync packet #define TAV_PACKET_SYNC 0xFF // Sync packet
// DWT settings // DWT settings
#define TILE_SIZE_X 280 // 280x224 tiles - better compression efficiency #define TILE_SIZE_X 640
#define TILE_SIZE_Y 224 // Optimised for TSVM 560x448 (2×2 tiles exactly) #define TILE_SIZE_Y 540
#define MAX_DECOMP_LEVELS 6 // Can go deeper: 280→140→70→35→17→8→4, 224→112→56→28→14→7→3
// Simulated overlapping tiles settings for seamless DWT processing // Simulated overlapping tiles settings for seamless DWT processing
#define DWT_FILTER_HALF_SUPPORT 4 // For 9/7 filter (filter lengths 9,7 → L=4) #define DWT_FILTER_HALF_SUPPORT 4 // For 9/7 filter (filter lengths 9,7 → L=4)
#define TILE_MARGIN_LEVELS 3 // Use margin for 3 levels: 4 * (2^3) = 4 * 8 = 32px #define TILE_MARGIN_LEVELS 3 // Use margin for 3 levels: 4 * (2^3) = 4 * 8 = 32px
#define TILE_MARGIN (DWT_FILTER_HALF_SUPPORT * (1 << TILE_MARGIN_LEVELS)) // 4 * 8 = 32px #define TILE_MARGIN (DWT_FILTER_HALF_SUPPORT * (1 << TILE_MARGIN_LEVELS)) // 4 * 8 = 32px
#define PADDED_TILE_SIZE_X (TILE_SIZE_X + 2 * TILE_MARGIN) // 280 + 64 = 344px #define PADDED_TILE_SIZE_X (TILE_SIZE_X + 2 * TILE_MARGIN)
#define PADDED_TILE_SIZE_Y (TILE_SIZE_Y + 2 * TILE_MARGIN) // 224 + 64 = 288px #define PADDED_TILE_SIZE_Y (TILE_SIZE_Y + 2 * TILE_MARGIN)
// Wavelet filter types // Wavelet filter types
#define WAVELET_5_3_REVERSIBLE 0 // Lossless capable #define WAVELET_5_3_REVERSIBLE 0 // Lossless capable
@@ -662,7 +658,7 @@ static tav_encoder_t* create_encoder(void) {
enc->fps = DEFAULT_FPS; enc->fps = DEFAULT_FPS;
enc->quality_level = DEFAULT_QUALITY; enc->quality_level = DEFAULT_QUALITY;
enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE; enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE;
enc->decomp_levels = MAX_DECOMP_LEVELS; enc->decomp_levels = 6;
enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY]; enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY];
enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY]; enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY];
enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY]; enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY];
@@ -681,9 +677,7 @@ static int initialise_encoder(tav_encoder_t *enc) {
if (!enc) return -1; if (!enc) return -1;
// Automatic decomposition levels for monoblock mode // Automatic decomposition levels for monoblock mode
if (enc->monoblock) { enc->decomp_levels = calculate_max_decomp_levels(enc->width, enc->height);
enc->decomp_levels = calculate_max_decomp_levels(enc->width, enc->height);
}
// Calculate tile dimensions // Calculate tile dimensions
if (enc->monoblock) { if (enc->monoblock) {
@@ -691,7 +685,7 @@ static int initialise_encoder(tav_encoder_t *enc) {
enc->tiles_x = 1; enc->tiles_x = 1;
enc->tiles_y = 1; enc->tiles_y = 1;
} else { } else {
// Standard mode: multiple 280x224 tiles // Standard mode: multiple tiles
enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X; enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X;
enc->tiles_y = (enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y; enc->tiles_y = (enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y;
} }
@@ -1041,7 +1035,7 @@ static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
int core_src_end_x = core_start_x + TILE_SIZE_X; int core_src_end_x = core_start_x + TILE_SIZE_X;
if (core_src_start_x >= 0 && core_src_end_x <= enc->width) { if (core_src_start_x >= 0 && core_src_end_x <= enc->width) {
// OPTIMISATION: Bulk copy core region (280 pixels) in one operation // OPTIMISATION: Bulk copy core region in one operation
const int src_core_offset = src_row_offset + core_src_start_x; const int src_core_offset = src_row_offset + core_src_start_x;
memcpy(&padded_y[padded_row_offset + core_start_px], memcpy(&padded_y[padded_row_offset + core_start_px],
@@ -2181,7 +2175,11 @@ static int write_tav_header(tav_encoder_t *enc) {
version = enc->ictcp_mode ? 4 : 3; // Version 4 for ICtCp uniform, 3 for YCoCg-R uniform version = enc->ictcp_mode ? 4 : 3; // Version 4 for ICtCp uniform, 3 for YCoCg-R uniform
} }
} else { } else {
version = enc->ictcp_mode ? 2 : 1; // Legacy 4-tile versions if (enc->perceptual_tuning) {
version = enc->ictcp_mode ? 8 : 7;
} else {
version = enc->ictcp_mode ? 2 : 1;
}
} }
fputc(version, enc->output_fp); fputc(version, enc->output_fp);
@@ -3260,9 +3258,6 @@ int main(int argc, char *argv[]) {
return 1; return 1;
} }
break; break;
/*case 'd':
enc->decomp_levels = CLAMP(atoi(optarg), 1, MAX_DECOMP_LEVELS);
break;*/
case 'v': case 'v':
enc->verbose = 1; enc->verbose = 1;
break; break;
@@ -3330,6 +3325,11 @@ int main(int argc, char *argv[]) {
enc->perceptual_tuning = 0; enc->perceptual_tuning = 0;
} }
// disable monoblock mode if either width or height exceeds tile size
if (enc->width > TILE_SIZE_X || enc->height > TILE_SIZE_Y) {
enc->monoblock = 0;
}
if (enc->lossless) { if (enc->lossless) {
enc->perceptual_tuning = 0; enc->perceptual_tuning = 0;
enc->quantiser_y = 0; // will be resolved to 1 enc->quantiser_y = 0; // will be resolved to 1