mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
more wavelets for experimentation
This commit is contained in:
@@ -31,6 +31,9 @@ const TAV_FILE_HEADER_FIRST = 0x1F
|
|||||||
// Wavelet filter types
|
// Wavelet filter types
|
||||||
const WAVELET_5_3_REVERSIBLE = 0
|
const WAVELET_5_3_REVERSIBLE = 0
|
||||||
const WAVELET_9_7_IRREVERSIBLE = 1
|
const WAVELET_9_7_IRREVERSIBLE = 1
|
||||||
|
const WAVELET_BIORTHOGONAL_13_7 = 2
|
||||||
|
const WAVELET_DD4 = 16
|
||||||
|
const WAVELET_HAAR = 255
|
||||||
|
|
||||||
// Subtitle opcodes (SSF format - same as TEV)
|
// Subtitle opcodes (SSF format - same as TEV)
|
||||||
const SSF_OP_NOP = 0x00
|
const SSF_OP_NOP = 0x00
|
||||||
@@ -441,7 +444,7 @@ console.log(`TAV Decoder`)
|
|||||||
console.log(`Resolution: ${header.width}x${header.height}`)
|
console.log(`Resolution: ${header.width}x${header.height}`)
|
||||||
console.log(`FPS: ${header.fps}`)
|
console.log(`FPS: ${header.fps}`)
|
||||||
console.log(`Total frames: ${header.totalFrames}`)
|
console.log(`Total frames: ${header.totalFrames}`)
|
||||||
console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : "9/7 irreversible"}`)
|
console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : header.waveletFilter === WAVELET_9_7_IRREVERSIBLE ? "9/7 irreversible" : header.waveletFilter === WAVELET_BIORTHOGONAL_13_7 ? "Biorthogonal 13/7" : header.waveletFilter === WAVELET_DD4 ? "DD-4" : header.waveletFilter === WAVELET_HAAR ? "Haar" : "unknown"}`)
|
||||||
console.log(`Decomposition levels: ${header.decompLevels}`)
|
console.log(`Decomposition levels: ${header.decompLevels}`)
|
||||||
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
|
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
|
||||||
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
|
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
|
||||||
@@ -658,7 +661,7 @@ try {
|
|||||||
console.log(`Resolution: ${header.width}x${header.height}`)
|
console.log(`Resolution: ${header.width}x${header.height}`)
|
||||||
console.log(`FPS: ${header.fps}`)
|
console.log(`FPS: ${header.fps}`)
|
||||||
console.log(`Total frames: ${header.totalFrames}`)
|
console.log(`Total frames: ${header.totalFrames}`)
|
||||||
console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : "9/7 irreversible"}`)
|
console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : header.waveletFilter === WAVELET_9_7_IRREVERSIBLE ? "9/7 irreversible" : header.waveletFilter === WAVELET_BIORTHOGONAL_13_7 ? "Biorthogonal 13/7" : header.waveletFilter === WAVELET_DD4 ? "DD-4" : header.waveletFilter === WAVELET_HAAR ? "Haar" : "unknown"}`)
|
||||||
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
|
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
|
||||||
|
|
||||||
// Continue with new file
|
// Continue with new file
|
||||||
|
|||||||
@@ -899,9 +899,12 @@ transmission capability, and region-of-interest coding.
|
|||||||
uint8 FPS: frames per second. Use 0x00 for still images
|
uint8 FPS: frames per second. Use 0x00 for still images
|
||||||
uint32 Total Frames: number of video frames. Use 0xFFFFFFFF to denote still image (.im3 file)
|
uint32 Total Frames: number of video frames. Use 0xFFFFFFFF to denote still image (.im3 file)
|
||||||
- frame count of 0 is used to denote not-finalised video stream
|
- frame count of 0 is used to denote not-finalised video stream
|
||||||
uint8 Wavelet Filter Type/File Role:
|
uint8 Wavelet Filter Type:
|
||||||
- 0 = 5/3 reversible
|
- 0 = 5/3 reversible (LGT 5/3, JPEG 2000 standard)
|
||||||
- 1 = 9/7 irreversible
|
- 1 = 9/7 irreversible (CDF 9/7, slight modification of JPEG 2000)
|
||||||
|
- 2 = CDF 13/7 (experimental)
|
||||||
|
- 16 = DD-4 (Four-point interpolating Deslauriers-Dubuc; experimental)
|
||||||
|
- 255 = Haar (experimental)
|
||||||
uint8 Decomposition Levels: number of DWT levels (1-6+)
|
uint8 Decomposition Levels: number of DWT levels (1-6+)
|
||||||
uint8 Quantiser Index for Y channel (1: lossless, 255: potato)
|
uint8 Quantiser Index for Y channel (1: lossless, 255: potato)
|
||||||
uint8 Quantiser Index for Co channel (1: lossless, 255: potato)
|
uint8 Quantiser Index for Co channel (1: lossless, 255: potato)
|
||||||
|
|||||||
@@ -4064,70 +4064,47 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
/**
 * Dequantises one channel of DWT coefficients using per-subband perceptual weights.
 *
 * Coefficients are read from [quantised] and written to [dequantised] sequentially
 * (index i maps to index i). The [subbands] layout is used only to decide which
 * perceptual weight applies at each linear position; positions not covered by any
 * subband keep a weight of 1.0.
 *
 * @param qIndex        forwarded unchanged to `getPerceptualWeight`
 * @param qYGlobal      forwarded unchanged to `getPerceptualWeight`
 * @param quantised     quantised coefficients (input)
 * @param dequantised   receives `quantised[i] * baseQuantizer * weight[i]`; only the first
 *                      `min(quantised.size, dequantised.size)` entries are written
 * @param subbands      subband descriptors (`coeffStart`, `coeffCount`, `level`, `subbandType`)
 * @param baseQuantizer base quantiser step for this channel
 * @param isChroma      selects the "Chroma"/"Luma" label in debug output and is forwarded
 *                      to `getPerceptualWeight`
 * @param decompLevels  forwarded unchanged to `getPerceptualWeight`
 */
private fun dequantiseDWTSubbandsPerceptual(qIndex: Int, qYGlobal: Int, quantised: ShortArray, dequantised: FloatArray,
                                            subbands: List<DWTSubbandInfo>, baseQuantizer: Float, isChroma: Boolean, decompLevels: Int) {

    // One weight per linear coefficient position; 1.0 unless a subband claims the position.
    val perCoeffWeight = FloatArray(quantised.size) { 1.0f }

    for (band in subbands) {
        val bandWeight = getPerceptualWeight(qIndex, qYGlobal, band.level, band.subbandType, isChroma, decompLevels)

        // Stamp this subband's weight over the range of positions it occupies.
        for (offset in 0 until band.coeffCount) {
            val pos = band.coeffStart + offset
            if (pos < perCoeffWeight.size) {
                perCoeffWeight[pos] = bandWeight
            }
        }
    }

    // Linear dequantisation: never read or write past the shorter of the two arrays.
    val usable = minOf(quantised.size, dequantised.size)
    for (pos in 0 until usable) {
        val effectiveQuantizer = baseQuantizer * perCoeffWeight[pos]
        dequantised[pos] = quantised[pos] * effectiveQuantizer
    }

    // Optional diagnostics, emitted only for the frame selected for debugging.
    if (tavDebugCurrentFrameNumber == tavDebugFrameTarget) {
        val channelType = if (isChroma) "Chroma" else "Luma"
        val nonZeroCoeffs = quantised.count { it != 0.toShort() }
        val orderedWeights = perCoeffWeight.sorted()
        val weightRange = if (orderedWeights.isEmpty()) "no weights"
                          else "weights: ${orderedWeights.first()}-${orderedWeights.last()}"

        println("LINEAR PERCEPTUAL DEQUANT: $channelType - coeffs=${quantised.size}, nonzero=$nonZeroCoeffs, $weightRange")
    }
}
|
||||||
|
|
||||||
@@ -5029,8 +5006,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
if (filterType == 0) {
|
if (filterType == 0) {
|
||||||
tavApplyDWT53Inverse1D(tempCol, currentHeight)
|
tavApplyDWT53Inverse1D(tempCol, currentHeight)
|
||||||
} else {
|
} else if (filterType == 1) {
|
||||||
tavApplyDWT97Inverse1D(tempCol, currentHeight)
|
tavApplyDWT97Inverse1D(tempCol, currentHeight)
|
||||||
|
} else if (filterType == 2) {
|
||||||
|
tavApplyDWTBior137Inverse1D(tempCol, currentHeight)
|
||||||
|
} else if (filterType == 16) {
|
||||||
|
tavApplyDWTDD4Inverse1D(tempCol, currentHeight)
|
||||||
|
} else if (filterType == 255) {
|
||||||
|
tavApplyDWTHaarInverse1D(tempCol, currentHeight)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (y in 0 until currentHeight) {
|
for (y in 0 until currentHeight) {
|
||||||
@@ -5046,8 +5029,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
if (filterType == 0) {
|
if (filterType == 0) {
|
||||||
tavApplyDWT53Inverse1D(tempRow, currentWidth)
|
tavApplyDWT53Inverse1D(tempRow, currentWidth)
|
||||||
} else {
|
} else if (filterType == 1) {
|
||||||
tavApplyDWT97Inverse1D(tempRow, currentWidth)
|
tavApplyDWT97Inverse1D(tempRow, currentWidth)
|
||||||
|
} else if (filterType == 2) {
|
||||||
|
tavApplyDWTBior137Inverse1D(tempRow, currentWidth)
|
||||||
|
} else if (filterType == 16) {
|
||||||
|
tavApplyDWTDD4Inverse1D(tempRow, currentWidth)
|
||||||
|
} else if (filterType == 255) {
|
||||||
|
tavApplyDWTHaarInverse1D(tempRow, currentWidth)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (x in 0 until currentWidth) {
|
for (x in 0 until currentWidth) {
|
||||||
@@ -5197,7 +5186,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val left = temp[i]
|
val left = temp[i]
|
||||||
// Symmetric extension for right boundary
|
// Symmetric extension for right boundary
|
||||||
val right = if (i < half - 1) temp[i + 1] else if (half > 2) temp[half - 2] else temp[half - 1]
|
val right = if (i < half - 1) temp[i + 1] else if (half > 2) temp[half - 2] else temp[half - 1]
|
||||||
temp[half + i] -= 0.5f * (left + right)
|
temp[half + i] += 0.5f * (left + right) // ADD to undo the subtraction in encoder
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5224,4 +5213,184 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * In-place inverse of the 1D four-point interpolating Deslauriers-Dubuc (DD-4) lifting transform.
 *
 * On entry, the first `(length + 1) / 2` entries of [data] hold the low-pass band and the
 * following `length / 2` entries hold the high-pass band. On return, the first [length]
 * entries hold the reconstructed samples (even positions from the low band, odd positions
 * from the high band). Lengths below 2 are left untouched.
 *
 * The lifting steps are undone in reverse order:
 *  1. update:  `s[i] -= 0.25 * (d[i-1] + d[i])` (missing neighbours count as 0)
 *  2. predict: `d[i] += -1/16*s[i-1] + 9/16*s[i] + 9/16*s[i+1] - 1/16*s[i+2]`
 *
 * @param data   coefficient buffer, transformed in place
 * @param length number of leading entries of [data] to process
 */
private fun tavApplyDWTDD4Inverse1D(data: FloatArray, length: Int) {
    if (length < 2) return

    val lowCount = (length + 1) / 2   // rounds up so odd lengths give the extra sample to the low band
    val highCount = length / 2
    val work = FloatArray(length)

    // Copy both bands into the scratch buffer (band layout is unchanged: low first, high second).
    for (k in 0 until lowCount) {
        work[k] = data[k]
    }
    for (k in 0 until highCount) {
        val src = lowCount + k
        if (src < length && src < data.size) {
            work[src] = data[src]
        }
    }

    // Undo the update step: remove the detail contribution from every low-pass sample.
    for (k in 0 until lowCount) {
        val dCurr = if (k < highCount) work[lowCount + k] else 0.0f
        val dPrev = if (k > 0 && k - 1 < highCount) work[lowCount + k - 1] else 0.0f
        work[k] -= 0.25f * (dPrev + dCurr)
    }

    // Undo the prediction step: add the four-point prediction back onto each detail sample.
    for (k in 0 until highCount) {
        // Out-of-range neighbours: s[i-1] and s[i+1] clamp to the nearest edge sample,
        // s[i+2] falls back to the second-to-last low sample (or the last one if there is only one).
        val sPrev = work[maxOf(k - 1, 0)]
        val sHere = work[k]
        val sNext = work[minOf(k + 1, lowCount - 1)]
        val sNext2 = when {
            k + 2 < lowCount -> work[k + 2]
            lowCount > 1 -> work[lowCount - 2]
            else -> work[lowCount - 1]
        }

        val prediction = (-1.0f/16.0f) * sPrev + (9.0f/16.0f) * sHere +
                         (9.0f/16.0f) * sNext + (-1.0f/16.0f) * sNext2

        work[lowCount + k] += prediction
    }

    // Interleave: even output positions come from the low band, odd ones from the high band.
    for (pos in 0 until length) {
        val k = pos / 2
        data[pos] = when {
            pos % 2 == 0 -> work[k]
            lowCount + k < length -> work[lowCount + k]
            else -> {
                // Fallback when no matching high-pass coefficient exists: reuse the last one, else 0.
                val lastHigh = highCount - 1
                if (lastHigh >= 0 && lowCount + lastHigh < length) work[lowCount + lastHigh] else 0.0f
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * In-place inverse of the encoder's 1D "Biorthogonal 13/7" lifting transform
 * (`dwt_bior137_forward_1d`).
 *
 * Note that the forward transform currently implemented by the encoder is a simplified
 * two-step lifting scheme (2-tap predict, 0.25-weighted update), not a full 13/7 filter
 * bank; this function inverts exactly that scheme.
 *
 * On entry, the first `(length + 1) / 2` entries of [data] hold the low-pass band and the
 * following `length / 2` entries hold the high-pass band. On return, the first [length]
 * entries hold the reconstructed samples. Lengths below 2 are left untouched.
 *
 * The boundary rules of the update step deliberately mirror the encoder:
 *  - the first low-pass sample has no "previous" detail coefficient (treated as 0);
 *  - the last low-pass sample (index `half - 1`) has no "current" detail coefficient
 *    (treated as 0), even when the length is even and such a coefficient exists.
 * Using any other boundary rule here would subtract a value the encoder never added and
 * break reconstruction.
 *
 * @param data   coefficient buffer, transformed in place
 * @param length number of leading entries of [data] to process
 */
private fun tavApplyDWTBior137Inverse1D(data: FloatArray, length: Int) {
    if (length < 2) return

    val half = (length + 1) / 2      // number of low-pass coefficients
    val highCount = length / 2       // number of high-pass coefficients
    val temp = FloatArray(length)

    // Band layout is already low-then-high, so a straight copy splits the bands.
    for (i in 0 until length) {
        temp[i] = data[i]
    }

    // Step 2 (undone first): remove the update term the encoder added to each low-pass sample.
    for (i in 0 until half) {
        val dPrev = if (i > 0) temp[half + i - 1] else 0.0f
        val dCurr = if (i < half - 1) temp[half + i] else 0.0f
        temp[i] -= 0.25f * (dPrev + dCurr)
    }

    // Step 1: add the 2-tap prediction back onto each detail coefficient.
    for (i in 0 until highCount) {
        val left = temp[i]
        // At the right edge the encoder reuses the left neighbour as the right neighbour.
        val right = if (i + 1 < half) temp[i + 1] else temp[half - 1]
        temp[half + i] += 0.5f * (left + right)
    }

    // Interleave: even positions from the low band, odd positions from the high band.
    for (i in 0 until length) {
        val idx = i / 2
        data[i] = if (i % 2 == 0) temp[idx] else temp[half + idx]
    }
}
|
||||||
|
|
||||||
|
/**
 * In-place inverse of the 1D Haar transform.
 *
 * On entry, the first `(length + 1) / 2` entries of [data] hold pair averages and the
 * following `length / 2` entries hold pair half-differences. Each pair is rebuilt as
 * `(average + difference, average - difference)`. For an odd [length] the final sample has
 * no partner and is taken directly from the last average. Lengths below 2 are left untouched.
 *
 * @param data   coefficient buffer, transformed in place
 * @param length number of leading entries of [data] to process
 */
private fun tavApplyDWTHaarInverse1D(data: FloatArray, length: Int) {
    if (length < 2) return

    val lowCount = (length + 1) / 2

    // Snapshot the coefficients first: the output overwrites the same buffer.
    val snapshot = FloatArray(length)
    for (k in 0 until length) {
        snapshot[k] = data[k]
    }

    for (pair in 0 until lowCount) {
        val first = 2 * pair
        val second = first + 1
        val average = snapshot[pair]

        if (second >= length) {
            // Odd length: the trailing sample was stored unchanged in the low band.
            data[first] = average
            continue
        }

        val difference = if (lowCount + pair < length) snapshot[lowCount + pair] else 0.0f
        data[first] = average + difference
        data[second] = average - difference
    }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -58,6 +58,9 @@
|
|||||||
// Wavelet filter types
|
// Wavelet filter types
|
||||||
#define WAVELET_5_3_REVERSIBLE 0 // Lossless capable
|
#define WAVELET_5_3_REVERSIBLE 0 // Lossless capable
|
||||||
#define WAVELET_9_7_IRREVERSIBLE 1 // Higher compression
|
#define WAVELET_9_7_IRREVERSIBLE 1 // Higher compression
|
||||||
|
#define WAVELET_BIORTHOGONAL_13_7 2 // Biorthogonal 13/7 wavelet
|
||||||
|
#define WAVELET_DD4 16 // Four-point interpolating Deslauriers-Dubuc (DD-4)
|
||||||
|
#define WAVELET_HAAR 255 // Haar wavelet (simplest wavelet transform)
|
||||||
|
|
||||||
// Default settings
|
// Default settings
|
||||||
#define DEFAULT_WIDTH 560
|
#define DEFAULT_WIDTH 560
|
||||||
@@ -344,7 +347,7 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
|
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
|
||||||
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
|
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
|
||||||
printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 1-255 for each channel (1: lossless, 255: potato)\n");
|
printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 1-255 for each channel (1: lossless, 255: potato)\n");
|
||||||
// printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
|
printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible, 2=DD-4 (default: 1)\n");
|
||||||
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
|
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
|
||||||
printf(" --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
|
printf(" --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
|
||||||
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
|
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
|
||||||
@@ -601,6 +604,127 @@ static void dwt_97_forward_1d(float *data, int length) {
|
|||||||
free(temp);
|
free(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Forward 1D four-point interpolating Deslauriers-Dubuc (DD-4) lifting transform, in place.
//
// Input:  `length` samples in `data`.
// Output: low-pass band in data[0 .. (length+1)/2 - 1], high-pass band in the remainder.
//
// Steps:
//   1. split into even (low) and odd (high) samples
//   2. predict: d[i] -= -1/16*s[i-1] + 9/16*s[i] + 9/16*s[i+1] - 1/16*s[i+2]
//   3. update:  s[i] += 0.25 * (d[i-1] + d[i])   (missing neighbours count as 0)
//
// Lengths below 2 are left untouched.
// NOTE(review): the malloc result is not checked before use.
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int low_count = (length + 1) / 2;   // rounds up: odd lengths give the extra sample to the low band
    const int high_count = length / 2;

    float *work = malloc(length * sizeof(float));
    float *low = work;
    float *high = work + low_count;

    // De-interleave: even samples form the low band, odd samples the high band.
    for (int k = 0; k < low_count; k++) {
        low[k] = data[2 * k];
    }
    for (int k = 0; k < high_count; k++) {
        high[k] = data[2 * k + 1];
    }

    // Prediction: subtract the four-point estimate built from neighbouring even samples.
    for (int k = 0; k < high_count; k++) {
        // Out-of-range neighbours: s[i-1] and s[i+1] clamp to the nearest edge sample,
        // s[i+2] falls back to the second-to-last low sample (or the last one if there is only one).
        const float s_prev = low[k > 0 ? k - 1 : 0];
        const float s_here = low[k];
        const float s_next = low[k + 1 < low_count ? k + 1 : low_count - 1];
        float s_next2;
        if (k + 2 < low_count) {
            s_next2 = low[k + 2];
        } else if (low_count > 1) {
            s_next2 = low[low_count - 2];
        } else {
            s_next2 = low[low_count - 1];
        }

        float prediction = (-1.0f/16.0f) * s_prev + (9.0f/16.0f) * s_here +
                           (9.0f/16.0f) * s_next + (-1.0f/16.0f) * s_next2;

        high[k] -= prediction;
    }

    // Update: fold a quarter of the adjacent detail coefficients into each low-pass sample.
    for (int k = 0; k < low_count; k++) {
        float d_curr = (k < high_count) ? high[k] : 0.0f;
        float d_prev = (k > 0 && k - 1 < high_count) ? high[k - 1] : 0.0f;
        low[k] += 0.25f * (d_prev + d_curr);
    }

    memcpy(data, work, length * sizeof(float));
    free(work);
}
|
||||||
|
|
||||||
|
// Forward 1D transform registered as "Biorthogonal 13/7", in place.
//
// The current implementation is a simplified two-step lifting scheme (2-tap predict,
// 0.25-weighted update) rather than a full 13-tap/7-tap filter pair.
//
// Input:  `length` samples in `data`.
// Output: low-pass band in data[0 .. (length+1)/2 - 1], high-pass band in the remainder.
//
// Boundary behaviour of the update step (a decoder must mirror this exactly):
//   - the first low-pass sample uses 0 in place of a previous detail coefficient;
//   - the last low-pass sample (index half-1) uses 0 in place of its current detail
//     coefficient, including for even lengths where that coefficient exists.
//
// Lengths below 2 are left untouched.
// NOTE(review): the malloc result is not checked before use.
static void dwt_bior137_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    const int high_count = length / 2;   // number of odd-indexed samples
    float *work = malloc(length * sizeof(float));
    float *detail = work + half;

    // Predict: each odd sample minus the mean of its two even neighbours.
    for (int k = 0; k < high_count; k++) {
        const int even_idx = 2 * k;
        float left = data[even_idx];
        // No right neighbour at the edge: reuse the left one.
        float right = (even_idx + 2 < length) ? data[even_idx + 2] : data[even_idx];
        float prediction = 0.5f * (left + right);

        detail[k] = data[even_idx + 1] - prediction;
    }

    // Update: each even sample plus a quarter of the surrounding detail coefficients.
    for (int k = 0; k < half; k++) {
        float update = 0.25f * ((k > 0 ? detail[k - 1] : 0) +
                                (k < half - 1 ? detail[k] : 0));
        work[k] = data[2 * k] + update;
    }

    memcpy(data, work, length * sizeof(float));
    free(work);
}
|
||||||
|
|
||||||
|
// Forward 1D Haar transform, in place.
//
// Each adjacent pair (a, b) becomes an average (a + b) / 2 in the low band and a
// half-difference (a - b) / 2 in the high band. For an odd `length` the unpaired final
// sample is copied unchanged to the end of the low band.
//
// Output layout: low band in data[0 .. (length+1)/2 - 1], high band in the remainder.
// Lengths below 2 are left untouched.
// NOTE(review): the malloc result is not checked before use.
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    const int pairs = length / 2;
    float *work = malloc(length * sizeof(float));

    for (int p = 0; p < pairs; p++) {
        const float a = data[2 * p];
        const float b = data[2 * p + 1];
        work[p] = (a + b) / 2.0f;          // low-pass: average
        work[half + p] = (a - b) / 2.0f;   // high-pass: half-difference
    }

    if (length % 2 != 0) {
        // Odd length: the trailing sample has no partner and goes to the low band as-is.
        work[half - 1] = data[length - 1];
    }

    memcpy(data, work, length * sizeof(float));
    free(work);
}
|
||||||
|
|
||||||
// Extract padded tile with margins for seamless DWT processing (correct implementation)
|
// Extract padded tile with margins for seamless DWT processing (correct implementation)
|
||||||
static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
|
static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||||
float *padded_y, float *padded_co, float *padded_cg) {
|
float *padded_y, float *padded_co, float *padded_cg) {
|
||||||
@@ -712,8 +836,14 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type)
|
|||||||
|
|
||||||
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
||||||
dwt_53_forward_1d(temp_row, current_width);
|
dwt_53_forward_1d(temp_row, current_width);
|
||||||
} else {
|
} else if (filter_type == WAVELET_9_7_IRREVERSIBLE) {
|
||||||
dwt_97_forward_1d(temp_row, current_width);
|
dwt_97_forward_1d(temp_row, current_width);
|
||||||
|
} else if (filter_type == WAVELET_BIORTHOGONAL_13_7) {
|
||||||
|
dwt_bior137_forward_1d(temp_row, current_width);
|
||||||
|
} else if (filter_type == WAVELET_DD4) {
|
||||||
|
dwt_dd4_forward_1d(temp_row, current_width);
|
||||||
|
} else if (filter_type == WAVELET_HAAR) {
|
||||||
|
dwt_haar_forward_1d(temp_row, current_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int x = 0; x < current_width; x++) {
|
for (int x = 0; x < current_width; x++) {
|
||||||
@@ -729,8 +859,14 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type)
|
|||||||
|
|
||||||
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
||||||
dwt_53_forward_1d(temp_col, current_height);
|
dwt_53_forward_1d(temp_col, current_height);
|
||||||
} else {
|
} else if (filter_type == WAVELET_9_7_IRREVERSIBLE) {
|
||||||
dwt_97_forward_1d(temp_col, current_height);
|
dwt_97_forward_1d(temp_col, current_height);
|
||||||
|
} else if (filter_type == WAVELET_BIORTHOGONAL_13_7) {
|
||||||
|
dwt_bior137_forward_1d(temp_col, current_height);
|
||||||
|
} else if (filter_type == WAVELET_DD4) {
|
||||||
|
dwt_dd4_forward_1d(temp_col, current_height);
|
||||||
|
} else if (filter_type == WAVELET_HAAR) {
|
||||||
|
dwt_haar_forward_1d(temp_col, current_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int y = 0; y < current_height; y++) {
|
for (int y = 0; y < current_height; y++) {
|
||||||
@@ -762,8 +898,14 @@ static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int
|
|||||||
|
|
||||||
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
||||||
dwt_53_forward_1d(temp_row, current_width);
|
dwt_53_forward_1d(temp_row, current_width);
|
||||||
} else {
|
} else if (filter_type == WAVELET_9_7_IRREVERSIBLE) {
|
||||||
dwt_97_forward_1d(temp_row, current_width);
|
dwt_97_forward_1d(temp_row, current_width);
|
||||||
|
} else if (filter_type == WAVELET_BIORTHOGONAL_13_7) {
|
||||||
|
dwt_bior137_forward_1d(temp_row, current_width);
|
||||||
|
} else if (filter_type == WAVELET_DD4) {
|
||||||
|
dwt_dd4_forward_1d(temp_row, current_width);
|
||||||
|
} else if (filter_type == WAVELET_HAAR) {
|
||||||
|
dwt_haar_forward_1d(temp_row, current_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int x = 0; x < current_width; x++) {
|
for (int x = 0; x < current_width; x++) {
|
||||||
@@ -779,8 +921,14 @@ static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int
|
|||||||
|
|
||||||
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
if (filter_type == WAVELET_5_3_REVERSIBLE) {
|
||||||
dwt_53_forward_1d(temp_col, current_height);
|
dwt_53_forward_1d(temp_col, current_height);
|
||||||
} else {
|
} else if (filter_type == WAVELET_9_7_IRREVERSIBLE) {
|
||||||
dwt_97_forward_1d(temp_col, current_height);
|
dwt_97_forward_1d(temp_col, current_height);
|
||||||
|
} else if (filter_type == WAVELET_BIORTHOGONAL_13_7) {
|
||||||
|
dwt_bior137_forward_1d(temp_col, current_height);
|
||||||
|
} else if (filter_type == WAVELET_DD4) {
|
||||||
|
dwt_dd4_forward_1d(temp_col, current_height);
|
||||||
|
} else if (filter_type == WAVELET_HAAR) {
|
||||||
|
dwt_haar_forward_1d(temp_col, current_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int y = 0; y < current_height; y++) {
|
for (int y = 0; y < current_height; y++) {
|
||||||
@@ -793,6 +941,7 @@ static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int
|
|||||||
free(temp_col);
|
free(temp_col);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Quantisation for DWT subbands with rate control
|
// Quantisation for DWT subbands with rate control
|
||||||
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
|
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
|
||||||
float effective_q = quantiser;
|
float effective_q = quantiser;
|
||||||
@@ -2483,7 +2632,7 @@ int main(int argc, char *argv[]) {
|
|||||||
{"quality", required_argument, 0, 'q'},
|
{"quality", required_argument, 0, 'q'},
|
||||||
{"quantiser", required_argument, 0, 'Q'},
|
{"quantiser", required_argument, 0, 'Q'},
|
||||||
{"quantiser", required_argument, 0, 'Q'},
|
{"quantiser", required_argument, 0, 'Q'},
|
||||||
// {"wavelet", required_argument, 0, 'w'},
|
{"wavelet", required_argument, 0, 'w'},
|
||||||
{"bitrate", required_argument, 0, 'b'},
|
{"bitrate", required_argument, 0, 'b'},
|
||||||
{"arate", required_argument, 0, 1400},
|
{"arate", required_argument, 0, 1400},
|
||||||
{"subtitle", required_argument, 0, 'S'},
|
{"subtitle", required_argument, 0, 'S'},
|
||||||
@@ -2532,9 +2681,9 @@ int main(int argc, char *argv[]) {
|
|||||||
enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 255);
|
enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 255);
|
||||||
enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 255);
|
enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 255);
|
||||||
break;
|
break;
|
||||||
/*case 'w':
|
case 'w':
|
||||||
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
|
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 255);
|
||||||
break;*/
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
enc->output_fps = atoi(optarg);
|
enc->output_fps = atoi(optarg);
|
||||||
if (enc->output_fps <= 0) {
|
if (enc->output_fps <= 0) {
|
||||||
@@ -2625,7 +2774,12 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("Input: %s\n", enc->input_file);
|
printf("Input: %s\n", enc->input_file);
|
||||||
printf("Output: %s\n", enc->output_file);
|
printf("Output: %s\n", enc->output_file);
|
||||||
printf("Resolution: %dx%d @ %dfps\n", enc->width, enc->height, enc->output_fps);
|
printf("Resolution: %dx%d @ %dfps\n", enc->width, enc->height, enc->output_fps);
|
||||||
printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible");
|
printf("Wavelet: %s\n",
|
||||||
|
enc->wavelet_filter == WAVELET_5_3_REVERSIBLE ? "CDF 5/3" :
|
||||||
|
enc->wavelet_filter == WAVELET_9_7_IRREVERSIBLE ? "CDF 9/7" :
|
||||||
|
enc->wavelet_filter == WAVELET_BIORTHOGONAL_13_7 ? "CDF 13/7" :
|
||||||
|
enc->wavelet_filter == WAVELET_DD4 ? "DD 4-tap" :
|
||||||
|
enc->wavelet_filter == WAVELET_HAAR ? "Haar" : "unknown");
|
||||||
printf("Decomposition levels: %d\n", enc->decomp_levels);
|
printf("Decomposition levels: %d\n", enc->decomp_levels);
|
||||||
printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
|
printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
|
||||||
printf("Quantisation: %s\n", enc->perceptual_tuning ? "Perceptual (HVS-optimised)" : "Uniform (legacy)");
|
printf("Quantisation: %s\n", enc->perceptual_tuning ? "Perceptual (HVS-optimised)" : "Uniform (legacy)");
|
||||||
|
|||||||
Reference in New Issue
Block a user