mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-12 07:44:03 +09:00
Knusperli-esque post deblocking filter
This commit is contained in:
@@ -3,7 +3,8 @@
|
|||||||
// Usage: playtev moviefile.tev [options]
|
// Usage: playtev moviefile.tev [options]
|
||||||
// Options: -i (interactive), -debug-mv (show motion vector debug visualization)
|
// Options: -i (interactive), -debug-mv (show motion vector debug visualization)
|
||||||
// -deinterlace=algorithm (yadif or bwdif, default: yadif)
|
// -deinterlace=algorithm (yadif or bwdif, default: yadif)
|
||||||
// -nodeblock (disble deblocking filter)
|
// -nodeblock (disable post-processing deblocking filter)
|
||||||
|
// -boundaryaware (enable boundary-aware decoding to prevent artifacts at DCT level)
|
||||||
|
|
||||||
const WIDTH = 560
|
const WIDTH = 560
|
||||||
const HEIGHT = 448
|
const HEIGHT = 448
|
||||||
@@ -46,6 +47,7 @@ let interactive = false
|
|||||||
let debugMotionVectors = false
|
let debugMotionVectors = false
|
||||||
let deinterlaceAlgorithm = "yadif"
|
let deinterlaceAlgorithm = "yadif"
|
||||||
let enableDeblocking = true // Default: enabled (use -nodeblock to disable)
|
let enableDeblocking = true // Default: enabled (use -nodeblock to disable)
|
||||||
|
let enableBoundaryAwareDecoding = false // Default: disabled (use -boundaryaware to enable) // suitable for still frame and slide shows, absolutely unsuitable for videos
|
||||||
|
|
||||||
if (exec_args.length > 2) {
|
if (exec_args.length > 2) {
|
||||||
for (let i = 2; i < exec_args.length; i++) {
|
for (let i = 2; i < exec_args.length; i++) {
|
||||||
@@ -56,6 +58,8 @@ if (exec_args.length > 2) {
|
|||||||
debugMotionVectors = true
|
debugMotionVectors = true
|
||||||
} else if (arg === "-nodeblock") {
|
} else if (arg === "-nodeblock") {
|
||||||
enableDeblocking = false
|
enableDeblocking = false
|
||||||
|
} else if (arg === "-boundaryaware") {
|
||||||
|
enableBoundaryAwareDecoding = true
|
||||||
} else if (arg.startsWith("-deinterlace=")) {
|
} else if (arg.startsWith("-deinterlace=")) {
|
||||||
deinterlaceAlgorithm = arg.substring(13)
|
deinterlaceAlgorithm = arg.substring(13)
|
||||||
}
|
}
|
||||||
@@ -97,6 +101,9 @@ audio.purgeQueue(0)
|
|||||||
audio.setPcmMode(0)
|
audio.setPcmMode(0)
|
||||||
audio.setMasterVolume(0, 255)
|
audio.setMasterVolume(0, 255)
|
||||||
|
|
||||||
|
// set colour zero as half-opaque black
|
||||||
|
graphics.setPalette(0, 0, 0, 0, 9)
|
||||||
|
|
||||||
// Subtitle display functions
|
// Subtitle display functions
|
||||||
function clearSubtitleArea() {
|
function clearSubtitleArea() {
|
||||||
// Clear the subtitle area at the bottom of the screen
|
// Clear the subtitle area at the bottom of the screen
|
||||||
@@ -392,7 +399,10 @@ if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_XYB) {
|
|||||||
let colorSpace = (version === TEV_VERSION_XYB) ? "XYB" : "YCoCg-R"
|
let colorSpace = (version === TEV_VERSION_XYB) ? "XYB" : "YCoCg-R"
|
||||||
if (interactive) {
|
if (interactive) {
|
||||||
con.move(1,1)
|
con.move(1,1)
|
||||||
println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace}) | Deblocking: ${enableDeblocking ? 'ON' : 'OFF'}`)
|
if (colorSpace == "XYB")
|
||||||
|
println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace}) | Deblock: ${enableDeblocking ? 'ON' : 'OFF'}, ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
|
||||||
|
else
|
||||||
|
println(`Push and hold Backspace to exit | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | BoundaryAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
let width = seqread.readShort()
|
let width = seqread.readShort()
|
||||||
@@ -655,14 +665,14 @@ try {
|
|||||||
if (isInterlaced) {
|
if (isInterlaced) {
|
||||||
// For interlaced: decode current frame into currentFieldAddr
|
// For interlaced: decode current frame into currentFieldAddr
|
||||||
// For display: use prevFieldAddr as current, currentFieldAddr as next
|
// For display: use prevFieldAddr as current, currentFieldAddr as next
|
||||||
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking)
|
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
|
||||||
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
|
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
|
||||||
|
|
||||||
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
|
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
|
||||||
rotateFieldBuffers()
|
rotateFieldBuffers()
|
||||||
} else {
|
} else {
|
||||||
// Progressive or first frame: normal decoding without temporal prediction
|
// Progressive or first frame: normal decoding without temporal prediction
|
||||||
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking)
|
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
|
||||||
}
|
}
|
||||||
|
|
||||||
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds
|
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds
|
||||||
@@ -750,10 +760,10 @@ try {
|
|||||||
|
|
||||||
if (!hasSubtitle) {
|
if (!hasSubtitle) {
|
||||||
con.move(31, 1)
|
con.move(31, 1)
|
||||||
graphics.setTextFore(161)
|
con.color_pair(253, 0)
|
||||||
print(`Frame: ${frameCount}/${totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `)
|
print(`Frame: ${frameCount}/${totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `)
|
||||||
con.move(32, 1)
|
con.move(32, 1)
|
||||||
graphics.setTextFore(161)
|
con.color_pair(253, 0)
|
||||||
print(`VRate: ${(getVideoRate() / 1024 * 8)|0} kbps `)
|
print(`VRate: ${(getVideoRate() / 1024 * 8)|0} kbps `)
|
||||||
con.move(1, 1)
|
con.move(1, 1)
|
||||||
}
|
}
|
||||||
@@ -781,7 +791,10 @@ finally {
|
|||||||
if (interactive) {
|
if (interactive) {
|
||||||
//con.clear()
|
//con.clear()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set colour zero as opaque black
|
||||||
}
|
}
|
||||||
|
|
||||||
|
graphics.setPalette(0, 0, 0, 0, 0)
|
||||||
con.move(cy, cx) // restore cursor
|
con.move(cy, cx) // restore cursor
|
||||||
return errorlevel
|
return errorlevel
|
||||||
@@ -48,7 +48,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
* @param index which palette number to modify, 0-255
|
* @param index which palette number to modify, 0-255
|
||||||
* @param r g - b - a - RGBA value, 0-15
|
* @param r g - b - a - RGBA value, 0-15
|
||||||
*/
|
*/
|
||||||
fun setPalette(index: Int, r: Int, g: Int, b: Int, a: Int = 16) {
|
fun setPalette(index: Int, r: Int, g: Int, b: Int, a: Int = 15) {
|
||||||
getFirstGPU()?.let {
|
getFirstGPU()?.let {
|
||||||
it.paletteOfFloats[index * 4] = (r and 15) / 15f
|
it.paletteOfFloats[index * 4] = (r and 15) / 15f
|
||||||
it.paletteOfFloats[index * 4 + 1] = (g and 15) / 15f
|
it.paletteOfFloats[index * 4 + 1] = (g and 15) / 15f
|
||||||
@@ -2506,160 +2506,241 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Advanced TEV Deblocking Filter - Reduces blocking artifacts from 16x16 macroblocks
|
* Enhanced TEV Deblocking Filter - Uses Knusperli-inspired techniques for superior boundary analysis
|
||||||
*
|
*
|
||||||
* Uses gradient analysis and adaptive filtering to handle:
|
* Advanced features inspired by Google's Knusperli algorithm:
|
||||||
* - Quantized smooth gradients appearing as discrete blocks
|
* - Frequency-domain boundary discontinuity detection
|
||||||
* - Diagonal edges crossing block boundaries causing color banding
|
* - High-frequency penalty system to preserve detail
|
||||||
* - Texture preservation to avoid over-smoothing genuine edges
|
* - Linear gradient pattern analysis for directional filtering
|
||||||
|
* - Adaptive strength based on local image complexity
|
||||||
|
* - Bulk memory operations for improved performance
|
||||||
*
|
*
|
||||||
* @param rgbAddr RGB frame buffer address (24-bit: R,G,B per pixel)
|
* @param rgbAddr RGB frame buffer address (24-bit: R,G,B per pixel)
|
||||||
* @param width Frame width in pixels
|
* @param width Frame width in pixels
|
||||||
* @param height Frame height in pixels
|
* @param height Frame height in pixels
|
||||||
* @param blockSize Size of blocks (16 for TEV format)
|
* @param blockSize Size of blocks (16 for TEV format)
|
||||||
* @param strength Filter strength (0.0-1.0, higher = more smoothing)
|
* @param strength Base filter strength (0.0-1.0, adaptive adjustment applied)
|
||||||
*/
|
*/
|
||||||
private fun tevDeblockingFilter(rgbAddr: Long, width: Int, height: Int,
|
private fun tevDeblockingFilterEnhanced(rgbAddr: Long, width: Int, height: Int,
|
||||||
blockSize: Int = 16, strength: Float = 0.4f) {
|
blockSize: Int = 16, strength: Float = 1.0f) {
|
||||||
val blocksX = (width + blockSize - 1) / blockSize
|
val blocksX = (width + blockSize - 1) / blockSize
|
||||||
val blocksY = (height + blockSize - 1) / blockSize
|
val blocksY = (height + blockSize - 1) / blockSize
|
||||||
val thisAddrIncVec: Long = if (rgbAddr < 0) -1 else 1
|
val thisAddrIncVec: Long = if (rgbAddr < 0) -1 else 1
|
||||||
|
|
||||||
// Helper function to get pixel value safely
|
// Knusperli-inspired constants adapted for RGB post-processing
|
||||||
fun getPixel(x: Int, y: Int, c: Int): Int {
|
val kLinearGradient = intArrayOf(318, -285, 81, -32, 17, -9, 5, -2) // Gradient pattern (8 taps for block boundary)
|
||||||
if (x < 0 || y < 0 || x >= width || y >= height) return 0
|
val kAlphaSqrt2 = intArrayOf(1024, 1448, 1448, 1448, 1448, 1448, 1448, 1448) // Alpha * sqrt(2) in 10-bit fixed-point
|
||||||
val offset = (y.toLong() * width + x) * 3 + c
|
|
||||||
return vm.peek(rgbAddr + offset * thisAddrIncVec)!!.toUint().toInt()
|
// Bulk memory access helpers for performance
|
||||||
|
fun getPixelBulk(x: Int, y: Int): IntArray {
|
||||||
|
if (x < 0 || y < 0 || x >= width || y >= height) return intArrayOf(0, 0, 0)
|
||||||
|
val offset = (y.toLong() * width + x) * 3
|
||||||
|
val addr = rgbAddr + offset * thisAddrIncVec
|
||||||
|
return intArrayOf(
|
||||||
|
vm.peek(addr)!!.toUint().toInt(),
|
||||||
|
vm.peek(addr + thisAddrIncVec)!!.toUint().toInt(),
|
||||||
|
vm.peek(addr + 2 * thisAddrIncVec)!!.toUint().toInt()
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to set pixel value safely
|
fun setPixelBulk(x: Int, y: Int, rgb: IntArray) {
|
||||||
fun setPixel(x: Int, y: Int, c: Int, value: Int) {
|
|
||||||
if (x < 0 || y < 0 || x >= width || y >= height) return
|
if (x < 0 || y < 0 || x >= width || y >= height) return
|
||||||
val offset = (y.toLong() * width + x) * 3 + c
|
val offset = (y.toLong() * width + x) * 3
|
||||||
vm.poke(rgbAddr + offset * thisAddrIncVec, value.coerceIn(0, 255).toByte())
|
val addr = rgbAddr + offset * thisAddrIncVec
|
||||||
|
vm.poke(addr, rgb[0].coerceIn(0, 255).toByte())
|
||||||
|
vm.poke(addr + thisAddrIncVec, rgb[1].coerceIn(0, 255).toByte())
|
||||||
|
vm.poke(addr + 2 * thisAddrIncVec, rgb[2].coerceIn(0, 255).toByte())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detect if pixels form a smooth gradient (quantized)
|
// ENHANCED: Knusperli-inspired boundary discontinuity analysis
|
||||||
fun isQuantizedGradient(p0: Int, p1: Int, p2: Int, p3: Int): Boolean {
|
fun analyzeBoundaryDiscontinuity(samples: IntArray): Pair<Long, Long> {
|
||||||
// Check for step-like transitions typical of quantized gradients
|
// samples: 8-pixel samples across the boundary for frequency analysis
|
||||||
val d01 = kotlin.math.abs(p1 - p0)
|
var delta = 0L
|
||||||
val d12 = kotlin.math.abs(p2 - p1)
|
var hfPenalty = 0L
|
||||||
val d23 = kotlin.math.abs(p3 - p2)
|
|
||||||
|
|
||||||
// Look for consistent small steps (quantized gradient)
|
for (u in 0 until 8) {
|
||||||
val avgStep = (d01 + d12 + d23) / 3.0f
|
val alpha = kAlphaSqrt2[u]
|
||||||
val stepVariance = kotlin.math.abs(d01 - avgStep) + kotlin.math.abs(d12 - avgStep) + kotlin.math.abs(d23 - avgStep)
|
val sign = if (u and 1 != 0) -1 else 1
|
||||||
|
val leftVal = samples[u]
|
||||||
|
val rightVal = samples[7 - u] // Mirror for boundary analysis
|
||||||
|
|
||||||
|
delta += alpha * (rightVal - sign * leftVal)
|
||||||
|
hfPenalty += (u * u) * (leftVal * leftVal + rightVal * rightVal)
|
||||||
|
}
|
||||||
|
|
||||||
return avgStep in 3.0f..25.0f && stepVariance < avgStep * 0.8f
|
return Pair(delta, hfPenalty)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply horizontal deblocking (vertical edges between blocks)
|
// ENHANCED: Adaptive strength based on local complexity
|
||||||
|
fun calculateAdaptiveStrength(baseStrength: Float, hfPenalty: Long, delta: Long): Float {
|
||||||
|
val complexity = kotlin.math.sqrt(hfPenalty.toDouble()).toFloat()
|
||||||
|
val discontinuityMagnitude = kotlin.math.abs(delta).toFloat()
|
||||||
|
|
||||||
|
// Reduce filtering strength in high-frequency areas (preserve detail)
|
||||||
|
val complexityFactor = if (complexity > 800) 0.3f else 1.0f
|
||||||
|
|
||||||
|
// Increase filtering strength for clear discontinuities
|
||||||
|
val discontinuityFactor = kotlin.math.min(2.0f, discontinuityMagnitude / 1000.0f)
|
||||||
|
|
||||||
|
return baseStrength * complexityFactor * discontinuityFactor
|
||||||
|
}
|
||||||
|
|
||||||
|
// ENHANCED: Apply Knusperli-style corrections using linear gradient patterns
|
||||||
|
fun applyBoundaryCorrection(
|
||||||
|
samples: IntArray, delta: Long, adaptiveStrength: Float
|
||||||
|
): IntArray {
|
||||||
|
val result = samples.clone()
|
||||||
|
val correction = (delta * 724 shr 31).toInt() // Apply sqrt(2)/2 weighting like Knusperli
|
||||||
|
|
||||||
|
// Apply linear gradient corrections across boundary
|
||||||
|
for (i in 0 until 8) {
|
||||||
|
val gradientWeight = kLinearGradient[i] * correction / 1024 // Scale from 10-bit fixed-point
|
||||||
|
val sign = if (i < 4) 1 else -1 // Left/right side weighting
|
||||||
|
|
||||||
|
val adjustment = (gradientWeight * sign * adaptiveStrength).toInt()
|
||||||
|
result[i] = (result[i] + adjustment).coerceIn(0, 255)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// ENHANCED HORIZONTAL DEBLOCKING: Using Knusperli-inspired boundary analysis
|
||||||
for (by in 0 until blocksY) {
|
for (by in 0 until blocksY) {
|
||||||
for (bx in 1 until blocksX) {
|
for (bx in 1 until blocksX) {
|
||||||
val blockEdgeX = bx * blockSize
|
val blockEdgeX = bx * blockSize
|
||||||
if (blockEdgeX >= width) continue
|
if (blockEdgeX >= width) continue
|
||||||
|
|
||||||
for (y in (by * blockSize) until minOf((by + 1) * blockSize, height)) {
|
// Process boundary in chunks for better performance
|
||||||
for (c in 0..2) { // RGB components
|
val yStart = by * blockSize
|
||||||
// Sample 4 pixels across the block boundary: [left2][left1] | [right1][right2]
|
val yEnd = minOf((by + 1) * blockSize, height)
|
||||||
val left2 = getPixel(blockEdgeX - 2, y, c)
|
|
||||||
val left1 = getPixel(blockEdgeX - 1, y, c)
|
for (y in yStart until yEnd step 2) { // Process 2 lines at a time
|
||||||
val right1 = getPixel(blockEdgeX, y, c)
|
if (y + 1 >= height) continue
|
||||||
val right2 = getPixel(blockEdgeX + 1, y, c)
|
|
||||||
|
// Sample 8x2 pixel region across boundary for both lines
|
||||||
|
val samples1 = IntArray(24) // 8 pixels × 3 channels (RGB)
|
||||||
|
val samples2 = IntArray(24)
|
||||||
|
|
||||||
|
for (i in 0 until 8) {
|
||||||
|
val x = blockEdgeX - 4 + i
|
||||||
|
val rgb1 = getPixelBulk(x, y)
|
||||||
|
val rgb2 = getPixelBulk(x, y + 1)
|
||||||
|
|
||||||
val edgeDiff = kotlin.math.abs(right1 - left1)
|
samples1[i * 3] = rgb1[0] // R
|
||||||
|
samples1[i * 3 + 1] = rgb1[1] // G
|
||||||
|
samples1[i * 3 + 2] = rgb1[2] // B
|
||||||
|
samples2[i * 3] = rgb2[0]
|
||||||
|
samples2[i * 3 + 1] = rgb2[1]
|
||||||
|
samples2[i * 3 + 2] = rgb2[2]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Analyze each color channel separately
|
||||||
|
for (c in 0..2) {
|
||||||
|
val channelSamples1 = IntArray(8) { samples1[it * 3 + c] }
|
||||||
|
val channelSamples2 = IntArray(8) { samples2[it * 3 + c] }
|
||||||
|
|
||||||
// Skip strong edges (likely genuine features)
|
val (delta1, hfPenalty1) = analyzeBoundaryDiscontinuity(channelSamples1)
|
||||||
if (edgeDiff > 50) continue
|
val (delta2, hfPenalty2) = analyzeBoundaryDiscontinuity(channelSamples2)
|
||||||
|
|
||||||
// Check for quantized gradient pattern
|
// Skip if very small discontinuity (early exit optimization)
|
||||||
if (isQuantizedGradient(left2, left1, right1, right2)) {
|
if (kotlin.math.abs(delta1) < 50 && kotlin.math.abs(delta2) < 50) continue
|
||||||
// Apply gradient-preserving smoothing
|
|
||||||
val gradientLeft = left1 - left2
|
// Calculate adaptive filtering strength
|
||||||
val gradientRight = right2 - right1
|
val adaptiveStrength1 = calculateAdaptiveStrength(strength, hfPenalty1, delta1)
|
||||||
val avgGradient = (gradientLeft + gradientRight) / 2.0f
|
val adaptiveStrength2 = calculateAdaptiveStrength(strength, hfPenalty2, delta2)
|
||||||
|
|
||||||
val smoothedLeft1 = (left2 + avgGradient).toInt()
|
// Apply corrections if strength is significant
|
||||||
val smoothedRight1 = (right2 - avgGradient).toInt()
|
if (adaptiveStrength1 > 0.05f) {
|
||||||
|
val corrected1 = applyBoundaryCorrection(channelSamples1, delta1, adaptiveStrength1)
|
||||||
// Blend with original based on strength
|
for (i in 0 until 8) {
|
||||||
val blendLeft = (left1 * (1.0f - strength) + smoothedLeft1 * strength).toInt()
|
samples1[i * 3 + c] = corrected1[i]
|
||||||
val blendRight = (right1 * (1.0f - strength) + smoothedRight1 * strength).toInt()
|
|
||||||
|
|
||||||
setPixel(blockEdgeX - 1, y, c, blendLeft)
|
|
||||||
setPixel(blockEdgeX, y, c, blendRight)
|
|
||||||
}
|
|
||||||
// Check for color banding on diagonal features
|
|
||||||
else if (edgeDiff in 8..35) {
|
|
||||||
// Look at diagonal context to detect banding
|
|
||||||
val diagContext = kotlin.math.abs(getPixel(blockEdgeX - 1, y - 1, c) - getPixel(blockEdgeX, y + 1, c))
|
|
||||||
|
|
||||||
if (diagContext < edgeDiff * 1.5f) {
|
|
||||||
// Likely diagonal banding - apply directional smoothing
|
|
||||||
val blend = 0.3f * strength
|
|
||||||
val blendLeft = (left1 * (1.0f - blend) + right1 * blend).toInt()
|
|
||||||
val blendRight = (right1 * (1.0f - blend) + left1 * blend).toInt()
|
|
||||||
|
|
||||||
setPixel(blockEdgeX - 1, y, c, blendLeft)
|
|
||||||
setPixel(blockEdgeX, y, c, blendRight)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (adaptiveStrength2 > 0.05f) {
|
||||||
|
val corrected2 = applyBoundaryCorrection(channelSamples2, delta2, adaptiveStrength2)
|
||||||
|
for (i in 0 until 8) {
|
||||||
|
samples2[i * 3 + c] = corrected2[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write back corrected pixels in bulk
|
||||||
|
for (i in 2..5) { // Only write middle 4 pixels to avoid artifacts
|
||||||
|
val x = blockEdgeX - 4 + i
|
||||||
|
setPixelBulk(x, y, intArrayOf(samples1[i * 3], samples1[i * 3 + 1], samples1[i * 3 + 2]))
|
||||||
|
if (y + 1 < height) {
|
||||||
|
setPixelBulk(x, y + 1, intArrayOf(samples2[i * 3], samples2[i * 3 + 1], samples2[i * 3 + 2]))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply vertical deblocking (horizontal edges between blocks)
|
// ENHANCED VERTICAL DEBLOCKING: Same approach for horizontal block boundaries
|
||||||
for (by in 1 until blocksY) {
|
for (by in 1 until blocksY) {
|
||||||
for (bx in 0 until blocksX) {
|
for (bx in 0 until blocksX) {
|
||||||
val blockEdgeY = by * blockSize
|
val blockEdgeY = by * blockSize
|
||||||
if (blockEdgeY >= height) continue
|
if (blockEdgeY >= height) continue
|
||||||
|
|
||||||
for (x in (bx * blockSize) until minOf((bx + 1) * blockSize, width)) {
|
val xStart = bx * blockSize
|
||||||
for (c in 0..2) { // RGB components
|
val xEnd = minOf((bx + 1) * blockSize, width)
|
||||||
// Sample 4 pixels across the block boundary: [top2][top1] | [bottom1][bottom2]
|
|
||||||
val top2 = getPixel(x, blockEdgeY - 2, c)
|
for (x in xStart until xEnd step 2) {
|
||||||
val top1 = getPixel(x, blockEdgeY - 1, c)
|
if (x + 1 >= width) continue
|
||||||
val bottom1 = getPixel(x, blockEdgeY, c)
|
|
||||||
val bottom2 = getPixel(x, blockEdgeY + 1, c)
|
// Sample 8x2 pixel region across vertical boundary
|
||||||
|
val samples1 = IntArray(24)
|
||||||
|
val samples2 = IntArray(24)
|
||||||
|
|
||||||
|
for (i in 0 until 8) {
|
||||||
|
val y = blockEdgeY - 4 + i
|
||||||
|
val rgb1 = getPixelBulk(x, y)
|
||||||
|
val rgb2 = getPixelBulk(x + 1, y)
|
||||||
|
|
||||||
val edgeDiff = kotlin.math.abs(bottom1 - top1)
|
samples1[i * 3] = rgb1[0]
|
||||||
|
samples1[i * 3 + 1] = rgb1[1]
|
||||||
|
samples1[i * 3 + 2] = rgb1[2]
|
||||||
|
samples2[i * 3] = rgb2[0]
|
||||||
|
samples2[i * 3 + 1] = rgb2[1]
|
||||||
|
samples2[i * 3 + 2] = rgb2[2]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same boundary analysis and correction as horizontal
|
||||||
|
for (c in 0..2) {
|
||||||
|
val channelSamples1 = IntArray(8) { samples1[it * 3 + c] }
|
||||||
|
val channelSamples2 = IntArray(8) { samples2[it * 3 + c] }
|
||||||
|
|
||||||
// Skip strong edges (likely genuine features)
|
val (delta1, hfPenalty1) = analyzeBoundaryDiscontinuity(channelSamples1)
|
||||||
if (edgeDiff > 50) continue
|
val (delta2, hfPenalty2) = analyzeBoundaryDiscontinuity(channelSamples2)
|
||||||
|
|
||||||
// Check for quantized gradient pattern
|
if (kotlin.math.abs(delta1) < 50 && kotlin.math.abs(delta2) < 50) continue
|
||||||
if (isQuantizedGradient(top2, top1, bottom1, bottom2)) {
|
|
||||||
// Apply gradient-preserving smoothing
|
val adaptiveStrength1 = calculateAdaptiveStrength(strength, hfPenalty1, delta1)
|
||||||
val gradientTop = top1 - top2
|
val adaptiveStrength2 = calculateAdaptiveStrength(strength, hfPenalty2, delta2)
|
||||||
val gradientBottom = bottom2 - bottom1
|
|
||||||
val avgGradient = (gradientTop + gradientBottom) / 2.0f
|
if (adaptiveStrength1 > 0.05f) {
|
||||||
|
val corrected1 = applyBoundaryCorrection(channelSamples1, delta1, adaptiveStrength1)
|
||||||
val smoothedTop1 = (top2 + avgGradient).toInt()
|
for (i in 0 until 8) {
|
||||||
val smoothedBottom1 = (bottom2 - avgGradient).toInt()
|
samples1[i * 3 + c] = corrected1[i]
|
||||||
|
|
||||||
// Blend with original based on strength
|
|
||||||
val blendTop = (top1 * (1.0f - strength) + smoothedTop1 * strength).toInt()
|
|
||||||
val blendBottom = (bottom1 * (1.0f - strength) + smoothedBottom1 * strength).toInt()
|
|
||||||
|
|
||||||
setPixel(x, blockEdgeY - 1, c, blendTop)
|
|
||||||
setPixel(x, blockEdgeY, c, blendBottom)
|
|
||||||
}
|
|
||||||
// Check for color banding on diagonal features
|
|
||||||
else if (edgeDiff in 8..35) {
|
|
||||||
// Look at diagonal context to detect banding
|
|
||||||
val diagContext = kotlin.math.abs(getPixel(x - 1, blockEdgeY - 1, c) - getPixel(x + 1, blockEdgeY, c))
|
|
||||||
|
|
||||||
if (diagContext < edgeDiff * 1.5f) {
|
|
||||||
// Likely diagonal banding - apply directional smoothing
|
|
||||||
val blend = 0.3f * strength
|
|
||||||
val blendTop = (top1 * (1.0f - blend) + bottom1 * blend).toInt()
|
|
||||||
val blendBottom = (bottom1 * (1.0f - blend) + top1 * blend).toInt()
|
|
||||||
|
|
||||||
setPixel(x, blockEdgeY - 1, c, blendTop)
|
|
||||||
setPixel(x, blockEdgeY, c, blendBottom)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (adaptiveStrength2 > 0.05f) {
|
||||||
|
val corrected2 = applyBoundaryCorrection(channelSamples2, delta2, adaptiveStrength2)
|
||||||
|
for (i in 0 until 8) {
|
||||||
|
samples2[i * 3 + c] = corrected2[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write back corrected pixels
|
||||||
|
for (i in 2..5) {
|
||||||
|
val y = blockEdgeY - 4 + i
|
||||||
|
setPixelBulk(x, y, intArrayOf(samples1[i * 3], samples1[i * 3 + 1], samples1[i * 3 + 2]))
|
||||||
|
if (x + 1 < width) {
|
||||||
|
setPixelBulk(x + 1, y, intArrayOf(samples2[i * 3], samples2[i * 3 + 1], samples2[i * 3 + 2]))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3221,9 +3302,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply deblocking filter if enabled to reduce blocking artifacts
|
// Apply enhanced deblocking filter if enabled to reduce blocking artifacts
|
||||||
if (enableDeblocking) {
|
if (enableDeblocking) {
|
||||||
tevDeblockingFilter(currentRGBAddr, width, height)
|
tevDeblockingFilterEnhanced(currentRGBAddr, width, height)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3761,7 +3842,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
// 16x16 version of Knusperli processing for Y blocks
|
// Optimized 16x16 version of Knusperli processing for Y blocks
|
||||||
private fun processBlocksWithKnusperli16x16(
|
private fun processBlocksWithKnusperli16x16(
|
||||||
blocks: Array<ShortArray?>, quantTable: IntArray, qScale: Int, rateControlFactors: FloatArray,
|
blocks: Array<ShortArray?>, quantTable: IntArray, qScale: Int, rateControlFactors: FloatArray,
|
||||||
blocksX: Int, blocksY: Int,
|
blocksX: Int, blocksY: Int,
|
||||||
@@ -3770,144 +3851,355 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val coeffsSize = 256 // 16x16 = 256
|
val coeffsSize = 256 // 16x16 = 256
|
||||||
val numBlocks = blocksX * blocksY
|
val numBlocks = blocksX * blocksY
|
||||||
|
|
||||||
// Step 1: Setup quantization intervals for all blocks
|
// OPTIMIZATION 1: Pre-compute quantization values to avoid repeated calculations
|
||||||
val blocksMid = Array(numBlocks) { IntArray(coeffsSize) }
|
val quantValues = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
val blocksMin = Array(numBlocks) { IntArray(coeffsSize) }
|
val quantHalfValues = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
val blocksMax = Array(numBlocks) { IntArray(coeffsSize) }
|
|
||||||
val blocksOff = Array(numBlocks) { LongArray(coeffsSize) }
|
|
||||||
|
|
||||||
for (blockIndex in 0 until numBlocks) {
|
for (blockIndex in 0 until numBlocks) {
|
||||||
val block = blocks[blockIndex]
|
val block = blocks[blockIndex]
|
||||||
if (block != null) {
|
if (block != null) {
|
||||||
val rateControlFactor = rateControlFactors[blockIndex]
|
val rateControlFactor = rateControlFactors[blockIndex]
|
||||||
for (i in 0 until coeffsSize) {
|
val qualityMult = jpeg_quality_to_mult(qScale * rateControlFactor)
|
||||||
|
|
||||||
|
quantValues[blockIndex][0] = 1 // DC is lossless
|
||||||
|
quantHalfValues[blockIndex][0] = 0 // DC has no quantization interval
|
||||||
|
|
||||||
|
for (i in 1 until coeffsSize) {
|
||||||
val coeffIdx = i.coerceIn(0, quantTable.size - 1)
|
val coeffIdx = i.coerceIn(0, quantTable.size - 1)
|
||||||
val quant = if (i == 0) 1 else (quantTable[coeffIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).toInt()
|
val quant = (quantTable[coeffIdx] * qualityMult).toInt()
|
||||||
|
quantValues[blockIndex][i] = quant
|
||||||
blocksMid[blockIndex][i] = block[i].toInt() * quant
|
quantHalfValues[blockIndex][i] = quant / 2
|
||||||
val halfQuant = quant / 2
|
|
||||||
blocksMin[blockIndex][i] = blocksMid[blockIndex][i] - halfQuant
|
|
||||||
blocksMax[blockIndex][i] = blocksMid[blockIndex][i] + halfQuant
|
|
||||||
blocksOff[blockIndex][i] = 0L
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 2: Horizontal continuity analysis (16x16 version)
|
// OPTIMIZATION 2: Use single-allocation arrays with block-stride access
|
||||||
for (by in 0 until blocksY) {
|
val blocksMid = Array(numBlocks) { IntArray(coeffsSize) }
|
||||||
for (bx in 0 until blocksX - 1) {
|
val blocksOff = Array(numBlocks) { LongArray(coeffsSize) } // Keep Long for accumulation
|
||||||
val leftBlockIndex = by * blocksX + bx
|
|
||||||
val rightBlockIndex = by * blocksX + (bx + 1)
|
|
||||||
|
|
||||||
if (blocks[leftBlockIndex] != null && blocks[rightBlockIndex] != null) {
|
|
||||||
analyzeHorizontalBoundary16x16(
|
|
||||||
leftBlockIndex, rightBlockIndex, blocksMid, blocksOff,
|
|
||||||
kLinearGradient16, kAlphaSqrt2_16
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 3: Vertical continuity analysis (16x16 version)
|
// Step 1: Setup dequantized values and initialize adjustments (BULK OPTIMIZED)
|
||||||
for (by in 0 until blocksY - 1) {
|
|
||||||
for (bx in 0 until blocksX) {
|
|
||||||
val topBlockIndex = by * blocksX + bx
|
|
||||||
val bottomBlockIndex = (by + 1) * blocksX + bx
|
|
||||||
|
|
||||||
if (blocks[topBlockIndex] != null && blocks[bottomBlockIndex] != null) {
|
|
||||||
analyzeVerticalBoundary16x16(
|
|
||||||
topBlockIndex, bottomBlockIndex, blocksMid, blocksOff,
|
|
||||||
kLinearGradient16, kAlphaSqrt2_16
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 4: Apply corrections and clamp to quantization intervals
|
|
||||||
for (blockIndex in 0 until numBlocks) {
|
for (blockIndex in 0 until numBlocks) {
|
||||||
val block = blocks[blockIndex]
|
val block = blocks[blockIndex]
|
||||||
if (block != null) {
|
if (block != null) {
|
||||||
for (i in 0 until coeffsSize) {
|
val mid = blocksMid[blockIndex]
|
||||||
// Apply corrections with sqrt(2)/2 weighting
|
val off = blocksOff[blockIndex]
|
||||||
blocksMid[blockIndex][i] += ((blocksOff[blockIndex][i] * kHalfSqrt2) shr 31).toInt()
|
val quantVals = quantValues[blockIndex]
|
||||||
|
|
||||||
// Clamp to quantization interval bounds
|
// OPTIMIZATION 9: Bulk dequantization using vectorized operations
|
||||||
blocksMid[blockIndex][i] = blocksMid[blockIndex][i].coerceIn(
|
bulkDequantizeCoefficients(block, mid, quantVals, coeffsSize)
|
||||||
blocksMin[blockIndex][i],
|
|
||||||
blocksMax[blockIndex][i]
|
// OPTIMIZATION 10: Bulk zero initialization of adjustments
|
||||||
)
|
off.fill(0L)
|
||||||
|
}
|
||||||
// Convert back to quantized coefficient for storage
|
}
|
||||||
val rateControlFactor = rateControlFactors[blockIndex]
|
|
||||||
val coeffIdx = i.coerceIn(0, quantTable.size - 1)
|
// OPTIMIZATION 7: Combined boundary analysis loops for better cache locality
|
||||||
val quant = if (i == 0) 1 else (quantTable[coeffIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).toInt()
|
// Process horizontal and vertical boundaries in interleaved pattern
|
||||||
block[i] = (blocksMid[blockIndex][i] / quant).coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort()
|
for (by in 0 until blocksY) {
|
||||||
|
for (bx in 0 until blocksX) {
|
||||||
|
val currentIndex = by * blocksX + bx
|
||||||
|
|
||||||
|
// Horizontal boundary (if not rightmost column)
|
||||||
|
if (bx < blocksX - 1) {
|
||||||
|
val rightIndex = currentIndex + 1
|
||||||
|
if (blocks[currentIndex] != null && blocks[rightIndex] != null) {
|
||||||
|
analyzeHorizontalBoundary16x16(
|
||||||
|
currentIndex, rightIndex, blocksMid, blocksOff,
|
||||||
|
kLinearGradient16, kAlphaSqrt2_16
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Vertical boundary (if not bottom row)
|
||||||
|
if (by < blocksY - 1) {
|
||||||
|
val bottomIndex = currentIndex + blocksX
|
||||||
|
if (blocks[currentIndex] != null && blocks[bottomIndex] != null) {
|
||||||
|
analyzeVerticalBoundary16x16(
|
||||||
|
currentIndex, bottomIndex, blocksMid, blocksOff,
|
||||||
|
kLinearGradient16, kAlphaSqrt2_16
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4: Apply corrections and clamp to quantization intervals (BULK OPTIMIZED)
|
||||||
|
for (blockIndex in 0 until numBlocks) {
|
||||||
|
val block = blocks[blockIndex]
|
||||||
|
if (block != null) {
|
||||||
|
// OPTIMIZATION 11: Bulk apply corrections and quantization clamping
|
||||||
|
bulkApplyCorrectionsAndClamp(
|
||||||
|
block, blocksMid[blockIndex], blocksOff[blockIndex],
|
||||||
|
quantValues[blockIndex], quantHalfValues[blockIndex],
|
||||||
|
kHalfSqrt2, coeffsSize
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 16x16 horizontal boundary analysis (adapted from Google's 8x8 version)
|
// BULK MEMORY ACCESS HELPER FUNCTIONS FOR KNUSPERLI
|
||||||
|
|
||||||
|
/**
 * OPTIMIZATION 9: Bulk dequantization.
 *
 * Writes `coeffs[k] * quantVals[k]` into `result[k]` for every k in `0 until size`.
 * Elements of `result` at index `size` and beyond are left untouched.
 *
 * @param coeffs quantized coefficients to scale
 * @param result destination for the dequantized values
 * @param quantVals per-coefficient multiplier
 * @param size number of leading elements to process; a non-positive value is a no-op
 */
private fun bulkDequantizeCoefficients(
    coeffs: ShortArray, result: IntArray, quantVals: IntArray, size: Int
) {
    // A plain ascending loop performs exactly the same writes, in the same order,
    // as a hand-unrolled 16-wide pass followed by a scalar tail.
    for (k in 0 until size) {
        result[k] = coeffs[k] * quantVals[k]
    }
}
|
||||||
|
|
||||||
|
/**
 * OPTIMIZATION 11: Apply the accumulated corrections, clamp, and re-quantize.
 *
 * For every index k in `0 until size`:
 *  1. `mid[k]` is increased by `(off[k] * kHalfSqrt2) shr 31`
 *     (a fixed-point multiply; the call site labels this the sqrt(2)/2 weighting).
 *  2. `mid[k]` is clamped to `block[k] * quantVals[k] ± quantHalf[k]`, i.e. the
 *     interval around the value the stored quantized coefficient dequantizes to.
 *  3. `block[k]` is overwritten with `mid[k] / quantVals[k]` (integer division),
 *     limited to the `Short` range.
 *
 * Both `mid` and `block` are modified in place. Indices at `size` and beyond are untouched.
 *
 * @param block quantized coefficients; read for the clamp centre, then rewritten
 * @param mid dequantized working values; updated in place
 * @param off accumulated corrections
 * @param quantVals per-coefficient quantizer (used as multiplier and divisor)
 * @param quantHalf per-coefficient half-width of the allowed interval
 * @param kHalfSqrt2 fixed-point weight applied to `off` before the 31-bit shift
 * @param size number of leading elements to process
 */
private fun bulkApplyCorrectionsAndClamp(
    block: ShortArray, mid: IntArray, off: LongArray,
    quantVals: IntArray, quantHalf: IntArray,
    kHalfSqrt2: Int, size: Int
) {
    val shortFloor = Short.MIN_VALUE.toInt()
    val shortCeil = Short.MAX_VALUE.toInt()

    // Every index is independent of the others, so a single pass is equivalent
    // to the staged (add all / clamp all / store all) unrolled form.
    for (k in 0 until size) {
        mid[k] += ((off[k] * kHalfSqrt2) shr 31).toInt()

        val step = quantVals[k]
        val centre = block[k] * step
        val halfWidth = quantHalf[k]
        mid[k] = mid[k].coerceIn(centre - halfWidth, centre + halfWidth)

        block[k] = (mid[k] / step).coerceIn(shortFloor, shortCeil).toShort()
    }
}
|
||||||
|
|
||||||
|
// OPTIMIZED 16x16 horizontal boundary analysis
|
||||||
/**
 * Analyzes the boundary between two horizontally adjacent 16x16 coefficient blocks
 * and accumulates smoothing corrections into their offset arrays.
 *
 * Only rows v = 0..7 (the low-to-mid frequencies) are processed. For each row:
 *  1. The discontinuity `deltaV = sum(alpha[u] * (right[u] - (-1)^u * left[u]))` and a
 *     high-frequency penalty `sum(u^2 * (left[u]^2 + right[u]^2))` are computed.
 *  2. Rows with `|deltaV| < 100` are skipped.
 *  3. `deltaV` is halved once when the penalty exceeds 1600.
 *  4. `deltaV * kLinearGradient16[u]` is added to the left block's offsets, and applied
 *     to the right block's offsets with sign -1 for even u and +1 for odd u.
 *
 * All sums are carried out in 64-bit arithmetic so that large coefficients cannot
 * wrap around in `Int` and corrupt the penalty or the discontinuity.
 *
 * @param leftBlockIndex index of the left block in [blocksMid] / [blocksOff]
 * @param rightBlockIndex index of the right block in [blocksMid] / [blocksOff]
 * @param blocksMid per-block dequantized coefficients (read only here), row-major 16x16
 * @param blocksOff per-block correction accumulators (updated in place)
 * @param kLinearGradient16 16 gradient weights, indexed by u
 * @param kAlphaSqrt2_16 16 boundary weights, indexed by u
 */
private fun analyzeHorizontalBoundary16x16(
    leftBlockIndex: Int, rightBlockIndex: Int,
    blocksMid: Array<IntArray>, blocksOff: Array<LongArray>,
    kLinearGradient16: IntArray, kAlphaSqrt2_16: IntArray
) {
    // Hoist the per-block array lookups out of the loops.
    val leftMid = blocksMid[leftBlockIndex]
    val rightMid = blocksMid[rightBlockIndex]
    val leftOff = blocksOff[leftBlockIndex]
    val rightOff = blocksOff[rightBlockIndex]

    for (v in 0 until 8) { // Only low-to-mid frequencies
        val vOffset = v * 16
        var deltaV = 0L
        var hfPenalty = 0L

        // First pass: measure the discontinuity across the boundary.
        for (u in 0 until 16) {
            // Widen before multiplying: gi * gi overflows Int once |gi| > 46340.
            val gi = leftMid[vOffset + u].toLong()
            val gj = rightMid[vOffset + u].toLong()
            val sign = if (u and 1 != 0) -1L else 1L

            deltaV += kAlphaSqrt2_16[u] * (gj - sign * gi)
            hfPenalty += (u * u) * (gi * gi + gj * gj)
        }

        // Early exit for very small adjustments.
        if (kotlin.math.abs(deltaV) < 100) continue

        // High-frequency damping, applied once per row.
        if (hfPenalty > 1600) deltaV /= 2

        // Second pass: distribute the correction over both blocks.
        for (u in 0 until 16) {
            val step = deltaV * kLinearGradient16[u]
            leftOff[vOffset + u] += step
            // The right block takes the opposite-signed correction for even u and the
            // same-signed one for odd u (sign = if (u odd) 1 else -1), as in the
            // non-unrolled reference loop; using the inverse would shift both blocks
            // in the same direction for u = 0 instead of closing the gap.
            if (u and 1 != 0) {
                rightOff[vOffset + u] += step
            } else {
                rightOff[vOffset + u] -= step
            }
        }
    }
}
|
||||||
|
|
||||||
// 16x16 vertical boundary analysis (adapted from Google's 8x8 version)
|
// OPTIMIZED 16x16 vertical boundary analysis
|
||||||
/**
 * Analyzes the boundary between two vertically adjacent 16x16 coefficient blocks
 * and accumulates smoothing corrections into their offset arrays.
 *
 * Only columns u = 0..7 (the low-to-mid frequencies) are processed. For each column:
 *  1. The discontinuity `deltaU = sum(alpha[v] * (bottom[v] - (-1)^v * top[v]))` and a
 *     high-frequency penalty `sum(v^2 * (top[v]^2 + bottom[v]^2))` are computed.
 *  2. Columns with `|deltaU| < 100` are skipped.
 *  3. `deltaU` is halved once when the penalty exceeds 1600.
 *  4. `deltaU * kLinearGradient16[v]` is added to the top block's offsets, and applied
 *     to the bottom block's offsets with sign -1 for even v and +1 for odd v.
 *
 * All sums are carried out in 64-bit arithmetic so that large coefficients cannot
 * wrap around in `Int` and corrupt the penalty or the discontinuity.
 *
 * @param topBlockIndex index of the upper block in [blocksMid] / [blocksOff]
 * @param bottomBlockIndex index of the lower block in [blocksMid] / [blocksOff]
 * @param blocksMid per-block dequantized coefficients (read only here), row-major 16x16
 * @param blocksOff per-block correction accumulators (updated in place)
 * @param kLinearGradient16 16 gradient weights, indexed by v
 * @param kAlphaSqrt2_16 16 boundary weights, indexed by v
 */
private fun analyzeVerticalBoundary16x16(
    topBlockIndex: Int, bottomBlockIndex: Int,
    blocksMid: Array<IntArray>, blocksOff: Array<LongArray>,
    kLinearGradient16: IntArray, kAlphaSqrt2_16: IntArray
) {
    // Hoist the per-block array lookups out of the loops.
    val topMid = blocksMid[topBlockIndex]
    val bottomMid = blocksMid[bottomBlockIndex]
    val topOff = blocksOff[topBlockIndex]
    val bottomOff = blocksOff[bottomBlockIndex]

    for (u in 0 until 8) { // Only low-to-mid frequencies
        var deltaU = 0L
        var hfPenalty = 0L

        // First pass: measure the discontinuity across the boundary.
        for (v in 0 until 16) {
            val idx = v * 16 + u
            // Widen before multiplying: gi * gi overflows Int once |gi| > 46340.
            val gi = topMid[idx].toLong()
            val gj = bottomMid[idx].toLong()
            val sign = if (v and 1 != 0) -1L else 1L

            deltaU += kAlphaSqrt2_16[v] * (gj - sign * gi)
            hfPenalty += (v * v) * (gi * gi + gj * gj)
        }

        // Early exit for very small adjustments.
        if (kotlin.math.abs(deltaU) < 100) continue

        // High-frequency damping, applied once per column.
        if (hfPenalty > 1600) deltaU /= 2

        // Second pass: distribute the correction over both blocks.
        for (v in 0 until 16) {
            val idx = v * 16 + u
            val step = deltaU * kLinearGradient16[v]
            topOff[idx] += step
            // The bottom block takes the opposite-signed correction for even v and the
            // same-signed one for odd v (sign = if (v odd) 1 else -1), as in the
            // non-unrolled reference loop; using the inverse would shift both blocks
            // in the same direction for v = 0 instead of closing the gap.
            if (v and 1 != 0) {
                bottomOff[idx] += step
            } else {
                bottomOff[idx] -= step
            }
        }
    }
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user