mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-11 07:14:04 +09:00
tev format working with better motion compensation
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
// Created by Claude on 2025-08-18.
|
// Created by Claude on 2025-08-18.
|
||||||
// TSVM Enhanced Video (TEV) Format Decoder - YCoCg-R 4:2:0 Version
|
// TSVM Enhanced Video (TEV) Format Decoder - YCoCg-R 4:2:0 Version
|
||||||
// Usage: playtev moviefile.tev [options]
|
// Usage: playtev moviefile.tev [options]
|
||||||
|
// Options (one per run, read from the first argument after the file): -i (interactive), -debug-mv (show motion vector debug visualization)
|
||||||
|
|
||||||
const WIDTH = 560
|
const WIDTH = 560
|
||||||
const HEIGHT = 448
|
const HEIGHT = 448
|
||||||
@@ -21,6 +22,7 @@ const TEV_PACKET_AUDIO_MP2 = 0x20
|
|||||||
const TEV_PACKET_SYNC = 0xFF
|
const TEV_PACKET_SYNC = 0xFF
|
||||||
|
|
||||||
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
|
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
|
||||||
|
const debugMotionVectors = exec_args[2] && exec_args[2].toLowerCase() == "-debug-mv"
|
||||||
const fullFilePath = _G.shell.resolvePathInput(exec_args[1])
|
const fullFilePath = _G.shell.resolvePathInput(exec_args[1])
|
||||||
const FILE_LENGTH = files.open(fullFilePath.full).size
|
const FILE_LENGTH = files.open(fullFilePath.full).size
|
||||||
|
|
||||||
@@ -102,8 +104,7 @@ function getVideoRate(rate) {
|
|||||||
return baseRate * mult
|
return baseRate * mult
|
||||||
}
|
}
|
||||||
|
|
||||||
let frameTime = 1.0 / fps
|
let FRAME_TIME = 1.0 / fps
|
||||||
|
|
||||||
// Ultra-fast approach: always render to display, use dedicated previous frame buffer
|
// Ultra-fast approach: always render to display, use dedicated previous frame buffer
|
||||||
const FRAME_PIXELS = width * height
|
const FRAME_PIXELS = width * height
|
||||||
|
|
||||||
@@ -133,6 +134,7 @@ sys.memset(DISPLAY_BA_ADDR, 15, FRAME_PIXELS) // Black with alpha=15 (opaque) in
|
|||||||
|
|
||||||
let frameCount = 0
|
let frameCount = 0
|
||||||
let stopPlay = false
|
let stopPlay = false
|
||||||
|
let akku = FRAME_TIME
|
||||||
|
|
||||||
// 4x4 Bayer dithering matrix
|
// 4x4 Bayer dithering matrix
|
||||||
const BAYER_MATRIX = [
|
const BAYER_MATRIX = [
|
||||||
@@ -142,6 +144,7 @@ const BAYER_MATRIX = [
|
|||||||
[15, 7,13, 5]
|
[15, 7,13, 5]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
// Apply Bayer dithering to reduce banding when quantizing to 4-bit
|
// Apply Bayer dithering to reduce banding when quantizing to 4-bit
|
||||||
function ditherValue(value, x, y) {
|
function ditherValue(value, x, y) {
|
||||||
// Get the dither threshold for this pixel position
|
// Get the dither threshold for this pixel position
|
||||||
@@ -157,7 +160,11 @@ function ditherValue(value, x, y) {
|
|||||||
|
|
||||||
// Main decoding loop - simplified for performance
|
// Main decoding loop - simplified for performance
|
||||||
try {
|
try {
|
||||||
|
let t1 = sys.nanoTime()
|
||||||
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && frameCount < totalFrames) {
|
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && frameCount < totalFrames) {
|
||||||
|
|
||||||
|
if (akku >= FRAME_TIME) {
|
||||||
|
|
||||||
// Handle interactive controls
|
// Handle interactive controls
|
||||||
if (interactive) {
|
if (interactive) {
|
||||||
sys.poke(-40, 1)
|
sys.poke(-40, 1)
|
||||||
@@ -175,7 +182,7 @@ try {
|
|||||||
frameCount++
|
frameCount++
|
||||||
|
|
||||||
// Copy current RGB frame to previous frame buffer for next frame reference
|
// Copy current RGB frame to previous frame buffer for next frame reference
|
||||||
// This is the only copying we need, and it happens once per frame after display
|
// memcpy(source, destination, length) - so CURRENT (source) -> PREV (destination)
|
||||||
sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3)
|
sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3)
|
||||||
|
|
||||||
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
|
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
|
||||||
@@ -214,8 +221,7 @@ try {
|
|||||||
|
|
||||||
// Hardware-accelerated TEV YCoCg-R decoding to RGB buffers
|
// Hardware-accelerated TEV YCoCg-R decoding to RGB buffers
|
||||||
try {
|
try {
|
||||||
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality)
|
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors)
|
||||||
// graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, 0) // force quality 0 for testing
|
|
||||||
|
|
||||||
// Upload RGB buffer to display framebuffer with dithering
|
// Upload RGB buffer to display framebuffer with dithering
|
||||||
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR,
|
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR,
|
||||||
@@ -236,6 +242,12 @@ try {
|
|||||||
println(`Unknown packet type: 0x${packetType.toString(16)}`)
|
println(`Unknown packet type: 0x${packetType.toString(16)}`)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sys.sleep(1)
|
||||||
|
|
||||||
|
let t2 = sys.nanoTime()
|
||||||
|
akku += (t2 - t1) / 1000000000.0
|
||||||
|
|
||||||
// Simple progress display
|
// Simple progress display
|
||||||
if (interactive) {
|
if (interactive) {
|
||||||
@@ -247,6 +259,8 @@ try {
|
|||||||
print(`VRate: ${(getVideoRate() / 1024 * 8)|0} kbps `)
|
print(`VRate: ${(getVideoRate() / 1024 * 8)|0} kbps `)
|
||||||
con.move(1, 1)
|
con.move(1, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t1 = t2
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
@@ -1588,8 +1588,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
sum += dctBasis8[u][x] * dctBasis8[v][y] * dctCoeffs[u][v]
|
sum += dctBasis8[u][x] * dctBasis8[v][y] * dctCoeffs[u][v]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Chroma residuals should be in reasonable range (±128 max)
|
// Clamp chroma residuals to a reasonable range (-256 to +255)
|
||||||
val pixel = sum.coerceIn(-127f, 128f)
|
val pixel = sum.coerceIn(-256f, 255f)
|
||||||
result[y * 8 + x] = pixel.toInt()
|
result[y * 8 + x] = pixel.toInt()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1692,8 +1692,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
// Store YCoCg values
|
// Store YCoCg values
|
||||||
val yIdx = py * 16 + px
|
val yIdx = py * 16 + px
|
||||||
ycocgData[yIdx * 3] = y.coerceIn(0, 255) // Y
|
ycocgData[yIdx * 3] = y.coerceIn(0, 255) // Y
|
||||||
ycocgData[yIdx * 3 + 1] = co.coerceIn(-128, 127) // Co
|
ycocgData[yIdx * 3 + 1] = co.coerceIn(-256, 255) // Co
|
||||||
ycocgData[yIdx * 3 + 2] = cg.coerceIn(-128, 127) // Cg
|
ycocgData[yIdx * 3 + 2] = cg.coerceIn(-256, 255) // Cg
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1713,7 +1713,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
* @param frameCounter Frame counter for temporal patterns
|
* @param frameCounter Frame counter for temporal patterns
|
||||||
*/
|
*/
|
||||||
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
|
||||||
width: Int, height: Int, quality: Int) {
|
width: Int, height: Int, quality: Int, debugMotionVectors: Boolean = false) {
|
||||||
|
|
||||||
val blocksX = (width + 15) / 16 // 16x16 blocks now
|
val blocksX = (width + 15) / 16 // 16x16 blocks now
|
||||||
val blocksY = (height + 15) / 16
|
val blocksY = (height + 15) / 16
|
||||||
@@ -1772,25 +1772,45 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
for (dx in 0 until 16) {
|
for (dx in 0 until 16) {
|
||||||
val x = startX + dx
|
val x = startX + dx
|
||||||
val y = startY + dy
|
val y = startY + dy
|
||||||
val refX = x + mvX
|
val refX = x + mvX // Test: revert to original motion compensation
|
||||||
val refY = y + mvY
|
val refY = y + mvY
|
||||||
|
|
||||||
if (x < width && y < height) {
|
if (x < width && y < height) {
|
||||||
val dstPixelOffset = y.toLong() * width + x
|
val dstPixelOffset = y.toLong() * width + x
|
||||||
val dstRgbOffset = dstPixelOffset * 3
|
val dstRgbOffset = dstPixelOffset * 3
|
||||||
|
|
||||||
if (refX in 0 until width && refY in 0 until height) {
|
if (refX >= 0 && refY >= 0 && refX < width && refY < height) {
|
||||||
val refPixelOffset = refY.toLong() * width + refX
|
val refPixelOffset = refY.toLong() * width + refX
|
||||||
val refRgbOffset = refPixelOffset * 3
|
val refRgbOffset = refPixelOffset * 3
|
||||||
|
|
||||||
// Copy RGB from reference position
|
// Additional safety: ensure RGB offset is within valid range
|
||||||
val refR = vm.peek(prevRGBAddr + refRgbOffset*prevAddrIncVec)!!
|
val maxValidOffset = (width * height - 1) * 3L + 2
|
||||||
val refG = vm.peek(prevRGBAddr + (refRgbOffset + 1)*prevAddrIncVec)!!
|
if (refRgbOffset >= 0 && refRgbOffset <= maxValidOffset) {
|
||||||
val refB = vm.peek(prevRGBAddr + (refRgbOffset + 2)*prevAddrIncVec)!!
|
// Copy RGB from reference position
|
||||||
|
val refR = vm.peek(prevRGBAddr + refRgbOffset*prevAddrIncVec)!!
|
||||||
|
val refG = vm.peek(prevRGBAddr + (refRgbOffset + 1)*prevAddrIncVec)!!
|
||||||
|
val refB = vm.peek(prevRGBAddr + (refRgbOffset + 2)*prevAddrIncVec)!!
|
||||||
|
|
||||||
vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, refR)
|
|
||||||
vm.poke(currentRGBAddr + (dstRgbOffset + 1)*thisAddrIncVec, refG)
|
if (debugMotionVectors) {
|
||||||
vm.poke(currentRGBAddr + (dstRgbOffset + 2)*thisAddrIncVec, refB)
|
// Debug: Color INTER blocks by motion vector magnitude
|
||||||
|
val mvMagnitude = kotlin.math.sqrt((mvX * mvX + mvY * mvY).toDouble()).toInt()
|
||||||
|
val intensity = (mvMagnitude * 8).coerceIn(0, 255) // Scale for visibility
|
||||||
|
|
||||||
|
vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, intensity.toByte()) // R = MV magnitude
|
||||||
|
vm.poke(currentRGBAddr + (dstRgbOffset + 1)*thisAddrIncVec, 0.toByte()) // G = 0
|
||||||
|
vm.poke(currentRGBAddr + (dstRgbOffset + 2)*thisAddrIncVec, (255-intensity).toByte()) // B = inverse
|
||||||
|
} else {
|
||||||
|
vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, refR)
|
||||||
|
vm.poke(currentRGBAddr + (dstRgbOffset + 1)*thisAddrIncVec, refG)
|
||||||
|
vm.poke(currentRGBAddr + (dstRgbOffset + 2)*thisAddrIncVec, refB)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Invalid RGB offset - use black
|
||||||
|
vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, 0.toByte()) // R=0
|
||||||
|
vm.poke(currentRGBAddr + (dstRgbOffset + 1)*thisAddrIncVec, 0.toByte()) // G=0
|
||||||
|
vm.poke(currentRGBAddr + (dstRgbOffset + 2)*thisAddrIncVec, 0.toByte()) // B=0
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Out of bounds - use black
|
// Out of bounds - use black
|
||||||
vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, 0.toByte()) // R=0
|
vm.poke(currentRGBAddr + dstRgbOffset*thisAddrIncVec, 0.toByte()) // R=0
|
||||||
@@ -1905,14 +1925,22 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
for (dx in 0 until 16) {
|
for (dx in 0 until 16) {
|
||||||
val x = startX + dx
|
val x = startX + dx
|
||||||
val y = startY + dy
|
val y = startY + dy
|
||||||
val refX = x + mvX
|
val refX = x + mvX // Revert to original motion compensation
|
||||||
val refY = y + mvY
|
val refY = y + mvY
|
||||||
|
|
||||||
|
// DEBUG: Log motion compensation coordinates for red trails
|
||||||
|
if (x == 168 && y == 236) {
|
||||||
|
println("INTER MV DEBUG (red): x=$x y=$y refX=$refX refY=$refY mvX=$mvX mvY=$mvY")
|
||||||
|
}
|
||||||
|
if (x == 342 && y == 232) {
|
||||||
|
println("INTER MV DEBUG (magenta): x=$x y=$y refX=$refX refY=$refY mvX=$mvX mvY=$mvY")
|
||||||
|
}
|
||||||
val pixelIdx = dy * 16 + dx
|
val pixelIdx = dy * 16 + dx
|
||||||
|
|
||||||
if (x < width && y < height) {
|
if (x < width && y < height) {
|
||||||
var mcY: Int
|
var mcY: Int
|
||||||
|
|
||||||
if (refX in 0 until width && refY in 0 until height) {
|
if (refX >= 0 && refY >= 0 && refX < width && refY < height) {
|
||||||
// Get motion-compensated RGB from previous frame
|
// Get motion-compensated RGB from previous frame
|
||||||
val refPixelOffset = refY.toLong() * width + refX
|
val refPixelOffset = refY.toLong() * width + refX
|
||||||
val refRgbOffset = refPixelOffset * 3
|
val refRgbOffset = refPixelOffset * 3
|
||||||
@@ -1921,20 +1949,21 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val mcG = vm.peek(prevRGBAddr + (refRgbOffset + 1)*prevAddrIncVec)!!.toUint().toInt()
|
val mcG = vm.peek(prevRGBAddr + (refRgbOffset + 1)*prevAddrIncVec)!!.toUint().toInt()
|
||||||
val mcB = vm.peek(prevRGBAddr + (refRgbOffset + 2)*prevAddrIncVec)!!.toUint().toInt()
|
val mcB = vm.peek(prevRGBAddr + (refRgbOffset + 2)*prevAddrIncVec)!!.toUint().toInt()
|
||||||
|
|
||||||
|
|
||||||
// Convert motion-compensated RGB to Y only
|
// Convert motion-compensated RGB to Y only
|
||||||
val co = mcR - mcB
|
val co = mcR - mcB
|
||||||
val tmp = mcB + (co / 2)
|
val tmp = mcB + (co / 2)
|
||||||
val cg = mcG - tmp
|
val cg = mcG - tmp
|
||||||
val yVal = tmp + (cg / 2)
|
val yVal = tmp + (cg / 2)
|
||||||
|
|
||||||
mcY = yVal
|
mcY = yVal // Keep full 0-255 range for prediction
|
||||||
} else {
|
} else {
|
||||||
// Out of bounds reference - use neutral values
|
// Out of bounds reference - use neutral gray (128)
|
||||||
mcY = 128
|
mcY = 128
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add Y residual (subtract 128 bias added by IDCT)
|
// Add Y residual to the prediction: final = prediction + (IDCT output - 128 IDCT bias - 128 encoder bias)
|
||||||
val residual = yResidual[pixelIdx] - 128
|
val residual = yResidual[pixelIdx] - 128 - 128 // Remove both IDCT bias and encoder's +128
|
||||||
finalY[pixelIdx] = (mcY + residual).coerceIn(0, 255)
|
finalY[pixelIdx] = (mcY + residual).coerceIn(0, 255)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1994,9 +2023,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
mcCg = 0
|
mcCg = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add chroma residuals - no clamping to see if that's the issue
|
// Add chroma residuals with clamping to prevent overflow artifacts
|
||||||
finalCo[chromaIdx] = mcCo + coResidual[chromaIdx]
|
finalCo[chromaIdx] = (mcCo + coResidual[chromaIdx]).coerceIn(-256, 255)
|
||||||
finalCg[chromaIdx] = mcCg + cgResidual[chromaIdx]
|
finalCg[chromaIdx] = (mcCg + cgResidual[chromaIdx]).coerceIn(-256, 255)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2010,13 +2039,27 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val x = startX + dx
|
val x = startX + dx
|
||||||
val y = startY + dy
|
val y = startY + dy
|
||||||
if (x < width && y < height) {
|
if (x < width && y < height) {
|
||||||
val rgbIdx = (dy * 16 + dx) * 3
|
|
||||||
val imageOffset = y.toLong() * width + x
|
val imageOffset = y.toLong() * width + x
|
||||||
val bufferOffset = imageOffset * 3
|
val bufferOffset = imageOffset * 3
|
||||||
|
|
||||||
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, finalRgb[rgbIdx].toByte())
|
if (debugMotionVectors) {
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, finalRgb[rgbIdx + 1].toByte())
|
// Debug: Color INTER blocks by motion vector magnitude
|
||||||
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, finalRgb[rgbIdx + 2].toByte())
|
val mvMagnitude = kotlin.math.sqrt((mvX * mvX + mvY * mvY).toDouble()).toInt()
|
||||||
|
val intensity = (mvMagnitude * 8).coerceIn(0, 255) // Scale for visibility
|
||||||
|
|
||||||
|
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, intensity.toByte()) // R = MV magnitude
|
||||||
|
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, 0.toByte()) // G = 0
|
||||||
|
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, (255-intensity).toByte()) // B = inverse
|
||||||
|
} else {
|
||||||
|
val rgbIdx = (dy * 16 + dx) * 3
|
||||||
|
val finalR = finalRgb[rgbIdx]
|
||||||
|
val finalG = finalRgb[rgbIdx + 1]
|
||||||
|
val finalB = finalRgb[rgbIdx + 2]
|
||||||
|
|
||||||
|
vm.poke(currentRGBAddr + bufferOffset*thisAddrIncVec, finalR.toByte())
|
||||||
|
vm.poke(currentRGBAddr + (bufferOffset + 1)*thisAddrIncVec, finalG.toByte())
|
||||||
|
vm.poke(currentRGBAddr + (bufferOffset + 2)*thisAddrIncVec, finalB.toByte())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -254,8 +254,8 @@ static const uint8_t QUANT_TABLES_C[8][64] = {
|
|||||||
#define MP2_DEFAULT_PACKET_SIZE 0x240
|
#define MP2_DEFAULT_PACKET_SIZE 0x240
|
||||||
|
|
||||||
// Encoding parameters
|
// Encoding parameters
|
||||||
#define MAX_MOTION_SEARCH 16
|
#define MAX_MOTION_SEARCH 32
|
||||||
#define KEYFRAME_INTERVAL 30
|
#define KEYFRAME_INTERVAL 120
|
||||||
#define BLOCK_SIZE 16 // 16x16 blocks now
|
#define BLOCK_SIZE 16 // 16x16 blocks now
|
||||||
|
|
||||||
// Default values
|
// Default values
|
||||||
@@ -322,10 +322,10 @@ static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int
|
|||||||
*cg = (int)g - tmp;
|
*cg = (int)g - tmp;
|
||||||
*y = tmp + ((*cg) / 2);
|
*y = tmp + ((*cg) / 2);
|
||||||
|
|
||||||
// Clamp to valid ranges (YCoCg-R should be roughly -128 to +127)
|
// Clamp to valid ranges (Y: 0 to 255; Co/Cg: -256 to +255)
|
||||||
*y = CLAMP(*y, 0, 255);
|
*y = CLAMP(*y, 0, 255);
|
||||||
*co = CLAMP(*co, -128, 127);
|
*co = CLAMP(*co, -256, 255);
|
||||||
*cg = CLAMP(*cg, -128, 127);
|
*cg = CLAMP(*cg, -256, 255);
|
||||||
}
|
}
|
||||||
|
|
||||||
// YCoCg-R to RGB transform (for verification - per YCoCg-R specification)
|
// YCoCg-R to RGB transform (for verification - per YCoCg-R specification)
|
||||||
@@ -341,8 +341,36 @@ static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t
|
|||||||
*b = CLAMP(*b, 0, 255);
|
*b = CLAMP(*b, 0, 255);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 16x16 2D DCT
|
// Pre-calculated cosine tables
|
||||||
|
static float dct_table_16[16][16]; // For 16x16 DCT
|
||||||
|
static float dct_table_8[8][8]; // For 8x8 DCT
|
||||||
|
static int tables_initialized = 0;
|
||||||
|
|
||||||
|
// Initialize the pre-calculated tables
|
||||||
|
// Fills the shared cosine lookup tables read by dct_16x16() and dct_8x8():
//   dct_table_16[u][x] = cos((2x + 1) * u * pi / 32)
//   dct_table_8[u][x]  = cos((2x + 1) * u * pi / 16)
// Only the raw cosine values are stored. The tables_initialized flag makes
// every call after the first return immediately, so callers may invoke this
// unconditionally before using the tables.
static void init_dct_tables(void) {
|
||||||
|
if (tables_initialized) return; // tables were already filled by an earlier call
|
||||||
|
|
||||||
|
// Pre-calculate cosine values for 16x16 DCT
|
||||||
|
for (int u = 0; u < 16; u++) {
|
||||||
|
for (int x = 0; x < 16; x++) {
|
||||||
|
dct_table_16[u][x] = cosf((2.0f * x + 1.0f) * u * M_PI / 32.0f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pre-calculate cosine values for 8x8 DCT
|
||||||
|
for (int u = 0; u < 8; u++) {
|
||||||
|
for (int x = 0; x < 8; x++) {
|
||||||
|
dct_table_8[u][x] = cosf((2.0f * x + 1.0f) * u * M_PI / 16.0f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tables_initialized = 1; // mark the tables as ready so later calls are no-ops
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optimized 16x16 2D DCT
|
||||||
static void dct_16x16(float *input, float *output) {
|
static void dct_16x16(float *input, float *output) {
|
||||||
|
init_dct_tables(); // Ensure tables are initialized
|
||||||
|
|
||||||
for (int u = 0; u < 16; u++) {
|
for (int u = 0; u < 16; u++) {
|
||||||
for (int v = 0; v < 16; v++) {
|
for (int v = 0; v < 16; v++) {
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
@@ -352,8 +380,8 @@ static void dct_16x16(float *input, float *output) {
|
|||||||
for (int x = 0; x < 16; x++) {
|
for (int x = 0; x < 16; x++) {
|
||||||
for (int y = 0; y < 16; y++) {
|
for (int y = 0; y < 16; y++) {
|
||||||
sum += input[y * 16 + x] *
|
sum += input[y * 16 + x] *
|
||||||
cosf((2.0f * x + 1.0f) * u * M_PI / 32.0f) *
|
dct_table_16[u][x] *
|
||||||
cosf((2.0f * y + 1.0f) * v * M_PI / 32.0f);
|
dct_table_16[v][y];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -362,8 +390,10 @@ static void dct_16x16(float *input, float *output) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 8x8 2D DCT (for chroma)
|
// Optimized 8x8 2D DCT (for chroma)
|
||||||
static void dct_8x8(float *input, float *output) {
|
static void dct_8x8(float *input, float *output) {
|
||||||
|
init_dct_tables(); // Ensure tables are initialized
|
||||||
|
|
||||||
for (int u = 0; u < 8; u++) {
|
for (int u = 0; u < 8; u++) {
|
||||||
for (int v = 0; v < 8; v++) {
|
for (int v = 0; v < 8; v++) {
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
@@ -373,8 +403,8 @@ static void dct_8x8(float *input, float *output) {
|
|||||||
for (int x = 0; x < 8; x++) {
|
for (int x = 0; x < 8; x++) {
|
||||||
for (int y = 0; y < 8; y++) {
|
for (int y = 0; y < 8; y++) {
|
||||||
sum += input[y * 8 + x] *
|
sum += input[y * 8 + x] *
|
||||||
cosf((2.0f * x + 1.0f) * u * M_PI / 16.0f) *
|
dct_table_8[u][x] *
|
||||||
cosf((2.0f * y + 1.0f) * v * M_PI / 16.0f);
|
dct_table_8[v][y];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -387,7 +417,7 @@ static void dct_8x8(float *input, float *output) {
|
|||||||
static int16_t quantize_coeff(float coeff, uint8_t quant, int is_dc, int is_chroma) {
|
static int16_t quantize_coeff(float coeff, uint8_t quant, int is_dc, int is_chroma) {
|
||||||
if (is_dc) {
|
if (is_dc) {
|
||||||
if (is_chroma) {
|
if (is_chroma) {
|
||||||
// Chroma DC: range -255 to +255, use lossless quantization for testing
|
// Chroma DC: range -256 to +255, use lossless quantization for testing
|
||||||
return (int16_t)roundf(coeff);
|
return (int16_t)roundf(coeff);
|
||||||
} else {
|
} else {
|
||||||
// Luma DC: range -128 to +127, use lossless quantization for testing
|
// Luma DC: range -128 to +127, use lossless quantization for testing
|
||||||
@@ -475,8 +505,8 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
|
|||||||
// Search in range [-16, +16] pixels
|
// Search in range [-MAX_MOTION_SEARCH, +MAX_MOTION_SEARCH] pixels
|
||||||
for (int mv_y = -MAX_MOTION_SEARCH; mv_y <= MAX_MOTION_SEARCH; mv_y++) {
|
for (int mv_y = -MAX_MOTION_SEARCH; mv_y <= MAX_MOTION_SEARCH; mv_y++) {
|
||||||
for (int mv_x = -MAX_MOTION_SEARCH; mv_x <= MAX_MOTION_SEARCH; mv_x++) {
|
for (int mv_x = -MAX_MOTION_SEARCH; mv_x <= MAX_MOTION_SEARCH; mv_x++) {
|
||||||
int ref_x = start_x + mv_x;
|
int ref_x = start_x - mv_x; // Motion estimation: where did current block come FROM?
|
||||||
int ref_y = start_y + mv_y;
|
int ref_y = start_y - mv_y;
|
||||||
|
|
||||||
// Check bounds
|
// Check bounds
|
||||||
if (ref_x < 0 || ref_y < 0 ||
|
if (ref_x < 0 || ref_y < 0 ||
|
||||||
@@ -559,8 +589,8 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Average and store subsampled chroma
|
// Average and store subsampled chroma
|
||||||
co_block[cy * 8 + cx] = CLAMP(sum_co / 4, -128, 127);
|
co_block[cy * 8 + cx] = CLAMP(sum_co / 4, -256, 255);
|
||||||
cg_block[cy * 8 + cx] = CLAMP(sum_cg / 4, -128, 127);
|
cg_block[cy * 8 + cx] = CLAMP(sum_cg / 4, -256, 255);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -576,7 +606,7 @@ static void extract_motion_compensated_block(const uint8_t *rgb_data, int width,
|
|||||||
for (int dx = 0; dx < 16; dx++) {
|
for (int dx = 0; dx < 16; dx++) {
|
||||||
int cur_x = block_x + dx;
|
int cur_x = block_x + dx;
|
||||||
int cur_y = block_y + dy;
|
int cur_y = block_y + dy;
|
||||||
int ref_x = cur_x + mv_x;
|
int ref_x = cur_x + mv_x; // Revert to original motion compensation
|
||||||
int ref_y = cur_y + mv_y;
|
int ref_y = cur_y + mv_y;
|
||||||
|
|
||||||
int rgb_idx = (dy * 16 + dx) * 3;
|
int rgb_idx = (dy * 16 + dx) * 3;
|
||||||
@@ -613,9 +643,9 @@ static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y
|
|||||||
ref_y, ref_co, ref_cg);
|
ref_y, ref_co, ref_cg);
|
||||||
|
|
||||||
// Compute residuals: current - motion_compensated_reference
|
// Compute residuals: current - motion_compensated_reference
|
||||||
// Both current and reference Y should be centered around 0 for proper residual DCT
|
// Current is already centered (-128 to +127), reference is 0-255, so subtract and center reference
|
||||||
for (int i = 0; i < 256; i++) {
|
for (int i = 0; i < 256; i++) {
|
||||||
float ref_y_centered = (float)ref_y[i] - 128.0f; // Convert ref to centered like current
|
float ref_y_centered = (float)ref_y[i] - 128.0f; // Center reference to match current
|
||||||
enc->y_workspace[i] = enc->y_workspace[i] - ref_y_centered;
|
enc->y_workspace[i] = enc->y_workspace[i] - ref_y_centered;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -654,13 +684,13 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
if (x < enc->width && y < enc->height) {
|
if (x < enc->width && y < enc->height) {
|
||||||
int cur_offset = (y * enc->width + x) * 3;
|
int cur_offset = (y * enc->width + x) * 3;
|
||||||
|
|
||||||
// Compare current with previous frame (simple luma difference)
|
// Compare current with previous frame (using YCoCg-R Luma calculation)
|
||||||
int cur_luma = (enc->current_rgb[cur_offset] +
|
int cur_luma = (enc->current_rgb[cur_offset] +
|
||||||
enc->current_rgb[cur_offset + 1] +
|
2 * enc->current_rgb[cur_offset + 1] +
|
||||||
enc->current_rgb[cur_offset + 2]) / 3;
|
enc->current_rgb[cur_offset + 2]) / 4;
|
||||||
int prev_luma = (enc->previous_rgb[cur_offset] +
|
int prev_luma = (enc->previous_rgb[cur_offset] +
|
||||||
enc->previous_rgb[cur_offset + 1] +
|
2 * enc->previous_rgb[cur_offset + 1] +
|
||||||
enc->previous_rgb[cur_offset + 2]) / 3;
|
enc->previous_rgb[cur_offset + 2]) / 4;
|
||||||
|
|
||||||
skip_sad += abs(cur_luma - prev_luma);
|
skip_sad += abs(cur_luma - prev_luma);
|
||||||
}
|
}
|
||||||
@@ -688,12 +718,13 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
int cur_offset = (cur_y * enc->width + cur_x) * 3;
|
int cur_offset = (cur_y * enc->width + cur_x) * 3;
|
||||||
int ref_offset = (ref_y * enc->width + ref_x) * 3;
|
int ref_offset = (ref_y * enc->width + ref_x) * 3;
|
||||||
|
|
||||||
|
// use YCoCg-R Luma calculation
|
||||||
int cur_luma = (enc->current_rgb[cur_offset] +
|
int cur_luma = (enc->current_rgb[cur_offset] +
|
||||||
enc->current_rgb[cur_offset + 1] +
|
2 * enc->current_rgb[cur_offset + 1] +
|
||||||
enc->current_rgb[cur_offset + 2]) / 3;
|
enc->current_rgb[cur_offset + 2]) / 4;
|
||||||
int ref_luma = (enc->previous_rgb[ref_offset] +
|
int ref_luma = (enc->previous_rgb[ref_offset] +
|
||||||
enc->previous_rgb[ref_offset + 1] +
|
2 * enc->previous_rgb[ref_offset + 1] +
|
||||||
enc->previous_rgb[ref_offset + 2]) / 3;
|
enc->previous_rgb[ref_offset + 2]) / 4;
|
||||||
|
|
||||||
motion_sad += abs(cur_luma - ref_luma);
|
motion_sad += abs(cur_luma - ref_luma);
|
||||||
} else {
|
} else {
|
||||||
@@ -716,7 +747,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
|
memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
|
||||||
enc->blocks_skip++;
|
enc->blocks_skip++;
|
||||||
return; // Skip DCT encoding entirely
|
return; // Skip DCT encoding entirely
|
||||||
} else if (motion_sad < skip_sad && motion_sad <= 128 &&
|
} else if (motion_sad < skip_sad && motion_sad <= 1024 &&
|
||||||
(abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
|
(abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
|
||||||
// Good motion prediction - use motion-only mode
|
// Good motion prediction - use motion-only mode
|
||||||
block->mode = TEV_MODE_MOTION;
|
block->mode = TEV_MODE_MOTION;
|
||||||
@@ -728,12 +759,29 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
enc->blocks_motion++;
|
enc->blocks_motion++;
|
||||||
return; // Skip DCT encoding, just store motion vector
|
return; // Skip DCT encoding, just store motion vector
|
||||||
} else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
|
} else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
|
||||||
// Use inter mode with residual DCT - motion compensation + residual
|
// Motion compensation with threshold
|
||||||
block->mode = TEV_MODE_INTER;
|
if (motion_sad <= 1024) {
|
||||||
enc->blocks_inter++;
|
block->mode = TEV_MODE_MOTION;
|
||||||
|
block->cbp = 0x00; // No coefficients present
|
||||||
|
memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
|
||||||
|
memset(block->co_coeffs, 0, sizeof(block->co_coeffs));
|
||||||
|
memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
|
||||||
|
enc->blocks_motion++;
|
||||||
|
return; // Skip DCT encoding, just store motion vector
|
||||||
|
}
|
||||||
|
|
||||||
// Compute motion-compensated residual for DCT encoding
|
// Use INTER mode with motion vector and residuals
|
||||||
compute_motion_residual(enc, block_x, block_y, block->mv_x, block->mv_y);
|
if (abs(block->mv_x) <= 24 && abs(block->mv_y) <= 24) {
|
||||||
|
block->mode = TEV_MODE_INTER;
|
||||||
|
enc->blocks_inter++;
|
||||||
|
} else {
|
||||||
|
// Motion vector too large, fall back to INTRA
|
||||||
|
block->mode = TEV_MODE_INTRA;
|
||||||
|
block->mv_x = 0;
|
||||||
|
block->mv_y = 0;
|
||||||
|
enc->blocks_intra++;
|
||||||
|
return;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// No good motion prediction - use intra mode
|
// No good motion prediction - use intra mode
|
||||||
block->mode = TEV_MODE_INTRA;
|
block->mode = TEV_MODE_INTRA;
|
||||||
@@ -781,6 +829,8 @@ static tev_encoder_t* init_encoder(void) {
|
|||||||
enc->quality = 4; // Default quality
|
enc->quality = 4; // Default quality
|
||||||
enc->mp2_packet_size = MP2_DEFAULT_PACKET_SIZE;
|
enc->mp2_packet_size = MP2_DEFAULT_PACKET_SIZE;
|
||||||
|
|
||||||
|
init_dct_tables();
|
||||||
|
|
||||||
return enc;
|
return enc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1208,10 +1258,10 @@ int main(int argc, char *argv[]) {
|
|||||||
// Handle test mode or real video
|
// Handle test mode or real video
|
||||||
if (test_mode) {
|
if (test_mode) {
|
||||||
// Test mode: generate solid color frames
|
// Test mode: generate solid color frames
|
||||||
enc->fps = 5; // 5 test frames
|
enc->fps = 1;
|
||||||
enc->total_frames = 5;
|
enc->total_frames = 15;
|
||||||
enc->has_audio = 0;
|
enc->has_audio = 0;
|
||||||
printf("Test mode: Generating 5 solid color frames (black, white, red, green, blue)\n");
|
printf("Test mode: Generating 15 solid color frames\n");
|
||||||
} else {
|
} else {
|
||||||
// Get video metadata and start FFmpeg processes
|
// Get video metadata and start FFmpeg processes
|
||||||
if (!get_video_metadata(enc)) {
|
if (!get_video_metadata(enc)) {
|
||||||
@@ -1271,11 +1321,21 @@ int main(int argc, char *argv[]) {
|
|||||||
const char* color_name = "unknown";
|
const char* color_name = "unknown";
|
||||||
|
|
||||||
switch (frame_count) {
|
switch (frame_count) {
|
||||||
case 0: test_r = 0; test_g = 0; test_b = 0; color_name = "black"; break; // Black
|
case 0: test_r = 0; test_g = 0; test_b = 0; color_name = "black"; break;
|
||||||
case 1: test_r = 255; test_g = 255; test_b = 255; color_name = "white"; break; // White
|
case 1: test_r = 127; test_g = 127; test_b = 127; color_name = "grey"; break;
|
||||||
case 2: test_r = 255; test_g = 0; test_b = 0; color_name = "red"; break; // Red
|
case 2: test_r = 255; test_g = 255; test_b = 255; color_name = "white"; break;
|
||||||
case 3: test_r = 0; test_g = 255; test_b = 0; color_name = "green"; break; // Green
|
case 3: test_r = 127; test_g = 0; test_b = 0; color_name = "half red"; break;
|
||||||
case 4: test_r = 0; test_g = 0; test_b = 255; color_name = "blue"; break; // Blue
|
case 4: test_r = 127; test_g = 127; test_b = 0; color_name = "half yellow"; break;
|
||||||
|
case 5: test_r = 0; test_g = 127; test_b = 0; color_name = "half green"; break;
|
||||||
|
case 6: test_r = 0; test_g = 127; test_b = 127; color_name = "half cyan"; break;
|
||||||
|
case 7: test_r = 0; test_g = 0; test_b = 127; color_name = "half blue"; break;
|
||||||
|
case 8: test_r = 127; test_g = 0; test_b = 127; color_name = "half magenta"; break;
|
||||||
|
case 9: test_r = 255; test_g = 0; test_b = 0; color_name = "red"; break;
|
||||||
|
case 10: test_r = 255; test_g = 255; test_b = 0; color_name = "yellow"; break;
|
||||||
|
case 11: test_r = 0; test_g = 255; test_b = 0; color_name = "green"; break;
|
||||||
|
case 12: test_r = 0; test_g = 255; test_b = 255; color_name = "cyan"; break;
|
||||||
|
case 13: test_r = 0; test_g = 0; test_b = 255; color_name = "blue"; break;
|
||||||
|
case 14: test_r = 255; test_g = 0; test_b = 255; color_name = "magenta"; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill entire frame with solid color
|
// Fill entire frame with solid color
|
||||||
|
|||||||
Reference in New Issue
Block a user