first working (and crappy) video delta encoding

This commit is contained in:
minjaesong
2025-04-18 21:00:46 +09:00
parent 4c4f24be37
commit d9af28eed2
5 changed files with 398 additions and 105 deletions

View File

@@ -0,0 +1,242 @@
// some manual configurations
//
let IPFMODE = 3 // iPF mode selector; only 3 (iPF1-delta) is accepted -- see the check further below
let TOTAL_FRAMES = 800 // number of PNG frames to encode
let FPS = 24 // must be integer
let WIDTH = 560 // output frame width in pixels
let HEIGHT = 448 // output frame height in pixels
let PATHFUN = (i) => `C:/welkom/${(''+i).padStart(4,'0')}.png` // maps a 1-based frame number to the path of its image file
let AUDIOTRACK = 'C:/welkom.pcm' // audio track path; falsy value means no audio
let AUDIOFORMAT = 'PCMu8' // undefined or PCMu8 or MP2fr
// to export video to its frames (with automatic scaling and cropping):
// ffmpeg -i file.mp4 -vf scale=560:448:force_original_aspect_ratio=increase,crop=560:448 file/%05d.png
//
// to convert audio to MP2:
// ffmpeg -i file.mp4 -acodec libtwolame -psymodel 4 -b:a <rate>k -ar 32000 output.mp2
//
// end of manual configuration
let MP2_RATE_INDEX; // NOTE(review): never assigned or read below -- confirm whether this can be removed
let MP2_PACKETSIZE; // byte size of one MP2 frame; discovered lazily from the first MP2 header in the loop
const DECODE_TIME_FACTOR = 1.000 // fudge factor for player-side MP2 decode speed relative to realtime
let outfilename = exec_args[1]
if (!outfilename) {
println("Usage: encodemov2 <outfile>")
return 1
}
const FBUF_SIZE = WIDTH * HEIGHT // bytes in one packed iPF1 framebuffer
// scratch buffers (raw pointers into VM memory); freed at the bottom of the script
let infile = sys.malloc(512000) // somewhat arbitrary
let imagearea = sys.malloc(FBUF_SIZE*3) // decoded image, up to 3 channels
let decodearea = sys.malloc(FBUF_SIZE) // presumably for displayable-format conversion -- currently only used by the commented-out call below
let ipfarea1 = sys.malloc(FBUF_SIZE)
let ipfarea2 = sys.malloc(FBUF_SIZE)
let ipfDelta = sys.malloc(FBUF_SIZE) // delta-encoded payload between two consecutive iPF1 frames
let gzippedImage = sys.malloc(512000) // somewhat arbitrary
let outfile = files.open(_G.shell.resolvePathInput(outfilename).full)
// Append a plain byte array to the output movie file.
function appendToOutfile(byteArray) {
    outfile.bappend(byteArray)
}
// Append `byteCount` bytes read from VM memory at `srcPtr` to the output movie file.
function appendToOutfilePtr(srcPtr, byteCount) {
    outfile.pappend(srcPtr, byteCount)
}
// Resolve the configured AUDIOFORMAT to its [type byte, subtype byte] packet descriptor.
// Unknown/undefined formats fall back to [255, 16].
function audioFormatToAudioPacketType() {
    switch (AUDIOFORMAT) {
        case "PCMu8": return [1, 16]
        case "MP2fr": return [255, 17]
        default: return [255, 16]
    }
}
// packet type descriptors: [type byte, subtype byte] pairs
const videoPacketType = [4, (IPFMODE - 1)] // video packet in the configured iPF mode
const videoPacketTypeIPF1 = [4, (1 - 1)] // full (non-delta) iPF1 keyframe packet
const syncPacket = [255, 255] // frame boundary marker
const AUDIO_SAMPLE_SIZE = 2 * (((32000 / FPS) + 1)|0) // times 2 because stereo
const AUDIO_BLOCK_SIZE = ("MP2fr" == AUDIOFORMAT) ? 0x240 : 0 // MP2 block size advertised in the file header; 0 for PCM
const AUDIO_QUEUE_SIZE = ("MP2fr" == AUDIOFORMAT) ? Math.ceil(AUDIO_SAMPLE_SIZE / (2304 * DECODE_TIME_FACTOR)) + 1 : 0 // player-side audio queue depth for MP2
// write header to the file
let headerBytes = [
0x1F, 0x54, 0x53, 0x56, 0x4D, 0x4D, 0x4F, 0x56, // magic
WIDTH & 255, (WIDTH >> 8) & 255, // width
HEIGHT & 255, (HEIGHT >> 8) & 255, // height
FPS & 255, (FPS >> 8) & 255, // FPS
TOTAL_FRAMES & 255, (TOTAL_FRAMES >> 8) & 255, (TOTAL_FRAMES >> 16) & 255, (TOTAL_FRAMES >> 24) & 255, // frame count
0xFF, 0x00, // new standard deprecates global type
AUDIO_BLOCK_SIZE & 255, (AUDIO_BLOCK_SIZE >>> 8) | (AUDIO_QUEUE_SIZE << 4), // block size low byte, then high byte with queue size packed into the upper nybble -- TODO confirm against the container spec
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // reserved
]
// only the iPF1-delta pipeline below is implemented, despite IPFMODE looking configurable
if (IPFMODE != 3) throw Error("Unknown IPF mode "+IPFMODE)
let audioBytesRead = 0 // absolute read offset into the audio file
const audioFile = (AUDIOTRACK) ? files.open(_G.shell.resolvePathInput(AUDIOTRACK).full) : undefined
let audioRemaining = (audioFile) ? audioFile.size : 0 // bytes of audio not yet muxed
const audioPacketType = audioFormatToAudioPacketType() // element [0] is overwritten with the real rate index once the first MP2 frame is probed
outfile.bwrite(headerBytes)
// How many audio packets to emit before frame `fnum`'s video packet.
// The first frame gets extra audio so the player's queue starts pre-filled.
// NOTE(review): implicitly returns undefined when AUDIOFORMAT is neither PCMu8
// nor MP2fr; the caller then runs a zero-iteration loop -- confirm this is intended.
function getRepeatCount(fnum) {
if ("PCMu8" == AUDIOFORMAT) {
// two packets up front, then exactly one per video frame
return (fnum == 1) ? 2 : 1
}
else if ("MP2fr" == AUDIOFORMAT) {
// top up with however many MP2 frames are owed for one video frame's worth of samples
// (audioSamplesWrote is the running ledger maintained by the main loop)
let r = Math.ceil((AUDIO_SAMPLE_SIZE - audioSamplesWrote) / AUDIO_SAMPLE_SIZE) * ((fnum == 1) ? 2 : 1)
// frame 2 emits exactly one; past the last video frame, flush everything that remains
return (fnum == 2) ? 1 : (fnum > TOTAL_FRAMES) ? Math.ceil(audioRemaining / MP2_PACKETSIZE) : r
}
}
// Translate an MP2 frame byte size into the audio packet rate index.
// Stereo frame sizes map to even indices; `isMono` (a boolean, numerically
// coerced to 0 or 1 on addition) selects the odd mono variant.
// Throws Error for any packet size not in the known table.
function mp2PacketSizeToRateIndex(packetSize, isMono) {
    const RATE_INDEX_BY_PACKET_SIZE = {
        144: 0, 216: 2, 252: 4, 288: 6,
        360: 8, 432: 10, 504: 12, 576: 14,
        720: 16, 864: 18, 1008: 20, 1152: 22,
        1440: 24, 1728: 26
    }
    const base = RATE_INDEX_BY_PACKET_SIZE[packetSize]
    if (base === undefined) throw Error("Unknown MP2 Packet Size: "+packetSize)
    return base + isMono
}
// ping-pong buffers: `old` holds the previous frame's iPF1 image, `new` receives the current one
let ipfAreaOld = ipfarea2
let ipfAreaNew = ipfarea1
let audioSamplesWrote = 0 // running ledger of samples covered by emitted MP2 packets (MP2 path only)
// main mux loop: one iteration per video frame; keeps going past TOTAL_FRAMES until audio is drained
for (let f = 1; ; f++) {
// insert sync packet
if (f > 1) appendToOutfile(syncPacket)
// insert audio track, if any
if (audioRemaining > 0) {
// first frame gets two audio packets
let rrrr = getRepeatCount(f) // must be called only once
for (let q = 0; q < rrrr; q++) {
print(`Frame ${f}/${TOTAL_FRAMES} (${AUDIOFORMAT}) ->`)
serial.print(`Frame ${f}/${TOTAL_FRAMES} (${AUDIOFORMAT}) ->`)
// read a chunk/mpeg-frame
let actualBytesToRead;
if ("PCMu8" == AUDIOFORMAT) {
// alternates chunk sizes between frames -- presumably so frame pairs cover the
// fractional 32000/24 samples-per-frame exactly; TODO confirm intent
actualBytesToRead = Math.min(
(f % 2 == 1) ? AUDIO_SAMPLE_SIZE : AUDIO_SAMPLE_SIZE + 2,
audioRemaining
)
audioFile.pread(infile, actualBytesToRead, audioBytesRead)
}
else if ("MP2fr" == AUDIOFORMAT) {
// lazily probe the first MP2 header to learn the fixed frame size and rate index
if (!MP2_PACKETSIZE) {
audioFile.pread(infile, 3, 0)
MP2_PACKETSIZE = audio.mp2GetInitialFrameSize([sys.peek(infile),sys.peek(infile+1),sys.peek(infile+2)])
// NOTE(review): only 3 bytes were read above, yet this peeks infile+4 for the mono
// flag -- verify it is not reading stale buffer memory (MP2 mode bits sit in header byte 3)
audioPacketType[0] = mp2PacketSizeToRateIndex(MP2_PACKETSIZE, sys.peek(infile+4) >> 6 == 3)
}
actualBytesToRead = Math.min(MP2_PACKETSIZE, audioRemaining)
audioFile.pread(infile, actualBytesToRead, audioBytesRead)
if (f > 1) audioSamplesWrote += 2304 / DECODE_TIME_FACTOR // a little hack to ensure first 2 or so frames get more MP2 frames than they should
}
else if (AUDIOFORMAT !== undefined) throw Error("Unknown audio format: " + AUDIOFORMAT)
// writeout
// 32-bit little-endian payload length (only written for non-MP2 packets below)
let audioSize = [
(actualBytesToRead >>> 0) & 255,
(actualBytesToRead >>> 8) & 255,
(actualBytesToRead >>> 16) & 255,
(actualBytesToRead >>> 24) & 255
]
appendToOutfile(audioPacketType)
if ("MP2fr" != AUDIOFORMAT) appendToOutfile(audioSize);
appendToOutfilePtr(infile, actualBytesToRead)
print(` ${actualBytesToRead} bytes\n`)
serial.print(` ${actualBytesToRead} bytes\n`)
audioBytesRead += actualBytesToRead
audioRemaining -= actualBytesToRead
}
}
// insert video frame
if (f <= TOTAL_FRAMES) {
let fname = PATHFUN(f)
let framefile = files.open(_G.shell.resolvePathInput(fname).full)
let fileLen = framefile.size
framefile.pread(infile, fileLen)
let [_1, _2, channels, _3] = graphics.decodeImageTo(infile, fileLen, imagearea)
print(`Frame ${f}/${TOTAL_FRAMES} (Ch: ${channels}) ->`)
serial.print(`Frame ${f}/${TOTAL_FRAMES} (Ch: ${channels}) ->`)
// graphics.imageToDisplayableFormat(imagearea, decodearea, 560, 448, 3, 1)
graphics.encodeIpf1(imagearea, ipfAreaNew, WIDTH, HEIGHT, channels, false, 0)
// get the difference map
let patchEncodedSize = graphics.encodeIpf1d(ipfAreaOld, ipfAreaNew, ipfDelta, WIDTH, HEIGHT)
// force a full keyframe for the first frame, or when the delta barely beats a full frame (>90%)
if (f < 2 || patchEncodedSize > WIDTH * HEIGHT * 0.90) patchEncodedSize = 0
// decide whether or not the patch encoding should be used
let gzlen = gzip.compFromTo(
(patchEncodedSize) ? ipfDelta : ipfAreaNew,
patchEncodedSize || FBUF_SIZE,
gzippedImage
)
// 32-bit little-endian gzipped payload length
let frameSize = [
(gzlen >>> 0) & 255,
(gzlen >>> 8) & 255,
(gzlen >>> 16) & 255,
(gzlen >>> 24) & 255
]
appendToOutfile(patchEncodedSize ? videoPacketType : videoPacketTypeIPF1)
appendToOutfile(frameSize)
appendToOutfilePtr(gzippedImage, gzlen)
if (patchEncodedSize) {
print(` ${gzlen} bytes (${100*(patchEncodedSize / (WIDTH * HEIGHT))} %)\n`)
serial.print(` ${gzlen} bytes (${100*(patchEncodedSize / (WIDTH * HEIGHT))} %)\n`)
}
else {
print(` ${gzlen} bytes\n`)
serial.print(` ${gzlen} bytes\n`)
}
// one video frame's worth of samples has been consumed from the audio ledger
audioSamplesWrote -= AUDIO_SAMPLE_SIZE
// swap two pointers
let t = ipfAreaOld
ipfAreaOld = ipfAreaNew
ipfAreaNew = t
}
// if there is no video and audio remaining, exit the loop
if (f > TOTAL_FRAMES && audioRemaining <= 0) break
}
// release every scratch buffer allocated at the top of the script
sys.free(infile)
sys.free(imagearea)
sys.free(decodearea)
sys.free(ipfarea1)
sys.free(ipfarea2)
sys.free(ipfDelta)
sys.free(gzippedImage)

View File

@@ -122,6 +122,8 @@ while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) {
frameUnit += 1
}
if (frameUnit > 1) frameUnit = 1 // comment to enable frameskip
if (frameUnit != 0) {
// skip frames if necessary
while (!stopPlay && frameUnit >= 1 && seqread.getReadCount() < FILE_LENGTH) {
@@ -210,6 +212,62 @@ while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) {
sys.free(gzippedPtr)
}
// iPF1d
else if (packetType == 516) {
let payloadLen = seqread.readInt()
if (framesRead >= FRAME_COUNT) {
break renderLoop
}
framesRead += 1
let gzippedPtr = seqread.readBytes(payloadLen)
framesRendered += 1
if (frameUnit == 1) {
gzip.decompFromTo(gzippedPtr, payloadLen, ipfbuf) // should return FBUF_SIZE
graphics.applyIpf1d(ipfbuf, -1048577, -1310721, width, height)
// defer audio playback until a first frame is sent
if (!audioFired) {
audio.play(0)
audioFired = true
}
// calculate bgcolour from the edges of the screen
if (AUTO_BGCOLOUR_CHANGE) {
let samples = []
for (let x = 8; x < 560; x+=32) {
samples.push(getRGBfromScr(x, 3))
samples.push(getRGBfromScr(x, 445))
}
for (let y = 29; y < 448; y+=26) {
samples.push(getRGBfromScr(8, y))
samples.push(getRGBfromScr(552, y))
}
let out = [0.0, 0.0, 0.0]
samples.forEach(rgb=>{
out[0] += rgb[0]
out[1] += rgb[1]
out[2] += rgb[2]
})
out[0] = out[0] / samples.length / 2.0 // darken a bit
out[1] = out[1] / samples.length / 2.0
out[2] = out[2] / samples.length / 2.0
let bgr = (oldBgcol[0]*5 + out[0]) / 6.0
let bgg = (oldBgcol[1]*5 + out[1]) / 6.0
let bgb = (oldBgcol[2]*5 + out[2]) / 6.0
oldBgcol = [bgr, bgg, bgb]
graphics.setBackground(Math.round(bgr * 255), Math.round(bgg * 255), Math.round(bgb * 255))
}
}
sys.free(gzippedPtr)
}
else {
throw Error(`Unknown Video Packet with type ${packetType} at offset ${seqread.getReadCount() - 2}`)
}

View File

@@ -603,18 +603,14 @@ iPF2:
iPF1-delta (for video encoding):
Delta encoded frames contain "instructions" for delta-encoding the existing frame.
Or, a collection of [OPCODE | PAYLOAD] pairs
Delta encoded frames contain "instructions" for patch-encoding the existing frame.
Or, a collection of [StateChangeCode] [Optional VarInts] [Payload...] pairs
Opcode:
0x00 : Skip N blocks
payload: (varint) number of 4x4 blocks
0x10 : Patch
payload: (12 bytes) encoded delta block
0x20 : Repeat
payload: (varint) repeat last delta N times
0xF0 : End of delta stream
payload: none
States:
0x00 SKIP [varint skipCount]
0x01 PATCH [varint blockCount] [12x blockCount bytes]
0x02 REPEAT [varint repeatCount] [a block]
0xFF END
Sample stream:
[SKIP 10] [PATCH A] [REPEAT 3] [SKIP 5] [PATCH B] [END]

View File

@@ -6,6 +6,7 @@ import net.torvald.UnsafeHelper
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
import net.torvald.tsvm.peripheral.GraphicsAdapter
import net.torvald.tsvm.peripheral.fmod
import kotlin.math.abs
import kotlin.math.roundToInt
class GraphicsJSR223Delegate(private val vm: VM) {
@@ -627,7 +628,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// TODO oob-check
val ox = blockX * 4 + px
val oy = blockY * 4 + py
val t = bayerKernels[pattern % bayerKernels.size][4 * (py % 4) + (px % 4)]
val t = if (pattern < 0) 0f else bayerKernels[pattern % bayerKernels.size][4 * (py % 4) + (px % 4)]
val offset = channels * (oy * width + ox)
val r0 = vm.peek(srcPtr + offset+0L)!!.toUint() / 255f
@@ -708,107 +709,92 @@ class GraphicsJSR223Delegate(private val vm: VM) {
* @return non-zero if delta-encoded, 0 if delta encoding is worthless
*/
fun encodeIpf1d(
prevIPFptr: Int, // full iPF picture frame for t minus one
newIPFptr: Int, // full iPF picture frame for t equals zero
currentFrame: Int, // where to write delta-encoded payloads to. Not touched if delta-encoding is worthless
previousPtr: Int, // full iPF picture frame for t minus one
currentPtr: Int, // full iPF picture frame for t equals zero
outPtr: Int, // where to write delta-encoded payloads to. Not touched if delta-encoding is worthless
width: Int, height: Int,
inefficiencyThreshold: Double = 0.90
): Int {
val frameSize = width * height
val BLOCK_SIZE = 12
val totalBlocks = frameSize / BLOCK_SIZE
var skipCount = 0
var repeatCount = 0
val temp = ByteArray(frameSize * 2) // Overallocate
var tempPtr = 0
var outOffset = outPtr.toLong()
val blockSize = 12
val blocksPerRow = ceil(width / 4f).toInt()
val blocksPerCol = ceil(height / 4f).toInt()
val tempBlockA = ByteArray(blockSize)
val tempBlockB = ByteArray(blockSize)
var lastDelta = ByteArray(BLOCK_SIZE)
fun readBlock(ptr: Int): ByteArray {
return ByteArray(BLOCK_SIZE) { i -> vm.peek(ptr.toLong() + i)!!.toByte() }
}
// MSB is a "continuation flag"; varint decoding terminates when it sees byte with no MSB set
fun writeVarInt(buf: ByteArray, start: Int, value: Int): Int {
var v = value
var i = 0
while (v >= 0x80) {
buf[start + i] = ((v and 0x7F) or 0x80).toByte()
v = v ushr 7
i++
fun writeVarInt(n: Int) {
var value = n
while (true) {
val part = value and 0x7F
value = value ushr 7
vm.poke(outOffset++, (if (value > 0) (part or 0x80) else part).toByte())
if (value == 0) break
}
buf[start + i] = v.toByte()
return i + 1
}
fun flushSkips() {
if (skipCount > 0) {
temp[tempPtr++] = 0x00
tempPtr += writeVarInt(temp, tempPtr, skipCount)
for (blockIndex in 0 until (blocksPerRow * blocksPerCol)) {
val offsetA = previousPtr.toLong() + blockIndex * blockSize
val offsetB = currentPtr.toLong() + blockIndex * blockSize
for (i in 0 until blockSize) {
tempBlockA[i] = vm.peek(offsetA + i)!!
tempBlockB[i] = vm.peek(offsetB + i)!!
}
if (isSignificantlyDifferent(tempBlockA, tempBlockB)) {
// [skip payload]
if (skipCount > 0) {
vm.poke(outOffset++, SKIP)
writeVarInt(skipCount)
}
skipCount = 0
// [block payload]
vm.poke(outOffset++, PATCH)
for (i in 0 until blockSize) {
vm.poke(outOffset++, tempBlockB[i])
}
}
}
fun flushRepeats() {
if (repeatCount > 0) {
temp[tempPtr++] = 0x20
tempPtr += writeVarInt(temp, tempPtr, repeatCount)
repeatCount = 0
}
}
for (blockIndex in 0 until totalBlocks) {
val offset = blockIndex * BLOCK_SIZE
val prevBlock = readBlock(prevIPFptr + offset)
val currBlock = readBlock(newIPFptr + offset)
val diff = isSignificantlyDifferent(prevBlock, currBlock)
if (!diff) {
if (repeatCount > 0) flushRepeats()
else {
skipCount++
} else if (lastDelta.contentEquals(currBlock)) {
flushSkips()
repeatCount++
} else {
flushSkips()
flushRepeats()
temp[tempPtr++] = 0x10
currBlock.copyInto(temp, tempPtr)
tempPtr += BLOCK_SIZE
lastDelta = currBlock
}
}
flushSkips()
flushRepeats()
temp[tempPtr++] = 0xF0.toByte()
vm.poke(outOffset++, -1)
if (tempPtr >= (frameSize * inefficiencyThreshold).toInt()) {
return 0 // delta is inefficient, do not write
}
// Write delta to memory
if (currentFrame >= 0) {
UnsafeHelper.memcpyRaw(temp, UnsafeHelper.getArrayOffset(temp), null, vm.usermem.ptr + currentFrame, tempPtr.toLong())
}
else {
for (i in 0 until tempPtr) {
vm.poke(currentFrame.toLong() + i, temp[i])
}
}
return tempPtr
return (outOffset - outPtr).toInt()
}
private fun isSignificantlyDifferent(a: ByteArray, b: ByteArray, threshold: Int = 5): Boolean {
var total = 0
for (i in a.indices) {
total += kotlin.math.abs((a[i].toInt() and 0xFF) - (b[i].toInt() and 0xFF))
private fun isSignificantlyDifferent(a: ByteArray, b: ByteArray): Boolean {
var score = 0
// Co (bytes 01): 4 nybbles
val coA = (a[0].toInt() and 0xFF) or ((a[1].toInt() and 0xFF) shl 8)
val coB = (b[0].toInt() and 0xFF) or ((b[1].toInt() and 0xFF) shl 8)
for (i in 0 until 4) {
val delta = abs((coA shr (i * 4) and 0xF) - (coB shr (i * 4) and 0xF))
score += delta * 3
}
return total > threshold
// Cg (bytes 23): 4 nybbles
val cgA = (a[2].toInt() and 0xFF) or ((a[3].toInt() and 0xFF) shl 8)
val cgB = (b[2].toInt() and 0xFF) or ((b[3].toInt() and 0xFF) shl 8)
for (i in 0 until 4) {
val delta = abs((cgA shr (i * 4) and 0xF) - (cgB shr (i * 4) and 0xF))
score += delta * 3
}
// Y (bytes 49): 16 nybbles
for (i in 4 until 10) {
val byteA = a[i].toInt() and 0xFF
val byteB = b[i].toInt() and 0xFF
val highDelta = abs((byteA shr 4) - (byteB shr 4))
val lowDelta = abs((byteA and 0xF) - (byteB and 0xF))
score += highDelta * 2
score += lowDelta * 2
}
return score > 0
}
fun encodeIpf2(srcPtr: Int, destPtr: Int, width: Int, height: Int, channels: Int, hasAlpha: Boolean, pattern: Int) {
@@ -1043,16 +1029,18 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
fun applyIpf1d(ipf1DeltaPtr: Int, destRG: Int, destBA: Int, width: Int, height: Int) {
val BLOCK_SIZE = 12
val blocksPerRow = (width + 3) / 4
val totalBlocks = ((width + 3) / 4) * ((height + 3) / 4)
val gpu = getFirstGPU()
val sign = if (destRG >= 0) 1 else -1
if (destRG * destBA < 0) throw IllegalArgumentException("Both destination memories must be on the same domain")
var ptr = ipf1DeltaPtr.toLong()
var blockIndex = 0
fun readByte(): Int = (vm.peek(ptr++)!!.toInt() and 0xFF)
fun readByte(): Int = vm.peek(ptr++)!!.toInt() and 0xFF
fun readShort(): Int {
val low = readByte()
val high = readByte()
@@ -1072,13 +1060,14 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
while (true) {
val opcode = readByte()
val opcode = readByte().toByte()
when (opcode) {
0x00 -> { // Skip blocks
SKIP -> { // Skip blocks
val count = readVarInt()
blockIndex += count
}
0x10 -> { // Write literal patch
PATCH -> { // Write literal patch
if (blockIndex >= totalBlocks) break
val co = readShort()
@@ -1133,12 +1122,16 @@ class GraphicsJSR223Delegate(private val vm: VM) {
blockIndex++
}
0x20 -> { // Repeat last literal
REPEAT -> { // Repeat last literal
val repeatCount = readVarInt()
// Just skip applying. We assume previous patch was already applied visually.
blockIndex += repeatCount
repeat(repeatCount) {
// Just skip applying. We assume previous patch was already applied visually.
blockIndex++
}
}
0xF0 -> return // End of stream
END -> return // End of stream
else -> error("Unknown delta opcode: ${opcode.toString(16)}")
}
}
@@ -1215,4 +1208,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}}
}
private val SKIP = 0x00.toByte()
private val PATCH = 0x01.toByte()
private val REPEAT = 0x02.toByte()
private val END = 0xFF.toByte()
}

View File

@@ -260,7 +260,6 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
protected var slpcnt = 0L
// Overridable delay hook; delegates to the shared applyDelay0() implementation.
// NOTE(review): presumably paces frame output -- confirm against the enclosing class.
open fun applyDelay() {
applyDelay0()
}
protected fun applyDelay0() {