faster sys.memcpy impl

This commit is contained in:
minjaesong
2023-01-21 17:59:54 +09:00
parent 8fdde0b192
commit b60b5bada6
8 changed files with 100 additions and 47 deletions

View File

@@ -1,4 +1,5 @@
// usage: playmov moviefile.mov [/i] // usage: playmov moviefile.mov [/i]
const SND_BASE_ADDR = audio.getBaseAddr()
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "/i" const interactive = exec_args[2] && exec_args[2].toLowerCase() == "/i"
const WIDTH = 560 const WIDTH = 560
const HEIGHT = 448 const HEIGHT = 448
@@ -34,6 +35,7 @@ if (!magicMatching) {
return 1 return 1
} }
let mp2Initialised = false
let width = seqread.readShort() let width = seqread.readShort()
let height = seqread.readShort() let height = seqread.readShort()
@@ -62,11 +64,6 @@ if (globalType != 255) {
printerrln(`Unsupported MOV type (${globalType})`) printerrln(`Unsupported MOV type (${globalType})`)
return 1 return 1
} }
// MP2 stuffs
let mp2context;
let samplePtrL;
let samplePtrR;
let ipfbuf = sys.malloc(FBUF_SIZE) let ipfbuf = sys.malloc(FBUF_SIZE)
graphics.setGraphicsMode(4) graphics.setGraphicsMode(4)
@@ -239,14 +236,11 @@ while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) {
AUDIO_QUEUE_LENGTH += 1 AUDIO_QUEUE_LENGTH += 1
audioQueue.push(sys.malloc(AUDIO_QUEUE_BYTES)) audioQueue.push(sys.malloc(AUDIO_QUEUE_BYTES))
} }
if (mp2context === undefined) mp2context = audio.mp2Init() if (!mp2Initialised) audio.mp2Init()
if (samplePtrL === undefined) samplePtrL = sys.malloc(2304) // 16b samples
if (samplePtrR === undefined) samplePtrR = sys.malloc(2304) // 16b samples
let frame = seqread.readBytes(readLength) seqread.readBytes(readLength, SND_BASE_ADDR - 2368)
let [frameSize, samples] = audio.mp2DecodeFrame(mp2context, frame, true, samplePtrL, samplePtrR) audio.mp2Decode()
s16StTou8St(samplePtrL, samplePtrR, audioQueue[audioQueuePos++], samples) sys.memcpy(SND_BASE_ADDR - 64, audioQueue[audioQueuePos++], 2304)
sys.free(frame)
} }
// RAW PCM packets (decode on the fly) // RAW PCM packets (decode on the fly)
else if (packetType == 0x1000 || packetType == 0x1001) { else if (packetType == 0x1000 || packetType == 0x1001) {
@@ -321,8 +315,6 @@ finally {
sys.free(audioQueue[i]) sys.free(audioQueue[i])
} }
} }
if (samplePtrL !== undefined) sys.free(samplePtrL)
if (samplePtrR !== undefined) sys.free(samplePtrR)
//audio.stop(0) //audio.stop(0)
let timeTook = (endTime - startTime) / 1000000000.0 let timeTook = (endTime - startTime) / 1000000000.0

View File

@@ -196,7 +196,6 @@ try {
filebuf.readBytes(FRAME_SIZE, SND_BASE_ADDR - 2368) filebuf.readBytes(FRAME_SIZE, SND_BASE_ADDR - 2368)
audio.mp2Decode() audio.mp2Decode()
sys.waitForMemChg(SND_BASE_ADDR - 41, 255, 255)
if (audio.getPosition(0) >= QUEUE_MAX) { if (audio.getPosition(0) >= QUEUE_MAX) {
while (audio.getPosition(0) >= (QUEUE_MAX >>> 1)) { while (audio.getPosition(0) >= (QUEUE_MAX >>> 1)) {

View File

@@ -49,7 +49,7 @@ function readBytes(length, ptrToDecode) {
let ptr = (ptrToDecode === undefined) ? sys.malloc(length) : ptrToDecode let ptr = (ptrToDecode === undefined) ? sys.malloc(length) : ptrToDecode
let requiredBlocks = Math.floor((readCount + length) / 4096) - Math.floor(readCount / 4096) let requiredBlocks = Math.floor((readCount + length) / 4096) - Math.floor(readCount / 4096)
let destVector = (ptrToDecode >= 0) ? 1 : -1 let destVector = (ptr >= 0) ? 1 : -1
let completedReads = 0 let completedReads = 0

View File

@@ -174,10 +174,6 @@ From the start of the memory space:
write to this address FIRST and then write to "command" to execute the command write to this address FIRST and then write to "command" to execute the command
1134 bytes 1134 bytes
unused unused
(1920) !!PENDING FOR REMOVAL!!
mapped to font ROM
Font Mapping area holds 128 characters in consecutive order, each character is always 15 bytes.
(designer's note: it's still useful to divide the char rom to two halves, lower half being characters ROM and upper half being symbols ROM)
2 bytes 2 bytes
Cursor position in: (y*80 + x) Cursor position in: (y*80 + x)
2560 bytes 2560 bytes

View File

@@ -4,7 +4,10 @@ import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.Job import kotlinx.coroutines.Job
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import net.torvald.UnsafeHelper import net.torvald.UnsafeHelper
import net.torvald.UnsafePtr
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUlong import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUlong
import net.torvald.tsvm.peripheral.AudioAdapter
import net.torvald.tsvm.peripheral.GraphicsAdapter
import net.torvald.tsvm.peripheral.IOSpace import net.torvald.tsvm.peripheral.IOSpace
import java.nio.charset.Charset import java.nio.charset.Charset
@@ -13,6 +16,61 @@ import java.nio.charset.Charset
*/ */
class VMJSR223Delegate(private val vm: VM) { class VMJSR223Delegate(private val vm: VM) {
/**
 * Tells whether a transfer of [len] bytes beginning at device-relative offset [from]
 * stays inside the inclusive offset window [start]..[end].
 *
 * Both the first offset ([from]) and the offset one past the last byte ([from] + [len])
 * must lie inside the window, so a span whose final byte sits exactly on [end] is
 * reported as not fitting.
 */
private fun relPtrInDev(from: Long, len: Long, start: Int, end: Int): Boolean {
    val window = start..end
    val pastTheEnd = from + len
    return from in window && pastTheEnd in window
}
/**
 * Resolves a VM address to a native pointer into the backing buffer that holds it,
 * so that a block of [len] bytes can be moved with a single native copy.
 *
 * Address layout handled here:
 * - `from >= 0`: user memory; the pointer is `vm.usermem.ptr + from`.
 * - `-1048576..-1`: MMIO area, 131072 bytes per peripheral slot.
 * - anything lower: peripheral memory area, 1048576 bytes per peripheral slot.
 *
 * For negative addresses the slot index and the offset inside the slot are derived
 * from `-from - 1`, i.e. address -1 is offset 0 of slot 0.
 *
 * @param from VM address of the first byte.
 * @param len number of bytes the caller intends to transfer.
 * @param isDest for the [IOSpace] block-transfer windows, selects the Tx buffer when
 * true and the Rx buffer when false; ignored for every other region.
 * @return the native pointer, or `null` when the slot has no peripheral, the span
 * crosses the slot boundary, or the span is not wholly contained in one of the
 * buffer windows listed below (the caller must then fall back to byte-wise access).
 */
private fun getDev(from: Long, len: Long, isDest: Boolean): Long? {
    // user memory
    if (from >= 0) return vm.usermem.ptr + from

    // MMIO area
    if (from in -1048576..-1 && (from - len) in -1048577..-1) {
        val fromIndex = (-from - 1) / 131072
        val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null
        val fromRel = (-from - 1) % 131072
        if (fromRel + len > 131072) return null

        return when (dev) {
            is IOSpace -> when {
                relPtrInDev(fromRel, len, 1024, 2047) ->
                    dev.peripheralFast.ptr + fromRel - 1024
                relPtrInDev(fromRel, len, 4096, 8191) ->
                    (if (isDest) dev.blockTransferTx[0] else dev.blockTransferRx[0]).ptr + fromRel - 4096
                relPtrInDev(fromRel, len, 8192, 12287) ->
                    (if (isDest) dev.blockTransferTx[1] else dev.blockTransferRx[1]).ptr + fromRel - 8192
                relPtrInDev(fromRel, len, 12288, 16383) ->
                    (if (isDest) dev.blockTransferTx[2] else dev.blockTransferRx[2]).ptr + fromRel - 12288
                relPtrInDev(fromRel, len, 16384, 20479) ->
                    (if (isDest) dev.blockTransferTx[3] else dev.blockTransferRx[3]).ptr + fromRel - 16384
                else -> null
            }
            is AudioAdapter -> when {
                relPtrInDev(fromRel, len, 64, 2367) -> dev.mediaDecodedBin.ptr + fromRel - 64
                relPtrInDev(fromRel, len, 2368, 4096) -> dev.mediaFrameBin.ptr + fromRel - 2368
                else -> null
            }
            is GraphicsAdapter -> when {
                relPtrInDev(fromRel, len, 1024, 2047) -> dev.scanlineOffsets.ptr + fromRel - 1024
                relPtrInDev(fromRel, len, 2048, 4095) -> dev.mappedFontRom.ptr + fromRel - 2048
                relPtrInDev(fromRel, len, 65536, 131071) -> dev.instArea.ptr + fromRel - 65536
                else -> null
            }
            else -> null
        }
    }

    // memory area
    // NOTE(review): an address inside the MMIO range whose span failed the check above
    // also ends up here and is decoded with the 1 MiB slot size — confirm this is intended.
    val fromIndex = (-from - 1) / 1048576
    val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null
    val fromRel = (-from - 1) % 1048576
    if (fromRel + len > 1048576) return null

    return when (dev) {
        is AudioAdapter ->
            if (relPtrInDev(fromRel, len, 0, 114687)) dev.sampleBin.ptr + fromRel else null
        is GraphicsAdapter -> when {
            relPtrInDev(fromRel, len, 0, 250879) -> dev.framebuffer.ptr + fromRel
            relPtrInDev(fromRel, len, 250880, 251903) -> dev.unusedArea.ptr + fromRel - 250880
            relPtrInDev(fromRel, len, 253950, 261631) -> dev.textArea.ptr + fromRel - 253950
            // The second framebuffer window starts at 262144, so that is the base to subtract;
            // subtracting the text-area base (253950) would point 8194 bytes too far in.
            relPtrInDev(fromRel, len, 262144, 513023) ->
                dev.framebuffer2?.let { it.ptr + fromRel - 262144 }
            else -> null
        }
        else -> null
    }
}
fun getVmId() = vm.id.toString() fun getVmId() = vm.id.toString()
fun poke(addr: Int, value: Int) = vm.poke(addr.toLong(), value.toByte()) fun poke(addr: Int, value: Int) = vm.poke(addr.toLong(), value.toByte())
@@ -21,25 +79,32 @@ class VMJSR223Delegate(private val vm: VM) {
fun malloc(size: Int) = vm.malloc(size) fun malloc(size: Int) = vm.malloc(size)
fun free(ptr: Int) = vm.free(ptr) fun free(ptr: Int) = vm.free(ptr)
fun memcpy(from: Int, to: Int, len: Int) { fun memcpy(from: Int, to: Int, len: Int) {
val from = from.toLong()
val to = to.toLong()
val len = len.toLong()
val fromVector = if (from >= 0) 1 else -1 val fromVector = if (from >= 0) 1 else -1
val toVector = if (to >= 0) 1 else -1 val toVector = if (to >= 0) 1 else -1
val len = len.toLong() val fromDev = getDev(from, len, false)
// some special cases for native memcpy val toDev = getDev(to, len, true)
val ioSpace = vm.peripheralTable[0].peripheral!! as IOSpace
// within scratchpad memory? // println("from = $from, to = $to")
if (from in 0 until 8388608 && (to + len) in 0 until 8388608) // println("fromDev = $fromDev, toDev = $toDev")
UnsafeHelper.memcpy(vm.usermem.ptr + from, vm.usermem.ptr + to, len)
// first serial read buffer -> usermem if (fromDev != null && toDev != null)
else if (from in -4097 downTo -8192 && (to + len) in 0 until 8388608) UnsafeHelper.memcpy(fromDev, toDev, len)
UnsafeHelper.memcpy(ioSpace.blockTransferRx[0].ptr + (-4097 - from), vm.usermem.ptr + to, len) else if (fromDev == null && toDev != null) {
// usermem -> first serial write buffer val buf = UnsafeHelper.allocate(len, this)
else if (from in 0 until 8388608 && (to + len) in -4097L downTo -8192L) for (i in 0 until len) buf[i] = vm.peek(from + i*fromVector)!!
UnsafeHelper.memcpy(vm.usermem.ptr + from, ioSpace.blockTransferTx[0].ptr + (-4097 - to), len) UnsafeHelper.memcpy(buf.ptr, toDev, len)
else buf.destroy()
for (i in 0 until len) { }
// println("vm.memcpy($from, $to, $len) = mem[${to + i*toVector}] <- mem[${from + i*fromVector}]") else if (fromDev != null) {
vm.poke(to + i*toVector, vm.peek(from + i*fromVector)!!) for (i in 0 until len) vm.poke(to + i*toVector, UnsafeHelper.unsafe.getByte(fromDev + i))
} }
else {
for (i in 0 until len) vm.poke(to + i*toVector, vm.peek(from + i*fromVector)!!)
}
} }
fun mapRom(slot: Int) { fun mapRom(slot: Int) {
vm.romMapping = slot.and(255) vm.romMapping = slot.and(255)
@@ -171,7 +236,7 @@ class VMJSR223Delegate(private val vm: VM) {
fun waitForMemChg(addr: Int, andMask: Int, xorMask: Int) { fun waitForMemChg(addr: Int, andMask: Int, xorMask: Int) {
while ((peek(addr) xor xorMask) and andMask == 0) { while ((peek(addr) xor xorMask) and andMask == 0) {
spin(); Thread.sleep(1L)
} }
} }
fun waitForMemChg(addr: Int, andMask: Int) = waitForMemChg(addr, andMask, 0) fun waitForMemChg(addr: Int, andMask: Int) = waitForMemChg(addr, andMask, 0)

View File

@@ -40,7 +40,7 @@ private class RenderRunnable(val playhead: AudioAdapter.Playhead) : Runnable {
Thread.sleep(12) Thread.sleep(12)
} }
else if (playhead.isPlaying && writeQueue.isEmpty) { else if (playhead.isPlaying && writeQueue.isEmpty) {
printdbg("Queue exhausted, stopping audio device...") printdbg("!! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED ")
// TODO: wait for 1-2 seconds then finally stop the device // TODO: wait for 1-2 seconds then finally stop the device
// playhead.audioDevice.stop() // playhead.audioDevice.stop()

View File

@@ -98,7 +98,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
var framebufferScrollX = 0 var framebufferScrollX = 0
var framebufferScrollY = 0 var framebufferScrollY = 0
private var fontRomMappingMode = 0 // 0: low, 1: high private var fontRomMappingMode = 0 // 0: low, 1: high
private var mappedFontRom = ByteArray(1920) internal var mappedFontRom = UnsafeHelper.allocate(2048, this)
override var ttyFore: Int = TTY_FORE_DEFAULT // cannot be Byte override var ttyFore: Int = TTY_FORE_DEFAULT // cannot be Byte
override var ttyBack: Int = TTY_BACK_DEFAULT // cannot be Byte override var ttyBack: Int = TTY_BACK_DEFAULT // cannot be Byte
@@ -127,7 +127,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
// override var halfrowMode = false // override var halfrowMode = false
private val instArea = UnsafeHelper.allocate(65536L, this) internal val instArea = UnsafeHelper.allocate(65536L, this)
override var rawCursorPos: Int override var rawCursorPos: Int
get() = textArea.getShortFree(memTextCursorPosOffset).toInt() get() = textArea.getShortFree(memTextCursorPosOffset).toInt()
@@ -210,7 +210,6 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
} }
return when (addr) { return when (addr) {
in 0 until 250880 -> framebuffer[addr] in 0 until 250880 -> framebuffer[addr]
in 252030 until 252030+1920 -> mappedFontRom[adi- 252030]
in 250880 until 250880+1024 -> unusedArea[addr - 250880] in 250880 until 250880+1024 -> unusedArea[addr - 250880]
in 253950 until 261632 -> textArea[addr - 253950] in 253950 until 261632 -> textArea[addr - 253950]
in 261632 until 262144 -> peekPalette(adi - 261632) in 261632 until 262144 -> peekPalette(adi - 261632)
@@ -243,7 +242,6 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
unusedArea[addr - 250880] = byte unusedArea[addr - 250880] = byte
runCommand(byte) runCommand(byte)
} }
in 252030 until 252030+1920 -> mappedFontRom[adi- 252030] = byte
in 250880 until 250880+1024 -> unusedArea[addr - 250880] = byte in 250880 until 250880+1024 -> unusedArea[addr - 250880] = byte
in 253950 until 261632 -> textArea[addr - 253950] = byte in 253950 until 261632 -> textArea[addr - 253950] = byte
in 261632 until 262144 -> pokePalette(adi - 261632, byte) in 261632 until 262144 -> pokePalette(adi - 261632, byte)
@@ -296,8 +294,9 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
20L -> drawCallProgramCounter.and(255).toByte() 20L -> drawCallProgramCounter.and(255).toByte()
21L -> drawCallProgramCounter.ushr(8).and(255).toByte() 21L -> drawCallProgramCounter.ushr(8).and(255).toByte()
in 1024L..2047L -> scanlineOffsets[addr - 1024] in 1024L..2047L -> scanlineOffsets[addr - 1024]
in 2048L..4095L -> mappedFontRom[addr - 2048]
in 65536L..131071L -> instArea[addr - 65536] in 65536L..131071L -> instArea[addr - 65536]
@@ -323,6 +322,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
19L -> { if (bi != 0) compileAndRunDrawCalls() } 19L -> { if (bi != 0) compileAndRunDrawCalls() }
in 1024L..2047L -> { scanlineOffsets[addr - 1024] = byte } in 1024L..2047L -> { scanlineOffsets[addr - 1024] = byte }
in 2048L..4095L -> { mappedFontRom[addr - 2048] = byte }
in 65536L..131071L -> instArea[addr - 65536] = byte in 65536L..131071L -> instArea[addr - 65536] = byte
@@ -596,7 +596,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
val pixel = (scanline[bm] < 0).toInt() val pixel = (scanline[bm] < 0).toInt()
word = word or (pixel shl (scanline.size - 1 - bm)) word = word or (pixel shl (scanline.size - 1 - bm))
} }
mappedFontRom[char * ch + line] = word.toByte() mappedFontRom[char.toLong() * ch + line] = word.toByte()
} }
} }
@@ -622,7 +622,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
val px = (char % 16) * cw; val py = (char / 16) * ch val px = (char % 16) * cw; val py = (char / 16) * ch
val off = dataOffset + (py * 16 * cw) + px val off = dataOffset + (py * 16 * cw) + px
for (line in 0 until ch) { for (line in 0 until ch) {
val word = mappedFontRom[char * ch + line].toInt() val word = mappedFontRom[char.toLong() * ch + line].toInt()
for (bm in 0 until scanline.size) { for (bm in 0 until scanline.size) {
val pixel = 255 * ((word shr (cw - 1 - bm)) and 1) val pixel = 255 * ((word shr (cw - 1 - bm)) and 1)
scanline[bm] = pixel.toByte() scanline[bm] = pixel.toByte()
@@ -974,6 +974,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
unusedArea.destroy() unusedArea.destroy()
scanlineOffsets.destroy() scanlineOffsets.destroy()
instArea.destroy() instArea.destroy()
mappedFontRom.destroy()
} }
private var textCursorBlinkTimer = 0f private var textCursorBlinkTimer = 0f

View File

@@ -40,7 +40,7 @@ class IOSpace(val vm: VM) : PeriBase("io"), InputProcessor {
) )
/*private*/ val blockTransferPorts = Array(4) { BlockTransferPort(vm, it) } /*private*/ val blockTransferPorts = Array(4) { BlockTransferPort(vm, it) }
private val peripheralFast = UnsafeHelper.allocate(1024, this) internal val peripheralFast = UnsafeHelper.allocate(1024, this)
private val keyEventBuffers = ByteArray(8) private val keyEventBuffers = ByteArray(8)