From b60b5bada63d11e94110f0feb0bff5833894a8ff Mon Sep 17 00:00:00 2001 From: minjaesong Date: Sat, 21 Jan 2023 17:59:54 +0900 Subject: [PATCH] faster sys.memcpy impl --- assets/disk0/tvdos/bin/playmov.js | 20 ++-- assets/disk0/tvdos/bin/playmp2.js | 1 - assets/disk0/tvdos/include/seqread.js | 2 +- terranmon.txt | 4 - .../src/net/torvald/tsvm/VMJSR223Delegate.kt | 101 ++++++++++++++---- .../torvald/tsvm/peripheral/AudioAdapter.kt | 2 +- .../tsvm/peripheral/GraphicsAdapter.kt | 15 +-- .../net/torvald/tsvm/peripheral/IOSpace.kt | 2 +- 8 files changed, 100 insertions(+), 47 deletions(-) diff --git a/assets/disk0/tvdos/bin/playmov.js b/assets/disk0/tvdos/bin/playmov.js index 383fd6f..1176267 100644 --- a/assets/disk0/tvdos/bin/playmov.js +++ b/assets/disk0/tvdos/bin/playmov.js @@ -1,4 +1,5 @@ // usage: playmov moviefile.mov [/i] +const SND_BASE_ADDR = audio.getBaseAddr() const interactive = exec_args[2] && exec_args[2].toLowerCase() == "/i" const WIDTH = 560 const HEIGHT = 448 @@ -34,6 +35,7 @@ if (!magicMatching) { return 1 } +let mp2Initialised = false let width = seqread.readShort() let height = seqread.readShort() @@ -62,11 +64,6 @@ if (globalType != 255) { printerrln(`Unsupported MOV type (${globalType})`) return 1 } -// MP2 stuffs -let mp2context; -let samplePtrL; -let samplePtrR; - let ipfbuf = sys.malloc(FBUF_SIZE) graphics.setGraphicsMode(4) @@ -239,14 +236,11 @@ while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) { AUDIO_QUEUE_LENGTH += 1 audioQueue.push(sys.malloc(AUDIO_QUEUE_BYTES)) } - if (mp2context === undefined) mp2context = audio.mp2Init() - if (samplePtrL === undefined) samplePtrL = sys.malloc(2304) // 16b samples - if (samplePtrR === undefined) samplePtrR = sys.malloc(2304) // 16b samples + if (!mp2Initialised) audio.mp2Init() - let frame = seqread.readBytes(readLength) - let [frameSize, samples] = audio.mp2DecodeFrame(mp2context, frame, true, samplePtrL, samplePtrR) - s16StTou8St(samplePtrL, samplePtrR, audioQueue[audioQueuePos++], samples) - sys.free(frame) + seqread.readBytes(readLength, SND_BASE_ADDR - 2368) + audio.mp2Decode() + sys.memcpy(SND_BASE_ADDR - 64, audioQueue[audioQueuePos++], 2304) } // RAW PCM packets (decode on the fly) else if (packetType == 0x1000 || packetType == 0x1001) { @@ -321,8 +315,6 @@ finally { sys.free(audioQueue[i]) } } - if (samplePtrL !== undefined) sys.free(samplePtrL) - if (samplePtrR !== undefined) sys.free(samplePtrR) //audio.stop(0) let timeTook = (endTime - startTime) / 1000000000.0 diff --git a/assets/disk0/tvdos/bin/playmp2.js b/assets/disk0/tvdos/bin/playmp2.js index 36555e2..82b0fd0 100644 --- a/assets/disk0/tvdos/bin/playmp2.js +++ b/assets/disk0/tvdos/bin/playmp2.js @@ -196,7 +196,6 @@ try { filebuf.readBytes(FRAME_SIZE, SND_BASE_ADDR - 2368) audio.mp2Decode() - sys.waitForMemChg(SND_BASE_ADDR - 41, 255, 255) if (audio.getPosition(0) >= QUEUE_MAX) { while (audio.getPosition(0) >= (QUEUE_MAX >>> 1)) { diff --git a/assets/disk0/tvdos/include/seqread.js b/assets/disk0/tvdos/include/seqread.js index 28799e8..c8eb3c2 100644 --- a/assets/disk0/tvdos/include/seqread.js +++ b/assets/disk0/tvdos/include/seqread.js @@ -49,7 +49,7 @@ function readBytes(length, ptrToDecode) { let ptr = (ptrToDecode === undefined) ? sys.malloc(length) : ptrToDecode let requiredBlocks = Math.floor((readCount + length) / 4096) - Math.floor(readCount / 4096) - let destVector = (ptrToDecode >= 0) ? 1 : -1 + let destVector = (ptr >= 0) ? 1 : -1 let completedReads = 0 diff --git a/terranmon.txt b/terranmon.txt index 5ff24a8..174e473 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -174,10 +174,6 @@ From the start of the memory space: write to this address FIRST and then write to "command" to execute the command 1134 bytes unused -(1920) !!PENDING FOR REMOVAL!! - mapped to font ROM - Font Mapping area holds 128 characters in consecutive order, each character is always 15 bytes. - (designer's note: it's still useful to divide the char rom to two halves, lower half being characters ROM and upper half being symbols ROM) 2 bytes Cursor position in: (y*80 + x) 2560 bytes diff --git a/tsvm_core/src/net/torvald/tsvm/VMJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/VMJSR223Delegate.kt index c47b0a5..2a4bdb4 100644 --- a/tsvm_core/src/net/torvald/tsvm/VMJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/VMJSR223Delegate.kt @@ -4,7 +4,10 @@ import kotlinx.coroutines.GlobalScope import kotlinx.coroutines.Job import kotlinx.coroutines.launch import net.torvald.UnsafeHelper +import net.torvald.UnsafePtr import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUlong +import net.torvald.tsvm.peripheral.AudioAdapter +import net.torvald.tsvm.peripheral.GraphicsAdapter import net.torvald.tsvm.peripheral.IOSpace import java.nio.charset.Charset @@ -13,6 +16,61 @@ import java.nio.charset.Charset */ class VMJSR223Delegate(private val vm: VM) { + private fun relPtrInDev(from: Long, len: Long, start: Int, end: Int) = + (from in start..end && (from + len) in start..end) + + private fun getDev(from: Long, len: Long, isDest: Boolean): Long? { + return if (from >= 0) vm.usermem.ptr + from + // MMIO area + else if (from in -1048576..-1 && (from - len) in -1048577..-1) { + val fromIndex = (-from-1) / 131072 + val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null + val fromRel = (-from-1) % 131072 + if (fromRel + len > 131072) return null + + return if (dev is IOSpace) { + if (relPtrInDev(fromRel, len, 1024, 2047)) dev.peripheralFast.ptr + fromRel - 1024 + else if (relPtrInDev(fromRel, len, 4096, 8191)) (if (isDest) dev.blockTransferTx[0] else dev.blockTransferRx[0]).ptr + fromRel - 4096 + else if (relPtrInDev(fromRel, len, 8192, 12287)) (if (isDest) dev.blockTransferTx[1] else dev.blockTransferRx[1]).ptr + fromRel - 8192 + else if (relPtrInDev(fromRel, len, 12288, 16383)) (if (isDest) dev.blockTransferTx[2] else dev.blockTransferRx[2]).ptr + fromRel - 12288 + else if (relPtrInDev(fromRel, len, 16384, 20479)) (if (isDest) dev.blockTransferTx[3] else dev.blockTransferRx[3]).ptr + fromRel - 16384 + else null + } + else if (dev is AudioAdapter) { + if (relPtrInDev(fromRel, len, 64, 2367)) dev.mediaDecodedBin.ptr + fromRel - 64 + else if (relPtrInDev(fromRel, len, 2368, 4096)) dev.mediaFrameBin.ptr + fromRel - 2368 + else null + } + else if (dev is GraphicsAdapter) { + if (relPtrInDev(fromRel, len, 1024, 2047)) dev.scanlineOffsets.ptr + fromRel - 1024 + else if (relPtrInDev(fromRel, len, 2048, 4095)) dev.mappedFontRom.ptr + fromRel - 2048 + else if (relPtrInDev(fromRel, len, 65536, 131071)) dev.instArea.ptr + fromRel - 65536 + else null + } + else null + } + // memory area + else { + val fromIndex = (-from-1) / 1048576 + val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null + val fromRel = (-from-1) % 1048576 + if (fromRel + len > 1048576) return null + + return if (dev is AudioAdapter) { + if (relPtrInDev(fromRel, len, 0, 114687)) dev.sampleBin.ptr + fromRel - 0 + else null + } + else if (dev is GraphicsAdapter) { + if (relPtrInDev(fromRel, len, 0, 250879)) dev.framebuffer.ptr + fromRel - 0 + else if (relPtrInDev(fromRel, len, 250880, 251903)) dev.unusedArea.ptr + fromRel - 250880 + else if (relPtrInDev(fromRel, len, 253950, 261631)) dev.textArea.ptr + fromRel - 253950 + else if (relPtrInDev(fromRel, len, 262144, 513023)) dev.framebuffer2?.ptr?.plus(fromRel)?.minus(253950) + else null + } + else null + } + } + fun getVmId() = vm.id.toString() fun poke(addr: Int, value: Int) = vm.poke(addr.toLong(), value.toByte()) @@ -21,25 +79,32 @@ class VMJSR223Delegate(private val vm: VM) { fun malloc(size: Int) = vm.malloc(size) fun free(ptr: Int) = vm.free(ptr) fun memcpy(from: Int, to: Int, len: Int) { + val from = from.toLong() + val to = to.toLong() + val len = len.toLong() + val fromVector = if (from >= 0) 1 else -1 val toVector = if (to >= 0) 1 else -1 - val len = len.toLong() - // some special cases for native memcpy - val ioSpace = vm.peripheralTable[0].peripheral!! as IOSpace - // within scratchpad memory? - if (from in 0 until 8388608 && (to + len) in 0 until 8388608) - UnsafeHelper.memcpy(vm.usermem.ptr + from, vm.usermem.ptr + to, len) - // first serial read buffer -> usermem - else if (from in -4097 downTo -8192 && (to + len) in 0 until 8388608) - UnsafeHelper.memcpy(ioSpace.blockTransferRx[0].ptr + (-4097 - from), vm.usermem.ptr + to, len) - // usermem -> first serial write buffer - else if (from in 0 until 8388608 && (to + len) in -4097L downTo -8192L) - UnsafeHelper.memcpy(vm.usermem.ptr + from, ioSpace.blockTransferTx[0].ptr + (-4097 - to), len) - else - for (i in 0 until len) { -// println("vm.memcpy($from, $to, $len) = mem[${to + i*toVector}] <- mem[${from + i*fromVector}]") - vm.poke(to + i*toVector, vm.peek(from + i*fromVector)!!) - } + val fromDev = getDev(from, len, false) + val toDev = getDev(to, len, true) + +// println("from = $from, to = $to") +// println("fromDev = $fromDev, toDev = $toDev") + + if (fromDev != null && toDev != null) + UnsafeHelper.memcpy(fromDev, toDev, len) + else if (fromDev == null && toDev != null) { + val buf = UnsafeHelper.allocate(len, this) + for (i in 0 until len) buf[i] = vm.peek(from + i*fromVector)!! + UnsafeHelper.memcpy(buf.ptr, toDev, len) + buf.destroy() + } + else if (fromDev != null) { + for (i in 0 until len) vm.poke(to + i*toVector, UnsafeHelper.unsafe.getByte(fromDev + i)) + } + else { + for (i in 0 until len) vm.poke(to + i*toVector, vm.peek(from + i*fromVector)!!) + } } fun mapRom(slot: Int) { vm.romMapping = slot.and(255) @@ -171,7 +236,7 @@ class VMJSR223Delegate(private val vm: VM) { fun waitForMemChg(addr: Int, andMask: Int, xorMask: Int) { while ((peek(addr) xor xorMask) and andMask == 0) { - spin(); + Thread.sleep(1L) } } fun waitForMemChg(addr: Int, andMask: Int) = waitForMemChg(addr, andMask, 0) diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt index 26c11a4..396c1a0 100644 --- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt +++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt @@ -40,7 +40,7 @@ private class RenderRunnable(val playhead: AudioAdapter.Playhead) : Runnable { Thread.sleep(12) } else if (playhead.isPlaying && writeQueue.isEmpty) { - printdbg("Queue exhausted, stopping audio device...") + printdbg("!! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED ") // TODO: wait for 1-2 seconds then finally stop the device // playhead.audioDevice.stop() diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt index 7e6f320..78e62ad 100644 --- a/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt +++ b/tsvm_core/src/net/torvald/tsvm/peripheral/GraphicsAdapter.kt @@ -98,7 +98,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi var framebufferScrollX = 0 var framebufferScrollY = 0 private var fontRomMappingMode = 0 // 0: low, 1: high - private var mappedFontRom = ByteArray(1920) + internal var mappedFontRom = UnsafeHelper.allocate(2048, this) override var ttyFore: Int = TTY_FORE_DEFAULT // cannot be Byte override var ttyBack: Int = TTY_BACK_DEFAULT // cannot be Byte @@ -127,7 +127,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi // override var halfrowMode = false - private val instArea = UnsafeHelper.allocate(65536L, this) + internal val instArea = UnsafeHelper.allocate(65536L, this) override var rawCursorPos: Int get() = textArea.getShortFree(memTextCursorPosOffset).toInt() @@ -210,7 +210,6 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi } return when (addr) { in 0 until 250880 -> framebuffer[addr] - in 252030 until 252030+1920 -> mappedFontRom[adi- 252030] in 250880 until 250880+1024 -> unusedArea[addr - 250880] in 253950 until 261632 -> textArea[addr - 253950] in 261632 until 262144 -> peekPalette(adi - 261632) @@ -243,7 +242,6 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi unusedArea[addr - 250880] = byte runCommand(byte) } - in 252030 until 252030+1920 -> mappedFontRom[adi- 252030] = byte in 250880 until 250880+1024 -> unusedArea[addr - 250880] = byte in 253950 until 261632 -> textArea[addr - 253950] = byte in 261632 until 262144 -> pokePalette(adi - 261632, byte) @@ -296,8 +294,9 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi 20L -> drawCallProgramCounter.and(255).toByte() 21L -> drawCallProgramCounter.ushr(8).and(255).toByte() - + in 1024L..2047L -> scanlineOffsets[addr - 1024] + in 2048L..4095L -> mappedFontRom[addr - 2048] in 65536L..131071L -> instArea[addr - 65536] @@ -323,6 +322,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi 19L -> { if (bi != 0) compileAndRunDrawCalls() } in 1024L..2047L -> { scanlineOffsets[addr - 1024] = byte } + in 2048L..4095L -> { mappedFontRom[addr - 2048] = byte } in 65536L..131071L -> instArea[addr - 65536] = byte @@ -596,7 +596,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi val pixel = (scanline[bm] < 0).toInt() word = word or (pixel shl (scanline.size - 1 - bm)) } - mappedFontRom[char * ch + line] = word.toByte() + mappedFontRom[char.toLong() * ch + line] = word.toByte() } } @@ -622,7 +622,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi val px = (char % 16) * cw; val py = (char / 16) * ch val off = dataOffset + (py * 16 * cw) + px for (line in 0 until ch) { - val word = mappedFontRom[char * ch + line].toInt() + val word = mappedFontRom[char.toLong() * ch + line].toInt() for (bm in 0 until scanline.size) { val pixel = 255 * ((word shr (cw - 1 - bm)) and 1) scanline[bm] = pixel.toByte() @@ -974,6 +974,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi unusedArea.destroy() scanlineOffsets.destroy() instArea.destroy() + mappedFontRom.destroy() } private var textCursorBlinkTimer = 0f diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/IOSpace.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/IOSpace.kt index 6d67c2a..514ef99 100644 --- a/tsvm_core/src/net/torvald/tsvm/peripheral/IOSpace.kt +++ b/tsvm_core/src/net/torvald/tsvm/peripheral/IOSpace.kt @@ -40,7 +40,7 @@ class IOSpace(val vm: VM) : PeriBase("io"), InputProcessor { ) /*private*/ val blockTransferPorts = Array(4) { BlockTransferPort(vm, it) } - private val peripheralFast = UnsafeHelper.allocate(1024, this) + internal val peripheralFast = UnsafeHelper.allocate(1024, this) private val keyEventBuffers = ByteArray(8)