faster sys.memcpy impl

This commit is contained in:
minjaesong
2023-01-21 17:59:54 +09:00
parent 8fdde0b192
commit b60b5bada6
8 changed files with 100 additions and 47 deletions

View File

@@ -1,4 +1,5 @@
// usage: playmov moviefile.mov [/i]
const SND_BASE_ADDR = audio.getBaseAddr()
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "/i"
const WIDTH = 560
const HEIGHT = 448
@@ -34,6 +35,7 @@ if (!magicMatching) {
return 1
}
let mp2Initialised = false
let width = seqread.readShort()
let height = seqread.readShort()
@@ -62,11 +64,6 @@ if (globalType != 255) {
printerrln(`Unsupported MOV type (${globalType})`)
return 1
}
// MP2 stuffs
let mp2context;
let samplePtrL;
let samplePtrR;
let ipfbuf = sys.malloc(FBUF_SIZE)
graphics.setGraphicsMode(4)
@@ -239,14 +236,11 @@ while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) {
AUDIO_QUEUE_LENGTH += 1
audioQueue.push(sys.malloc(AUDIO_QUEUE_BYTES))
}
if (mp2context === undefined) mp2context = audio.mp2Init()
if (samplePtrL === undefined) samplePtrL = sys.malloc(2304) // 16b samples
if (samplePtrR === undefined) samplePtrR = sys.malloc(2304) // 16b samples
if (!mp2Initialised) audio.mp2Init()
let frame = seqread.readBytes(readLength)
let [frameSize, samples] = audio.mp2DecodeFrame(mp2context, frame, true, samplePtrL, samplePtrR)
s16StTou8St(samplePtrL, samplePtrR, audioQueue[audioQueuePos++], samples)
sys.free(frame)
seqread.readBytes(readLength, SND_BASE_ADDR - 2368)
audio.mp2Decode()
sys.memcpy(SND_BASE_ADDR - 64, audioQueue[audioQueuePos++], 2304)
}
// RAW PCM packets (decode on the fly)
else if (packetType == 0x1000 || packetType == 0x1001) {
@@ -321,8 +315,6 @@ finally {
sys.free(audioQueue[i])
}
}
if (samplePtrL !== undefined) sys.free(samplePtrL)
if (samplePtrR !== undefined) sys.free(samplePtrR)
//audio.stop(0)
let timeTook = (endTime - startTime) / 1000000000.0

View File

@@ -196,7 +196,6 @@ try {
filebuf.readBytes(FRAME_SIZE, SND_BASE_ADDR - 2368)
audio.mp2Decode()
sys.waitForMemChg(SND_BASE_ADDR - 41, 255, 255)
if (audio.getPosition(0) >= QUEUE_MAX) {
while (audio.getPosition(0) >= (QUEUE_MAX >>> 1)) {

View File

@@ -49,7 +49,7 @@ function readBytes(length, ptrToDecode) {
let ptr = (ptrToDecode === undefined) ? sys.malloc(length) : ptrToDecode
let requiredBlocks = Math.floor((readCount + length) / 4096) - Math.floor(readCount / 4096)
let destVector = (ptrToDecode >= 0) ? 1 : -1
let destVector = (ptr >= 0) ? 1 : -1
let completedReads = 0

View File

@@ -174,10 +174,6 @@ From the start of the memory space:
write to this address FIRST and then write to "command" to execute the command
1134 bytes
unused
(1920) !!PENDING FOR REMOVAL!!
mapped to font ROM
Font Mapping area holds 128 characters in consecutive order, each character is always 15 bytes.
(designer's note: it's still useful to divide the char rom to two halves, lower half being characters ROM and upper half being symbols ROM)
2 bytes
Cursor position in: (y*80 + x)
2560 bytes

View File

@@ -4,7 +4,10 @@ import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.launch
import net.torvald.UnsafeHelper
import net.torvald.UnsafePtr
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUlong
import net.torvald.tsvm.peripheral.AudioAdapter
import net.torvald.tsvm.peripheral.GraphicsAdapter
import net.torvald.tsvm.peripheral.IOSpace
import java.nio.charset.Charset
@@ -13,6 +16,61 @@ import java.nio.charset.Charset
*/
class VMJSR223Delegate(private val vm: VM) {
/**
 * Tells whether a transfer of [len] bytes beginning at device-relative offset [from] stays inside
 * the region whose first and last valid offsets are [start] and [end] (both inclusive).
 *
 * The transfer occupies offsets `from until from + len`, so its exclusive end may equal `end + 1`;
 * comparing the exclusive end against the inclusive [end] would wrongly reject every transfer that
 * touches the final byte of a region.
 *
 * @return `true` when [len] is non-negative and the whole span fits within the region.
 */
private fun relPtrInDev(from: Long, len: Long, start: Int, end: Int) =
    len >= 0 && from in start..end && from + len <= end + 1L

/**
 * Resolves a VM address to a native pointer suitable for a bulk native copy, when the whole
 * [len]-byte transfer falls inside one directly addressable backing buffer.
 *
 * Non-negative addresses resolve into `vm.usermem`. Negative addresses are split into a peripheral
 * index and a peripheral-relative offset: addresses in `-1048576..-1` use 131072-byte windows
 * (the "MMIO area"), anything below uses 1048576-byte windows (the "memory area").
 *
 * NOTE(review): non-negative addresses are not bounds-checked against the size of user memory,
 * and the peripheral index is not checked against the size of `vm.peripheralTable` — confirm
 * that callers cannot pass addresses beyond either.
 *
 * @param from VM address of the first byte of the transfer.
 * @param len number of bytes to be transferred.
 * @param isDest `true` when the address is the destination of the copy; for [IOSpace] block
 *   transfer windows this selects the Tx buffer instead of the Rx buffer.
 * @return the native address corresponding to [from], or `null` when there is no peripheral at
 *   the computed index, the peripheral type is not handled here, or the span does not fit inside
 *   a single known region.
 */
private fun getDev(from: Long, len: Long, isDest: Boolean): Long? {
    // user memory
    if (from >= 0) return vm.usermem.ptr + from

    // MMIO area
    if (from in -1048576..-1 && (from - len) in -1048577..-1) {
        val fromIndex = (-from - 1) / 131072
        val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null
        val fromRel = (-from - 1) % 131072
        if (fromRel + len > 131072) return null

        return when (dev) {
            is IOSpace -> when {
                relPtrInDev(fromRel, len, 1024, 2047) ->
                    dev.peripheralFast.ptr + fromRel - 1024
                relPtrInDev(fromRel, len, 4096, 8191) ->
                    (if (isDest) dev.blockTransferTx[0] else dev.blockTransferRx[0]).ptr + fromRel - 4096
                relPtrInDev(fromRel, len, 8192, 12287) ->
                    (if (isDest) dev.blockTransferTx[1] else dev.blockTransferRx[1]).ptr + fromRel - 8192
                relPtrInDev(fromRel, len, 12288, 16383) ->
                    (if (isDest) dev.blockTransferTx[2] else dev.blockTransferRx[2]).ptr + fromRel - 12288
                relPtrInDev(fromRel, len, 16384, 20479) ->
                    (if (isDest) dev.blockTransferTx[3] else dev.blockTransferRx[3]).ptr + fromRel - 16384
                else -> null
            }
            is AudioAdapter -> when {
                relPtrInDev(fromRel, len, 64, 2367) -> dev.mediaDecodedBin.ptr + fromRel - 64
                // last valid offset is 4095; keeps the highest reachable byte the same as before
                // the bound check was corrected
                relPtrInDev(fromRel, len, 2368, 4095) -> dev.mediaFrameBin.ptr + fromRel - 2368
                else -> null
            }
            is GraphicsAdapter -> when {
                relPtrInDev(fromRel, len, 1024, 2047) -> dev.scanlineOffsets.ptr + fromRel - 1024
                relPtrInDev(fromRel, len, 2048, 4095) -> dev.mappedFontRom.ptr + fromRel - 2048
                relPtrInDev(fromRel, len, 65536, 131071) -> dev.instArea.ptr + fromRel - 65536
                else -> null
            }
            else -> null
        }
    }

    // memory area
    val fromIndex = (-from - 1) / 1048576
    val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null
    val fromRel = (-from - 1) % 1048576
    if (fromRel + len > 1048576) return null

    return when (dev) {
        is AudioAdapter -> when {
            relPtrInDev(fromRel, len, 0, 114687) -> dev.sampleBin.ptr + fromRel
            else -> null
        }
        is GraphicsAdapter -> when {
            relPtrInDev(fromRel, len, 0, 250879) -> dev.framebuffer.ptr + fromRel
            relPtrInDev(fromRel, len, 250880, 251903) -> dev.unusedArea.ptr + fromRel - 250880
            relPtrInDev(fromRel, len, 253950, 261631) -> dev.textArea.ptr + fromRel - 253950
            // the second framebuffer's region begins at 262144, so that is the base to subtract
            // (not the text area's 253950)
            relPtrInDev(fromRel, len, 262144, 513023) -> dev.framebuffer2?.ptr?.plus(fromRel)?.minus(262144)
            else -> null
        }
        else -> null
    }
}
fun getVmId() = vm.id.toString()
fun poke(addr: Int, value: Int) = vm.poke(addr.toLong(), value.toByte())
@@ -21,25 +79,32 @@ class VMJSR223Delegate(private val vm: VM) {
fun malloc(size: Int) = vm.malloc(size)
fun free(ptr: Int) = vm.free(ptr)
fun memcpy(from: Int, to: Int, len: Int) {
val from = from.toLong()
val to = to.toLong()
val len = len.toLong()
val fromVector = if (from >= 0) 1 else -1
val toVector = if (to >= 0) 1 else -1
val len = len.toLong()
// some special cases for native memcpy
val ioSpace = vm.peripheralTable[0].peripheral!! as IOSpace
// within scratchpad memory?
if (from in 0 until 8388608 && (to + len) in 0 until 8388608)
UnsafeHelper.memcpy(vm.usermem.ptr + from, vm.usermem.ptr + to, len)
// first serial read buffer -> usermem
else if (from in -4097 downTo -8192 && (to + len) in 0 until 8388608)
UnsafeHelper.memcpy(ioSpace.blockTransferRx[0].ptr + (-4097 - from), vm.usermem.ptr + to, len)
// usermem -> first serial write buffer
else if (from in 0 until 8388608 && (to + len) in -4097L downTo -8192L)
UnsafeHelper.memcpy(vm.usermem.ptr + from, ioSpace.blockTransferTx[0].ptr + (-4097 - to), len)
else
for (i in 0 until len) {
// println("vm.memcpy($from, $to, $len) = mem[${to + i*toVector}] <- mem[${from + i*fromVector}]")
vm.poke(to + i*toVector, vm.peek(from + i*fromVector)!!)
}
val fromDev = getDev(from, len, false)
val toDev = getDev(to, len, true)
// println("from = $from, to = $to")
// println("fromDev = $fromDev, toDev = $toDev")
if (fromDev != null && toDev != null)
UnsafeHelper.memcpy(fromDev, toDev, len)
else if (fromDev == null && toDev != null) {
val buf = UnsafeHelper.allocate(len, this)
for (i in 0 until len) buf[i] = vm.peek(from + i*fromVector)!!
UnsafeHelper.memcpy(buf.ptr, toDev, len)
buf.destroy()
}
else if (fromDev != null) {
for (i in 0 until len) vm.poke(to + i*toVector, UnsafeHelper.unsafe.getByte(fromDev + i))
}
else {
for (i in 0 until len) vm.poke(to + i*toVector, vm.peek(from + i*fromVector)!!)
}
}
fun mapRom(slot: Int) {
vm.romMapping = slot.and(255)
@@ -171,7 +236,7 @@ class VMJSR223Delegate(private val vm: VM) {
fun waitForMemChg(addr: Int, andMask: Int, xorMask: Int) {
while ((peek(addr) xor xorMask) and andMask == 0) {
spin();
Thread.sleep(1L)
}
}
fun waitForMemChg(addr: Int, andMask: Int) = waitForMemChg(addr, andMask, 0)

View File

@@ -40,7 +40,7 @@ private class RenderRunnable(val playhead: AudioAdapter.Playhead) : Runnable {
Thread.sleep(12)
}
else if (playhead.isPlaying && writeQueue.isEmpty) {
printdbg("Queue exhausted, stopping audio device...")
printdbg("!! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED !! QUEUE EXHAUSTED ")
// TODO: wait for 1-2 seconds then finally stop the device
// playhead.audioDevice.stop()

View File

@@ -98,7 +98,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
var framebufferScrollX = 0
var framebufferScrollY = 0
private var fontRomMappingMode = 0 // 0: low, 1: high
private var mappedFontRom = ByteArray(1920)
internal var mappedFontRom = UnsafeHelper.allocate(2048, this)
override var ttyFore: Int = TTY_FORE_DEFAULT // cannot be Byte
override var ttyBack: Int = TTY_BACK_DEFAULT // cannot be Byte
@@ -127,7 +127,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
// override var halfrowMode = false
private val instArea = UnsafeHelper.allocate(65536L, this)
internal val instArea = UnsafeHelper.allocate(65536L, this)
override var rawCursorPos: Int
get() = textArea.getShortFree(memTextCursorPosOffset).toInt()
@@ -210,7 +210,6 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
}
return when (addr) {
in 0 until 250880 -> framebuffer[addr]
in 252030 until 252030+1920 -> mappedFontRom[adi- 252030]
in 250880 until 250880+1024 -> unusedArea[addr - 250880]
in 253950 until 261632 -> textArea[addr - 253950]
in 261632 until 262144 -> peekPalette(adi - 261632)
@@ -243,7 +242,6 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
unusedArea[addr - 250880] = byte
runCommand(byte)
}
in 252030 until 252030+1920 -> mappedFontRom[adi- 252030] = byte
in 250880 until 250880+1024 -> unusedArea[addr - 250880] = byte
in 253950 until 261632 -> textArea[addr - 253950] = byte
in 261632 until 262144 -> pokePalette(adi - 261632, byte)
@@ -296,8 +294,9 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
20L -> drawCallProgramCounter.and(255).toByte()
21L -> drawCallProgramCounter.ushr(8).and(255).toByte()
in 1024L..2047L -> scanlineOffsets[addr - 1024]
in 2048L..4095L -> mappedFontRom[addr - 2048]
in 65536L..131071L -> instArea[addr - 65536]
@@ -323,6 +322,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
19L -> { if (bi != 0) compileAndRunDrawCalls() }
in 1024L..2047L -> { scanlineOffsets[addr - 1024] = byte }
in 2048L..4095L -> { mappedFontRom[addr - 2048] = byte }
in 65536L..131071L -> instArea[addr - 65536] = byte
@@ -596,7 +596,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
val pixel = (scanline[bm] < 0).toInt()
word = word or (pixel shl (scanline.size - 1 - bm))
}
mappedFontRom[char * ch + line] = word.toByte()
mappedFontRom[char.toLong() * ch + line] = word.toByte()
}
}
@@ -622,7 +622,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
val px = (char % 16) * cw; val py = (char / 16) * ch
val off = dataOffset + (py * 16 * cw) + px
for (line in 0 until ch) {
val word = mappedFontRom[char * ch + line].toInt()
val word = mappedFontRom[char.toLong() * ch + line].toInt()
for (bm in 0 until scanline.size) {
val pixel = 255 * ((word shr (cw - 1 - bm)) and 1)
scanline[bm] = pixel.toByte()
@@ -974,6 +974,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
unusedArea.destroy()
scanlineOffsets.destroy()
instArea.destroy()
mappedFontRom.destroy()
}
private var textCursorBlinkTimer = 0f

View File

@@ -40,7 +40,7 @@ class IOSpace(val vm: VM) : PeriBase("io"), InputProcessor {
)
/*private*/ val blockTransferPorts = Array(4) { BlockTransferPort(vm, it) }
private val peripheralFast = UnsafeHelper.allocate(1024, this)
internal val peripheralFast = UnsafeHelper.allocate(1024, this)
private val keyEventBuffers = ByteArray(8)