diff --git a/src/net/torvald/terrarum/modulebasegame/console/Load.kt b/src/net/torvald/terrarum/modulebasegame/console/Load.kt
index 11f1507c3..05eaf5919 100644
--- a/src/net/torvald/terrarum/modulebasegame/console/Load.kt
+++ b/src/net/torvald/terrarum/modulebasegame/console/Load.kt
@@ -26,7 +26,7 @@ object Load : ConsoleCommand {
             val disk = VDUtil.readDiskArchive(file, charset = charset)
 
             val metaFile = VDUtil.getFile(disk, VDUtil.VDPath("savegame", charset))!!
-            val metaReader = ByteArray64Reader(metaFile.contents.serialize().array)
+            val metaReader = ByteArray64Reader(metaFile.contents.serialize().array, charset)
             val meta = Common.jsoner.fromJson(JsonValue::class.java, metaReader)
 
         }
diff --git a/src/net/torvald/terrarum/modulebasegame/console/ReaderTest.kt b/src/net/torvald/terrarum/modulebasegame/console/ReaderTest.kt
new file mode 100644
index 000000000..07c20a7b0
--- /dev/null
+++ b/src/net/torvald/terrarum/modulebasegame/console/ReaderTest.kt
@@ -0,0 +1,46 @@
+package net.torvald.terrarum.modulebasegame.console
+
+import net.torvald.terrarum.console.ConsoleCommand
+import net.torvald.terrarum.console.Echo
+import net.torvald.terrarum.serialise.ByteArray64Reader
+import net.torvald.terrarum.serialise.ByteArray64Writer
+import java.io.File
+
+/**
+ * Console command that round-trips `./work_files/utftest.txt` through
+ * [ByteArray64Writer] and [ByteArray64Reader] using UTF-8, prints the decoded text,
+ * writes it to `./work_files/utftest-roundtrip.txt` and reports whether it matches the input.
+ *
+ * Created by minjaesong on 2021-08-31.
+ */
+object ReaderTest : ConsoleCommand {
+    override fun execute(args: Array) {
+        val textfile = File("./work_files/utftest.txt")
+        if (!textfile.exists()) {
+            // Fail softly inside the console instead of letting readText() throw.
+            Echo("ReaderTest: ${textfile.path} not found")
+            return
+        }
+        val text = textfile.readText(Charsets.UTF_8)
+
+        // `use` closes the writer even when write() throws. The backing array is fetched
+        // after close(), which is the same order the previous version used.
+        val writer = ByteArray64Writer(Charsets.UTF_8)
+        writer.use { it.write(text); it.flush() }
+
+        val ba = writer.toByteArray64()
+
+        val readText = ByteArray64Reader(ba, Charsets.UTF_8).use { it.readText() }
+
+        println(readText)
+        val outfile = File("./work_files/utftest-roundtrip.txt")
+        outfile.writeText(readText, Charsets.UTF_8)
+
+        // Report the verdict so the operator does not have to diff the two files by hand.
+        Echo(if (readText == text) "ReaderTest: round trip OK" else "ReaderTest: round trip MISMATCH")
+    }
+
+    override fun printUsage() {
+        Echo("Usage: readertest")
+    }
+}
\ No newline at end of file
diff --git a/src/net/torvald/terrarum/serialise/Common.kt b/src/net/torvald/terrarum/serialise/Common.kt
index 7fcd7badd..19dd60b08 100644
--- a/src/net/torvald/terrarum/serialise/Common.kt
+++ b/src/net/torvald/terrarum/serialise/Common.kt
@@ -3,6 +3,7 @@ package net.torvald.terrarum.serialise
 import com.badlogic.gdx.utils.Json
 import com.badlogic.gdx.utils.JsonValue
 import com.badlogic.gdx.utils.JsonWriter
+import net.torvald.terrarum.AppLoader.printdbg
 import net.torvald.terrarum.console.EchoError
 import net.torvald.terrarum.gameworld.BlockLayer
 import net.torvald.terrarum.gameworld.GameWorld
@@ -10,17 +11,14 @@ import net.torvald.terrarum.gameworld.WorldTime
 import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.ByteArray64
 import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.ByteArray64GrowableOutputStream
 import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.ByteArray64InputStream
-import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.ByteArray64OutputStream
 import net.torvald.terrarum.tail
 import net.torvald.terrarum.utils.*
 import org.apache.commons.codec.digest.DigestUtils
 import java.io.Reader
 import java.io.Writer
 import java.math.BigInteger
-import java.nio.CharBuffer
 import java.nio.channels.ClosedChannelException
 import java.nio.charset.Charset
-import java.nio.charset.CharsetDecoder
import java.nio.charset.UnsupportedCharsetException
 import java.util.zip.GZIPInputStream
 import java.util.zip.GZIPOutputStream
@@ -273,7 +271,7 @@ object Common {
     }
 }
 
-class ByteArray64Writer() : Writer() {
+class ByteArray64Writer(val charset: Charset) : Writer() {
     private var closed = false
     private val ba64 = ByteArray64()
 
@@ -288,7 +286,7 @@ class ByteArray64Writer() : Writer() {
 
     override fun write(c: Int) {
         checkOpen()
-        "${c.toChar()}".toByteArray().forEach { ba64.add(it) }
+        "${c.toChar()}".toByteArray(charset).forEach { ba64.add(it) }
     }
 
     override fun write(cbuf: CharArray) {
@@ -298,7 +296,7 @@ class ByteArray64Writer() : Writer() {
 
     override fun write(str: String) {
         checkOpen()
-        str.toByteArray().forEach { ba64.add(it) }
+        str.toByteArray(charset).forEach { ba64.add(it) }
     }
 
     override fun write(cbuf: CharArray, off: Int, len: Int) {
@@ -334,7 +332,7 @@ class ByteArray64Reader(val ba: ByteArray64, val charset: Charset) : Reader() {
      * U+0800 .. U+FFFF       1110xxxx 10xxxxxx 10xxxxxx
      * U+10000 .. U+10FFFF    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      */
-    private fun utf8GetBytes(head: Byte) = when (head.toInt() and 255) {
+    private fun utf8GetCharLen(head: Byte) = when (head.toInt() and 255) {
         in 0b11110_000..0b11110_111 -> 4
         in 0b1110_0000..0b1110_1111 -> 3
         in 0b110_00000..0b110_11111 -> 2
@@ -343,52 +341,90 @@ class ByteArray64Reader(val ba: ByteArray64, val charset: Charset) : Reader() {
     }
 
     /**
-     * @param list of bytes that encodes one unicode character. Get required byte length using [utf8GetBytes].
+     * @param list of bytes that encodes one unicode character. Get required byte length using [utf8GetCharLen].
      * @return A codepoint of the character.
      */
     private fun utf8decode(bytes0: List): Int {
         val bytes = bytes0.map { it.toInt() and 255 }
         var ret = when (bytes.size) {
-            4 -> (bytes[0] and 7) shl 15
-            3 -> (bytes[0] and 15) shl 10
-            2 -> (bytes[0] and 31) shl 5
+            4 -> (bytes[0] and 7) shl 18
+            3 -> (bytes[0] and 15) shl 12
+            2 -> (bytes[0] and 31) shl 6
             1 -> (bytes[0] and 127)
             else -> throw IllegalArgumentException("Expected bytes size: 1..4, got ${bytes.size}")
         }
-        bytes.tail().forEachIndexed { index, byte ->
-            ret = ret or (byte and 63).shl(5 * (2 - index))
+        bytes.tail().reversed().forEachIndexed { index, byte ->
+            ret = ret or (byte and 63).shl(6 * index)
         }
 
         return ret
     }
 
+    /**
+     * Trailing surrogate that did not fit into the caller's window on the previous
+     * [read] call; a space (' ') means nothing is pending.
+     */
+    private var surrogateLeftover = ' '
+
+    /**
+     * Decodes bytes of [ba] into chars and stores at most [len] of them into [cbuf], starting
+     * at index [off]. In the UTF-8 branch a code point of 65536 or above becomes a surrogate
+     * pair; when only the leading half fits, the trailing half is parked in
+     * [surrogateLeftover] and emitted first by the next call.
+     *
+     * @return the number of chars stored, 0 when [len] is 0, or -1 when nothing could be stored.
+     */
     override fun read(cbuf: CharArray, off: Int, len: Int): Int {
         var readCount = 0
         when (charset) {
             Charsets.UTF_8 -> {
-                while (readCount < len && remaining > 0) {
-                    val bbuf = (0..minOf(3L, remaining)).map { ba[readCursor + it] }
-                    val codePoint = utf8decode(bbuf.subList(0, utf8GetBytes(bbuf[0])))
+                // Keep looping while a trailing surrogate is still parked, even when every input
+                // byte has been consumed; otherwise it would be dropped at the end of the data.
+                while (readCount < len && (remaining > 0 || surrogateLeftover != ' ')) {
+                    if (surrogateLeftover != ' ') {
+                        cbuf[off + readCount] = surrogateLeftover
 
-                    if (codePoint < 65536) {
-                        cbuf[off + readCount] = codePoint.toChar()
                         readCount += 1
-                        readCursor += bbuf.size
+                        surrogateLeftover = ' '
                     }
                     else {
-                        /*
+                        val bbuf = (0 until minOf(4L, remaining)).map { ba[readCursor + it] }
+                        val charLen = utf8GetCharLen(bbuf[0])
+                        val codePoint = utf8decode(bbuf.subList(0, charLen))
+
+                        if (codePoint < 65536) {
+                            cbuf[off + readCount] = codePoint.toChar()
+
+                            readCount += 1
+                            readCursor += charLen
+                        }
+                        else {
+                            /*
                          * U' = yyyyyyyyyyxxxxxxxxxx  // U - 0x10000
                          * W1 = 110110yyyyyyyyyy      // 0xD800 + yyyyyyyyyy
                          * W2 = 110111xxxxxxxxxx      // 0xDC00 + xxxxxxxxxx
                          */
-                        val surroLead = (0xD800 or codePoint.ushr(10)).toChar()
-                        val surroTrail = (0xDC00 or codePoint.and(1023)).toChar()
+                            val codPoin = codePoint - 65536
+                            val surroLead = (0xD800 or codPoin.ushr(10)).toChar()
+                            val surroTrail = (0xDC00 or codPoin.and(1023)).toChar()
 
-                        cbuf[off + readCount] = surroLead
-                        cbuf[off + readCount + 1] = surroTrail
+                            cbuf[off + readCount] = surroLead
 
-                        readCount += 2
-                        readCursor + 4
+                            // Compare against len, not cbuf.size: the caller may hand in a window
+                            // that is smaller than the array, and read() must neither write nor
+                            // count chars beyond off + len.
+                            if (readCount + 1 < len) {
+                                cbuf[off + readCount + 1] = surroTrail
+
+                                readCount += 2
+                                readCursor += 4
+                            }
+                            else {
+                                readCount += 1
+                                readCursor += 4
+                                surrogateLeftover = surroTrail
+                            }
+                        }
                     }
                 }
             }
@@ -402,7 +438,8 @@ class ByteArray64Reader(val ba: ByteArray64, val charset: Charset) : Reader() {
             else -> throw UnsupportedCharsetException(charset.name())
         }
 
-        return readCount
+        // A zero-length request yields 0; -1 is reserved for "nothing left to read".
+        return if (readCount == 0 && len > 0) -1 else readCount
     }
 
     override fun close() { readCursor = 0L }
diff --git a/src/net/torvald/terrarum/serialise/WriteActor.kt b/src/net/torvald/terrarum/serialise/WriteActor.kt
index e817c43d4..fe56c3b11 100644
--- a/src/net/torvald/terrarum/serialise/WriteActor.kt
+++ b/src/net/torvald/terrarum/serialise/WriteActor.kt
@@ -19,9 +19,12 @@ object WriteActor {
     }
 
     fun encodeToByteArray64(actor: Actor): ByteArray64 {
-        val ba = ByteArray64()
-        this.invoke(actor).toByteArray().forEach { ba.add(it) }
-        return ba
+        val baw = ByteArray64Writer(Common.CHARSET)
+
+        Common.jsoner.toJson(actor, actor.javaClass, baw)
+        baw.flush(); baw.close()
+
+        return baw.toByteArray64()
     }
 
 }
\ No newline at end of file
diff --git a/src/net/torvald/terrarum/serialise/WriteMeta.kt b/src/net/torvald/terrarum/serialise/WriteMeta.kt
index 6aa8b7ac1..1c2402197 100644
--- a/src/net/torvald/terrarum/serialise/WriteMeta.kt
+++ b/src/net/torvald/terrarum/serialise/WriteMeta.kt
@@ -36,28 +36,28 @@ open class WriteMeta(val ingame: TerrarumIngame) {
             it.append("\n\n## module: $modname ##\n\n")
             it.append(file.readText())
         }
-        bytesToZipdStr(it.toString().toByteArray())
+        bytesToZipdStr(it.toString().toByteArray(Common.CHARSET))
     }}",
     "items": "${StringBuilder().let {
         ModMgr.getFilesFromEveryMod("items/itemid.csv").forEach { (modname, file) ->
             it.append("\n\n## module: $modname ##\n\n")
             it.append(file.readText())
         }
-        bytesToZipdStr(it.toString().toByteArray())
+        bytesToZipdStr(it.toString().toByteArray(Common.CHARSET))
     }}",
     "wires": 
"${StringBuilder().let { ModMgr.getFilesFromEveryMod("wires/wires.csv").forEach { (modname, file) -> it.append("\n\n## module: $modname ##\n\n") it.append(file.readText()) } - bytesToZipdStr(it.toString().toByteArray()) + bytesToZipdStr(it.toString().toByteArray(Common.CHARSET)) }}", "materials": "${StringBuilder().let { ModMgr.getFilesFromEveryMod("materials/materials.csv").forEach { (modname, file) -> it.append("\n\n## module: $modname ##\n\n") it.append(file.readText()) } - bytesToZipdStr(it.toString().toByteArray()) + bytesToZipdStr(it.toString().toByteArray(Common.CHARSET)) }}", "loadorder": [${ModMgr.loadOrder.map { "\"${it}\"" }.joinToString()}], "worlds": [${ingame.gameworldIndices.joinToString()}] @@ -68,7 +68,7 @@ open class WriteMeta(val ingame: TerrarumIngame) { fun encodeToByteArray64(): ByteArray64 { val ba = ByteArray64() - this.invoke().toByteArray().forEach { ba.add(it) } + this.invoke().toByteArray(Common.CHARSET).forEach { ba.add(it) } return ba } } diff --git a/src/net/torvald/terrarum/serialise/WriteWorld.kt b/src/net/torvald/terrarum/serialise/WriteWorld.kt index eb81cd608..39bbd8842 100644 --- a/src/net/torvald/terrarum/serialise/WriteWorld.kt +++ b/src/net/torvald/terrarum/serialise/WriteWorld.kt @@ -23,17 +23,12 @@ open class WriteWorld(val ingame: TerrarumIngame) { world.genver = Common.GENVER world.comp = Common.COMP_GZIP - val baw = ByteArray64Writer() + val baw = ByteArray64Writer(Common.CHARSET) Common.jsoner.toJson(world, baw) baw.flush(); baw.close() return baw.toByteArray64() - - - /*val ba = ByteArray64() - this.invoke().toByteArray().forEach { ba.add(it) } - return ba*/ } } diff --git a/work_files/utftest-roundtrip.txt b/work_files/utftest-roundtrip.txt new file mode 100644 index 000000000..5cd4e6089 --- /dev/null +++ b/work_files/utftest-roundtrip.txt @@ -0,0 +1,28 @@ +ํ•˜์ˆ˜โฌ‡๏ธ๐Ÿž๋Š” ๋‘ ์‚ฐ๐Ÿ”๐Ÿž๐Ÿ” ํ‹ˆ์—์„œ ๋‚˜์™€ ๋Œ๊ณผ ๋ถ€๋”ช์ณ ์‹ธ์šฐ๋ฉฐ๐Ÿคœ๐Ÿฟ๐Ÿชจ๐Ÿค›๐Ÿป, ๊ทธ ๋†€๋ž€ ํŒŒ๋„๐Ÿ˜ฒ๐ŸŒŠ์™€ ์„ฑ๋‚œ ๋ฌผ๋จธ๋ฆฌ์™€ ์šฐ๋Š” 
์—ฌ์šธ๊ณผ ๋…ธํ•œ ๋ฌผ๊ฒฐ๊ณผ ์Šฌํ”ˆ ๊ณก์กฐ์™€ ์›๋งํ•˜๋Š” ์†Œ๋ฆฌ๊ฐ€ ๊ตฝ์ด์ณ ๋Œ๋ฉด์„œ, ์šฐ๋Š” ๋“ฏ, ์†Œ๋ฆฌ์น˜๋Š” ๋“ฏ, ๋ฐ”์˜๊ฒŒ ํ˜ธ๋ นํ•˜๋Š” ๋“ฏ, ํ•ญ์ƒ ์žฅ์„ฑ์„ ๊นจ๋œจ๋ฆด ํ˜•์„ธ๊ฐ€ ์žˆ์–ด, ์ „์ฐจ ๋งŒ์Šน๊ณผ ์ „๊ธฐ ๋งŒ๋Œ€๋‚˜ ์ „ํฌ ๋งŒ๊ฐ€์™€ ์ „๊ณ  ๋งŒ์ขŒ๋กœ์จ๋Š” ๊ทธ ๋ฌด๋„ˆ๋œจ๋ฆฌ๊ณ  ๋‚ด๋ฟœ๋Š” ์†Œ๋ฆฌ๋ฅผ ์กฑํžˆ ํ˜•์šฉํ•  ์ˆ˜ ์—†์„ ๊ฒƒ์ด๋‹ค. ๋ชจ๋ž˜ ์œ„์— ํฐ ๋Œ์€ ํ™€์—ฐํžˆ ๋–จ์–ด์ ธ ์„ฐ๊ณ , ๊ฐ• ์–ธ๋•์— ๋ฒ„๋“œ๋‚˜๋ฌด๋Š” ์–ด๋‘ก๊ณ  ์ปด์ปดํ•˜์—ฌ ๋ฌผ์ง€ํ‚ด๊ณผ ํ•˜์ˆ˜ ๊ท€์‹ ์ด ๋‹คํˆฌ์–ด ๋‚˜์™€์„œ ์‚ฌ๋žŒ์„ ๋†€๋ฆฌ๋Š” ๋“ฏํ•œ๋ฐ, ์ขŒ์šฐ์˜ ๊ต๋ฆฌ๊ฐ€ ๋ถ™๋“ค๋ ค๊ณ  ์• ์“ฐ๋Š” ๋“ฏ์‹ถ์—ˆ๋‹ค. +ํ˜น์€ ๋งํ•˜๊ธฐ๋ฅผ, + โ€œ์—ฌ๊ธฐ๋Š” ์˜› ์ „์Ÿํ„ฐ์ด๋ฏ€๋กœ ๊ฐ•๋ฌผ์ด ์ €๊ฐ™์ด ์šฐ๋Š” ๊ฒƒ์ด๋‹คโ€ +ํ•˜์ง€๋งŒ ์ด๋Š” ๊ทธ๋Ÿฐ ๊ฒƒ์ด ์•„๋‹ˆ๋‹ˆ, ๊ฐ•๋ฌผ ์†Œ๋ฆฌ๋Š” ๋“ฃ๊ธฐ ์—ฌํ•˜์— ๋‹ฌ๋ ธ์„ ๊ฒƒ์ด๋‹ค. + +ๆฒณๅ‡บๅ…ฉๅฑฑ้–“, +่งธ็Ÿณ้ฌช็‹ผ, +ๅ…ถ้ฉšๆฟค้งญๆตช, +ๆ†ค็€พๆ€’ๆณข, +ๅ“€ๆนๆ€จ็€จ, +็Ї่กๅทๅ€’, +ๅ˜ถๅ“ฎ่™Ÿๅ–Š, +ๅธธๆœ‰ๆ‘ง็ ด้•ทๅŸŽไน‹ๅ‹ข. +ๆˆฐ่ปŠ่ฌไน˜, +ๆˆฐ้จŽ่ฌ้šŠ, +ๆˆฐ็ ฒ่ฌๆžถ, +ๆˆฐ้ผ“่ฌๅ, +ๆœช่ถณๅ–ฉๅ…ถๅดฉๅกŒๆฝฐๅฃ“ไน‹่ฒ. +ๆฒ™ไธŠๅทจ็Ÿณ, +ๅฑน็„ถ้›ข็ซ‹, +ๆฒณๅ คๆŸณๆจน, +็ช…ๅ†ฅ้ดป่’™, +ๅฆ‚ๆฐด็ฅ—ๆฒณ็ฅž็ˆญๅ‡บ้ฉ•ไบบ, +่€Œๅทฆๅณ่›Ÿ่žญ่ฉฆๅ…ถๆ‹ๆ”ซไนŸ. +ๆˆ–ๆ›ฐ: ใ€Œๆญคๅคๆˆฐๅ ด, +ๆ•…ๆฒณ้ณด็„ถไนŸใ€. +ๆญค้ž็ˆฒๅ…ถ็„ถไนŸ, +ๆฒณ่ฒๅœจ่ฝไน‹ๅฆ‚ไฝ•็ˆพ. diff --git a/work_files/utftest.txt b/work_files/utftest.txt new file mode 100644 index 000000000..5cd4e6089 --- /dev/null +++ b/work_files/utftest.txt @@ -0,0 +1,28 @@ +ํ•˜์ˆ˜โฌ‡๏ธ๐Ÿž๋Š” ๋‘ ์‚ฐ๐Ÿ”๐Ÿž๐Ÿ” ํ‹ˆ์—์„œ ๋‚˜์™€ ๋Œ๊ณผ ๋ถ€๋”ช์ณ ์‹ธ์šฐ๋ฉฐ๐Ÿคœ๐Ÿฟ๐Ÿชจ๐Ÿค›๐Ÿป, ๊ทธ ๋†€๋ž€ ํŒŒ๋„๐Ÿ˜ฒ๐ŸŒŠ์™€ ์„ฑ๋‚œ ๋ฌผ๋จธ๋ฆฌ์™€ ์šฐ๋Š” ์—ฌ์šธ๊ณผ ๋…ธํ•œ ๋ฌผ๊ฒฐ๊ณผ ์Šฌํ”ˆ ๊ณก์กฐ์™€ ์›๋งํ•˜๋Š” ์†Œ๋ฆฌ๊ฐ€ ๊ตฝ์ด์ณ ๋Œ๋ฉด์„œ, ์šฐ๋Š” ๋“ฏ, ์†Œ๋ฆฌ์น˜๋Š” ๋“ฏ, ๋ฐ”์˜๊ฒŒ ํ˜ธ๋ นํ•˜๋Š” ๋“ฏ, ํ•ญ์ƒ ์žฅ์„ฑ์„ ๊นจ๋œจ๋ฆด ํ˜•์„ธ๊ฐ€ ์žˆ์–ด, ์ „์ฐจ ๋งŒ์Šน๊ณผ ์ „๊ธฐ ๋งŒ๋Œ€๋‚˜ ์ „ํฌ ๋งŒ๊ฐ€์™€ ์ „๊ณ  ๋งŒ์ขŒ๋กœ์จ๋Š” ๊ทธ ๋ฌด๋„ˆ๋œจ๋ฆฌ๊ณ  ๋‚ด๋ฟœ๋Š” ์†Œ๋ฆฌ๋ฅผ ์กฑํžˆ ํ˜•์šฉํ•  ์ˆ˜ ์—†์„ ๊ฒƒ์ด๋‹ค. 
๋ชจ๋ž˜ ์œ„์— ํฐ ๋Œ์€ ํ™€์—ฐํžˆ ๋–จ์–ด์ ธ ์„ฐ๊ณ , ๊ฐ• ์–ธ๋•์— ๋ฒ„๋“œ๋‚˜๋ฌด๋Š” ์–ด๋‘ก๊ณ  ์ปด์ปดํ•˜์—ฌ ๋ฌผ์ง€ํ‚ด๊ณผ ํ•˜์ˆ˜ ๊ท€์‹ ์ด ๋‹คํˆฌ์–ด ๋‚˜์™€์„œ ์‚ฌ๋žŒ์„ ๋†€๋ฆฌ๋Š” ๋“ฏํ•œ๋ฐ, ์ขŒ์šฐ์˜ ๊ต๋ฆฌ๊ฐ€ ๋ถ™๋“ค๋ ค๊ณ  ์• ์“ฐ๋Š” ๋“ฏ์‹ถ์—ˆ๋‹ค. +ํ˜น์€ ๋งํ•˜๊ธฐ๋ฅผ, + โ€œ์—ฌ๊ธฐ๋Š” ์˜› ์ „์Ÿํ„ฐ์ด๋ฏ€๋กœ ๊ฐ•๋ฌผ์ด ์ €๊ฐ™์ด ์šฐ๋Š” ๊ฒƒ์ด๋‹คโ€ +ํ•˜์ง€๋งŒ ์ด๋Š” ๊ทธ๋Ÿฐ ๊ฒƒ์ด ์•„๋‹ˆ๋‹ˆ, ๊ฐ•๋ฌผ ์†Œ๋ฆฌ๋Š” ๋“ฃ๊ธฐ ์—ฌํ•˜์— ๋‹ฌ๋ ธ์„ ๊ฒƒ์ด๋‹ค. + +ๆฒณๅ‡บๅ…ฉๅฑฑ้–“, +่งธ็Ÿณ้ฌช็‹ผ, +ๅ…ถ้ฉšๆฟค้งญๆตช, +ๆ†ค็€พๆ€’ๆณข, +ๅ“€ๆนๆ€จ็€จ, +็Ї่กๅทๅ€’, +ๅ˜ถๅ“ฎ่™Ÿๅ–Š, +ๅธธๆœ‰ๆ‘ง็ ด้•ทๅŸŽไน‹ๅ‹ข. +ๆˆฐ่ปŠ่ฌไน˜, +ๆˆฐ้จŽ่ฌ้šŠ, +ๆˆฐ็ ฒ่ฌๆžถ, +ๆˆฐ้ผ“่ฌๅ, +ๆœช่ถณๅ–ฉๅ…ถๅดฉๅกŒๆฝฐๅฃ“ไน‹่ฒ. +ๆฒ™ไธŠๅทจ็Ÿณ, +ๅฑน็„ถ้›ข็ซ‹, +ๆฒณๅ คๆŸณๆจน, +็ช…ๅ†ฅ้ดป่’™, +ๅฆ‚ๆฐด็ฅ—ๆฒณ็ฅž็ˆญๅ‡บ้ฉ•ไบบ, +่€Œๅทฆๅณ่›Ÿ่žญ่ฉฆๅ…ถๆ‹ๆ”ซไนŸ. +ๆˆ–ๆ›ฐ: ใ€Œๆญคๅคๆˆฐๅ ด, +ๆ•…ๆฒณ้ณด็„ถไนŸใ€. +ๆญค้ž็ˆฒๅ…ถ็„ถไนŸ, +ๆฒณ่ฒๅœจ่ฝไน‹ๅฆ‚ไฝ•็ˆพ.