From 229cc78eb6b437eaec91a519428342500c75763a Mon Sep 17 00:00:00 2001 From: minjaesong Date: Thu, 23 Dec 2021 20:12:20 +0900 Subject: [PATCH] hangul assembler; adding Unicode conversion function to the TVDOS --- assets/disk0/home/hangasm.js | 51 +++++++++++++++++++++++---------- assets/disk0/home/hangultest.js | 40 +++++++++++++------------- assets/disk0/tvdos/TVDOS.SYS | 38 ++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 35 deletions(-) diff --git a/assets/disk0/home/hangasm.js b/assets/disk0/home/hangasm.js index c1b44ec..d96bee0 100644 --- a/assets/disk0/home/hangasm.js +++ b/assets/disk0/home/hangasm.js @@ -46,14 +46,14 @@ i:{ // Cell Indices: [c0,c2] },p:{ // Cell Indices: [c2,c4], where c2 will be work as an multiplier // c2:[null,ㅗ,ㅛ,ㅜ,ㅠ,ㅡ] // c4:[0xC6,ㅏ,ㅐ,ㅑ,ㅒ,ㅓ,ㅔ,ㅕ,ㅖ,ㅘ,ㅙ,ㅚㅢㅟ,ㅝ,ㅞ,ㅣ] -0:[0,0], -1:[0,1], -2:[0,2], -3:[0,3], -4:[0,4], -5:[0,5], -6:[0,6], -7:[0,7], +0:[0,1], +1:[0,2], +2:[0,3], +3:[0,4], +4:[0,5], +5:[0,6], +6:[0,7], +7:[0,8], 8:[1,0], 9:[1,9], 10:[1,10], @@ -103,13 +103,14 @@ let enc = { i:[ 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d, 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d, +0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad, 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed, 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd, 0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd ],p:[ 0xc6,0x8e,0x8f,0xae,0xaf,0xce,0xcf,0xee,0xef,0xb0,0xb1,0xb2,0xb5,0xb6,0xfe ],f:[ -0x20,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xdb,0xdc,0xdd,0xde,0xdf +0x20,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xdb,0xdc,0xdd,0xde,0xdf ] } @@ -122,12 +123,17 @@ function toLineChar(i,p,f) { /* 0 | 0 */out[0] = ibuf[0] /* x | 2 */out[2] = ibuf[1] /* 2 | 4 */out[2+dbl] = pbuf[1] - /* | */out[dbl] *= pbuf[0]*2 + /* | */out[dbl] += pbuf[0]*14 /* 1 | 3 */out[1+dbl] = fbuf[0] /* 3 | 5 */out[3+dbl] = fbuf[1] - if (ibuf.length > 4) { +// serial.println(`ipf: ${i} ${p} ${f}`) +// serial.println(out) + + + if (out.length > 4) { out[0] = enc.i[out[0]] + out[1] = 0x20 out[2] = enc.i[out[2]] out[3] = enc.f[out[3]] out[4] = enc.p[out[4]] @@ -147,14 +153,29 @@ function toLineChar(i,p,f) { function printHangul(char) { let [cy,cx] = con.getyx() - serial.println(char) +// serial.println("chars:") +// serial.println(char) char.forEach((v,i)=>{ con.mvaddch(cy+(i%2),cx+(i/2),v) - serial.println(v.toString(16)) +// serial.println(v.toString(16)) }) con.move(cy+(char.length%2),cx+(char.length/2)) } -con.move(4,4) -printHangul(toLineChar(2,0,1)) \ No newline at end of file + +let text = "동해물과 백두산이 마르고 닳도록 7비트 한글조합" + +//con.clear() +//con.move(1,1) +unicode.utf8toCodepoints(text).forEach(cp=>{ + if (0xAC00 <= cp && cp <= 0xD7A3) { + let i = ((cp - 0xAC00) / 588)|0 + let p = ((cp - 0xAC00) / 28 % 21)|0 + let f = (cp - 0xAC00) % 28 + printHangul(toLineChar(i,p,f)) + } + else { + print(String.fromCharCode(cp)) + } +}) \ No newline at end of file diff --git a/assets/disk0/home/hangultest.js b/assets/disk0/home/hangultest.js index a3fe67f..356a5d3 100644 --- a/assets/disk0/home/hangultest.js +++ b/assets/disk0/home/hangultest.js @@ -147,31 +147,31 @@ chrmap[12], chrmap[13], ] -let UTF8_ACCEPT = 0 -let UTF8D = [ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, - 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, - 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, - 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, - 12,36,12,12,12,12,12,12,12,12,12,12, -] - /** * @param utf8text A JS string in UTF-8 * @return array of Unicode codepoints */ function utf8decode(utf8text) { - var state = UTF8_ACCEPT - var codep = 0 - var codepoints = [] + let UTF8_ACCEPT = 0 + let UTF8D = [ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, + ] + + let state = UTF8_ACCEPT + let codep = 0 + let codepoints = [] for (let i=0; i < utf8text.length; i++) { let byte = utf8text.charCodeAt(i) diff --git a/assets/disk0/tvdos/TVDOS.SYS b/assets/disk0/tvdos/TVDOS.SYS index ab7291d..a3bdb9a 100644 --- a/assets/disk0/tvdos/TVDOS.SYS +++ b/assets/disk0/tvdos/TVDOS.SYS @@ -281,6 +281,44 @@ Object.freeze(input); /////////////////////////////////////////////////////////////////////////////// +const unicode = {}; +unicode.utf8toCodepoints = function(utf8text) { + let UTF8_ACCEPT = 0 + let UTF8D = [ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, + ] + + let state = UTF8_ACCEPT + let codep = 0 + let codepoints = [] + + for (let i=0; i < utf8text.length; i++) { + let byte = utf8text.charCodeAt(i) + let type = UTF8D[byte] + codep = (state != UTF8_ACCEPT) ? + (byte & 0x3f) | (codep << 6) : (0xff >> type) & (byte) + state = UTF8D[256 + state + type] + if (state == UTF8_ACCEPT) + codepoints.push(codep) + } + return codepoints +} +Object.freeze(unicode); + +/////////////////////////////////////////////////////////////////////////////// + // install other stuffs filesystem.open("A", "tvdos/gl.js", "R"); const GL = eval(filesystem.readAll("A"));