mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
hangul assembler; adding Unicode conversion function to the TVDOS
This commit is contained in:
@@ -46,14 +46,14 @@ i:{ // Cell Indices: [c0,c2]
|
||||
},p:{ // Cell Indices: [c2,c4], where c2 works as a multiplier
|
||||
// c2:[null,ㅗ,ㅛ,ㅜ,ㅠ,ㅡ]
|
||||
// c4:[0xC6,ㅏ,ㅐ,ㅑ,ㅒ,ㅓ,ㅔ,ㅕ,ㅖ,ㅘ,ㅙ,ㅚㅢㅟ,ㅝ,ㅞ,ㅣ]
|
||||
0:[0,0],
|
||||
1:[0,1],
|
||||
2:[0,2],
|
||||
3:[0,3],
|
||||
4:[0,4],
|
||||
5:[0,5],
|
||||
6:[0,6],
|
||||
7:[0,7],
|
||||
0:[0,1],
|
||||
1:[0,2],
|
||||
2:[0,3],
|
||||
3:[0,4],
|
||||
4:[0,5],
|
||||
5:[0,6],
|
||||
6:[0,7],
|
||||
7:[0,8],
|
||||
8:[1,0],
|
||||
9:[1,9],
|
||||
10:[1,10],
|
||||
@@ -103,13 +103,14 @@ let enc = {
|
||||
i:[
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,
|
||||
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,
|
||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,
|
||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,
|
||||
0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd
|
||||
],p:[
|
||||
0xc6,0x8e,0x8f,0xae,0xaf,0xce,0xcf,0xee,0xef,0xb0,0xb1,0xb2,0xb5,0xb6,0xfe
|
||||
],f:[
|
||||
0x20,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xdb,0xdc,0xdd,0xde,0xdf
|
||||
0x20,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xdb,0xdc,0xdd,0xde,0xdf
|
||||
]
|
||||
}
|
||||
|
||||
@@ -122,12 +123,17 @@ function toLineChar(i,p,f) {
|
||||
/* 0 | 0 */out[0] = ibuf[0]
|
||||
/* x | 2 */out[2] = ibuf[1]
|
||||
/* 2 | 4 */out[2+dbl] = pbuf[1]
|
||||
/* | */out[dbl] *= pbuf[0]*2
|
||||
/* | */out[dbl] += pbuf[0]*14
|
||||
/* 1 | 3 */out[1+dbl] = fbuf[0]
|
||||
/* 3 | 5 */out[3+dbl] = fbuf[1]
|
||||
|
||||
if (ibuf.length > 4) {
|
||||
// serial.println(`ipf: ${i} ${p} ${f}`)
|
||||
// serial.println(out)
|
||||
|
||||
|
||||
if (out.length > 4) {
|
||||
out[0] = enc.i[out[0]]
|
||||
out[1] = 0x20
|
||||
out[2] = enc.i[out[2]]
|
||||
out[3] = enc.f[out[3]]
|
||||
out[4] = enc.p[out[4]]
|
||||
@@ -147,14 +153,29 @@ function toLineChar(i,p,f) {
|
||||
/**
 * Draws one assembled Hangul glyph at the current cursor position and then
 * advances the cursor past it.
 *
 * The cells in `char` are laid out column by column over two text rows:
 * even indices go on the cursor row, odd indices on the row below, and every
 * pair of cells occupies one column.
 *
 * The column offset is truncated to an integer; the plain `i/2` division
 * produced fractional columns (x.5) for every odd cell index. Debug traces to
 * the serial port are intentionally not emitted here.
 *
 * @param char array of character codes, as passed to `con.mvaddch`
 */
function printHangul(char) {
    let [cy,cx] = con.getyx()

    char.forEach((v,i)=>{
        // i%2 selects the row, i>>1 is the integer column of this cell
        con.mvaddch(cy+(i%2),cx+(i>>1),v)
    })

    // leave the cursor just after the cells that were drawn
    con.move(cy+(char.length%2),cx+(char.length>>1))
}
|
||||
|
||||
con.move(4,4)
|
||||
printHangul(toLineChar(2,0,1))
|
||||
|
||||
let text = "동해물과 백두산이 마르고 닳도록 7비트 한글조합"
|
||||
|
||||
//con.clear()
|
||||
//con.move(1,1)
|
||||
unicode.utf8toCodepoints(text).forEach(cp=>{
|
||||
if (0xAC00 <= cp && cp <= 0xD7A3) {
|
||||
let i = ((cp - 0xAC00) / 588)|0
|
||||
let p = ((cp - 0xAC00) / 28 % 21)|0
|
||||
let f = (cp - 0xAC00) % 28
|
||||
printHangul(toLineChar(i,p,f))
|
||||
}
|
||||
else {
|
||||
print(String.fromCharCode(cp))
|
||||
}
|
||||
})
|
||||
@@ -147,31 +147,31 @@ chrmap[12],
|
||||
chrmap[13],
|
||||
]
|
||||
|
||||
let UTF8_ACCEPT = 0
|
||||
let UTF8D = [
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
|
||||
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
||||
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
||||
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
||||
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
||||
12,36,12,12,12,12,12,12,12,12,12,12,
|
||||
]
|
||||
|
||||
/**
|
||||
* @param utf8text A JS string in UTF-8
|
||||
* @return array of Unicode codepoints
|
||||
*/
|
||||
function utf8decode(utf8text) {
|
||||
var state = UTF8_ACCEPT
|
||||
var codep = 0
|
||||
var codepoints = []
|
||||
let UTF8_ACCEPT = 0
|
||||
let UTF8D = [
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
|
||||
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
||||
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
||||
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
||||
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
||||
12,36,12,12,12,12,12,12,12,12,12,12,
|
||||
]
|
||||
|
||||
let state = UTF8_ACCEPT
|
||||
let codep = 0
|
||||
let codepoints = []
|
||||
|
||||
for (let i=0; i < utf8text.length; i++) {
|
||||
let byte = utf8text.charCodeAt(i)
|
||||
|
||||
@@ -281,6 +281,44 @@ Object.freeze(input);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const unicode = {};
/**
 * Decodes a UTF-8 byte string into an array of Unicode codepoints.
 *
 * The decoder is a table-driven state machine: the first 256 table entries
 * map a byte to a character class, the remaining entries map
 * (state + class) to the next state.
 *
 * Malformed input does not stop decoding. Every invalid sequence produces
 * one U+FFFD (REPLACEMENT CHARACTER) and decoding resumes from a clean
 * state; a sequence that is cut short at the end of the input also produces
 * one U+FFFD. Well-formed input decodes exactly as before.
 *
 * @param utf8text A JS string in UTF-8 (each char code is one byte, 0..255).
 *                 Char codes above 255 are not bytes and are treated as
 *                 invalid input.
 * @return array of Unicode codepoints
 */
unicode.utf8toCodepoints = (function() {
    const UTF8_ACCEPT = 0
    const UTF8_REJECT = 12
    const REPLACEMENT_CODEPOINT = 0xFFFD
    // character class whose transition is REJECT from every state
    const INVALID_CLASS = 8
    // built once instead of on every call
    const UTF8D = [
        // byte -> character class
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
        8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
        // (state + class) -> next state
        0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
        12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
        12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
        12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
        12,36,12,12,12,12,12,12,12,12,12,12,
    ]

    return function(utf8text) {
        let state = UTF8_ACCEPT
        let codep = 0
        const codepoints = []

        for (let i = 0; i < utf8text.length; i++) {
            const byte = utf8text.charCodeAt(i)
            // anything above 255 would index past the class table
            const type = (byte < 256) ? UTF8D[byte] : INVALID_CLASS
            const prev = state

            codep = (state !== UTF8_ACCEPT) ?
                (byte & 0x3f) | (codep << 6) : (0xff >> type) & (byte)
            state = UTF8D[256 + state + type]

            if (state === UTF8_ACCEPT) {
                codepoints.push(codep)
            }
            else if (state === UTF8_REJECT) {
                // REJECT is a sink state: without a reset every following
                // byte would be swallowed silently
                codepoints.push(REPLACEMENT_CODEPOINT)
                state = UTF8_ACCEPT
                // the offending byte interrupted a multi-byte sequence, so it
                // may itself start a valid one: decode it again from scratch
                if (prev !== UTF8_ACCEPT) i--
            }
        }

        // input ended in the middle of a multi-byte sequence
        if (state !== UTF8_ACCEPT) codepoints.push(REPLACEMENT_CODEPOINT)

        return codepoints
    }
})();
Object.freeze(unicode);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// install other stuffs
|
||||
filesystem.open("A", "tvdos/gl.js", "R");
|
||||
const GL = eval(filesystem.readAll("A"));
|
||||
|
||||
Reference in New Issue
Block a user