mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-11 23:34:04 +09:00
hangul assembler; adding Unicode conversion function to the TVDOS
This commit is contained in:
@@ -46,14 +46,14 @@ i:{ // Cell Indices: [c0,c2]
|
|||||||
},p:{ // Cell Indices: [c2,c4], where c2 works as a multiplier
|
},p:{ // Cell Indices: [c2,c4], where c2 works as a multiplier
|
||||||
// c2:[null,ㅗ,ㅛ,ㅜ,ㅠ,ㅡ]
|
// c2:[null,ㅗ,ㅛ,ㅜ,ㅠ,ㅡ]
|
||||||
// c4:[0xC6,ㅏ,ㅐ,ㅑ,ㅒ,ㅓ,ㅔ,ㅕ,ㅖ,ㅘ,ㅙ,ㅚㅢㅟ,ㅝ,ㅞ,ㅣ]
|
// c4:[0xC6,ㅏ,ㅐ,ㅑ,ㅒ,ㅓ,ㅔ,ㅕ,ㅖ,ㅘ,ㅙ,ㅚㅢㅟ,ㅝ,ㅞ,ㅣ]
|
||||||
0:[0,0],
|
0:[0,1],
|
||||||
1:[0,1],
|
1:[0,2],
|
||||||
2:[0,2],
|
2:[0,3],
|
||||||
3:[0,3],
|
3:[0,4],
|
||||||
4:[0,4],
|
4:[0,5],
|
||||||
5:[0,5],
|
5:[0,6],
|
||||||
6:[0,6],
|
6:[0,7],
|
||||||
7:[0,7],
|
7:[0,8],
|
||||||
8:[1,0],
|
8:[1,0],
|
||||||
9:[1,9],
|
9:[1,9],
|
||||||
10:[1,10],
|
10:[1,10],
|
||||||
@@ -103,13 +103,14 @@ let enc = {
|
|||||||
i:[
|
i:[
|
||||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,
|
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,
|
||||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,
|
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,
|
||||||
|
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,
|
||||||
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,
|
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,
|
||||||
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,
|
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,
|
||||||
0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd
|
0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd
|
||||||
],p:[
|
],p:[
|
||||||
0xc6,0x8e,0x8f,0xae,0xaf,0xce,0xcf,0xee,0xef,0xb0,0xb1,0xb2,0xb5,0xb6,0xfe
|
0xc6,0x8e,0x8f,0xae,0xaf,0xce,0xcf,0xee,0xef,0xb0,0xb1,0xb2,0xb5,0xb6,0xfe
|
||||||
],f:[
|
],f:[
|
||||||
0x20,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xdb,0xdc,0xdd,0xde,0xdf
|
0x20,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xdb,0xdc,0xdd,0xde,0xdf
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -122,12 +123,17 @@ function toLineChar(i,p,f) {
|
|||||||
/* 0 | 0 */out[0] = ibuf[0]
|
/* 0 | 0 */out[0] = ibuf[0]
|
||||||
/* x | 2 */out[2] = ibuf[1]
|
/* x | 2 */out[2] = ibuf[1]
|
||||||
/* 2 | 4 */out[2+dbl] = pbuf[1]
|
/* 2 | 4 */out[2+dbl] = pbuf[1]
|
||||||
/* | */out[dbl] *= pbuf[0]*2
|
/* | */out[dbl] += pbuf[0]*14
|
||||||
/* 1 | 3 */out[1+dbl] = fbuf[0]
|
/* 1 | 3 */out[1+dbl] = fbuf[0]
|
||||||
/* 3 | 5 */out[3+dbl] = fbuf[1]
|
/* 3 | 5 */out[3+dbl] = fbuf[1]
|
||||||
|
|
||||||
if (ibuf.length > 4) {
|
// serial.println(`ipf: ${i} ${p} ${f}`)
|
||||||
|
// serial.println(out)
|
||||||
|
|
||||||
|
|
||||||
|
if (out.length > 4) {
|
||||||
out[0] = enc.i[out[0]]
|
out[0] = enc.i[out[0]]
|
||||||
|
out[1] = 0x20
|
||||||
out[2] = enc.i[out[2]]
|
out[2] = enc.i[out[2]]
|
||||||
out[3] = enc.f[out[3]]
|
out[3] = enc.f[out[3]]
|
||||||
out[4] = enc.p[out[4]]
|
out[4] = enc.p[out[4]]
|
||||||
@@ -147,14 +153,29 @@ function toLineChar(i,p,f) {
|
|||||||
function printHangul(char) {
|
function printHangul(char) {
|
||||||
let [cy,cx] = con.getyx()
|
let [cy,cx] = con.getyx()
|
||||||
|
|
||||||
serial.println(char)
|
// serial.println("chars:")
|
||||||
|
// serial.println(char)
|
||||||
|
|
||||||
char.forEach((v,i)=>{
|
char.forEach((v,i)=>{
|
||||||
con.mvaddch(cy+(i%2),cx+(i/2),v)
|
con.mvaddch(cy+(i%2),cx+(i/2),v)
|
||||||
serial.println(v.toString(16))
|
// serial.println(v.toString(16))
|
||||||
})
|
})
|
||||||
con.move(cy+(char.length%2),cx+(char.length/2))
|
con.move(cy+(char.length%2),cx+(char.length/2))
|
||||||
}
|
}
|
||||||
|
|
||||||
con.move(4,4)
|
|
||||||
printHangul(toLineChar(2,0,1))
|
let text = "동해물과 백두산이 마르고 닳도록 7비트 한글조합"
|
||||||
|
|
||||||
|
//con.clear()
|
||||||
|
//con.move(1,1)
|
||||||
|
unicode.utf8toCodepoints(text).forEach(cp=>{
|
||||||
|
if (0xAC00 <= cp && cp <= 0xD7A3) {
|
||||||
|
let i = ((cp - 0xAC00) / 588)|0
|
||||||
|
let p = ((cp - 0xAC00) / 28 % 21)|0
|
||||||
|
let f = (cp - 0xAC00) % 28
|
||||||
|
printHangul(toLineChar(i,p,f))
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
print(String.fromCharCode(cp))
|
||||||
|
}
|
||||||
|
})
|
||||||
@@ -147,31 +147,31 @@ chrmap[12],
|
|||||||
chrmap[13],
|
chrmap[13],
|
||||||
]
|
]
|
||||||
|
|
||||||
let UTF8_ACCEPT = 0
|
|
||||||
let UTF8D = [
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
|
||||||
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
|
||||||
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
||||||
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
|
|
||||||
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
|
||||||
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
|
||||||
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
|
||||||
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
|
||||||
12,36,12,12,12,12,12,12,12,12,12,12,
|
|
||||||
]
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param utf8text A JS string in UTF-8
|
* @param utf8text A JS string in UTF-8
|
||||||
* @return array of Unicode codepoints
|
* @return array of Unicode codepoints
|
||||||
*/
|
*/
|
||||||
function utf8decode(utf8text) {
|
function utf8decode(utf8text) {
|
||||||
var state = UTF8_ACCEPT
|
let UTF8_ACCEPT = 0
|
||||||
var codep = 0
|
let UTF8D = [
|
||||||
var codepoints = []
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
||||||
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
||||||
|
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||||
|
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
|
||||||
|
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
||||||
|
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
||||||
|
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
||||||
|
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
||||||
|
12,36,12,12,12,12,12,12,12,12,12,12,
|
||||||
|
]
|
||||||
|
|
||||||
|
let state = UTF8_ACCEPT
|
||||||
|
let codep = 0
|
||||||
|
let codepoints = []
|
||||||
|
|
||||||
for (let i=0; i < utf8text.length; i++) {
|
for (let i=0; i < utf8text.length; i++) {
|
||||||
let byte = utf8text.charCodeAt(i)
|
let byte = utf8text.charCodeAt(i)
|
||||||
|
|||||||
@@ -281,6 +281,44 @@ Object.freeze(input);
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
const unicode = {};
|
||||||
|
unicode.utf8toCodepoints = function(utf8text) {
|
||||||
|
let UTF8_ACCEPT = 0
|
||||||
|
let UTF8D = [
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
|
||||||
|
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
||||||
|
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||||
|
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
|
||||||
|
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
|
||||||
|
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
|
||||||
|
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
|
||||||
|
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
|
||||||
|
12,36,12,12,12,12,12,12,12,12,12,12,
|
||||||
|
]
|
||||||
|
|
||||||
|
let state = UTF8_ACCEPT
|
||||||
|
let codep = 0
|
||||||
|
let codepoints = []
|
||||||
|
|
||||||
|
for (let i=0; i < utf8text.length; i++) {
|
||||||
|
let byte = utf8text.charCodeAt(i)
|
||||||
|
let type = UTF8D[byte]
|
||||||
|
codep = (state != UTF8_ACCEPT) ?
|
||||||
|
(byte & 0x3f) | (codep << 6) : (0xff >> type) & (byte)
|
||||||
|
state = UTF8D[256 + state + type]
|
||||||
|
if (state == UTF8_ACCEPT)
|
||||||
|
codepoints.push(codep)
|
||||||
|
}
|
||||||
|
return codepoints
|
||||||
|
}
|
||||||
|
Object.freeze(unicode);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// install other stuff
|
// install other stuff
|
||||||
filesystem.open("A", "tvdos/gl.js", "R");
|
filesystem.open("A", "tvdos/gl.js", "R");
|
||||||
const GL = eval(filesystem.readAll("A"));
|
const GL = eval(filesystem.readAll("A"));
|
||||||
|
|||||||
Reference in New Issue
Block a user