mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-06-15 10:04:05 +09:00
devanagari ligation wip
This commit is contained in:
BIN
assets/devanagari_variable.tga
LFS
BIN
assets/devanagari_variable.tga
LFS
Binary file not shown.
@@ -1035,17 +1035,30 @@ class TerrarumSansBitmap(
|
|||||||
|
|
||||||
// basically an Unicode NFD with some additional flavours
|
// basically an Unicode NFD with some additional flavours
|
||||||
private fun CodepointSequence.normalise(): CodepointSequence {
|
private fun CodepointSequence.normalise(): CodepointSequence {
|
||||||
val dis = this.utf16to32()
|
|
||||||
val seq = CodepointSequence()
|
val seq = CodepointSequence()
|
||||||
val seq2 = CodepointSequence()
|
val seq2 = CodepointSequence()
|
||||||
|
|
||||||
val yankedCharacters = Stack<Pair<Int, CodePoint>>() // Stack of <Position, CodePoint>; codepoint use -1 if not applicable
|
val yankedCharacters = Stack<Pair<Int, CodePoint>>() // Stack of <Position, CodePoint>; codepoint use -1 if not applicable
|
||||||
|
|
||||||
|
/**
 * Flushes every pending entry of `yankedCharacters` into `seq`,
 * most recently pushed entry first, leaving the stack empty.
 *
 * A yanked DEVANAGARI_RA is emitted as DEVANAGARI_RA_SUPER; any other yanked
 * codepoint is emitted unchanged. The position half of each pair is not used here.
 */
fun emptyOutYanked() {
    while (!yankedCharacters.empty()) {
        // Pop exactly once per iteration. Popping again in the non-RA branch would
        // drop the entry that was just inspected, emit a different entry without the
        // RA check, and throw EmptyStackException when the inspected entry was the last one.
        val yanked = yankedCharacters.pop().second
        seq.add(if (yanked == DEVANAGARI_RA) DEVANAGARI_RA_SUPER else yanked)
    }
}
|
||||||
|
|
||||||
var i = 0
|
var i = 0
|
||||||
|
this.utf16to32().let { dis ->
|
||||||
while (i < dis.size) {
|
while (i < dis.size) {
|
||||||
val c = dis[i]
|
val cPrev2 = dis.getOrElse(i-2) { -1 }
|
||||||
val cPrev = dis.getOrElse(i-1) { -1 }
|
val cPrev = dis.getOrElse(i-1) { -1 }
|
||||||
|
val c = dis[i]
|
||||||
val cNext = dis.getOrElse(i+1) { -1 }
|
val cNext = dis.getOrElse(i+1) { -1 }
|
||||||
|
val cNext2 = dis.getOrElse(i+2) { -1 }
|
||||||
|
// can't use regular sliding window as the 'i' value is changed way too often
|
||||||
|
|
||||||
// LET THE NORMALISATION BEGIN //
|
// LET THE NORMALISATION BEGIN //
|
||||||
|
|
||||||
@@ -1137,11 +1150,26 @@ class TerrarumSansBitmap(
|
|||||||
// END of tamil subsystem implementation
|
// END of tamil subsystem implementation
|
||||||
|
|
||||||
// BEGIN of devanagari string replacer
|
// BEGIN of devanagari string replacer
|
||||||
else if (c == DEVANAGARI_VIRAMA) {
|
// Unicode Devanagari Rendering Rule R6-R8
|
||||||
yankedCharacters.push(i-1 to cPrev)
|
// (this must precede the ligaturing-machine coded on the 2nd pass, otherwise the rules below will cause undesirable effects)
|
||||||
|
else if (devanagariConsonants.contains(c) && cNext == DEVANAGARI_VIRAMA && cNext2 == DEVANAGARI_RA) {
|
||||||
|
seq.addAll(toRaAppended(c))
|
||||||
|
i += 2
|
||||||
}
|
}
|
||||||
else if (c == DEVANAGARI_RA) {
|
// Unicode Devanagari Rendering Rule R5
|
||||||
|
else if (c == DEVANAGARI_RRA && cNext == DEVANAGARI_VIRAMA || c == DEVANAGARI_RA && cNext == DEVANAGARI_VIRAMA && cNext2 == ZWJ) {
|
||||||
|
seq.add(DEVANAGARI_EYELASH_RA)
|
||||||
|
i += 1
|
||||||
|
}
|
||||||
|
// Unicode Devanagari Rendering Rule R2-R4
|
||||||
|
else if (c == DEVANAGARI_RA && cNext == DEVANAGARI_VIRAMA && cNext2 != DEVANAGARI_RA) {
|
||||||
yankedCharacters.push(i to c)
|
yankedCharacters.push(i to c)
|
||||||
|
i += 1
|
||||||
|
}
|
||||||
|
// Unicode Devanagari Rendering Rule R2-R4
|
||||||
|
else if (!isDevanagari(c) && !yankedCharacters.empty()) {
|
||||||
|
emptyOutYanked()
|
||||||
|
seq.add(c)
|
||||||
}
|
}
|
||||||
// WIP
|
// WIP
|
||||||
// END of devanagari string replacer
|
// END of devanagari string replacer
|
||||||
@@ -1164,7 +1192,11 @@ class TerrarumSansBitmap(
|
|||||||
|
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
emptyOutYanked()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// second scan
|
||||||
// swap position of {letter, diacritics that comes before the letter}
|
// swap position of {letter, diacritics that comes before the letter}
|
||||||
i = 1
|
i = 1
|
||||||
while (i <= seq.lastIndex) {
|
while (i <= seq.lastIndex) {
|
||||||
@@ -1175,6 +1207,27 @@ class TerrarumSansBitmap(
|
|||||||
seq[i] = t
|
seq[i] = t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val cPrev2 = seq.getOrElse(i-2) { -1 }
|
||||||
|
val cPrev = seq.getOrElse(i-1) { -1 }
|
||||||
|
val c = seq[i]
|
||||||
|
|
||||||
|
// BEGIN of Devanagari String Replacer 2 (lookbehind type)
|
||||||
|
// Devanagari Ligations (Lookbehind)
|
||||||
|
if (devanagariConsonants.contains(cPrev2) && cPrev == DEVANAGARI_VIRAMA && devanagariConsonants.contains(c)) {
|
||||||
|
i -= 2
|
||||||
|
|
||||||
|
repeat(3) { seq.removeAt(i) }
|
||||||
|
|
||||||
|
val ligature = ligateIndicConsonants(cPrev2, c)
|
||||||
|
ligature.forEachIndexed { index, char ->
|
||||||
|
seq.add(i + index, char)
|
||||||
|
}
|
||||||
|
|
||||||
|
i += ligature.size
|
||||||
|
}
|
||||||
|
// END of Devanagari String Replacer 2
|
||||||
|
|
||||||
|
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1630,13 +1683,130 @@ class TerrarumSansBitmap(
|
|||||||
'j'.toInt() to 0x237
|
'j'.toInt() to 0x237
|
||||||
)
|
)
|
||||||
|
|
||||||
private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray()
|
private val ZWJ = 0x200D
|
||||||
|
|
||||||
|
private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray()
|
||||||
private val TAMIL_KSSA = 0xF00ED
|
private val TAMIL_KSSA = 0xF00ED
|
||||||
private val TAMIL_SHRII = 0xF00EE
|
private val TAMIL_SHRII = 0xF00EE
|
||||||
|
|
||||||
|
// Codepoint ranges that the Devanagari ligation code treats as consonants.
private val devanagariBaseConsonants = 0x0915..0x0939
private val devanagariBaseConsonantsWithNukta = 0x0958..0x095F
private val devanagariBaseConsonantsExtended = 0x0978..0x097F
private val devanagariPresentationFormsConsonants = 0xF0140..0xF01FF

// Every codepoint of the four ranges above, flattened in the order listed.
// Declared after the ranges on purpose: property initialisers run top to bottom.
private val devanagariConsonants = listOf(
    devanagariBaseConsonants,
    devanagariBaseConsonantsWithNukta,
    devanagariBaseConsonantsExtended,
    devanagariPresentationFormsConsonants
).flatten().toIntArray()
|
||||||
|
|
||||||
private val DEVANAGARI_VIRAMA = 0x94D
|
private val DEVANAGARI_VIRAMA = 0x94D
|
||||||
private val DEVANAGARI_RA = 0x930
|
private val DEVANAGARI_RA = 0x930
|
||||||
|
private val DEVANAGARI_RRA = 0x931
|
||||||
|
private val DEVANAGARI_RA_SUPER = 0xF0104
|
||||||
|
private val DEVANAGARI_EYELASH_RA = 0xF012A
|
||||||
|
|
||||||
|
private val DEVANAGARI_LIG_K_SS = 0xF0181
|
||||||
|
private val DEVANAGARI_LIG_J_NY = 0xF0184
|
||||||
|
private val DEVANAGARI_LIG_T_T = 0xF018B
|
||||||
|
private val DEVANAGARI_LIG_T_R = 0xF0154
|
||||||
|
private val DEVANAGARI_LIG_SH_R = 0xF0166
|
||||||
|
private val DEVANAGARI_HALFLIG_K_SS = 0xF012B
|
||||||
|
private val DEVANAGARI_HALFLIG_J_NY = 0xF012C
|
||||||
|
private val DEVANAGARI_HALFLIG_T_T = 0xF012D
|
||||||
|
private val DEVANAGARI_HALFLIG_T_R = 0xF012E
|
||||||
|
private val DEVANAGARI_HALFLIG_SH_R = 0xF012F
|
||||||
|
|
||||||
|
private val DEVANAGARI_SYLL_RU = 0xF0100
|
||||||
|
private val DEVANAGARI_SYLL_RUU = 0xF0101
|
||||||
|
|
||||||
|
private val DEVANAGARI_HALF_FORMS = 0xF0100 // starting point for Devanagari half forms
|
||||||
|
private val DEVANAGARI_LIG_X_R = 0xF0140 // starting point for Devanagari ligature CONSONANT+RA
|
||||||
|
|
||||||
|
/**
 * Looks up the half-form glyph for this codepoint.
 *
 * Base consonants and nukta consonants are mapped by a fixed offset into the block
 * starting at DEVANAGARI_HALF_FORMS; a handful of ligatures have dedicated
 * half-ligature glyphs.
 *
 * @return the half-form codepoint, or `null` when none is defined here
 */
private fun CodePoint.toHalfFormOrNull(): CodePoint? = when (this) {
    in devanagariBaseConsonants -> this - 0x0910 + DEVANAGARI_HALF_FORMS
    in devanagariBaseConsonantsWithNukta -> this - 0x0920 + DEVANAGARI_HALF_FORMS
    DEVANAGARI_LIG_K_SS -> DEVANAGARI_HALFLIG_K_SS
    DEVANAGARI_LIG_J_NY -> DEVANAGARI_HALFLIG_J_NY
    DEVANAGARI_LIG_T_T -> DEVANAGARI_HALFLIG_T_T
    DEVANAGARI_LIG_T_R -> DEVANAGARI_HALFLIG_T_R
    DEVANAGARI_LIG_SH_R -> DEVANAGARI_HALFLIG_SH_R
    // TODO half forms of X_R-ligatures
    else -> null
}
|
||||||
|
|
||||||
|
// TODO use proper version of Virama for respective scripts
|
||||||
|
/**
 * Returns the half form of this codepoint as a single-element list, or, when no
 * half form exists, this codepoint followed by DEVANAGARI_VIRAMA.
 */
private fun CodePoint.toHalfFormOrVirama(): List<CodePoint> {
    val halfForm = this.toHalfFormOrNull()
    return if (halfForm != null) listOf(halfForm) else listOf(this, DEVANAGARI_VIRAMA)
}
|
||||||
|
|
||||||
|
// TODO use proper version of Virama for respective scripts
|
||||||
|
/**
 * Builds the CONSONANT+RA form of [c].
 *
 * Base consonants map by a fixed offset into the block starting at DEVANAGARI_LIG_X_R;
 * anything else is returned spelled out as `c, VIRAMA, RA`.
 */
private fun toRaAppended(c: CodePoint): List<CodePoint> =
    if (c !in devanagariBaseConsonants) listOf(c, DEVANAGARI_VIRAMA, DEVANAGARI_RA)
    else listOf(c - 0x0910 + DEVANAGARI_LIG_X_R)
|
||||||
|
|
||||||
|
/**
 * Resolves the consonant pair [c1] + virama + [c2] into the codepoints to emit.
 *
 * Resolution order:
 *  1. if [c2] is DEVANAGARI_RA, the result of [toRaAppended] for [c1];
 *  2. otherwise, if the pair has a dedicated conjunct glyph in the table below, that glyph;
 *  3. otherwise, the half form of [c1] (or [c1] + virama) followed by [c2].
 */
private fun ligateIndicConsonants(c1: CodePoint, c2: CodePoint): List<CodePoint> {
    // Devanagari @.RA
    if (c2 == DEVANAGARI_RA) return toRaAppended(c1)

    // Dedicated conjunct glyph for the pair, or null when the pair has none.
    val conjunct: CodePoint? = when (c1) {
        0x0915 -> when (c2) { // Devanagari KA
            0x0924 -> 0xF0180 // K.TA
            0x0937 -> DEVANAGARI_LIG_K_SS // K.SSA
            else -> null
        }
        0x0919 -> when (c2) { // Devanagari NGA
            0x0917 -> 0xF0182 // NG.G
            0x092E -> 0xF0183 // NG.M
            else -> null
        }
        0x091C -> when (c2) { // Devanagari JA
            0x091E -> DEVANAGARI_LIG_J_NY // J.NY
            else -> null
        }
        0x091F -> when (c2) { // Devanagari TTA
            0x091F -> 0xF0185 // TT.TT
            0x0920 -> 0xF0186 // TT.TTH
            else -> null
        }
        0x0920 -> when (c2) { // Devanagari TTHA
            0x0920 -> 0xF0187 // TTH.TTH
            else -> null
        }
        0x0921 -> when (c2) { // Devanagari DDA
            0x0921 -> 0xF0188 // DD.DD
            0x0922 -> 0xF0189 // DD.DDH
            else -> null
        }
        0x0922 -> when (c2) { // Devanagari DDHA
            0x0922 -> 0xF018A // DDH.DDH
            else -> null
        }
        0x0924 -> when (c2) { // Devanagari TA
            0x0924 -> DEVANAGARI_LIG_T_T // T.T
            else -> null
        }
        0x0926 -> when (c2) { // Devanagari DA
            0x0926 -> 0xF018C // D.D
            0x0927 -> 0xF018D // D.DH
            0x092C -> 0xF018E // D.B
            0x092D -> 0xF018F // D.BH
            0x092E -> 0xF0190 // D.M
            0x092F -> 0xF0191 // D.Y
            0x0935 -> 0xF0192 // D.V
            else -> null
        }
        0x0928 -> when (c2) { // Devanagari NA
            0x0928 -> 0xF0193 // N.N
            else -> null
        }
        0x0939 -> when (c2) { // Devanagari HA
            0x0923 -> 0xF0194 // H.NN
            0x0928 -> 0xF0195 // H.N
            0x092E -> 0xF0196 // H.M
            0x092F -> 0xF0197 // H.Y
            0x0932 -> 0xF0198 // H.L
            0x0935 -> 0xF0199 // H.V
            else -> null
        }
        else -> null
    }

    // TODO use proper version of Virama for respective scripts
    return if (conjunct != null) listOf(conjunct) else c1.toHalfFormOrVirama() + c2
}
|
||||||
|
|
||||||
|
|
||||||
private fun Int.toHex() = "U+${this.toString(16).padStart(4, '0').toUpperCase()}"
|
private fun Int.toHex() = "U+${this.toString(16).padStart(4, '0').toUpperCase()}"
|
||||||
|
|||||||
BIN
testing.PNG
BIN
testing.PNG
Binary file not shown.
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 16 KiB |
18
testtext.txt
18
testtext.txt
@@ -1,17 +1,3 @@
|
|||||||
|
|
||||||
A ดุ ตี ปู่ พี่ ป่ ม่ ปั มั พีุ ทิ่ท่ท่ิ ปิ่ป่ป่ิ ทิ้ ปิ้ มำด มําด
|
फ़ॊण्ज़ीणू क्की क्कि र्क र्त्सा र्र्ल ऱ्ड क्राप् ठ्री त्र् त्र्मोत्र्यो ज्ञज्ञ्रत्त न्न
|
||||||
|
ह्णह्नह्मह्यह्लह्वह्र णनमयलव ज़्लतान
|
||||||
이는 일본의 요미가나(読み仮名)와 비슷한 용법이다.
|
|
||||||
|
|
||||||
Sugarさとう설탕砂糖
|
|
||||||
|
|
||||||
বাংলাদেশ
|
|
||||||
|
|
||||||
தமிழ் லூ லு
|
|
||||||
|
|
||||||
QWனோT
|
|
||||||
னோ
|
|
||||||
|
|
||||||
எழுத்து வடிவங்களுக்கு வார்த்தைகள் மற்றும் வாக்கியங்கள் போலவே தொனி, ஒலி பண்பு, தன்மை உண்டு.
|
|
||||||
|
|
||||||
க்ஷ ஶ்ரீ க்ஷௌ
|
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user