mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-06-06 05:58:30 +09:00
devanagari ligation wip
This commit is contained in:
BIN
assets/devanagari_variable.tga
LFS
BIN
assets/devanagari_variable.tga
LFS
Binary file not shown.
@@ -1035,17 +1035,30 @@ class TerrarumSansBitmap(
|
||||
|
||||
// basically a Unicode NFD with some additional flavours
|
||||
private fun CodepointSequence.normalise(): CodepointSequence {
|
||||
val dis = this.utf16to32()
|
||||
val seq = CodepointSequence()
|
||||
val seq2 = CodepointSequence()
|
||||
|
||||
val yankedCharacters = Stack<Pair<Int, CodePoint>>() // Stack of <Position, CodePoint>; codepoint use -1 if not applicable
|
||||
|
||||
/**
 * Drains [yankedCharacters] into [seq], most recently pushed entry first.
 *
 * A yanked [DEVANAGARI_RA] is emitted as [DEVANAGARI_RA_SUPER]; every other
 * yanked code point is emitted unchanged. The stack is empty on return.
 */
fun emptyOutYanked() {
    while (!yankedCharacters.empty()) {
        // Pop exactly once per iteration. Popping a second time in the non-RA
        // branch would drop the entry that was just inspected, emit a different
        // one, and throw EmptyStackException when the inspected entry was the last.
        val poppedChar = yankedCharacters.pop()
        val yanked = poppedChar.second

        if (yanked == DEVANAGARI_RA)
            seq.add(DEVANAGARI_RA_SUPER)
        else
            seq.add(yanked)
    }
}
|
||||
|
||||
var i = 0
|
||||
this.utf16to32().let { dis ->
|
||||
while (i < dis.size) {
|
||||
val c = dis[i]
|
||||
val cPrev2 = dis.getOrElse(i-2) { -1 }
|
||||
val cPrev = dis.getOrElse(i-1) { -1 }
|
||||
val c = dis[i]
|
||||
val cNext = dis.getOrElse(i+1) { -1 }
|
||||
val cNext2 = dis.getOrElse(i+2) { -1 }
|
||||
// can't use regular sliding window as the 'i' value is changed way too often
|
||||
|
||||
// LET THE NORMALISATION BEGIN //
|
||||
|
||||
@@ -1137,11 +1150,26 @@ class TerrarumSansBitmap(
|
||||
// END of tamil subsystem implementation
|
||||
|
||||
// BEGIN of devanagari string replacer
|
||||
else if (c == DEVANAGARI_VIRAMA) {
|
||||
yankedCharacters.push(i-1 to cPrev)
|
||||
// Unicode Devanagari Rendering Rule R6-R8
|
||||
// (this must precede the ligaturing-machine coded on the 2nd pass, otherwise the rules below will cause undesirable effects)
|
||||
else if (devanagariConsonants.contains(c) && cNext == DEVANAGARI_VIRAMA && cNext2 == DEVANAGARI_RA) {
|
||||
seq.addAll(toRaAppended(c))
|
||||
i += 2
|
||||
}
|
||||
else if (c == DEVANAGARI_RA) {
|
||||
// Unicode Devanagari Rendering Rule R5
|
||||
else if (c == DEVANAGARI_RRA && cNext == DEVANAGARI_VIRAMA || c == DEVANAGARI_RA && cNext == DEVANAGARI_VIRAMA && cNext2 == ZWJ) {
|
||||
seq.add(DEVANAGARI_EYELASH_RA)
|
||||
i += 1
|
||||
}
|
||||
// Unicode Devanagari Rendering Rule R2-R4
|
||||
else if (c == DEVANAGARI_RA && cNext == DEVANAGARI_VIRAMA && cNext2 != DEVANAGARI_RA) {
|
||||
yankedCharacters.push(i to c)
|
||||
i += 1
|
||||
}
|
||||
// Unicode Devanagari Rendering Rule R2-R4
|
||||
else if (!isDevanagari(c) && !yankedCharacters.empty()) {
|
||||
emptyOutYanked()
|
||||
seq.add(c)
|
||||
}
|
||||
// WIP
|
||||
// END of devanagari string replacer
|
||||
@@ -1164,7 +1192,11 @@ class TerrarumSansBitmap(
|
||||
|
||||
i++
|
||||
}
|
||||
emptyOutYanked()
|
||||
}
|
||||
|
||||
|
||||
// second scan
|
||||
// swap the positions of {letter, diacritic that comes before the letter}
|
||||
i = 1
|
||||
while (i <= seq.lastIndex) {
|
||||
@@ -1175,6 +1207,27 @@ class TerrarumSansBitmap(
|
||||
seq[i] = t
|
||||
}
|
||||
|
||||
val cPrev2 = seq.getOrElse(i-2) { -1 }
|
||||
val cPrev = seq.getOrElse(i-1) { -1 }
|
||||
val c = seq[i]
|
||||
|
||||
// BEGIN of Devanagari String Replacer 2 (lookbehind type)
|
||||
// Devanagari Ligations (Lookbehind)
|
||||
if (devanagariConsonants.contains(cPrev2) && cPrev == DEVANAGARI_VIRAMA && devanagariConsonants.contains(c)) {
|
||||
i -= 2
|
||||
|
||||
repeat(3) { seq.removeAt(i) }
|
||||
|
||||
val ligature = ligateIndicConsonants(cPrev2, c)
|
||||
ligature.forEachIndexed { index, char ->
|
||||
seq.add(i + index, char)
|
||||
}
|
||||
|
||||
i += ligature.size
|
||||
}
|
||||
// END of Devanagari String Replacer 2
|
||||
|
||||
|
||||
i++
|
||||
}
|
||||
|
||||
@@ -1630,13 +1683,130 @@ class TerrarumSansBitmap(
|
||||
'j'.toInt() to 0x237
|
||||
)
|
||||
|
||||
private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray()
|
||||
// Code point U+200D (ZERO WIDTH JOINER).
private val ZWJ = 0x200D
|
||||
|
||||
// Code points of the listed Tamil consonant letters, converted from Char and stored as an IntArray.
private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray()
|
||||
// NOTE(review): both values below lie in Supplementary Private Use Area-A; presumably font-specific
// glyph slots for the Tamil KSSA and SHRII ligatures — confirm against the sprite sheet.
private val TAMIL_KSSA = 0xF00ED
|
||||
private val TAMIL_SHRII = 0xF00EE
|
||||
|
||||
// Every code point treated as a Devanagari consonant, flattened into one IntArray.
// The literal ranges here are the same four ranges that are declared individually just below.
private val devanagariConsonants = ((0x0915..0x0939) + (0x0958..0x095F) + (0x0978..0x097F) + (0xF0140..0xF01FF)).toIntArray()
|
||||
|
||||
// U+0915 (KA) .. U+0939 (HA)
private val devanagariBaseConsonants = 0x0915..0x0939
|
||||
// U+0958 .. U+095F
private val devanagariBaseConsonantsWithNukta = 0x0958..0x095F
|
||||
// U+0978 .. U+097F
private val devanagariBaseConsonantsExtended = 0x0978..0x097F
|
||||
// Private-use range; it starts at DEVANAGARI_LIG_X_R and contains the ligature code points
// returned by ligateIndicConsonants.
private val devanagariPresentationFormsConsonants = 0xF0140..0xF01FF
|
||||
|
||||
// U+094D DEVANAGARI SIGN VIRAMA
private val DEVANAGARI_VIRAMA = 0x94D
|
||||
// U+0930 DEVANAGARI LETTER RA
private val DEVANAGARI_RA = 0x930
|
||||
// U+0931 DEVANAGARI LETTER RRA
private val DEVANAGARI_RRA = 0x931
|
||||
// Private-use code point that emptyOutYanked() emits in place of a yanked DEVANAGARI_RA.
private val DEVANAGARI_RA_SUPER = 0xF0104
|
||||
// Private-use code point that normalise() emits for RRA+VIRAMA and for RA+VIRAMA+ZWJ.
private val DEVANAGARI_EYELASH_RA = 0xF012A
|
||||
|
||||
// Private-use ligature code points for which toHalfFormOrNull() has a dedicated half form.
private val DEVANAGARI_LIG_K_SS = 0xF0181
|
||||
private val DEVANAGARI_LIG_J_NY = 0xF0184
|
||||
private val DEVANAGARI_LIG_T_T = 0xF018B
|
||||
private val DEVANAGARI_LIG_T_R = 0xF0154
|
||||
private val DEVANAGARI_LIG_SH_R = 0xF0166
|
||||
// Half forms returned by toHalfFormOrNull() for the correspondingly named ligatures above.
private val DEVANAGARI_HALFLIG_K_SS = 0xF012B
|
||||
private val DEVANAGARI_HALFLIG_J_NY = 0xF012C
|
||||
private val DEVANAGARI_HALFLIG_T_T = 0xF012D
|
||||
private val DEVANAGARI_HALFLIG_T_R = 0xF012E
|
||||
private val DEVANAGARI_HALFLIG_SH_R = 0xF012F
|
||||
|
||||
// NOTE(review): presumably precomposed glyphs for the syllables RU and RUU — confirm against their
// usages. DEVANAGARI_SYLL_RU has the same numeric value as DEVANAGARI_HALF_FORMS.
private val DEVANAGARI_SYLL_RU = 0xF0100
|
||||
private val DEVANAGARI_SYLL_RUU = 0xF0101
|
||||
|
||||
// toHalfFormOrNull() adds (code point - 0x0910) to this base for base consonants, and
// (code point - 0x0920) for the nukta consonants.
private val DEVANAGARI_HALF_FORMS = 0xF0100 // starting point for Devanagari half forms
|
||||
// toRaAppended() adds (code point - 0x0910) to this base for base consonants.
private val DEVANAGARI_LIG_X_R = 0xF0140 // starting point for Devanagari ligature CONSONANT+RA
|
||||
|
||||
/**
 * Returns the half-form code point for this code point, or `null` when none is defined here.
 *
 * Base consonants and nukta consonants are mapped by a fixed offset from
 * [DEVANAGARI_HALF_FORMS]; five named ligatures map to their dedicated half-ligature.
 */
private fun CodePoint.toHalfFormOrNull(): CodePoint? = when (this) {
    in devanagariBaseConsonants -> this - 0x0910 + DEVANAGARI_HALF_FORMS
    in devanagariBaseConsonantsWithNukta -> this - 0x0920 + DEVANAGARI_HALF_FORMS
    DEVANAGARI_LIG_K_SS -> DEVANAGARI_HALFLIG_K_SS
    DEVANAGARI_LIG_J_NY -> DEVANAGARI_HALFLIG_J_NY
    DEVANAGARI_LIG_T_T -> DEVANAGARI_HALFLIG_T_T
    DEVANAGARI_LIG_T_R -> DEVANAGARI_HALFLIG_T_R
    DEVANAGARI_LIG_SH_R -> DEVANAGARI_HALFLIG_SH_R
    // TODO half forms of X_R-ligatures
    else -> null
}
|
||||
|
||||
// TODO use proper version of Virama for respective scripts
/**
 * Returns a single-element list holding this code point's half form when one exists;
 * otherwise returns this code point followed by [DEVANAGARI_VIRAMA].
 */
private fun CodePoint.toHalfFormOrVirama(): List<CodePoint> {
    val halfForm = this.toHalfFormOrNull()
    return if (halfForm != null) listOf(halfForm) else listOf(this, DEVANAGARI_VIRAMA)
}
|
||||
|
||||
// TODO use proper version of Virama for respective scripts
/**
 * Returns the CONSONANT+RA form of [c].
 *
 * For a base consonant this is one code point offset from [DEVANAGARI_LIG_X_R];
 * for anything else it is the plain sequence `c, VIRAMA, RA`.
 */
private fun toRaAppended(c: CodePoint): List<CodePoint> =
    if (c !in devanagariBaseConsonants)
        listOf(c, DEVANAGARI_VIRAMA, DEVANAGARI_RA)
    else
        listOf(c - 0x0910 + DEVANAGARI_LIG_X_R)
|
||||
|
||||
/**
 * Combines the consonant pair ([c1], [c2]) — the consonants on either side of a virama —
 * into the code points that should replace them.
 *
 * - If [c2] is [DEVANAGARI_RA], the result is `toRaAppended(c1)`.
 * - If the pair has a dedicated conjunct in the table below, the result is that single code point.
 * - Otherwise the result is `c1.toHalfFormOrVirama()` followed by [c2].
 */
private fun ligateIndicConsonants(c1: CodePoint, c2: CodePoint): List<CodePoint> {
    if (c2 == DEVANAGARI_RA) return toRaAppended(c1) // Devanagari @.RA

    // Dedicated conjunct for the pair, or null when the pair has none.
    val conjunct: CodePoint? = when (c1) {
        0x0915 -> /* Devanagari KA */ when (c2) {
            0x0924 -> 0xF0180 // K.TA
            0x0937 -> DEVANAGARI_LIG_K_SS // K.SSA
            else -> null
        }
        0x0919 -> /* Devanagari NGA */ when (c2) {
            0x0917 -> 0xF0182 // NG.G
            0x092E -> 0xF0183 // NG.M
            else -> null
        }
        0x091C -> /* Devanagari JA */ when (c2) {
            0x091E -> DEVANAGARI_LIG_J_NY // J.NY
            else -> null
        }
        0x091F -> /* Devanagari TTA */ when (c2) {
            0x091F -> 0xF0185 // TT.TT
            0x0920 -> 0xF0186 // TT.TTH
            else -> null
        }
        0x0920 -> /* Devanagari TTHA */ when (c2) {
            0x0920 -> 0xF0187 // TTH.TTH
            else -> null
        }
        0x0921 -> /* Devanagari DDA */ when (c2) {
            0x0921 -> 0xF0188 // DD.DD
            0x0922 -> 0xF0189 // DD.DDH
            else -> null
        }
        0x0922 -> /* Devanagari DDHA */ when (c2) {
            0x0922 -> 0xF018A // DDH.DDH
            else -> null
        }
        0x0924 -> /* Devanagari TA */ when (c2) {
            0x0924 -> DEVANAGARI_LIG_T_T // T.T
            else -> null
        }
        0x0926 -> /* Devanagari DA */ when (c2) {
            0x0926 -> 0xF018C // D.D
            0x0927 -> 0xF018D // D.DH
            0x092C -> 0xF018E // D.B
            0x092D -> 0xF018F // D.BH
            0x092E -> 0xF0190 // D.M
            0x092F -> 0xF0191 // D.Y
            0x0935 -> 0xF0192 // D.V
            else -> null
        }
        0x0928 -> /* Devanagari NA */ when (c2) {
            0x0928 -> 0xF0193 // N.N
            else -> null
        }
        0x0939 -> /* Devanagari HA */ when (c2) {
            0x0923 -> 0xF0194 // H.NN
            0x0928 -> 0xF0195 // H.N
            0x092E -> 0xF0196 // H.M
            0x092F -> 0xF0197 // H.Y
            0x0932 -> 0xF0198 // H.L
            0x0935 -> 0xF0199 // H.v
            else -> null
        }
        else -> null
    }

    // TODO use proper version of Virama for respective scripts
    return if (conjunct != null) listOf(conjunct) else c1.toHalfFormOrVirama() + c2
}
|
||||
|
||||
|
||||
/** Formats this integer as `U+` followed by its upper-case hexadecimal digits, left-padded with zeros to at least four characters. */
private fun Int.toHex(): String {
    val paddedDigits = this.toString(16).padStart(4, '0')
    return "U+" + paddedDigits.toUpperCase()
}
|
||||
|
||||
BIN
testing.PNG
BIN
testing.PNG
Binary file not shown.
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 16 KiB |
18
testtext.txt
18
testtext.txt
@@ -1,17 +1,3 @@
|
||||
|
||||
A ดุ ตี ปู่ พี่ ป่ ม่ ปั มั พีุ ทิ่ท่ท่ิ ปิ่ป่ป่ิ ทิ้ ปิ้ มำด มําด
|
||||
|
||||
이는 일본의 요미가나(読み仮名)와 비슷한 용법이다.
|
||||
|
||||
Sugarさとう설탕砂糖
|
||||
|
||||
বাংলাদেশ
|
||||
|
||||
தமிழ் லூ லு
|
||||
|
||||
QWனோT
|
||||
னோ
|
||||
|
||||
எழுத்து வடிவங்களுக்கு வார்த்தைகள் மற்றும் வாக்கியங்கள் போலவே தொனி, ஒலி பண்பு, தன்மை உண்டு.
|
||||
|
||||
க்ஷ ஶ்ரீ க்ஷௌ
|
||||
फ़ॊण्ज़ीणू क्की क्कि र्क र्त्सा र्र्ल ऱ्ड क्राप् ठ्री त्र् त्र्मोत्र्यो ज्ञज्ञ्रत्त न्न
|
||||
ह्णह्नह्मह्यह्लह्वह्र णनमयलव ज़्लतान
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user