I think Devanagari is fully working now

This commit is contained in:
minjaesong
2022-01-25 16:23:02 +09:00
parent 1e66cfec51
commit 7c3069e8cf
7 changed files with 78 additions and 36 deletions

Binary file not shown.

BIN
demo.PNG

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

After

Width:  |  Height:  |  Size: 153 KiB

View File

@@ -109,7 +109,7 @@ How multilingual? Real multilingual!
Basic Latin Latin-1 Supplement Latin Extended-A Latin Extended-B IPA Extensions
Spacing Modifier Letters Combining Diacritical Marks Greek and Coptic􏿆ᴱ􀀀 Cyrillic􏿆ᴭ􀀀 Cyrillic Supplement􏿆ᴭ􀀀
Armenian Devanagari􏿆ᶠⁱ􀀀 Bengali􏿆ᶠⁱ􀀀 Tamil Thai Georgian􏿆ჼ􀀀 Hangul Jamo Cherokee􏿆⁷􀀀 Runic
Armenian Devanagari Bengali􏿆ᶠⁱ􀀀 Tamil Thai Georgian􏿆ჼ􀀀 Hangul Jamo Cherokee􏿆⁷􀀀 Runic
Georgian Extended Phonetic Extensions Phonetic Extensions Supplement Latin Extended Additional
Greek Extended General Punctuation Superscripts and Subscripts Currency Symbols Letterlike Symbols
CJK Symbols and Punctuation Latin Extended-C Hiragana Katakana Hangul Compatibility Jamo

View File

@@ -32,6 +32,7 @@ import com.badlogic.gdx.graphics.g2d.*
import com.badlogic.gdx.utils.GdxRuntimeException
import net.torvald.terrarumsansbitmap.DiacriticsAnchor
import net.torvald.terrarumsansbitmap.GlyphProps
import net.torvald.terrarumsansbitmap.gdx.TerrarumSansBitmap.Companion.charInfo
import java.io.BufferedOutputStream
import java.io.FileOutputStream
import java.util.*
@@ -1071,6 +1072,7 @@ class TerrarumSansBitmap(
val c = dis[i]
val cNext = dis.getOrElse(i+1) { -1 }
val cNext2 = dis.getOrElse(i+2) { -1 }
val cNext3 = dis.getOrElse(i+3) { -1 }
// can't use regular sliding window as the 'i' value is changed way too often
// LET THE NORMALISATION BEGIN //
@@ -1163,6 +1165,13 @@ class TerrarumSansBitmap(
// END of tamil subsystem implementation
// BEGIN of devanagari string replacer
// Alternative Forms of Cluster-initial RA
else if (c == DEVANAGARI_RA && cNext == ZWJ && cNext2 == DEVANAGARI_VIRAMA && cNext3 == DEVANAGARI_YA) {
seq.add(DEVANAGARI_RYA); i += 3
}
else if (c == DEVANAGARI_RA && cNext == ZWJ && cNext2 == DEVANAGARI_VIRAMA) {
seq.add(DEVANAGARI_RA); i += 2
}
// Unicode Devanagari Rendering Rule R14
else if (c == DEVANAGARI_RA && cNext == DEVANAGARI_U) {
seq.add(DEVANAGARI_SYLL_RU); i += 1
@@ -1247,23 +1256,14 @@ class TerrarumSansBitmap(
}
// second scan
// swap position of {letter, diacritics that comes before the letter}
i = 1
// BEGIN of Devanagari String Replacer 2 (lookbehind type)
i = 0
while (i <= seq.lastIndex) {
// reposition [cluster, align-before, align-after] into [align-before, cluster, align-after]
if ((glyphProps[seq[i]] ?: nullProp).alignWhere == GlyphProps.ALIGN_BEFORE) {
val t = seq[i - 1]
seq[i - 1] = seq[i]
seq[i] = t
}
val cPrev2 = seq.getOrElse(i-2) { -1 }
val cPrev = seq.getOrElse(i-1) { -1 }
val c = seq[i]
// BEGIN of Devanagari String Replacer 2 (lookbehind type)
// Devanagari Ligations (Lookbehind)
if (devanagariConsonants.contains(cPrev2) && cPrev == DEVANAGARI_VIRAMA && devanagariConsonants.contains(c)) {
i -= 2
@@ -1277,11 +1277,47 @@ class TerrarumSansBitmap(
i += ligature.size
}
// END of Devanagari String Replacer 2
i++
}
// END of Devanagari String Replacer 2
// second scan
// swap position of {letter, diacritic that comes before the letter}
// reposition [cluster, align-before, align-after] into [align-before, cluster, align-after]
i = 0
while (i <= seq.lastIndex) {
if (i > 0 && (glyphProps[seq[i]] ?: nullProp).alignWhere == GlyphProps.ALIGN_BEFORE) {
val verb = seq[i]
// dbgprn("Verb realign: index $i, ${verb.charInfo()}")
if (isDevanagari(verb)) {
// scan for the consonant cluster backwards
// [not ligature glyphs] h h h h h c l r
var scanCounter = 1
while (true) {
val cAtCurs = seq.getOrElse(i - scanCounter) { -1 }
// dbgprn(" scan back $scanCounter, char: ${cAtCurs.charInfo()}")
if (scanCounter == 1 && devanagariConsonantsNonLig.contains(cAtCurs) ||
scanCounter > 1 && devanariConsonantsHalfs.contains(cAtCurs))
scanCounter += 1
else
break
} // scanCounter points at the terminator. the left-verb must be placed at (i - scanCounter + 1)
seq.removeAt(i)
seq.add(i - scanCounter + 1, verb)
}
else {
val t = seq[i - 1]
seq[i - 1] = seq[i]
seq[i] = t
}
}
i++
}
// unpack replacewith
seq.forEach {
@@ -1740,18 +1776,26 @@ class TerrarumSansBitmap(
private val ZWNJ = 0x200C
private val ZWJ = 0x200D
private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray()
private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray() // this is the only thing that .indexOf() is called against, so NO HASHSET
private val TAMIL_KSSA = 0xF00ED
private val TAMIL_SHRII = 0xF00EE
private val devanagariConsonants = ((0x0915..0x0939) + (0x0958..0x095F) + (0x0978..0x097F) + (0xF0105..0xF01FF)).toIntArray()
private val devanagariVerbs = ((0x093A..0x093C) + (0x093E..0x094C) + (0x094E..0x094F)).toIntArray()
private val devanagariConsonants = ((0x0915..0x0939) + (0x0958..0x095F) + (0x0978..0x097F) + (0xF0105..0xF01FF)).toHashSet()
private val devanagariVerbs = ((0x093A..0x093C) + (0x093E..0x094C) + (0x094E..0x094F)).toHashSet()
private val devanagariBaseConsonants = 0x0915..0x0939
private val devanagariBaseConsonantsWithNukta = 0x0958..0x095F
private val devanagariBaseConsonantsExtended = 0x0978..0x097F
private val devanagariPresentationConsonants = 0xF0140..0xF01FF
private val devanagariPresentationConsonantsWithRa = 0xF0145..0xF017F
private val devanagariPresentationIrregularConsonants = 0xF0180..0xF01BF
private val devanagariConsonantsNonLig = (devanagariBaseConsonants +
devanagariBaseConsonantsWithNukta + devanagariBaseConsonantsExtended +
devanagariPresentationConsonantsWithRa + devanagariPresentationIrregularConsonants).toHashSet()
private val devanariConsonantsHalfs = ((0xF0105..0xF012F) + (0xF0137..0xF013F) + (0xF01C0..0xF01FF)).toHashSet()
private val DEVANAGARI_VIRAMA = 0x94D
private val DEVANAGARI_RA = 0x930
@@ -1761,6 +1805,8 @@ class TerrarumSansBitmap(
private val DEVANAGARI_HA = 0x939
private val DEVANAGARI_U = 0x941
private val DEVANAGARI_UU = 0x942
private val DEVANAGARI_RYA = 0xF0140
private val DEVANAGARI_HALF_RYA = 0xF0141
private val DEVANAGARI_SYLL_RU = 0xF0100
private val DEVANAGARI_SYLL_RUU = 0xF0101
@@ -1801,6 +1847,8 @@ class TerrarumSansBitmap(
private val DEVANAGARI_LIG_X_R = 0xF0140 // starting point for Devanagari ligature CONSONANT+RA
private fun CodePoint.toHalfFormOrNull(): CodePoint? {
if (this == DEVANAGARI_RYA) return DEVANAGARI_HALF_RYA
if (this in 0xF018C..0xF018F) return this + 0x34
if (this == DEVANAGARI_LIG_K_SS) return DEVANAGARI_HALFLIG_K_SS
if (this == DEVANAGARI_LIG_J_NY) return DEVANAGARI_HALFLIG_J_NY
if (this == DEVANAGARI_LIG_T_T) return DEVANAGARI_HALFLIG_T_T
@@ -1814,7 +1862,7 @@ class TerrarumSansBitmap(
// TODO use proper version of Virama for respective scripts
private fun CodePoint.toHalfFormOrVirama(): List<CodePoint> = this.toHalfFormOrNull().let {
println("[TerrarumSansBitmap] toHalfForm ${this.charInfo()} = ${it?.charInfo()}")
// println("[TerrarumSansBitmap] toHalfForm ${this.charInfo()} = ${it?.charInfo()}")
if (it == null) listOf(this, DEVANAGARI_VIRAMA) else listOf(it)
}
@@ -1829,7 +1877,7 @@ class TerrarumSansBitmap(
}
private fun ligateIndicConsonants(c1: CodePoint, c2: CodePoint): List<CodePoint> {
println("[TerrarumSansBitmap] Indic ligation ${c1.charInfo()} - ${c2.charInfo()}")
// println("[TerrarumSansBitmap] Indic ligation ${c1.charInfo()} - ${c2.charInfo()}")
if (c2 == DEVANAGARI_RA) return toRaAppended(c1) // Devanagari @.RA
when (c1) {
0x0915 -> /* Devanagari KA */ when (c2) {
@@ -1946,6 +1994,10 @@ class TerrarumSansBitmap(
DEVANAGARI_YA -> return listOf(MARWARI_LIG_DD_Y) // DD.Y
else -> return c1.toHalfFormOrVirama() + c2
}
in 0xF018C..0xF018F -> /* Devanagari D.@A */ when (c2) {
DEVANAGARI_YA -> return c1.toHalfFormOrVirama() + DEVANAGARI_OPEN_YA
else -> return c1.toHalfFormOrVirama() + c2
}
else -> return c1.toHalfFormOrVirama() + c2 // TODO use proper version of Virama for respective scripts
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View File

@@ -1,14 +1,4 @@
फ़ॊण्ज़ीणू क्की क्कि र्क र्त्सा र्र्ल ऱ्ड क्राप् ठ्री त्र् स्त्रक्ष्य
ह्णह्नह्मह्यह्लह्वह्र णनमयलव ज़्लतान छात्त्र छात्र क्ष्र ज्ञ्र
्यङ्यछ्यट्यठ्यड्यढ्यद्यश्यह्य ढा ऱुमिक्युब हुहू दुदू द्गद्घद्द ढ्ढढ्वद्व
ग्र्क्र क्श्र त्रत्त्रष्र्प ढ्य्ब न्न्ग
तत्र अग्निः अस्ति। अष्टाध्यायी प्च प्छ
र्त्सार्पै « R.T.SAA R.PAI (RAsup must be above last 2 stems)
ऱ्त्साऱ्पै « RR.T.SAA RR.PAI (must use eyelash-RA)
शर्करा « RAsup must be above KA
अंग्रेजी
ख्य
மோநௌ « check the sanity of glyph reordering
भ्रष्ट ष्ट्रॉष्ठ्रॊष्ढ्रो प्ट्रॉप्ठ्रॊप्ढ्रो घ्ट्रॉघ्ठ्रॊघ्ढ्रो ष्ट्र्गौ ष्ठ्दॊ ष्ट्कष्ठ्कष्ढ्क म्ह न्ह
व्ख ब्ख व्ब व्य व्स ष्ट्र्कष्ठ्र्कष्ढ्र्क प्ट्र्खप्ठ्र्खप्ढ्र्ख घ्ट्र्गघ्ठ्र्गघ्ढ्र्ग
ऱ्हस्व र्ष्टॊष्ट्रर्ष्ट ॸ्ॸ ॸ्ढ ॸ्य ॸ्य्ख ॸ्र
प्रधानमन्त्री
श्रेयो हि ज्ञानमभ्यासाज्ज्ञानाद्ध्यानं विशिष्यते ।
्यानात्कर्मफलत्यागस्त्यागाच्छन्तिरनन्तरम् ॥
मस्त कष्ट पुन्हा ध्वनि

Binary file not shown.