I think Devanagari is fully working now

This commit is contained in:
minjaesong
2022-01-25 16:23:02 +09:00
parent 1e66cfec51
commit 7c3069e8cf
7 changed files with 78 additions and 36 deletions

Binary file not shown.

BIN
demo.PNG

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

After

Width:  |  Height:  |  Size: 153 KiB

View File

@@ -109,7 +109,7 @@ How multilingual? Real multilingual!
Basic Latin Latin-1 Supplement Latin Extended-A Latin Extended-B IPA Extensions Basic Latin Latin-1 Supplement Latin Extended-A Latin Extended-B IPA Extensions
Spacing Modifier Letters Combining Diacritical Marks Greek and Coptic􏿆ᴱ􀀀 Cyrillic􏿆ᴭ􀀀 Cyrillic Supplement􏿆ᴭ􀀀 Spacing Modifier Letters Combining Diacritical Marks Greek and Coptic􏿆ᴱ􀀀 Cyrillic􏿆ᴭ􀀀 Cyrillic Supplement􏿆ᴭ􀀀
Armenian Devanagari􏿆ᶠⁱ􀀀 Bengali􏿆ᶠⁱ􀀀 Tamil Thai Georgian􏿆ჼ􀀀 Hangul Jamo Cherokee􏿆⁷􀀀 Runic Armenian Devanagari Bengali􏿆ᶠⁱ􀀀 Tamil Thai Georgian􏿆ჼ􀀀 Hangul Jamo Cherokee􏿆⁷􀀀 Runic
Georgian Extended Phonetic Extensions Phonetic Extensions Supplement Latin Extended Additional Georgian Extended Phonetic Extensions Phonetic Extensions Supplement Latin Extended Additional
Greek Extended General Punctuations Superscripts and Subscripts Currency Symbols Letterlike Symbols Greek Extended General Punctuations Superscripts and Subscripts Currency Symbols Letterlike Symbols
CJK Symbols and Punctuation Latin Extended-C Hiragana Katakana Hangul Compatibility Jamo CJK Symbols and Punctuation Latin Extended-C Hiragana Katakana Hangul Compatibility Jamo

View File

@@ -32,6 +32,7 @@ import com.badlogic.gdx.graphics.g2d.*
import com.badlogic.gdx.utils.GdxRuntimeException import com.badlogic.gdx.utils.GdxRuntimeException
import net.torvald.terrarumsansbitmap.DiacriticsAnchor import net.torvald.terrarumsansbitmap.DiacriticsAnchor
import net.torvald.terrarumsansbitmap.GlyphProps import net.torvald.terrarumsansbitmap.GlyphProps
import net.torvald.terrarumsansbitmap.gdx.TerrarumSansBitmap.Companion.charInfo
import java.io.BufferedOutputStream import java.io.BufferedOutputStream
import java.io.FileOutputStream import java.io.FileOutputStream
import java.util.* import java.util.*
@@ -1071,6 +1072,7 @@ class TerrarumSansBitmap(
val c = dis[i] val c = dis[i]
val cNext = dis.getOrElse(i+1) { -1 } val cNext = dis.getOrElse(i+1) { -1 }
val cNext2 = dis.getOrElse(i+2) { -1 } val cNext2 = dis.getOrElse(i+2) { -1 }
val cNext3 = dis.getOrElse(i+3) { -1 }
// can't use regular sliding window as the 'i' value is changed way too often // can't use regular sliding window as the 'i' value is changed way too often
// LET THE NORMALISATION BEGIN // // LET THE NORMALISATION BEGIN //
@@ -1163,6 +1165,13 @@ class TerrarumSansBitmap(
// END of tamil subsystem implementation // END of tamil subsystem implementation
// BEGIN of devanagari string replacer // BEGIN of devanagari string replacer
// Alternative Forms of Cluster-initial RA
else if (c == DEVANAGARI_RA && cNext == ZWJ && cNext2 == DEVANAGARI_VIRAMA && cNext3 == DEVANAGARI_YA) {
seq.add(DEVANAGARI_RYA); i += 3
}
else if (c == DEVANAGARI_RA && cNext == ZWJ && cNext2 == DEVANAGARI_VIRAMA) {
seq.add(DEVANAGARI_RA); i += 2
}
// Unicode Devanagari Rendering Rule R14 // Unicode Devanagari Rendering Rule R14
else if (c == DEVANAGARI_RA && cNext == DEVANAGARI_U) { else if (c == DEVANAGARI_RA && cNext == DEVANAGARI_U) {
seq.add(DEVANAGARI_SYLL_RU); i += 1 seq.add(DEVANAGARI_SYLL_RU); i += 1
@@ -1247,23 +1256,14 @@ class TerrarumSansBitmap(
} }
// second scan // BEGIN of Devanagari String Replacer 2 (lookbehind type)
// swap position of {letter, diacritics that comes before the letter} i = 0
i = 1
while (i <= seq.lastIndex) { while (i <= seq.lastIndex) {
// reposition [cluster, align-before, align-after] into [align-before, cluster, align-after]
if ((glyphProps[seq[i]] ?: nullProp).alignWhere == GlyphProps.ALIGN_BEFORE) {
val t = seq[i - 1]
seq[i - 1] = seq[i]
seq[i] = t
}
val cPrev2 = seq.getOrElse(i-2) { -1 } val cPrev2 = seq.getOrElse(i-2) { -1 }
val cPrev = seq.getOrElse(i-1) { -1 } val cPrev = seq.getOrElse(i-1) { -1 }
val c = seq[i] val c = seq[i]
// BEGIN of Devanagari String Replacer 2 (lookbehind type)
// Devanagari Ligations (Lookbehind) // Devanagari Ligations (Lookbehind)
if (devanagariConsonants.contains(cPrev2) && cPrev == DEVANAGARI_VIRAMA && devanagariConsonants.contains(c)) { if (devanagariConsonants.contains(cPrev2) && cPrev == DEVANAGARI_VIRAMA && devanagariConsonants.contains(c)) {
i -= 2 i -= 2
@@ -1277,11 +1277,47 @@ class TerrarumSansBitmap(
i += ligature.size i += ligature.size
} }
// END of Devanagari String Replacer 2
i++ i++
} }
// END of Devanagari String Replacer 2
// second scan
// swap position of {letter, diacritics that come before the letter}
// reposition [cluster, align-before, align-after] into [align-before, cluster, align-after]
i = 0
while (i <= seq.lastIndex) {
if (i > 0 && (glyphProps[seq[i]] ?: nullProp).alignWhere == GlyphProps.ALIGN_BEFORE) {
val verb = seq[i]
// dbgprn("Verb realign: index $i, ${verb.charInfo()}")
if (isDevanagari(verb)) {
// scan for the consonant cluster backwards
// [not ligature glyphs] h h h h h c l r
var scanCounter = 1
while (true) {
val cAtCurs = seq.getOrElse(i - scanCounter) { -1 }
// dbgprn(" scan back $scanCounter, char: ${cAtCurs.charInfo()}")
if (scanCounter == 1 && devanagariConsonantsNonLig.contains(cAtCurs) ||
scanCounter > 1 && devanariConsonantsHalfs.contains(cAtCurs))
scanCounter += 1
else
break
} // scanCounter points at the terminator. the left-verb must be placed at (i - scanCounter + 1)
seq.removeAt(i)
seq.add(i - scanCounter + 1, verb)
}
else {
val t = seq[i - 1]
seq[i - 1] = seq[i]
seq[i] = t
}
}
i++
}
// unpack replacewith // unpack replacewith
seq.forEach { seq.forEach {
@@ -1740,18 +1776,26 @@ class TerrarumSansBitmap(
private val ZWNJ = 0x200C private val ZWNJ = 0x200C
private val ZWJ = 0x200D private val ZWJ = 0x200D
private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray() private val tamilLigatingConsonants = listOf('க','ங','ச','ஞ','ட','ண','த','ந','ன','ப','ம','ய','ர','ற','ல','ள','ழ','வ').map { it.toInt() }.toIntArray() // this is the only thing that .indexOf() is called against, so NO HASHSET
private val TAMIL_KSSA = 0xF00ED private val TAMIL_KSSA = 0xF00ED
private val TAMIL_SHRII = 0xF00EE private val TAMIL_SHRII = 0xF00EE
private val devanagariConsonants = ((0x0915..0x0939) + (0x0958..0x095F) + (0x0978..0x097F) + (0xF0105..0xF01FF)).toIntArray() private val devanagariConsonants = ((0x0915..0x0939) + (0x0958..0x095F) + (0x0978..0x097F) + (0xF0105..0xF01FF)).toHashSet()
private val devanagariVerbs = ((0x093A..0x093C) + (0x093E..0x094C) + (0x094E..0x094F)).toIntArray() private val devanagariVerbs = ((0x093A..0x093C) + (0x093E..0x094C) + (0x094E..0x094F)).toHashSet()
private val devanagariBaseConsonants = 0x0915..0x0939 private val devanagariBaseConsonants = 0x0915..0x0939
private val devanagariBaseConsonantsWithNukta = 0x0958..0x095F private val devanagariBaseConsonantsWithNukta = 0x0958..0x095F
private val devanagariBaseConsonantsExtended = 0x0978..0x097F private val devanagariBaseConsonantsExtended = 0x0978..0x097F
private val devanagariPresentationConsonants = 0xF0140..0xF01FF private val devanagariPresentationConsonants = 0xF0140..0xF01FF
private val devanagariPresentationConsonantsWithRa = 0xF0145..0xF017F private val devanagariPresentationConsonantsWithRa = 0xF0145..0xF017F
private val devanagariPresentationIrregularConsonants = 0xF0180..0xF01BF
private val devanagariConsonantsNonLig = (devanagariBaseConsonants +
devanagariBaseConsonantsWithNukta + devanagariBaseConsonantsExtended +
devanagariPresentationConsonantsWithRa + devanagariPresentationIrregularConsonants).toHashSet()
private val devanariConsonantsHalfs = ((0xF0105..0xF012F) + (0xF0137..0xF013F) + (0xF01C0..0xF01FF)).toHashSet()
private val DEVANAGARI_VIRAMA = 0x94D private val DEVANAGARI_VIRAMA = 0x94D
private val DEVANAGARI_RA = 0x930 private val DEVANAGARI_RA = 0x930
@@ -1761,6 +1805,8 @@ class TerrarumSansBitmap(
private val DEVANAGARI_HA = 0x939 private val DEVANAGARI_HA = 0x939
private val DEVANAGARI_U = 0x941 private val DEVANAGARI_U = 0x941
private val DEVANAGARI_UU = 0x942 private val DEVANAGARI_UU = 0x942
private val DEVANAGARI_RYA = 0xF0140
private val DEVANAGARI_HALF_RYA = 0xF0141
private val DEVANAGARI_SYLL_RU = 0xF0100 private val DEVANAGARI_SYLL_RU = 0xF0100
private val DEVANAGARI_SYLL_RUU = 0xF0101 private val DEVANAGARI_SYLL_RUU = 0xF0101
@@ -1801,6 +1847,8 @@ class TerrarumSansBitmap(
private val DEVANAGARI_LIG_X_R = 0xF0140 // starting point for Devanagari ligature CONSONANT+RA private val DEVANAGARI_LIG_X_R = 0xF0140 // starting point for Devanagari ligature CONSONANT+RA
private fun CodePoint.toHalfFormOrNull(): CodePoint? { private fun CodePoint.toHalfFormOrNull(): CodePoint? {
if (this == DEVANAGARI_RYA) return DEVANAGARI_HALF_RYA
if (this in 0xF018C..0xF018F) return this + 0x34
if (this == DEVANAGARI_LIG_K_SS) return DEVANAGARI_HALFLIG_K_SS if (this == DEVANAGARI_LIG_K_SS) return DEVANAGARI_HALFLIG_K_SS
if (this == DEVANAGARI_LIG_J_NY) return DEVANAGARI_HALFLIG_J_NY if (this == DEVANAGARI_LIG_J_NY) return DEVANAGARI_HALFLIG_J_NY
if (this == DEVANAGARI_LIG_T_T) return DEVANAGARI_HALFLIG_T_T if (this == DEVANAGARI_LIG_T_T) return DEVANAGARI_HALFLIG_T_T
@@ -1814,7 +1862,7 @@ class TerrarumSansBitmap(
// TODO use proper version of Virama for respective scripts // TODO use proper version of Virama for respective scripts
private fun CodePoint.toHalfFormOrVirama(): List<CodePoint> = this.toHalfFormOrNull().let { private fun CodePoint.toHalfFormOrVirama(): List<CodePoint> = this.toHalfFormOrNull().let {
println("[TerrarumSansBitmap] toHalfForm ${this.charInfo()} = ${it?.charInfo()}") // println("[TerrarumSansBitmap] toHalfForm ${this.charInfo()} = ${it?.charInfo()}")
if (it == null) listOf(this, DEVANAGARI_VIRAMA) else listOf(it) if (it == null) listOf(this, DEVANAGARI_VIRAMA) else listOf(it)
} }
@@ -1829,7 +1877,7 @@ class TerrarumSansBitmap(
} }
private fun ligateIndicConsonants(c1: CodePoint, c2: CodePoint): List<CodePoint> { private fun ligateIndicConsonants(c1: CodePoint, c2: CodePoint): List<CodePoint> {
println("[TerrarumSansBitmap] Indic ligation ${c1.charInfo()} - ${c2.charInfo()}") // println("[TerrarumSansBitmap] Indic ligation ${c1.charInfo()} - ${c2.charInfo()}")
if (c2 == DEVANAGARI_RA) return toRaAppended(c1) // Devanagari @.RA if (c2 == DEVANAGARI_RA) return toRaAppended(c1) // Devanagari @.RA
when (c1) { when (c1) {
0x0915 -> /* Devanagari KA */ when (c2) { 0x0915 -> /* Devanagari KA */ when (c2) {
@@ -1946,6 +1994,10 @@ class TerrarumSansBitmap(
DEVANAGARI_YA -> return listOf(MARWARI_LIG_DD_Y) // DD.Y DEVANAGARI_YA -> return listOf(MARWARI_LIG_DD_Y) // DD.Y
else -> return c1.toHalfFormOrVirama() + c2 else -> return c1.toHalfFormOrVirama() + c2
} }
in 0xF018C..0xF018F -> /* Devanagari D.@A */ when (c2) {
DEVANAGARI_YA -> return c1.toHalfFormOrVirama() + DEVANAGARI_OPEN_YA
else -> return c1.toHalfFormOrVirama() + c2
}
else -> return c1.toHalfFormOrVirama() + c2 // TODO use proper version of Virama for respective scripts else -> return c1.toHalfFormOrVirama() + c2 // TODO use proper version of Virama for respective scripts
} }
} }

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View File

@@ -1,14 +1,4 @@
फ़ॊण्ज़ीणू क्की क्कि र्क र्त्सा र्र्ल ऱ्ड क्राप् ठ्री त्र् स्त्रक्ष्य प्रधानमन्त्री
ह्णह्नह्मह्यह्लह्वह्र णनमयलव ज़्लतान छात्त्र छात्र क्ष्र ज्ञ्र श्रेयो हि ज्ञानमभ्यासाज्ज्ञानाद्ध्यानं विशिष्यते ।
्यङ्यछ्यट्यठ्यड्यढ्यद्यश्यह्य ढा ऱुमिक्युब हुहू दुदू द्गद्घद्द ढ्ढढ्वद्व ्यानात्कर्मफलत्यागस्त्यागाच्छन्तिरनन्तरम् ॥
ग्र्क्र क्श्र त्रत्त्रष्र्प ढ्य्ब न्न्ग मस्त कष्ट पुन्हा ध्वनि
तत्र अग्निः अस्ति। अष्टाध्यायी प्च प्छ
र्त्सार्पै « R.T.SAA R.PAI (RAsup must be above last 2 stems)
ऱ्त्साऱ्पै « RR.T.SAA RR.PAI (must use eyelash-RA)
शर्करा « RAsup must be above KA
अंग्रेजी
ख्य
மோநௌ « check the sanity of glyph reordering
भ्रष्ट ष्ट्रॉष्ठ्रॊष्ढ्रो प्ट्रॉप्ठ्रॊप्ढ्रो घ्ट्रॉघ्ठ्रॊघ्ढ्रो ष्ट्र्गौ ष्ठ्दॊ ष्ट्कष्ठ्कष्ढ्क म्ह न्ह
व्ख ब्ख व्ब व्य व्स ष्ट्र्कष्ठ्र्कष्ढ्र्क प्ट्र्खप्ठ्र्खप्ढ्र्ख घ्ट्र्गघ्ठ्र्गघ्ढ्र्ग
ऱ्हस्व र्ष्टॊष्ट्रर्ष्ट ॸ्ॸ ॸ्ढ ॸ्य ॸ्य्ख ॸ्र

Binary file not shown.