mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-03-07 11:51:50 +09:00
more attempts at cross system compatibility
This commit is contained in:
@@ -121,8 +121,8 @@ print(f"{name}: advance={w}, has_outlines={has_outlines}")
|
|||||||
- **ccmp** — replacewith expansions (DFLT); consonant-to-PUA mapping + vowel decompositions + anusvara upper (dev2); vowel decompositions (tml2)
|
- **ccmp** — replacewith expansions (DFLT); consonant-to-PUA mapping + vowel decompositions + anusvara upper (dev2); vowel decompositions (tml2)
|
||||||
- **kern** — pair positioning from `keming_machine.py`
|
- **kern** — pair positioning from `keming_machine.py`
|
||||||
- **liga** — Latin ligatures (ff, fi, fl, ffi, ffl, st) and Armenian ligatures
|
- **liga** — Latin ligatures (ff, fi, fl, ffi, ffl, st) and Armenian ligatures
|
||||||
- **locl** — Bulgarian/Serbian Cyrillic alternates
|
- **locl** — Bulgarian/Serbian Cyrillic alternates; Devanagari consonant-to-PUA mapping + vowel decompositions + anusvara upper (dev2, duplicated from ccmp for DirectWrite compatibility)
|
||||||
- **nukt, akhn, half, vatu, pres, blws, rphf** — Devanagari complex script shaping (all under `script dev2`)
|
- **nukt, akhn, half, blwf, cjct, pres, blws, rphf, abvs, psts, calt** — Devanagari complex script shaping (all under `script dev2`)
|
||||||
- **pres** (tml2) — Tamil consonant+vowel ligatures
|
- **pres** (tml2) — Tamil consonant+vowel ligatures
|
||||||
- **pres** (sund) — Sundanese diacritic combinations
|
- **pres** (sund) — Sundanese diacritic combinations
|
||||||
- **ljmo, vjmo, tjmo** — Hangul jamo positional variants
|
- **ljmo, vjmo, tjmo** — Hangul jamo positional variants
|
||||||
@@ -181,7 +181,7 @@ for sr in gsub.table.ScriptList.ScriptRecord:
|
|||||||
print(f"{tag}/{lsr.LangSysTag}: {' '.join(sorted(set(feats)))}")
|
print(f"{tag}/{lsr.LangSysTag}: {' '.join(sorted(set(feats)))}")
|
||||||
```
|
```
|
||||||
|
|
||||||
Expected output for dev2: `dev2/dflt: abvs akhn blwf blws calt ccmp cjct half liga nukt pres psts rphf`. If language-specific records (e.g. `dev2/MAR`) appear with only `ccmp liga`, the language records have incomplete feature inheritance — remove the corresponding `languagesystem` declaration.
|
Expected output for dev2: `dev2/dflt: abvs akhn blwf blws calt ccmp cjct half liga locl nukt pres psts rphf`. If language-specific records (e.g. `dev2/MAR`) appear with only `ccmp liga`, the language records have incomplete feature inheritance — remove the corresponding `languagesystem` declaration.
|
||||||
|
|
||||||
### Debugging feature compilation failures
|
### Debugging feature compilation failures
|
||||||
|
|
||||||
@@ -201,3 +201,26 @@ Understanding feature application order is critical for Devanagari debugging:
|
|||||||
4. **GPOS**: `kern` → `mark`/`abvm` → `mkmk`
|
4. **GPOS**: `kern` → `mark`/`abvm` → `mkmk`
|
||||||
|
|
||||||
Implication: GSUB rules that need to match pre-base matras adjacent to post-base marks (e.g. anusvara substitution triggered by I-matra) must go in `ccmp`, not `psts`, because reordering separates them.
|
Implication: GSUB rules that need to match pre-base matras adjacent to post-base marks (e.g. anusvara substitution triggered by I-matra) must go in `ccmp`, not `psts`, because reordering separates them.
|
||||||
|
|
||||||
|
### Cross-platform shaper differences (DirectWrite, CoreText, HarfBuzz)
|
||||||
|
|
||||||
|
The three major shapers behave differently for Devanagari (dev2):
|
||||||
|
|
||||||
|
**DirectWrite (Windows)**:
|
||||||
|
- Feature order: `locl` → `nukt` → `akhn` → `rphf` → `rkrf` → `blwf` → `half` → `vatu` → `cjct` → `pres` → `abvs` → `blws` → `psts` → `haln` → `calt` → GPOS: `kern` → `dist` → `abvm` → `blwm`
|
||||||
|
- **Does NOT apply `ccmp`** for the dev2 script. All lookups that must run before `nukt` (e.g. consonant-to-PUA mapping) must therefore also be registered under `locl`, while remaining in `ccmp` for shapers that do apply it.
|
||||||
|
- Tests reph eligibility via `would_substitute([RA, virama], rphf)` using **original Unicode codepoints** (before locl/ccmp). The `rphf` feature must include a rule with the Unicode form of RA, not just the PUA form.
|
||||||
|
|
||||||
|
**CoreText (macOS)**:
|
||||||
|
- Applies `ccmp`, but may do so **after** reordering (unlike HarfBuzz, which applies `ccmp` before reordering). By that point pre-base matras (e.g. I-matra U+093F) have already been moved in front of the consonant, so they are no longer adjacent to the anusvara and adjacency rules like `sub 093F 0902'` fail to match.
|
||||||
|
- Tests reph eligibility using `would_substitute()` with Unicode codepoints, same as DirectWrite.
|
||||||
|
- Solution: add wider-context fallback rules in `abvs` (post-reordering) that match I-matra separated from anusvara by 1-3 intervening glyphs.
|
||||||
|
|
||||||
|
**HarfBuzz (reference)**:
|
||||||
|
- Applies `ccmp` **before** reordering (Unicode order).
|
||||||
|
- Reph detection is pattern-based (RA + virama/halant + consonant at syllable start), not feature-based.
|
||||||
|
- Most lenient — works with PUA-only rules.
|
||||||
|
|
||||||
|
**Practical implication**: Define standalone lookups (e.g. `DevaConsonantMap`, `DevaVowelDecomp`) **outside** any feature block, then reference them from both `locl` and `ccmp`. This ensures that both DirectWrite (via `locl`) and HarfBuzz (via `ccmp`) fire the lookups. On shapers that apply both features, the second application is a no-op because the input glyphs have already been substituted by the first.
|
||||||
|
|
||||||
|
Source: [Microsoft Devanagari shaping spec](https://learn.microsoft.com/en-us/typography/script-development/devanagari)
|
||||||
|
|||||||
@@ -702,25 +702,46 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
|
|||||||
|
|
||||||
if ccmp_subs or vowel_decomp_subs or anusvara_ccmp_subs:
|
if ccmp_subs or vowel_decomp_subs or anusvara_ccmp_subs:
|
||||||
ccmp_parts = []
|
ccmp_parts = []
|
||||||
# AnusvaraUpper lookup defined OUTSIDE the feature block so it only
|
# Define lookups OUTSIDE feature blocks so they can be referenced
|
||||||
# fires when referenced by contextual rules (not unconditionally).
|
# from both locl (for DirectWrite) and ccmp (for HarfBuzz).
|
||||||
|
# DirectWrite's dev2 shaper does not apply ccmp but does apply locl.
|
||||||
if anusvara_ccmp_subs:
|
if anusvara_ccmp_subs:
|
||||||
ccmp_parts.append(f"lookup AnusvaraUpper {{")
|
ccmp_parts.append(f"lookup AnusvaraUpper {{")
|
||||||
ccmp_parts.append(f" sub {glyph_name(0x0902)} by {glyph_name(anusvara_upper)};")
|
ccmp_parts.append(f" sub {glyph_name(0x0902)} by {glyph_name(anusvara_upper)};")
|
||||||
ccmp_parts.append(f"}} AnusvaraUpper;")
|
ccmp_parts.append(f"}} AnusvaraUpper;")
|
||||||
ccmp_parts.append("")
|
ccmp_parts.append("")
|
||||||
ccmp_parts.append("feature ccmp {")
|
if ccmp_subs:
|
||||||
|
ccmp_parts.append("lookup DevaConsonantMap {")
|
||||||
|
ccmp_parts.extend(ccmp_subs)
|
||||||
|
ccmp_parts.append("} DevaConsonantMap;")
|
||||||
|
ccmp_parts.append("")
|
||||||
|
if vowel_decomp_subs:
|
||||||
|
ccmp_parts.append("lookup DevaVowelDecomp {")
|
||||||
|
ccmp_parts.extend(vowel_decomp_subs)
|
||||||
|
ccmp_parts.append("} DevaVowelDecomp;")
|
||||||
|
ccmp_parts.append("")
|
||||||
|
# locl for dev2 — DirectWrite applies locl as the first feature
|
||||||
|
# for Devanagari shaping. Registering consonant mapping and vowel
|
||||||
|
# decomposition here ensures they fire on DirectWrite.
|
||||||
|
ccmp_parts.append("feature locl {")
|
||||||
ccmp_parts.append(" script dev2;")
|
ccmp_parts.append(" script dev2;")
|
||||||
if ccmp_subs:
|
if ccmp_subs:
|
||||||
ccmp_parts.append(" lookup DevaConsonantMap {")
|
ccmp_parts.append(" lookup DevaConsonantMap;")
|
||||||
ccmp_parts.extend(" " + s for s in ccmp_subs)
|
|
||||||
ccmp_parts.append(" } DevaConsonantMap;")
|
|
||||||
if anusvara_ccmp_subs:
|
if anusvara_ccmp_subs:
|
||||||
ccmp_parts.extend(anusvara_ccmp_subs)
|
ccmp_parts.extend(anusvara_ccmp_subs)
|
||||||
if vowel_decomp_subs:
|
if vowel_decomp_subs:
|
||||||
ccmp_parts.append(" lookup DevaVowelDecomp {")
|
ccmp_parts.append(" lookup DevaVowelDecomp;")
|
||||||
ccmp_parts.extend(" " + s for s in vowel_decomp_subs)
|
ccmp_parts.append("} locl;")
|
||||||
ccmp_parts.append(" } DevaVowelDecomp;")
|
ccmp_parts.append("")
|
||||||
|
# ccmp for dev2 — HarfBuzz applies ccmp before reordering
|
||||||
|
ccmp_parts.append("feature ccmp {")
|
||||||
|
ccmp_parts.append(" script dev2;")
|
||||||
|
if ccmp_subs:
|
||||||
|
ccmp_parts.append(" lookup DevaConsonantMap;")
|
||||||
|
if anusvara_ccmp_subs:
|
||||||
|
ccmp_parts.extend(anusvara_ccmp_subs)
|
||||||
|
if vowel_decomp_subs:
|
||||||
|
ccmp_parts.append(" lookup DevaVowelDecomp;")
|
||||||
ccmp_parts.append("} ccmp;")
|
ccmp_parts.append("} ccmp;")
|
||||||
features.append('\n'.join(ccmp_parts))
|
features.append('\n'.join(ccmp_parts))
|
||||||
|
|
||||||
@@ -981,15 +1002,25 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
|
|||||||
if blws_subs:
|
if blws_subs:
|
||||||
features.append("feature blws {\n script dev2;\n" + '\n'.join(blws_subs) + "\n} blws;")
|
features.append("feature blws {\n script dev2;\n" + '\n'.join(blws_subs) + "\n} blws;")
|
||||||
|
|
||||||
# --- rphf: RA (PUA) + virama -> reph ---
|
# --- rphf: RA + virama -> reph ---
|
||||||
|
# Must include BOTH Unicode and PUA rules:
|
||||||
|
# - Unicode rule: needed by shapers (CoreText, DirectWrite) that test
|
||||||
|
# reph eligibility via would_substitute() BEFORE ccmp/locl maps RA
|
||||||
|
# to its PUA form
|
||||||
|
# - PUA rule: matches the actual glyph after ccmp/locl has run
|
||||||
if has(ra_int) and has(SC.DEVANAGARI_VIRAMA) and has(SC.DEVANAGARI_RA_SUPER):
|
if has(ra_int) and has(SC.DEVANAGARI_VIRAMA) and has(SC.DEVANAGARI_RA_SUPER):
|
||||||
rphf_code = (
|
rphf_lines = ["feature rphf {", " script dev2;"]
|
||||||
f"feature rphf {{\n"
|
if has(0x0930):
|
||||||
f" script dev2;\n"
|
rphf_lines.append(
|
||||||
f" sub {glyph_name(ra_int)} {glyph_name(SC.DEVANAGARI_VIRAMA)} by {glyph_name(SC.DEVANAGARI_RA_SUPER)};\n"
|
f" sub {glyph_name(0x0930)} {glyph_name(SC.DEVANAGARI_VIRAMA)}"
|
||||||
f"}} rphf;"
|
f" by {glyph_name(SC.DEVANAGARI_RA_SUPER)};"
|
||||||
|
)
|
||||||
|
rphf_lines.append(
|
||||||
|
f" sub {glyph_name(ra_int)} {glyph_name(SC.DEVANAGARI_VIRAMA)}"
|
||||||
|
f" by {glyph_name(SC.DEVANAGARI_RA_SUPER)};"
|
||||||
)
|
)
|
||||||
features.append(rphf_code)
|
rphf_lines.append("} rphf;")
|
||||||
|
features.append('\n'.join(rphf_lines))
|
||||||
|
|
||||||
# --- pres: alternate half-SHA before LA ---
|
# --- pres: alternate half-SHA before LA ---
|
||||||
# SHA+virama+LA uses a special half-SHA form (uF010F) instead of the
|
# SHA+virama+LA uses a special half-SHA form (uF010F) instead of the
|
||||||
@@ -1009,18 +1040,39 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
|
|||||||
pres_lines.append("} pres;")
|
pres_lines.append("} pres;")
|
||||||
features.append('\n'.join(pres_lines))
|
features.append('\n'.join(pres_lines))
|
||||||
|
|
||||||
# --- abvs: complex reph substitution ---
|
# --- abvs: complex reph + post-reordering anusvara upper ---
|
||||||
# The Kotlin engine uses complex reph (U+F010D) when a
|
# Complex reph: the Kotlin engine uses complex reph (U+F010D) when a
|
||||||
# devanagariSuperscript mark precedes reph, or any vowel matra
|
# devanagariSuperscript mark precedes reph, or any vowel matra
|
||||||
# (e.g. i-matra) exists in the syllable.
|
# (e.g. i-matra) exists in the syllable.
|
||||||
# After dev2 reordering, glyph order is:
|
# After dev2 reordering, glyph order is:
|
||||||
# [pre-base matras] + [base] + [below-base] + [above-base] + [reph]
|
# [pre-base matras] + [base] + [below-base] + [above-base] + [reph]
|
||||||
# We use chaining contextual substitution to detect these conditions.
|
# We use chaining contextual substitution to detect these conditions.
|
||||||
if has(SC.DEVANAGARI_RA_SUPER) and has(SC.DEVANAGARI_RA_SUPER_COMPLEX):
|
#
|
||||||
# Trigger class: must match Kotlin's devanagariSuperscripts exactly.
|
# Anusvara upper fallback: CoreText may apply ccmp AFTER reordering,
|
||||||
# Does NOT include non-superscript vowels (AA 093E, below-base
|
# which separates I-matra from anusvara (KA I-MATRA ANUSVARA →
|
||||||
# 0941-0944, nukta 093C) or I-matra 093F (handled separately
|
# I-MATRA KA ANUSVARA). The ccmp/locl rule `sub 093F 0902'` won't
|
||||||
# via the sawLeftI / i-matra context rules below).
|
# match when they're separated. Add wider-context rules here (abvs
|
||||||
|
# runs post-reordering on all shapers).
|
||||||
|
|
||||||
|
# Broad Devanagari class for context gaps
|
||||||
|
deva_any_cps = (
|
||||||
|
list(range(0xF0140, 0xF0165)) + # PUA consonants
|
||||||
|
list(range(0xF0170, 0xF0195)) + # nukta forms
|
||||||
|
list(range(0xF0230, 0xF0255)) + # half forms
|
||||||
|
list(range(0xF0320, 0xF0405)) + # RA-appended forms
|
||||||
|
list(range(0x093A, 0x094D)) + # vowel signs/matras
|
||||||
|
list(range(0x0900, 0x0903)) + # signs
|
||||||
|
[0x094E, 0x094F, 0x0951] +
|
||||||
|
list(range(0x0953, 0x0956)) +
|
||||||
|
[SC.DEVANAGARI_RA_SUB] + # below-base RA
|
||||||
|
[r for _, _, r, _ in _conjuncts] # conjunct result glyphs
|
||||||
|
)
|
||||||
|
deva_any_glyphs = [glyph_name(cp) for cp in sorted(set(deva_any_cps)) if has(cp)]
|
||||||
|
|
||||||
|
abvs_lookups = []
|
||||||
|
abvs_body = []
|
||||||
|
|
||||||
|
if has(SC.DEVANAGARI_RA_SUPER) and has(SC.DEVANAGARI_RA_SUPER_COMPLEX) and deva_any_glyphs:
|
||||||
trigger_cps = (
|
trigger_cps = (
|
||||||
list(range(0x0900, 0x0903)) +
|
list(range(0x0900, 0x0903)) +
|
||||||
list(range(0x093A, 0x093C)) + # 093A-093B only (not 093C)
|
list(range(0x093A, 0x093C)) + # 093A-093B only (not 093C)
|
||||||
@@ -1031,42 +1083,45 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
|
|||||||
)
|
)
|
||||||
trigger_glyphs = [glyph_name(cp) for cp in trigger_cps if has(cp)]
|
trigger_glyphs = [glyph_name(cp) for cp in trigger_cps if has(cp)]
|
||||||
|
|
||||||
# Broad Devanagari class for context gaps between i-matra and reph
|
if trigger_glyphs:
|
||||||
deva_any_cps = (
|
|
||||||
list(range(0xF0140, 0xF0165)) + # PUA consonants
|
|
||||||
list(range(0xF0170, 0xF0195)) + # nukta forms
|
|
||||||
list(range(0xF0230, 0xF0255)) + # half forms
|
|
||||||
list(range(0xF0320, 0xF0405)) + # RA-appended forms
|
|
||||||
list(range(0x093A, 0x094D)) + # vowel signs/matras
|
|
||||||
list(range(0x0900, 0x0903)) + # signs
|
|
||||||
[0x094E, 0x094F, 0x0951] +
|
|
||||||
list(range(0x0953, 0x0956)) +
|
|
||||||
[SC.DEVANAGARI_RA_SUB] + # below-base RA
|
|
||||||
[r for _, _, r, _ in _conjuncts] # conjunct result glyphs
|
|
||||||
)
|
|
||||||
deva_any_glyphs = [glyph_name(cp) for cp in sorted(set(deva_any_cps)) if has(cp)]
|
|
||||||
|
|
||||||
if trigger_glyphs and deva_any_glyphs:
|
|
||||||
reph = glyph_name(SC.DEVANAGARI_RA_SUPER)
|
reph = glyph_name(SC.DEVANAGARI_RA_SUPER)
|
||||||
complex_reph = glyph_name(SC.DEVANAGARI_RA_SUPER_COMPLEX)
|
complex_reph = glyph_name(SC.DEVANAGARI_RA_SUPER_COMPLEX)
|
||||||
|
|
||||||
abvs_lines = []
|
abvs_lookups.append(f"lookup ComplexReph {{")
|
||||||
abvs_lines.append(f"lookup ComplexReph {{")
|
abvs_lookups.append(f" sub {reph} by {complex_reph};")
|
||||||
abvs_lines.append(f" sub {reph} by {complex_reph};")
|
abvs_lookups.append(f"}} ComplexReph;")
|
||||||
abvs_lines.append(f"}} ComplexReph;")
|
|
||||||
abvs_lines.append("")
|
abvs_body.append(f" @complexRephTriggers = [{' '.join(trigger_glyphs)}];")
|
||||||
abvs_lines.append("feature abvs {")
|
|
||||||
abvs_lines.append(" script dev2;")
|
|
||||||
abvs_lines.append(f" @complexRephTriggers = [{' '.join(trigger_glyphs)}];")
|
|
||||||
abvs_lines.append(f" @devaAny = [{' '.join(deva_any_glyphs)}];")
|
|
||||||
# Rule 1: trigger mark/vowel immediately before reph
|
# Rule 1: trigger mark/vowel immediately before reph
|
||||||
abvs_lines.append(f" sub @complexRephTriggers {reph}' lookup ComplexReph;")
|
abvs_body.append(f" sub @complexRephTriggers {reph}' lookup ComplexReph;")
|
||||||
# Rules 2-4: i-matra separated from reph by 1-3 intervening glyphs
|
# Rules 2-4: i-matra separated from reph by 1-3 intervening glyphs
|
||||||
abvs_lines.append(f" sub {glyph_name(0x093F)} @devaAny {reph}' lookup ComplexReph;")
|
abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny {reph}' lookup ComplexReph;")
|
||||||
abvs_lines.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny {reph}' lookup ComplexReph;")
|
abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny {reph}' lookup ComplexReph;")
|
||||||
abvs_lines.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny @devaAny {reph}' lookup ComplexReph;")
|
abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny @devaAny {reph}' lookup ComplexReph;")
|
||||||
abvs_lines.append("} abvs;")
|
|
||||||
features.append('\n'.join(abvs_lines))
|
# Post-reordering anusvara upper: catch I-matra separated from
|
||||||
|
# anusvara by reordering (1-3 intervening consonants/marks).
|
||||||
|
# On HarfBuzz, ccmp already handled this (no-op here); on CoreText,
|
||||||
|
# ccmp may run after reordering so the adjacency rule didn't match.
|
||||||
|
if has(0x093F) and has(0x0902) and has(anusvara_upper) and deva_any_glyphs:
|
||||||
|
abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny"
|
||||||
|
f" {glyph_name(0x0902)}' lookup AnusvaraUpper;")
|
||||||
|
abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny"
|
||||||
|
f" {glyph_name(0x0902)}' lookup AnusvaraUpper;")
|
||||||
|
abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny @devaAny"
|
||||||
|
f" {glyph_name(0x0902)}' lookup AnusvaraUpper;")
|
||||||
|
|
||||||
|
if abvs_body:
|
||||||
|
abvs_lines = abvs_lookups[:]
|
||||||
|
if abvs_lookups:
|
||||||
|
abvs_lines.append("")
|
||||||
|
abvs_lines.append("feature abvs {")
|
||||||
|
abvs_lines.append(" script dev2;")
|
||||||
|
if deva_any_glyphs:
|
||||||
|
abvs_lines.append(f" @devaAny = [{' '.join(deva_any_glyphs)}];")
|
||||||
|
abvs_lines.extend(abvs_body)
|
||||||
|
abvs_lines.append("} abvs;")
|
||||||
|
features.append('\n'.join(abvs_lines))
|
||||||
|
|
||||||
# --- psts: I-matra/II-matra length variants + open Ya ---
|
# --- psts: I-matra/II-matra length variants + open Ya ---
|
||||||
# Must run AFTER abvs because abvs uses uni093F as context for complex
|
# Must run AFTER abvs because abvs uses uni093F as context for complex
|
||||||
|
|||||||
Reference in New Issue
Block a user