diff --git a/OTFbuild/CLAUDE.md b/OTFbuild/CLAUDE.md index be2e38f..f49833b 100644 --- a/OTFbuild/CLAUDE.md +++ b/OTFbuild/CLAUDE.md @@ -121,8 +121,8 @@ print(f"{name}: advance={w}, has_outlines={has_outlines}") - **ccmp** — replacewith expansions (DFLT); consonant-to-PUA mapping + vowel decompositions + anusvara upper (dev2); vowel decompositions (tml2) - **kern** — pair positioning from `keming_machine.py` - **liga** — Latin ligatures (ff, fi, fl, ffi, ffl, st) and Armenian ligatures -- **locl** — Bulgarian/Serbian Cyrillic alternates -- **nukt, akhn, half, vatu, pres, blws, rphf** — Devanagari complex script shaping (all under `script dev2`) +- **locl** — Bulgarian/Serbian Cyrillic alternates; Devanagari consonant-to-PUA mapping + vowel decompositions + anusvara upper (dev2, duplicated from ccmp for DirectWrite compatibility) +- **nukt, akhn, half, blwf, cjct, pres, blws, rphf, abvs, psts, calt** — Devanagari complex script shaping (all under `script dev2`) - **pres** (tml2) — Tamil consonant+vowel ligatures - **pres** (sund) — Sundanese diacritic combinations - **ljmo, vjmo, tjmo** — Hangul jamo positional variants @@ -181,7 +181,7 @@ for sr in gsub.table.ScriptList.ScriptRecord: print(f"{tag}/{lsr.LangSysTag}: {' '.join(sorted(set(feats)))}") ``` -Expected output for dev2: `dev2/dflt: abvs akhn blwf blws calt ccmp cjct half liga nukt pres psts rphf`. If language-specific records (e.g. `dev2/MAR`) appear with only `ccmp liga`, the language records have incomplete feature inheritance — remove the corresponding `languagesystem` declaration. +Expected output for dev2: `dev2/dflt: abvs akhn blwf blws calt ccmp cjct half liga locl nukt pres psts rphf`. If language-specific records (e.g. `dev2/MAR`) appear with only `ccmp liga`, the language records have incomplete feature inheritance — remove the corresponding `languagesystem` declaration. ### Debugging feature compilation failures @@ -201,3 +201,26 @@ Understanding feature application order is critical for Devanagari debugging: 4. **GPOS**: `kern` → `mark`/`abvm` → `mkmk` Implication: GSUB rules that need to match pre-base matras adjacent to post-base marks (e.g. anusvara substitution triggered by I-matra) must go in `ccmp`, not `psts`, because reordering separates them. + +### Cross-platform shaper differences (DirectWrite, CoreText, HarfBuzz) + +The three major shapers behave differently for Devanagari (dev2): + +**DirectWrite (Windows)**: +- Feature order: `locl` → `nukt` → `akhn` → `rphf` → `rkrf` → `blwf` → `half` → `vatu` → `cjct` → `pres` → `abvs` → `blws` → `psts` → `haln` → `calt` → GPOS: `kern` → `dist` → `abvm` → `blwm` +- **Does NOT apply `ccmp`** for the dev2 script. All lookups that must run before `nukt` (e.g. consonant-to-PUA mapping) must be registered under `locl` instead. +- Tests reph eligibility via `would_substitute([RA, virama], rphf)` using **original Unicode codepoints** (before locl/ccmp). The `rphf` feature must include a rule with the Unicode form of RA, not just the PUA form. + +**CoreText (macOS)**: +- Applies `ccmp` but may do so **after** reordering (unlike HarfBuzz which applies ccmp before reordering). This means pre-base matras (I-matra U+093F) are already reordered before the consonant, breaking adjacency rules like `sub 093F 0902'`. +- Tests reph eligibility using `would_substitute()` with Unicode codepoints, same as DirectWrite. +- Solution: add wider-context fallback rules in `abvs` (post-reordering) that match I-matra separated from anusvara by 1-3 intervening glyphs. + +**HarfBuzz (reference)**: +- Applies `ccmp` **before** reordering (Unicode order). +- Reph detection is pattern-based (RA + halant + consonant at syllable start), not feature-based. +- Most lenient — works with PUA-only rules. + +**Practical implication**: Define standalone lookups (e.g. `DevaConsonantMap`, `DevaVowelDecomp`) **outside** any feature block, then reference them from both `locl` and `ccmp`. This ensures DirectWrite (via locl) and HarfBuzz (via ccmp) both fire the lookups. The second application is a no-op since glyphs are already transformed. + +Source: [Microsoft Devanagari shaping spec](https://learn.microsoft.com/en-us/typography/script-development/devanagari) diff --git a/OTFbuild/opentype_features.py b/OTFbuild/opentype_features.py index ade110a..a465c12 100644 --- a/OTFbuild/opentype_features.py +++ b/OTFbuild/opentype_features.py @@ -702,25 +702,46 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None): if ccmp_subs or vowel_decomp_subs or anusvara_ccmp_subs: ccmp_parts = [] - # AnusvaraUpper lookup defined OUTSIDE the feature block so it only - # fires when referenced by contextual rules (not unconditionally). + # Define lookups OUTSIDE feature blocks so they can be referenced + # from both locl (for DirectWrite) and ccmp (for HarfBuzz). + # DirectWrite's dev2 shaper does not apply ccmp but does apply locl. if anusvara_ccmp_subs: ccmp_parts.append(f"lookup AnusvaraUpper {{") ccmp_parts.append(f" sub {glyph_name(0x0902)} by {glyph_name(anusvara_upper)};") ccmp_parts.append(f"}} AnusvaraUpper;") ccmp_parts.append("") - ccmp_parts.append("feature ccmp {") + if ccmp_subs: + ccmp_parts.append("lookup DevaConsonantMap {") + ccmp_parts.extend(ccmp_subs) + ccmp_parts.append("} DevaConsonantMap;") + ccmp_parts.append("") + if vowel_decomp_subs: + ccmp_parts.append("lookup DevaVowelDecomp {") + ccmp_parts.extend(vowel_decomp_subs) + ccmp_parts.append("} DevaVowelDecomp;") + ccmp_parts.append("") + # locl for dev2 — DirectWrite applies locl as the first feature + # for Devanagari shaping. Registering consonant mapping and vowel + # decomposition here ensures they fire on DirectWrite. + ccmp_parts.append("feature locl {") ccmp_parts.append(" script dev2;") if ccmp_subs: - ccmp_parts.append(" lookup DevaConsonantMap {") - ccmp_parts.extend(" " + s for s in ccmp_subs) - ccmp_parts.append(" } DevaConsonantMap;") + ccmp_parts.append(" lookup DevaConsonantMap;") if anusvara_ccmp_subs: ccmp_parts.extend(anusvara_ccmp_subs) if vowel_decomp_subs: - ccmp_parts.append(" lookup DevaVowelDecomp {") - ccmp_parts.extend(" " + s for s in vowel_decomp_subs) - ccmp_parts.append(" } DevaVowelDecomp;") + ccmp_parts.append(" lookup DevaVowelDecomp;") + ccmp_parts.append("} locl;") + ccmp_parts.append("") + # ccmp for dev2 — HarfBuzz applies ccmp before reordering + ccmp_parts.append("feature ccmp {") + ccmp_parts.append(" script dev2;") + if ccmp_subs: + ccmp_parts.append(" lookup DevaConsonantMap;") + if anusvara_ccmp_subs: + ccmp_parts.extend(anusvara_ccmp_subs) + if vowel_decomp_subs: + ccmp_parts.append(" lookup DevaVowelDecomp;") ccmp_parts.append("} ccmp;") features.append('\n'.join(ccmp_parts)) @@ -981,15 +1002,25 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None): if blws_subs: features.append("feature blws {\n script dev2;\n" + '\n'.join(blws_subs) + "\n} blws;") - # --- rphf: RA (PUA) + virama -> reph --- + # --- rphf: RA + virama -> reph --- + # Must include BOTH Unicode and PUA rules: + # - Unicode rule: needed by shapers (CoreText, DirectWrite) that test + # reph eligibility via would_substitute() BEFORE ccmp/locl maps RA + # to its PUA form + # - PUA rule: matches the actual glyph after ccmp/locl has run if has(ra_int) and has(SC.DEVANAGARI_VIRAMA) and has(SC.DEVANAGARI_RA_SUPER): - rphf_code = ( - f"feature rphf {{\n" - f" script dev2;\n" - f" sub {glyph_name(ra_int)} {glyph_name(SC.DEVANAGARI_VIRAMA)} by {glyph_name(SC.DEVANAGARI_RA_SUPER)};\n" - f"}} rphf;" + rphf_lines = ["feature rphf {", " script dev2;"] + if has(0x0930): + rphf_lines.append( + f" sub {glyph_name(0x0930)} {glyph_name(SC.DEVANAGARI_VIRAMA)}" + f" by {glyph_name(SC.DEVANAGARI_RA_SUPER)};" + ) + rphf_lines.append( + f" sub {glyph_name(ra_int)} {glyph_name(SC.DEVANAGARI_VIRAMA)}" + f" by {glyph_name(SC.DEVANAGARI_RA_SUPER)};" ) - features.append(rphf_code) + rphf_lines.append("} rphf;") + features.append('\n'.join(rphf_lines)) # --- pres: alternate half-SHA before LA --- # SHA+virama+LA uses a special half-SHA form (uF010F) instead of the @@ -1009,18 +1040,39 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None): pres_lines.append("} pres;") features.append('\n'.join(pres_lines)) - # --- abvs: complex reph substitution --- - # The Kotlin engine uses complex reph (U+F010D) when a + # --- abvs: complex reph + post-reordering anusvara upper --- + # Complex reph: the Kotlin engine uses complex reph (U+F010D) when a # devanagariSuperscript mark precedes reph, or any vowel matra # (e.g. i-matra) exists in the syllable. # After dev2 reordering, glyph order is: # [pre-base matras] + [base] + [below-base] + [above-base] + [reph] # We use chaining contextual substitution to detect these conditions. - if has(SC.DEVANAGARI_RA_SUPER) and has(SC.DEVANAGARI_RA_SUPER_COMPLEX): - # Trigger class: must match Kotlin's devanagariSuperscripts exactly. - # Does NOT include non-superscript vowels (AA 093E, below-base - # 0941-0944, nukta 093C) or I-matra 093F (handled separately - # via the sawLeftI / i-matra context rules below). + # + # Anusvara upper fallback: CoreText may apply ccmp AFTER reordering, + # which separates I-matra from anusvara (KA I-MATRA ANUSVARA → + # I-MATRA KA ANUSVARA). The ccmp/locl rule `sub 093F 0902'` won't + # match when they're separated. Add wider-context rules here (abvs + # runs post-reordering on all shapers). + + # Broad Devanagari class for context gaps + deva_any_cps = ( + list(range(0xF0140, 0xF0165)) + # PUA consonants + list(range(0xF0170, 0xF0195)) + # nukta forms + list(range(0xF0230, 0xF0255)) + # half forms + list(range(0xF0320, 0xF0405)) + # RA-appended forms + list(range(0x093A, 0x094D)) + # vowel signs/matras + list(range(0x0900, 0x0903)) + # signs + [0x094E, 0x094F, 0x0951] + + list(range(0x0953, 0x0956)) + + [SC.DEVANAGARI_RA_SUB] + # below-base RA + [r for _, _, r, _ in _conjuncts] # conjunct result glyphs + ) + deva_any_glyphs = [glyph_name(cp) for cp in sorted(set(deva_any_cps)) if has(cp)] + + abvs_lookups = [] + abvs_body = [] + + if has(SC.DEVANAGARI_RA_SUPER) and has(SC.DEVANAGARI_RA_SUPER_COMPLEX) and deva_any_glyphs: trigger_cps = ( list(range(0x0900, 0x0903)) + list(range(0x093A, 0x093C)) + # 093A-093B only (not 093C) @@ -1031,42 +1083,45 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None): ) trigger_glyphs = [glyph_name(cp) for cp in trigger_cps if has(cp)] - # Broad Devanagari class for context gaps between i-matra and reph - deva_any_cps = ( - list(range(0xF0140, 0xF0165)) + # PUA consonants - list(range(0xF0170, 0xF0195)) + # nukta forms - list(range(0xF0230, 0xF0255)) + # half forms - list(range(0xF0320, 0xF0405)) + # RA-appended forms - list(range(0x093A, 0x094D)) + # vowel signs/matras - list(range(0x0900, 0x0903)) + # signs - [0x094E, 0x094F, 0x0951] + - list(range(0x0953, 0x0956)) + - [SC.DEVANAGARI_RA_SUB] + # below-base RA - [r for _, _, r, _ in _conjuncts] # conjunct result glyphs - ) - deva_any_glyphs = [glyph_name(cp) for cp in sorted(set(deva_any_cps)) if has(cp)] - - if trigger_glyphs and deva_any_glyphs: + if trigger_glyphs: reph = glyph_name(SC.DEVANAGARI_RA_SUPER) complex_reph = glyph_name(SC.DEVANAGARI_RA_SUPER_COMPLEX) - abvs_lines = [] - abvs_lines.append(f"lookup ComplexReph {{") - abvs_lines.append(f" sub {reph} by {complex_reph};") - abvs_lines.append(f"}} ComplexReph;") - abvs_lines.append("") - abvs_lines.append("feature abvs {") - abvs_lines.append(" script dev2;") - abvs_lines.append(f" @complexRephTriggers = [{' '.join(trigger_glyphs)}];") - abvs_lines.append(f" @devaAny = [{' '.join(deva_any_glyphs)}];") + abvs_lookups.append(f"lookup ComplexReph {{") + abvs_lookups.append(f" sub {reph} by {complex_reph};") + abvs_lookups.append(f"}} ComplexReph;") + + abvs_body.append(f" @complexRephTriggers = [{' '.join(trigger_glyphs)}];") # Rule 1: trigger mark/vowel immediately before reph - abvs_lines.append(f" sub @complexRephTriggers {reph}' lookup ComplexReph;") + abvs_body.append(f" sub @complexRephTriggers {reph}' lookup ComplexReph;") # Rules 2-4: i-matra separated from reph by 1-3 intervening glyphs - abvs_lines.append(f" sub {glyph_name(0x093F)} @devaAny {reph}' lookup ComplexReph;") - abvs_lines.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny {reph}' lookup ComplexReph;") - abvs_lines.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny @devaAny {reph}' lookup ComplexReph;") - abvs_lines.append("} abvs;") - features.append('\n'.join(abvs_lines)) + abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny {reph}' lookup ComplexReph;") + abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny {reph}' lookup ComplexReph;") + abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny @devaAny {reph}' lookup ComplexReph;") + + # Post-reordering anusvara upper: catch I-matra separated from + # anusvara by reordering (1-3 intervening consonants/marks). + # On HarfBuzz, ccmp already handled this (no-op here); on CoreText, + # ccmp may run after reordering so the adjacency rule didn't match. + if has(0x093F) and has(0x0902) and has(anusvara_upper) and deva_any_glyphs: + abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny" + f" {glyph_name(0x0902)}' lookup AnusvaraUpper;") + abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny" + f" {glyph_name(0x0902)}' lookup AnusvaraUpper;") + abvs_body.append(f" sub {glyph_name(0x093F)} @devaAny @devaAny @devaAny" + f" {glyph_name(0x0902)}' lookup AnusvaraUpper;") + + if abvs_body: + abvs_lines = abvs_lookups[:] + if abvs_lookups: + abvs_lines.append("") + abvs_lines.append("feature abvs {") + abvs_lines.append(" script dev2;") + if deva_any_glyphs: + abvs_lines.append(f" @devaAny = [{' '.join(deva_any_glyphs)}];") + abvs_lines.extend(abvs_body) + abvs_lines.append("} abvs;") + features.append('\n'.join(abvs_lines)) # --- psts: I-matra/II-matra length variants + open Ya --- # Must run AFTER abvs because abvs uses uni093F as context for complex