anusvara positioning fixed for DirectWrite and CoreText but now broken for HarfBuzz

This commit is contained in:
minjaesong
2026-03-04 07:00:14 +09:00
parent cb2f432479
commit da59fe24d4
2 changed files with 170 additions and 93 deletions

Binary file not shown.

View File

@@ -58,6 +58,7 @@ languagesystem hang KOH ;
languagesystem cyrl SRB ;
languagesystem cyrl BGR ;
languagesystem dev2 dflt;
languagesystem deva dflt;
languagesystem tml2 dflt;
languagesystem sund dflt;
"""
@@ -182,7 +183,7 @@ def _generate_hangul_gsub(glyphs, has, jamo_data):
pua_fn = jamo_data['pua_fn']
# Build codepoint lists (standard + extended jamo ranges)
cho_ranges = list(range(0x1100, 0x115F)) + list(range(0xA960, 0xA97C))
cho_ranges = list(range(0x1100, 0x115F)) + list(range(0xA960, 0xA97D))
jung_ranges = list(range(0x1160, 0x11A8)) + list(range(0xD7B0, 0xD7C7))
jong_ranges = list(range(0x11A8, 0x1200)) + list(range(0xD7CB, 0xD7FC))
@@ -720,11 +721,14 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
ccmp_parts.extend(vowel_decomp_subs)
ccmp_parts.append("} DevaVowelDecomp;")
ccmp_parts.append("")
# locl for dev2 — DirectWrite applies locl as the first feature
# for Devanagari shaping. Registering consonant mapping and vowel
# decomposition here ensures they fire on DirectWrite.
# locl for dev2/deva — DirectWrite applies locl as the first
# feature for Devanagari shaping. Registering consonant mapping
# and vowel decomposition here ensures they fire on DirectWrite.
# Both dev2 (new Indic) and deva (old Indic) script tags are
# needed for CoreText compatibility.
ccmp_parts.append("feature locl {")
ccmp_parts.append(" script dev2;")
for _st in ['dev2', 'deva']:
ccmp_parts.append(f" script {_st};")
if ccmp_subs:
ccmp_parts.append(" lookup DevaConsonantMap;")
if anusvara_ccmp_subs:
@@ -733,9 +737,10 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
ccmp_parts.append(" lookup DevaVowelDecomp;")
ccmp_parts.append("} locl;")
ccmp_parts.append("")
# ccmp for dev2 — HarfBuzz applies ccmp before reordering
# ccmp for dev2/deva — HarfBuzz applies ccmp before reordering
ccmp_parts.append("feature ccmp {")
ccmp_parts.append(" script dev2;")
for _st in ['dev2', 'deva']:
ccmp_parts.append(f" script {_st};")
if ccmp_subs:
ccmp_parts.append(" lookup DevaConsonantMap;")
if anusvara_ccmp_subs:
@@ -756,7 +761,9 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
f" sub {glyph_name(internal)} {glyph_name(0x093C)} by {glyph_name(nukta_form)};"
)
if nukt_subs:
features.append("feature nukt {\n script dev2;\n" + '\n'.join(nukt_subs) + "\n} nukt;")
nukt_body = '\n'.join(nukt_subs)
features.append("feature nukt {\n script dev2;\n" + nukt_body
+ "\n script deva;\n" + nukt_body + "\n} nukt;")
# --- akhn: akhand ligatures + conjuncts ---
# All conjunct ligatures (C1 + virama + C2 → ligature) go in akhn
@@ -882,7 +889,9 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
f" sub {glyph_name(c1)} {glyph_name(SC.DEVANAGARI_VIRAMA)} {glyph_name(c2)} by {glyph_name(result)}; # {name}"
)
if akhn_subs:
features.append("feature akhn {\n script dev2;\n" + '\n'.join(akhn_subs) + "\n} akhn;")
akhn_body = '\n'.join(akhn_subs)
features.append("feature akhn {\n script dev2;\n" + akhn_body
+ "\n script deva;\n" + akhn_body + "\n} akhn;")
# --- half: consonant (PUA) + virama -> half form ---
# After ccmp, consonants are in PUA form, so reference PUA here.
@@ -915,7 +924,9 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
f" sub {glyph_name(SC.MARWARI_LIG_DD_Y)} {glyph_name(SC.DEVANAGARI_VIRAMA)} by {glyph_name(SC.MARWARI_HALFLIG_DD_Y)};"
)
if half_subs:
features.append("feature half {\n script dev2;\n" + '\n'.join(half_subs) + "\n} half;")
half_body = '\n'.join(half_subs)
features.append("feature half {\n script dev2;\n" + half_body
+ "\n script deva;\n" + half_body + "\n} half;")
# --- blwf: virama + RA -> below-base RA (rakaar) ---
# This serves two purposes:
@@ -938,7 +949,9 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
f" sub {glyph_name(SC.DEVANAGARI_VIRAMA)} {glyph_name(ra_int)} by {glyph_name(ra_sub)};"
)
if blwf_subs:
features.append("feature blwf {\n script dev2;\n" + '\n'.join(blwf_subs) + "\n} blwf;")
blwf_body = '\n'.join(blwf_subs)
features.append("feature blwf {\n script dev2;\n" + blwf_body
+ "\n script deva;\n" + blwf_body + "\n} blwf;")
# --- cjct: consonant (PUA) + below-base RA -> RA-appended form ---
# After blwf converts virama+RA to rakaar mark, cjct combines it
@@ -948,7 +961,10 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
#
# A second lookup converts RA-appended + virama -> RA-appended half,
# since the half feature has already run before cjct.
cjct_lines = []
# Lookups defined OUTSIDE the feature block so they can be referenced
# from both dev2 and deva script sections without name collisions.
cjct_lookups = []
cjct_lookup_refs = []
# Lookup 1: consonant + rakaar -> RA-appended form
ra_append_subs = []
@@ -964,9 +980,10 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
f" sub {glyph_name(SC.MARWARI_DD)} {glyph_name(ra_sub)} by {glyph_name(SC.MARWARI_LIG_DD_R)};"
)
if ra_append_subs:
cjct_lines.append(" lookup CjctRaAppend {")
cjct_lines.extend(ra_append_subs)
cjct_lines.append(" } CjctRaAppend;")
cjct_lookups.append("lookup CjctRaAppend {")
cjct_lookups.extend(ra_append_subs)
cjct_lookups.append("} CjctRaAppend;")
cjct_lookup_refs.append(" lookup CjctRaAppend;")
# Lookup 2: RA-appended + virama -> RA-appended half form
ra_half_subs = []
@@ -977,12 +994,18 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
f" sub {glyph_name(ra_form)} {glyph_name(SC.DEVANAGARI_VIRAMA)} by {glyph_name(ra_half)};"
)
if ra_half_subs:
cjct_lines.append(" lookup CjctRaHalf {")
cjct_lines.extend(ra_half_subs)
cjct_lines.append(" } CjctRaHalf;")
cjct_lookups.append("lookup CjctRaHalf {")
cjct_lookups.extend(ra_half_subs)
cjct_lookups.append("} CjctRaHalf;")
cjct_lookup_refs.append(" lookup CjctRaHalf;")
if cjct_lines:
features.append("feature cjct {\n script dev2;\n" + '\n'.join(cjct_lines) + "\n} cjct;")
if cjct_lookup_refs:
cjct_feat = cjct_lookups + ["", "feature cjct {"]
for _st in ['dev2', 'deva']:
cjct_feat.append(f" script {_st};")
cjct_feat.extend(cjct_lookup_refs)
cjct_feat.append("} cjct;")
features.append('\n'.join(cjct_feat))
# --- blws: RA/RRA/HA (PUA) + U/UU -> special syllables ---
blws_subs = []
@@ -1000,7 +1023,9 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
f" sub {glyph_name(c1)} {glyph_name(c2)} by {glyph_name(result)}; # {name}"
)
if blws_subs:
features.append("feature blws {\n script dev2;\n" + '\n'.join(blws_subs) + "\n} blws;")
blws_body = '\n'.join(blws_subs)
features.append("feature blws {\n script dev2;\n" + blws_body
+ "\n script deva;\n" + blws_body + "\n} blws;")
# --- rphf: RA + virama -> reph ---
# Must include BOTH Unicode and PUA rules:
@@ -1009,16 +1034,20 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
# to its PUA form
# - PUA rule: matches the actual glyph after ccmp/locl has run
if has(ra_int) and has(SC.DEVANAGARI_VIRAMA) and has(SC.DEVANAGARI_RA_SUPER):
rphf_lines = ["feature rphf {", " script dev2;"]
rphf_rules = []
if has(0x0930):
rphf_lines.append(
rphf_rules.append(
f" sub {glyph_name(0x0930)} {glyph_name(SC.DEVANAGARI_VIRAMA)}"
f" by {glyph_name(SC.DEVANAGARI_RA_SUPER)};"
)
rphf_lines.append(
rphf_rules.append(
f" sub {glyph_name(ra_int)} {glyph_name(SC.DEVANAGARI_VIRAMA)}"
f" by {glyph_name(SC.DEVANAGARI_RA_SUPER)};"
)
rphf_lines = ["feature rphf {"]
for _st in ['dev2', 'deva']:
rphf_lines.append(f" script {_st};")
rphf_lines.extend(rphf_rules)
rphf_lines.append("} rphf;")
features.append('\n'.join(rphf_lines))
@@ -1035,7 +1064,8 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
pres_lines.append(f"}} AltHalfSha;")
pres_lines.append("")
pres_lines.append("feature pres {")
pres_lines.append(" script dev2;")
for _st in ['dev2', 'deva']:
pres_lines.append(f" script {_st};")
pres_lines.append(f" sub {glyph_name(half_sha)}' lookup AltHalfSha {glyph_name(la_int)};")
pres_lines.append("} pres;")
features.append('\n'.join(pres_lines))
@@ -1116,7 +1146,8 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
if abvs_lookups:
abvs_lines.append("")
abvs_lines.append("feature abvs {")
abvs_lines.append(" script dev2;")
for _st in ['dev2', 'deva']:
abvs_lines.append(f" script {_st};")
if deva_any_glyphs:
abvs_lines.append(f" @devaAny = [{' '.join(deva_any_glyphs)}];")
abvs_lines.extend(abvs_body)
@@ -1137,7 +1168,9 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
all_lookups = matra_lookups + ya_lookups + anus_lookups
all_body = matra_body + ya_body + anus_body
if all_body:
feat = ["feature psts {", " script dev2;"]
feat = ["feature psts {"]
for _st in ['dev2', 'deva']:
feat.append(f" script {_st};")
feat.extend(all_body)
feat.append("} psts;")
features.append('\n'.join(all_lookups + [''] + feat))
@@ -1168,7 +1201,8 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
calt_lines.append(f"}} InterwordVisarga;")
calt_lines.append("")
calt_lines.append("feature calt {")
calt_lines.append(" script dev2;")
for _st in ['dev2', 'deva']:
calt_lines.append(f" script {_st};")
calt_lines.append(f" @devaFollowing = [{' '.join(deva_following)}];")
calt_lines.append(f" sub {glyph_name(visarga)}' lookup InterwordVisarga @devaFollowing;")
calt_lines.append("} calt;")
@@ -1571,9 +1605,12 @@ def _generate_mark(glyphs, has):
# and would bloat the GPOS table).
_EXCLUDE_RANGES = (
range(0x3400, 0xA000), # CJK Unified Ideographs (Ext A + main)
range(0xAC00, 0xD7FF), # Hangul Syllables
range(0xAC00, 0xD800), # Hangul Syllables
range(0x2800, 0x2900), # Braille
)
# I-matra glyphs excluded from MarkToBase (they should not attract
# mark attachment — marks attach to the consonant, not the matra).
_EXCLUDE_CPS = {0x093F} | set(range(0xF0110, 0xF0120))
all_bases = {}
marks = {}
@@ -1583,7 +1620,7 @@ def _generate_mark(glyphs, has):
if g.props.write_on_top >= 0:
marks[cp] = g
elif g.bitmap and g.props.width > 0:
if not any(cp in r for r in _EXCLUDE_RANGES):
if cp not in _EXCLUDE_CPS and not any(cp in r for r in _EXCLUDE_RANGES):
all_bases[cp] = g
if not all_bases or not marks:
@@ -1758,26 +1795,35 @@ def _generate_mark(glyphs, has):
lines.append("")
mkmk_lookup_names.append(mkmk_name)
# Register MarkToBase lookups under DFLT (for Latin, etc.)
# Register MarkToBase lookups under mark for non-Devanagari scripts.
# For dev2/deva, abvm already includes these lookups. Registering
# mark/mkmk under dev2/deva too risks double-application on shapers
# (CoreText, DirectWrite) that may process mark AND abvm separately.
_NON_DEVA_SCRIPTS = ['DFLT', 'latn', 'cyrl', 'grek', 'hang', 'tml2', 'sund']
lines.append("feature mark {")
for _st in _NON_DEVA_SCRIPTS:
lines.append(f" script {_st};")
for ln in lookup_names:
lines.append(f" lookup {ln};")
lines.append("} mark;")
# Register MarkToMark lookups under mkmk
# Register MarkToMark lookups under mkmk (non-Devanagari only)
if mkmk_lookup_names:
lines.append("")
lines.append("feature mkmk {")
for _st in _NON_DEVA_SCRIPTS:
lines.append(f" script {_st};")
for ln in mkmk_lookup_names:
lines.append(f" lookup {ln};")
lines.append("} mkmk;")
# For Devanagari, HarfBuzz's Indic v2 shaper uses abvm/blwm
# features for mark positioning, not the generic 'mark' feature.
# Register the same lookups under abvm for dev2 script.
# Register the same lookups under abvm for both dev2 and deva scripts.
lines.append("")
lines.append("feature abvm {")
lines.append(" script dev2;")
for _st in ['dev2', 'deva']:
lines.append(f" script {_st};")
for ln in lookup_names:
lines.append(f" lookup {ln};")
for ln in mkmk_lookup_names:
@@ -1826,80 +1872,111 @@ def _generate_anusvara_gpos(glyphs, has):
lines.append(f" pos {glyph_name(anusvara_upper)} <150 0 0 0>;")
lines.append(f"}} AnusvaraUpperShift3;")
lines.append(f"lookup AnusvaraUpperShift3Down2 {{")
lines.append(f" pos {glyph_name(anusvara_upper)} <150 -100 0 0>;")
lines.append(f"}} AnusvaraUpperShift3Down2;")
# --- Lookups for regular anusvara (uni0902) ---
if has_regular:
lines.append(f"lookup AnusvaraRegShift2 {{")
lines.append(f" pos {glyph_name(anusvara)} <100 0 0 0>;")
lines.append(f"}} AnusvaraRegShift2;")
lines.append(f"lookup AnusvaraRegShift3Down2 {{")
lines.append(f" pos {glyph_name(anusvara)} <150 -100 0 0>;")
lines.append(f"}} AnusvaraRegShift3Down2;")
# --- MarkToMark: anusvara attaches to complex reph ---
# Without explicit MarkToMark, two marks on the same base get
# shaper-specific heuristic stacking (HarfBuzz, DirectWrite, and
# CoreText all disagree by ~100 units). MarkToMark gives the font
# explicit control and suppresses those heuristics.
has_mkmk = False
if has(complex_reph):
mkmk_lines = []
if has_upper:
mkmk_lines.append(
f" markClass {glyph_name(anusvara_upper)}"
f" <anchor 100 800> @anuUpperToReph;")
if has_regular:
mkmk_lines.append(
f" markClass {glyph_name(anusvara)}"
f" <anchor 150 800> @anuRegToReph;")
if has_upper:
mkmk_lines.append(
f" pos mark {glyph_name(complex_reph)}"
f" <anchor 150 800> mark @anuUpperToReph;")
if has_regular:
mkmk_lines.append(
f" pos mark {glyph_name(complex_reph)}"
f" <anchor 150 800> mark @anuRegToReph;")
if mkmk_lines:
lines.append("")
lines.append("feature abvm {")
lines.append(" script dev2;")
lines.append("lookup AnusvaraToComplexReph {")
lines.extend(mkmk_lines)
lines.append("} AnusvaraToComplexReph;")
has_mkmk = True
# Collect contextual positioning rules into NAMED lookups so that
# both dev2 and deva script sections reference the SAME lookup index.
# Without this, feaLib creates separate anonymous lookups for each
# script section, and shapers that merge both dev2/deva features
# (CoreText, DirectWrite) would apply the shift TWICE.
#
# NOTE: complex_reph + anusvara cases are handled by MarkToMark
# above (AnusvaraToComplexReph), NOT by ChainContextPos.
abvm_rules = []
# --- Rules for anusvara upper (uF016C) ---
# After reordering: base, [matras], reph?, anusvara.
# When reph is present between matra and anusvara, use 3-glyph backtrack.
# Rules ordered longest-context-first (first match wins).
if has_upper:
# Complex reph → always shift3down2 (directly before anusvara)
if has(complex_reph):
lines.append(
f" pos {glyph_name(complex_reph)}"
f" {glyph_name(anusvara_upper)}' lookup AnusvaraUpperShift3Down2;"
)
# Matra + simple reph + anusvara (3-glyph context: matra in backtrack)
if has(simple_reph):
if has(0x094F):
lines.append(
abvm_rules.append(
f" pos {glyph_name(0x094F)} {glyph_name(simple_reph)}"
f" {glyph_name(anusvara_upper)}' lookup AnusvaraUpperShift3;"
)
for cp in [0x093A, 0x0948, 0x094C]:
if has(cp):
lines.append(
abvm_rules.append(
f" pos {glyph_name(cp)} {glyph_name(simple_reph)}"
f" {glyph_name(anusvara_upper)}' lookup AnusvaraUpperShift2;"
)
# Matra directly before anusvara (no reph)
if has(0x094F):
lines.append(
abvm_rules.append(
f" pos {glyph_name(0x094F)}"
f" {glyph_name(anusvara_upper)}' lookup AnusvaraUpperShift3;"
)
for cp in [0x093A, 0x0948, 0x094C]:
if has(cp):
lines.append(
abvm_rules.append(
f" pos {glyph_name(cp)}"
f" {glyph_name(anusvara_upper)}' lookup AnusvaraUpperShift2;"
)
# --- Rules for regular anusvara (uni0902) ---
# Regular anusvara has no matra trigger (else it would be upper).
# Only reph can trigger a shift here.
# Complex reph case handled by MarkToMark; only simple reph here.
if has_regular:
# Complex reph → +3px X, -2px Y
if has(complex_reph):
lines.append(
f" pos {glyph_name(complex_reph)}"
f" {glyph_name(anusvara)}' lookup AnusvaraRegShift3Down2;"
)
# Simple reph → +2px X
if has(simple_reph):
lines.append(
abvm_rules.append(
f" pos {glyph_name(simple_reph)}"
f" {glyph_name(anusvara)}' lookup AnusvaraRegShift2;"
)
# --- Emit named lookup ---
if abvm_rules:
lines.append("")
lines.append("lookup AnusvaraCtxShift {")
lines.extend(abvm_rules)
lines.append("} AnusvaraCtxShift;")
lines.append("")
lines.append("feature abvm {")
for _st in ['dev2', 'deva']:
lines.append(f" script {_st};")
if has_mkmk:
lines.append(" lookup AnusvaraToComplexReph;")
if abvm_rules:
lines.append(" lookup AnusvaraCtxShift;")
lines.append("} abvm;")
return '\n'.join(lines)