otf more deva

2026-06-06 05:58:30 +09:00 · 2026-02-24 21:14:20 +09:00
parent 73fcd7d922
commit fca02f1a3d
2 changed files with 231 additions and 0 deletions
--- a/OTFbuild/calligra_font_tests.odt
+++ b/OTFbuild/calligra_font_tests.odt
--- a/OTFbuild/opentype_features.py
+++ b/OTFbuild/opentype_features.py
@@ -759,11 +759,242 @@ def _generate_devanagari(glyphs, has, replacewith_subs=None):
            abvs_lines.append("} abvs;")
            features.append('\n'.join(abvs_lines))

+    # --- psts: I-matra and II-matra length variants ---
+    # Must run AFTER abvs because abvs uses uni093F as context for complex
+    # reph substitution.  If I-matra were substituted before abvs, those
+    # contextual rules would break.
+    #
+    # HarfBuzz dev2 feature order: init → pres → abvs → blws → psts → haln
+    # psts has F_GLOBAL_MANUAL_JOINERS masking → applied to ALL glyphs in the
+    # syllable, so it works for both pre-base I-matra and post-base II-matra.
+    psts_code = _generate_psts_matra_variants(glyphs, has, _conjuncts)
+    if psts_code:
+        features.append(psts_code)
+
    if not features:
        return ""
    return '\n\n'.join(features)


+def _generate_psts_matra_variants(glyphs, has, conjuncts):
+    """Generate psts feature for I-matra and II-matra length variant selection.
+
+    The bitmap font has 16 length variants for each of I-matra (U+093F) and
+    II-matra (U+0940), mapped to PUA U+F0110-F011F and U+F0120-F012F.
+    The variant is selected based on the consonant's stem position (anchor[0].x)
+    and width, mirroring the Kotlin engine logic.
+
+    glyphs: codepoint-keyed mapping; entries expose props.width and
+    props.diacritics_anchors.  has: predicate on a codepoint (presumably "glyph
+    exists in the font" -- confirm).  conjuncts: 4-tuples; only item 3 is used.
+    Returns feature-file text (lookups + psts block), or an empty string.
+    """
+    from collections import defaultdict
+
+    if not has(0x093F) and not has(0x0940):
+        return ""
+
+    # Check that at least some variant glyphs exist
+    has_i_variants = any(has(0xF0110 + i) for i in range(16))
+    has_ii_variants = any(has(0xF0120 + i) for i in range(16))
+    if not has_i_variants and not has_ii_variants:
+        return ""
+
+    def anchor_x(cp):
+        """Get anchor[0].x for a glyph, defaulting to 0 if missing or unused."""
+        if cp not in glyphs:
+            return 0
+        a = glyphs[cp].props.diacritics_anchors[0]
+        return a.x if a.x_used else 0
+
+    def glyph_width(cp):
+        """Get glyph width, defaulting to 0 when cp is not in glyphs."""
+        if cp not in glyphs:
+            return 0
+        return glyphs[cp].props.width
+
+    # Collect base consonants (full forms that can serve as syllable base)
+    # Includes: presentation consonants, nukta forms, conjuncts, RA-appended forms
+    base_cps = set()
+    for cp in SC.DEVANAGARI_PRESENTATION_CONSONANTS:
+        if has(cp):
+            base_cps.add(cp)
+    for cp in SC.DEVANAGARI_PRESENTATION_CONSONANTS_WITH_RA:
+        if has(cp):
+            base_cps.add(cp)
+    # Add conjunct result glyphs (they are also full consonants)
+    for _, _, result, _ in conjuncts:
+        if has(result):
+            base_cps.add(result)
+
+    # Collect half consonants
+    half_cps = set()
+    for cp in SC.DEVANAGARI_PRESENTATION_CONSONANTS_HALF:
+        if has(cp):
+            half_cps.add(cp)
+    for cp in SC.DEVANAGARI_PRESENTATION_CONSONANTS_WITH_RA_HALF:
+        if has(cp):
+            half_cps.add(cp)
+
+    if not base_cps:
+        return ""
+
+    lines = []
+
+    # ===== I-matra variant lookups and rules =====
+    if has(0x093F) and has_i_variants:
+        # Create up to 16 single-substitution lookups (one per variant glyph present)
+        for var in range(16):
+            target = 0xF0110 + var
+            if has(target):
+                lines.append(f"lookup IMatraVar{var} {{")
+                lines.append(f"    sub {glyph_name(0x093F)} by {glyph_name(target)};")
+                lines.append(f"}} IMatraVar{var};")
+
+        # --- Group base consonants by I-matra variant index ---
+        # Formula: var_idx = clamp(anchor_x + 2, 6, 21) - 6
+        i_base_groups = defaultdict(set)  # var_idx -> set of base cps
+        for cp in sorted(base_cps):
+            ax = anchor_x(cp)
+            var_idx = min(max(ax + 2, 6), 21) - 6
+            i_base_groups[var_idx].add(cp)
+
+        # --- Group half consonants by width ---
+        # Half consonants only contribute their width to the variant calc,
+        # so we can group them into width-classes to avoid O(n^2) rule explosion.
+        half_by_width = defaultdict(set)  # width -> set of half cps
+        for half_cp in half_cps:
+            hw = glyph_width(half_cp)
+            half_by_width[hw].add(half_cp)
+
+        # --- Group (half_width, base) pairs by variant index ---
+        # For half+base: var_idx = clamp(half_width + anchor_x + 2, 6, 21) - 6
+        # Nested map: half_width -> var_idx -> set of base cps
+        i_hw_base = defaultdict(lambda: defaultdict(set))
+        for hw, _ in sorted(half_by_width.items()):
+            for cp in base_cps:
+                ax = anchor_x(cp)
+                var_idx = min(max(hw + ax + 2, 6), 21) - 6
+                i_hw_base[hw][var_idx].add(cp)
+
+        # --- Group (half_width1, half_width2, base) by variant index ---
+        # For half+half+base: var_idx = clamp(hw1 + hw2 + anchor_x + 2, 6, 21) - 6
+        i_hww_base = defaultdict(lambda: defaultdict(set))
+        half_widths = sorted(half_by_width.keys())
+        for hw1 in half_widths:
+            for hw2 in half_widths:
+                for cp in base_cps:
+                    ax = anchor_x(cp)
+                    var_idx = min(max(hw1 + hw2 + ax + 2, 6), 21) - 6
+                    i_hww_base[(hw1, hw2)][var_idx].add(cp)
+
+        # Build psts feature rules
+        # Rules must be ordered longest-context-first (first match wins)
+        psts_i_lines = []
+
+        # Case C: half + half + base (4-glyph context)
+        # Each rule gets its own classes: @iHH{n} (bases), @iHH1_{n} / @iHH2_{n} (halves)
+        hh_class_idx = 0
+        for (hw1, hw2), var_groups in sorted(i_hww_base.items()):
+            for var_idx, bases in sorted(var_groups.items()):
+                if not has(0xF0110 + var_idx):
+                    continue
+                base_names = ' '.join(glyph_name(cp) for cp in sorted(bases))
+                h1_names = ' '.join(glyph_name(cp) for cp in sorted(half_by_width[hw1]))
+                h2_names = ' '.join(glyph_name(cp) for cp in sorted(half_by_width[hw2]))
+                cls_b = f"@iHH{hh_class_idx}"
+                cls_h1 = f"@iHH1_{hh_class_idx}"
+                cls_h2 = f"@iHH2_{hh_class_idx}"
+                psts_i_lines.append(f"    {cls_b} = [{base_names}];")
+                psts_i_lines.append(f"    {cls_h1} = [{h1_names}];")
+                psts_i_lines.append(f"    {cls_h2} = [{h2_names}];")
+                psts_i_lines.append(
+                    f"    sub {glyph_name(0x093F)}' lookup IMatraVar{var_idx} "
+                    f"{cls_h1} {cls_h2} {cls_b};"
+                )
+                hh_class_idx += 1
+
+        # Case B: half + base (3-glyph context)
+        hb_class_idx = 0
+        for hw, var_groups in sorted(i_hw_base.items()):
+            for var_idx, bases in sorted(var_groups.items()):
+                if not has(0xF0110 + var_idx):
+                    continue
+                base_names = ' '.join(glyph_name(cp) for cp in sorted(bases))
+                h_names = ' '.join(glyph_name(cp) for cp in sorted(half_by_width[hw]))
+                cls_b = f"@iHB{hb_class_idx}"
+                cls_h = f"@iH{hb_class_idx}"
+                psts_i_lines.append(f"    {cls_b} = [{base_names}];")
+                psts_i_lines.append(f"    {cls_h} = [{h_names}];")
+                psts_i_lines.append(
+                    f"    sub {glyph_name(0x093F)}' lookup IMatraVar{var_idx} "
+                    f"{cls_h} {cls_b};"
+                )
+                hb_class_idx += 1
+
+        # Case A: base only (2-glyph context)
+        for var_idx, bases in sorted(i_base_groups.items()):
+            if not has(0xF0110 + var_idx):
+                continue
+            base_names = ' '.join(glyph_name(cp) for cp in sorted(bases))
+            cls = f"@iB{var_idx}"
+            psts_i_lines.append(f"    {cls} = [{base_names}];")
+            psts_i_lines.append(
+                f"    sub {glyph_name(0x093F)}' lookup IMatraVar{var_idx} {cls};"
+            )
+
+    else:
+        psts_i_lines = []
+
+    # ===== II-matra variant lookups and rules =====
+    if has(0x0940) and has_ii_variants:
+        # Create up to 16 single-substitution lookups (one per variant glyph present)
+        for var in range(16):
+            target = 0xF0120 + var
+            if has(target):
+                lines.append(f"lookup IIMatraVar{var} {{")
+                lines.append(f"    sub {glyph_name(0x0940)} by {glyph_name(target)};")
+                lines.append(f"}} IIMatraVar{var};")
+
+        # Group base consonants by II-matra variant index
+        # With w = width - anchor_x, the target PUA codepoint is
+        #   0xF012F - (clamp(w + 1, 4, 19) - 4)
+        # so, expressed as an offset from 0xF0120 (pua = 0xF0120 + var_idx):
+        #   var_idx = 15 - (clamp(w + 1, 4, 19) - 4)
+        # i.e. a larger w selects a lower-numbered variant.
+        ii_base_groups = defaultdict(set)
+        for cp in sorted(base_cps):
+            w = glyph_width(cp) - anchor_x(cp)
+            clamped = min(max(w + 1, 4), 19) - 4
+            var_idx = 15 - clamped  # 0xF012F - clamped → offset from 0xF0120
+            ii_base_groups[var_idx].add(cp)
+
+        psts_ii_lines = []
+        for var_idx, bases in sorted(ii_base_groups.items()):
+            target = 0xF0120 + var_idx
+            if not has(target):
+                continue
+            base_names = ' '.join(glyph_name(cp) for cp in sorted(bases))
+            cls = f"@iiB{var_idx}"
+            psts_ii_lines.append(f"    {cls} = [{base_names}];")
+            psts_ii_lines.append(
+                f"    sub {cls} {glyph_name(0x0940)}' lookup IIMatraVar{var_idx};"
+            )
+    else:
+        psts_ii_lines = []
+
+    if not psts_i_lines and not psts_ii_lines:
+        return ""
+
+    # Assemble the feature block
+    feat = ["feature psts {", "    script dev2;"]
+    feat.extend(psts_i_lines)
+    feat.extend(psts_ii_lines)
+    feat.append("} psts;")
+
+    return '\n'.join(lines + [''] + feat)
+
+
 def _generate_tamil(glyphs, has):
    """Generate Tamil GSUB features."""
    subs = []