otf wip

2026-06-12 17:04:03 +09:00 · 2026-02-23 19:32:25 +09:00
parent 5e2cacd491
commit 949b6aa777
6 changed files with 476 additions and 71 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,7 @@ tmp_*
 *.bak
 *-autosave.kra
 .directory
 */__pycache__
 OTFbuild/*.ttf
 OTFbuild/*.otf
--- a/OTFbuild/build_font.py
+++ b/OTFbuild/build_font.py
@@ -33,8 +33,8 @@ def main():
    )
    parser.add_argument(
        "-o", "--output",
-        default="OTFbuild/TerrarumSansBitmap.ttf",
+        default="OTFbuild/TerrarumSansBitmap.otf",
-        help="Output TTF file path (default: OTFbuild/TerrarumSansBitmap.ttf)"
+        help="Output OTF file path (default: OTFbuild/TerrarumSansBitmap.otf)"
    )
    parser.add_argument(
        "--no-bitmap",
--- a/OTFbuild/font_builder.py
+++ b/OTFbuild/font_builder.py
@@ -3,25 +3,26 @@ Orchestrate fonttools TTFont assembly.
 1. Parse all sheets -> glyphs dict
 2. Compose Hangul -> add to dict
-3. Create glyph order and cmap
+3. Expand replacewith directives
-4. Trace all bitmaps -> glyf table
+4. Create glyph order and cmap
-5. Set hmtx, hhea, OS/2, head, name, post
+5. Trace all bitmaps -> CFF charstrings
-6. Generate and compile OpenType features via feaLib
+6. Set hmtx, hhea, OS/2, head, name, post
-7. Add EBDT/EBLC bitmap strike at ppem=20
+7. Generate and compile OpenType features via feaLib
-8. Save TTF
+8. Add EBDT/EBLC bitmap strike at ppem=20
 9. Save OTF
 """
 import time
 from typing import Dict
 from fontTools.fontBuilder import FontBuilder
-from fontTools.pens.ttGlyphPen import TTGlyphPen
+from fontTools.pens.t2CharStringPen import T2CharStringPen
 from fontTools.feaLib.builder import addOpenTypeFeatures
 from fontTools.ttLib import TTFont
 import io
-from glyph_parser import ExtractedGlyph, parse_all_sheets
+from glyph_parser import ExtractedGlyph, GlyphProps, parse_all_sheets
-from hangul import compose_hangul
+from hangul import compose_hangul, get_jamo_gsub_data, HANGUL_PUA_BASE
 from bitmap_tracer import trace_bitmap, draw_glyph_to_pen, SCALE, BASELINE_ROW
 from keming_machine import generate_kerning_pairs
 from opentype_features import generate_features, glyph_name
@@ -30,12 +31,6 @@ import sheet_config as SC
 # Codepoints that get cmap entries (user-visible)
 # PUA forms used internally by GSUB get glyphs but NO cmap entries
 _PUA_CMAP_RANGES = [
    range(0xE000, 0xE100),   # Custom symbols
    range(0xF0520, 0xF0580), # Codestyle ASCII
 ]
 def _should_have_cmap(cp):
    """Determine if a codepoint should have a cmap entry."""
    # Standard Unicode characters always get cmap entries
@@ -61,9 +56,8 @@ def _should_have_cmap(cp):
    # Everything in standard Unicode ranges (up to 0xFFFF plus SMP)
    if cp <= 0xFFFF:
        return True
-    # Internal PUA forms (Devanagari, Tamil, Sundanese, Bulgarian, Serbian internals)
+    # Internal PUA forms — GSUB-only, no cmap
-    # These are GSUB-only and should NOT have cmap entries
+    if 0xF0000 <= cp <= 0xF0FFF:
    if 0xF0000 <= cp <= 0xF051F:
        return False
    # Internal control characters
    if 0xFFE00 <= cp <= 0xFFFFF:
@@ -71,8 +65,30 @@ def _should_have_cmap(cp):
    return True
 def _expand_replacewith(glyphs):
    """
    Collect GSUB multiple-substitution data from 'replacewith' glyphs.
    For every glyph whose props carry the 'replacewith' pragma, the first
    required_ext_info_count() entries of its ext_info are read and the
    non-zero ones are taken as the codepoints the glyph expands to
    (e.g. U+01C7 "LJ" → [0x4C, 0x4A]). Glyphs whose entries are all zero
    are left out.
    Returns a list of (source_cp, [target_cp, ...]) in dict iteration order.
    """
    expansions = []
    for source_cp, glyph in glyphs.items():
        props = glyph.props
        if not props.is_pragma("replacewith"):
            continue
        slot_count = props.required_ext_info_count()
        slot_values = (props.ext_info[slot] for slot in range(slot_count))
        # A zero slot means "unused", not U+0000
        nonzero = [value for value in slot_values if value != 0]
        if nonzero:
            expansions.append((source_cp, nonzero))
    return expansions
 def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
-    """Build the complete TTF font."""
+    """Build the complete OTF font."""
    t0 = time.time()
    # Step 1: Parse all sheets
@@ -86,8 +102,13 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
    glyphs.update(hangul_glyphs)
    print(f"  Total glyphs after Hangul: {len(glyphs)}")
-    # Step 3: Create glyph order and cmap
+    # Step 3: Expand replacewith directives
-    print("Step 3: Building glyph order and cmap...")
+    print("Step 3: Processing replacewith directives...")
    replacewith_subs = _expand_replacewith(glyphs)
    print(f"  Found {len(replacewith_subs)} replacewith substitutions")
    # Step 4: Create glyph order and cmap
    print("Step 4: Building glyph order and cmap...")
    glyph_order = [".notdef"]
    cmap = {}
    glyph_set = set()
@@ -111,34 +132,31 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
    print(f"  Glyph order: {len(glyph_order)} glyphs, cmap: {len(cmap)} entries")
-    # Step 4: Build font with fonttools
+    # Step 5: Build font with fonttools (CFF/OTF)
-    print("Step 4: Building font tables...")
+    print("Step 5: Building font tables...")
-    fb = FontBuilder(SC.UNITS_PER_EM, isTTF=True)
+    fb = FontBuilder(SC.UNITS_PER_EM, isTTF=False)
    fb.setupGlyphOrder(glyph_order)
    # Build cmap
    fb.setupCharacterMap(cmap)
-    # Step 5: Trace bitmaps -> glyf table
+    # Step 6: Trace bitmaps -> CFF charstrings
-    print("Step 5: Tracing bitmaps to outlines...")
+    print("Step 6: Tracing bitmaps to CFF outlines...")
    glyph_table = {}
-    pen = TTGlyphPen(None)
+    charstrings = {}
    # .notdef glyph (empty box)
    pen = T2CharStringPen(SC.UNITS_PER_EM // 2, None)
    pen.moveTo((0, 0))
    pen.lineTo((0, SC.ASCENT))
    pen.lineTo((SC.UNITS_PER_EM // 2, SC.ASCENT))
    pen.lineTo((SC.UNITS_PER_EM // 2, 0))
    pen.closePath()
    # Inner box
    _m = 2 * SCALE
    pen.moveTo((_m, _m))
    pen.lineTo((SC.UNITS_PER_EM // 2 - _m, _m))
    pen.lineTo((SC.UNITS_PER_EM // 2 - _m, SC.ASCENT - _m))
    pen.lineTo((_m, SC.ASCENT - _m))
    pen.closePath()
-    glyph_table[".notdef"] = pen.glyph()
+    charstrings[".notdef"] = pen.getCharString()
    traced_count = 0
    for cp in sorted_cps:
@@ -149,25 +167,26 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
        if name == ".notdef" or name not in glyph_set:
            continue
        advance = g.props.width * SCALE
        contours = trace_bitmap(g.bitmap, g.props.width)
-        pen = TTGlyphPen(None)
+        pen = T2CharStringPen(advance, None)
        if contours:
            draw_glyph_to_pen(contours, pen)
            glyph_table[name] = pen.glyph()
            traced_count += 1
-        else:
+        charstrings[name] = pen.getCharString()
            # Empty glyph (space, zero-width, etc.)
            pen.moveTo((0, 0))
            pen.endPath()
            glyph_table[name] = pen.glyph()
    print(f"  Traced {traced_count} glyphs with outlines")
-    fb.setupGlyf(glyph_table)
+    fb.setupCFF(
        psName="TerrarumSansBitmap-Regular",
        fontInfo={},
        charStringsDict=charstrings,
        privateDict={},
    )
-    # Step 6: Set metrics
+    # Step 7: Set metrics
-    print("Step 6: Setting font metrics...")
+    print("Step 7: Setting font metrics...")
    metrics = {}
    metrics[".notdef"] = (SC.UNITS_PER_EM // 2, 0)
@@ -179,7 +198,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
        if name == ".notdef" or name not in glyph_set:
            continue
        advance = g.props.width * SCALE
-        metrics[name] = (advance, 0)  # (advance_width, lsb)
+        metrics[name] = (advance, 0)
    fb.setupHorizontalMetrics(metrics)
    fb.setupHorizontalHeader(
@@ -200,7 +219,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
        usWinDescent=SC.DESCENT,
        sxHeight=SC.X_HEIGHT,
        sCapHeight=SC.CAP_HEIGHT,
-        fsType=0,  # Installable embedding
+        fsType=0,
    )
    fb.setupPost()
@@ -208,13 +227,16 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
    font = fb.font
-    # Step 7: Generate and compile OpenType features
+    # Step 8: Generate and compile OpenType features
    if not no_features:
-        print("Step 7: Generating OpenType features...")
+        print("Step 8: Generating OpenType features...")
        kern_pairs = generate_kerning_pairs(glyphs)
        print(f"  {len(kern_pairs)} kerning pairs")
-        fea_code = generate_features(glyphs, kern_pairs, glyph_set)
+        jamo_data = get_jamo_gsub_data()
        fea_code = generate_features(glyphs, kern_pairs, glyph_set,
                                     replacewith_subs=replacewith_subs,
                                     jamo_data=jamo_data)
        if fea_code.strip():
            print("  Compiling features with feaLib...")
@@ -228,14 +250,14 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
        else:
            print("  No features to compile")
    else:
-        print("Step 7: Skipping OpenType features (--no-features)")
+        print("Step 8: Skipping OpenType features (--no-features)")
-    # Step 8: Add bitmap strike (EBDT/EBLC)
+    # Step 9: Add bitmap strike (EBDT/EBLC)
    if not no_bitmap:
-        print("Step 8: Adding bitmap strike...")
+        print("Step 9: Adding bitmap strike...")
        _add_bitmap_strike(font, glyphs, glyph_order, glyph_set)
    else:
-        print("Step 8: Skipping bitmap strike (--no-bitmap)")
+        print("Step 9: Skipping bitmap strike (--no-bitmap)")
    # Save
    print(f"Saving to {output_path}...")
@@ -254,7 +276,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
    ppem = 20
    name_to_id = {name: idx for idx, name in enumerate(glyph_order)}
    # Collect bitmap data — only glyphs with actual pixels
    bitmap_entries = []
    for name in glyph_order:
        if name == ".notdef":
@@ -272,7 +293,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
        if w == 0 or h == 0:
            continue
        # Pack rows into hex
        hex_rows = []
        for row in bitmap:
            row_bytes = bytearray()
@@ -298,12 +318,9 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
        print("  No bitmap data to embed")
        return
    # Split into contiguous GID runs for separate index subtables
    # This avoids the empty-name problem for gaps
    gid_sorted = sorted(bitmap_entries, key=lambda e: e['gid'])
    gid_to_entry = {e['gid']: e for e in gid_sorted}
-    runs = []  # list of lists of entries
+    runs = []
    current_run = [gid_sorted[0]]
    for i in range(1, len(gid_sorted)):
        if gid_sorted[i]['gid'] == gid_sorted[i-1]['gid'] + 1:
@@ -313,7 +330,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
            current_run = [gid_sorted[i]]
    runs.append(current_run)
    # Build TTX XML for EBDT
    ebdt_xml = ['<EBDT>', '<header version="2.0"/>', '<strikedata index="0">']
    for entry in gid_sorted:
        ebdt_xml.append(f'  <cbdt_bitmap_format_1 name="{entry["name"]}">')
@@ -332,7 +348,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
    ebdt_xml.append('</strikedata>')
    ebdt_xml.append('</EBDT>')
    # Build TTX XML for EBLC
    all_gids = [e['gid'] for e in gid_sorted]
    desc = -(SC.H - BASELINE_ROW)
@@ -371,8 +386,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
        '  </bitmapSizeTable>',
    ])
    # One index subtable per contiguous run — no gaps
    # Use format 1 (32-bit offsets) to avoid 16-bit overflow
    for run in runs:
        first_gid = run[0]['gid']
        last_gid = run[-1]['gid']
--- a/OTFbuild/glyph_parser.py
+++ b/OTFbuild/glyph_parser.py
@@ -191,8 +191,9 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
                        info |= (1 << y)
                ext_info[x] = info
-        # Extract glyph bitmap (all pixels except tag column)
+        # Extract glyph bitmap: only pixels within the glyph's declared width.
-        bitmap_w = cell_w - 1
+        # The tag column and any padding beyond width must be stripped.
        bitmap_w = min(width, cell_w - 1) if width > 0 else 0
        bitmap = []
        for row in range(cell_h):
            row_data = []
@@ -206,14 +207,98 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
    return result
 def _read_hangul_cell(image, column, row, cell_w=SC.W_HANGUL_BASE, cell_h=SC.H):
    """
    Read the cell at grid position (column, row) of the Hangul johab sheet.
    Returns cell_h rows of cell_w ints; a pixel becomes 1 when the low byte
    of image.get_pixel(x, y) is non-zero, else 0.
    """
    left = column * cell_w
    top = row * cell_h
    # Row-major scan, same pixel order as a nested for-loop
    return [
        [
            1 if (image.get_pixel(left + dx, top + dy) & 0xFF) != 0 else 0
            for dx in range(cell_w)
        ]
        for dy in range(cell_h)
    ]
 def parse_hangul_jamo_sheet(image, cell_w, cell_h):
    """
    Parse the default-row bitmap of every Hangul jamo from hangul_johab.tga.
    Each codepoint block maps to consecutive columns of one sheet row:
      - U+1160 (Jungseong filler): column 0, row 15
      - Choseong U+1100-U+115E: column = cp - 0x1100, row 1
      - U+115F (Choseong filler): column 0x5F, row 1
      - Jungseong U+1161-U+11A7: column = cp - 0x1160, row 15
      - Jongseong U+11A8-U+11FF: column = cp - 0x11A8 + 1, row 17
      - Extended Choseong U+A960-U+A97F: column = 96 + offset, row 1
      - Extended Jungseong U+D7B0-U+D7C6: column = 72 + offset, row 15
      - Extended Jongseong U+D7CB-U+D7FB: column = 89 + offset, row 17
    Only the default row is read here; the other variant rows used for
    syllable composition are handled separately (hangul.py / GSUB).
    Returns a dict of codepoint -> ExtractedGlyph with width=cell_w.
    """
    # (first_cp, last_cp, first_column, sheet_row), listed in insertion order
    layout = (
        (0x1160, 0x1160, 0, 15),     # Jungseong filler
        (0x1100, 0x115E, 0, 1),      # Choseong
        (0x115F, 0x115F, 0x5F, 1),   # Choseong filler
        (0x1161, 0x11A7, 1, 15),     # Jungseong
        (0x11A8, 0x11FF, 1, 17),     # Jongseong
        (0xA960, 0xA97F, 96, 1),     # Extended Choseong
        (0xD7B0, 0xD7C6, 72, 15),    # Extended Jungseong
        (0xD7CB, 0xD7FB, 89, 17),    # Extended Jongseong
    )
    jamo = {}
    for first_cp, last_cp, first_col, sheet_row in layout:
        for offset in range(last_cp - first_cp + 1):
            codepoint = first_cp + offset
            pixels = _read_hangul_cell(
                image, first_col + offset, sheet_row, cell_w, cell_h
            )
            jamo[codepoint] = ExtractedGlyph(
                codepoint, GlyphProps(width=cell_w), pixels
            )
    return jamo
 def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols):
    """Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym)."""
    # Hangul Jamo sheet has special layout — handled separately
    if sheet_index == SC.SHEET_HANGUL:
        return parse_hangul_jamo_sheet(image, cell_w, cell_h)
    code_range = SC.CODE_RANGE[sheet_index]
    result = {}
    fixed_width = {
        SC.SHEET_CUSTOM_SYM: 20,
        SC.SHEET_HANGUL: SC.W_HANGUL_BASE,
        SC.SHEET_RUNIC: 9,
        SC.SHEET_UNIHAN: SC.W_UNIHAN,
    }.get(sheet_index, cell_w)
@@ -301,7 +386,7 @@ def _add_fixed_width_overrides(result):
 def get_hangul_jamo_bitmaps(assets_dir):
    """
    Extract raw Hangul jamo bitmaps from the Hangul sheet for composition.
-    Returns a function: (index, row) -> bitmap (list of list of int)
+    Returns a function: (column_index, row) -> bitmap (list of list of int)
    """
    filename = SC.FILE_LIST[SC.SHEET_HANGUL]
    filepath = os.path.join(assets_dir, filename)
@@ -326,3 +411,41 @@ def get_hangul_jamo_bitmaps(assets_dir):
        return bitmap
    return get_bitmap
 def extract_hangul_jamo_variants(assets_dir):
    """
    Collect every non-empty cell of hangul_johab.tga.
    Returns a dict of (column, row) -> bitmap, or an empty dict when the
    sheet file does not exist. hangul.py stores these variants in the PUA
    for GSUB assembly.
    Sheet layout:
      Row 0: Hangul Compatibility Jamo (U+3130-U+318F)
      Rows 1-14: Choseong variants (row depends on jungseong context)
      Rows 15-16: Jungseong variants (15=no final, 16=with final)
      Rows 17-18: Jongseong variants (17=normal, 18=rightie jungseong)
      Rows 19-24: Additional choseong variants (giyeok remapping)
    """
    sheet_path = os.path.join(assets_dir, SC.FILE_LIST[SC.SHEET_HANGUL])
    if not os.path.exists(sheet_path):
        return {}
    sheet = read_tga(sheet_path)
    cell_w, cell_h = SC.W_HANGUL_BASE, SC.H
    # Scan the full grid the image can hold rather than a fixed row count
    row_count = sheet.height // cell_h
    col_count = sheet.width // cell_w
    found = {}
    for sheet_row in range(row_count):
        for sheet_col in range(col_count):
            cell = _read_hangul_cell(sheet, sheet_col, sheet_row, cell_w, cell_h)
            # Keep the cell only if at least one pixel is set
            if any(map(any, cell)):
                found[(sheet_col, sheet_row)] = cell
    return found
--- a/OTFbuild/hangul.py
+++ b/OTFbuild/hangul.py
@@ -1,15 +1,24 @@
 """
 Compose 11,172 Hangul syllables (U+AC00-U+D7A3) from jamo sprite pieces.
 Also composes Hangul Compatibility Jamo (U+3130-U+318F).
 Also stores all jamo variant bitmaps in PUA for GSUB-based jamo assembly.
 Ported from HangulCompositor.kt and TerrarumSansBitmap.kt.
 """
 from typing import Dict, List, Tuple
-from glyph_parser import ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps
+from glyph_parser import (
    ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps,
    extract_hangul_jamo_variants, _read_hangul_cell, _empty_bitmap,
 )
 import sheet_config as SC
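 # PUA range for Hangul jamo variant storage.
 # Variants are addressed as HANGUL_PUA_BASE + row * 256 + col (see
 # _pua_for_jamo_variant), so each sheet row consumes a 256-codepoint stride
 # regardless of how many columns are actually used.
 # NOTE: 0xF0600-0xF0FFF holds 2560 slots, i.e. only rows 0-9 at that stride;
 # rows 10 and above map to 0xF1000 and beyond.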
 HANGUL_PUA_BASE = 0xF0600
 def _compose_bitmaps(a, b, w, h):
    """OR two bitmaps together."""
@@ -32,9 +41,15 @@ def _compose_bitmap_into(target, source, w, h):
                target[row][col] = 1
 def _pua_for_jamo_variant(col, row):
    """
    Map a jamo sheet cell (column, row) to its PUA storage codepoint.
    Every row owns a 256-codepoint stride starting at HANGUL_PUA_BASE, so
    distinct cells get distinct codepoints as long as col stays below 256.
    """
    row_start = HANGUL_PUA_BASE + 256 * row
    return row_start + col
 def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
    """
-    Compose all Hangul syllables and compatibility jamo.
+    Compose all Hangul syllables, compatibility jamo, and jamo variants.
    Returns a dict of codepoint -> ExtractedGlyph.
    """
    get_jamo = get_hangul_jamo_bitmaps(assets_dir)
@@ -94,5 +109,39 @@ def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
        props = GlyphProps(width=advance_width)
        result[c] = ExtractedGlyph(c, props, composed)
-    print(f"  Hangul composition done: {len(result)} glyphs")
+    print(f"  Hangul syllable composition done: {len(result)} glyphs")
    # Store jamo variant bitmaps in PUA for GSUB assembly
    print("  Extracting jamo variants for GSUB...")
    variants = extract_hangul_jamo_variants(assets_dir)
    variant_count = 0
    for (col, row), bm in variants.items():
        pua = _pua_for_jamo_variant(col, row)
        if pua not in result:
            result[pua] = ExtractedGlyph(pua, GlyphProps(width=cell_w), bm)
            variant_count += 1
    print(f"  Stored {variant_count} jamo variant glyphs in PUA (0x{HANGUL_PUA_BASE:05X}+)")
    print(f"  Total Hangul glyphs: {len(result)}")
    return result
 def get_jamo_gsub_data():
    """
    Build the data bundle consumed by the Hangul jamo GSUB generator.
    Returns a dict with exactly two entries:
      - 'pua_fn': function(col, row) -> PUA codepoint of that sheet cell
      - 'pua_base': first codepoint of the jamo variant PUA area
    Row selection itself is not part of this bundle. The rules from the
    Kotlin code are:
      Choseong row = getHanInitialRow(i_cho, i_jung, i_jong)
      Jungseong row = 15 if no final, else 16
      Jongseong row = 17 if jungseong is not rightie, else 18
    """
    gsub_data = {}
    gsub_data['pua_fn'] = _pua_for_jamo_variant
    gsub_data['pua_base'] = HANGUL_PUA_BASE
    return gsub_data
--- a/OTFbuild/opentype_features.py
+++ b/OTFbuild/opentype_features.py
@@ -28,7 +28,8 @@ def glyph_name(cp):
    return f"u{cp:05X}" if cp <= 0xFFFFF else f"u{cp:06X}"
-def generate_features(glyphs, kern_pairs, font_glyph_set):
+def generate_features(glyphs, kern_pairs, font_glyph_set,
                      replacewith_subs=None, jamo_data=None):
    """
    Generate complete OpenType feature code string.
@@ -36,6 +37,8 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
        glyphs: dict of codepoint -> ExtractedGlyph
        kern_pairs: dict of (left_cp, right_cp) -> kern_value_in_font_units
        font_glyph_set: set of glyph names actually present in the font
        replacewith_subs: list of (source_cp, [target_cp, ...]) for ccmp
        jamo_data: dict with Hangul jamo GSUB data
    Returns:
        Feature code string for feaLib compilation.
    """
@@ -44,6 +47,16 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
    def has(cp):
        return glyph_name(cp) in font_glyph_set
    # ccmp feature (replacewith directives + Hangul jamo decomposition)
    ccmp_code = _generate_ccmp(replacewith_subs or [], has)
    if ccmp_code:
        parts.append(ccmp_code)
    # Hangul jamo GSUB assembly
    hangul_code = _generate_hangul_gsub(glyphs, has, jamo_data)
    if hangul_code:
        parts.append(hangul_code)
    # kern feature
    kern_code = _generate_kern(kern_pairs, has)
    if kern_code:
@@ -82,6 +95,209 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
    return '\n\n'.join(parts)
 def _generate_ccmp(replacewith_subs, has):
    """
    Build the ccmp feature that expands 'replacewith' glyphs.
    replacewith_subs is a list of (source_cp, [target_cp, ...]); has(cp)
    tells whether a codepoint's glyph is in the font. A pair is emitted as
    one multiple-substitution rule only when the source and every target
    are present. Returns "" when nothing is emitted.
    """
    rules = [
        f"    sub {glyph_name(src_cp)} by {' '.join(glyph_name(t) for t in target_cps)};"
        for src_cp, target_cps in (replacewith_subs or ())
        if has(src_cp) and all(has(t) for t in target_cps)
    ]
    if not rules:
        return ""
    return '\n'.join([
        "feature ccmp {",
        "    lookup ReplacewithExpansion {",
        *rules,
        "    } ReplacewithExpansion;",
        "} ccmp;",
    ])
 def _generate_hangul_gsub(glyphs, has, jamo_data):
    """
    Generate Hangul jamo GSUB lookups and the ljmo/vjmo/tjmo features.
    When a shaping engine encounters consecutive Hangul Jamo (Choseong +
    Jungseong + optional Jongseong), these lookups substitute each jamo
    with the positional variant stored in the PUA area.
    Row selection mirrors the Kotlin code:
      - Choseong row depends on which jungseong follows. Rows are computed
        here for the no-final case only (see the ljmo section below).
      - Jungseong row is 15 (no final) or 16 (with final)
      - Jongseong row is 17 (normal) or 18 (rightie jungseong)
    Args:
        glyphs: not read by this function; kept for the caller's signature.
        has: callable(cp) -> bool, True when the codepoint's glyph is in
            the font.
        jamo_data: dict providing 'pua_fn' (col, row) -> PUA codepoint.
            A falsy value disables generation.
    Returns:
        Feature code string (standalone lookups, a blank line, then the
        feature blocks), or "" when there is nothing to emit.
    """
    if not jamo_data:
        return ""
    pua_fn = jamo_data['pua_fn']

    lines = []

    # --- ljmo: Choseong variant selection ---
    # Group jungseong indices by the choseong row they select. The row is
    # computed for f=0 (no final) because the final is not known at this
    # point; i=1 avoids the giyeok edge cases.
    row_to_jung_indices = {}
    for p in range(96):  # all possible jungseong indices
        try:
            row_nf = SC.get_han_initial_row(1, p, 0)
        except (ValueError, KeyError):
            continue
        row_to_jung_indices.setdefault(row_nf, []).append(p)

    # One standalone lookup per non-default choseong row
    ljmo_lookups = []
    for cho_row, jung_indices in sorted(row_to_jung_indices.items()):
        if cho_row == 1:
            continue  # row 1 is the default, no substitution needed
        lookup_name = f"ljmo_row{cho_row}"
        subs = []
        # Standard choseong (U+1100-U+115E); sheet column = cp - 0x1100
        for cho_cp in range(0x1100, 0x115F):
            variant_pua = pua_fn(cho_cp - 0x1100, cho_row)
            if has(cho_cp) and has(variant_pua):
                subs.append(f"        sub {glyph_name(cho_cp)} by {glyph_name(variant_pua)};")
        if subs:
            lines.append(f"lookup {lookup_name} {{")
            lines.extend(subs)
            lines.append(f"}} {lookup_name};")
            ljmo_lookups.append((lookup_name, jung_indices))

    # --- vjmo: Jungseong variant selection ---
    # Row 15 is the default; row 16 is used when a jongseong follows
    vjmo_subs_16 = []
    for jung_cp in range(0x1161, 0x11A8):
        variant_pua = pua_fn(jung_cp - 0x1160, 16)
        if has(jung_cp) and has(variant_pua):
            vjmo_subs_16.append(f"    sub {glyph_name(jung_cp)} by {glyph_name(variant_pua)};")
    if vjmo_subs_16:
        lines.append("lookup vjmo_withfinal {")
        lines.extend(vjmo_subs_16)
        lines.append("} vjmo_withfinal;")

    # --- tjmo: Jongseong variant selection ---
    # Row 17 is the default; row 18 is used after a rightie jungseong
    tjmo_subs_18 = []
    for jong_cp in range(0x11A8, 0x1200):
        variant_pua = pua_fn(jong_cp - 0x11A8 + 1, 18)
        if has(jong_cp) and has(variant_pua):
            tjmo_subs_18.append(f"    sub {glyph_name(jong_cp)} by {glyph_name(variant_pua)};")
    if tjmo_subs_18:
        lines.append("lookup tjmo_rightie {")
        lines.extend(tjmo_subs_18)
        lines.append("} tjmo_rightie;")

    # --- Features wiring the lookups in through contextual substitution ---
    feature_lines = []

    # ljmo feature: choseong' [lookup X] jungseong-in-class-X
    if ljmo_lookups:
        feature_lines.append("feature ljmo {")
        feature_lines.append("    script hang;")
        # Resolve each lookup's jungseong trigger class exactly once
        ljmo_classes = []
        for lookup_name, jung_indices in ljmo_lookups:
            jung_glyphs = [
                glyph_name(0x1160 + idx)
                for idx in jung_indices
                if has(0x1160 + idx)
            ]
            if jung_glyphs:
                ljmo_classes.append(
                    (lookup_name, f"@jung_for_{lookup_name}", jung_glyphs)
                )
        for _, class_name, jung_glyphs in ljmo_classes:
            feature_lines.append(f"    {class_name} = [{' '.join(jung_glyphs)}];")
        # The choseong class is the same for every rule: define it once
        cho_glyphs = [glyph_name(cp) for cp in range(0x1100, 0x115F) if has(cp)]
        if ljmo_classes and cho_glyphs:
            feature_lines.append(f"    @choseong = [{' '.join(cho_glyphs)}];")
            for lookup_name, class_name, _ in ljmo_classes:
                feature_lines.append(
                    f"    sub @choseong' lookup {lookup_name} {class_name};"
                )
        feature_lines.append("} ljmo;")

    # vjmo feature: jungseong gets its row 16 variant when a jongseong follows
    if vjmo_subs_16:
        jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
        if jong_glyphs:
            feature_lines.append("feature vjmo {")
            feature_lines.append("    script hang;")
            jung_glyphs = [glyph_name(cp) for cp in range(0x1161, 0x11A8) if has(cp)]
            feature_lines.append(f"    @jongseong = [{' '.join(jong_glyphs)}];")
            feature_lines.append(f"    @jungseong = [{' '.join(jung_glyphs)}];")
            feature_lines.append("    sub @jungseong' lookup vjmo_withfinal @jongseong;")
            feature_lines.append("} vjmo;")

    # tjmo feature: jongseong gets its row 18 variant after a rightie jungseong
    if tjmo_subs_18:
        rightie_glyphs = []
        for idx in sorted(SC.JUNGSEONG_RIGHTIE):
            cp = 0x1160 + idx
            if has(cp):
                rightie_glyphs.append(glyph_name(cp))
            # The jungseong may already have been replaced by its row 16
            # PUA variant, so that form must match as context too
            pua16 = pua_fn(idx, 16)
            if has(pua16):
                rightie_glyphs.append(glyph_name(pua16))
        if rightie_glyphs:
            feature_lines.append("feature tjmo {")
            feature_lines.append("    script hang;")
            feature_lines.append(f"    @rightie_jung = [{' '.join(rightie_glyphs)}];")
            jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
            feature_lines.append(f"    @jongseong_all = [{' '.join(jong_glyphs)}];")
            feature_lines.append("    sub @rightie_jung @jongseong_all' lookup tjmo_rightie;")
            feature_lines.append("} tjmo;")

    if not lines and not feature_lines:
        return ""
    return '\n'.join(lines + [''] + feature_lines)
 def _generate_kern(kern_pairs, has):
    """Generate kern feature from pair positioning data."""
    if not kern_pairs: