This commit is contained in:
minjaesong
2026-02-23 19:32:25 +09:00
parent 5e2cacd491
commit 949b6aa777
6 changed files with 476 additions and 71 deletions

View File

@@ -33,8 +33,8 @@ def main():
)
parser.add_argument(
"-o", "--output",
default="OTFbuild/TerrarumSansBitmap.ttf",
help="Output TTF file path (default: OTFbuild/TerrarumSansBitmap.ttf)"
default="OTFbuild/TerrarumSansBitmap.otf",
help="Output OTF file path (default: OTFbuild/TerrarumSansBitmap.otf)"
)
parser.add_argument(
"--no-bitmap",

View File

@@ -3,25 +3,26 @@ Orchestrate fonttools TTFont assembly.
1. Parse all sheets -> glyphs dict
2. Compose Hangul -> add to dict
3. Create glyph order and cmap
4. Trace all bitmaps -> glyf table
5. Set hmtx, hhea, OS/2, head, name, post
6. Generate and compile OpenType features via feaLib
7. Add EBDT/EBLC bitmap strike at ppem=20
8. Save TTF
3. Expand replacewith directives
4. Create glyph order and cmap
5. Trace all bitmaps -> CFF charstrings
6. Set hmtx, hhea, OS/2, head, name, post
7. Generate and compile OpenType features via feaLib
8. Add EBDT/EBLC bitmap strike at ppem=20
9. Save OTF
"""
import time
from typing import Dict
from fontTools.fontBuilder import FontBuilder
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.pens.t2CharStringPen import T2CharStringPen
from fontTools.feaLib.builder import addOpenTypeFeatures
from fontTools.ttLib import TTFont
import io
from glyph_parser import ExtractedGlyph, parse_all_sheets
from hangul import compose_hangul
from glyph_parser import ExtractedGlyph, GlyphProps, parse_all_sheets
from hangul import compose_hangul, get_jamo_gsub_data, HANGUL_PUA_BASE
from bitmap_tracer import trace_bitmap, draw_glyph_to_pen, SCALE, BASELINE_ROW
from keming_machine import generate_kerning_pairs
from opentype_features import generate_features, glyph_name
@@ -30,12 +31,6 @@ import sheet_config as SC
# Codepoints that get cmap entries (user-visible)
# PUA forms used internally by GSUB get glyphs but NO cmap entries
_PUA_CMAP_RANGES = [
range(0xE000, 0xE100), # Custom symbols
range(0xF0520, 0xF0580), # Codestyle ASCII
]
def _should_have_cmap(cp):
"""Determine if a codepoint should have a cmap entry."""
# Standard Unicode characters always get cmap entries
@@ -61,9 +56,8 @@ def _should_have_cmap(cp):
# Everything in standard Unicode ranges (up to 0xFFFF plus SMP)
if cp <= 0xFFFF:
return True
# Internal PUA forms (Devanagari, Tamil, Sundanese, Bulgarian, Serbian internals)
# These are GSUB-only and should NOT have cmap entries
if 0xF0000 <= cp <= 0xF051F:
# Internal PUA forms — GSUB-only, no cmap
if 0xF0000 <= cp <= 0xF0FFF:
return False
# Internal control characters
if 0xFFE00 <= cp <= 0xFFFFF:
@@ -71,8 +65,30 @@ def _should_have_cmap(cp):
return True
def _expand_replacewith(glyphs):
"""
Find glyphs with 'replacewith' directive and generate GSUB multiple
substitution data. Returns list of (source_cp, [target_cp, ...]).
A replacewith glyph's extInfo contains up to 7 codepoints that the
glyph expands to (e.g. U+01C7 "LJ" → [0x4C, 0x4A]).
"""
replacements = []
for cp, g in glyphs.items():
if g.props.is_pragma("replacewith"):
targets = []
count = g.props.required_ext_info_count()
for i in range(count):
val = g.props.ext_info[i]
if val != 0:
targets.append(val)
if targets:
replacements.append((cp, targets))
return replacements
def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
"""Build the complete TTF font."""
"""Build the complete OTF font."""
t0 = time.time()
# Step 1: Parse all sheets
@@ -86,8 +102,13 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
glyphs.update(hangul_glyphs)
print(f" Total glyphs after Hangul: {len(glyphs)}")
# Step 3: Create glyph order and cmap
print("Step 3: Building glyph order and cmap...")
# Step 3: Expand replacewith directives
print("Step 3: Processing replacewith directives...")
replacewith_subs = _expand_replacewith(glyphs)
print(f" Found {len(replacewith_subs)} replacewith substitutions")
# Step 4: Create glyph order and cmap
print("Step 4: Building glyph order and cmap...")
glyph_order = [".notdef"]
cmap = {}
glyph_set = set()
@@ -111,34 +132,31 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
print(f" Glyph order: {len(glyph_order)} glyphs, cmap: {len(cmap)} entries")
# Step 4: Build font with fonttools
print("Step 4: Building font tables...")
fb = FontBuilder(SC.UNITS_PER_EM, isTTF=True)
# Step 5: Build font with fonttools (CFF/OTF)
print("Step 5: Building font tables...")
fb = FontBuilder(SC.UNITS_PER_EM, isTTF=False)
fb.setupGlyphOrder(glyph_order)
# Build cmap
fb.setupCharacterMap(cmap)
# Step 5: Trace bitmaps -> glyf table
print("Step 5: Tracing bitmaps to outlines...")
glyph_table = {}
# Step 6: Trace bitmaps -> CFF charstrings
print("Step 6: Tracing bitmaps to CFF outlines...")
pen = TTGlyphPen(None)
charstrings = {}
# .notdef glyph (empty box)
pen = T2CharStringPen(SC.UNITS_PER_EM // 2, None)
pen.moveTo((0, 0))
pen.lineTo((0, SC.ASCENT))
pen.lineTo((SC.UNITS_PER_EM // 2, SC.ASCENT))
pen.lineTo((SC.UNITS_PER_EM // 2, 0))
pen.closePath()
# Inner box
_m = 2 * SCALE
pen.moveTo((_m, _m))
pen.lineTo((SC.UNITS_PER_EM // 2 - _m, _m))
pen.lineTo((SC.UNITS_PER_EM // 2 - _m, SC.ASCENT - _m))
pen.lineTo((_m, SC.ASCENT - _m))
pen.closePath()
glyph_table[".notdef"] = pen.glyph()
charstrings[".notdef"] = pen.getCharString()
traced_count = 0
for cp in sorted_cps:
@@ -149,25 +167,26 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
if name == ".notdef" or name not in glyph_set:
continue
advance = g.props.width * SCALE
contours = trace_bitmap(g.bitmap, g.props.width)
pen = TTGlyphPen(None)
pen = T2CharStringPen(advance, None)
if contours:
draw_glyph_to_pen(contours, pen)
glyph_table[name] = pen.glyph()
traced_count += 1
else:
# Empty glyph (space, zero-width, etc.)
pen.moveTo((0, 0))
pen.endPath()
glyph_table[name] = pen.glyph()
charstrings[name] = pen.getCharString()
print(f" Traced {traced_count} glyphs with outlines")
fb.setupGlyf(glyph_table)
fb.setupCFF(
psName="TerrarumSansBitmap-Regular",
fontInfo={},
charStringsDict=charstrings,
privateDict={},
)
# Step 6: Set metrics
print("Step 6: Setting font metrics...")
# Step 7: Set metrics
print("Step 7: Setting font metrics...")
metrics = {}
metrics[".notdef"] = (SC.UNITS_PER_EM // 2, 0)
@@ -179,7 +198,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
if name == ".notdef" or name not in glyph_set:
continue
advance = g.props.width * SCALE
metrics[name] = (advance, 0) # (advance_width, lsb)
metrics[name] = (advance, 0)
fb.setupHorizontalMetrics(metrics)
fb.setupHorizontalHeader(
@@ -200,7 +219,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
usWinDescent=SC.DESCENT,
sxHeight=SC.X_HEIGHT,
sCapHeight=SC.CAP_HEIGHT,
fsType=0, # Installable embedding
fsType=0,
)
fb.setupPost()
@@ -208,13 +227,16 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
font = fb.font
# Step 7: Generate and compile OpenType features
# Step 8: Generate and compile OpenType features
if not no_features:
print("Step 7: Generating OpenType features...")
print("Step 8: Generating OpenType features...")
kern_pairs = generate_kerning_pairs(glyphs)
print(f" {len(kern_pairs)} kerning pairs")
fea_code = generate_features(glyphs, kern_pairs, glyph_set)
jamo_data = get_jamo_gsub_data()
fea_code = generate_features(glyphs, kern_pairs, glyph_set,
replacewith_subs=replacewith_subs,
jamo_data=jamo_data)
if fea_code.strip():
print(" Compiling features with feaLib...")
@@ -228,14 +250,14 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
else:
print(" No features to compile")
else:
print("Step 7: Skipping OpenType features (--no-features)")
print("Step 8: Skipping OpenType features (--no-features)")
# Step 8: Add bitmap strike (EBDT/EBLC)
# Step 9: Add bitmap strike (EBDT/EBLC)
if not no_bitmap:
print("Step 8: Adding bitmap strike...")
print("Step 9: Adding bitmap strike...")
_add_bitmap_strike(font, glyphs, glyph_order, glyph_set)
else:
print("Step 8: Skipping bitmap strike (--no-bitmap)")
print("Step 9: Skipping bitmap strike (--no-bitmap)")
# Save
print(f"Saving to {output_path}...")
@@ -254,7 +276,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
ppem = 20
name_to_id = {name: idx for idx, name in enumerate(glyph_order)}
# Collect bitmap data — only glyphs with actual pixels
bitmap_entries = []
for name in glyph_order:
if name == ".notdef":
@@ -272,7 +293,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
if w == 0 or h == 0:
continue
# Pack rows into hex
hex_rows = []
for row in bitmap:
row_bytes = bytearray()
@@ -298,12 +318,9 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
print(" No bitmap data to embed")
return
# Split into contiguous GID runs for separate index subtables
# This avoids the empty-name problem for gaps
gid_sorted = sorted(bitmap_entries, key=lambda e: e['gid'])
gid_to_entry = {e['gid']: e for e in gid_sorted}
runs = [] # list of lists of entries
runs = []
current_run = [gid_sorted[0]]
for i in range(1, len(gid_sorted)):
if gid_sorted[i]['gid'] == gid_sorted[i-1]['gid'] + 1:
@@ -313,7 +330,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
current_run = [gid_sorted[i]]
runs.append(current_run)
# Build TTX XML for EBDT
ebdt_xml = ['<EBDT>', '<header version="2.0"/>', '<strikedata index="0">']
for entry in gid_sorted:
ebdt_xml.append(f' <cbdt_bitmap_format_1 name="{entry["name"]}">')
@@ -332,7 +348,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
ebdt_xml.append('</strikedata>')
ebdt_xml.append('</EBDT>')
# Build TTX XML for EBLC
all_gids = [e['gid'] for e in gid_sorted]
desc = -(SC.H - BASELINE_ROW)
@@ -371,8 +386,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
' </bitmapSizeTable>',
])
# One index subtable per contiguous run — no gaps
# Use format 1 (32-bit offsets) to avoid 16-bit overflow
for run in runs:
first_gid = run[0]['gid']
last_gid = run[-1]['gid']

View File

@@ -191,8 +191,9 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
info |= (1 << y)
ext_info[x] = info
# Extract glyph bitmap (all pixels except tag column)
bitmap_w = cell_w - 1
# Extract glyph bitmap: only pixels within the glyph's declared width.
# The tag column and any padding beyond width must be stripped.
bitmap_w = min(width, cell_w - 1) if width > 0 else 0
bitmap = []
for row in range(cell_h):
row_data = []
@@ -206,14 +207,98 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
return result
def _read_hangul_cell(image, column, row, cell_w=SC.W_HANGUL_BASE, cell_h=SC.H):
    """
    Read one cell of the Hangul johab sheet.

    The cell at grid position (column, row) is sampled pixel by pixel;
    any pixel with a non-zero low byte counts as "on".

    Returns:
        list of cell_h rows, each a list of cell_w ints (0 or 1).
    """
    x0 = column * cell_w
    y0 = row * cell_h
    return [
        [1 if (image.get_pixel(x0 + dx, y0 + dy) & 0xFF) != 0 else 0
         for dx in range(cell_w)]
        for dy in range(cell_h)
    ]
def parse_hangul_jamo_sheet(image, cell_w, cell_h):
    """
    Parse the Hangul Jamo sheet with correct row/column mapping.

    Layout in hangul_johab.tga:
    - Choseong (U+1100-U+115F, incl. the U+115F filler): column = cp - 0x1100, row = 1
    - Jungseong filler U+1160: column 0, row 15
    - Jungseong (U+1161-U+11A7): column = cp - 0x1160, row = 15
    - Jongseong (U+11A8-U+11FF): column = cp - 0x11A8 + 1, row = 17
      (columns start at 1 for U+11A8)
    - Extended Choseong (U+A960-U+A97F): column = 96 + offset, row = 1
    - Extended Jungseong (U+D7B0-U+D7C6): column = 72 + offset, row = 15
    - Extended Jongseong (U+D7CB-U+D7FB): column = 89 + offset, row = 17

    Each jamo gets a default-row bitmap.  Multiple variant rows exist for
    syllable composition (handled separately by hangul.py / GSUB).

    Returns:
        dict of codepoint -> ExtractedGlyph (all glyphs at width cell_w).
    """
    result = {}

    def read_block(first_cp, last_cp, col_offset, row):
        # Map codepoints [first_cp, last_cp] to sheet columns
        # (cp - first_cp + col_offset) of the given row.
        for cp in range(first_cp, last_cp + 1):
            bm = _read_hangul_cell(image, cp - first_cp + col_offset, row,
                                   cell_w, cell_h)
            result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm)

    # U+1160 (Hangul Jungseong Filler) — column 0, row 15
    read_block(0x1160, 0x1160, 0, 15)
    # Choseong incl. U+115F (Hangul Choseong Filler); the original handled
    # U+115F in a separate, identical block — folded into one range here.
    read_block(0x1100, 0x115F, 0, 1)
    # Jungseong: column = cp - 0x1160, i.e. columns 1..71 of row 15
    read_block(0x1161, 0x11A7, 1, 15)
    # Jongseong: columns start at 1 for U+11A8
    read_block(0x11A8, 0x11FF, 1, 17)
    # Extended Choseong
    read_block(0xA960, 0xA97F, 96, 1)
    # Extended Jungseong
    read_block(0xD7B0, 0xD7C6, 72, 15)
    # Extended Jongseong (the original spelled this offset as 88 + 1)
    read_block(0xD7CB, 0xD7FB, 89, 17)
    return result
def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols):
"""Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym)."""
# Hangul Jamo sheet has special layout — handled separately
if sheet_index == SC.SHEET_HANGUL:
return parse_hangul_jamo_sheet(image, cell_w, cell_h)
code_range = SC.CODE_RANGE[sheet_index]
result = {}
fixed_width = {
SC.SHEET_CUSTOM_SYM: 20,
SC.SHEET_HANGUL: SC.W_HANGUL_BASE,
SC.SHEET_RUNIC: 9,
SC.SHEET_UNIHAN: SC.W_UNIHAN,
}.get(sheet_index, cell_w)
@@ -301,7 +386,7 @@ def _add_fixed_width_overrides(result):
def get_hangul_jamo_bitmaps(assets_dir):
"""
Extract raw Hangul jamo bitmaps from the Hangul sheet for composition.
Returns a function: (index, row) -> bitmap (list of list of int)
Returns a function: (column_index, row) -> bitmap (list of list of int)
"""
filename = SC.FILE_LIST[SC.SHEET_HANGUL]
filepath = os.path.join(assets_dir, filename)
@@ -326,3 +411,41 @@ def get_hangul_jamo_bitmaps(assets_dir):
return bitmap
return get_bitmap
def extract_hangul_jamo_variants(assets_dir):
    """
    Extract ALL Hangul jamo variant bitmaps from hangul_johab.tga.

    Returns dict of (column, row) -> bitmap for every non-empty cell.
    Used by hangul.py to store variants in PUA for GSUB assembly.

    Layout:
      Row 0:      Hangul Compatibility Jamo (U+3130-U+318F)
      Rows 1-14:  Choseong variants (row depends on jungseong context)
      Rows 15-16: Jungseong variants (15=no final, 16=with final)
      Rows 17-18: Jongseong variants (17=normal, 18=rightie jungseong)
      Rows 19-24: Additional choseong variants (giyeok remapping)
    """
    filepath = os.path.join(assets_dir, SC.FILE_LIST[SC.SHEET_HANGUL])
    if not os.path.exists(filepath):
        return {}
    image = read_tga(filepath)
    cell_w = SC.W_HANGUL_BASE
    cell_h = SC.H

    # Scan every cell the image actually contains; empty cells are skipped.
    variants = {}
    for row in range(image.height // cell_h):
        for col in range(image.width // cell_w):
            cell = _read_hangul_cell(image, col, row, cell_w, cell_h)
            if any(any(line) for line in cell):
                variants[(col, row)] = cell
    return variants

View File

@@ -1,15 +1,24 @@
"""
Compose 11,172 Hangul syllables (U+AC00-U+D7A3) from jamo sprite pieces.
Also composes Hangul Compatibility Jamo (U+3130-U+318F).
Also stores all jamo variant bitmaps in PUA for GSUB-based jamo assembly.
Ported from HangulCompositor.kt and TerrarumSansBitmap.kt.
"""
from typing import Dict, List, Tuple
from glyph_parser import ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps
from glyph_parser import (
ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps,
extract_hangul_jamo_variants, _read_hangul_cell, _empty_bitmap,
)
import sheet_config as SC
# PUA base for Hangul jamo variant storage.  Variants are keyed by their
# (column, row) position in the jamo sheet; _pua_for_jamo_variant encodes
# them as HANGUL_PUA_BASE + row * 256 + col.
# NOTE(review): with that encoding, sheet rows >= 10 map past 0xF0FFF —
# confirm the builder's internal-PUA cmap exclusion covers the whole block.
HANGUL_PUA_BASE = 0xF0600
def _compose_bitmaps(a, b, w, h):
"""OR two bitmaps together."""
@@ -32,9 +41,15 @@ def _compose_bitmap_into(target, source, w, h):
target[row][col] = 1
def _pua_for_jamo_variant(col, row):
    """
    PUA codepoint assigned to the jamo variant at (column, row) in the sheet.

    Encoding: HANGUL_PUA_BASE + (row << 8) + col, i.e. 256 column slots per
    sheet row.
    NOTE(review): rows >= 10 land past 0xF0FFF — verify this stays inside
    the range the builder treats as internal (no-cmap) PUA.
    """
    return HANGUL_PUA_BASE + (row << 8) + col
def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
"""
Compose all Hangul syllables and compatibility jamo.
Compose all Hangul syllables, compatibility jamo, and jamo variants.
Returns a dict of codepoint -> ExtractedGlyph.
"""
get_jamo = get_hangul_jamo_bitmaps(assets_dir)
@@ -94,5 +109,39 @@ def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
props = GlyphProps(width=advance_width)
result[c] = ExtractedGlyph(c, props, composed)
print(f" Hangul composition done: {len(result)} glyphs")
print(f" Hangul syllable composition done: {len(result)} glyphs")
# Store jamo variant bitmaps in PUA for GSUB assembly
print(" Extracting jamo variants for GSUB...")
variants = extract_hangul_jamo_variants(assets_dir)
variant_count = 0
for (col, row), bm in variants.items():
pua = _pua_for_jamo_variant(col, row)
if pua not in result:
result[pua] = ExtractedGlyph(pua, GlyphProps(width=cell_w), bm)
variant_count += 1
print(f" Stored {variant_count} jamo variant glyphs in PUA (0x{HANGUL_PUA_BASE:05X}+)")
print(f" Total Hangul glyphs: {len(result)}")
return result
def get_jamo_gsub_data():
    """
    Return the data opentype_features needs to build Hangul jamo GSUB lookups.

    Returns a dict with exactly two keys:
    - 'pua_fn': function(col, row) -> PUA codepoint of the variant glyph
      stored at that (column, row) of the jamo sheet
    - 'pua_base': HANGUL_PUA_BASE, the first codepoint of the variant block

    The row-selection rules themselves (ported from the Kotlin code) are
    applied by the feature generator, not returned here:
      Choseong row  = getHanInitialRow(i_cho, i_jung, i_jong)
      Jungseong row = 15 if no final, else 16
      Jongseong row = 17 if jungseong is not rightie, else 18
    """
    return {
        'pua_fn': _pua_for_jamo_variant,
        'pua_base': HANGUL_PUA_BASE,
    }

View File

@@ -28,7 +28,8 @@ def glyph_name(cp):
return f"u{cp:05X}" if cp <= 0xFFFFF else f"u{cp:06X}"
def generate_features(glyphs, kern_pairs, font_glyph_set):
def generate_features(glyphs, kern_pairs, font_glyph_set,
replacewith_subs=None, jamo_data=None):
"""
Generate complete OpenType feature code string.
@@ -36,6 +37,8 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
glyphs: dict of codepoint -> ExtractedGlyph
kern_pairs: dict of (left_cp, right_cp) -> kern_value_in_font_units
font_glyph_set: set of glyph names actually present in the font
replacewith_subs: list of (source_cp, [target_cp, ...]) for ccmp
jamo_data: dict with Hangul jamo GSUB data
Returns:
Feature code string for feaLib compilation.
"""
@@ -44,6 +47,16 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
def has(cp):
return glyph_name(cp) in font_glyph_set
# ccmp feature (replacewith directives + Hangul jamo decomposition)
ccmp_code = _generate_ccmp(replacewith_subs or [], has)
if ccmp_code:
parts.append(ccmp_code)
# Hangul jamo GSUB assembly
hangul_code = _generate_hangul_gsub(glyphs, has, jamo_data)
if hangul_code:
parts.append(hangul_code)
# kern feature
kern_code = _generate_kern(kern_pairs, has)
if kern_code:
@@ -82,6 +95,209 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
return '\n\n'.join(parts)
def _generate_ccmp(replacewith_subs, has):
    """
    Generate the ccmp feature for replacewith directives.

    Each entry becomes one multiple-substitution rule; entries whose source
    or any target glyph is missing from the font are silently dropped.
    Returns "" when nothing remains to substitute.
    """
    if not replacewith_subs:
        return ""
    rules = [
        f" sub {glyph_name(src_cp)} by "
        f"{' '.join(glyph_name(t) for t in target_cps)};"
        for src_cp, target_cps in replacewith_subs
        if has(src_cp) and all(has(t) for t in target_cps)
    ]
    if not rules:
        return ""
    return '\n'.join(
        ["feature ccmp {", " lookup ReplacewithExpansion {"]
        + rules
        + [" } ReplacewithExpansion;", "} ccmp;"]
    )
def _generate_hangul_gsub(glyphs, has, jamo_data):
    """
    Generate Hangul jamo GSUB lookups for syllable assembly.

    When a shaping engine encounters consecutive Hangul Jamo (Choseong +
    Jungseong + optional Jongseong), these lookups substitute each jamo
    with the correct positional variant from the PUA area.

    The row selection logic mirrors the Kotlin code:
    - Choseong row depends on which jungseong follows and whether jongseong exists
    - Jungseong row is 15 (no final) or 16 (with final)
    - Jongseong row is 17 (normal) or 18 (rightie jungseong)

    Returns the FEA source string, or "" when nothing could be generated.
    """
    if not jamo_data:
        return ""
    pua_fn = jamo_data['pua_fn']
    # Build contextual substitution lookups
    # Strategy: use ljmo/vjmo/tjmo features (standard Hangul OpenType features)
    #
    # ljmo: choseong → positional variant (depends on following jungseong)
    # vjmo: jungseong → positional variant (depends on whether jongseong follows)
    # tjmo: jongseong → positional variant (depends on preceding jungseong)
    lines = []
    # --- ljmo: Choseong variant selection ---
    # For each choseong, we need variants for different jungseong contexts.
    # Row 1 is the default (basic vowels like ㅏ).
    # We use contextual alternates: choseong' lookup X jungseong
    ljmo_lookups = []
    # Group jungseong indices by which choseong row they select.
    # From getHanInitialRow: the row depends on jungseong index (p) and has-final (f).
    # For GSUB, we pre-compute for f=0 (no final) since we can't know yet.
    row_to_jung_indices = {}
    for p in range(96):  # all possible jungseong indices
        # Without jongseong first; use i=1 to avoid giyeok edge cases
        try:
            row_nf = SC.get_han_initial_row(1, p, 0)
        except (ValueError, KeyError):
            continue
        if row_nf not in row_to_jung_indices:
            row_to_jung_indices[row_nf] = []
        row_to_jung_indices[row_nf].append(p)
    # For each unique choseong row, create a lookup that substitutes
    # the default choseong glyph with the variant at that row.
    for cho_row, jung_indices in sorted(row_to_jung_indices.items()):
        if cho_row == 1:
            continue  # row 1 is the default, no substitution needed
        lookup_name = f"ljmo_row{cho_row}"
        subs = []
        # For standard choseong (U+1100-U+115E)
        for cho_cp in range(0x1100, 0x115F):
            col = cho_cp - 0x1100
            variant_pua = pua_fn(col, cho_row)
            if has(cho_cp) and has(variant_pua):
                subs.append(f" sub {glyph_name(cho_cp)} by {glyph_name(variant_pua)};")
        if subs:
            lines.append(f"lookup {lookup_name} {{")
            lines.extend(subs)
            lines.append(f"}} {lookup_name};")
            ljmo_lookups.append((lookup_name, jung_indices))
    # --- vjmo: Jungseong variant selection ---
    # Row 15 = no jongseong following, Row 16 = jongseong follows.
    # Only the with-final variant needs a substitution (row 15 is default).
    vjmo_subs_16 = []  # with-final variant (row 16)
    for jung_cp in range(0x1161, 0x11A8):
        col = jung_cp - 0x1160
        variant_pua = pua_fn(col, 16)
        if has(jung_cp) and has(variant_pua):
            vjmo_subs_16.append(f" sub {glyph_name(jung_cp)} by {glyph_name(variant_pua)};")
    if vjmo_subs_16:
        lines.append("lookup vjmo_withfinal {")
        lines.extend(vjmo_subs_16)
        lines.append("} vjmo_withfinal;")
    # --- tjmo: Jongseong variant selection ---
    # Row 17 = normal (default), Row 18 = after rightie jungseong
    tjmo_subs_18 = []
    for jong_cp in range(0x11A8, 0x1200):
        col = jong_cp - 0x11A8 + 1
        variant_pua = pua_fn(col, 18)
        if has(jong_cp) and has(variant_pua):
            tjmo_subs_18.append(f" sub {glyph_name(jong_cp)} by {glyph_name(variant_pua)};")
    if tjmo_subs_18:
        lines.append("lookup tjmo_rightie {")
        lines.extend(tjmo_subs_18)
        lines.append("} tjmo_rightie;")
    # --- Build the actual features using contextual substitution ---
    # Class definitions must be emitted before the rules that use them,
    # hence the two passes over ljmo_lookups below.
    feature_lines = []
    # ljmo feature: contextual choseong substitution
    if ljmo_lookups:
        feature_lines.append("feature ljmo {")
        feature_lines.append("  script hang;")
        # Pass 1: define one jungseong class per variant row.
        for lookup_name, jung_indices in ljmo_lookups:
            jung_glyphs = []
            for idx in jung_indices:
                cp = 0x1160 + idx
                if has(cp):
                    jung_glyphs.append(glyph_name(cp))
            if not jung_glyphs:
                continue
            class_name = f"@jung_for_{lookup_name}"
            feature_lines.append(f" {class_name} = [{' '.join(jung_glyphs)}];")
        # Pass 2: contextual rules — choseong' [lookup X] jungseong.
        # NOTE(review): @choseong is (re)defined inside this loop, once per
        # lookup, always with the same members — feaLib appears to tolerate
        # the redefinition, but hoisting it above the loop would be cleaner.
        for lookup_name, jung_indices in ljmo_lookups:
            jung_glyphs = []
            for idx in jung_indices:
                cp = 0x1160 + idx
                if has(cp):
                    jung_glyphs.append(glyph_name(cp))
            if not jung_glyphs:
                continue
            class_name = f"@jung_for_{lookup_name}"
            # Build choseong class
            cho_glyphs = [glyph_name(cp) for cp in range(0x1100, 0x115F) if has(cp)]
            if cho_glyphs:
                feature_lines.append(f" @choseong = [{' '.join(cho_glyphs)}];")
                feature_lines.append(f" sub @choseong' lookup {lookup_name} {class_name};")
        feature_lines.append("} ljmo;")
    # vjmo feature: jungseong gets row 16 variant when followed by jongseong
    if vjmo_subs_16:
        jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
        if jong_glyphs:
            feature_lines.append("feature vjmo {")
            feature_lines.append(" script hang;")
            jung_glyphs = [glyph_name(cp) for cp in range(0x1161, 0x11A8) if has(cp)]
            feature_lines.append(f" @jongseong = [{' '.join(jong_glyphs)}];")
            feature_lines.append(f" @jungseong = [{' '.join(jung_glyphs)}];")
            feature_lines.append(f" sub @jungseong' lookup vjmo_withfinal @jongseong;")
            feature_lines.append("} vjmo;")
    # tjmo feature: jongseong gets row 18 variant when after rightie jungseong
    if tjmo_subs_18:
        rightie_glyphs = []
        for idx in sorted(SC.JUNGSEONG_RIGHTIE):
            cp = 0x1160 + idx
            if has(cp):
                rightie_glyphs.append(glyph_name(cp))
            # Also match the PUA with-final variant (row 16), since vjmo may
            # already have substituted the jungseong before tjmo applies.
            pua16 = pua_fn(idx, 16)
            if has(pua16):
                rightie_glyphs.append(glyph_name(pua16))
        if rightie_glyphs:
            feature_lines.append("feature tjmo {")
            feature_lines.append(" script hang;")
            feature_lines.append(f" @rightie_jung = [{' '.join(rightie_glyphs)}];")
            jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
            feature_lines.append(f" @jongseong_all = [{' '.join(jong_glyphs)}];")
            feature_lines.append(f" sub @rightie_jung @jongseong_all' lookup tjmo_rightie;")
            feature_lines.append("} tjmo;")
    if not lines and not feature_lines:
        return ""
    return '\n'.join(lines + [''] + feature_lines)
def _generate_kern(kern_pairs, has):
"""Generate kern feature from pair positioning data."""
if not kern_pairs: