From 949b6aa777db2a91412fab94bee0f25dc30d0810 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Mon, 23 Feb 2026 19:32:25 +0900 Subject: [PATCH] otf wip --- .gitignore | 4 + OTFbuild/build_font.py | 4 +- OTFbuild/font_builder.py | 135 +++++++++++---------- OTFbuild/glyph_parser.py | 131 +++++++++++++++++++- OTFbuild/hangul.py | 55 ++++++++- OTFbuild/opentype_features.py | 218 +++++++++++++++++++++++++++++++++- 6 files changed, 476 insertions(+), 71 deletions(-) diff --git a/.gitignore b/.gitignore index 00b472c..2c431da 100755 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,7 @@ tmp_* *.bak *-autosave.kra .directory + +*/__pycache__ +OTFbuild/*.ttf +OTFbuild/*.otf diff --git a/OTFbuild/build_font.py b/OTFbuild/build_font.py index fdff6fb..9b8dfa9 100644 --- a/OTFbuild/build_font.py +++ b/OTFbuild/build_font.py @@ -33,8 +33,8 @@ def main(): ) parser.add_argument( "-o", "--output", - default="OTFbuild/TerrarumSansBitmap.ttf", - help="Output TTF file path (default: OTFbuild/TerrarumSansBitmap.ttf)" + default="OTFbuild/TerrarumSansBitmap.otf", + help="Output OTF file path (default: OTFbuild/TerrarumSansBitmap.otf)" ) parser.add_argument( "--no-bitmap", diff --git a/OTFbuild/font_builder.py b/OTFbuild/font_builder.py index dc2836e..de65f93 100644 --- a/OTFbuild/font_builder.py +++ b/OTFbuild/font_builder.py @@ -3,25 +3,26 @@ Orchestrate fonttools TTFont assembly. 1. Parse all sheets -> glyphs dict 2. Compose Hangul -> add to dict -3. Create glyph order and cmap -4. Trace all bitmaps -> glyf table -5. Set hmtx, hhea, OS/2, head, name, post -6. Generate and compile OpenType features via feaLib -7. Add EBDT/EBLC bitmap strike at ppem=20 -8. Save TTF +3. Expand replacewith directives +4. Create glyph order and cmap +5. Trace all bitmaps -> CFF charstrings +6. Set hmtx, hhea, OS/2, head, name, post +7. Generate and compile OpenType features via feaLib +8. Add EBDT/EBLC bitmap strike at ppem=20 +9. Save OTF """ import time from typing import Dict from fontTools.fontBuilder import FontBuilder -from fontTools.pens.ttGlyphPen import TTGlyphPen +from fontTools.pens.t2CharStringPen import T2CharStringPen from fontTools.feaLib.builder import addOpenTypeFeatures from fontTools.ttLib import TTFont import io -from glyph_parser import ExtractedGlyph, parse_all_sheets -from hangul import compose_hangul +from glyph_parser import ExtractedGlyph, GlyphProps, parse_all_sheets +from hangul import compose_hangul, get_jamo_gsub_data, HANGUL_PUA_BASE from bitmap_tracer import trace_bitmap, draw_glyph_to_pen, SCALE, BASELINE_ROW from keming_machine import generate_kerning_pairs from opentype_features import generate_features, glyph_name @@ -30,12 +31,6 @@ import sheet_config as SC # Codepoints that get cmap entries (user-visible) # PUA forms used internally by GSUB get glyphs but NO cmap entries -_PUA_CMAP_RANGES = [ - range(0xE000, 0xE100), # Custom symbols - range(0xF0520, 0xF0580), # Codestyle ASCII -] - - def _should_have_cmap(cp): """Determine if a codepoint should have a cmap entry.""" # Standard Unicode characters always get cmap entries @@ -61,9 +56,8 @@ def _should_have_cmap(cp): # Everything in standard Unicode ranges (up to 0xFFFF plus SMP) if cp <= 0xFFFF: return True - # Internal PUA forms (Devanagari, Tamil, Sundanese, Bulgarian, Serbian internals) - # These are GSUB-only and should NOT have cmap entries - if 0xF0000 <= cp <= 0xF051F: + # Internal PUA forms — GSUB-only, no cmap + if 0xF0000 <= cp <= 0xF0FFF: return False # Internal control characters if 0xFFE00 <= cp <= 0xFFFFF: @@ -71,8 +65,30 @@ def _should_have_cmap(cp): return True +def _expand_replacewith(glyphs): + """ + Find glyphs with 'replacewith' directive and generate GSUB multiple + substitution data. Returns list of (source_cp, [target_cp, ...]). + + A replacewith glyph's extInfo contains up to 7 codepoints that the + glyph expands to (e.g. U+01C7 "LJ" → [0x4C, 0x4A]). + """ + replacements = [] + for cp, g in glyphs.items(): + if g.props.is_pragma("replacewith"): + targets = [] + count = g.props.required_ext_info_count() + for i in range(count): + val = g.props.ext_info[i] + if val != 0: + targets.append(val) + if targets: + replacements.append((cp, targets)) + return replacements + + def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): - """Build the complete TTF font.""" + """Build the complete OTF font.""" t0 = time.time() # Step 1: Parse all sheets @@ -86,8 +102,13 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): glyphs.update(hangul_glyphs) print(f" Total glyphs after Hangul: {len(glyphs)}") - # Step 3: Create glyph order and cmap - print("Step 3: Building glyph order and cmap...") + # Step 3: Expand replacewith directives + print("Step 3: Processing replacewith directives...") + replacewith_subs = _expand_replacewith(glyphs) + print(f" Found {len(replacewith_subs)} replacewith substitutions") + + # Step 4: Create glyph order and cmap + print("Step 4: Building glyph order and cmap...") glyph_order = [".notdef"] cmap = {} glyph_set = set() @@ -111,34 +132,31 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): print(f" Glyph order: {len(glyph_order)} glyphs, cmap: {len(cmap)} entries") - # Step 4: Build font with fonttools - print("Step 4: Building font tables...") - fb = FontBuilder(SC.UNITS_PER_EM, isTTF=True) + # Step 5: Build font with fonttools (CFF/OTF) + print("Step 5: Building font tables...") + fb = FontBuilder(SC.UNITS_PER_EM, isTTF=False) fb.setupGlyphOrder(glyph_order) - - # Build cmap fb.setupCharacterMap(cmap) - # Step 5: Trace bitmaps -> glyf table - print("Step 5: Tracing bitmaps to outlines...") - glyph_table = {} + # Step 6: Trace bitmaps -> CFF charstrings + print("Step 6: Tracing bitmaps to CFF outlines...") - pen = TTGlyphPen(None) + charstrings = {} # .notdef glyph (empty box) + pen = T2CharStringPen(SC.UNITS_PER_EM // 2, None) pen.moveTo((0, 0)) pen.lineTo((0, SC.ASCENT)) pen.lineTo((SC.UNITS_PER_EM // 2, SC.ASCENT)) pen.lineTo((SC.UNITS_PER_EM // 2, 0)) pen.closePath() - # Inner box _m = 2 * SCALE pen.moveTo((_m, _m)) pen.lineTo((SC.UNITS_PER_EM // 2 - _m, _m)) pen.lineTo((SC.UNITS_PER_EM // 2 - _m, SC.ASCENT - _m)) pen.lineTo((_m, SC.ASCENT - _m)) pen.closePath() - glyph_table[".notdef"] = pen.glyph() + charstrings[".notdef"] = pen.getCharString() traced_count = 0 for cp in sorted_cps: @@ -149,25 +167,26 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): if name == ".notdef" or name not in glyph_set: continue + advance = g.props.width * SCALE contours = trace_bitmap(g.bitmap, g.props.width) - pen = TTGlyphPen(None) + pen = T2CharStringPen(advance, None) if contours: draw_glyph_to_pen(contours, pen) - glyph_table[name] = pen.glyph() traced_count += 1 - else: - # Empty glyph (space, zero-width, etc.) - pen.moveTo((0, 0)) - pen.endPath() - glyph_table[name] = pen.glyph() + charstrings[name] = pen.getCharString() print(f" Traced {traced_count} glyphs with outlines") - fb.setupGlyf(glyph_table) + fb.setupCFF( + psName="TerrarumSansBitmap-Regular", + fontInfo={}, + charStringsDict=charstrings, + privateDict={}, + ) - # Step 6: Set metrics - print("Step 6: Setting font metrics...") + # Step 7: Set metrics + print("Step 7: Setting font metrics...") metrics = {} metrics[".notdef"] = (SC.UNITS_PER_EM // 2, 0) @@ -179,7 +198,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): if name == ".notdef" or name not in glyph_set: continue advance = g.props.width * SCALE - metrics[name] = (advance, 0) # (advance_width, lsb) + metrics[name] = (advance, 0) fb.setupHorizontalMetrics(metrics) fb.setupHorizontalHeader( @@ -200,7 +219,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): usWinDescent=SC.DESCENT, sxHeight=SC.X_HEIGHT, sCapHeight=SC.CAP_HEIGHT, - fsType=0, # Installable embedding + fsType=0, ) fb.setupPost() @@ -208,13 +227,16 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): font = fb.font - # Step 7: Generate and compile OpenType features + # Step 8: Generate and compile OpenType features if not no_features: - print("Step 7: Generating OpenType features...") + print("Step 8: Generating OpenType features...") kern_pairs = generate_kerning_pairs(glyphs) print(f" {len(kern_pairs)} kerning pairs") - fea_code = generate_features(glyphs, kern_pairs, glyph_set) + jamo_data = get_jamo_gsub_data() + fea_code = generate_features(glyphs, kern_pairs, glyph_set, + replacewith_subs=replacewith_subs, + jamo_data=jamo_data) if fea_code.strip(): print(" Compiling features with feaLib...") @@ -228,14 +250,14 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): else: print(" No features to compile") else: - print("Step 7: Skipping OpenType features (--no-features)") + print("Step 8: Skipping OpenType features (--no-features)") - # Step 8: Add bitmap strike (EBDT/EBLC) + # Step 9: Add bitmap strike (EBDT/EBLC) if not no_bitmap: - print("Step 8: Adding bitmap strike...") + print("Step 9: Adding bitmap strike...") _add_bitmap_strike(font, glyphs, glyph_order, glyph_set) else: - print("Step 8: Skipping bitmap strike (--no-bitmap)") + print("Step 9: Skipping bitmap strike (--no-bitmap)") # Save print(f"Saving to {output_path}...") @@ -254,7 +276,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set): ppem = 20 name_to_id = {name: idx for idx, name in enumerate(glyph_order)} - # Collect bitmap data — only glyphs with actual pixels bitmap_entries = [] for name in glyph_order: if name == ".notdef": @@ -272,7 +293,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set): if w == 0 or h == 0: continue - # Pack rows into hex hex_rows = [] for row in bitmap: row_bytes = bytearray() @@ -298,12 +318,9 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set): print(" No bitmap data to embed") return - # Split into contiguous GID runs for separate index subtables - # This avoids the empty-name problem for gaps gid_sorted = sorted(bitmap_entries, key=lambda e: e['gid']) - gid_to_entry = {e['gid']: e for e in gid_sorted} - runs = [] # list of lists of entries + runs = [] current_run = [gid_sorted[0]] for i in range(1, len(gid_sorted)): if gid_sorted[i]['gid'] == gid_sorted[i-1]['gid'] + 1: @@ -313,7 +330,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set): current_run = [gid_sorted[i]] runs.append(current_run) - # Build TTX XML for EBDT ebdt_xml = ['', '
', ''] for entry in gid_sorted: ebdt_xml.append(f' ') @@ -332,7 +348,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set): ebdt_xml.append('') ebdt_xml.append('') - # Build TTX XML for EBLC all_gids = [e['gid'] for e in gid_sorted] desc = -(SC.H - BASELINE_ROW) @@ -371,8 +386,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set): ' ', ]) - # One index subtable per contiguous run — no gaps - # Use format 1 (32-bit offsets) to avoid 16-bit overflow for run in runs: first_gid = run[0]['gid'] last_gid = run[-1]['gid'] diff --git a/OTFbuild/glyph_parser.py b/OTFbuild/glyph_parser.py index 890b049..07ecb0e 100644 --- a/OTFbuild/glyph_parser.py +++ b/OTFbuild/glyph_parser.py @@ -191,8 +191,9 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped info |= (1 << y) ext_info[x] = info - # Extract glyph bitmap (all pixels except tag column) - bitmap_w = cell_w - 1 + # Extract glyph bitmap: only pixels within the glyph's declared width. + # The tag column and any padding beyond width must be stripped. + bitmap_w = min(width, cell_w - 1) if width > 0 else 0 bitmap = [] for row in range(cell_h): row_data = [] @@ -206,14 +207,98 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped return result +def _read_hangul_cell(image, column, row, cell_w=SC.W_HANGUL_BASE, cell_h=SC.H): + """Read a single cell from the Hangul johab sheet at (column, row).""" + cell_x = column * cell_w + cell_y = row * cell_h + bitmap = [] + for r in range(cell_h): + row_data = [] + for c in range(cell_w): + px = image.get_pixel(cell_x + c, cell_y + r) + row_data.append(1 if (px & 0xFF) != 0 else 0) + bitmap.append(row_data) + return bitmap + + +def parse_hangul_jamo_sheet(image, cell_w, cell_h): + """ + Parse the Hangul Jamo sheet with correct row/column mapping. + + Layout in hangul_johab.tga: + - Choseong (U+1100-U+115E): column = choseongIndex, row = 1 + - Jungseong (U+1161-U+11A7): column = jungseongIndex+1, row = 15 + (column 0 is filler U+1160, stored at row 15 col 0) + - Jongseong (U+11A8-U+11FF): column = jongseongIndex, row = 17 + (index starts at 1 for 11A8) + - Extended Choseong (U+A960-U+A97F): column = 96+offset, row = 1 + - Extended Jungseong (U+D7B0-U+D7C6): column = 72+offset, row = 15 + - Extended Jongseong (U+D7CB-U+D7FB): column = 89+offset, row = 17 + + Each jamo gets a default-row bitmap. Multiple variant rows exist for + syllable composition (handled separately by hangul.py / GSUB). + """ + result = {} + + # U+1160 (Hangul Jungseong Filler) — column 0, row 15 + bm = _read_hangul_cell(image, 0, 15, cell_w, cell_h) + result[0x1160] = ExtractedGlyph(0x1160, GlyphProps(width=cell_w), bm) + + # Choseong: U+1100-U+115E → column = cp - 0x1100, row = 1 + for cp in range(0x1100, 0x115F): + col = cp - 0x1100 + bm = _read_hangul_cell(image, col, 1, cell_w, cell_h) + result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm) + + # U+115F (Hangul Choseong Filler) + col = 0x115F - 0x1100 + bm = _read_hangul_cell(image, col, 1, cell_w, cell_h) + result[0x115F] = ExtractedGlyph(0x115F, GlyphProps(width=cell_w), bm) + + # Jungseong: U+1161-U+11A7 → column = (cp - 0x1160), row = 15 + for cp in range(0x1161, 0x11A8): + col = cp - 0x1160 + bm = _read_hangul_cell(image, col, 15, cell_w, cell_h) + result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm) + + # Jongseong: U+11A8-U+11FF → column = (cp - 0x11A8 + 1), row = 17 + for cp in range(0x11A8, 0x1200): + col = cp - 0x11A8 + 1 + bm = _read_hangul_cell(image, col, 17, cell_w, cell_h) + result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm) + + # Extended Choseong: U+A960-U+A97F → column = (cp - 0xA960 + 96), row = 1 + for cp in range(0xA960, 0xA980): + col = cp - 0xA960 + 96 + bm = _read_hangul_cell(image, col, 1, cell_w, cell_h) + result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm) + + # Extended Jungseong: U+D7B0-U+D7C6 → column = (cp - 0xD7B0 + 72), row = 15 + for cp in range(0xD7B0, 0xD7C7): + col = cp - 0xD7B0 + 72 + bm = _read_hangul_cell(image, col, 15, cell_w, cell_h) + result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm) + + # Extended Jongseong: U+D7CB-U+D7FB → column = (cp - 0xD7CB + 88 + 1), row = 17 + for cp in range(0xD7CB, 0xD7FC): + col = cp - 0xD7CB + 88 + 1 + bm = _read_hangul_cell(image, col, 17, cell_w, cell_h) + result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm) + + return result + + def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols): """Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym).""" + # Hangul Jamo sheet has special layout — handled separately + if sheet_index == SC.SHEET_HANGUL: + return parse_hangul_jamo_sheet(image, cell_w, cell_h) + code_range = SC.CODE_RANGE[sheet_index] result = {} fixed_width = { SC.SHEET_CUSTOM_SYM: 20, - SC.SHEET_HANGUL: SC.W_HANGUL_BASE, SC.SHEET_RUNIC: 9, SC.SHEET_UNIHAN: SC.W_UNIHAN, }.get(sheet_index, cell_w) @@ -301,7 +386,7 @@ def _add_fixed_width_overrides(result): def get_hangul_jamo_bitmaps(assets_dir): """ Extract raw Hangul jamo bitmaps from the Hangul sheet for composition. - Returns a function: (index, row) -> bitmap (list of list of int) + Returns a function: (column_index, row) -> bitmap (list of list of int) """ filename = SC.FILE_LIST[SC.SHEET_HANGUL] filepath = os.path.join(assets_dir, filename) @@ -326,3 +411,41 @@ def get_hangul_jamo_bitmaps(assets_dir): return bitmap return get_bitmap + + +def extract_hangul_jamo_variants(assets_dir): + """ + Extract ALL Hangul jamo variant bitmaps from hangul_johab.tga. + Returns dict of (column, row) -> bitmap for every non-empty cell. + Used by hangul.py to store variants in PUA for GSUB assembly. + + Layout: + Row 0: Hangul Compatibility Jamo (U+3130-U+318F) + Rows 1-14: Choseong variants (row depends on jungseong context) + Rows 15-16: Jungseong variants (15=no final, 16=with final) + Rows 17-18: Jongseong variants (17=normal, 18=rightie jungseong) + Rows 19-24: Additional choseong variants (giyeok remapping) + """ + filename = SC.FILE_LIST[SC.SHEET_HANGUL] + filepath = os.path.join(assets_dir, filename) + if not os.path.exists(filepath): + return {} + + image = read_tga(filepath) + cell_w = SC.W_HANGUL_BASE + cell_h = SC.H + + variants = {} + # Scan all rows that contain jamo data + # Rows 0-24 at minimum, checking up to image height + max_row = image.height // cell_h + max_col = image.width // cell_w + + for row in range(max_row): + for col in range(max_col): + bm = _read_hangul_cell(image, col, row, cell_w, cell_h) + # Check if non-empty + if any(px for r in bm for px in r): + variants[(col, row)] = bm + + return variants diff --git a/OTFbuild/hangul.py b/OTFbuild/hangul.py index c4bf759..62b6a12 100644 --- a/OTFbuild/hangul.py +++ b/OTFbuild/hangul.py @@ -1,15 +1,24 @@ """ Compose 11,172 Hangul syllables (U+AC00-U+D7A3) from jamo sprite pieces. Also composes Hangul Compatibility Jamo (U+3130-U+318F). +Also stores all jamo variant bitmaps in PUA for GSUB-based jamo assembly. Ported from HangulCompositor.kt and TerrarumSansBitmap.kt. """ from typing import Dict, List, Tuple -from glyph_parser import ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps +from glyph_parser import ( + ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps, + extract_hangul_jamo_variants, _read_hangul_cell, _empty_bitmap, +) import sheet_config as SC +# PUA range for Hangul jamo variant storage. +# We need space for: max_col * max_row variants. +# Using 0xF0600-0xF0FFF (2560 slots, more than enough). +HANGUL_PUA_BASE = 0xF0600 + def _compose_bitmaps(a, b, w, h): """OR two bitmaps together.""" @@ -32,9 +41,15 @@ def _compose_bitmap_into(target, source, w, h): target[row][col] = 1 +def _pua_for_jamo_variant(col, row): + """Get PUA codepoint for a jamo variant at (column, row) in the sheet.""" + # Encode as base + row * 256 + col (supports up to 256 columns per row) + return HANGUL_PUA_BASE + row * 256 + col + + def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]: """ - Compose all Hangul syllables and compatibility jamo. + Compose all Hangul syllables, compatibility jamo, and jamo variants. Returns a dict of codepoint -> ExtractedGlyph. """ get_jamo = get_hangul_jamo_bitmaps(assets_dir) @@ -94,5 +109,39 @@ def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]: props = GlyphProps(width=advance_width) result[c] = ExtractedGlyph(c, props, composed) - print(f" Hangul composition done: {len(result)} glyphs") + print(f" Hangul syllable composition done: {len(result)} glyphs") + + # Store jamo variant bitmaps in PUA for GSUB assembly + print(" Extracting jamo variants for GSUB...") + variants = extract_hangul_jamo_variants(assets_dir) + variant_count = 0 + for (col, row), bm in variants.items(): + pua = _pua_for_jamo_variant(col, row) + if pua not in result: + result[pua] = ExtractedGlyph(pua, GlyphProps(width=cell_w), bm) + variant_count += 1 + + print(f" Stored {variant_count} jamo variant glyphs in PUA (0x{HANGUL_PUA_BASE:05X}+)") + print(f" Total Hangul glyphs: {len(result)}") return result + + +def get_jamo_gsub_data(): + """ + Generate the data needed for Hangul jamo GSUB lookups. + + Returns a dict with: + - 'cho_rows': dict mapping (i_jung, has_jong) -> row for choseong + - 'jung_rows': dict mapping has_jong -> row for jungseong + - 'jong_rows': dict mapping is_rightie -> row for jongseong + - 'pua_fn': function(col, row) -> PUA codepoint + + These are the row-selection rules from the Kotlin code: + Choseong row = getHanInitialRow(i_cho, i_jung, i_jong) + Jungseong row = 15 if no final, else 16 + Jongseong row = 17 if jungseong is not rightie, else 18 + """ + return { + 'pua_fn': _pua_for_jamo_variant, + 'pua_base': HANGUL_PUA_BASE, + } diff --git a/OTFbuild/opentype_features.py b/OTFbuild/opentype_features.py index 892352d..95eea8b 100644 --- a/OTFbuild/opentype_features.py +++ b/OTFbuild/opentype_features.py @@ -28,7 +28,8 @@ def glyph_name(cp): return f"u{cp:05X}" if cp <= 0xFFFFF else f"u{cp:06X}" -def generate_features(glyphs, kern_pairs, font_glyph_set): +def generate_features(glyphs, kern_pairs, font_glyph_set, + replacewith_subs=None, jamo_data=None): """ Generate complete OpenType feature code string. @@ -36,6 +37,8 @@ def generate_features(glyphs, kern_pairs, font_glyph_set): glyphs: dict of codepoint -> ExtractedGlyph kern_pairs: dict of (left_cp, right_cp) -> kern_value_in_font_units font_glyph_set: set of glyph names actually present in the font + replacewith_subs: list of (source_cp, [target_cp, ...]) for ccmp + jamo_data: dict with Hangul jamo GSUB data Returns: Feature code string for feaLib compilation. """ @@ -44,6 +47,16 @@ def generate_features(glyphs, kern_pairs, font_glyph_set): def has(cp): return glyph_name(cp) in font_glyph_set + # ccmp feature (replacewith directives + Hangul jamo decomposition) + ccmp_code = _generate_ccmp(replacewith_subs or [], has) + if ccmp_code: + parts.append(ccmp_code) + + # Hangul jamo GSUB assembly + hangul_code = _generate_hangul_gsub(glyphs, has, jamo_data) + if hangul_code: + parts.append(hangul_code) + # kern feature kern_code = _generate_kern(kern_pairs, has) if kern_code: @@ -82,6 +95,209 @@ def generate_features(glyphs, kern_pairs, font_glyph_set): return '\n\n'.join(parts) +def _generate_ccmp(replacewith_subs, has): + """Generate ccmp feature for replacewith directives (multiple substitution).""" + if not replacewith_subs: + return "" + + subs = [] + for src_cp, target_cps in replacewith_subs: + if not has(src_cp): + continue + if not all(has(t) for t in target_cps): + continue + src = glyph_name(src_cp) + targets = ' '.join(glyph_name(t) for t in target_cps) + subs.append(f" sub {src} by {targets};") + + if not subs: + return "" + + lines = ["feature ccmp {", " lookup ReplacewithExpansion {"] + lines.extend(subs) + lines.append(" } ReplacewithExpansion;") + lines.append("} ccmp;") + return '\n'.join(lines) + + +def _generate_hangul_gsub(glyphs, has, jamo_data): + """ + Generate Hangul jamo GSUB lookups for syllable assembly. + + When a shaping engine encounters consecutive Hangul Jamo (Choseong + + Jungseong + optional Jongseong), these lookups substitute each jamo + with the correct positional variant from the PUA area. + + The row selection logic mirrors the Kotlin code: + - Choseong row depends on which jungseong follows and whether jongseong exists + - Jungseong row is 15 (no final) or 16 (with final) + - Jongseong row is 17 (normal) or 18 (rightie jungseong) + """ + if not jamo_data: + return "" + + pua_fn = jamo_data['pua_fn'] + + # Build contextual substitution lookups + # Strategy: use ljmo/vjmo/tjmo features (standard Hangul OpenType features) + # + # ljmo: choseong → positional variant (depends on following jungseong) + # vjmo: jungseong → positional variant (depends on whether jongseong follows) + # tjmo: jongseong → positional variant (depends on preceding jungseong) + + lines = [] + + # --- ljmo: Choseong variant selection --- + # For each choseong, we need variants for different jungseong contexts. + # Row 1 is the default (basic vowels like ㅏ). + # We use contextual alternates: choseong' lookup X jungseong + ljmo_lookups = [] + + # Group jungseong indices by which choseong row they select + # From getHanInitialRow: the row depends on jungseong index (p) and has-final (f) + # For GSUB, we pre-compute for f=0 (no final) since we can't know yet + row_to_jung_indices = {} + for p in range(96): # all possible jungseong indices + # Without jongseong first; use i=1 to avoid giyeok edge cases + try: + row_nf = SC.get_han_initial_row(1, p, 0) + except (ValueError, KeyError): + continue + if row_nf not in row_to_jung_indices: + row_to_jung_indices[row_nf] = [] + row_to_jung_indices[row_nf].append(p) + + # For each unique choseong row, create a lookup that substitutes + # the default choseong glyph with the variant at that row + for cho_row, jung_indices in sorted(row_to_jung_indices.items()): + if cho_row == 1: + continue # row 1 is the default, no substitution needed + + lookup_name = f"ljmo_row{cho_row}" + subs = [] + + # For standard choseong (U+1100-U+115E) + for cho_cp in range(0x1100, 0x115F): + col = cho_cp - 0x1100 + variant_pua = pua_fn(col, cho_row) + if has(cho_cp) and has(variant_pua): + subs.append(f" sub {glyph_name(cho_cp)} by {glyph_name(variant_pua)};") + + if subs: + lines.append(f"lookup {lookup_name} {{") + lines.extend(subs) + lines.append(f"}} {lookup_name};") + ljmo_lookups.append((lookup_name, jung_indices)) + + # --- vjmo: Jungseong variant selection --- + # Row 15 = no jongseong following, Row 16 = jongseong follows + # We need two lookups + vjmo_subs_16 = [] # with-final variant (row 16) + for jung_cp in range(0x1161, 0x11A8): + col = jung_cp - 0x1160 + variant_pua = pua_fn(col, 16) + if has(jung_cp) and has(variant_pua): + vjmo_subs_16.append(f" sub {glyph_name(jung_cp)} by {glyph_name(variant_pua)};") + + if vjmo_subs_16: + lines.append("lookup vjmo_withfinal {") + lines.extend(vjmo_subs_16) + lines.append("} vjmo_withfinal;") + + # --- tjmo: Jongseong variant selection --- + # Row 17 = normal, Row 18 = after rightie jungseong + tjmo_subs_18 = [] + for jong_cp in range(0x11A8, 0x1200): + col = jong_cp - 0x11A8 + 1 + variant_pua = pua_fn(col, 18) + if has(jong_cp) and has(variant_pua): + tjmo_subs_18.append(f" sub {glyph_name(jong_cp)} by {glyph_name(variant_pua)};") + + if tjmo_subs_18: + lines.append("lookup tjmo_rightie {") + lines.extend(tjmo_subs_18) + lines.append("} tjmo_rightie;") + + # --- Build the actual features using contextual substitution --- + + # Jungseong class definitions for contextual rules + # Build classes of jungseong glyphs that trigger specific choseong rows + feature_lines = [] + + # ljmo feature: contextual choseong substitution + if ljmo_lookups: + feature_lines.append("feature ljmo {") + feature_lines.append(" script hang;") + for lookup_name, jung_indices in ljmo_lookups: + # Build jungseong class for this row + jung_glyphs = [] + for idx in jung_indices: + cp = 0x1160 + idx + if has(cp): + jung_glyphs.append(glyph_name(cp)) + if not jung_glyphs: + continue + class_name = f"@jung_for_{lookup_name}" + feature_lines.append(f" {class_name} = [{' '.join(jung_glyphs)}];") + + # Contextual rules: choseong' [lookup X] jungseong + # For each choseong, if followed by a jungseong in the right class, + # apply the variant lookup + for lookup_name, jung_indices in ljmo_lookups: + jung_glyphs = [] + for idx in jung_indices: + cp = 0x1160 + idx + if has(cp): + jung_glyphs.append(glyph_name(cp)) + if not jung_glyphs: + continue + class_name = f"@jung_for_{lookup_name}" + # Build choseong class + cho_glyphs = [glyph_name(cp) for cp in range(0x1100, 0x115F) if has(cp)] + if cho_glyphs: + feature_lines.append(f" @choseong = [{' '.join(cho_glyphs)}];") + feature_lines.append(f" sub @choseong' lookup {lookup_name} {class_name};") + + feature_lines.append("} ljmo;") + + # vjmo feature: jungseong gets row 16 variant when followed by jongseong + if vjmo_subs_16: + jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)] + if jong_glyphs: + feature_lines.append("feature vjmo {") + feature_lines.append(" script hang;") + jung_glyphs = [glyph_name(cp) for cp in range(0x1161, 0x11A8) if has(cp)] + feature_lines.append(f" @jongseong = [{' '.join(jong_glyphs)}];") + feature_lines.append(f" @jungseong = [{' '.join(jung_glyphs)}];") + feature_lines.append(f" sub @jungseong' lookup vjmo_withfinal @jongseong;") + feature_lines.append("} vjmo;") + + # tjmo feature: jongseong gets row 18 variant when after rightie jungseong + if tjmo_subs_18: + rightie_glyphs = [] + for idx in sorted(SC.JUNGSEONG_RIGHTIE): + cp = 0x1160 + idx + if has(cp): + rightie_glyphs.append(glyph_name(cp)) + # Also check PUA variants (row 16) + pua16 = pua_fn(idx, 16) + if has(pua16): + rightie_glyphs.append(glyph_name(pua16)) + if rightie_glyphs: + feature_lines.append("feature tjmo {") + feature_lines.append(" script hang;") + feature_lines.append(f" @rightie_jung = [{' '.join(rightie_glyphs)}];") + jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)] + feature_lines.append(f" @jongseong_all = [{' '.join(jong_glyphs)}];") + feature_lines.append(f" sub @rightie_jung @jongseong_all' lookup tjmo_rightie;") + feature_lines.append("} tjmo;") + + if not lines and not feature_lines: + return "" + + return '\n'.join(lines + [''] + feature_lines) + + def _generate_kern(kern_pairs, has): """Generate kern feature from pair positioning data.""" if not kern_pairs: