This commit is contained in:
minjaesong
2026-02-23 19:32:25 +09:00
parent 5e2cacd491
commit 949b6aa777
6 changed files with 476 additions and 71 deletions

View File

@@ -33,8 +33,8 @@ def main():
)
parser.add_argument(
"-o", "--output",
default="OTFbuild/TerrarumSansBitmap.ttf",
help="Output TTF file path (default: OTFbuild/TerrarumSansBitmap.ttf)"
default="OTFbuild/TerrarumSansBitmap.otf",
help="Output OTF file path (default: OTFbuild/TerrarumSansBitmap.otf)"
)
parser.add_argument(
"--no-bitmap",

View File

@@ -3,25 +3,26 @@ Orchestrate fonttools TTFont assembly.
1. Parse all sheets -> glyphs dict
2. Compose Hangul -> add to dict
3. Create glyph order and cmap
4. Trace all bitmaps -> glyf table
5. Set hmtx, hhea, OS/2, head, name, post
6. Generate and compile OpenType features via feaLib
7. Add EBDT/EBLC bitmap strike at ppem=20
8. Save TTF
3. Expand replacewith directives
4. Create glyph order and cmap
5. Trace all bitmaps -> CFF charstrings
6. Set hmtx, hhea, OS/2, head, name, post
7. Generate and compile OpenType features via feaLib
8. Add EBDT/EBLC bitmap strike at ppem=20
9. Save OTF
"""
import time
from typing import Dict
from fontTools.fontBuilder import FontBuilder
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.pens.t2CharStringPen import T2CharStringPen
from fontTools.feaLib.builder import addOpenTypeFeatures
from fontTools.ttLib import TTFont
import io
from glyph_parser import ExtractedGlyph, parse_all_sheets
from hangul import compose_hangul
from glyph_parser import ExtractedGlyph, GlyphProps, parse_all_sheets
from hangul import compose_hangul, get_jamo_gsub_data, HANGUL_PUA_BASE
from bitmap_tracer import trace_bitmap, draw_glyph_to_pen, SCALE, BASELINE_ROW
from keming_machine import generate_kerning_pairs
from opentype_features import generate_features, glyph_name
@@ -30,12 +31,6 @@ import sheet_config as SC
# Codepoints that get cmap entries (user-visible)
# PUA forms used internally by GSUB get glyphs but NO cmap entries
_PUA_CMAP_RANGES = [
range(0xE000, 0xE100), # Custom symbols
range(0xF0520, 0xF0580), # Codestyle ASCII
]
def _should_have_cmap(cp):
"""Determine if a codepoint should have a cmap entry."""
# Standard Unicode characters always get cmap entries
@@ -61,9 +56,8 @@ def _should_have_cmap(cp):
# Everything in standard Unicode ranges (up to 0xFFFF plus SMP)
if cp <= 0xFFFF:
return True
# Internal PUA forms (Devanagari, Tamil, Sundanese, Bulgarian, Serbian internals)
# These are GSUB-only and should NOT have cmap entries
if 0xF0000 <= cp <= 0xF051F:
# Internal PUA forms — GSUB-only, no cmap
if 0xF0000 <= cp <= 0xF0FFF:
return False
# Internal control characters
if 0xFFE00 <= cp <= 0xFFFFF:
@@ -71,8 +65,30 @@ def _should_have_cmap(cp):
return True
def _expand_replacewith(glyphs):
"""
Find glyphs with 'replacewith' directive and generate GSUB multiple
substitution data. Returns list of (source_cp, [target_cp, ...]).
A replacewith glyph's extInfo contains up to 7 codepoints that the
glyph expands to (e.g. U+01C7 "LJ" → [0x4C, 0x4A]).
"""
replacements = []
for cp, g in glyphs.items():
if g.props.is_pragma("replacewith"):
targets = []
count = g.props.required_ext_info_count()
for i in range(count):
val = g.props.ext_info[i]
if val != 0:
targets.append(val)
if targets:
replacements.append((cp, targets))
return replacements
def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
"""Build the complete TTF font."""
"""Build the complete OTF font."""
t0 = time.time()
# Step 1: Parse all sheets
@@ -86,8 +102,13 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
glyphs.update(hangul_glyphs)
print(f" Total glyphs after Hangul: {len(glyphs)}")
# Step 3: Create glyph order and cmap
print("Step 3: Building glyph order and cmap...")
# Step 3: Expand replacewith directives
print("Step 3: Processing replacewith directives...")
replacewith_subs = _expand_replacewith(glyphs)
print(f" Found {len(replacewith_subs)} replacewith substitutions")
# Step 4: Create glyph order and cmap
print("Step 4: Building glyph order and cmap...")
glyph_order = [".notdef"]
cmap = {}
glyph_set = set()
@@ -111,34 +132,31 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
print(f" Glyph order: {len(glyph_order)} glyphs, cmap: {len(cmap)} entries")
# Step 4: Build font with fonttools
print("Step 4: Building font tables...")
fb = FontBuilder(SC.UNITS_PER_EM, isTTF=True)
# Step 5: Build font with fonttools (CFF/OTF)
print("Step 5: Building font tables...")
fb = FontBuilder(SC.UNITS_PER_EM, isTTF=False)
fb.setupGlyphOrder(glyph_order)
# Build cmap
fb.setupCharacterMap(cmap)
# Step 5: Trace bitmaps -> glyf table
print("Step 5: Tracing bitmaps to outlines...")
glyph_table = {}
# Step 6: Trace bitmaps -> CFF charstrings
print("Step 6: Tracing bitmaps to CFF outlines...")
pen = TTGlyphPen(None)
charstrings = {}
# .notdef glyph (empty box)
pen = T2CharStringPen(SC.UNITS_PER_EM // 2, None)
pen.moveTo((0, 0))
pen.lineTo((0, SC.ASCENT))
pen.lineTo((SC.UNITS_PER_EM // 2, SC.ASCENT))
pen.lineTo((SC.UNITS_PER_EM // 2, 0))
pen.closePath()
# Inner box
_m = 2 * SCALE
pen.moveTo((_m, _m))
pen.lineTo((SC.UNITS_PER_EM // 2 - _m, _m))
pen.lineTo((SC.UNITS_PER_EM // 2 - _m, SC.ASCENT - _m))
pen.lineTo((_m, SC.ASCENT - _m))
pen.closePath()
glyph_table[".notdef"] = pen.glyph()
charstrings[".notdef"] = pen.getCharString()
traced_count = 0
for cp in sorted_cps:
@@ -149,25 +167,26 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
if name == ".notdef" or name not in glyph_set:
continue
advance = g.props.width * SCALE
contours = trace_bitmap(g.bitmap, g.props.width)
pen = TTGlyphPen(None)
pen = T2CharStringPen(advance, None)
if contours:
draw_glyph_to_pen(contours, pen)
glyph_table[name] = pen.glyph()
traced_count += 1
else:
# Empty glyph (space, zero-width, etc.)
pen.moveTo((0, 0))
pen.endPath()
glyph_table[name] = pen.glyph()
charstrings[name] = pen.getCharString()
print(f" Traced {traced_count} glyphs with outlines")
fb.setupGlyf(glyph_table)
fb.setupCFF(
psName="TerrarumSansBitmap-Regular",
fontInfo={},
charStringsDict=charstrings,
privateDict={},
)
# Step 6: Set metrics
print("Step 6: Setting font metrics...")
# Step 7: Set metrics
print("Step 7: Setting font metrics...")
metrics = {}
metrics[".notdef"] = (SC.UNITS_PER_EM // 2, 0)
@@ -179,7 +198,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
if name == ".notdef" or name not in glyph_set:
continue
advance = g.props.width * SCALE
metrics[name] = (advance, 0) # (advance_width, lsb)
metrics[name] = (advance, 0)
fb.setupHorizontalMetrics(metrics)
fb.setupHorizontalHeader(
@@ -200,7 +219,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
usWinDescent=SC.DESCENT,
sxHeight=SC.X_HEIGHT,
sCapHeight=SC.CAP_HEIGHT,
fsType=0, # Installable embedding
fsType=0,
)
fb.setupPost()
@@ -208,13 +227,16 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
font = fb.font
# Step 7: Generate and compile OpenType features
# Step 8: Generate and compile OpenType features
if not no_features:
print("Step 7: Generating OpenType features...")
print("Step 8: Generating OpenType features...")
kern_pairs = generate_kerning_pairs(glyphs)
print(f" {len(kern_pairs)} kerning pairs")
fea_code = generate_features(glyphs, kern_pairs, glyph_set)
jamo_data = get_jamo_gsub_data()
fea_code = generate_features(glyphs, kern_pairs, glyph_set,
replacewith_subs=replacewith_subs,
jamo_data=jamo_data)
if fea_code.strip():
print(" Compiling features with feaLib...")
@@ -228,14 +250,14 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
else:
print(" No features to compile")
else:
print("Step 7: Skipping OpenType features (--no-features)")
print("Step 8: Skipping OpenType features (--no-features)")
# Step 8: Add bitmap strike (EBDT/EBLC)
# Step 9: Add bitmap strike (EBDT/EBLC)
if not no_bitmap:
print("Step 8: Adding bitmap strike...")
print("Step 9: Adding bitmap strike...")
_add_bitmap_strike(font, glyphs, glyph_order, glyph_set)
else:
print("Step 8: Skipping bitmap strike (--no-bitmap)")
print("Step 9: Skipping bitmap strike (--no-bitmap)")
# Save
print(f"Saving to {output_path}...")
@@ -254,7 +276,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
ppem = 20
name_to_id = {name: idx for idx, name in enumerate(glyph_order)}
# Collect bitmap data — only glyphs with actual pixels
bitmap_entries = []
for name in glyph_order:
if name == ".notdef":
@@ -272,7 +293,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
if w == 0 or h == 0:
continue
# Pack rows into hex
hex_rows = []
for row in bitmap:
row_bytes = bytearray()
@@ -298,12 +318,9 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
print(" No bitmap data to embed")
return
# Split into contiguous GID runs for separate index subtables
# This avoids the empty-name problem for gaps
gid_sorted = sorted(bitmap_entries, key=lambda e: e['gid'])
gid_to_entry = {e['gid']: e for e in gid_sorted}
runs = [] # list of lists of entries
runs = []
current_run = [gid_sorted[0]]
for i in range(1, len(gid_sorted)):
if gid_sorted[i]['gid'] == gid_sorted[i-1]['gid'] + 1:
@@ -313,7 +330,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
current_run = [gid_sorted[i]]
runs.append(current_run)
# Build TTX XML for EBDT
ebdt_xml = ['<EBDT>', '<header version="2.0"/>', '<strikedata index="0">']
for entry in gid_sorted:
ebdt_xml.append(f' <cbdt_bitmap_format_1 name="{entry["name"]}">')
@@ -332,7 +348,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
ebdt_xml.append('</strikedata>')
ebdt_xml.append('</EBDT>')
# Build TTX XML for EBLC
all_gids = [e['gid'] for e in gid_sorted]
desc = -(SC.H - BASELINE_ROW)
@@ -371,8 +386,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
' </bitmapSizeTable>',
])
# One index subtable per contiguous run — no gaps
# Use format 1 (32-bit offsets) to avoid 16-bit overflow
for run in runs:
first_gid = run[0]['gid']
last_gid = run[-1]['gid']

View File

@@ -191,8 +191,9 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
info |= (1 << y)
ext_info[x] = info
# Extract glyph bitmap (all pixels except tag column)
bitmap_w = cell_w - 1
# Extract glyph bitmap: only pixels within the glyph's declared width.
# The tag column and any padding beyond width must be stripped.
bitmap_w = min(width, cell_w - 1) if width > 0 else 0
bitmap = []
for row in range(cell_h):
row_data = []
@@ -206,14 +207,98 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
return result
def _read_hangul_cell(image, column, row, cell_w=SC.W_HANGUL_BASE, cell_h=SC.H):
    """
    Read one cell of the Hangul johab sheet.

    The cell at grid position (column, row) is sampled pixel by pixel;
    any pixel with a non-zero low byte counts as "on".

    Returns:
        list of cell_h rows, each a list of cell_w ints (0 or 1).
    """
    x0 = column * cell_w
    y0 = row * cell_h
    return [
        [1 if (image.get_pixel(x0 + dx, y0 + dy) & 0xFF) != 0 else 0
         for dx in range(cell_w)]
        for dy in range(cell_h)
    ]
def parse_hangul_jamo_sheet(image, cell_w, cell_h):
    """
    Parse the Hangul Jamo sheet with correct row/column mapping.

    Layout in hangul_johab.tga:
    - Choseong (U+1100-U+115F, incl. the U+115F filler): column = cp - 0x1100, row = 1
    - Jungseong filler U+1160: column 0, row 15
    - Jungseong (U+1161-U+11A7): column = cp - 0x1160, row = 15
    - Jongseong (U+11A8-U+11FF): column = cp - 0x11A8 + 1, row = 17
      (columns start at 1 for U+11A8)
    - Extended Choseong (U+A960-U+A97F): column = 96 + offset, row = 1
    - Extended Jungseong (U+D7B0-U+D7C6): column = 72 + offset, row = 15
    - Extended Jongseong (U+D7CB-U+D7FB): column = 89 + offset, row = 17

    Each jamo gets a default-row bitmap.  Multiple variant rows exist for
    syllable composition (handled separately by hangul.py / GSUB).

    Returns:
        dict of codepoint -> ExtractedGlyph (all glyphs at width cell_w).
    """
    result = {}

    def read_block(first_cp, last_cp, col_offset, row):
        # Map codepoints [first_cp, last_cp] to sheet columns
        # (cp - first_cp + col_offset) of the given row.
        for cp in range(first_cp, last_cp + 1):
            bm = _read_hangul_cell(image, cp - first_cp + col_offset, row,
                                   cell_w, cell_h)
            result[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), bm)

    # U+1160 (Hangul Jungseong Filler) — column 0, row 15
    read_block(0x1160, 0x1160, 0, 15)
    # Choseong incl. U+115F (Hangul Choseong Filler); the original handled
    # U+115F in a separate, identical block — folded into one range here.
    read_block(0x1100, 0x115F, 0, 1)
    # Jungseong: column = cp - 0x1160, i.e. columns 1..71 of row 15
    read_block(0x1161, 0x11A7, 1, 15)
    # Jongseong: columns start at 1 for U+11A8
    read_block(0x11A8, 0x11FF, 1, 17)
    # Extended Choseong
    read_block(0xA960, 0xA97F, 96, 1)
    # Extended Jungseong
    read_block(0xD7B0, 0xD7C6, 72, 15)
    # Extended Jongseong (the original spelled this offset as 88 + 1)
    read_block(0xD7CB, 0xD7FB, 89, 17)
    return result
def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols):
"""Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym)."""
# Hangul Jamo sheet has special layout — handled separately
if sheet_index == SC.SHEET_HANGUL:
return parse_hangul_jamo_sheet(image, cell_w, cell_h)
code_range = SC.CODE_RANGE[sheet_index]
result = {}
fixed_width = {
SC.SHEET_CUSTOM_SYM: 20,
SC.SHEET_HANGUL: SC.W_HANGUL_BASE,
SC.SHEET_RUNIC: 9,
SC.SHEET_UNIHAN: SC.W_UNIHAN,
}.get(sheet_index, cell_w)
@@ -301,7 +386,7 @@ def _add_fixed_width_overrides(result):
def get_hangul_jamo_bitmaps(assets_dir):
"""
Extract raw Hangul jamo bitmaps from the Hangul sheet for composition.
Returns a function: (index, row) -> bitmap (list of list of int)
Returns a function: (column_index, row) -> bitmap (list of list of int)
"""
filename = SC.FILE_LIST[SC.SHEET_HANGUL]
filepath = os.path.join(assets_dir, filename)
@@ -326,3 +411,41 @@ def get_hangul_jamo_bitmaps(assets_dir):
return bitmap
return get_bitmap
def extract_hangul_jamo_variants(assets_dir):
    """
    Extract ALL Hangul jamo variant bitmaps from hangul_johab.tga.

    Returns dict of (column, row) -> bitmap for every non-empty cell.
    Used by hangul.py to store variants in PUA for GSUB assembly.

    Layout:
      Row 0:      Hangul Compatibility Jamo (U+3130-U+318F)
      Rows 1-14:  Choseong variants (row depends on jungseong context)
      Rows 15-16: Jungseong variants (15=no final, 16=with final)
      Rows 17-18: Jongseong variants (17=normal, 18=rightie jungseong)
      Rows 19-24: Additional choseong variants (giyeok remapping)
    """
    filepath = os.path.join(assets_dir, SC.FILE_LIST[SC.SHEET_HANGUL])
    if not os.path.exists(filepath):
        return {}
    image = read_tga(filepath)
    cell_w = SC.W_HANGUL_BASE
    cell_h = SC.H

    # Scan every cell the image actually contains; empty cells are skipped.
    variants = {}
    for row in range(image.height // cell_h):
        for col in range(image.width // cell_w):
            cell = _read_hangul_cell(image, col, row, cell_w, cell_h)
            if any(any(line) for line in cell):
                variants[(col, row)] = cell
    return variants

View File

@@ -1,15 +1,24 @@
"""
Compose 11,172 Hangul syllables (U+AC00-U+D7A3) from jamo sprite pieces.
Also composes Hangul Compatibility Jamo (U+3130-U+318F).
Also stores all jamo variant bitmaps in PUA for GSUB-based jamo assembly.
Ported from HangulCompositor.kt and TerrarumSansBitmap.kt.
"""
from typing import Dict, List, Tuple
from glyph_parser import ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps
from glyph_parser import (
ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps,
extract_hangul_jamo_variants, _read_hangul_cell, _empty_bitmap,
)
import sheet_config as SC
# PUA base for Hangul jamo variant storage.  Variants are keyed by their
# (column, row) position in the jamo sheet; _pua_for_jamo_variant encodes
# them as HANGUL_PUA_BASE + row * 256 + col.
# NOTE(review): with that encoding, sheet rows >= 10 map past 0xF0FFF —
# confirm the builder's internal-PUA cmap exclusion covers the whole block.
HANGUL_PUA_BASE = 0xF0600
def _compose_bitmaps(a, b, w, h):
"""OR two bitmaps together."""
@@ -32,9 +41,15 @@ def _compose_bitmap_into(target, source, w, h):
target[row][col] = 1
def _pua_for_jamo_variant(col, row):
    """
    PUA codepoint assigned to the jamo variant at (column, row) in the sheet.

    Encoding: HANGUL_PUA_BASE + (row << 8) + col, i.e. 256 column slots per
    sheet row.
    NOTE(review): rows >= 10 land past 0xF0FFF — verify this stays inside
    the range the builder treats as internal (no-cmap) PUA.
    """
    return HANGUL_PUA_BASE + (row << 8) + col
def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
"""
Compose all Hangul syllables and compatibility jamo.
Compose all Hangul syllables, compatibility jamo, and jamo variants.
Returns a dict of codepoint -> ExtractedGlyph.
"""
get_jamo = get_hangul_jamo_bitmaps(assets_dir)
@@ -94,5 +109,39 @@ def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
props = GlyphProps(width=advance_width)
result[c] = ExtractedGlyph(c, props, composed)
print(f" Hangul composition done: {len(result)} glyphs")
print(f" Hangul syllable composition done: {len(result)} glyphs")
# Store jamo variant bitmaps in PUA for GSUB assembly
print(" Extracting jamo variants for GSUB...")
variants = extract_hangul_jamo_variants(assets_dir)
variant_count = 0
for (col, row), bm in variants.items():
pua = _pua_for_jamo_variant(col, row)
if pua not in result:
result[pua] = ExtractedGlyph(pua, GlyphProps(width=cell_w), bm)
variant_count += 1
print(f" Stored {variant_count} jamo variant glyphs in PUA (0x{HANGUL_PUA_BASE:05X}+)")
print(f" Total Hangul glyphs: {len(result)}")
return result
def get_jamo_gsub_data():
    """
    Return the data opentype_features needs to build Hangul jamo GSUB lookups.

    Returns a dict with exactly two keys:
    - 'pua_fn': function(col, row) -> PUA codepoint of the variant glyph
      stored at that (column, row) of the jamo sheet
    - 'pua_base': HANGUL_PUA_BASE, the first codepoint of the variant block

    The row-selection rules themselves (ported from the Kotlin code) are
    applied by the feature generator, not returned here:
      Choseong row  = getHanInitialRow(i_cho, i_jung, i_jong)
      Jungseong row = 15 if no final, else 16
      Jongseong row = 17 if jungseong is not rightie, else 18
    """
    return {
        'pua_fn': _pua_for_jamo_variant,
        'pua_base': HANGUL_PUA_BASE,
    }

View File

@@ -28,7 +28,8 @@ def glyph_name(cp):
return f"u{cp:05X}" if cp <= 0xFFFFF else f"u{cp:06X}"
def generate_features(glyphs, kern_pairs, font_glyph_set):
def generate_features(glyphs, kern_pairs, font_glyph_set,
replacewith_subs=None, jamo_data=None):
"""
Generate complete OpenType feature code string.
@@ -36,6 +37,8 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
glyphs: dict of codepoint -> ExtractedGlyph
kern_pairs: dict of (left_cp, right_cp) -> kern_value_in_font_units
font_glyph_set: set of glyph names actually present in the font
replacewith_subs: list of (source_cp, [target_cp, ...]) for ccmp
jamo_data: dict with Hangul jamo GSUB data
Returns:
Feature code string for feaLib compilation.
"""
@@ -44,6 +47,16 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
def has(cp):
return glyph_name(cp) in font_glyph_set
# ccmp feature (replacewith directives + Hangul jamo decomposition)
ccmp_code = _generate_ccmp(replacewith_subs or [], has)
if ccmp_code:
parts.append(ccmp_code)
# Hangul jamo GSUB assembly
hangul_code = _generate_hangul_gsub(glyphs, has, jamo_data)
if hangul_code:
parts.append(hangul_code)
# kern feature
kern_code = _generate_kern(kern_pairs, has)
if kern_code:
@@ -82,6 +95,209 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
return '\n\n'.join(parts)
def _generate_ccmp(replacewith_subs, has):
    """
    Generate the ccmp feature for replacewith directives.

    Each entry becomes one multiple-substitution rule; entries whose source
    or any target glyph is missing from the font are silently dropped.
    Returns "" when nothing remains to substitute.
    """
    if not replacewith_subs:
        return ""
    rules = [
        f" sub {glyph_name(src_cp)} by "
        f"{' '.join(glyph_name(t) for t in target_cps)};"
        for src_cp, target_cps in replacewith_subs
        if has(src_cp) and all(has(t) for t in target_cps)
    ]
    if not rules:
        return ""
    return '\n'.join(
        ["feature ccmp {", " lookup ReplacewithExpansion {"]
        + rules
        + [" } ReplacewithExpansion;", "} ccmp;"]
    )
def _generate_hangul_gsub(glyphs, has, jamo_data):
    """
    Generate Hangul jamo GSUB lookups for syllable assembly.

    When a shaping engine encounters consecutive Hangul Jamo (Choseong +
    Jungseong + optional Jongseong), these lookups substitute each jamo
    with the correct positional variant from the PUA area.

    The row selection logic mirrors the Kotlin code:
    - Choseong row depends on which jungseong follows and whether jongseong exists
    - Jungseong row is 15 (no final) or 16 (with final)
    - Jongseong row is 17 (normal) or 18 (rightie jungseong)

    Returns the FEA source string, or "" when nothing could be generated.
    """
    if not jamo_data:
        return ""
    pua_fn = jamo_data['pua_fn']
    # Build contextual substitution lookups
    # Strategy: use ljmo/vjmo/tjmo features (standard Hangul OpenType features)
    #
    # ljmo: choseong → positional variant (depends on following jungseong)
    # vjmo: jungseong → positional variant (depends on whether jongseong follows)
    # tjmo: jongseong → positional variant (depends on preceding jungseong)
    lines = []
    # --- ljmo: Choseong variant selection ---
    # For each choseong, we need variants for different jungseong contexts.
    # Row 1 is the default (basic vowels like ㅏ).
    # We use contextual alternates: choseong' lookup X jungseong
    ljmo_lookups = []
    # Group jungseong indices by which choseong row they select.
    # From getHanInitialRow: the row depends on jungseong index (p) and has-final (f).
    # For GSUB, we pre-compute for f=0 (no final) since we can't know yet.
    row_to_jung_indices = {}
    for p in range(96):  # all possible jungseong indices
        # Without jongseong first; use i=1 to avoid giyeok edge cases
        try:
            row_nf = SC.get_han_initial_row(1, p, 0)
        except (ValueError, KeyError):
            continue
        if row_nf not in row_to_jung_indices:
            row_to_jung_indices[row_nf] = []
        row_to_jung_indices[row_nf].append(p)
    # For each unique choseong row, create a lookup that substitutes
    # the default choseong glyph with the variant at that row.
    for cho_row, jung_indices in sorted(row_to_jung_indices.items()):
        if cho_row == 1:
            continue  # row 1 is the default, no substitution needed
        lookup_name = f"ljmo_row{cho_row}"
        subs = []
        # For standard choseong (U+1100-U+115E)
        for cho_cp in range(0x1100, 0x115F):
            col = cho_cp - 0x1100
            variant_pua = pua_fn(col, cho_row)
            if has(cho_cp) and has(variant_pua):
                subs.append(f" sub {glyph_name(cho_cp)} by {glyph_name(variant_pua)};")
        if subs:
            lines.append(f"lookup {lookup_name} {{")
            lines.extend(subs)
            lines.append(f"}} {lookup_name};")
            ljmo_lookups.append((lookup_name, jung_indices))
    # --- vjmo: Jungseong variant selection ---
    # Row 15 = no jongseong following, Row 16 = jongseong follows.
    # Only the with-final variant needs a substitution (row 15 is default).
    vjmo_subs_16 = []  # with-final variant (row 16)
    for jung_cp in range(0x1161, 0x11A8):
        col = jung_cp - 0x1160
        variant_pua = pua_fn(col, 16)
        if has(jung_cp) and has(variant_pua):
            vjmo_subs_16.append(f" sub {glyph_name(jung_cp)} by {glyph_name(variant_pua)};")
    if vjmo_subs_16:
        lines.append("lookup vjmo_withfinal {")
        lines.extend(vjmo_subs_16)
        lines.append("} vjmo_withfinal;")
    # --- tjmo: Jongseong variant selection ---
    # Row 17 = normal (default), Row 18 = after rightie jungseong
    tjmo_subs_18 = []
    for jong_cp in range(0x11A8, 0x1200):
        col = jong_cp - 0x11A8 + 1
        variant_pua = pua_fn(col, 18)
        if has(jong_cp) and has(variant_pua):
            tjmo_subs_18.append(f" sub {glyph_name(jong_cp)} by {glyph_name(variant_pua)};")
    if tjmo_subs_18:
        lines.append("lookup tjmo_rightie {")
        lines.extend(tjmo_subs_18)
        lines.append("} tjmo_rightie;")
    # --- Build the actual features using contextual substitution ---
    # Class definitions must be emitted before the rules that use them,
    # hence the two passes over ljmo_lookups below.
    feature_lines = []
    # ljmo feature: contextual choseong substitution
    if ljmo_lookups:
        feature_lines.append("feature ljmo {")
        feature_lines.append("  script hang;")
        # Pass 1: define one jungseong class per variant row.
        for lookup_name, jung_indices in ljmo_lookups:
            jung_glyphs = []
            for idx in jung_indices:
                cp = 0x1160 + idx
                if has(cp):
                    jung_glyphs.append(glyph_name(cp))
            if not jung_glyphs:
                continue
            class_name = f"@jung_for_{lookup_name}"
            feature_lines.append(f" {class_name} = [{' '.join(jung_glyphs)}];")
        # Pass 2: contextual rules — choseong' [lookup X] jungseong.
        # NOTE(review): @choseong is (re)defined inside this loop, once per
        # lookup, always with the same members — feaLib appears to tolerate
        # the redefinition, but hoisting it above the loop would be cleaner.
        for lookup_name, jung_indices in ljmo_lookups:
            jung_glyphs = []
            for idx in jung_indices:
                cp = 0x1160 + idx
                if has(cp):
                    jung_glyphs.append(glyph_name(cp))
            if not jung_glyphs:
                continue
            class_name = f"@jung_for_{lookup_name}"
            # Build choseong class
            cho_glyphs = [glyph_name(cp) for cp in range(0x1100, 0x115F) if has(cp)]
            if cho_glyphs:
                feature_lines.append(f" @choseong = [{' '.join(cho_glyphs)}];")
                feature_lines.append(f" sub @choseong' lookup {lookup_name} {class_name};")
        feature_lines.append("} ljmo;")
    # vjmo feature: jungseong gets row 16 variant when followed by jongseong
    if vjmo_subs_16:
        jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
        if jong_glyphs:
            feature_lines.append("feature vjmo {")
            feature_lines.append(" script hang;")
            jung_glyphs = [glyph_name(cp) for cp in range(0x1161, 0x11A8) if has(cp)]
            feature_lines.append(f" @jongseong = [{' '.join(jong_glyphs)}];")
            feature_lines.append(f" @jungseong = [{' '.join(jung_glyphs)}];")
            feature_lines.append(f" sub @jungseong' lookup vjmo_withfinal @jongseong;")
            feature_lines.append("} vjmo;")
    # tjmo feature: jongseong gets row 18 variant when after rightie jungseong
    if tjmo_subs_18:
        rightie_glyphs = []
        for idx in sorted(SC.JUNGSEONG_RIGHTIE):
            cp = 0x1160 + idx
            if has(cp):
                rightie_glyphs.append(glyph_name(cp))
            # Also match the PUA with-final variant (row 16), since vjmo may
            # already have substituted the jungseong before tjmo applies.
            pua16 = pua_fn(idx, 16)
            if has(pua16):
                rightie_glyphs.append(glyph_name(pua16))
        if rightie_glyphs:
            feature_lines.append("feature tjmo {")
            feature_lines.append(" script hang;")
            feature_lines.append(f" @rightie_jung = [{' '.join(rightie_glyphs)}];")
            jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
            feature_lines.append(f" @jongseong_all = [{' '.join(jong_glyphs)}];")
            feature_lines.append(f" sub @rightie_jung @jongseong_all' lookup tjmo_rightie;")
            feature_lines.append("} tjmo;")
    if not lines and not feature_lines:
        return ""
    return '\n'.join(lines + [''] + feature_lines)
def _generate_kern(kern_pairs, has):
"""Generate kern feature from pair positioning data."""
if not kern_pairs: