old hangul composition

This commit is contained in:
minjaesong
2026-03-02 07:21:28 +09:00
parent 602923f5bc
commit b3acbf1c0e
4 changed files with 275 additions and 125 deletions

Binary file not shown.

View File

@@ -298,6 +298,14 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
if cp in _unihan_cps: if cp in _unihan_cps:
y_offset -= ((SC.H - SC.H_UNIHAN) // 2) * SCALE y_offset -= ((SC.H - SC.H_UNIHAN) // 2) * SCALE
# Hangul jungseong/jongseong PUA variants (rows 15-18) have zero
# advance and overlay the preceding choseong. Shift their outlines
# left by one syllable cell width so they render at the same position.
if cp >= HANGUL_PUA_BASE:
_pua_row = (cp - HANGUL_PUA_BASE) // 256
if 15 <= _pua_row <= 18:
x_offset -= SC.W_HANGUL_BASE * SCALE
contours = trace_bitmap(g.bitmap, g.props.width) contours = trace_bitmap(g.bitmap, g.props.width)
pen = T2CharStringPen(advance, None) pen = T2CharStringPen(advance, None)

View File

@@ -118,7 +118,10 @@ def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
for (col, row), bm in variants.items(): for (col, row), bm in variants.items():
pua = _pua_for_jamo_variant(col, row) pua = _pua_for_jamo_variant(col, row)
if pua not in result: if pua not in result:
result[pua] = ExtractedGlyph(pua, GlyphProps(width=cell_w), bm) # Jungseong (rows 15-16) and jongseong (rows 17-18) overlay the
# choseong, so they need zero advance width.
w = 0 if 15 <= row <= 18 else cell_w
result[pua] = ExtractedGlyph(pua, GlyphProps(width=w), bm)
variant_count += 1 variant_count += 1
print(f" Stored {variant_count} jamo variant glyphs in PUA (0x{HANGUL_PUA_BASE:05X}+)") print(f" Stored {variant_count} jamo variant glyphs in PUA (0x{HANGUL_PUA_BASE:05X}+)")

View File

@@ -140,173 +140,312 @@ def _generate_hangul_gsub(glyphs, has, jamo_data):
with the correct positional variant from the PUA area. with the correct positional variant from the PUA area.
The row selection logic mirrors the Kotlin code: The row selection logic mirrors the Kotlin code:
- Choseong row depends on which jungseong follows and whether jongseong exists - Choseong row depends on which jungseong follows AND whether jongseong
exists (row increments by 1 when jongseong is present). Giyeok-class
choseong get remapped rows when combined with JUNGSEONG_UU.
- Jungseong row is 15 (no final) or 16 (with final) - Jungseong row is 15 (no final) or 16 (with final)
- Jongseong row is 17 (normal) or 18 (rightie jungseong) - Jongseong row is 17 (normal) or 18 (after rightie jungseong)
""" """
if not jamo_data: if not jamo_data:
return "" return ""
pua_fn = jamo_data['pua_fn'] pua_fn = jamo_data['pua_fn']
# Build contextual substitution lookups # Build codepoint lists (standard + extended jamo ranges)
# Strategy: use ljmo/vjmo/tjmo features (standard Hangul OpenType features) cho_ranges = list(range(0x1100, 0x115F)) + list(range(0xA960, 0xA97C))
# jung_ranges = list(range(0x1161, 0x11A8)) + list(range(0xD7B0, 0xD7C7))
# ljmo: choseong → positional variant (depends on following jungseong) jong_ranges = list(range(0x11A8, 0x1200)) + list(range(0xD7CB, 0xD7FC))
# vjmo: jungseong → positional variant (depends on whether jongseong follows)
# tjmo: jongseong → positional variant (depends on preceding jungseong) cho_cps = [cp for cp in cho_ranges if has(cp)]
jung_cps = [cp for cp in jung_ranges if has(cp)]
jong_cps = [cp for cp in jong_ranges if has(cp)]
if not cho_cps or not jung_cps:
return ""
def _jung_idx(cp):
return SC.to_hangul_jungseong_index(cp)
def _cho_col(cp):
return SC.to_hangul_choseong_index(cp)
def _jong_col(cp):
return SC.to_hangul_jongseong_index(cp)
lines = [] lines = []
# --- ljmo: Choseong variant selection --- # ----------------------------------------------------------------
# For each choseong, we need variants for different jungseong contexts. # Step 1: Compute choseong row mapping
# Row 1 is the default (basic vowels like ㅏ). # ----------------------------------------------------------------
# We use contextual alternates: choseong' lookup X jungseong # Group jungseong codepoints by the choseong row they produce.
ljmo_lookups = [] # Separate non-giyeok (general) from giyeok (remapped) mappings.
# Key: (row, has_jong) → [jung_cps]
jung_groups_general = {} # for non-giyeok choseong (i=1)
jung_groups_giyeok = {} # for giyeok choseong where row differs
# Group jungseong indices by which choseong row they select for jcp in jung_cps:
# From getHanInitialRow: the row depends on jungseong index (p) and has-final (f) idx = _jung_idx(jcp)
# For GSUB, we pre-compute for f=0 (no final) since we can't know yet if idx is None:
row_to_jung_indices = {}
for p in range(96): # all possible jungseong indices
# Without jongseong first; use i=1 to avoid giyeok edge cases
try:
row_nf = SC.get_han_initial_row(1, p, 0)
except (ValueError, KeyError):
continue continue
if row_nf not in row_to_jung_indices: for f in [0, 1]:
row_to_jung_indices[row_nf] = [] try:
row_to_jung_indices[row_nf].append(p) row_ng = SC.get_han_initial_row(1, idx, f)
except (ValueError, KeyError):
continue
jung_groups_general.setdefault((row_ng, f), []).append(jcp)
# For each unique choseong row, create a lookup that substitutes # Giyeok choseong get remapped rows for JUNGSEONG_UU
# the default choseong glyph with the variant at that row if idx in SC.JUNGSEONG_UU:
for cho_row, jung_indices in sorted(row_to_jung_indices.items()): try:
if cho_row == 1: row_g = SC.get_han_initial_row(0, idx, f)
continue # row 1 is the default, no substitution needed except (ValueError, KeyError):
continue
if row_g != row_ng:
jung_groups_giyeok.setdefault((row_g, f), []).append(jcp)
# Identify giyeok choseong codepoints
giyeok_cho_cps = []
for ccp in cho_cps:
try:
col = _cho_col(ccp)
if col in SC.CHOSEONG_GIYEOKS:
giyeok_cho_cps.append(ccp)
except ValueError:
pass
# Collect all unique choseong rows
all_cho_rows = set()
for (row, _f) in jung_groups_general:
all_cho_rows.add(row)
for (row, _f) in jung_groups_giyeok:
all_cho_rows.add(row)
# ----------------------------------------------------------------
# Step 2: Create choseong substitution lookups (one per row)
# ----------------------------------------------------------------
cho_lookup_names = {}
for cho_row in sorted(all_cho_rows):
lookup_name = f"ljmo_row{cho_row}" lookup_name = f"ljmo_row{cho_row}"
subs = [] subs = []
for ccp in cho_cps:
# For standard choseong (U+1100-U+115E) try:
for cho_cp in range(0x1100, 0x115F): col = _cho_col(ccp)
col = cho_cp - 0x1100 except ValueError:
continue
variant_pua = pua_fn(col, cho_row) variant_pua = pua_fn(col, cho_row)
if has(cho_cp) and has(variant_pua): if has(variant_pua):
subs.append(f" sub {glyph_name(cho_cp)} by {glyph_name(variant_pua)};") subs.append(f" sub {glyph_name(ccp)} by {glyph_name(variant_pua)};")
if subs: if subs:
lines.append(f"lookup {lookup_name} {{") lines.append(f"lookup {lookup_name} {{")
lines.extend(subs) lines.extend(subs)
lines.append(f"}} {lookup_name};") lines.append(f"}} {lookup_name};")
ljmo_lookups.append((lookup_name, jung_indices)) lines.append("")
cho_lookup_names[cho_row] = lookup_name
# --- vjmo: Jungseong variant selection --- # ----------------------------------------------------------------
# Row 15 = no jongseong following, Row 16 = jongseong follows # Step 3: Create jungseong substitution lookups (row 15 and 16)
# We need two lookups # ----------------------------------------------------------------
vjmo_subs_16 = [] # with-final variant (row 16) vjmo_has = {}
for jung_cp in range(0x1161, 0x11A8): for jung_row in [15, 16]:
col = jung_cp - 0x1160 lookup_name = f"vjmo_row{jung_row}"
variant_pua = pua_fn(col, 16) subs = []
if has(jung_cp) and has(variant_pua): for jcp in jung_cps:
vjmo_subs_16.append(f" sub {glyph_name(jung_cp)} by {glyph_name(variant_pua)};") idx = _jung_idx(jcp)
if idx is None:
continue
variant_pua = pua_fn(idx, jung_row)
if has(variant_pua):
subs.append(f" sub {glyph_name(jcp)} by {glyph_name(variant_pua)};")
if subs:
lines.append(f"lookup {lookup_name} {{")
lines.extend(subs)
lines.append(f"}} {lookup_name};")
lines.append("")
vjmo_has[jung_row] = True
if vjmo_subs_16: # ----------------------------------------------------------------
lines.append("lookup vjmo_withfinal {") # Step 4: Create jongseong substitution lookups (row 17 and 18)
lines.extend(vjmo_subs_16) # ----------------------------------------------------------------
lines.append("} vjmo_withfinal;") tjmo_has = {}
for jong_row in [17, 18]:
lookup_name = f"tjmo_row{jong_row}"
subs = []
for jcp in jong_cps:
col = _jong_col(jcp)
if col is None:
continue
variant_pua = pua_fn(col, jong_row)
if has(variant_pua):
subs.append(f" sub {glyph_name(jcp)} by {glyph_name(variant_pua)};")
if subs:
lines.append(f"lookup {lookup_name} {{")
lines.extend(subs)
lines.append(f"}} {lookup_name};")
lines.append("")
tjmo_has[jong_row] = True
# --- tjmo: Jongseong variant selection --- # ----------------------------------------------------------------
# Row 17 = normal, Row 18 = after rightie jungseong # Step 5: Generate ljmo feature (choseong contextual substitution)
tjmo_subs_18 = [] # ----------------------------------------------------------------
for jong_cp in range(0x11A8, 0x1200):
col = jong_cp - 0x11A8 + 1
variant_pua = pua_fn(col, 18)
if has(jong_cp) and has(variant_pua):
tjmo_subs_18.append(f" sub {glyph_name(jong_cp)} by {glyph_name(variant_pua)};")
if tjmo_subs_18:
lines.append("lookup tjmo_rightie {")
lines.extend(tjmo_subs_18)
lines.append("} tjmo_rightie;")
# --- Build the actual features using contextual substitution ---
# Jungseong class definitions for contextual rules
# Build classes of jungseong glyphs that trigger specific choseong rows
feature_lines = [] feature_lines = []
# ljmo feature: contextual choseong substitution if cho_lookup_names:
if ljmo_lookups:
feature_lines.append("feature ljmo {") feature_lines.append("feature ljmo {")
feature_lines.append(" script hang;") feature_lines.append(" script hang;")
for lookup_name, jung_indices in ljmo_lookups:
# Build jungseong class for this row
jung_glyphs = []
for idx in jung_indices:
cp = 0x1160 + idx
if has(cp):
jung_glyphs.append(glyph_name(cp))
if not jung_glyphs:
continue
class_name = f"@jung_for_{lookup_name}"
feature_lines.append(f" {class_name} = [{' '.join(jung_glyphs)}];")
# Contextual rules: choseong' [lookup X] jungseong # Define glyph classes
# For each choseong, if followed by a jungseong in the right class, cho_names = [glyph_name(c) for c in cho_cps]
# apply the variant lookup feature_lines.append(f" @cho_all = [{' '.join(cho_names)}];")
for lookup_name, jung_indices in ljmo_lookups:
jung_glyphs = [] if giyeok_cho_cps:
for idx in jung_indices: giyeok_names = [glyph_name(c) for c in giyeok_cho_cps]
cp = 0x1160 + idx feature_lines.append(f" @cho_giyeok = [{' '.join(giyeok_names)}];")
if has(cp):
jung_glyphs.append(glyph_name(cp)) if jong_cps:
if not jung_glyphs: jong_names = [glyph_name(c) for c in jong_cps]
feature_lines.append(f" @jong_all = [{' '.join(jong_names)}];")
# Define jungseong group classes (unique names)
cls_idx = [0]
def _make_jung_class(jcps, prefix):
name = f"@jung_{prefix}_{cls_idx[0]}"
cls_idx[0] += 1
feature_lines.append(f" {name} = [{' '.join(glyph_name(c) for c in jcps)}];")
return name
# Giyeok-specific rules first (most specific: giyeok cho + UU jung)
# With-jong before without-jong for each group.
giyeok_rules = []
for (row, f) in sorted(jung_groups_giyeok.keys()):
if row not in cho_lookup_names:
continue continue
class_name = f"@jung_for_{lookup_name}" jcps = jung_groups_giyeok[(row, f)]
# Build choseong class cls_name = _make_jung_class(jcps, "gk")
cho_glyphs = [glyph_name(cp) for cp in range(0x1100, 0x115F) if has(cp)] giyeok_rules.append((row, f, cls_name))
if cho_glyphs:
feature_lines.append(f" @choseong = [{' '.join(cho_glyphs)}];") # Sort: with-jong (f=1) before without-jong (f=0)
feature_lines.append(f" sub @choseong' lookup {lookup_name} {class_name};") for row, f, cls_name in sorted(giyeok_rules, key=lambda x: (-x[1], x[0])):
lookup = cho_lookup_names[row]
if f == 1 and jong_cps:
feature_lines.append(
f" sub @cho_giyeok' lookup {lookup} {cls_name} @jong_all;")
else:
feature_lines.append(
f" sub @cho_giyeok' lookup {lookup} {cls_name};")
# General rules: with-jong first, then without-jong
general_rules = []
for (row, f) in sorted(jung_groups_general.keys()):
if row not in cho_lookup_names:
continue
jcps = jung_groups_general[(row, f)]
cls_name = _make_jung_class(jcps, "ng")
general_rules.append((row, f, cls_name))
# With-jong rules
for row, f, cls_name in sorted(general_rules, key=lambda x: x[0]):
if f != 1:
continue
if not jong_cps:
continue
lookup = cho_lookup_names[row]
feature_lines.append(
f" sub @cho_all' lookup {lookup} {cls_name} @jong_all;")
# Without-jong rules (fallback)
for row, f, cls_name in sorted(general_rules, key=lambda x: x[0]):
if f != 0:
continue
lookup = cho_lookup_names[row]
feature_lines.append(
f" sub @cho_all' lookup {lookup} {cls_name};")
feature_lines.append("} ljmo;") feature_lines.append("} ljmo;")
feature_lines.append("")
# vjmo feature: jungseong gets row 16 variant when followed by jongseong # ----------------------------------------------------------------
if vjmo_subs_16: # Step 6: Generate vjmo feature (jungseong contextual substitution)
jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)] # ----------------------------------------------------------------
if jong_glyphs: if 15 in vjmo_has:
feature_lines.append("feature vjmo {") feature_lines.append("feature vjmo {")
feature_lines.append(" script hang;") feature_lines.append(" script hang;")
jung_glyphs = [glyph_name(cp) for cp in range(0x1161, 0x11A8) if has(cp)]
feature_lines.append(f" @jongseong = [{' '.join(jong_glyphs)}];")
feature_lines.append(f" @jungseong = [{' '.join(jung_glyphs)}];")
feature_lines.append(f" sub @jungseong' lookup vjmo_withfinal @jongseong;")
feature_lines.append("} vjmo;")
# tjmo feature: jongseong gets row 18 variant when after rightie jungseong jung_names = [glyph_name(c) for c in jung_cps]
if tjmo_subs_18: feature_lines.append(f" @jungseong = [{' '.join(jung_names)}];")
if jong_cps and 16 in vjmo_has:
jong_names = [glyph_name(c) for c in jong_cps]
feature_lines.append(f" @jongseong = [{' '.join(jong_names)}];")
feature_lines.append(" sub @jungseong' lookup vjmo_row16 @jongseong;")
# Fallback: no jongseong following → row 15
feature_lines.append(" sub @jungseong' lookup vjmo_row15;")
feature_lines.append("} vjmo;")
feature_lines.append("")
# ----------------------------------------------------------------
# Step 7: Generate tjmo feature (jongseong contextual substitution)
# ----------------------------------------------------------------
if 17 in tjmo_has and jong_cps:
feature_lines.append("feature tjmo {")
feature_lines.append(" script hang;")
# Rightie jungseong class: original + PUA row 15/16 variants
rightie_glyphs = [] rightie_glyphs = []
for idx in sorted(SC.JUNGSEONG_RIGHTIE): for idx in sorted(SC.JUNGSEONG_RIGHTIE):
# Original Unicode jungseong
cp = 0x1160 + idx cp = 0x1160 + idx
if has(cp): if has(cp):
rightie_glyphs.append(glyph_name(cp)) rightie_glyphs.append(glyph_name(cp))
# Also check PUA variants (row 16) # PUA variants (after vjmo substitution)
pua16 = pua_fn(idx, 16) for row in [15, 16]:
if has(pua16): pua = pua_fn(idx, row)
rightie_glyphs.append(glyph_name(pua16)) if has(pua):
if rightie_glyphs: rightie_glyphs.append(glyph_name(pua))
feature_lines.append("feature tjmo {") # Extended jungseong that are rightie
feature_lines.append(" script hang;") for jcp in jung_cps:
feature_lines.append(f" @rightie_jung = [{' '.join(rightie_glyphs)}];") if jcp < 0xD7B0:
jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)] continue
feature_lines.append(f" @jongseong_all = [{' '.join(jong_glyphs)}];") idx = _jung_idx(jcp)
feature_lines.append(f" sub @rightie_jung @jongseong_all' lookup tjmo_rightie;") if idx is not None and idx in SC.JUNGSEONG_RIGHTIE:
feature_lines.append("} tjmo;") rightie_glyphs.append(glyph_name(jcp))
# All jungseong variants class (original + PUA row 15/16)
all_jung_variants = []
for jcp in jung_cps:
idx = _jung_idx(jcp)
if idx is None:
continue
all_jung_variants.append(glyph_name(jcp))
for row in [15, 16]:
pua = pua_fn(idx, row)
if has(pua):
all_jung_variants.append(glyph_name(pua))
jong_names = [glyph_name(c) for c in jong_cps]
feature_lines.append(f" @jongseong_all = [{' '.join(jong_names)}];")
if rightie_glyphs and 18 in tjmo_has:
feature_lines.append(
f" @rightie_jung = [{' '.join(rightie_glyphs)}];")
feature_lines.append(
" sub @rightie_jung @jongseong_all' lookup tjmo_row18;")
if all_jung_variants:
feature_lines.append(
f" @all_jung_variants = [{' '.join(all_jung_variants)}];")
feature_lines.append(
" sub @all_jung_variants @jongseong_all' lookup tjmo_row17;")
feature_lines.append("} tjmo;")
feature_lines.append("")
if not lines and not feature_lines: if not lines and not feature_lines:
return "" return ""
return '\n'.join(lines + [''] + feature_lines) return '\n'.join(lines + feature_lines)
def _generate_kern(kern_pairs, has): def _generate_kern(kern_pairs, has):