This commit is contained in:
minjaesong
2026-02-23 19:32:25 +09:00
parent 5e2cacd491
commit 949b6aa777
6 changed files with 476 additions and 71 deletions

4
.gitignore vendored
View File

@@ -13,3 +13,7 @@ tmp_*
*.bak *.bak
*-autosave.kra *-autosave.kra
.directory .directory
*/__pycache__
OTFbuild/*.ttf
OTFbuild/*.otf

View File

@@ -33,8 +33,8 @@ def main():
) )
parser.add_argument( parser.add_argument(
"-o", "--output", "-o", "--output",
default="OTFbuild/TerrarumSansBitmap.ttf", default="OTFbuild/TerrarumSansBitmap.otf",
help="Output TTF file path (default: OTFbuild/TerrarumSansBitmap.ttf)" help="Output OTF file path (default: OTFbuild/TerrarumSansBitmap.otf)"
) )
parser.add_argument( parser.add_argument(
"--no-bitmap", "--no-bitmap",

View File

@@ -3,25 +3,26 @@ Orchestrate fonttools TTFont assembly.
1. Parse all sheets -> glyphs dict 1. Parse all sheets -> glyphs dict
2. Compose Hangul -> add to dict 2. Compose Hangul -> add to dict
3. Create glyph order and cmap 3. Expand replacewith directives
4. Trace all bitmaps -> glyf table 4. Create glyph order and cmap
5. Set hmtx, hhea, OS/2, head, name, post 5. Trace all bitmaps -> CFF charstrings
6. Generate and compile OpenType features via feaLib 6. Set hmtx, hhea, OS/2, head, name, post
7. Add EBDT/EBLC bitmap strike at ppem=20 7. Generate and compile OpenType features via feaLib
8. Save TTF 8. Add EBDT/EBLC bitmap strike at ppem=20
9. Save OTF
""" """
import time import time
from typing import Dict from typing import Dict
from fontTools.fontBuilder import FontBuilder from fontTools.fontBuilder import FontBuilder
from fontTools.pens.ttGlyphPen import TTGlyphPen from fontTools.pens.t2CharStringPen import T2CharStringPen
from fontTools.feaLib.builder import addOpenTypeFeatures from fontTools.feaLib.builder import addOpenTypeFeatures
from fontTools.ttLib import TTFont from fontTools.ttLib import TTFont
import io import io
from glyph_parser import ExtractedGlyph, parse_all_sheets from glyph_parser import ExtractedGlyph, GlyphProps, parse_all_sheets
from hangul import compose_hangul from hangul import compose_hangul, get_jamo_gsub_data, HANGUL_PUA_BASE
from bitmap_tracer import trace_bitmap, draw_glyph_to_pen, SCALE, BASELINE_ROW from bitmap_tracer import trace_bitmap, draw_glyph_to_pen, SCALE, BASELINE_ROW
from keming_machine import generate_kerning_pairs from keming_machine import generate_kerning_pairs
from opentype_features import generate_features, glyph_name from opentype_features import generate_features, glyph_name
@@ -30,12 +31,6 @@ import sheet_config as SC
# Codepoints that get cmap entries (user-visible) # Codepoints that get cmap entries (user-visible)
# PUA forms used internally by GSUB get glyphs but NO cmap entries # PUA forms used internally by GSUB get glyphs but NO cmap entries
_PUA_CMAP_RANGES = [
range(0xE000, 0xE100), # Custom symbols
range(0xF0520, 0xF0580), # Codestyle ASCII
]
def _should_have_cmap(cp): def _should_have_cmap(cp):
"""Determine if a codepoint should have a cmap entry.""" """Determine if a codepoint should have a cmap entry."""
# Standard Unicode characters always get cmap entries # Standard Unicode characters always get cmap entries
@@ -61,9 +56,8 @@ def _should_have_cmap(cp):
# Everything in standard Unicode ranges (up to 0xFFFF plus SMP) # Everything in standard Unicode ranges (up to 0xFFFF plus SMP)
if cp <= 0xFFFF: if cp <= 0xFFFF:
return True return True
# Internal PUA forms (Devanagari, Tamil, Sundanese, Bulgarian, Serbian internals) # Internal PUA forms — GSUB-only, no cmap
# These are GSUB-only and should NOT have cmap entries if 0xF0000 <= cp <= 0xF0FFF:
if 0xF0000 <= cp <= 0xF051F:
return False return False
# Internal control characters # Internal control characters
if 0xFFE00 <= cp <= 0xFFFFF: if 0xFFE00 <= cp <= 0xFFFFF:
@@ -71,8 +65,30 @@ def _should_have_cmap(cp):
return True return True
def _expand_replacewith(glyphs):
"""
Find glyphs with 'replacewith' directive and generate GSUB multiple
substitution data. Returns list of (source_cp, [target_cp, ...]).
A replacewith glyph's extInfo contains up to 7 codepoints that the
glyph expands to (e.g. U+01C7 "LJ" → [0x4C, 0x4A]).
"""
replacements = []
for cp, g in glyphs.items():
if g.props.is_pragma("replacewith"):
targets = []
count = g.props.required_ext_info_count()
for i in range(count):
val = g.props.ext_info[i]
if val != 0:
targets.append(val)
if targets:
replacements.append((cp, targets))
return replacements
def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
"""Build the complete TTF font.""" """Build the complete OTF font."""
t0 = time.time() t0 = time.time()
# Step 1: Parse all sheets # Step 1: Parse all sheets
@@ -86,8 +102,13 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
glyphs.update(hangul_glyphs) glyphs.update(hangul_glyphs)
print(f" Total glyphs after Hangul: {len(glyphs)}") print(f" Total glyphs after Hangul: {len(glyphs)}")
# Step 3: Create glyph order and cmap # Step 3: Expand replacewith directives
print("Step 3: Building glyph order and cmap...") print("Step 3: Processing replacewith directives...")
replacewith_subs = _expand_replacewith(glyphs)
print(f" Found {len(replacewith_subs)} replacewith substitutions")
# Step 4: Create glyph order and cmap
print("Step 4: Building glyph order and cmap...")
glyph_order = [".notdef"] glyph_order = [".notdef"]
cmap = {} cmap = {}
glyph_set = set() glyph_set = set()
@@ -111,34 +132,31 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
print(f" Glyph order: {len(glyph_order)} glyphs, cmap: {len(cmap)} entries") print(f" Glyph order: {len(glyph_order)} glyphs, cmap: {len(cmap)} entries")
# Step 4: Build font with fonttools # Step 5: Build font with fonttools (CFF/OTF)
print("Step 4: Building font tables...") print("Step 5: Building font tables...")
fb = FontBuilder(SC.UNITS_PER_EM, isTTF=True) fb = FontBuilder(SC.UNITS_PER_EM, isTTF=False)
fb.setupGlyphOrder(glyph_order) fb.setupGlyphOrder(glyph_order)
# Build cmap
fb.setupCharacterMap(cmap) fb.setupCharacterMap(cmap)
# Step 5: Trace bitmaps -> glyf table # Step 6: Trace bitmaps -> CFF charstrings
print("Step 5: Tracing bitmaps to outlines...") print("Step 6: Tracing bitmaps to CFF outlines...")
glyph_table = {}
pen = TTGlyphPen(None) charstrings = {}
# .notdef glyph (empty box) # .notdef glyph (empty box)
pen = T2CharStringPen(SC.UNITS_PER_EM // 2, None)
pen.moveTo((0, 0)) pen.moveTo((0, 0))
pen.lineTo((0, SC.ASCENT)) pen.lineTo((0, SC.ASCENT))
pen.lineTo((SC.UNITS_PER_EM // 2, SC.ASCENT)) pen.lineTo((SC.UNITS_PER_EM // 2, SC.ASCENT))
pen.lineTo((SC.UNITS_PER_EM // 2, 0)) pen.lineTo((SC.UNITS_PER_EM // 2, 0))
pen.closePath() pen.closePath()
# Inner box
_m = 2 * SCALE _m = 2 * SCALE
pen.moveTo((_m, _m)) pen.moveTo((_m, _m))
pen.lineTo((SC.UNITS_PER_EM // 2 - _m, _m)) pen.lineTo((SC.UNITS_PER_EM // 2 - _m, _m))
pen.lineTo((SC.UNITS_PER_EM // 2 - _m, SC.ASCENT - _m)) pen.lineTo((SC.UNITS_PER_EM // 2 - _m, SC.ASCENT - _m))
pen.lineTo((_m, SC.ASCENT - _m)) pen.lineTo((_m, SC.ASCENT - _m))
pen.closePath() pen.closePath()
glyph_table[".notdef"] = pen.glyph() charstrings[".notdef"] = pen.getCharString()
traced_count = 0 traced_count = 0
for cp in sorted_cps: for cp in sorted_cps:
@@ -149,25 +167,26 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
if name == ".notdef" or name not in glyph_set: if name == ".notdef" or name not in glyph_set:
continue continue
advance = g.props.width * SCALE
contours = trace_bitmap(g.bitmap, g.props.width) contours = trace_bitmap(g.bitmap, g.props.width)
pen = TTGlyphPen(None) pen = T2CharStringPen(advance, None)
if contours: if contours:
draw_glyph_to_pen(contours, pen) draw_glyph_to_pen(contours, pen)
glyph_table[name] = pen.glyph()
traced_count += 1 traced_count += 1
else: charstrings[name] = pen.getCharString()
# Empty glyph (space, zero-width, etc.)
pen.moveTo((0, 0))
pen.endPath()
glyph_table[name] = pen.glyph()
print(f" Traced {traced_count} glyphs with outlines") print(f" Traced {traced_count} glyphs with outlines")
fb.setupGlyf(glyph_table) fb.setupCFF(
psName="TerrarumSansBitmap-Regular",
fontInfo={},
charStringsDict=charstrings,
privateDict={},
)
# Step 6: Set metrics # Step 7: Set metrics
print("Step 6: Setting font metrics...") print("Step 7: Setting font metrics...")
metrics = {} metrics = {}
metrics[".notdef"] = (SC.UNITS_PER_EM // 2, 0) metrics[".notdef"] = (SC.UNITS_PER_EM // 2, 0)
@@ -179,7 +198,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
if name == ".notdef" or name not in glyph_set: if name == ".notdef" or name not in glyph_set:
continue continue
advance = g.props.width * SCALE advance = g.props.width * SCALE
metrics[name] = (advance, 0) # (advance_width, lsb) metrics[name] = (advance, 0)
fb.setupHorizontalMetrics(metrics) fb.setupHorizontalMetrics(metrics)
fb.setupHorizontalHeader( fb.setupHorizontalHeader(
@@ -200,7 +219,7 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
usWinDescent=SC.DESCENT, usWinDescent=SC.DESCENT,
sxHeight=SC.X_HEIGHT, sxHeight=SC.X_HEIGHT,
sCapHeight=SC.CAP_HEIGHT, sCapHeight=SC.CAP_HEIGHT,
fsType=0, # Installable embedding fsType=0,
) )
fb.setupPost() fb.setupPost()
@@ -208,13 +227,16 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
font = fb.font font = fb.font
# Step 7: Generate and compile OpenType features # Step 8: Generate and compile OpenType features
if not no_features: if not no_features:
print("Step 7: Generating OpenType features...") print("Step 8: Generating OpenType features...")
kern_pairs = generate_kerning_pairs(glyphs) kern_pairs = generate_kerning_pairs(glyphs)
print(f" {len(kern_pairs)} kerning pairs") print(f" {len(kern_pairs)} kerning pairs")
fea_code = generate_features(glyphs, kern_pairs, glyph_set) jamo_data = get_jamo_gsub_data()
fea_code = generate_features(glyphs, kern_pairs, glyph_set,
replacewith_subs=replacewith_subs,
jamo_data=jamo_data)
if fea_code.strip(): if fea_code.strip():
print(" Compiling features with feaLib...") print(" Compiling features with feaLib...")
@@ -228,14 +250,14 @@ def build_font(assets_dir, output_path, no_bitmap=False, no_features=False):
else: else:
print(" No features to compile") print(" No features to compile")
else: else:
print("Step 7: Skipping OpenType features (--no-features)") print("Step 8: Skipping OpenType features (--no-features)")
# Step 8: Add bitmap strike (EBDT/EBLC) # Step 9: Add bitmap strike (EBDT/EBLC)
if not no_bitmap: if not no_bitmap:
print("Step 8: Adding bitmap strike...") print("Step 9: Adding bitmap strike...")
_add_bitmap_strike(font, glyphs, glyph_order, glyph_set) _add_bitmap_strike(font, glyphs, glyph_order, glyph_set)
else: else:
print("Step 8: Skipping bitmap strike (--no-bitmap)") print("Step 9: Skipping bitmap strike (--no-bitmap)")
# Save # Save
print(f"Saving to {output_path}...") print(f"Saving to {output_path}...")
@@ -254,7 +276,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
ppem = 20 ppem = 20
name_to_id = {name: idx for idx, name in enumerate(glyph_order)} name_to_id = {name: idx for idx, name in enumerate(glyph_order)}
# Collect bitmap data — only glyphs with actual pixels
bitmap_entries = [] bitmap_entries = []
for name in glyph_order: for name in glyph_order:
if name == ".notdef": if name == ".notdef":
@@ -272,7 +293,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
if w == 0 or h == 0: if w == 0 or h == 0:
continue continue
# Pack rows into hex
hex_rows = [] hex_rows = []
for row in bitmap: for row in bitmap:
row_bytes = bytearray() row_bytes = bytearray()
@@ -298,12 +318,9 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
print(" No bitmap data to embed") print(" No bitmap data to embed")
return return
# Split into contiguous GID runs for separate index subtables
# This avoids the empty-name problem for gaps
gid_sorted = sorted(bitmap_entries, key=lambda e: e['gid']) gid_sorted = sorted(bitmap_entries, key=lambda e: e['gid'])
gid_to_entry = {e['gid']: e for e in gid_sorted}
runs = [] # list of lists of entries runs = []
current_run = [gid_sorted[0]] current_run = [gid_sorted[0]]
for i in range(1, len(gid_sorted)): for i in range(1, len(gid_sorted)):
if gid_sorted[i]['gid'] == gid_sorted[i-1]['gid'] + 1: if gid_sorted[i]['gid'] == gid_sorted[i-1]['gid'] + 1:
@@ -313,7 +330,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
current_run = [gid_sorted[i]] current_run = [gid_sorted[i]]
runs.append(current_run) runs.append(current_run)
# Build TTX XML for EBDT
ebdt_xml = ['<EBDT>', '<header version="2.0"/>', '<strikedata index="0">'] ebdt_xml = ['<EBDT>', '<header version="2.0"/>', '<strikedata index="0">']
for entry in gid_sorted: for entry in gid_sorted:
ebdt_xml.append(f' <cbdt_bitmap_format_1 name="{entry["name"]}">') ebdt_xml.append(f' <cbdt_bitmap_format_1 name="{entry["name"]}">')
@@ -332,7 +348,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
ebdt_xml.append('</strikedata>') ebdt_xml.append('</strikedata>')
ebdt_xml.append('</EBDT>') ebdt_xml.append('</EBDT>')
# Build TTX XML for EBLC
all_gids = [e['gid'] for e in gid_sorted] all_gids = [e['gid'] for e in gid_sorted]
desc = -(SC.H - BASELINE_ROW) desc = -(SC.H - BASELINE_ROW)
@@ -371,8 +386,6 @@ def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set):
' </bitmapSizeTable>', ' </bitmapSizeTable>',
]) ])
# One index subtable per contiguous run — no gaps
# Use format 1 (32-bit offsets) to avoid 16-bit overflow
for run in runs: for run in runs:
first_gid = run[0]['gid'] first_gid = run[0]['gid']
last_gid = run[-1]['gid'] last_gid = run[-1]['gid']

View File

@@ -191,8 +191,9 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
info |= (1 << y) info |= (1 << y)
ext_info[x] = info ext_info[x] = info
# Extract glyph bitmap (all pixels except tag column) # Extract glyph bitmap: only pixels within the glyph's declared width.
bitmap_w = cell_w - 1 # The tag column and any padding beyond width must be stripped.
bitmap_w = min(width, cell_w - 1) if width > 0 else 0
bitmap = [] bitmap = []
for row in range(cell_h): for row in range(cell_h):
row_data = [] row_data = []
@@ -206,14 +207,98 @@ def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped
return result return result
def _read_hangul_cell(image, column, row, cell_w=SC.W_HANGUL_BASE, cell_h=SC.H):
    """
    Read one cell of the Hangul johab sheet as a 0/1 bitmap.

    The cell's top-left pixel is at (column * cell_w, row * cell_h).
    A pixel counts as set when the low byte of ``image.get_pixel(x, y)``
    is non-zero. Returns ``cell_h`` rows of ``cell_w`` ints each.
    """
    origin_x = column * cell_w
    origin_y = row * cell_h
    return [
        [
            1 if (image.get_pixel(origin_x + dx, origin_y + dy) & 0xFF) != 0 else 0
            for dx in range(cell_w)
        ]
        for dy in range(cell_h)
    ]
def parse_hangul_jamo_sheet(image, cell_w, cell_h):
    """
    Parse the Hangul Jamo sheet into default-row glyphs.

    Every jamo codepoint is read from a fixed (column, row) cell of the
    sheet and stored with an advance of ``cell_w``:

    - U+1160 (jungseong filler):          column 0,             row 15
    - Choseong U+1100-U+115F:             column cp - 0x1100,   row 1
      (U+115F is the choseong filler)
    - Jungseong U+1161-U+11A7:            column cp - 0x1160,   row 15
    - Jongseong U+11A8-U+11FF:            column cp - 0x11A8+1, row 17
    - Extended choseong U+A960-U+A97F:    column 96 + offset,   row 1
    - Extended jungseong U+D7B0-U+D7C6:   column 72 + offset,   row 15
    - Extended jongseong U+D7CB-U+D7FB:   column 89 + offset,   row 17

    Only the default row of each jamo is read here; the other variant rows
    used for syllable composition are handled elsewhere.

    Returns a dict of codepoint -> ExtractedGlyph.
    """
    # (first codepoint, end codepoint exclusive, column of first codepoint, row)
    # Order matters only for dict insertion order: the filler comes first.
    layout = (
        (0x1160, 0x1161, 0, 15),   # jungseong filler
        (0x1100, 0x1160, 0, 1),    # choseong, including filler U+115F
        (0x1161, 0x11A8, 1, 15),   # jungseong
        (0x11A8, 0x1200, 1, 17),   # jongseong (column 0 is unused)
        (0xA960, 0xA980, 96, 1),   # extended choseong
        (0xD7B0, 0xD7C7, 72, 15),  # extended jungseong
        (0xD7CB, 0xD7FC, 89, 17),  # extended jongseong
    )

    jamo_glyphs = {}
    for first_cp, stop_cp, first_col, sheet_row in layout:
        for cp in range(first_cp, stop_cp):
            sheet_col = first_col + (cp - first_cp)
            cell = _read_hangul_cell(image, sheet_col, sheet_row, cell_w, cell_h)
            jamo_glyphs[cp] = ExtractedGlyph(cp, GlyphProps(width=cell_w), cell)
    return jamo_glyphs
def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols): def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols):
"""Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym).""" """Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym)."""
# Hangul Jamo sheet has special layout — handled separately
if sheet_index == SC.SHEET_HANGUL:
return parse_hangul_jamo_sheet(image, cell_w, cell_h)
code_range = SC.CODE_RANGE[sheet_index] code_range = SC.CODE_RANGE[sheet_index]
result = {} result = {}
fixed_width = { fixed_width = {
SC.SHEET_CUSTOM_SYM: 20, SC.SHEET_CUSTOM_SYM: 20,
SC.SHEET_HANGUL: SC.W_HANGUL_BASE,
SC.SHEET_RUNIC: 9, SC.SHEET_RUNIC: 9,
SC.SHEET_UNIHAN: SC.W_UNIHAN, SC.SHEET_UNIHAN: SC.W_UNIHAN,
}.get(sheet_index, cell_w) }.get(sheet_index, cell_w)
@@ -301,7 +386,7 @@ def _add_fixed_width_overrides(result):
def get_hangul_jamo_bitmaps(assets_dir): def get_hangul_jamo_bitmaps(assets_dir):
""" """
Extract raw Hangul jamo bitmaps from the Hangul sheet for composition. Extract raw Hangul jamo bitmaps from the Hangul sheet for composition.
Returns a function: (index, row) -> bitmap (list of list of int) Returns a function: (column_index, row) -> bitmap (list of list of int)
""" """
filename = SC.FILE_LIST[SC.SHEET_HANGUL] filename = SC.FILE_LIST[SC.SHEET_HANGUL]
filepath = os.path.join(assets_dir, filename) filepath = os.path.join(assets_dir, filename)
@@ -326,3 +411,41 @@ def get_hangul_jamo_bitmaps(assets_dir):
return bitmap return bitmap
return get_bitmap return get_bitmap
def extract_hangul_jamo_variants(assets_dir):
    """
    Extract every non-empty cell of the Hangul johab sheet.

    The sheet file is ``SC.FILE_LIST[SC.SHEET_HANGUL]`` inside
    ``assets_dir``. The whole image is scanned on a grid of
    ``SC.W_HANGUL_BASE`` x ``SC.H`` cells; a cell is kept when at least
    one of its pixels is set.

    Sheet layout (rows):
        0      Hangul Compatibility Jamo (U+3130-U+318F)
        1-14   Choseong variants (row depends on jungseong context)
        15-16  Jungseong variants (15 = no final, 16 = with final)
        17-18  Jongseong variants (17 = normal, 18 = rightie jungseong)
        19-24  Additional choseong variants (giyeok remapping)

    Returns a dict of (column, row) -> bitmap, or an empty dict when the
    sheet file does not exist.
    """
    sheet_path = os.path.join(assets_dir, SC.FILE_LIST[SC.SHEET_HANGUL])
    if not os.path.exists(sheet_path):
        return {}

    sheet = read_tga(sheet_path)
    cell_w, cell_h = SC.W_HANGUL_BASE, SC.H
    # Partial cells at the right/bottom edge are ignored by the floor division.
    row_total = sheet.height // cell_h
    col_total = sheet.width // cell_w

    found = {}
    for sheet_row in range(row_total):
        for sheet_col in range(col_total):
            cell = _read_hangul_cell(sheet, sheet_col, sheet_row, cell_w, cell_h)
            if any(any(pixel_row) for pixel_row in cell):
                found[(sheet_col, sheet_row)] = cell
    return found

View File

@@ -1,15 +1,24 @@
""" """
Compose 11,172 Hangul syllables (U+AC00-U+D7A3) from jamo sprite pieces. Compose 11,172 Hangul syllables (U+AC00-U+D7A3) from jamo sprite pieces.
Also composes Hangul Compatibility Jamo (U+3130-U+318F). Also composes Hangul Compatibility Jamo (U+3130-U+318F).
Also stores all jamo variant bitmaps in PUA for GSUB-based jamo assembly.
Ported from HangulCompositor.kt and TerrarumSansBitmap.kt. Ported from HangulCompositor.kt and TerrarumSansBitmap.kt.
""" """
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
from glyph_parser import ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps from glyph_parser import (
ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps,
extract_hangul_jamo_variants, _read_hangul_cell, _empty_bitmap,
)
import sheet_config as SC import sheet_config as SC
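# PUA range for Hangul jamo variant storage.
# Each sheet cell (col, row) maps to HANGUL_PUA_BASE + row * 256 + col
# (see _pua_for_jamo_variant), so every sheet row reserves 256 slots.
# NOTE: only rows 0-9 fit inside 0xF0600-0xF0FFF (2560 slots); cells on
# row 10 and above land at 0xF1000 or higher.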
HANGUL_PUA_BASE = 0xF0600
def _compose_bitmaps(a, b, w, h): def _compose_bitmaps(a, b, w, h):
"""OR two bitmaps together.""" """OR two bitmaps together."""
@@ -32,9 +41,15 @@ def _compose_bitmap_into(target, source, w, h):
target[row][col] = 1 target[row][col] = 1
def _pua_for_jamo_variant(col, row):
    """Return the PUA codepoint assigned to the sheet cell at (col, row)."""
    # Each sheet row owns a block of 256 codepoints, so col must stay < 256
    # for the mapping to be collision-free.
    # NOTE(review): with HANGUL_PUA_BASE = 0xF0600, rows >= 10 map to
    # 0xF1000 or above, i.e. past the 0xF0600-0xF0FFF window the module
    # comment describes -- confirm that downstream range checks expect this.
    row_offset = row * 256
    return HANGUL_PUA_BASE + row_offset + col
def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]: def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
""" """
Compose all Hangul syllables and compatibility jamo. Compose all Hangul syllables, compatibility jamo, and jamo variants.
Returns a dict of codepoint -> ExtractedGlyph. Returns a dict of codepoint -> ExtractedGlyph.
""" """
get_jamo = get_hangul_jamo_bitmaps(assets_dir) get_jamo = get_hangul_jamo_bitmaps(assets_dir)
@@ -94,5 +109,39 @@ def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]:
props = GlyphProps(width=advance_width) props = GlyphProps(width=advance_width)
result[c] = ExtractedGlyph(c, props, composed) result[c] = ExtractedGlyph(c, props, composed)
print(f" Hangul composition done: {len(result)} glyphs") print(f" Hangul syllable composition done: {len(result)} glyphs")
# Store jamo variant bitmaps in PUA for GSUB assembly
print(" Extracting jamo variants for GSUB...")
variants = extract_hangul_jamo_variants(assets_dir)
variant_count = 0
for (col, row), bm in variants.items():
pua = _pua_for_jamo_variant(col, row)
if pua not in result:
result[pua] = ExtractedGlyph(pua, GlyphProps(width=cell_w), bm)
variant_count += 1
print(f" Stored {variant_count} jamo variant glyphs in PUA (0x{HANGUL_PUA_BASE:05X}+)")
print(f" Total Hangul glyphs: {len(result)}")
return result return result
def get_jamo_gsub_data():
    """
    Return the data needed to build the Hangul jamo GSUB lookups.

    The returned dict has exactly two entries:
      - 'pua_fn':   function(col, row) -> PUA codepoint of that sheet cell
      - 'pua_base': first codepoint of the jamo-variant PUA block

    Row-selection tables are NOT part of this dict. The rules, as ported
    from the Kotlin code, are:
      Choseong row  = getHanInitialRow(i_cho, i_jung, i_jong)
      Jungseong row = 15 if no final, else 16
      Jongseong row = 17 if jungseong is not rightie, else 18
    """
    return dict(
        pua_fn=_pua_for_jamo_variant,
        pua_base=HANGUL_PUA_BASE,
    )

View File

@@ -28,7 +28,8 @@ def glyph_name(cp):
return f"u{cp:05X}" if cp <= 0xFFFFF else f"u{cp:06X}" return f"u{cp:05X}" if cp <= 0xFFFFF else f"u{cp:06X}"
def generate_features(glyphs, kern_pairs, font_glyph_set): def generate_features(glyphs, kern_pairs, font_glyph_set,
replacewith_subs=None, jamo_data=None):
""" """
Generate complete OpenType feature code string. Generate complete OpenType feature code string.
@@ -36,6 +37,8 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
glyphs: dict of codepoint -> ExtractedGlyph glyphs: dict of codepoint -> ExtractedGlyph
kern_pairs: dict of (left_cp, right_cp) -> kern_value_in_font_units kern_pairs: dict of (left_cp, right_cp) -> kern_value_in_font_units
font_glyph_set: set of glyph names actually present in the font font_glyph_set: set of glyph names actually present in the font
replacewith_subs: list of (source_cp, [target_cp, ...]) for ccmp
jamo_data: dict with Hangul jamo GSUB data
Returns: Returns:
Feature code string for feaLib compilation. Feature code string for feaLib compilation.
""" """
@@ -44,6 +47,16 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
def has(cp): def has(cp):
return glyph_name(cp) in font_glyph_set return glyph_name(cp) in font_glyph_set
# ccmp feature (replacewith directives + Hangul jamo decomposition)
ccmp_code = _generate_ccmp(replacewith_subs or [], has)
if ccmp_code:
parts.append(ccmp_code)
# Hangul jamo GSUB assembly
hangul_code = _generate_hangul_gsub(glyphs, has, jamo_data)
if hangul_code:
parts.append(hangul_code)
# kern feature # kern feature
kern_code = _generate_kern(kern_pairs, has) kern_code = _generate_kern(kern_pairs, has)
if kern_code: if kern_code:
@@ -82,6 +95,209 @@ def generate_features(glyphs, kern_pairs, font_glyph_set):
return '\n\n'.join(parts) return '\n\n'.join(parts)
def _generate_ccmp(replacewith_subs, has):
"""Generate ccmp feature for replacewith directives (multiple substitution)."""
if not replacewith_subs:
return ""
subs = []
for src_cp, target_cps in replacewith_subs:
if not has(src_cp):
continue
if not all(has(t) for t in target_cps):
continue
src = glyph_name(src_cp)
targets = ' '.join(glyph_name(t) for t in target_cps)
subs.append(f" sub {src} by {targets};")
if not subs:
return ""
lines = ["feature ccmp {", " lookup ReplacewithExpansion {"]
lines.extend(subs)
lines.append(" } ReplacewithExpansion;")
lines.append("} ccmp;")
return '\n'.join(lines)
def _generate_hangul_gsub(glyphs, has, jamo_data):
"""
Generate Hangul jamo GSUB lookups for syllable assembly.
When a shaping engine encounters consecutive Hangul Jamo (Choseong +
Jungseong + optional Jongseong), these lookups substitute each jamo
with the correct positional variant from the PUA area.
The row selection logic mirrors the Kotlin code:
- Choseong row depends on which jungseong follows and whether jongseong exists
- Jungseong row is 15 (no final) or 16 (with final)
- Jongseong row is 17 (normal) or 18 (rightie jungseong)
"""
if not jamo_data:
return ""
pua_fn = jamo_data['pua_fn']
# Build contextual substitution lookups
# Strategy: use ljmo/vjmo/tjmo features (standard Hangul OpenType features)
#
# ljmo: choseong → positional variant (depends on following jungseong)
# vjmo: jungseong → positional variant (depends on whether jongseong follows)
# tjmo: jongseong → positional variant (depends on preceding jungseong)
lines = []
# --- ljmo: Choseong variant selection ---
# For each choseong, we need variants for different jungseong contexts.
# Row 1 is the default (basic vowels like ㅏ).
# We use contextual alternates: choseong' lookup X jungseong
ljmo_lookups = []
# Group jungseong indices by which choseong row they select
# From getHanInitialRow: the row depends on jungseong index (p) and has-final (f)
# For GSUB, we pre-compute for f=0 (no final) since we can't know yet
row_to_jung_indices = {}
for p in range(96): # all possible jungseong indices
# Without jongseong first; use i=1 to avoid giyeok edge cases
try:
row_nf = SC.get_han_initial_row(1, p, 0)
except (ValueError, KeyError):
continue
if row_nf not in row_to_jung_indices:
row_to_jung_indices[row_nf] = []
row_to_jung_indices[row_nf].append(p)
# For each unique choseong row, create a lookup that substitutes
# the default choseong glyph with the variant at that row
for cho_row, jung_indices in sorted(row_to_jung_indices.items()):
if cho_row == 1:
continue # row 1 is the default, no substitution needed
lookup_name = f"ljmo_row{cho_row}"
subs = []
# For standard choseong (U+1100-U+115E)
for cho_cp in range(0x1100, 0x115F):
col = cho_cp - 0x1100
variant_pua = pua_fn(col, cho_row)
if has(cho_cp) and has(variant_pua):
subs.append(f" sub {glyph_name(cho_cp)} by {glyph_name(variant_pua)};")
if subs:
lines.append(f"lookup {lookup_name} {{")
lines.extend(subs)
lines.append(f"}} {lookup_name};")
ljmo_lookups.append((lookup_name, jung_indices))
# --- vjmo: Jungseong variant selection ---
# Row 15 = no jongseong following, Row 16 = jongseong follows
# We need two lookups
vjmo_subs_16 = [] # with-final variant (row 16)
for jung_cp in range(0x1161, 0x11A8):
col = jung_cp - 0x1160
variant_pua = pua_fn(col, 16)
if has(jung_cp) and has(variant_pua):
vjmo_subs_16.append(f" sub {glyph_name(jung_cp)} by {glyph_name(variant_pua)};")
if vjmo_subs_16:
lines.append("lookup vjmo_withfinal {")
lines.extend(vjmo_subs_16)
lines.append("} vjmo_withfinal;")
# --- tjmo: Jongseong variant selection ---
# Row 17 = normal, Row 18 = after rightie jungseong
tjmo_subs_18 = []
for jong_cp in range(0x11A8, 0x1200):
col = jong_cp - 0x11A8 + 1
variant_pua = pua_fn(col, 18)
if has(jong_cp) and has(variant_pua):
tjmo_subs_18.append(f" sub {glyph_name(jong_cp)} by {glyph_name(variant_pua)};")
if tjmo_subs_18:
lines.append("lookup tjmo_rightie {")
lines.extend(tjmo_subs_18)
lines.append("} tjmo_rightie;")
# --- Build the actual features using contextual substitution ---
# Jungseong class definitions for contextual rules
# Build classes of jungseong glyphs that trigger specific choseong rows
feature_lines = []
# ljmo feature: contextual choseong substitution
if ljmo_lookups:
feature_lines.append("feature ljmo {")
feature_lines.append(" script hang;")
for lookup_name, jung_indices in ljmo_lookups:
# Build jungseong class for this row
jung_glyphs = []
for idx in jung_indices:
cp = 0x1160 + idx
if has(cp):
jung_glyphs.append(glyph_name(cp))
if not jung_glyphs:
continue
class_name = f"@jung_for_{lookup_name}"
feature_lines.append(f" {class_name} = [{' '.join(jung_glyphs)}];")
# Contextual rules: choseong' [lookup X] jungseong
# For each choseong, if followed by a jungseong in the right class,
# apply the variant lookup
for lookup_name, jung_indices in ljmo_lookups:
jung_glyphs = []
for idx in jung_indices:
cp = 0x1160 + idx
if has(cp):
jung_glyphs.append(glyph_name(cp))
if not jung_glyphs:
continue
class_name = f"@jung_for_{lookup_name}"
# Build choseong class
cho_glyphs = [glyph_name(cp) for cp in range(0x1100, 0x115F) if has(cp)]
if cho_glyphs:
feature_lines.append(f" @choseong = [{' '.join(cho_glyphs)}];")
feature_lines.append(f" sub @choseong' lookup {lookup_name} {class_name};")
feature_lines.append("} ljmo;")
# vjmo feature: jungseong gets row 16 variant when followed by jongseong
if vjmo_subs_16:
jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
if jong_glyphs:
feature_lines.append("feature vjmo {")
feature_lines.append(" script hang;")
jung_glyphs = [glyph_name(cp) for cp in range(0x1161, 0x11A8) if has(cp)]
feature_lines.append(f" @jongseong = [{' '.join(jong_glyphs)}];")
feature_lines.append(f" @jungseong = [{' '.join(jung_glyphs)}];")
feature_lines.append(f" sub @jungseong' lookup vjmo_withfinal @jongseong;")
feature_lines.append("} vjmo;")
# tjmo feature: jongseong gets row 18 variant when after rightie jungseong
if tjmo_subs_18:
rightie_glyphs = []
for idx in sorted(SC.JUNGSEONG_RIGHTIE):
cp = 0x1160 + idx
if has(cp):
rightie_glyphs.append(glyph_name(cp))
# Also check PUA variants (row 16)
pua16 = pua_fn(idx, 16)
if has(pua16):
rightie_glyphs.append(glyph_name(pua16))
if rightie_glyphs:
feature_lines.append("feature tjmo {")
feature_lines.append(" script hang;")
feature_lines.append(f" @rightie_jung = [{' '.join(rightie_glyphs)}];")
jong_glyphs = [glyph_name(cp) for cp in range(0x11A8, 0x1200) if has(cp)]
feature_lines.append(f" @jongseong_all = [{' '.join(jong_glyphs)}];")
feature_lines.append(f" sub @rightie_jung @jongseong_all' lookup tjmo_rightie;")
feature_lines.append("} tjmo;")
if not lines and not feature_lines:
return ""
return '\n'.join(lines + [''] + feature_lines)
def _generate_kern(kern_pairs, has): def _generate_kern(kern_pairs, has):
"""Generate kern feature from pair positioning data.""" """Generate kern feature from pair positioning data."""
if not kern_pairs: if not kern_pairs: