diff --git a/OTFbuild/OTFbuild.iml b/OTFbuild/OTFbuild.iml deleted file mode 100644 index 1ad6bc2..0000000 --- a/OTFbuild/OTFbuild.iml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/OTFbuild/bitmap_tracer.py b/OTFbuild/bitmap_tracer.py new file mode 100644 index 0000000..346a3bc --- /dev/null +++ b/OTFbuild/bitmap_tracer.py @@ -0,0 +1,91 @@ +""" +Convert 1-bit bitmap arrays to TrueType quadratic outlines. + +Each set pixel becomes part of a rectangle contour drawn clockwise. +Adjacent identical horizontal runs are merged vertically into rectangles. + +Scale: x_left = col * SCALE, y_top = (BASELINE_ROW - row) * SCALE +where BASELINE_ROW = 16 (ascent in pixels). +""" + +from typing import Dict, List, Tuple + +import sheet_config as SC + +SCALE = SC.SCALE +BASELINE_ROW = 16 # pixels from top to baseline + + +def trace_bitmap(bitmap, glyph_width_px): + """ + Convert a bitmap to a list of rectangle contours. + + Each rectangle is ((x0, y0), (x1, y1)) in font units, where: + - (x0, y0) is bottom-left + - (x1, y1) is top-right + + Returns list of (x0, y0, x1, y1) tuples representing rectangles. + """ + if not bitmap or not bitmap[0]: + return [] + + h = len(bitmap) + w = len(bitmap[0]) + + # Step 1: Find horizontal runs per row + runs = [] # list of (row, col_start, col_end) + for row in range(h): + col = 0 + while col < w: + if bitmap[row][col]: + start = col + while col < w and bitmap[row][col]: + col += 1 + runs.append((row, start, col)) + else: + col += 1 + + # Step 2: Merge vertically adjacent identical runs into rectangles + rects = [] # (row_start, row_end, col_start, col_end) + used = [False] * len(runs) + + for i, (row, cs, ce) in enumerate(runs): + if used[i]: + continue + # Try to extend this run downward + row_end = row + 1 + j = i + 1 + while j < len(runs): + r2, cs2, ce2 = runs[j] + if r2 > row_end: + break + if r2 == row_end and cs2 == cs and ce2 == ce and not used[j]: + used[j] = True + row_end = r2 + 1 + j += 1 + rects.append((row, row_end, cs, ce)) + + # Step 3: Convert to font coordinates + contours = [] + for row_start, row_end, col_start, col_end in rects: + x0 = col_start * SCALE + x1 = col_end * SCALE + y_top = (BASELINE_ROW - row_start) * SCALE + y_bottom = (BASELINE_ROW - row_end) * SCALE + contours.append((x0, y_bottom, x1, y_top)) + + return contours + + +def draw_glyph_to_pen(contours, pen): + """ + Draw rectangle contours to a TTGlyphPen or similar pen. + Each rectangle is drawn as a clockwise closed contour (4 on-curve points). + """ + for x0, y0, x1, y1 in contours: + # Clockwise: bottom-left -> top-left -> top-right -> bottom-right + pen.moveTo((x0, y0)) + pen.lineTo((x0, y1)) + pen.lineTo((x1, y1)) + pen.lineTo((x1, y0)) + pen.closePath() diff --git a/OTFbuild/build_font.py b/OTFbuild/build_font.py new file mode 100644 index 0000000..fdff6fb --- /dev/null +++ b/OTFbuild/build_font.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +""" +Terrarum Sans Bitmap OTF Builder v2 — Python + fonttools + +Builds a TTF font with both vector-traced outlines (TrueType glyf) +and embedded bitmap strike (EBDT/EBLC) from TGA sprite sheets. + +Usage: + python3 OTFbuild/build_font.py src/assets -o OTFbuild/TerrarumSansBitmap.ttf + +Options: + --no-bitmap Skip EBDT/EBLC bitmap strike + --no-features Skip GSUB/GPOS OpenType features +""" + +import argparse +import sys +import os + +# Add OTFbuild dir to path for imports +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from font_builder import build_font + + +def main(): + parser = argparse.ArgumentParser( + description="Build Terrarum Sans Bitmap TTF from TGA sprite sheets" + ) + parser.add_argument( + "assets_dir", + help="Path to assets directory containing TGA sprite sheets" + ) + parser.add_argument( + "-o", "--output", + default="OTFbuild/TerrarumSansBitmap.ttf", + help="Output TTF file path (default: OTFbuild/TerrarumSansBitmap.ttf)" + ) + parser.add_argument( + "--no-bitmap", + action="store_true", + help="Skip EBDT/EBLC bitmap strike" + ) + parser.add_argument( + "--no-features", + action="store_true", + help="Skip GSUB/GPOS OpenType features" + ) + + args = parser.parse_args() + + if not os.path.isdir(args.assets_dir): + print(f"Error: assets directory not found: {args.assets_dir}", file=sys.stderr) + sys.exit(1) + + # Ensure output directory exists + output_dir = os.path.dirname(args.output) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + print(f"Terrarum Sans Bitmap OTF Builder v2") + print(f" Assets: {args.assets_dir}") + print(f" Output: {args.output}") + print() + + build_font( + assets_dir=args.assets_dir, + output_path=args.output, + no_bitmap=args.no_bitmap, + no_features=args.no_features, + ) + + +if __name__ == "__main__": + main() diff --git a/OTFbuild/build_otf.sh b/OTFbuild/build_otf.sh deleted file mode 100755 index de63ff3..0000000 --- a/OTFbuild/build_otf.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -set -e - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" -ASSETS_DIR="$PROJECT_DIR/src/assets" -OUTPUT_DIR="$SCRIPT_DIR" -BITSNPICAS_JAR="$SCRIPT_DIR/bitsnpicas_runtime/BitsNPicas.jar" - -# Output paths -KBITX_OUTPUT="$OUTPUT_DIR/TerrarumSansBitmap.kbitx" -TTF_OUTPUT="$OUTPUT_DIR/TerrarumSansBitmap.ttf" - -echo "=== Terrarum Sans Bitmap OTF Build Pipeline ===" -echo "Project: $PROJECT_DIR" -echo "Assets: $ASSETS_DIR" -echo "" - -# Step 1: Compile the builder -echo "--- Step 1: Compiling OTFbuild module ---" -COMPILE_CLASSPATH="$BITSNPICAS_JAR" -SRC_DIR="$SCRIPT_DIR/src" -OUT_DIR="$SCRIPT_DIR/out" - -mkdir -p "$OUT_DIR" - -# Find all Kotlin source files -SRC_FILES=$(find "$SRC_DIR" -name "*.kt" | tr '\n' ' ') - -# Try to find Kotlin compiler -if command -v kotlinc &> /dev/null; then - KOTLINC="kotlinc" - KOTLIN_STDLIB="" -else - # Try IntelliJ's bundled Kotlin - IDEA_CACHE="$HOME/.cache/JetBrains" - KOTLIN_DIST=$(find "$IDEA_CACHE" -path "*/kotlin-dist-for-ide/*/lib/kotlin-compiler.jar" 2>/dev/null | sort -V | tail -1) - if [ -n "$KOTLIN_DIST" ]; then - KOTLIN_LIB="$(dirname "$KOTLIN_DIST")" - KOTLINC_CP="$KOTLIN_LIB/kotlin-compiler.jar:$KOTLIN_LIB/kotlin-stdlib.jar:$KOTLIN_LIB/trove4j.jar:$KOTLIN_LIB/kotlin-reflect.jar:$KOTLIN_LIB/kotlin-script-runtime.jar:$KOTLIN_LIB/kotlin-daemon.jar:$KOTLIN_LIB/annotations-13.0.jar" - KOTLIN_STDLIB="$KOTLIN_LIB/kotlin-stdlib.jar:$KOTLIN_LIB/kotlin-stdlib-jdk7.jar:$KOTLIN_LIB/kotlin-stdlib-jdk8.jar" - echo "Using IntelliJ's Kotlin from: $KOTLIN_LIB" - else - echo "ERROR: kotlinc not found. Please install Kotlin compiler or build via IntelliJ IDEA." - echo "" - echo "Alternative: Build the OTFbuild module in IntelliJ IDEA, then run:" - echo " java -cp \"$OUT_DIR:$COMPILE_CLASSPATH\" net.torvald.otfbuild.MainKt \"$ASSETS_DIR\" \"$KBITX_OUTPUT\"" - exit 1 - fi -fi - -if [ -n "$KOTLIN_STDLIB" ]; then - # Use IntelliJ's bundled Kotlin via java - java -cp "$KOTLINC_CP" org.jetbrains.kotlin.cli.jvm.K2JVMCompiler \ - -cp "$COMPILE_CLASSPATH:$KOTLIN_STDLIB" -d "$OUT_DIR" $SRC_FILES -else - kotlinc -cp "$COMPILE_CLASSPATH" -d "$OUT_DIR" $SRC_FILES - KOTLIN_STDLIB="" -fi - -# Step 2: Run the builder to generate KBITX -echo "" -echo "--- Step 2: Generating KBITX ---" -RUNTIME_CP="$OUT_DIR:$COMPILE_CLASSPATH" -if [ -n "$KOTLIN_STDLIB" ]; then - RUNTIME_CP="$RUNTIME_CP:$KOTLIN_STDLIB" -fi -java -cp "$RUNTIME_CP" net.torvald.otfbuild.MainKt "$ASSETS_DIR" "$KBITX_OUTPUT" - -# Step 3: Convert KBITX to TTF via BitsNPicas -echo "" -echo "--- Step 3: Converting KBITX to TTF ---" -java -jar "$BITSNPICAS_JAR" convertbitmap \ - -f ttf -o "$TTF_OUTPUT" \ - "$KBITX_OUTPUT" - -echo "" -echo "=== Build complete ===" -echo " KBITX: $KBITX_OUTPUT" -echo " TTF: $TTF_OUTPUT" diff --git a/OTFbuild/font_builder.py b/OTFbuild/font_builder.py new file mode 100644 index 0000000..dc2836e --- /dev/null +++ b/OTFbuild/font_builder.py @@ -0,0 +1,422 @@ +""" +Orchestrate fonttools TTFont assembly. + +1. Parse all sheets -> glyphs dict +2. Compose Hangul -> add to dict +3. Create glyph order and cmap +4. Trace all bitmaps -> glyf table +5. Set hmtx, hhea, OS/2, head, name, post +6. Generate and compile OpenType features via feaLib +7. Add EBDT/EBLC bitmap strike at ppem=20 +8. Save TTF +""" + +import time +from typing import Dict + +from fontTools.fontBuilder import FontBuilder +from fontTools.pens.ttGlyphPen import TTGlyphPen +from fontTools.feaLib.builder import addOpenTypeFeatures +from fontTools.ttLib import TTFont +import io + +from glyph_parser import ExtractedGlyph, parse_all_sheets +from hangul import compose_hangul +from bitmap_tracer import trace_bitmap, draw_glyph_to_pen, SCALE, BASELINE_ROW +from keming_machine import generate_kerning_pairs +from opentype_features import generate_features, glyph_name +import sheet_config as SC + + +# Codepoints that get cmap entries (user-visible) +# PUA forms used internally by GSUB get glyphs but NO cmap entries +_PUA_CMAP_RANGES = [ + range(0xE000, 0xE100), # Custom symbols + range(0xF0520, 0xF0580), # Codestyle ASCII +] + + +def _should_have_cmap(cp): + """Determine if a codepoint should have a cmap entry.""" + # Standard Unicode characters always get cmap entries + if cp < 0xE000: + return True + # Custom sym PUA range + if 0xE000 <= cp <= 0xE0FF: + return True + # Codestyle PUA + if 0xF0520 <= cp <= 0xF057F: + return True + # Hangul syllables + if 0xAC00 <= cp <= 0xD7A3: + return True + # Hangul compat jamo + if 0x3130 <= cp <= 0x318F: + return True + # SMP characters (Enclosed Alphanumeric Supplement, Hentaigana, etc.) + if 0x1F100 <= cp <= 0x1F1FF: + return True + if 0x1B000 <= cp <= 0x1B16F: + return True + # Everything in standard Unicode ranges (up to 0xFFFF plus SMP) + if cp <= 0xFFFF: + return True + # Internal PUA forms (Devanagari, Tamil, Sundanese, Bulgarian, Serbian internals) + # These are GSUB-only and should NOT have cmap entries + if 0xF0000 <= cp <= 0xF051F: + return False + # Internal control characters + if 0xFFE00 <= cp <= 0xFFFFF: + return False + return True + + +def build_font(assets_dir, output_path, no_bitmap=False, no_features=False): + """Build the complete TTF font.""" + t0 = time.time() + + # Step 1: Parse all sheets + print("Step 1: Parsing glyph sheets...") + glyphs = parse_all_sheets(assets_dir) + print(f" Parsed {len(glyphs)} glyphs from sheets") + + # Step 2: Compose Hangul + print("Step 2: Composing Hangul syllables...") + hangul_glyphs = compose_hangul(assets_dir) + glyphs.update(hangul_glyphs) + print(f" Total glyphs after Hangul: {len(glyphs)}") + + # Step 3: Create glyph order and cmap + print("Step 3: Building glyph order and cmap...") + glyph_order = [".notdef"] + cmap = {} + glyph_set = set() + + # Sort codepoints for deterministic output + sorted_cps = sorted(glyphs.keys()) + + for cp in sorted_cps: + g = glyphs[cp] + if g.props.is_illegal: + continue + name = glyph_name(cp) + if name == ".notdef": + continue + if name in glyph_set: + continue + glyph_order.append(name) + glyph_set.add(name) + if _should_have_cmap(cp): + cmap[cp] = name + + print(f" Glyph order: {len(glyph_order)} glyphs, cmap: {len(cmap)} entries") + + # Step 4: Build font with fonttools + print("Step 4: Building font tables...") + fb = FontBuilder(SC.UNITS_PER_EM, isTTF=True) + fb.setupGlyphOrder(glyph_order) + + # Build cmap + fb.setupCharacterMap(cmap) + + # Step 5: Trace bitmaps -> glyf table + print("Step 5: Tracing bitmaps to outlines...") + glyph_table = {} + + pen = TTGlyphPen(None) + + # .notdef glyph (empty box) + pen.moveTo((0, 0)) + pen.lineTo((0, SC.ASCENT)) + pen.lineTo((SC.UNITS_PER_EM // 2, SC.ASCENT)) + pen.lineTo((SC.UNITS_PER_EM // 2, 0)) + pen.closePath() + # Inner box + _m = 2 * SCALE + pen.moveTo((_m, _m)) + pen.lineTo((SC.UNITS_PER_EM // 2 - _m, _m)) + pen.lineTo((SC.UNITS_PER_EM // 2 - _m, SC.ASCENT - _m)) + pen.lineTo((_m, SC.ASCENT - _m)) + pen.closePath() + glyph_table[".notdef"] = pen.glyph() + + traced_count = 0 + for cp in sorted_cps: + g = glyphs[cp] + if g.props.is_illegal: + continue + name = glyph_name(cp) + if name == ".notdef" or name not in glyph_set: + continue + + contours = trace_bitmap(g.bitmap, g.props.width) + + pen = TTGlyphPen(None) + if contours: + draw_glyph_to_pen(contours, pen) + glyph_table[name] = pen.glyph() + traced_count += 1 + else: + # Empty glyph (space, zero-width, etc.) + pen.moveTo((0, 0)) + pen.endPath() + glyph_table[name] = pen.glyph() + + print(f" Traced {traced_count} glyphs with outlines") + + fb.setupGlyf(glyph_table) + + # Step 6: Set metrics + print("Step 6: Setting font metrics...") + metrics = {} + metrics[".notdef"] = (SC.UNITS_PER_EM // 2, 0) + + for cp in sorted_cps: + g = glyphs[cp] + if g.props.is_illegal: + continue + name = glyph_name(cp) + if name == ".notdef" or name not in glyph_set: + continue + advance = g.props.width * SCALE + metrics[name] = (advance, 0) # (advance_width, lsb) + + fb.setupHorizontalMetrics(metrics) + fb.setupHorizontalHeader( + ascent=SC.ASCENT, + descent=-SC.DESCENT + ) + + fb.setupNameTable({ + "familyName": "Terrarum Sans Bitmap", + "styleName": "Regular", + }) + + fb.setupOS2( + sTypoAscender=SC.ASCENT, + sTypoDescender=-SC.DESCENT, + sTypoLineGap=SC.LINE_GAP, + usWinAscent=SC.ASCENT, + usWinDescent=SC.DESCENT, + sxHeight=SC.X_HEIGHT, + sCapHeight=SC.CAP_HEIGHT, + fsType=0, # Installable embedding + ) + + fb.setupPost() + fb.setupHead(unitsPerEm=SC.UNITS_PER_EM) + + font = fb.font + + # Step 7: Generate and compile OpenType features + if not no_features: + print("Step 7: Generating OpenType features...") + kern_pairs = generate_kerning_pairs(glyphs) + print(f" {len(kern_pairs)} kerning pairs") + + fea_code = generate_features(glyphs, kern_pairs, glyph_set) + + if fea_code.strip(): + print(" Compiling features with feaLib...") + try: + fea_stream = io.StringIO(fea_code) + addOpenTypeFeatures(font, fea_stream) + print(" Features compiled successfully") + except Exception as e: + print(f" [WARNING] Feature compilation failed: {e}") + print(" Continuing without OpenType features") + else: + print(" No features to compile") + else: + print("Step 7: Skipping OpenType features (--no-features)") + + # Step 8: Add bitmap strike (EBDT/EBLC) + if not no_bitmap: + print("Step 8: Adding bitmap strike...") + _add_bitmap_strike(font, glyphs, glyph_order, glyph_set) + else: + print("Step 8: Skipping bitmap strike (--no-bitmap)") + + # Save + print(f"Saving to {output_path}...") + font.save(output_path) + + elapsed = time.time() - t0 + print(f"Done! Built {len(glyph_order)} glyphs in {elapsed:.1f}s") + print(f"Output: {output_path}") + + +def _add_bitmap_strike(font, glyphs, glyph_order, glyph_set): + """Add EBDT/EBLC embedded bitmap strike at ppem=20 via TTX roundtrip.""" + import tempfile + import os as _os + + ppem = 20 + name_to_id = {name: idx for idx, name in enumerate(glyph_order)} + + # Collect bitmap data — only glyphs with actual pixels + bitmap_entries = [] + for name in glyph_order: + if name == ".notdef": + continue + cp = _name_to_cp(name) + if cp is None or cp not in glyphs: + continue + g = glyphs[cp] + if g.props.is_illegal or g.props.width == 0: + continue + + bitmap = g.bitmap + h = len(bitmap) + w = len(bitmap[0]) if h > 0 else 0 + if w == 0 or h == 0: + continue + + # Pack rows into hex + hex_rows = [] + for row in bitmap: + row_bytes = bytearray() + for col_start in range(0, w, 8): + byte_val = 0 + for bit in range(8): + col = col_start + bit + if col < w and row[col]: + byte_val |= (0x80 >> bit) + row_bytes.append(byte_val) + hex_rows.append(row_bytes.hex()) + + bitmap_entries.append({ + 'name': name, + 'gid': name_to_id.get(name, 0), + 'height': h, + 'width': w, + 'advance': g.props.width, + 'hex_rows': hex_rows, + }) + + if not bitmap_entries: + print(" No bitmap data to embed") + return + + # Split into contiguous GID runs for separate index subtables + # This avoids the empty-name problem for gaps + gid_sorted = sorted(bitmap_entries, key=lambda e: e['gid']) + gid_to_entry = {e['gid']: e for e in gid_sorted} + + runs = [] # list of lists of entries + current_run = [gid_sorted[0]] + for i in range(1, len(gid_sorted)): + if gid_sorted[i]['gid'] == gid_sorted[i-1]['gid'] + 1: + current_run.append(gid_sorted[i]) + else: + runs.append(current_run) + current_run = [gid_sorted[i]] + runs.append(current_run) + + # Build TTX XML for EBDT + ebdt_xml = ['', '
', ''] + for entry in gid_sorted: + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + for hr in entry['hex_rows']: + ebdt_xml.append(f' {hr}') + ebdt_xml.append(f' ') + ebdt_xml.append(f' ') + ebdt_xml.append('') + ebdt_xml.append('') + + # Build TTX XML for EBLC + all_gids = [e['gid'] for e in gid_sorted] + desc = -(SC.H - BASELINE_ROW) + + def _line_metrics_xml(direction, caret_num=1): + return [ + f' ', + f' ', + f' ', + f' ', + f' ', + ' ', + ' ', + ' ', + ' ', + f' ', + f' ', + ' ', + ' ', + f' ', + ] + + eblc_xml = [ + '', '
', + '', ' ', + ' ', + ] + eblc_xml.extend(_line_metrics_xml("hori", 1)) + eblc_xml.extend(_line_metrics_xml("vert", 0)) + eblc_xml.extend([ + f' ', + f' ', + f' ', + f' ', + ' ', + ' ', + ' ', + ]) + + # One index subtable per contiguous run — no gaps + # Use format 1 (32-bit offsets) to avoid 16-bit overflow + for run in runs: + first_gid = run[0]['gid'] + last_gid = run[-1]['gid'] + eblc_xml.append(f' ') + for entry in run: + eblc_xml.append(f' ') + eblc_xml.append(' ') + + eblc_xml.append('') + eblc_xml.append('') + + try: + ttx_content = '\n\n' + ttx_content += '\n'.join(ebdt_xml) + '\n' + ttx_content += '\n'.join(eblc_xml) + '\n' + ttx_content += '\n' + + with tempfile.NamedTemporaryFile(mode='w', suffix='.ttx', delete=False) as f: + f.write(ttx_content) + ttx_path = f.name + + font.importXML(ttx_path) + _os.unlink(ttx_path) + + print(f" Added bitmap strike at {ppem}ppem with {len(bitmap_entries)} glyphs ({len(runs)} index subtables)") + except Exception as e: + print(f" [WARNING] Bitmap strike failed: {e}") + print(" Continuing without bitmap strike") + + +def _name_to_cp(name): + """Convert glyph name back to codepoint.""" + if name == ".notdef": + return None + if name == "space": + return 0x20 + if name.startswith("uni"): + try: + return int(name[3:], 16) + except ValueError: + return None + if name.startswith("u"): + try: + return int(name[1:], 16) + except ValueError: + return None + return None diff --git a/OTFbuild/glyph_parser.py b/OTFbuild/glyph_parser.py new file mode 100644 index 0000000..890b049 --- /dev/null +++ b/OTFbuild/glyph_parser.py @@ -0,0 +1,328 @@ +""" +Extract glyph bitmaps and tag-column properties from TGA sprite sheets. +Ported from TerrarumSansBitmap.kt:buildWidthTable() and GlyphSheetParser.kt. + +Enhancement over v1: extracts all 6 diacritics anchors for GPOS mark feature. +""" + +import os +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +from tga_reader import TgaImage, read_tga +import sheet_config as SC + + +@dataclass +class DiacriticsAnchor: + type: int + x: int + y: int + x_used: bool + y_used: bool + + +@dataclass +class GlyphProps: + width: int + is_low_height: bool = False + nudge_x: int = 0 + nudge_y: int = 0 + diacritics_anchors: List[DiacriticsAnchor] = field(default_factory=lambda: [ + DiacriticsAnchor(i, 0, 0, False, False) for i in range(6) + ]) + align_where: int = 0 + write_on_top: int = -1 + stack_where: int = 0 + ext_info: List[int] = field(default_factory=lambda: [0] * 15) + has_kern_data: bool = False + is_kern_y_type: bool = False + kerning_mask: int = 255 + directive_opcode: int = 0 + directive_arg1: int = 0 + directive_arg2: int = 0 + + @property + def is_illegal(self): + return self.directive_opcode == 255 + + def required_ext_info_count(self): + if self.stack_where == SC.STACK_BEFORE_N_AFTER: + return 2 + if 0b10000_000 <= self.directive_opcode <= 0b10000_111: + return 7 + return 0 + + def is_pragma(self, pragma): + if pragma == "replacewith": + return 0b10000_000 <= self.directive_opcode <= 0b10000_111 + return False + + +@dataclass +class ExtractedGlyph: + codepoint: int + props: GlyphProps + bitmap: List[List[int]] # [row][col], 0 or 1 + + +def _tagify(pixel): + """Return 0 if alpha channel is zero, else return the original value.""" + return 0 if (pixel & 0xFF) == 0 else pixel + + +def _signed_byte(val): + """Convert unsigned byte to signed.""" + return val - 256 if val >= 128 else val + + +def _parse_diacritics_anchors(image, code_start_x, code_start_y): + """Parse 6 diacritics anchors from tag column rows 11-14.""" + anchors = [] + for i in range(6): + y_pos = 13 - (i // 3) * 2 + shift = (3 - (i % 3)) * 8 + y_pixel = _tagify(image.get_pixel(code_start_x, code_start_y + y_pos)) + x_pixel = _tagify(image.get_pixel(code_start_x, code_start_y + y_pos + 1)) + y_used = ((y_pixel >> shift) & 128) != 0 + x_used = ((x_pixel >> shift) & 128) != 0 + y_val = (y_pixel >> shift) & 127 if y_used else 0 + x_val = (x_pixel >> shift) & 127 if x_used else 0 + anchors.append(DiacriticsAnchor(i, x_val, y_val, x_used, y_used)) + return anchors + + +def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped): + """Parse a variable-width sheet: extract tag column for properties, bitmap for glyph.""" + code_range = SC.CODE_RANGE[sheet_index] + binary_code_offset = cell_w - 1 # tag column is last pixel column of cell + result = {} + + for index, code in enumerate(code_range): + if is_xy_swapped: + cell_x = (index // cols) * cell_w + cell_y = (index % cols) * cell_h + else: + cell_x = (index % cols) * cell_w + cell_y = (index // cols) * cell_h + + code_start_x = cell_x + binary_code_offset + code_start_y = cell_y + + # Width (5 bits) + width = 0 + for y in range(5): + if image.get_pixel(code_start_x, code_start_y + y) & 0xFF: + width |= (1 << y) + + is_low_height = (image.get_pixel(code_start_x, code_start_y + 5) & 0xFF) != 0 + + # Kerning data + kerning_bit1 = _tagify(image.get_pixel(code_start_x, code_start_y + 6)) + # kerning_bit2 and kerning_bit3 are reserved + is_kern_y_type = (kerning_bit1 & 0x80000000) != 0 + kerning_mask = (kerning_bit1 >> 8) & 0xFFFFFF + has_kern_data = (kerning_bit1 & 0xFF) != 0 + if not has_kern_data: + is_kern_y_type = False + kerning_mask = 255 + + # Compiler directives + compiler_directives = _tagify(image.get_pixel(code_start_x, code_start_y + 9)) + directive_opcode = (compiler_directives >> 24) & 255 + directive_arg1 = (compiler_directives >> 16) & 255 + directive_arg2 = (compiler_directives >> 8) & 255 + + # Nudge + nudging_bits = _tagify(image.get_pixel(code_start_x, code_start_y + 10)) + nudge_x = _signed_byte((nudging_bits >> 24) & 0xFF) + nudge_y = _signed_byte((nudging_bits >> 16) & 0xFF) + + # Diacritics anchors + diacritics_anchors = _parse_diacritics_anchors(image, code_start_x, code_start_y) + + # Alignment + align_where = 0 + for y in range(2): + if image.get_pixel(code_start_x, code_start_y + y + 15) & 0xFF: + align_where |= (1 << y) + + # Write on top + write_on_top_raw = image.get_pixel(code_start_x, code_start_y + 17) # NO tagify + if (write_on_top_raw & 0xFF) == 0: + write_on_top = -1 + else: + if (write_on_top_raw >> 8) == 0xFFFFFF: + write_on_top = 0 + else: + write_on_top = (write_on_top_raw >> 28) & 15 + + # Stack where + stack_where0 = _tagify(image.get_pixel(code_start_x, code_start_y + 18)) + stack_where1 = _tagify(image.get_pixel(code_start_x, code_start_y + 19)) + if stack_where0 == 0x00FF00FF and stack_where1 == 0x00FF00FF: + stack_where = SC.STACK_DONT + else: + stack_where = 0 + for y in range(2): + if image.get_pixel(code_start_x, code_start_y + y + 18) & 0xFF: + stack_where |= (1 << y) + + ext_info = [0] * 15 + props = GlyphProps( + width=width, is_low_height=is_low_height, + nudge_x=nudge_x, nudge_y=nudge_y, + diacritics_anchors=diacritics_anchors, + align_where=align_where, write_on_top=write_on_top, + stack_where=stack_where, ext_info=ext_info, + has_kern_data=has_kern_data, is_kern_y_type=is_kern_y_type, + kerning_mask=kerning_mask, + directive_opcode=directive_opcode, directive_arg1=directive_arg1, + directive_arg2=directive_arg2, + ) + + # Parse extInfo if needed + ext_count = props.required_ext_info_count() + if ext_count > 0: + for x in range(ext_count): + info = 0 + for y in range(20): + if image.get_pixel(cell_x + x, cell_y + y) & 0xFF: + info |= (1 << y) + ext_info[x] = info + + # Extract glyph bitmap (all pixels except tag column) + bitmap_w = cell_w - 1 + bitmap = [] + for row in range(cell_h): + row_data = [] + for col in range(bitmap_w): + px = image.get_pixel(cell_x + col, cell_y + row) + row_data.append(1 if (px & 0xFF) != 0 else 0) + bitmap.append(row_data) + + result[code] = ExtractedGlyph(code, props, bitmap) + + return result + + +def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols): + """Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym).""" + code_range = SC.CODE_RANGE[sheet_index] + result = {} + + fixed_width = { + SC.SHEET_CUSTOM_SYM: 20, + SC.SHEET_HANGUL: SC.W_HANGUL_BASE, + SC.SHEET_RUNIC: 9, + SC.SHEET_UNIHAN: SC.W_UNIHAN, + }.get(sheet_index, cell_w) + + for index, code in enumerate(code_range): + cell_x = (index % cols) * cell_w + cell_y = (index // cols) * cell_h + + bitmap = [] + for row in range(cell_h): + row_data = [] + for col in range(cell_w): + px = image.get_pixel(cell_x + col, cell_y + row) + row_data.append(1 if (px & 0xFF) != 0 else 0) + bitmap.append(row_data) + + props = GlyphProps(width=fixed_width) + result[code] = ExtractedGlyph(code, props, bitmap) + + return result + + +def _empty_bitmap(w=SC.W_VAR_INIT, h=SC.H): + return [[0] * w for _ in range(h)] + + +def parse_all_sheets(assets_dir): + """Parse all sheets and return a map of codepoint -> ExtractedGlyph.""" + result = {} + + for sheet_index, filename in enumerate(SC.FILE_LIST): + filepath = os.path.join(assets_dir, filename) + if not os.path.exists(filepath): + print(f" [SKIP] {filename} not found") + continue + + is_var = SC.is_variable(filename) + is_xy = SC.is_xy_swapped(filename) + is_ew = SC.is_extra_wide(filename) + cell_w = SC.get_cell_width(sheet_index) + cell_h = SC.get_cell_height(sheet_index) + cols = SC.get_columns(sheet_index) + + tags = [] + if is_var: tags.append("VARIABLE") + if is_xy: tags.append("XYSWAP") + if is_ew: tags.append("EXTRAWIDE") + if not tags: tags.append("STATIC") + print(f" Loading [{','.join(tags)}] {filename}") + + image = read_tga(filepath) + + if is_var: + sheet_glyphs = parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy) + else: + sheet_glyphs = parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols) + + result.update(sheet_glyphs) + + # Fixed-width overrides + _add_fixed_width_overrides(result) + + return result + + +def _add_fixed_width_overrides(result): + """Apply fixed-width overrides.""" + # Hangul compat jamo + for code in SC.CODE_RANGE_HANGUL_COMPAT: + if code not in result: + result[code] = ExtractedGlyph(code, GlyphProps(width=SC.W_HANGUL_BASE), _empty_bitmap(SC.W_HANGUL_BASE)) + + # Zero-width ranges (only internal/PUA control ranges, not surrogates or full Plane 16) + for code in range(0xFFFA0, 0x100000): + result[code] = ExtractedGlyph(code, GlyphProps(width=0), _empty_bitmap(1, 1)) + + # Null char + result[0] = ExtractedGlyph(0, GlyphProps(width=0), _empty_bitmap(1, 1)) + + # Replacement character at U+007F + if 0x7F in result: + result[0x7F].props.width = 15 + + +def get_hangul_jamo_bitmaps(assets_dir): + """ + Extract raw Hangul jamo bitmaps from the Hangul sheet for composition. + Returns a function: (index, row) -> bitmap (list of list of int) + """ + filename = SC.FILE_LIST[SC.SHEET_HANGUL] + filepath = os.path.join(assets_dir, filename) + if not os.path.exists(filepath): + print(" [WARNING] Hangul sheet not found") + return lambda idx, row: _empty_bitmap(SC.W_HANGUL_BASE) + + image = read_tga(filepath) + cell_w = SC.W_HANGUL_BASE + cell_h = SC.H + + def get_bitmap(index, row): + cell_x = index * cell_w + cell_y = row * cell_h + bitmap = [] + for r in range(cell_h): + row_data = [] + for c in range(cell_w): + px = image.get_pixel(cell_x + c, cell_y + r) + row_data.append(1 if (px & 0xFF) != 0 else 0) + bitmap.append(row_data) + return bitmap + + return get_bitmap diff --git a/OTFbuild/hangul.py b/OTFbuild/hangul.py new file mode 100644 index 0000000..c4bf759 --- /dev/null +++ b/OTFbuild/hangul.py @@ -0,0 +1,98 @@ +""" +Compose 11,172 Hangul syllables (U+AC00-U+D7A3) from jamo sprite pieces. +Also composes Hangul Compatibility Jamo (U+3130-U+318F). + +Ported from HangulCompositor.kt and TerrarumSansBitmap.kt. +""" + +from typing import Dict, List, Tuple + +from glyph_parser import ExtractedGlyph, GlyphProps, get_hangul_jamo_bitmaps +import sheet_config as SC + + +def _compose_bitmaps(a, b, w, h): + """OR two bitmaps together.""" + result = [] + for row in range(h): + row_data = [] + for col in range(w): + av = a[row][col] if row < len(a) and col < len(a[row]) else 0 + bv = b[row][col] if row < len(b) and col < len(b[row]) else 0 + row_data.append(1 if av or bv else 0) + result.append(row_data) + return result + + +def _compose_bitmap_into(target, source, w, h): + """OR source bitmap into target (mutates target).""" + for row in range(min(h, len(target), len(source))): + for col in range(min(w, len(target[row]), len(source[row]))): + if source[row][col]: + target[row][col] = 1 + + +def compose_hangul(assets_dir) -> Dict[int, ExtractedGlyph]: + """ + Compose all Hangul syllables and compatibility jamo. + Returns a dict of codepoint -> ExtractedGlyph. + """ + get_jamo = get_hangul_jamo_bitmaps(assets_dir) + cell_w = SC.W_HANGUL_BASE + cell_h = SC.H + result = {} + + # Compose Hangul Compatibility Jamo (U+3130-U+318F) + for c in range(0x3130, 0x3190): + index = c - 0x3130 + bitmap = get_jamo(index, 0) + props = GlyphProps(width=cell_w) + result[c] = ExtractedGlyph(c, props, bitmap) + + # Compose 11,172 Hangul syllables (U+AC00-U+D7A3) + print(" Composing 11,172 Hangul syllables...") + for c in range(0xAC00, 0xD7A4): + c_int = c - 0xAC00 + index_cho = c_int // (SC.JUNG_COUNT * SC.JONG_COUNT) + index_jung = c_int // SC.JONG_COUNT % SC.JUNG_COUNT + index_jong = c_int % SC.JONG_COUNT # 0 = no jongseong + + # Map to jamo codepoints + cho_cp = 0x1100 + index_cho + jung_cp = 0x1161 + index_jung + jong_cp = 0x11A8 + index_jong - 1 if index_jong > 0 else 0 + + # Get sheet indices + i_cho = SC.to_hangul_choseong_index(cho_cp) + i_jung = SC.to_hangul_jungseong_index(jung_cp) + if i_jung is None: + i_jung = 0 + i_jong = 0 + if jong_cp != 0: + idx = SC.to_hangul_jongseong_index(jong_cp) + if idx is not None: + i_jong = idx + + # Get row positions + cho_row = SC.get_han_initial_row(i_cho, i_jung, i_jong) + jung_row = SC.get_han_medial_row(i_cho, i_jung, i_jong) + jong_row = SC.get_han_final_row(i_cho, i_jung, i_jong) + + # Get jamo bitmaps + cho_bitmap = get_jamo(i_cho, cho_row) + jung_bitmap = get_jamo(i_jung, jung_row) + + # Compose + composed = _compose_bitmaps(cho_bitmap, jung_bitmap, cell_w, cell_h) + if index_jong > 0: + jong_bitmap = get_jamo(i_jong, jong_row) + _compose_bitmap_into(composed, jong_bitmap, cell_w, cell_h) + + # Determine advance width + advance_width = cell_w + 1 if i_jung in SC.HANGUL_PEAKS_WITH_EXTRA_WIDTH else cell_w + + props = GlyphProps(width=advance_width) + result[c] = ExtractedGlyph(c, props, composed) + + print(f" Hangul composition done: {len(result)} glyphs") + return result diff --git a/OTFbuild/keming_machine.py b/OTFbuild/keming_machine.py new file mode 100644 index 0000000..71fb3f7 --- /dev/null +++ b/OTFbuild/keming_machine.py @@ -0,0 +1,126 @@ +""" +Generate kerning pairs from shape rules. +Ported from TerrarumSansBitmap.kt "The Keming Machine" section. + +6 base rules + 6 mirrored (auto-generated) = 12 rules total. +Also includes r+dot special pairs. + +Output kern values scaled by SCALE (50 units/pixel): + -1px -> -50 units, -2px -> -100 units +""" + +from typing import Dict, Tuple + +from glyph_parser import ExtractedGlyph +import sheet_config as SC + +SCALE = SC.SCALE + + +class _Ing: + """Pattern matcher for kerning shape bits.""" + + def __init__(self, s): + self.s = s + self.care_bits = 0 + self.rule_bits = 0 + for index, char in enumerate(s): + if char == '@': + self.care_bits |= SC.KEMING_BIT_MASK[index] + self.rule_bits |= SC.KEMING_BIT_MASK[index] + elif char == '`': + self.care_bits |= SC.KEMING_BIT_MASK[index] + + def matches(self, shape_bits): + return (shape_bits & self.care_bits) == self.rule_bits + + +class _Kem: + def __init__(self, first, second, bb=2, yy=1): + self.first = first + self.second = second + self.bb = bb + self.yy = yy + + +def _build_kerning_rules(): + """Build the 12 kerning rules (6 base + 6 mirrored).""" + base_rules = [ + _Kem(_Ing("_`_@___`__"), _Ing("`_`___@___")), + _Kem(_Ing("_@_`___`__"), _Ing("`_________")), + _Kem(_Ing("_@_@___`__"), _Ing("`___@_@___"), 1, 1), + _Kem(_Ing("_@_@_`_`__"), _Ing("`_____@___")), + _Kem(_Ing("___`_`____"), _Ing("`___@_`___")), + _Kem(_Ing("___`_`____"), _Ing("`_@___`___")), + ] + + mirrored = [] + for rule in base_rules: + left = rule.first.s + right = rule.second.s + new_left = [] + new_right = [] + for c in range(0, len(left), 2): + new_left.append(right[c + 1]) + new_left.append(right[c]) + new_right.append(left[c + 1]) + new_right.append(left[c]) + mirrored.append(_Kem( + _Ing(''.join(new_left)), + _Ing(''.join(new_right)), + rule.bb, rule.yy + )) + + return base_rules + mirrored + + +_KERNING_RULES = _build_kerning_rules() + + +def generate_kerning_pairs(glyphs: Dict[int, ExtractedGlyph]) -> Dict[Tuple[int, int], int]: + """ + Generate kerning pairs from all glyphs that have kerning data. + Returns dict of (left_codepoint, right_codepoint) -> kern_offset_in_font_units. + Negative values = tighter spacing. + """ + result = {} + + # Collect all codepoints with kerning data + kernable = {cp: g for cp, g in glyphs.items() if g.props.has_kern_data} + + if not kernable: + print(" [KemingMachine] No glyphs with kern data found") + return result + + print(f" [KemingMachine] {len(kernable)} glyphs with kern data") + + # Special rule: lowercase r + dot + r_dot_count = 0 + for r in SC.LOWERCASE_RS: + for d in SC.DOTS: + if r in glyphs and d in glyphs: + result[(r, d)] = -1 * SCALE + r_dot_count += 1 + + # Apply kerning rules to all pairs + kern_codes = list(kernable.keys()) + pairs_found = 0 + + for left_code in kern_codes: + left_props = kernable[left_code].props + mask_l = left_props.kerning_mask + + for right_code in kern_codes: + right_props = kernable[right_code].props + mask_r = right_props.kerning_mask + + for rule in _KERNING_RULES: + if rule.first.matches(mask_l) and rule.second.matches(mask_r): + contraction = rule.yy if (left_props.is_kern_y_type or right_props.is_kern_y_type) else rule.bb + if contraction > 0: + result[(left_code, right_code)] = -contraction * SCALE + pairs_found += 1 + break # first matching rule wins + + print(f" [KemingMachine] Generated {pairs_found} kerning pairs (+ {r_dot_count} r-dot pairs)") + return result diff --git a/OTFbuild/opentype_features.py b/OTFbuild/opentype_features.py new file mode 100644 index 0000000..892352d --- /dev/null +++ b/OTFbuild/opentype_features.py @@ -0,0 +1,449 @@ +""" +Generate OpenType feature code (feaLib syntax) for GSUB/GPOS tables. + +Features implemented: +- kern: GPOS pair positioning from KemingMachine +- liga: Standard ligatures (Alphabetic Presentation Forms) +- locl: Bulgarian/Serbian Cyrillic variants +- Devanagari GSUB: nukt, akhn, half, vatu, pres, blws, rphf +- Tamil GSUB: consonant+vowel ligatures, KSSA, SHRII +- Sundanese GSUB: diacritic combinations +- mark: GPOS mark-to-base positioning (diacritics anchors) +""" + +from typing import Dict, List, Set, Tuple + +from glyph_parser import ExtractedGlyph +import sheet_config as SC + + +def glyph_name(cp): + """Generate standard glyph name for a codepoint.""" + if cp == 0: + return ".notdef" + if cp == 0x20: + return "space" + if cp <= 0xFFFF: + return f"uni{cp:04X}" + return f"u{cp:05X}" if cp <= 0xFFFFF else f"u{cp:06X}" + + +def generate_features(glyphs, kern_pairs, font_glyph_set): + """ + Generate complete OpenType feature code string. + + Args: + glyphs: dict of codepoint -> ExtractedGlyph + kern_pairs: dict of (left_cp, right_cp) -> kern_value_in_font_units + font_glyph_set: set of glyph names actually present in the font + Returns: + Feature code string for feaLib compilation. + """ + parts = [] + + def has(cp): + return glyph_name(cp) in font_glyph_set + + # kern feature + kern_code = _generate_kern(kern_pairs, has) + if kern_code: + parts.append(kern_code) + + # liga feature + liga_code = _generate_liga(has) + if liga_code: + parts.append(liga_code) + + # locl feature (Bulgarian/Serbian) + locl_code = _generate_locl(glyphs, has) + if locl_code: + parts.append(locl_code) + + # Devanagari features + deva_code = _generate_devanagari(glyphs, has) + if deva_code: + parts.append(deva_code) + + # Tamil features + tamil_code = _generate_tamil(glyphs, has) + if tamil_code: + parts.append(tamil_code) + + # Sundanese features + sund_code = _generate_sundanese(glyphs, has) + if sund_code: + parts.append(sund_code) + + # mark feature + mark_code = _generate_mark(glyphs, has) + if mark_code: + parts.append(mark_code) + + return '\n\n'.join(parts) + + +def _generate_kern(kern_pairs, has): + """Generate kern feature from pair positioning data.""" + if not kern_pairs: + return "" + + lines = ["feature kern {"] + count = 0 + for (left_cp, right_cp), value in sorted(kern_pairs.items()): + if has(left_cp) and has(right_cp): + lines.append(f" pos {glyph_name(left_cp)} {glyph_name(right_cp)} {value};") + count += 1 + + if count == 0: + return "" + lines.append("} kern;") + return '\n'.join(lines) + + +def _generate_liga(has): + """Generate liga feature for Alphabetic Presentation Forms.""" + subs = [] + + _liga_rules = [ + ([0x66, 0x66, 0x69], 0xFB03, "ffi"), + ([0x66, 0x66, 0x6C], 0xFB04, "ffl"), + ([0x66, 0x66], 0xFB00, "ff"), + ([0x66, 0x69], 0xFB01, "fi"), + ([0x66, 0x6C], 0xFB02, "fl"), + ([0x17F, 0x74], 0xFB05, "long-s t"), + ([0x73, 0x74], 0xFB06, "st"), + ] + + for seq, result_cp, name in _liga_rules: + if all(has(c) for c in seq) and has(result_cp): + seq_names = ' '.join(glyph_name(c) for c in seq) + subs.append(f" sub {seq_names} by {glyph_name(result_cp)}; # {name}") + + _armenian_rules = [ + ([0x574, 0x576], 0xFB13, "men now"), + ([0x574, 0x565], 0xFB14, "men ech"), + ([0x574, 0x56B], 0xFB15, "men ini"), + ([0x57E, 0x576], 0xFB16, "vew now"), + ([0x574, 0x56D], 0xFB17, "men xeh"), + ] + + for seq, result_cp, name in _armenian_rules: + if all(has(c) for c in seq) and has(result_cp): + seq_names = ' '.join(glyph_name(c) for c in seq) + subs.append(f" sub {seq_names} by {glyph_name(result_cp)}; # Armenian {name}") + + if not subs: + return "" + + lines = ["feature liga {"] + lines.extend(subs) + lines.append("} liga;") + return '\n'.join(lines) + + +def _generate_locl(glyphs, has): + """Generate locl feature for Bulgarian and Serbian Cyrillic variants.""" + bg_subs = [] + sr_subs = [] + + for pua in range(0xF0000, 0xF0060): + cyrillic = pua - 0xF0000 + 0x0400 + if has(pua) and has(cyrillic): + pua_bm = glyphs[pua].bitmap + cyr_bm = glyphs[cyrillic].bitmap + if pua_bm != cyr_bm: + bg_subs.append(f" sub {glyph_name(cyrillic)} by {glyph_name(pua)};") + + for pua in range(0xF0060, 0xF00C0): + cyrillic = pua - 0xF0060 + 0x0400 + if has(pua) and has(cyrillic): + pua_bm = glyphs[pua].bitmap + cyr_bm = glyphs[cyrillic].bitmap + if pua_bm != cyr_bm: + sr_subs.append(f" sub {glyph_name(cyrillic)} by {glyph_name(pua)};") + + if not bg_subs and not sr_subs: + return "" + + lines = ["feature locl {"] + lines.append(" script cyrl;") + if bg_subs: + lines.append(" language BGR;") + lines.append(" lookup BulgarianForms {") + lines.extend(bg_subs) + lines.append(" } BulgarianForms;") + if sr_subs: + lines.append(" language SRB;") + lines.append(" lookup SerbianForms {") + lines.extend(sr_subs) + lines.append(" } SerbianForms;") + lines.append("} locl;") + return '\n'.join(lines) + + +def _generate_devanagari(glyphs, has): + """Generate Devanagari GSUB features: nukt, akhn, half, vatu, pres, blws, rphf.""" + features = [] + + # --- nukt: consonant + nukta -> nukta form --- + nukt_subs = [] + for uni_cp in range(0x0915, 0x093A): + internal = SC.to_deva_internal(uni_cp) + nukta_form = internal + 48 + if has(uni_cp) and has(0x093C) and has(nukta_form): + nukt_subs.append( + f" sub {glyph_name(uni_cp)} {glyph_name(0x093C)} by {glyph_name(nukta_form)};" + ) + if nukt_subs: + features.append("feature nukt {\n script dev2;\n" + '\n'.join(nukt_subs) + "\n} nukt;") + + # --- akhn: akhand ligatures --- + akhn_subs = [] + if has(0x0915) and has(SC.DEVANAGARI_VIRAMA) and has(0x0937) and has(SC.DEVANAGARI_LIG_K_SS): + akhn_subs.append( + f" sub {glyph_name(0x0915)} {glyph_name(SC.DEVANAGARI_VIRAMA)} {glyph_name(0x0937)} by {glyph_name(SC.DEVANAGARI_LIG_K_SS)};" + ) + if has(0x091C) and has(SC.DEVANAGARI_VIRAMA) and has(0x091E) and has(SC.DEVANAGARI_LIG_J_NY): + akhn_subs.append( + f" sub {glyph_name(0x091C)} {glyph_name(SC.DEVANAGARI_VIRAMA)} {glyph_name(0x091E)} by {glyph_name(SC.DEVANAGARI_LIG_J_NY)};" + ) + if akhn_subs: + features.append("feature akhn {\n script dev2;\n" + '\n'.join(akhn_subs) + "\n} akhn;") + + # --- half: consonant + virama -> half form --- + half_subs = [] + for uni_cp in range(0x0915, 0x093A): + internal = SC.to_deva_internal(uni_cp) + half_form = internal + 240 + if has(uni_cp) and has(SC.DEVANAGARI_VIRAMA) and has(half_form): + half_subs.append( + f" sub {glyph_name(uni_cp)} {glyph_name(SC.DEVANAGARI_VIRAMA)} by {glyph_name(half_form)};" + ) + if half_subs: + features.append("feature half {\n script dev2;\n" + '\n'.join(half_subs) + "\n} half;") + + # --- vatu: consonant + virama + RA -> RA-appended form --- + vatu_subs = [] + for uni_cp in range(0x0915, 0x093A): + internal = SC.to_deva_internal(uni_cp) + ra_form = internal + 480 + if has(uni_cp) and has(SC.DEVANAGARI_VIRAMA) and has(0x0930) and has(ra_form): + vatu_subs.append( + f" sub {glyph_name(uni_cp)} {glyph_name(SC.DEVANAGARI_VIRAMA)} {glyph_name(0x0930)} by {glyph_name(ra_form)};" + ) + if vatu_subs: + features.append("feature vatu {\n script dev2;\n" + '\n'.join(vatu_subs) + "\n} vatu;") + + # --- pres: named conjunct ligatures --- + pres_subs = [] + _conjuncts = [ + (0x0915, 0x0924, SC.DEVANAGARI_LIG_K_T, "K.T"), + (0x0924, 0x0924, SC.DEVANAGARI_LIG_T_T, "T.T"), + (0x0928, 0x0924, SC.DEVANAGARI_LIG_N_T, "N.T"), + (0x0928, 0x0928, SC.DEVANAGARI_LIG_N_N, "N.N"), + (0x0926, 0x0917, 0xF01B0, "D.G"), + (0x0926, 0x0918, 0xF01B1, "D.GH"), + (0x0926, 0x0926, 0xF01B2, "D.D"), + (0x0926, 0x0927, 0xF01B3, "D.DH"), + (0x0926, 0x0928, 0xF01B4, "D.N"), + (0x0926, 0x092C, 0xF01B5, "D.B"), + (0x0926, 0x092D, 0xF01B6, "D.BH"), + (0x0926, 0x092E, 0xF01B7, "D.M"), + (0x0926, 0x092F, 0xF01B8, "D.Y"), + (0x0926, 0x0935, 0xF01B9, "D.V"), + (0x0938, 0x0935, SC.DEVANAGARI_LIG_S_V, "S.V"), + (0x0937, 0x092A, SC.DEVANAGARI_LIG_SS_P, "SS.P"), + (0x0936, 0x091A, SC.DEVANAGARI_LIG_SH_C, "SH.C"), + (0x0936, 0x0928, SC.DEVANAGARI_LIG_SH_N, "SH.N"), + (0x0936, 0x0935, SC.DEVANAGARI_LIG_SH_V, "SH.V"), + (0x0918, 0x091F, 0xF01BD, "GH.TT"), + (0x0918, 0x0920, 0xF01BE, "GH.TTH"), + (0x0918, 0x0922, 0xF01BF, "GH.DDH"), + (0x091F, 0x091F, 0xF01D6, "TT.TT"), + (0x091F, 0x0920, 0xF01D7, "TT.TTH"), + (0x0920, 0x0920, 0xF01D9, "TTH.TTH"), + (0x0921, 0x0921, 0xF01DB, "DD.DD"), + (0x0921, 0x0922, 0xF01DC, "DD.DDH"), + (0x0922, 0x0922, 0xF01DE, "DDH.DDH"), + (0x092A, 0x091F, 0xF01C0, "P.TT"), + (0x092A, 0x0920, 0xF01C1, "P.TTH"), + (0x092A, 0x0922, 0xF01C2, "P.DDH"), + (0x0937, 0x091F, 0xF01C3, "SS.TT"), + (0x0937, 0x0920, 0xF01C4, "SS.TTH"), + (0x0937, 0x0922, 0xF01C5, "SS.DDH"), + (0x0939, 0x0923, 0xF01C6, "H.NN"), + (0x0939, 0x0928, 0xF01C7, "H.N"), + (0x0939, 0x092E, 0xF01C8, "H.M"), + (0x0939, 0x092F, 0xF01C9, "H.Y"), + (0x0939, 0x0932, 0xF01CA, "H.L"), + (0x0939, 0x0935, 0xF01CB, "H.V"), + ] + for c1, c2, result, name in _conjuncts: + if has(c1) and has(SC.DEVANAGARI_VIRAMA) and has(c2) and has(result): + pres_subs.append( + f" sub {glyph_name(c1)} {glyph_name(SC.DEVANAGARI_VIRAMA)} {glyph_name(c2)} by {glyph_name(result)}; # {name}" + ) + if pres_subs: + features.append("feature pres {\n script dev2;\n" + '\n'.join(pres_subs) + "\n} pres;") + + # --- blws: RA/RRA/HA + U/UU -> special syllables --- + blws_subs = [] + _blws_rules = [ + (0x0930, SC.DEVANAGARI_U, SC.DEVANAGARI_SYLL_RU, "Ru"), + (0x0930, SC.DEVANAGARI_UU, SC.DEVANAGARI_SYLL_RUU, "Ruu"), + (0x0931, SC.DEVANAGARI_U, SC.DEVANAGARI_SYLL_RRU, "RRu"), + (0x0931, SC.DEVANAGARI_UU, SC.DEVANAGARI_SYLL_RRUU, "RRuu"), + (0x0939, SC.DEVANAGARI_U, SC.DEVANAGARI_SYLL_HU, "Hu"), + (0x0939, SC.DEVANAGARI_UU, SC.DEVANAGARI_SYLL_HUU, "Huu"), + ] + for c1, c2, result, name in _blws_rules: + if has(c1) and has(c2) and has(result): + blws_subs.append( + f" sub {glyph_name(c1)} {glyph_name(c2)} by {glyph_name(result)}; # {name}" + ) + if blws_subs: + features.append("feature blws {\n script dev2;\n" + '\n'.join(blws_subs) + "\n} blws;") + + # --- rphf: RA + virama -> reph --- + if has(0x0930) and has(SC.DEVANAGARI_VIRAMA) and has(SC.DEVANAGARI_RA_SUPER): + rphf_code = ( + f"feature rphf {{\n" + f" script dev2;\n" + f" sub {glyph_name(0x0930)} {glyph_name(SC.DEVANAGARI_VIRAMA)} by {glyph_name(SC.DEVANAGARI_RA_SUPER)};\n" + f"}} rphf;" + ) + features.append(rphf_code) + + if not features: + return "" + return '\n\n'.join(features) + + +def _generate_tamil(glyphs, has): + """Generate Tamil GSUB features.""" + subs = [] + + _tamil_i_rules = [ + (0x0B99, 0xF00F0, "nga+i"), + (0x0BAA, 0xF00F1, "pa+i"), + (0x0BAF, 0xF00F2, "ya+i"), + (0x0BB2, 0xF00F3, "la+i"), + (0x0BB5, 0xF00F4, "va+i"), + (0x0BB8, 0xF00F5, "sa+i"), + ] + for cons, result, name in _tamil_i_rules: + if has(cons) and has(SC.TAMIL_I) and has(result): + subs.append(f" sub {glyph_name(cons)} {glyph_name(SC.TAMIL_I)} by {glyph_name(result)}; # {name}") + + if has(0x0B9F) and has(0x0BBF) and has(0xF00C0): + subs.append(f" sub {glyph_name(0x0B9F)} {glyph_name(0x0BBF)} by {glyph_name(0xF00C0)}; # tta+i") + if has(0x0B9F) and has(0x0BC0) and has(0xF00C1): + subs.append(f" sub {glyph_name(0x0B9F)} {glyph_name(0x0BC0)} by {glyph_name(0xF00C1)}; # tta+ii") + + for idx, cons in enumerate(SC.TAMIL_LIGATING_CONSONANTS): + u_form = 0xF00C2 + idx + uu_form = 0xF00D4 + idx + if has(cons) and has(0x0BC1) and has(u_form): + subs.append(f" sub {glyph_name(cons)} {glyph_name(0x0BC1)} by {glyph_name(u_form)};") + if has(cons) and has(0x0BC2) and has(uu_form): + subs.append(f" sub {glyph_name(cons)} {glyph_name(0x0BC2)} by {glyph_name(uu_form)};") + + if has(0x0B95) and has(0x0BCD) and has(0x0BB7) and has(SC.TAMIL_KSSA): + subs.append(f" sub {glyph_name(0x0B95)} {glyph_name(0x0BCD)} {glyph_name(0x0BB7)} by {glyph_name(SC.TAMIL_KSSA)}; # KSSA") + + if has(0x0BB6) and has(0x0BCD) and has(0x0BB0) and has(0x0BC0) and has(SC.TAMIL_SHRII): + subs.append(f" sub {glyph_name(0x0BB6)} {glyph_name(0x0BCD)} {glyph_name(0x0BB0)} {glyph_name(0x0BC0)} by {glyph_name(SC.TAMIL_SHRII)}; # SHRII (sha)") + if has(0x0BB8) and has(0x0BCD) and has(0x0BB0) and has(0x0BC0) and has(SC.TAMIL_SHRII): + subs.append(f" sub {glyph_name(0x0BB8)} {glyph_name(0x0BCD)} {glyph_name(0x0BB0)} {glyph_name(0x0BC0)} by {glyph_name(SC.TAMIL_SHRII)}; # SHRII (sa)") + + if not subs: + return "" + + lines = ["feature pres {", " script tml2;"] + lines.extend(subs) + lines.append("} pres;") + return '\n'.join(lines) + + +def _generate_sundanese(glyphs, has): + """Generate Sundanese GSUB feature for diacritic combinations.""" + subs = [] + _rules = [ + (0x1BA4, 0x1B80, SC.SUNDANESE_ING, "panghulu+panyecek=ing"), + (0x1BA8, 0x1B80, SC.SUNDANESE_ENG, "pamepet+panyecek=eng"), + (0x1BA9, 0x1B80, SC.SUNDANESE_EUNG, "paneuleung+panyecek=eung"), + (0x1BA4, 0x1B81, SC.SUNDANESE_IR, "panghulu+panglayar=ir"), + (0x1BA8, 0x1B81, SC.SUNDANESE_ER, "pamepet+panglayar=er"), + (0x1BA9, 0x1B81, SC.SUNDANESE_EUR, "paneuleung+panglayar=eur"), + (0x1BA3, 0x1BA5, SC.SUNDANESE_LU, "panyuku+panglayar=lu"), + ] + for c1, c2, result, name in _rules: + if has(c1) and has(c2) and has(result): + subs.append(f" sub {glyph_name(c1)} {glyph_name(c2)} by {glyph_name(result)}; # {name}") + + if not subs: + return "" + + lines = ["feature pres {", " script sund;"] + lines.extend(subs) + lines.append("} pres;") + return '\n'.join(lines) + + +def _generate_mark(glyphs, has): + """ + Generate GPOS mark-to-base positioning using diacritics anchors from tag column. + """ + bases_with_anchors = {} + marks = {} + + for cp, g in glyphs.items(): + if not has(cp): + continue + if g.props.write_on_top >= 0: + marks[cp] = g + elif any(a.x_used or a.y_used for a in g.props.diacritics_anchors): + bases_with_anchors[cp] = g + + if not bases_with_anchors or not marks: + return "" + + lines = [] + + # Group marks by writeOnTop type + mark_classes = {} + for cp, g in marks.items(): + mark_type = g.props.write_on_top + if mark_type not in mark_classes: + mark_classes[mark_type] = [] + mark_classes[mark_type].append((cp, g)) + + for mark_type, mark_list in sorted(mark_classes.items()): + class_name = f"@mark_type{mark_type}" + for cp, g in mark_list: + mark_x = (g.props.width * SC.SCALE) // 2 + mark_y = SC.ASCENT + lines.append( + f"markClass {glyph_name(cp)} {class_name};" + ) + + lines.append("") + lines.append("feature mark {") + + for mark_type, mark_list in sorted(mark_classes.items()): + class_name = f"@mark_type{mark_type}" + lookup_name = f"mark_type{mark_type}" + lines.append(f" lookup {lookup_name} {{") + + for cp, g in sorted(bases_with_anchors.items()): + anchor = g.props.diacritics_anchors[mark_type] if mark_type < 6 else None + if anchor and (anchor.x_used or anchor.y_used): + ax = anchor.x * SC.SCALE + ay = (SC.ASCENT // SC.SCALE - anchor.y) * SC.SCALE + lines.append(f" pos base {glyph_name(cp)} mark {class_name};") + + lines.append(f" }} {lookup_name};") + + lines.append("} mark;") + + return '\n'.join(lines) diff --git a/OTFbuild/requirements.txt b/OTFbuild/requirements.txt new file mode 100644 index 0000000..f1ba9b0 --- /dev/null +++ b/OTFbuild/requirements.txt @@ -0,0 +1 @@ +fonttools>=4.47.0 diff --git a/OTFbuild/sheet_config.py b/OTFbuild/sheet_config.py new file mode 100644 index 0000000..b532cc2 --- /dev/null +++ b/OTFbuild/sheet_config.py @@ -0,0 +1,533 @@ +""" +Sheet definitions, code ranges, index functions, and font metric constants. +Ported from TerrarumSansBitmap.kt companion object and SheetConfig.kt. +""" + +# Font metrics +H = 20 +H_UNIHAN = 16 +W_HANGUL_BASE = 13 +W_UNIHAN = 16 +W_LATIN_WIDE = 9 +W_VAR_INIT = 15 +W_WIDEVAR_INIT = 31 +HGAP_VAR = 1 +SIZE_CUSTOM_SYM = 20 + +H_DIACRITICS = 3 +H_STACKUP_LOWERCASE_SHIFTDOWN = 4 +H_OVERLAY_LOWERCASE_SHIFTDOWN = 2 + +LINE_HEIGHT = 24 + +# OTF metrics (1000 UPM, scale = 50 units/pixel) +UNITS_PER_EM = 1000 +SCALE = 50 # units per pixel +ASCENT = 16 * SCALE # 800 +DESCENT = 4 * SCALE # 200 +X_HEIGHT = 8 * SCALE # 400 +CAP_HEIGHT = 12 * SCALE # 600 +LINE_GAP = (LINE_HEIGHT - H) * SCALE # 200 + +# Sheet indices +SHEET_ASCII_VARW = 0 +SHEET_HANGUL = 1 +SHEET_EXTA_VARW = 2 +SHEET_EXTB_VARW = 3 +SHEET_KANA = 4 +SHEET_CJK_PUNCT = 5 +SHEET_UNIHAN = 6 +SHEET_CYRILIC_VARW = 7 +SHEET_HALFWIDTH_FULLWIDTH_VARW = 8 +SHEET_UNI_PUNCT_VARW = 9 +SHEET_GREEK_VARW = 10 +SHEET_THAI_VARW = 11 +SHEET_HAYEREN_VARW = 12 +SHEET_KARTULI_VARW = 13 +SHEET_IPA_VARW = 14 +SHEET_RUNIC = 15 +SHEET_LATIN_EXT_ADD_VARW = 16 +SHEET_CUSTOM_SYM = 17 +SHEET_BULGARIAN_VARW = 18 +SHEET_SERBIAN_VARW = 19 +SHEET_TSALAGI_VARW = 20 +SHEET_PHONETIC_EXT_VARW = 21 +SHEET_DEVANAGARI_VARW = 22 +SHEET_KARTULI_CAPS_VARW = 23 +SHEET_DIACRITICAL_MARKS_VARW = 24 +SHEET_GREEK_POLY_VARW = 25 +SHEET_EXTC_VARW = 26 +SHEET_EXTD_VARW = 27 +SHEET_CURRENCIES_VARW = 28 +SHEET_INTERNAL_VARW = 29 +SHEET_LETTERLIKE_MATHS_VARW = 30 +SHEET_ENCLOSED_ALPHNUM_SUPL_VARW = 31 +SHEET_TAMIL_VARW = 32 +SHEET_BENGALI_VARW = 33 +SHEET_BRAILLE_VARW = 34 +SHEET_SUNDANESE_VARW = 35 +SHEET_DEVANAGARI2_INTERNAL_VARW = 36 +SHEET_CODESTYLE_ASCII_VARW = 37 +SHEET_ALPHABETIC_PRESENTATION_FORMS = 38 +SHEET_HENTAIGANA_VARW = 39 + +SHEET_UNKNOWN = 254 + +FILE_LIST = [ + "ascii_variable.tga", + "hangul_johab.tga", + "latinExtA_variable.tga", + "latinExtB_variable.tga", + "kana_variable.tga", + "cjkpunct_variable.tga", + "wenquanyi.tga", + "cyrilic_variable.tga", + "halfwidth_fullwidth_variable.tga", + "unipunct_variable.tga", + "greek_variable.tga", + "thai_variable.tga", + "hayeren_variable.tga", + "kartuli_variable.tga", + "ipa_ext_variable.tga", + "futhark.tga", + "latinExt_additional_variable.tga", + "puae000-e0ff.tga", + "cyrilic_bulgarian_variable.tga", + "cyrilic_serbian_variable.tga", + "tsalagi_variable.tga", + "phonetic_extensions_variable.tga", + "devanagari_variable.tga", + "kartuli_allcaps_variable.tga", + "diacritical_marks_variable.tga", + "greek_polytonic_xyswap_variable.tga", + "latinExtC_variable.tga", + "latinExtD_variable.tga", + "currencies_variable.tga", + "internal_variable.tga", + "letterlike_symbols_variable.tga", + "enclosed_alphanumeric_supplement_variable.tga", + "tamil_extrawide_variable.tga", + "bengali_variable.tga", + "braille_variable.tga", + "sundanese_variable.tga", + "devanagari_internal_extrawide_variable.tga", + "pua_codestyle_ascii_variable.tga", + "alphabetic_presentation_forms_extrawide_variable.tga", + "hentaigana_variable.tga", +] + +CODE_RANGE = [ + list(range(0x00, 0x100)), # 0: ASCII + list(range(0x1100, 0x1200)) + list(range(0xA960, 0xA980)) + list(range(0xD7B0, 0xD800)), # 1: Hangul Jamo + list(range(0x100, 0x180)), # 2: Latin Ext A + list(range(0x180, 0x250)), # 3: Latin Ext B + list(range(0x3040, 0x3100)) + list(range(0x31F0, 0x3200)), # 4: Kana + list(range(0x3000, 0x3040)), # 5: CJK Punct + list(range(0x3400, 0xA000)), # 6: Unihan + list(range(0x400, 0x530)), # 7: Cyrillic + list(range(0xFF00, 0x10000)), # 8: Halfwidth/Fullwidth + list(range(0x2000, 0x20A0)), # 9: Uni Punct + list(range(0x370, 0x3CF)), # 10: Greek + list(range(0xE00, 0xE60)), # 11: Thai + list(range(0x530, 0x590)), # 12: Armenian + list(range(0x10D0, 0x1100)), # 13: Georgian + list(range(0x250, 0x300)), # 14: IPA + list(range(0x16A0, 0x1700)), # 15: Runic + list(range(0x1E00, 0x1F00)), # 16: Latin Ext Additional + list(range(0xE000, 0xE100)), # 17: Custom Sym (PUA) + list(range(0xF0000, 0xF0060)), # 18: Bulgarian + list(range(0xF0060, 0xF00C0)), # 19: Serbian + list(range(0x13A0, 0x13F6)), # 20: Cherokee + list(range(0x1D00, 0x1DC0)), # 21: Phonetic Ext + list(range(0x900, 0x980)) + list(range(0xF0100, 0xF0500)), # 22: Devanagari + list(range(0x1C90, 0x1CC0)), # 23: Georgian Caps + list(range(0x300, 0x370)), # 24: Diacritical Marks + list(range(0x1F00, 0x2000)), # 25: Greek Polytonic + list(range(0x2C60, 0x2C80)), # 26: Latin Ext C + list(range(0xA720, 0xA800)), # 27: Latin Ext D + list(range(0x20A0, 0x20D0)), # 28: Currencies + list(range(0xFFE00, 0xFFFA0)), # 29: Internal + list(range(0x2100, 0x2150)), # 30: Letterlike + list(range(0x1F100, 0x1F200)), # 31: Enclosed Alphanum Supl + list(range(0x0B80, 0x0C00)) + list(range(0xF00C0, 0xF0100)), # 32: Tamil + list(range(0x980, 0xA00)), # 33: Bengali + list(range(0x2800, 0x2900)), # 34: Braille + list(range(0x1B80, 0x1BC0)) + list(range(0x1CC0, 0x1CD0)) + list(range(0xF0500, 0xF0510)), # 35: Sundanese + list(range(0xF0110, 0xF0130)), # 36: Devanagari2 Internal + list(range(0xF0520, 0xF0580)), # 37: Codestyle ASCII + list(range(0xFB00, 0xFB18)), # 38: Alphabetic Presentation + list(range(0x1B000, 0x1B170)), # 39: Hentaigana +] + +CODE_RANGE_HANGUL_COMPAT = range(0x3130, 0x3190) + +ALT_CHARSET_CODEPOINT_OFFSETS = [ + 0, + 0xF0000 - 0x400, # Bulgarian + 0xF0060 - 0x400, # Serbian + 0xF0520 - 0x20, # Codestyle +] + +ALT_CHARSET_CODEPOINT_DOMAINS = [ + range(0, 0x10FFFF + 1), + range(0x400, 0x460), + range(0x400, 0x460), + range(0x20, 0x80), +] + +# Unicode spacing characters +NQSP = 0x2000 +MQSP = 0x2001 +ENSP = 0x2002 +EMSP = 0x2003 +THREE_PER_EMSP = 0x2004 +QUARTER_EMSP = 0x2005 +SIX_PER_EMSP = 0x2006 +FSP = 0x2007 +PSP = 0x2008 +THSP = 0x2009 +HSP = 0x200A +ZWSP = 0x200B +ZWNJ = 0x200C +ZWJ = 0x200D +SHY = 0xAD +NBSP = 0xA0 +OBJ = 0xFFFC + +FIXED_BLOCK_1 = 0xFFFD0 +MOVABLE_BLOCK_M1 = 0xFFFE0 +MOVABLE_BLOCK_1 = 0xFFFF0 + +CHARSET_OVERRIDE_DEFAULT = 0xFFFC0 +CHARSET_OVERRIDE_BG_BG = 0xFFFC1 +CHARSET_OVERRIDE_SR_SR = 0xFFFC2 +CHARSET_OVERRIDE_CODESTYLE = 0xFFFC3 + +# Alignment constants +ALIGN_LEFT = 0 +ALIGN_RIGHT = 1 +ALIGN_CENTRE = 2 +ALIGN_BEFORE = 3 + +# Stack constants +STACK_UP = 0 +STACK_DOWN = 1 +STACK_BEFORE_N_AFTER = 2 +STACK_UP_N_DOWN = 3 +STACK_DONT = 4 + + +def is_variable(filename): + return filename.endswith("_variable.tga") + + +def is_xy_swapped(filename): + return "xyswap" in filename.lower() + + +def is_extra_wide(filename): + return "extrawide" in filename.lower() + + +def get_cell_width(sheet_index): + """Returns the cell pitch in the sprite sheet (includes HGAP_VAR for variable sheets).""" + fn = FILE_LIST[sheet_index] + if is_extra_wide(fn): + return W_WIDEVAR_INIT + HGAP_VAR # 32 + if is_variable(fn): + return W_VAR_INIT + HGAP_VAR # 16 + if sheet_index == SHEET_UNIHAN: + return W_UNIHAN + if sheet_index == SHEET_HANGUL: + return W_HANGUL_BASE + if sheet_index == SHEET_CUSTOM_SYM: + return SIZE_CUSTOM_SYM + if sheet_index == SHEET_RUNIC: + return W_LATIN_WIDE + return W_VAR_INIT + HGAP_VAR + + +def get_cell_height(sheet_index): + if sheet_index == SHEET_UNIHAN: + return H_UNIHAN + if sheet_index == SHEET_CUSTOM_SYM: + return SIZE_CUSTOM_SYM + return H + + +def get_columns(sheet_index): + if sheet_index == SHEET_UNIHAN: + return 256 + return 16 + + +# Hangul constants +JUNG_COUNT = 21 +JONG_COUNT = 28 + +# Hangul shape arrays (sorted sets) +JUNGSEONG_I = frozenset([21, 61]) +JUNGSEONG_OU = frozenset([9, 13, 14, 18, 34, 35, 39, 45, 51, 53, 54, 64, 73, 80, 83]) +JUNGSEONG_OU_COMPLEX = frozenset( + [10, 11, 16] + list(range(22, 34)) + [36, 37, 38] + list(range(41, 45)) + + list(range(46, 51)) + list(range(56, 60)) + [63] + list(range(67, 73)) + + list(range(74, 80)) + list(range(81, 84)) + list(range(85, 92)) + [93, 94] +) +JUNGSEONG_RIGHTIE = frozenset([2, 4, 6, 8, 11, 16, 32, 33, 37, 42, 44, 48, 50, 71, 72, 75, 78, 79, 83, 86, 87, 88, 94]) +JUNGSEONG_OEWI = frozenset([12, 15, 17, 40, 52, 55, 89, 90, 91]) +JUNGSEONG_EU = frozenset([19, 62, 66]) +JUNGSEONG_YI = frozenset([20, 60, 65]) +JUNGSEONG_UU = frozenset([14, 15, 16, 17, 18, 27, 30, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 67, 68, 73, 77, 78, 79, 80, 81, 82, 83, 84, 91]) +JUNGSEONG_WIDE = frozenset(list(JUNGSEONG_OU) + list(JUNGSEONG_EU)) +CHOSEONG_GIYEOKS = frozenset([0, 1, 15, 23, 30, 34, 45, 51, 56, 65, 82, 90, 100, 101, 110, 111, 115]) +HANGUL_PEAKS_WITH_EXTRA_WIDTH = frozenset([2, 4, 6, 8, 11, 16, 32, 33, 37, 42, 44, 48, 50, 71, 75, 78, 79, 83, 86, 87, 88, 94]) + +GIYEOK_REMAPPING = {5: 19, 6: 20, 7: 21, 8: 22, 11: 23, 12: 24} + + +def is_hangul_choseong(c): + return 0x1100 <= c <= 0x115F or 0xA960 <= c <= 0xA97F + + +def is_hangul_jungseong(c): + return 0x1160 <= c <= 0x11A7 or 0xD7B0 <= c <= 0xD7C6 + + +def is_hangul_jongseong(c): + return 0x11A8 <= c <= 0x11FF or 0xD7CB <= c <= 0xD7FB + + +def is_hangul_compat(c): + return 0x3130 <= c <= 0x318F + + +def to_hangul_choseong_index(c): + if 0x1100 <= c <= 0x115F: + return c - 0x1100 + if 0xA960 <= c <= 0xA97F: + return c - 0xA960 + 96 + raise ValueError(f"Not a choseong: U+{c:04X}") + + +def to_hangul_jungseong_index(c): + if 0x1160 <= c <= 0x11A7: + return c - 0x1160 + if 0xD7B0 <= c <= 0xD7C6: + return c - 0xD7B0 + 72 + return None + + +def to_hangul_jongseong_index(c): + if 0x11A8 <= c <= 0x11FF: + return c - 0x11A8 + 1 + if 0xD7CB <= c <= 0xD7FB: + return c - 0xD7CB + 88 + 1 + return None + + +def get_han_initial_row(i, p, f): + if p in JUNGSEONG_I: + ret = 3 + elif p in JUNGSEONG_OEWI: + ret = 11 + elif p in JUNGSEONG_OU_COMPLEX: + ret = 7 + elif p in JUNGSEONG_OU: + ret = 5 + elif p in JUNGSEONG_EU: + ret = 9 + elif p in JUNGSEONG_YI: + ret = 13 + else: + ret = 1 + + if f != 0: + ret += 1 + + if p in JUNGSEONG_UU and i in CHOSEONG_GIYEOKS: + mapped = GIYEOK_REMAPPING.get(ret) + if mapped is None: + raise ValueError(f"Giyeok remapping failed: i={i} p={p} f={f} ret={ret}") + return mapped + return ret + + +def get_han_medial_row(i, p, f): + return 15 if f == 0 else 16 + + +def get_han_final_row(i, p, f): + return 17 if p not in JUNGSEONG_RIGHTIE else 18 + + +# Kerning constants +KEMING_BIT_MASK = [1 << b for b in [7, 6, 5, 4, 3, 2, 1, 0, 15, 14]] + +# Special characters for r+dot kerning +LOWERCASE_RS = frozenset([0x72, 0x155, 0x157, 0x159, 0x211, 0x213, 0x27c, 0x1e59, 0x1e58, 0x1e5f]) +DOTS = frozenset([0x2c, 0x2e]) + +# Devanagari internal encoding +DEVANAGARI_UNICODE_NUQTA_TABLE = [0xF0170, 0xF0171, 0xF0172, 0xF0177, 0xF017C, 0xF017D, 0xF0186, 0xF018A] + + +def to_deva_internal(c): + if 0x0915 <= c <= 0x0939: + return c - 0x0915 + 0xF0140 + if 0x0958 <= c <= 0x095F: + return DEVANAGARI_UNICODE_NUQTA_TABLE[c - 0x0958] + raise ValueError(f"No internal form for U+{c:04X}") + + +DEVANAGARI_CONSONANTS = frozenset( + list(range(0x0915, 0x093A)) + list(range(0x0958, 0x0960)) + + list(range(0x0978, 0x0980)) + list(range(0xF0140, 0xF0500)) + + list(range(0xF0106, 0xF010A)) +) + +# Sundanese internal forms +SUNDANESE_ING = 0xF0500 +SUNDANESE_ENG = 0xF0501 +SUNDANESE_EUNG = 0xF0502 +SUNDANESE_IR = 0xF0503 +SUNDANESE_ER = 0xF0504 +SUNDANESE_EUR = 0xF0505 +SUNDANESE_LU = 0xF0506 + +# Tamil constants +TAMIL_KSSA = 0xF00ED +TAMIL_SHRII = 0xF00EE +TAMIL_I = 0xBBF +TAMIL_LIGATING_CONSONANTS = [ + 0x0B95, 0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, + 0x0BA9, 0x0BAA, 0x0BAE, 0x0BAF, 0x0BB0, 0x0BB1, 0x0BB2, 0x0BB3, + 0x0BB4, 0x0BB5, +] + +# Devanagari special codepoints +DEVANAGARI_VIRAMA = 0x94D +DEVANAGARI_NUQTA = 0x93C +DEVANAGARI_RA = to_deva_internal(0x930) +DEVANAGARI_YA = to_deva_internal(0x92F) +DEVANAGARI_RRA = to_deva_internal(0x931) +DEVANAGARI_VA = to_deva_internal(0x935) +DEVANAGARI_HA = to_deva_internal(0x939) +DEVANAGARI_U = 0x941 +DEVANAGARI_UU = 0x942 +DEVANAGARI_I_VOWEL = 0x093F +DEVANAGARI_II_VOWEL = 0x0940 +DEVANAGARI_RYA = 0xF0106 +DEVANAGARI_HALF_RYA = 0xF0107 +DEVANAGARI_OPEN_YA = 0xF0108 +DEVANAGARI_OPEN_HALF_YA = 0xF0109 +DEVANAGARI_ALT_HALF_SHA = 0xF010F +DEVANAGARI_EYELASH_RA = 0xF010B +DEVANAGARI_RA_SUPER = 0xF010C +DEVANAGARI_RA_SUPER_COMPLEX = 0xF010D +MARWARI_DD = 0x978 +MARWARI_LIG_DD_R = 0xF010E + +DEVANAGARI_SYLL_RU = 0xF0100 +DEVANAGARI_SYLL_RUU = 0xF0101 +DEVANAGARI_SYLL_RRU = 0xF0102 +DEVANAGARI_SYLL_RRUU = 0xF0103 +DEVANAGARI_SYLL_HU = 0xF0104 +DEVANAGARI_SYLL_HUU = 0xF0105 + +# Devanagari ligature codepoints +DEVANAGARI_LIG_K_T = 0xF01BC +DEVANAGARI_LIG_K_SS = 0xF01A1 +DEVANAGARI_LIG_J_NY = 0xF01A2 +DEVANAGARI_LIG_T_T = 0xF01A3 +DEVANAGARI_LIG_N_T = 0xF01A4 +DEVANAGARI_LIG_N_N = 0xF01A5 +DEVANAGARI_LIG_S_V = 0xF01A6 +DEVANAGARI_LIG_SS_P = 0xF01A7 +DEVANAGARI_LIG_SH_C = 0xF01A8 +DEVANAGARI_LIG_SH_N = 0xF01A9 +DEVANAGARI_LIG_SH_V = 0xF01AA +DEVANAGARI_LIG_J_Y = 0xF01AB +DEVANAGARI_LIG_J_J_Y = 0xF01AC + +MARWARI_LIG_DD_DD = 0xF01BA +MARWARI_LIG_DD_DDH = 0xF01BB +MARWARI_LIG_DD_Y = 0xF016E +MARWARI_HALFLIG_DD_Y = 0xF016F + +# Devanagari range sets for feature generation +DEVANAGARI_PRESENTATION_CONSONANTS = range(0xF0140, 0xF0230) +DEVANAGARI_PRESENTATION_CONSONANTS_HALF = range(0xF0230, 0xF0320) +DEVANAGARI_PRESENTATION_CONSONANTS_WITH_RA = range(0xF0320, 0xF0410) +DEVANAGARI_PRESENTATION_CONSONANTS_WITH_RA_HALF = range(0xF0410, 0xF0500) + +# Index functions +def _kana_index_y(c): + return 12 if 0x31F0 <= c <= 0x31FF else (c - 0x3040) // 16 + +def _unihan_index_y(c): + return (c - 0x3400) // 256 + +def _devanagari_index_y(c): + return ((c - 0x0900) if c < 0xF0000 else (c - 0xF0080)) // 16 + +def _tamil_index_y(c): + return ((c - 0x0B80) if c < 0xF0000 else (c - 0xF0040)) // 16 + +def _sundanese_index_y(c): + if c >= 0xF0500: + return (c - 0xF04B0) // 16 + if c < 0x1BC0: + return (c - 0x1B80) // 16 + return (c - 0x1C80) // 16 + + +def index_x(c): + return c % 16 + +def unihan_index_x(c): + return (c - 0x3400) % 256 + +def index_y(sheet_index, c): + """Y-index (row) for codepoint c in the given sheet.""" + return { + SHEET_ASCII_VARW: lambda: c // 16, + SHEET_UNIHAN: lambda: _unihan_index_y(c), + SHEET_EXTA_VARW: lambda: (c - 0x100) // 16, + SHEET_EXTB_VARW: lambda: (c - 0x180) // 16, + SHEET_KANA: lambda: _kana_index_y(c), + SHEET_CJK_PUNCT: lambda: (c - 0x3000) // 16, + SHEET_CYRILIC_VARW: lambda: (c - 0x400) // 16, + SHEET_HALFWIDTH_FULLWIDTH_VARW: lambda: (c - 0xFF00) // 16, + SHEET_UNI_PUNCT_VARW: lambda: (c - 0x2000) // 16, + SHEET_GREEK_VARW: lambda: (c - 0x370) // 16, + SHEET_THAI_VARW: lambda: (c - 0xE00) // 16, + SHEET_CUSTOM_SYM: lambda: (c - 0xE000) // 16, + SHEET_HAYEREN_VARW: lambda: (c - 0x530) // 16, + SHEET_KARTULI_VARW: lambda: (c - 0x10D0) // 16, + SHEET_IPA_VARW: lambda: (c - 0x250) // 16, + SHEET_RUNIC: lambda: (c - 0x16A0) // 16, + SHEET_LATIN_EXT_ADD_VARW: lambda: (c - 0x1E00) // 16, + SHEET_BULGARIAN_VARW: lambda: (c - 0xF0000) // 16, + SHEET_SERBIAN_VARW: lambda: (c - 0xF0060) // 16, + SHEET_TSALAGI_VARW: lambda: (c - 0x13A0) // 16, + SHEET_PHONETIC_EXT_VARW: lambda: (c - 0x1D00) // 16, + SHEET_DEVANAGARI_VARW: lambda: _devanagari_index_y(c), + SHEET_KARTULI_CAPS_VARW: lambda: (c - 0x1C90) // 16, + SHEET_DIACRITICAL_MARKS_VARW: lambda: (c - 0x300) // 16, + SHEET_GREEK_POLY_VARW: lambda: (c - 0x1F00) // 16, + SHEET_EXTC_VARW: lambda: (c - 0x2C60) // 16, + SHEET_EXTD_VARW: lambda: (c - 0xA720) // 16, + SHEET_CURRENCIES_VARW: lambda: (c - 0x20A0) // 16, + SHEET_INTERNAL_VARW: lambda: (c - 0xFFE00) // 16, + SHEET_LETTERLIKE_MATHS_VARW: lambda: (c - 0x2100) // 16, + SHEET_ENCLOSED_ALPHNUM_SUPL_VARW: lambda: (c - 0x1F100) // 16, + SHEET_TAMIL_VARW: lambda: _tamil_index_y(c), + SHEET_BENGALI_VARW: lambda: (c - 0x980) // 16, + SHEET_BRAILLE_VARW: lambda: (c - 0x2800) // 16, + SHEET_SUNDANESE_VARW: lambda: _sundanese_index_y(c), + SHEET_DEVANAGARI2_INTERNAL_VARW: lambda: (c - 0xF0110) // 16, + SHEET_CODESTYLE_ASCII_VARW: lambda: (c - 0xF0520) // 16, + SHEET_ALPHABETIC_PRESENTATION_FORMS: lambda: (c - 0xFB00) // 16, + SHEET_HENTAIGANA_VARW: lambda: (c - 0x1B000) // 16, + SHEET_HANGUL: lambda: 0, + }.get(sheet_index, lambda: c // 16)() diff --git a/OTFbuild/src/net/torvald/otfbuild/DevanagariTamilProcessor.kt b/OTFbuild/src/net/torvald/otfbuild/DevanagariTamilProcessor.kt deleted file mode 100644 index 58db972..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/DevanagariTamilProcessor.kt +++ /dev/null @@ -1,133 +0,0 @@ -package net.torvald.otfbuild - -/** - * Ensures all Devanagari, Tamil, Sundanese, and Alphabetic Presentation Forms - * PUA glyphs are included in the font. Since BitsNPicas doesn't support OpenType - * GSUB/GPOS features, complex text shaping must be done by the application. - * - * All the relevant PUA codepoints are already in the sprite sheets and extracted - * by GlyphSheetParser. This processor: - * 1. Verifies that key PUA ranges have been loaded - * 2. Ensures Unicode pre-composed forms (U+0958–U+095F) map correctly - * 3. Documents the mapping for reference - * - * The runtime normalise() function handles the actual Unicode → PUA mapping, - * but since we can't put GSUB tables into the KBITX/TTF, applications must - * use the PUA codepoints directly, or perform their own normalisation. - */ -class DevanagariTamilProcessor { - - /** - * Verify that key PUA glyphs exist in the extracted set. - * Returns a set of codepoints that should be included but are missing. - */ - fun verify(glyphs: Map): Set { - val missing = mutableSetOf() - - // Devanagari special syllables - val devanagariSpecials = listOf( - 0xF0100, // Ru - 0xF0101, // Ruu - 0xF0102, // RRu - 0xF0103, // RRuu - 0xF0104, // Hu - 0xF0105, // Huu - 0xF0106, // RYA - 0xF0107, // Half-RYA - 0xF0108, // Open YA - 0xF0109, // Open Half-YA - 0xF010B, // Eyelash RA - 0xF010C, // RA superscript - 0xF010D, // RA superscript (complex) - 0xF010E, // DDRA (Marwari) - 0xF010F, // Alt Half SHA - ) - - // Devanagari presentation consonants (full forms) - val devaPresentation = (0xF0140..0xF022F).toList() - // Devanagari presentation consonants (half forms) - val devaHalf = (0xF0230..0xF031F).toList() - // Devanagari presentation consonants (with RA) - val devaRa = (0xF0320..0xF040F).toList() - // Devanagari presentation consonants (with RA, half forms) - val devaRaHalf = (0xF0410..0xF04FF).toList() - - // Devanagari II variant forms - val devaII = (0xF0110..0xF012F).toList() - - // Devanagari named ligatures - val devaLigatures = listOf( - 0xF01A1, // K.SS - 0xF01A2, // J.NY - 0xF01A3, // T.T - 0xF01A4, // N.T - 0xF01A5, // N.N - 0xF01A6, // S.V - 0xF01A7, // SS.P - 0xF01A8, // SH.C - 0xF01A9, // SH.N - 0xF01AA, // SH.V - 0xF01AB, // J.Y - 0xF01AC, // J.J.Y - 0xF01BC, // K.T - // D-series ligatures - 0xF01B0, 0xF01B1, 0xF01B2, 0xF01B3, 0xF01B4, - 0xF01B5, 0xF01B6, 0xF01B7, 0xF01B8, 0xF01B9, - // Marwari - 0xF01BA, 0xF01BB, - // Extended ligatures - 0xF01BD, 0xF01BE, 0xF01BF, - 0xF01C0, 0xF01C1, 0xF01C2, 0xF01C3, 0xF01C4, 0xF01C5, - 0xF01C6, 0xF01C7, 0xF01C8, 0xF01C9, 0xF01CA, 0xF01CB, - 0xF01CD, 0xF01CE, 0xF01CF, - 0xF01D0, 0xF01D1, 0xF01D2, 0xF01D3, 0xF01D4, 0xF01D5, - 0xF01D6, 0xF01D7, 0xF01D8, 0xF01D9, 0xF01DA, - 0xF01DB, 0xF01DC, 0xF01DD, 0xF01DE, 0xF01DF, - 0xF01E0, 0xF01E1, 0xF01E2, 0xF01E3, - ) - - // Tamil ligatures - val tamilLigatures = listOf( - 0xF00C0, 0xF00C1, // TTA+I, TTA+II - 0xF00ED, // KSSA - 0xF00EE, // SHRII - 0xF00F0, 0xF00F1, 0xF00F2, 0xF00F3, 0xF00F4, 0xF00F5, // consonant+I - ) + (0xF00C2..0xF00D3).toList() + // consonant+U - (0xF00D4..0xF00E5).toList() // consonant+UU - - // Sundanese internal forms - val sundanese = listOf( - 0xF0500, // ING - 0xF0501, // ENG - 0xF0502, // EUNG - 0xF0503, // IR - 0xF0504, // ER - 0xF0505, // EUR - 0xF0506, // LU - ) - - // Alphabetic Presentation Forms (already in sheet 38) - // FB00–FB06 (Latin ligatures), FB13–FB17 (Armenian ligatures) - - // Check all expected ranges - val allExpected = devanagariSpecials + devaPresentation + devaHalf + devaRa + devaRaHalf + - devaII + devaLigatures + tamilLigatures + sundanese - - for (cp in allExpected) { - if (!glyphs.containsKey(cp)) { - missing.add(cp) - } - } - - if (missing.isNotEmpty()) { - println(" [DevanagariTamilProcessor] ${missing.size} expected PUA glyphs missing") - // Only warn for the first few - missing.take(10).forEach { println(" Missing: U+${it.toString(16).uppercase().padStart(5, '0')}") } - if (missing.size > 10) println(" ... and ${missing.size - 10} more") - } else { - println(" [DevanagariTamilProcessor] All expected PUA glyphs present") - } - - return missing - } -} diff --git a/OTFbuild/src/net/torvald/otfbuild/GlyphSheetParser.kt b/OTFbuild/src/net/torvald/otfbuild/GlyphSheetParser.kt deleted file mode 100644 index bcd9998..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/GlyphSheetParser.kt +++ /dev/null @@ -1,321 +0,0 @@ -package net.torvald.otfbuild - -import com.kreative.bitsnpicas.BitmapFontGlyph -import java.io.File - -/** - * Glyph properties extracted from tag column. - * Mirrors GlyphProps from the runtime but is standalone. - */ -data class ExtractedGlyphProps( - val width: Int, - val isLowHeight: Boolean = false, - val nudgeX: Int = 0, - val nudgeY: Int = 0, - val alignWhere: Int = 0, - val writeOnTop: Int = -1, - val stackWhere: Int = 0, - val hasKernData: Boolean = false, - val isKernYtype: Boolean = false, - val kerningMask: Int = 255, - val directiveOpcode: Int = 0, - val directiveArg1: Int = 0, - val directiveArg2: Int = 0, - val extInfo: IntArray = IntArray(15), -) { - companion object { - const val ALIGN_LEFT = 0 - const val ALIGN_RIGHT = 1 - const val ALIGN_CENTRE = 2 - const val ALIGN_BEFORE = 3 - - const val STACK_UP = 0 - const val STACK_DOWN = 1 - const val STACK_BEFORE_N_AFTER = 2 - const val STACK_UP_N_DOWN = 3 - const val STACK_DONT = 4 - } - - fun requiredExtInfoCount(): Int = - if (stackWhere == STACK_BEFORE_N_AFTER) 2 - else if (directiveOpcode in 0b10000_000..0b10000_111) 7 - else 0 - - fun isPragma(pragma: String) = when (pragma) { - "replacewith" -> directiveOpcode in 0b10000_000..0b10000_111 - else -> false - } - - val isIllegal: Boolean get() = directiveOpcode == 255 -} - -data class ExtractedGlyph( - val codepoint: Int, - val props: ExtractedGlyphProps, - val bitmap: Array, // [row][col], 0 or -1(0xFF) -) - -/** - * Extracts glyph bitmaps and properties from TGA sprite sheets. - * Ported from TerrarumSansBitmap.buildWidthTable() and related methods. - */ -class GlyphSheetParser(private val assetsDir: String) { - - private fun Boolean.toInt() = if (this) 1 else 0 - /** @return 32-bit number: if alpha channel is zero, return 0; else return the original value */ - private fun Int.tagify() = if (this and 0xFF == 0) 0 else this - - /** - * Parse all sheets and return a map of codepoint -> (props, bitmap). - */ - fun parseAll(): Map { - val result = HashMap(65536) - - SheetConfig.fileList.forEachIndexed { sheetIndex, filename -> - val file = File(assetsDir, filename) - if (!file.exists()) { - println(" [SKIP] $filename not found") - return@forEachIndexed - } - - val isVariable = SheetConfig.isVariable(filename) - val isXYSwapped = SheetConfig.isXYSwapped(filename) - val isExtraWide = SheetConfig.isExtraWide(filename) - val cellW = SheetConfig.getCellWidth(sheetIndex) - val cellH = SheetConfig.getCellHeight(sheetIndex) - val cols = SheetConfig.getColumns(sheetIndex) - - val image = TgaReader.read(file) - - val statusParts = mutableListOf() - if (isVariable) statusParts.add("VARIABLE") - if (isXYSwapped) statusParts.add("XYSWAP") - if (isExtraWide) statusParts.add("EXTRAWIDE") - if (statusParts.isEmpty()) statusParts.add("STATIC") - println(" Loading [${statusParts.joinToString()}] $filename") - - if (isVariable) { - parseVariableSheet(image, sheetIndex, cellW, cellH, cols, isXYSwapped, result) - } else { - parseFixedSheet(image, sheetIndex, cellW, cellH, cols, result) - } - } - - // Add fixed-width overrides - addFixedWidthOverrides(result) - - return result - } - - /** - * Parse a variable-width sheet: extract tag column for properties, bitmap for glyph. - */ - private fun parseVariableSheet( - image: TgaImage, - sheetIndex: Int, - cellW: Int, - cellH: Int, - cols: Int, - isXYSwapped: Boolean, - result: HashMap - ) { - val codeRangeList = SheetConfig.codeRange[sheetIndex] - val binaryCodeOffset = cellW - 1 // tag column is last pixel column of cell - - codeRangeList.forEachIndexed { index, code -> - val cellX: Int - val cellY: Int - - if (isXYSwapped) { - cellX = (index / cols) * cellW // row becomes X - cellY = (index % cols) * cellH // col becomes Y - } else { - cellX = (index % cols) * cellW - cellY = (index / cols) * cellH - } - - val codeStartX = cellX + binaryCodeOffset - val codeStartY = cellY - - // Parse tag column - val width = (0..4).fold(0) { acc, y -> - acc or ((image.getPixel(codeStartX, codeStartY + y).and(0xFF) != 0).toInt() shl y) - } - val isLowHeight = image.getPixel(codeStartX, codeStartY + 5).and(0xFF) != 0 - - // Kerning data - val kerningBit1 = image.getPixel(codeStartX, codeStartY + 6).tagify() - val kerningBit2 = image.getPixel(codeStartX, codeStartY + 7).tagify() - val kerningBit3 = image.getPixel(codeStartX, codeStartY + 8).tagify() - var isKernYtype = (kerningBit1 and 0x80000000.toInt()) != 0 - var kerningMask = kerningBit1.ushr(8).and(0xFFFFFF) - val hasKernData = kerningBit1 and 0xFF != 0 - if (!hasKernData) { - isKernYtype = false - kerningMask = 255 - } - - // Compiler directives - val compilerDirectives = image.getPixel(codeStartX, codeStartY + 9).tagify() - val directiveOpcode = compilerDirectives.ushr(24).and(255) - val directiveArg1 = compilerDirectives.ushr(16).and(255) - val directiveArg2 = compilerDirectives.ushr(8).and(255) - - // Nudge - val nudgingBits = image.getPixel(codeStartX, codeStartY + 10).tagify() - val nudgeX = nudgingBits.ushr(24).toByte().toInt() - val nudgeY = nudgingBits.ushr(16).toByte().toInt() - - // Diacritics anchors (we don't store them in ExtractedGlyphProps for now but could) - // For alignment and width, they are useful during composition but not in final output - - // Alignment - val alignWhere = (0..1).fold(0) { acc, y -> - acc or ((image.getPixel(codeStartX, codeStartY + y + 15).and(0xFF) != 0).toInt() shl y) - } - - // Write on top - var writeOnTop = image.getPixel(codeStartX, codeStartY + 17) // NO .tagify() - if (writeOnTop and 0xFF == 0) writeOnTop = -1 - else { - writeOnTop = if (writeOnTop.ushr(8) == 0xFFFFFF) 0 else writeOnTop.ushr(28) and 15 - } - - // Stack where - val stackWhere0 = image.getPixel(codeStartX, codeStartY + 18).tagify() - val stackWhere1 = image.getPixel(codeStartX, codeStartY + 19).tagify() - val stackWhere = if (stackWhere0 == 0x00FF00FF && stackWhere1 == 0x00FF00FF) - ExtractedGlyphProps.STACK_DONT - else (0..1).fold(0) { acc, y -> - acc or ((image.getPixel(codeStartX, codeStartY + y + 18).and(0xFF) != 0).toInt() shl y) - } - - val extInfo = IntArray(15) - val props = ExtractedGlyphProps( - width, isLowHeight, nudgeX, nudgeY, alignWhere, writeOnTop, stackWhere, - hasKernData, isKernYtype, kerningMask, directiveOpcode, directiveArg1, directiveArg2, extInfo - ) - - // Parse extInfo if needed - val extCount = props.requiredExtInfoCount() - if (extCount > 0) { - for (x in 0 until extCount) { - var info = 0 - for (y in 0..19) { - if (image.getPixel(cellX + x, cellY + y).and(0xFF) != 0) { - info = info or (1 shl y) - } - } - extInfo[x] = info - } - } - - // Extract glyph bitmap: all pixels in cell except tag column - val bitmapW = cellW - 1 // exclude tag column - val bitmap = Array(cellH) { row -> - ByteArray(bitmapW) { col -> - val px = image.getPixel(cellX + col, cellY + row) - if (px and 0xFF != 0) 0xFF.toByte() else 0 - } - } - - result[code] = ExtractedGlyph(code, props, bitmap) - } - } - - /** - * Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym). - */ - private fun parseFixedSheet( - image: TgaImage, - sheetIndex: Int, - cellW: Int, - cellH: Int, - cols: Int, - result: HashMap - ) { - val codeRangeList = SheetConfig.codeRange[sheetIndex] - val fixedWidth = when (sheetIndex) { - SheetConfig.SHEET_CUSTOM_SYM -> 20 - SheetConfig.SHEET_HANGUL -> SheetConfig.W_HANGUL_BASE - SheetConfig.SHEET_RUNIC -> 9 - SheetConfig.SHEET_UNIHAN -> SheetConfig.W_UNIHAN - else -> cellW - } - - codeRangeList.forEachIndexed { index, code -> - val cellX = (index % cols) * cellW - val cellY = (index / cols) * cellH - - val bitmap = Array(cellH) { row -> - ByteArray(cellW) { col -> - val px = image.getPixel(cellX + col, cellY + row) - if (px and 0xFF != 0) 0xFF.toByte() else 0 - } - } - - val props = ExtractedGlyphProps(fixedWidth) - result[code] = ExtractedGlyph(code, props, bitmap) - } - } - - /** - * Apply fixed-width overrides as in buildWidthTableFixed(). - */ - private fun addFixedWidthOverrides(result: HashMap) { - // Hangul compat jamo - SheetConfig.codeRangeHangulCompat.forEach { code -> - if (!result.containsKey(code)) { - result[code] = ExtractedGlyph(code, ExtractedGlyphProps(SheetConfig.W_HANGUL_BASE), emptyBitmap()) - } - } - - // Zero-width ranges - (0xD800..0xDFFF).forEach { result[it] = ExtractedGlyph(it, ExtractedGlyphProps(0), emptyBitmap()) } - (0x100000..0x10FFFF).forEach { result[it] = ExtractedGlyph(it, ExtractedGlyphProps(0), emptyBitmap()) } - (0xFFFA0..0xFFFFF).forEach { result[it] = ExtractedGlyph(it, ExtractedGlyphProps(0), emptyBitmap()) } - - // Insular letter - result[0x1D79]?.let { /* already in sheet */ } ?: run { - result[0x1D79] = ExtractedGlyph(0x1D79, ExtractedGlyphProps(9), emptyBitmap()) - } - - // Replacement character at U+007F - result[0x7F]?.let { existing -> - result[0x7F] = existing.copy(props = existing.props.copy(width = 15)) - } - - // Null char - result[0] = ExtractedGlyph(0, ExtractedGlyphProps(0), emptyBitmap()) - } - - private fun emptyBitmap() = Array(SheetConfig.H) { ByteArray(SheetConfig.W_VAR_INIT) } - - /** - * Extracts raw Hangul jamo bitmaps from the Hangul sheet for composition. - * Returns a function: (index, row) -> bitmap - */ - fun getHangulJamoBitmaps(): (Int, Int) -> Array { - val filename = SheetConfig.fileList[SheetConfig.SHEET_HANGUL] - val file = File(assetsDir, filename) - if (!file.exists()) { - println(" [WARNING] Hangul sheet not found") - return { _, _ -> Array(SheetConfig.H) { ByteArray(SheetConfig.W_HANGUL_BASE) } } - } - - val image = TgaReader.read(file) - val cellW = SheetConfig.W_HANGUL_BASE - val cellH = SheetConfig.H - - return { index: Int, row: Int -> - val cellX = index * cellW - val cellY = row * cellH - Array(cellH) { r -> - ByteArray(cellW) { c -> - val px = image.getPixel(cellX + c, cellY + r) - if (px and 0xFF != 0) 0xFF.toByte() else 0 - } - } - } - } -} diff --git a/OTFbuild/src/net/torvald/otfbuild/HangulCompositor.kt b/OTFbuild/src/net/torvald/otfbuild/HangulCompositor.kt deleted file mode 100644 index 5c0da60..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/HangulCompositor.kt +++ /dev/null @@ -1,124 +0,0 @@ -package net.torvald.otfbuild - -import com.kreative.bitsnpicas.BitmapFontGlyph - -/** - * Composes 11,172 Hangul syllables (U+AC00–U+D7A3) from jamo sprite pieces. - * Also composes Hangul Compatibility Jamo (U+3130–U+318F). - * - * Ported from TerrarumSansBitmap.kt Hangul assembly logic. - */ -class HangulCompositor(private val parser: GlyphSheetParser) { - - private val getJamoBitmap = parser.getHangulJamoBitmaps() - private val cellW = SheetConfig.W_HANGUL_BASE - private val cellH = SheetConfig.H - - /** - * Compose all Hangul syllables and compatibility jamo. - * @return Map of codepoint to BitmapFontGlyph - */ - fun compose(): Map> { - val result = HashMap>(12000) - - // Compose Hangul Compatibility Jamo (U+3130–U+318F) - // These are standalone jamo from row 0 of the sheet - for (c in 0x3130..0x318F) { - val index = c - 0x3130 - val bitmap = getJamoBitmap(index, 0) - val glyph = bitmapToGlyph(bitmap, cellW, cellH) - result[c] = glyph to cellW - } - - // Compose 11,172 Hangul syllables (U+AC00–U+D7A3) - println(" Composing 11,172 Hangul syllables...") - for (c in 0xAC00..0xD7A3) { - val cInt = c - 0xAC00 - val indexCho = cInt / (SheetConfig.JUNG_COUNT * SheetConfig.JONG_COUNT) - val indexJung = cInt / SheetConfig.JONG_COUNT % SheetConfig.JUNG_COUNT - val indexJong = cInt % SheetConfig.JONG_COUNT // 0 = no jongseong - - // Map to jamo codepoints - val choCP = 0x1100 + indexCho - val jungCP = 0x1161 + indexJung - val jongCP = if (indexJong > 0) 0x11A8 + indexJong - 1 else 0 - - // Get sheet indices - val iCho = SheetConfig.toHangulChoseongIndex(choCP) - val iJung = SheetConfig.toHangulJungseongIndex(jungCP) ?: 0 - val iJong = if (jongCP != 0) SheetConfig.toHangulJongseongIndex(jongCP) ?: 0 else 0 - - // Get row positions - val choRow = SheetConfig.getHanInitialRow(iCho, iJung, iJong) - val jungRow = SheetConfig.getHanMedialRow(iCho, iJung, iJong) - val jongRow = SheetConfig.getHanFinalRow(iCho, iJung, iJong) - - // Get jamo bitmaps - val choBitmap = getJamoBitmap(iCho, choRow) - val jungBitmap = getJamoBitmap(iJung, jungRow) - - // Compose - val composed = composeBitmaps(choBitmap, jungBitmap, cellW, cellH) - if (indexJong > 0) { - val jongBitmap = getJamoBitmap(iJong, jongRow) - composeBitmapInto(composed, jongBitmap, cellW, cellH) - } - - // Determine advance width - val advanceWidth = if (iJung in SheetConfig.hangulPeaksWithExtraWidth) cellW + 1 else cellW - - val glyph = bitmapToGlyph(composed, advanceWidth, cellH) - result[c] = glyph to advanceWidth - } - - println(" Hangul composition done: ${result.size} glyphs") - return result - } - - /** - * Compose two bitmaps by OR-ing them together. - */ - private fun composeBitmaps(a: Array, b: Array, w: Int, h: Int): Array { - val result = Array(h) { row -> - ByteArray(w) { col -> - val av = a.getOrNull(row)?.getOrNull(col)?.toInt()?.and(0xFF) ?: 0 - val bv = b.getOrNull(row)?.getOrNull(col)?.toInt()?.and(0xFF) ?: 0 - if (av != 0 || bv != 0) 0xFF.toByte() else 0 - } - } - return result - } - - /** - * OR a bitmap into an existing one. - */ - private fun composeBitmapInto(target: Array, source: Array, w: Int, h: Int) { - for (row in 0 until minOf(h, target.size, source.size)) { - for (col in 0 until minOf(w, target[row].size, source[row].size)) { - if (source[row][col].toInt() and 0xFF != 0) { - target[row][col] = 0xFF.toByte() - } - } - } - } - - companion object { - /** - * Convert a byte[][] bitmap to BitmapFontGlyph. - */ - fun bitmapToGlyph(bitmap: Array, advanceWidth: Int, cellH: Int): BitmapFontGlyph { - val h = bitmap.size - val w = if (h > 0) bitmap[0].size else 0 - val glyphData = Array(h) { row -> - ByteArray(w) { col -> bitmap[row][col] } - } - // BitmapFontGlyph(byte[][] glyph, int offset, int width, int ascent) - // offset = x offset (left side bearing), width = advance width, ascent = baseline from top - val glyph = BitmapFontGlyph() - glyph.setGlyph(glyphData) - glyph.setXY(0, cellH) // y = ascent from top of em square to baseline - glyph.setCharacterWidth(advanceWidth) - return glyph - } - } -} diff --git a/OTFbuild/src/net/torvald/otfbuild/KbitxBuilder.kt b/OTFbuild/src/net/torvald/otfbuild/KbitxBuilder.kt deleted file mode 100644 index ba26ccd..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/KbitxBuilder.kt +++ /dev/null @@ -1,218 +0,0 @@ -package net.torvald.otfbuild - -import com.kreative.bitsnpicas.BitmapFont -import com.kreative.bitsnpicas.BitmapFontGlyph -import com.kreative.bitsnpicas.Font -import com.kreative.bitsnpicas.exporter.KbitxBitmapFontExporter -import java.io.File - -/** - * Orchestrates the entire font building pipeline: - * 1. Parse all TGA sheets - * 2. Create BitmapFont with metrics - * 3. Add all extracted glyphs - * 4. Compose Hangul syllables - * 5. Verify Devanagari/Tamil PUA glyphs - * 6. Generate kerning pairs - * 7. Export to KBITX - */ -class KbitxBuilder(private val assetsDir: String) { - - fun build(outputPath: String) { - println("=== Terrarum Sans Bitmap OTF Builder ===") - println("Assets: $assetsDir") - println("Output: $outputPath") - println() - - // 1. Create BitmapFont with metrics - println("[1/7] Creating BitmapFont...") - val font = BitmapFont( - 16, // emAscent: baseline to top of em square - 4, // emDescent: baseline to bottom of em square - 16, // lineAscent - 4, // lineDescent - 8, // xHeight - 12, // capHeight - 0 // lineGap - ) - - // Set font names - font.setName(Font.NAME_FAMILY, "Terrarum Sans Bitmap") - font.setName(Font.NAME_STYLE, "Regular") - font.setName(Font.NAME_VERSION, "Version 1.0") - font.setName(Font.NAME_FAMILY_AND_STYLE, "Terrarum Sans Bitmap Regular") - font.setName(Font.NAME_COPYRIGHT, "Copyright (c) 2017-2026 see CONTRIBUTORS.txt") - font.setName(Font.NAME_DESCRIPTION, "Bitmap font for Terrarum game engine") - font.setName(Font.NAME_LICENSE_DESCRIPTION, "MIT License") - - // 2. Parse all TGA sheets - println("[2/7] Parsing TGA sprite sheets...") - val parser = GlyphSheetParser(assetsDir) - val allGlyphs = parser.parseAll() - println(" Parsed ${allGlyphs.size} glyphs from sheets") - - // 3. Add all extracted glyphs to BitmapFont - println("[3/7] Adding glyphs to BitmapFont...") - var addedCount = 0 - var skippedCount = 0 - - for ((codepoint, extracted) in allGlyphs) { - // Skip zero-width control characters and surrogates — don't add empty glyphs - if (extracted.props.width <= 0 && codepoint != 0x7F) { - // Still add zero-width glyphs that have actual bitmap data - val hasPixels = extracted.bitmap.any { row -> row.any { it.toInt() and 0xFF != 0 } } - if (!hasPixels) { - skippedCount++ - continue - } - } - - // Skip internal-only codepoints that would cause issues - if (codepoint in 0x100000..0x10FFFF || codepoint in 0xD800..0xDFFF) { - skippedCount++ - continue - } - - val glyph = extractedToBitmapFontGlyph(extracted) - font.putCharacter(codepoint, glyph) - addedCount++ - } - println(" Added $addedCount glyphs, skipped $skippedCount") - - // 4. Compose Hangul syllables - println("[4/7] Composing Hangul syllables...") - val hangulCompositor = HangulCompositor(parser) - val hangulGlyphs = hangulCompositor.compose() - for ((codepoint, pair) in hangulGlyphs) { - val (glyph, _) = pair - font.putCharacter(codepoint, glyph) - } - println(" Added ${hangulGlyphs.size} Hangul glyphs") - - // 5. Verify Devanagari/Tamil PUA - println("[5/7] Verifying Devanagari/Tamil PUA glyphs...") - val devaTamilProcessor = DevanagariTamilProcessor() - devaTamilProcessor.verify(allGlyphs) - - // 6. Generate kerning pairs - println("[6/7] Generating kerning pairs...") - val kemingMachine = KemingMachine() - val kernPairs = kemingMachine.generateKerningPairs(allGlyphs) - for ((pair, offset) in kernPairs) { - font.setKernPair(pair, offset) - } - println(" Added ${kernPairs.size} kerning pairs") - - // 7. Add spacing characters - println("[7/7] Finalising...") - addSpacingCharacters(font, allGlyphs) - - // Add .notdef from U+007F (replacement character) - allGlyphs[0x7F]?.let { - val notdefGlyph = extractedToBitmapFontGlyph(it) - font.putNamedGlyph(".notdef", notdefGlyph) - } - - // Contract glyphs to trim whitespace - font.contractGlyphs() - - // Auto-fill any missing name fields - font.autoFillNames() - - // Count glyphs - val totalGlyphs = font.characters(false).size - println() - println("Total glyph count: $totalGlyphs") - - // Export - println("Exporting to KBITX: $outputPath") - val exporter = KbitxBitmapFontExporter() - exporter.exportFontToFile(font, File(outputPath)) - - println("Done!") - } - - private fun extractedToBitmapFontGlyph(extracted: ExtractedGlyph): BitmapFontGlyph { - val bitmap = extracted.bitmap - val props = extracted.props - val h = bitmap.size - val w = if (h > 0) bitmap[0].size else 0 - - val glyphData = Array(h) { row -> - ByteArray(w) { col -> bitmap[row][col] } - } - - val glyph = BitmapFontGlyph() - glyph.setGlyph(glyphData) - - // y = distance from top of glyph to baseline - // For most glyphs this is 16 (baseline at row 16 from top in a 20px cell) - // For Unihan: baseline at row 14 (offset by 2 from the 16px cell centred in 20px) - val sheetIndex = getSheetIndex(extracted.codepoint) - val baseline = when (sheetIndex) { - SheetConfig.SHEET_UNIHAN -> 14 - SheetConfig.SHEET_CUSTOM_SYM -> 16 - else -> 16 - } - glyph.setXY(0, baseline) - glyph.setCharacterWidth(props.width) - - return glyph - } - - private fun getSheetIndex(codepoint: Int): Int { - // Check fixed sheets first - if (codepoint in 0xF0000..0xF005F) return SheetConfig.SHEET_BULGARIAN_VARW - if (codepoint in 0xF0060..0xF00BF) return SheetConfig.SHEET_SERBIAN_VARW - - for (i in SheetConfig.codeRange.indices.reversed()) { - if (codepoint in SheetConfig.codeRange[i]) return i - } - return SheetConfig.SHEET_UNKNOWN - } - - /** - * Add spacing characters as empty glyphs with correct advance widths. - */ - private fun addSpacingCharacters(font: BitmapFont, allGlyphs: Map) { - val figWidth = allGlyphs[0x30]?.props?.width ?: 9 - val punctWidth = allGlyphs[0x2E]?.props?.width ?: 6 - val em = 12 + 1 // as defined in the original - - fun Int.halveWidth() = this / 2 + 1 - - val spacings = mapOf( - SheetConfig.NQSP to em.halveWidth(), - SheetConfig.MQSP to em, - SheetConfig.ENSP to em.halveWidth(), - SheetConfig.EMSP to em, - SheetConfig.THREE_PER_EMSP to (em / 3 + 1), - SheetConfig.QUARTER_EMSP to (em / 4 + 1), - SheetConfig.SIX_PER_EMSP to (em / 6 + 1), - SheetConfig.FSP to figWidth, - SheetConfig.PSP to punctWidth, - SheetConfig.THSP to 2, - SheetConfig.HSP to 1, - SheetConfig.ZWSP to 0, - SheetConfig.ZWNJ to 0, - SheetConfig.ZWJ to 0, - SheetConfig.SHY to 0, - ) - - for ((cp, width) in spacings) { - val glyph = BitmapFontGlyph() - glyph.setGlyph(Array(SheetConfig.H) { ByteArray(0) }) - glyph.setXY(0, 16) - glyph.setCharacterWidth(width) - font.putCharacter(cp, glyph) - } - - // NBSP: same width as space - val spaceWidth = allGlyphs[32]?.props?.width ?: 7 - val nbspGlyph = BitmapFontGlyph() - nbspGlyph.setGlyph(Array(SheetConfig.H) { ByteArray(0) }) - nbspGlyph.setXY(0, 16) - nbspGlyph.setCharacterWidth(spaceWidth) - font.putCharacter(SheetConfig.NBSP, nbspGlyph) - } -} diff --git a/OTFbuild/src/net/torvald/otfbuild/KemingMachine.kt b/OTFbuild/src/net/torvald/otfbuild/KemingMachine.kt deleted file mode 100644 index b168e3b..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/KemingMachine.kt +++ /dev/null @@ -1,120 +0,0 @@ -package net.torvald.otfbuild - -import com.kreative.bitsnpicas.GlyphPair - -/** - * Generates kerning pairs from shape rules. - * Ported from TerrarumSansBitmap.kt "The Keming Machine" section. - */ -class KemingMachine { - - private class Ing(val s: String) { - private var careBits = 0 - private var ruleBits = 0 - - init { - s.forEachIndexed { index, char -> - when (char) { - '@' -> { - careBits = careBits or SheetConfig.kemingBitMask[index] - ruleBits = ruleBits or SheetConfig.kemingBitMask[index] - } - '`' -> { - careBits = careBits or SheetConfig.kemingBitMask[index] - } - } - } - } - - fun matches(shapeBits: Int) = ((shapeBits and careBits) == ruleBits) - - override fun toString() = "C:${careBits.toString(2).padStart(16, '0')}-R:${ruleBits.toString(2).padStart(16, '0')}" - } - - private data class Kem(val first: Ing, val second: Ing, val bb: Int = 2, val yy: Int = 1) - - private val kerningRules: List - - init { - val baseRules = listOf( - Kem(Ing("_`_@___`__"), Ing("`_`___@___")), - Kem(Ing("_@_`___`__"), Ing("`_________")), - Kem(Ing("_@_@___`__"), Ing("`___@_@___"), 1, 1), - Kem(Ing("_@_@_`_`__"), Ing("`_____@___")), - Kem(Ing("___`_`____"), Ing("`___@_`___")), - Kem(Ing("___`_`____"), Ing("`_@___`___")), - ) - - // Automatically create mirrored versions - val mirrored = baseRules.map { rule -> - val left = rule.first.s - val right = rule.second.s - val newLeft = StringBuilder() - val newRight = StringBuilder() - - for (c in left.indices step 2) { - newLeft.append(right[c + 1]).append(right[c]) - newRight.append(left[c + 1]).append(left[c]) - } - - Kem(Ing(newLeft.toString()), Ing(newRight.toString()), rule.bb, rule.yy) - } - - kerningRules = baseRules + mirrored - } - - /** - * Generate kerning pairs from all glyphs that have kerning data. - * @return Map of GlyphPair to kern offset (negative values = tighter) - */ - fun generateKerningPairs(glyphs: Map): Map { - val result = HashMap() - - // Collect all codepoints with kerning data - val kernableGlyphs = glyphs.filter { it.value.props.hasKernData } - - if (kernableGlyphs.isEmpty()) { - println(" [KemingMachine] No glyphs with kern data found") - return result - } - - println(" [KemingMachine] ${kernableGlyphs.size} glyphs with kern data") - - // Special rule: lowercase r + dot - for (r in SheetConfig.lowercaseRs) { - for (d in SheetConfig.dots) { - if (glyphs.containsKey(r) && glyphs.containsKey(d)) { - result[GlyphPair(r, d)] = -1 - } - } - } - - // Apply kerning rules to all pairs - val kernCodes = kernableGlyphs.keys.toIntArray() - var pairsFound = 0 - - for (leftCode in kernCodes) { - val leftProps = kernableGlyphs[leftCode]!!.props - val maskL = leftProps.kerningMask - - for (rightCode in kernCodes) { - val rightProps = kernableGlyphs[rightCode]!!.props - val maskR = rightProps.kerningMask - - for (rule in kerningRules) { - if (rule.first.matches(maskL) && rule.second.matches(maskR)) { - val contraction = if (leftProps.isKernYtype || rightProps.isKernYtype) rule.yy else rule.bb - if (contraction > 0) { - result[GlyphPair(leftCode, rightCode)] = -contraction - pairsFound++ - } - break // first matching rule wins - } - } - } - } - - println(" [KemingMachine] Generated $pairsFound kerning pairs (+ ${SheetConfig.lowercaseRs.size * SheetConfig.dots.size} r-dot pairs)") - return result - } -} diff --git a/OTFbuild/src/net/torvald/otfbuild/Main.kt b/OTFbuild/src/net/torvald/otfbuild/Main.kt deleted file mode 100644 index 1fbb891..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/Main.kt +++ /dev/null @@ -1,7 +0,0 @@ -package net.torvald.otfbuild - -fun main(args: Array) { - val assetsDir = args.getOrElse(0) { "src/assets" } - val outputPath = args.getOrElse(1) { "OTFbuild/TerrarumSansBitmap.kbitx" } - KbitxBuilder(assetsDir).build(outputPath) -} diff --git a/OTFbuild/src/net/torvald/otfbuild/SheetConfig.kt b/OTFbuild/src/net/torvald/otfbuild/SheetConfig.kt deleted file mode 100644 index 14bda7e..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/SheetConfig.kt +++ /dev/null @@ -1,377 +0,0 @@ -package net.torvald.otfbuild - -typealias CodePoint = Int - -/** - * Ported from TerrarumSansBitmap.kt companion object. - * All sheet definitions, code ranges, index functions, and font metric constants. - */ -object SheetConfig { - - // Font metrics - const val H = 20 - const val H_UNIHAN = 16 - const val W_HANGUL_BASE = 13 - const val W_UNIHAN = 16 - const val W_LATIN_WIDE = 9 - const val W_VAR_INIT = 15 - const val W_WIDEVAR_INIT = 31 - const val HGAP_VAR = 1 - const val SIZE_CUSTOM_SYM = 20 - - const val H_DIACRITICS = 3 - const val H_STACKUP_LOWERCASE_SHIFTDOWN = 4 - const val H_OVERLAY_LOWERCASE_SHIFTDOWN = 2 - - const val LINE_HEIGHT = 24 - - // Sheet indices - const val SHEET_ASCII_VARW = 0 - const val SHEET_HANGUL = 1 - const val SHEET_EXTA_VARW = 2 - const val SHEET_EXTB_VARW = 3 - const val SHEET_KANA = 4 - const val SHEET_CJK_PUNCT = 5 - const val SHEET_UNIHAN = 6 - const val SHEET_CYRILIC_VARW = 7 - const val SHEET_HALFWIDTH_FULLWIDTH_VARW = 8 - const val SHEET_UNI_PUNCT_VARW = 9 - const val SHEET_GREEK_VARW = 10 - const val SHEET_THAI_VARW = 11 - const val SHEET_HAYEREN_VARW = 12 - const val SHEET_KARTULI_VARW = 13 - const val SHEET_IPA_VARW = 14 - const val SHEET_RUNIC = 15 - const val SHEET_LATIN_EXT_ADD_VARW = 16 - const val SHEET_CUSTOM_SYM = 17 - const val SHEET_BULGARIAN_VARW = 18 - const val SHEET_SERBIAN_VARW = 19 - const val SHEET_TSALAGI_VARW = 20 - const val SHEET_PHONETIC_EXT_VARW = 21 - const val SHEET_DEVANAGARI_VARW = 22 - const val SHEET_KARTULI_CAPS_VARW = 23 - const val SHEET_DIACRITICAL_MARKS_VARW = 24 - const val SHEET_GREEK_POLY_VARW = 25 - const val SHEET_EXTC_VARW = 26 - const val SHEET_EXTD_VARW = 27 - const val SHEET_CURRENCIES_VARW = 28 - const val SHEET_INTERNAL_VARW = 29 - const val SHEET_LETTERLIKE_MATHS_VARW = 30 - const val SHEET_ENCLOSED_ALPHNUM_SUPL_VARW = 31 - const val SHEET_TAMIL_VARW = 32 - const val SHEET_BENGALI_VARW = 33 - const val SHEET_BRAILLE_VARW = 34 - const val SHEET_SUNDANESE_VARW = 35 - const val SHEET_DEVANAGARI2_INTERNAL_VARW = 36 - const val SHEET_CODESTYLE_ASCII_VARW = 37 - const val SHEET_ALPHABETIC_PRESENTATION_FORMS = 38 - const val SHEET_HENTAIGANA_VARW = 39 - - const val SHEET_UNKNOWN = 254 - - val fileList = arrayOf( - "ascii_variable.tga", - "hangul_johab.tga", - "latinExtA_variable.tga", - "latinExtB_variable.tga", - "kana_variable.tga", - "cjkpunct_variable.tga", - "wenquanyi.tga", - "cyrilic_variable.tga", - "halfwidth_fullwidth_variable.tga", - "unipunct_variable.tga", - "greek_variable.tga", - "thai_variable.tga", - "hayeren_variable.tga", - "kartuli_variable.tga", - "ipa_ext_variable.tga", - "futhark.tga", - "latinExt_additional_variable.tga", - "puae000-e0ff.tga", - "cyrilic_bulgarian_variable.tga", - "cyrilic_serbian_variable.tga", - "tsalagi_variable.tga", - "phonetic_extensions_variable.tga", - "devanagari_variable.tga", - "kartuli_allcaps_variable.tga", - "diacritical_marks_variable.tga", - "greek_polytonic_xyswap_variable.tga", - "latinExtC_variable.tga", - "latinExtD_variable.tga", - "currencies_variable.tga", - "internal_variable.tga", - "letterlike_symbols_variable.tga", - "enclosed_alphanumeric_supplement_variable.tga", - "tamil_extrawide_variable.tga", - "bengali_variable.tga", - "braille_variable.tga", - "sundanese_variable.tga", - "devanagari_internal_extrawide_variable.tga", - "pua_codestyle_ascii_variable.tga", - "alphabetic_presentation_forms_extrawide_variable.tga", - "hentaigana_variable.tga", - ) - - val codeRange: Array> = arrayOf( - (0..0xFF).toList(), - (0x1100..0x11FF).toList() + (0xA960..0xA97F).toList() + (0xD7B0..0xD7FF).toList(), - (0x100..0x17F).toList(), - (0x180..0x24F).toList(), - (0x3040..0x30FF).toList() + (0x31F0..0x31FF).toList(), - (0x3000..0x303F).toList(), - (0x3400..0x9FFF).toList(), - (0x400..0x52F).toList(), - (0xFF00..0xFFFF).toList(), - (0x2000..0x209F).toList(), - (0x370..0x3CE).toList(), - (0xE00..0xE5F).toList(), - (0x530..0x58F).toList(), - (0x10D0..0x10FF).toList(), - (0x250..0x2FF).toList(), - (0x16A0..0x16FF).toList(), - (0x1E00..0x1EFF).toList(), - (0xE000..0xE0FF).toList(), - (0xF0000..0xF005F).toList(), - (0xF0060..0xF00BF).toList(), - (0x13A0..0x13F5).toList(), - (0x1D00..0x1DBF).toList(), - (0x900..0x97F).toList() + (0xF0100..0xF04FF).toList(), - (0x1C90..0x1CBF).toList(), - (0x300..0x36F).toList(), - (0x1F00..0x1FFF).toList(), - (0x2C60..0x2C7F).toList(), - (0xA720..0xA7FF).toList(), - (0x20A0..0x20CF).toList(), - (0xFFE00..0xFFF9F).toList(), - (0x2100..0x214F).toList(), - (0x1F100..0x1F1FF).toList(), - (0x0B80..0x0BFF).toList() + (0xF00C0..0xF00FF).toList(), - (0x980..0x9FF).toList(), - (0x2800..0x28FF).toList(), - (0x1B80..0x1BBF).toList() + (0x1CC0..0x1CCF).toList() + (0xF0500..0xF050F).toList(), - (0xF0110..0xF012F).toList(), - (0xF0520..0xF057F).toList(), - (0xFB00..0xFB17).toList(), - (0x1B000..0x1B16F).toList(), - ) - - val codeRangeHangulCompat = 0x3130..0x318F - - val altCharsetCodepointOffsets = intArrayOf( - 0, - 0xF0000 - 0x400, // Bulgarian - 0xF0060 - 0x400, // Serbian - 0xF0520 - 0x20, // Codestyle - ) - - val altCharsetCodepointDomains = arrayOf( - 0..0x10FFFF, - 0x400..0x45F, - 0x400..0x45F, - 0x20..0x7F, - ) - - // Unicode spacing characters - const val NQSP = 0x2000 - const val MQSP = 0x2001 - const val ENSP = 0x2002 - const val EMSP = 0x2003 - const val THREE_PER_EMSP = 0x2004 - const val QUARTER_EMSP = 0x2005 - const val SIX_PER_EMSP = 0x2006 - const val FSP = 0x2007 - const val PSP = 0x2008 - const val THSP = 0x2009 - const val HSP = 0x200A - const val ZWSP = 0x200B - const val ZWNJ = 0x200C - const val ZWJ = 0x200D - const val SHY = 0xAD - const val NBSP = 0xA0 - const val OBJ = 0xFFFC - - const val FIXED_BLOCK_1 = 0xFFFD0 - const val MOVABLE_BLOCK_M1 = 0xFFFE0 - const val MOVABLE_BLOCK_1 = 0xFFFF0 - - const val CHARSET_OVERRIDE_DEFAULT = 0xFFFC0 - const val CHARSET_OVERRIDE_BG_BG = 0xFFFC1 - const val CHARSET_OVERRIDE_SR_SR = 0xFFFC2 - const val CHARSET_OVERRIDE_CODESTYLE = 0xFFFC3 - - // Sheet type detection - fun isVariable(filename: String) = filename.endsWith("_variable.tga") - fun isXYSwapped(filename: String) = filename.contains("xyswap", ignoreCase = true) - fun isExtraWide(filename: String) = filename.contains("extrawide", ignoreCase = true) - - /** Returns the cell width for a given sheet index. */ - fun getCellWidth(sheetIndex: Int): Int = when { - isExtraWide(fileList[sheetIndex]) -> W_WIDEVAR_INIT - isVariable(fileList[sheetIndex]) -> W_VAR_INIT - sheetIndex == SHEET_UNIHAN -> W_UNIHAN - sheetIndex == SHEET_HANGUL -> W_HANGUL_BASE - sheetIndex == SHEET_CUSTOM_SYM -> SIZE_CUSTOM_SYM - sheetIndex == SHEET_RUNIC -> W_LATIN_WIDE - else -> W_VAR_INIT - } - - /** Returns the cell height for a given sheet index. */ - fun getCellHeight(sheetIndex: Int): Int = when (sheetIndex) { - SHEET_UNIHAN -> H_UNIHAN - SHEET_CUSTOM_SYM -> SIZE_CUSTOM_SYM - else -> H - } - - /** Number of columns per row for the sheet. */ - fun getColumns(sheetIndex: Int): Int = when (sheetIndex) { - SHEET_UNIHAN -> 256 - else -> 16 - } - - // Index functions (X position in sheet) - fun indexX(c: CodePoint): Int = c % 16 - fun unihanIndexX(c: CodePoint): Int = (c - 0x3400) % 256 - - // Index functions (Y position in sheet) — per sheet type - fun indexY(sheetIndex: Int, c: CodePoint): Int = when (sheetIndex) { - SHEET_ASCII_VARW -> c / 16 - SHEET_UNIHAN -> unihanIndexY(c) - SHEET_EXTA_VARW -> (c - 0x100) / 16 - SHEET_EXTB_VARW -> (c - 0x180) / 16 - SHEET_KANA -> kanaIndexY(c) - SHEET_CJK_PUNCT -> (c - 0x3000) / 16 - SHEET_CYRILIC_VARW -> (c - 0x400) / 16 - SHEET_HALFWIDTH_FULLWIDTH_VARW -> (c - 0xFF00) / 16 - SHEET_UNI_PUNCT_VARW -> (c - 0x2000) / 16 - SHEET_GREEK_VARW -> (c - 0x370) / 16 - SHEET_THAI_VARW -> (c - 0xE00) / 16 - SHEET_CUSTOM_SYM -> (c - 0xE000) / 16 - SHEET_HAYEREN_VARW -> (c - 0x530) / 16 - SHEET_KARTULI_VARW -> (c - 0x10D0) / 16 - SHEET_IPA_VARW -> (c - 0x250) / 16 - SHEET_RUNIC -> (c - 0x16A0) / 16 - SHEET_LATIN_EXT_ADD_VARW -> (c - 0x1E00) / 16 - SHEET_BULGARIAN_VARW -> (c - 0xF0000) / 16 - SHEET_SERBIAN_VARW -> (c - 0xF0060) / 16 - SHEET_TSALAGI_VARW -> (c - 0x13A0) / 16 - SHEET_PHONETIC_EXT_VARW -> (c - 0x1D00) / 16 - SHEET_DEVANAGARI_VARW -> devanagariIndexY(c) - SHEET_KARTULI_CAPS_VARW -> (c - 0x1C90) / 16 - SHEET_DIACRITICAL_MARKS_VARW -> (c - 0x300) / 16 - SHEET_GREEK_POLY_VARW -> (c - 0x1F00) / 16 - SHEET_EXTC_VARW -> (c - 0x2C60) / 16 - SHEET_EXTD_VARW -> (c - 0xA720) / 16 - SHEET_CURRENCIES_VARW -> (c - 0x20A0) / 16 - SHEET_INTERNAL_VARW -> (c - 0xFFE00) / 16 - SHEET_LETTERLIKE_MATHS_VARW -> (c - 0x2100) / 16 - SHEET_ENCLOSED_ALPHNUM_SUPL_VARW -> (c - 0x1F100) / 16 - SHEET_TAMIL_VARW -> tamilIndexY(c) - SHEET_BENGALI_VARW -> (c - 0x980) / 16 - SHEET_BRAILLE_VARW -> (c - 0x2800) / 16 - SHEET_SUNDANESE_VARW -> sundaneseIndexY(c) - SHEET_DEVANAGARI2_INTERNAL_VARW -> (c - 0xF0110) / 16 - SHEET_CODESTYLE_ASCII_VARW -> (c - 0xF0520) / 16 - SHEET_ALPHABETIC_PRESENTATION_FORMS -> (c - 0xFB00) / 16 - SHEET_HENTAIGANA_VARW -> (c - 0x1B000) / 16 - SHEET_HANGUL -> 0 // Hangul uses special row logic - else -> c / 16 - } - - private fun kanaIndexY(c: CodePoint): Int = - if (c in 0x31F0..0x31FF) 12 - else (c - 0x3040) / 16 - - private fun unihanIndexY(c: CodePoint): Int = (c - 0x3400) / 256 - - private fun devanagariIndexY(c: CodePoint): Int = - (if (c < 0xF0000) (c - 0x0900) else (c - 0xF0080)) / 16 - - private fun tamilIndexY(c: CodePoint): Int = - (if (c < 0xF0000) (c - 0x0B80) else (c - 0xF0040)) / 16 - - private fun sundaneseIndexY(c: CodePoint): Int = - (if (c >= 0xF0500) (c - 0xF04B0) else if (c < 0x1BC0) (c - 0x1B80) else (c - 0x1C80)) / 16 - - // Hangul constants - const val JUNG_COUNT = 21 - const val JONG_COUNT = 28 - - // Hangul shape arrays (sorted) - val jungseongI = sortedSetOf(21, 61) - val jungseongOU = sortedSetOf(9, 13, 14, 18, 34, 35, 39, 45, 51, 53, 54, 64, 73, 80, 83) - val jungseongOUComplex = (listOf(10, 11, 16) + (22..33).toList() + listOf(36, 37, 38) + (41..44).toList() + - (46..50).toList() + (56..59).toList() + listOf(63) + (67..72).toList() + (74..79).toList() + - (81..83).toList() + (85..91).toList() + listOf(93, 94)).toSortedSet() - val jungseongRightie = sortedSetOf(2, 4, 6, 8, 11, 16, 32, 33, 37, 42, 44, 48, 50, 71, 72, 75, 78, 79, 83, 86, 87, 88, 94) - val jungseongOEWI = sortedSetOf(12, 15, 17, 40, 52, 55, 89, 90, 91) - val jungseongEU = sortedSetOf(19, 62, 66) - val jungseongYI = sortedSetOf(20, 60, 65) - val jungseongUU = sortedSetOf(14, 15, 16, 17, 18, 27, 30, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 67, 68, 73, 77, 78, 79, 80, 81, 82, 83, 84, 91) - val jungseongWide = (jungseongOU.toList() + jungseongEU.toList()).toSortedSet() - val choseongGiyeoks = sortedSetOf(0, 1, 15, 23, 30, 34, 45, 51, 56, 65, 82, 90, 100, 101, 110, 111, 115) - val hangulPeaksWithExtraWidth = sortedSetOf(2, 4, 6, 8, 11, 16, 32, 33, 37, 42, 44, 48, 50, 71, 75, 78, 79, 83, 86, 87, 88, 94) - - val giyeokRemapping = hashMapOf( - 5 to 19, 6 to 20, 7 to 21, 8 to 22, 11 to 23, 12 to 24, - ) - - fun isHangulChoseong(c: CodePoint) = c in 0x1100..0x115F || c in 0xA960..0xA97F - fun isHangulJungseong(c: CodePoint) = c in 0x1160..0x11A7 || c in 0xD7B0..0xD7C6 - fun isHangulJongseong(c: CodePoint) = c in 0x11A8..0x11FF || c in 0xD7CB..0xD7FB - fun isHangulCompat(c: CodePoint) = c in codeRangeHangulCompat - - fun toHangulChoseongIndex(c: CodePoint): Int = - if (c in 0x1100..0x115F) c - 0x1100 - else if (c in 0xA960..0xA97F) c - 0xA960 + 96 - else throw IllegalArgumentException("Not a choseong: U+${c.toString(16)}") - - fun toHangulJungseongIndex(c: CodePoint): Int? = - if (c in 0x1160..0x11A7) c - 0x1160 - else if (c in 0xD7B0..0xD7C6) c - 0xD7B0 + 72 - else null - - fun toHangulJongseongIndex(c: CodePoint): Int? = - if (c in 0x11A8..0x11FF) c - 0x11A8 + 1 - else if (c in 0xD7CB..0xD7FB) c - 0xD7CB + 88 + 1 - else null - - fun getHanInitialRow(i: Int, p: Int, f: Int): Int { - var ret = when { - p in jungseongI -> 3 - p in jungseongOEWI -> 11 - p in jungseongOUComplex -> 7 - p in jungseongOU -> 5 - p in jungseongEU -> 9 - p in jungseongYI -> 13 - else -> 1 - } - if (f != 0) ret += 1 - return if (p in jungseongUU && i in choseongGiyeoks) { - giyeokRemapping[ret] ?: throw NullPointerException("i=$i p=$p f=$f ret=$ret") - } else ret - } - - fun getHanMedialRow(i: Int, p: Int, f: Int): Int = if (f == 0) 15 else 16 - - fun getHanFinalRow(i: Int, p: Int, f: Int): Int = - if (p !in jungseongRightie) 17 else 18 - - // Kerning constants - val kemingBitMask: IntArray = intArrayOf(7, 6, 5, 4, 3, 2, 1, 0, 15, 14).map { 1 shl it }.toIntArray() - - // Special characters for r+dot kerning - val lowercaseRs = sortedSetOf(0x72, 0x155, 0x157, 0x159, 0x211, 0x213, 0x27c, 0x1e59, 0x1e58, 0x1e5f) - val dots = sortedSetOf(0x2c, 0x2e) - - // Devanagari internal encoding - fun Int.toDevaInternal(): Int { - if (this in 0x0915..0x0939) return this - 0x0915 + 0xF0140 - else if (this in 0x0958..0x095F) return devanagariUnicodeNuqtaTable[this - 0x0958] - else throw IllegalArgumentException("No internal form for U+${this.toString(16)}") - } - - val devanagariUnicodeNuqtaTable = intArrayOf(0xF0170, 0xF0171, 0xF0172, 0xF0177, 0xF017C, 0xF017D, 0xF0186, 0xF018A) - - val devanagariConsonants = ((0x0915..0x0939).toList() + (0x0958..0x095F).toList() + (0x0978..0x097F).toList() + - (0xF0140..0xF04FF).toList() + (0xF0106..0xF0109).toList()).toHashSet() -} diff --git a/OTFbuild/src/net/torvald/otfbuild/TgaReader.kt b/OTFbuild/src/net/torvald/otfbuild/TgaReader.kt deleted file mode 100644 index f727daa..0000000 --- a/OTFbuild/src/net/torvald/otfbuild/TgaReader.kt +++ /dev/null @@ -1,80 +0,0 @@ -package net.torvald.otfbuild - -import java.io.File -import java.io.InputStream - -/** - * Simple TGA reader for uncompressed true-colour images (Type 2). - * Returns RGBA8888 pixel data. - */ -class TgaImage(val width: Int, val height: Int, val pixels: IntArray) { - /** Get pixel at (x, y) as RGBA8888. */ - fun getPixel(x: Int, y: Int): Int { - if (x < 0 || x >= width || y < 0 || y >= height) return 0 - return pixels[y * width + x] - } -} - -object TgaReader { - - fun read(file: File): TgaImage = read(file.inputStream()) - - fun read(input: InputStream): TgaImage { - val data = input.use { it.readBytes() } - var pos = 0 - - fun u8() = data[pos++].toInt() and 0xFF - fun u16() = u8() or (u8() shl 8) - - val idLength = u8() - val colourMapType = u8() - val imageType = u8() - - // colour map spec (5 bytes) - u16(); u16(); u8() - - // image spec - val xOrigin = u16() - val yOrigin = u16() - val width = u16() - val height = u16() - val bitsPerPixel = u8() - val descriptor = u8() - - val topToBottom = (descriptor and 0x20) != 0 - val bytesPerPixel = bitsPerPixel / 8 - - // skip ID - pos += idLength - - // skip colour map - if (colourMapType != 0) { - throw UnsupportedOperationException("Colour-mapped TGA not supported") - } - - if (imageType != 2) { - throw UnsupportedOperationException("Only uncompressed true-colour TGA is supported (type 2), got type $imageType") - } - - if (bytesPerPixel !in 3..4) { - throw UnsupportedOperationException("Only 24-bit or 32-bit TGA supported, got ${bitsPerPixel}-bit") - } - - val pixels = IntArray(width * height) - - for (row in 0 until height) { - val y = if (topToBottom) row else (height - 1 - row) - for (x in 0 until width) { - val b = data[pos++].toInt() and 0xFF - val g = data[pos++].toInt() and 0xFF - val r = data[pos++].toInt() and 0xFF - val a = if (bytesPerPixel == 4) data[pos++].toInt() and 0xFF else 0xFF - - // Store as RGBA8888 - pixels[y * width + x] = (r shl 24) or (g shl 16) or (b shl 8) or a - } - } - - return TgaImage(width, height, pixels) - } -} diff --git a/OTFbuild/tga_reader.py b/OTFbuild/tga_reader.py new file mode 100644 index 0000000..6df9744 --- /dev/null +++ b/OTFbuild/tga_reader.py @@ -0,0 +1,90 @@ +""" +TGA reader for uncompressed true-colour images (Type 2). +Stores pixels as RGBA8888: (R<<24 | G<<16 | B<<8 | A). + +Matches the convention in TerrarumSansBitmap.kt where .and(255) checks +the alpha channel (lowest byte). +""" + +import struct +from typing import List + + +class TgaImage: + __slots__ = ('width', 'height', 'pixels') + + def __init__(self, width: int, height: int, pixels: List[int]): + self.width = width + self.height = height + self.pixels = pixels # flat array, row-major + + def get_pixel(self, x: int, y: int) -> int: + """Get pixel at (x, y) as RGBA8888 (R in bits 31-24, A in bits 7-0).""" + if x < 0 or x >= self.width or y < 0 or y >= self.height: + return 0 + return self.pixels[y * self.width + x] + + +def read_tga(path: str) -> TgaImage: + """Read an uncompressed true-colour TGA file.""" + with open(path, 'rb') as f: + data = f.read() + + pos = 0 + + def u8(): + nonlocal pos + val = data[pos] + pos += 1 + return val + + def u16(): + nonlocal pos + val = struct.unpack_from('