""" Extract glyph bitmaps and tag-column properties from TGA sprite sheets. Ported from TerrarumSansBitmap.kt:buildWidthTable() and GlyphSheetParser.kt. Enhancement over v1: extracts all 6 diacritics anchors for GPOS mark feature. """ import os from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple from tga_reader import TgaImage, read_tga import sheet_config as SC @dataclass class DiacriticsAnchor: type: int x: int y: int x_used: bool y_used: bool @dataclass class GlyphProps: width: int is_low_height: bool = False nudge_x: int = 0 nudge_y: int = 0 diacritics_anchors: List[DiacriticsAnchor] = field(default_factory=lambda: [ DiacriticsAnchor(i, 0, 0, False, False) for i in range(6) ]) align_where: int = 0 write_on_top: int = -1 stack_where: int = 0 ext_info: List[int] = field(default_factory=lambda: [0] * 15) has_kern_data: bool = False is_kern_y_type: bool = False kerning_mask: int = 255 directive_opcode: int = 0 directive_arg1: int = 0 directive_arg2: int = 0 @property def is_illegal(self): return self.directive_opcode == 255 def required_ext_info_count(self): if self.stack_where == SC.STACK_BEFORE_N_AFTER: return 2 if 0b10000_000 <= self.directive_opcode <= 0b10000_111: return 7 return 0 def is_pragma(self, pragma): if pragma == "replacewith": return 0b10000_000 <= self.directive_opcode <= 0b10000_111 return False @dataclass class ExtractedGlyph: codepoint: int props: GlyphProps bitmap: List[List[int]] # [row][col], 0 or 1 def _tagify(pixel): """Return 0 if alpha channel is zero, else return the original value.""" return 0 if (pixel & 0xFF) == 0 else pixel def _signed_byte(val): """Convert unsigned byte to signed.""" return val - 256 if val >= 128 else val def _parse_diacritics_anchors(image, code_start_x, code_start_y): """Parse 6 diacritics anchors from tag column rows 11-14.""" anchors = [] for i in range(6): y_pos = 13 - (i // 3) * 2 shift = (3 - (i % 3)) * 8 y_pixel = _tagify(image.get_pixel(code_start_x, code_start_y + y_pos)) x_pixel = _tagify(image.get_pixel(code_start_x, code_start_y + y_pos + 1)) y_used = ((y_pixel >> shift) & 128) != 0 x_used = ((x_pixel >> shift) & 128) != 0 y_val = (y_pixel >> shift) & 127 if y_used else 0 x_val = (x_pixel >> shift) & 127 if x_used else 0 anchors.append(DiacriticsAnchor(i, x_val, y_val, x_used, y_used)) return anchors def parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy_swapped): """Parse a variable-width sheet: extract tag column for properties, bitmap for glyph.""" code_range = SC.CODE_RANGE[sheet_index] binary_code_offset = cell_w - 1 # tag column is last pixel column of cell result = {} for index, code in enumerate(code_range): if is_xy_swapped: cell_x = (index // cols) * cell_w cell_y = (index % cols) * cell_h else: cell_x = (index % cols) * cell_w cell_y = (index // cols) * cell_h code_start_x = cell_x + binary_code_offset code_start_y = cell_y # Width (5 bits) width = 0 for y in range(5): if image.get_pixel(code_start_x, code_start_y + y) & 0xFF: width |= (1 << y) is_low_height = (image.get_pixel(code_start_x, code_start_y + 5) & 0xFF) != 0 # Kerning data kerning_bit1 = _tagify(image.get_pixel(code_start_x, code_start_y + 6)) # kerning_bit2 and kerning_bit3 are reserved is_kern_y_type = (kerning_bit1 & 0x80000000) != 0 kerning_mask = (kerning_bit1 >> 8) & 0xFFFFFF has_kern_data = (kerning_bit1 & 0xFF) != 0 if not has_kern_data: is_kern_y_type = False kerning_mask = 255 # Compiler directives compiler_directives = _tagify(image.get_pixel(code_start_x, code_start_y + 9)) directive_opcode = (compiler_directives >> 24) & 255 directive_arg1 = (compiler_directives >> 16) & 255 directive_arg2 = (compiler_directives >> 8) & 255 # Nudge nudging_bits = _tagify(image.get_pixel(code_start_x, code_start_y + 10)) nudge_x = _signed_byte((nudging_bits >> 24) & 0xFF) nudge_y = _signed_byte((nudging_bits >> 16) & 0xFF) # Diacritics anchors diacritics_anchors = _parse_diacritics_anchors(image, code_start_x, code_start_y) # Alignment align_where = 0 for y in range(2): if image.get_pixel(code_start_x, code_start_y + y + 15) & 0xFF: align_where |= (1 << y) # Write on top write_on_top_raw = image.get_pixel(code_start_x, code_start_y + 17) # NO tagify if (write_on_top_raw & 0xFF) == 0: write_on_top = -1 else: if (write_on_top_raw >> 8) == 0xFFFFFF: write_on_top = 0 else: write_on_top = (write_on_top_raw >> 28) & 15 # Stack where stack_where0 = _tagify(image.get_pixel(code_start_x, code_start_y + 18)) stack_where1 = _tagify(image.get_pixel(code_start_x, code_start_y + 19)) if stack_where0 == 0x00FF00FF and stack_where1 == 0x00FF00FF: stack_where = SC.STACK_DONT else: stack_where = 0 for y in range(2): if image.get_pixel(code_start_x, code_start_y + y + 18) & 0xFF: stack_where |= (1 << y) ext_info = [0] * 15 props = GlyphProps( width=width, is_low_height=is_low_height, nudge_x=nudge_x, nudge_y=nudge_y, diacritics_anchors=diacritics_anchors, align_where=align_where, write_on_top=write_on_top, stack_where=stack_where, ext_info=ext_info, has_kern_data=has_kern_data, is_kern_y_type=is_kern_y_type, kerning_mask=kerning_mask, directive_opcode=directive_opcode, directive_arg1=directive_arg1, directive_arg2=directive_arg2, ) # Parse extInfo if needed ext_count = props.required_ext_info_count() if ext_count > 0: for x in range(ext_count): info = 0 for y in range(20): if image.get_pixel(cell_x + x, cell_y + y) & 0xFF: info |= (1 << y) ext_info[x] = info # Extract glyph bitmap (all pixels except tag column) bitmap_w = cell_w - 1 bitmap = [] for row in range(cell_h): row_data = [] for col in range(bitmap_w): px = image.get_pixel(cell_x + col, cell_y + row) row_data.append(1 if (px & 0xFF) != 0 else 0) bitmap.append(row_data) result[code] = ExtractedGlyph(code, props, bitmap) return result def parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols): """Parse a fixed-width sheet (Hangul, Unihan, Runic, Custom Sym).""" code_range = SC.CODE_RANGE[sheet_index] result = {} fixed_width = { SC.SHEET_CUSTOM_SYM: 20, SC.SHEET_HANGUL: SC.W_HANGUL_BASE, SC.SHEET_RUNIC: 9, SC.SHEET_UNIHAN: SC.W_UNIHAN, }.get(sheet_index, cell_w) for index, code in enumerate(code_range): cell_x = (index % cols) * cell_w cell_y = (index // cols) * cell_h bitmap = [] for row in range(cell_h): row_data = [] for col in range(cell_w): px = image.get_pixel(cell_x + col, cell_y + row) row_data.append(1 if (px & 0xFF) != 0 else 0) bitmap.append(row_data) props = GlyphProps(width=fixed_width) result[code] = ExtractedGlyph(code, props, bitmap) return result def _empty_bitmap(w=SC.W_VAR_INIT, h=SC.H): return [[0] * w for _ in range(h)] def parse_all_sheets(assets_dir): """Parse all sheets and return a map of codepoint -> ExtractedGlyph.""" result = {} for sheet_index, filename in enumerate(SC.FILE_LIST): filepath = os.path.join(assets_dir, filename) if not os.path.exists(filepath): print(f" [SKIP] {filename} not found") continue is_var = SC.is_variable(filename) is_xy = SC.is_xy_swapped(filename) is_ew = SC.is_extra_wide(filename) cell_w = SC.get_cell_width(sheet_index) cell_h = SC.get_cell_height(sheet_index) cols = SC.get_columns(sheet_index) tags = [] if is_var: tags.append("VARIABLE") if is_xy: tags.append("XYSWAP") if is_ew: tags.append("EXTRAWIDE") if not tags: tags.append("STATIC") print(f" Loading [{','.join(tags)}] {filename}") image = read_tga(filepath) if is_var: sheet_glyphs = parse_variable_sheet(image, sheet_index, cell_w, cell_h, cols, is_xy) else: sheet_glyphs = parse_fixed_sheet(image, sheet_index, cell_w, cell_h, cols) result.update(sheet_glyphs) # Fixed-width overrides _add_fixed_width_overrides(result) return result def _add_fixed_width_overrides(result): """Apply fixed-width overrides.""" # Hangul compat jamo for code in SC.CODE_RANGE_HANGUL_COMPAT: if code not in result: result[code] = ExtractedGlyph(code, GlyphProps(width=SC.W_HANGUL_BASE), _empty_bitmap(SC.W_HANGUL_BASE)) # Zero-width ranges (only internal/PUA control ranges, not surrogates or full Plane 16) for code in range(0xFFFA0, 0x100000): result[code] = ExtractedGlyph(code, GlyphProps(width=0), _empty_bitmap(1, 1)) # Null char result[0] = ExtractedGlyph(0, GlyphProps(width=0), _empty_bitmap(1, 1)) # Replacement character at U+007F if 0x7F in result: result[0x7F].props.width = 15 def get_hangul_jamo_bitmaps(assets_dir): """ Extract raw Hangul jamo bitmaps from the Hangul sheet for composition. Returns a function: (index, row) -> bitmap (list of list of int) """ filename = SC.FILE_LIST[SC.SHEET_HANGUL] filepath = os.path.join(assets_dir, filename) if not os.path.exists(filepath): print(" [WARNING] Hangul sheet not found") return lambda idx, row: _empty_bitmap(SC.W_HANGUL_BASE) image = read_tga(filepath) cell_w = SC.W_HANGUL_BASE cell_h = SC.H def get_bitmap(index, row): cell_x = index * cell_w cell_y = row * cell_h bitmap = [] for r in range(cell_h): row_data = [] for c in range(cell_w): px = image.get_pixel(cell_x + c, cell_y + r) row_data.append(1 if (px & 0xFF) != 0 else 0) bitmap.append(row_data) return bitmap return get_bitmap