mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-03-11 13:51:50 +09:00
632 lines
21 KiB
Python
632 lines
21 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Spritesheet statistics generator for TerrarumSansBitmap.
|
|
|
|
Scans all *_variable.tga sheets and reports:
|
|
- Width distribution
|
|
- Compiler directives (replaceWith breakdown)
|
|
- Kerning shape distribution
|
|
- Lowheight count
|
|
- Diacritics (anchors, writeOnTop, stacking)
|
|
- Glyphs missing kerning data
|
|
- Dot removal directives
|
|
- Nudge usage
|
|
- Alignment modes
|
|
- Per-sheet summary
|
|
|
|
Usage:
|
|
python sheet_stats.py [assets_dir]
|
|
python sheet_stats.py ../src/assets
|
|
"""
|
|
|
|
import os
|
|
import struct
|
|
import sys
|
|
from collections import Counter, defaultdict
|
|
|
|
# ---- TGA reader ----
|
|
|
|
class TgaImage:
|
|
__slots__ = ('width', 'height', 'pixels')
|
|
|
|
def __init__(self, width, height, pixels):
|
|
self.width = width
|
|
self.height = height
|
|
self.pixels = pixels
|
|
|
|
def get_pixel(self, x, y):
|
|
if x < 0 or x >= self.width or y < 0 or y >= self.height:
|
|
return 0
|
|
return self.pixels[y * self.width + x]
|
|
|
|
|
|
def read_tga(path):
|
|
with open(path, 'rb') as f:
|
|
data = f.read()
|
|
pos = 0
|
|
id_length = data[pos]; pos += 1
|
|
pos += 1 # colour_map_type
|
|
image_type = data[pos]; pos += 1
|
|
pos += 5
|
|
pos += 4 # x/y origin
|
|
width = struct.unpack_from('<H', data, pos)[0]; pos += 2
|
|
height = struct.unpack_from('<H', data, pos)[0]; pos += 2
|
|
bits_per_pixel = data[pos]; pos += 1
|
|
descriptor = data[pos]; pos += 1
|
|
top_to_bottom = (descriptor & 0x20) != 0
|
|
bpp = bits_per_pixel // 8
|
|
pos += id_length
|
|
if image_type != 2 or bpp not in (3, 4):
|
|
raise ValueError(f"Unsupported TGA: type={image_type}, bpp={bits_per_pixel}")
|
|
pixels = [0] * (width * height)
|
|
for row in range(height):
|
|
y = row if top_to_bottom else (height - 1 - row)
|
|
for x in range(width):
|
|
b = data[pos]; g = data[pos+1]; r = data[pos+2]
|
|
a = data[pos+3] if bpp == 4 else 0xFF
|
|
pos += bpp
|
|
pixels[y * width + x] = (r << 24) | (g << 16) | (b << 8) | a
|
|
return TgaImage(width, height, pixels)
|
|
|
|
|
|
def tagify(pixel):
|
|
return 0 if (pixel & 0xFF) == 0 else pixel
|
|
|
|
|
|
def signed_byte(val):
|
|
return val - 256 if val >= 128 else val
|
|
|
|
|
|
# ---- Unicode range classification ----
|
|
|
|
# Ranges to EXCLUDE from "missing kern" report
|
|
EXCLUDE_KERN_RANGES = [
|
|
(0x3400, 0xA000, 'CJK Unified Ideographs'),
|
|
(0x1100, 0x1200, 'Hangul Jamo'),
|
|
(0xA960, 0xA980, 'Hangul Jamo Extended-A'),
|
|
(0xD7B0, 0xD800, 'Hangul Jamo Extended-B'),
|
|
(0x3130, 0x3190, 'Hangul Compatibility Jamo'),
|
|
(0xAC00, 0xD7A4, 'Hangul Syllables'),
|
|
(0xE000, 0xE100, 'Custom Symbols (PUA)'),
|
|
(0xF0000, 0xF0600, 'Internal PUA'),
|
|
(0xFFE00, 0x100000, 'Internal control/PUA'),
|
|
(0x2800, 0x2900, 'Braille'),
|
|
(0x1FB00, 0x1FC00, 'Legacy Computing Symbols'),
|
|
(0x2400, 0x2440, 'Control Pictures'),
|
|
(0x3000, 0x3040, 'CJK Punctuation'),
|
|
(0x3040, 0x3100, 'Hiragana/Katakana'),
|
|
(0x31F0, 0x3200, 'Katakana Phonetic Ext'),
|
|
(0xFF00, 0x10000, 'Halfwidth/Fullwidth'),
|
|
(0x16A0, 0x1700, 'Runic'),
|
|
(0x300, 0x370, 'Combining Diacritical Marks'),
|
|
(0x1B000, 0x1B170, 'Hentaigana'),
|
|
]
|
|
|
|
|
|
def is_excluded_from_kern(cp):
|
|
for lo, hi, _ in EXCLUDE_KERN_RANGES:
|
|
if lo <= cp < hi:
|
|
return True
|
|
return False
|
|
|
|
|
|
def unicode_block_name(cp):
|
|
"""Rough Unicode block classification for display."""
|
|
blocks = [
|
|
(0x0000, 0x0080, 'Basic Latin'),
|
|
(0x0080, 0x0100, 'Latin-1 Supplement'),
|
|
(0x0100, 0x0180, 'Latin Extended-A'),
|
|
(0x0180, 0x0250, 'Latin Extended-B'),
|
|
(0x0250, 0x02B0, 'IPA Extensions'),
|
|
(0x02B0, 0x0300, 'Spacing Modifier Letters'),
|
|
(0x0300, 0x0370, 'Combining Diacritical Marks'),
|
|
(0x0370, 0x0400, 'Greek and Coptic'),
|
|
(0x0400, 0x0530, 'Cyrillic'),
|
|
(0x0530, 0x0590, 'Armenian'),
|
|
(0x0900, 0x0980, 'Devanagari'),
|
|
(0x0980, 0x0A00, 'Bengali'),
|
|
(0x0B80, 0x0C00, 'Tamil'),
|
|
(0x0E00, 0x0E80, 'Thai'),
|
|
(0x10D0, 0x1100, 'Georgian'),
|
|
(0x1100, 0x1200, 'Hangul Jamo'),
|
|
(0x13A0, 0x13F6, 'Cherokee'),
|
|
(0x1B80, 0x1BC0, 'Sundanese'),
|
|
(0x1C80, 0x1CC0, 'Cyrillic Extended'),
|
|
(0x1D00, 0x1DC0, 'Phonetic Extensions'),
|
|
(0x1E00, 0x1F00, 'Latin Extended Additional'),
|
|
(0x1F00, 0x2000, 'Greek Extended'),
|
|
(0x2000, 0x2070, 'General Punctuation'),
|
|
(0x20A0, 0x20D0, 'Currency Symbols'),
|
|
(0x2100, 0x2200, 'Letterlike Symbols'),
|
|
(0x2C60, 0x2C80, 'Latin Extended-C'),
|
|
(0x2DE0, 0x2E00, 'Cyrillic Extended-A'),
|
|
(0xA640, 0xA6A0, 'Cyrillic Extended-B'),
|
|
(0xA720, 0xA800, 'Latin Extended-D'),
|
|
(0xFB00, 0xFB50, 'Alphabetic Presentation Forms'),
|
|
(0x1F100, 0x1F200, 'Enclosed Alphanumeric Supplement'),
|
|
(0xF0000, 0xF0060, 'PUA Bulgarian'),
|
|
(0xF0060, 0xF00C0, 'PUA Serbian'),
|
|
(0xF0100, 0xF0500, 'PUA Devanagari Internal'),
|
|
(0xF0500, 0xF0600, 'PUA Sundanese/Codestyle'),
|
|
]
|
|
for lo, hi, name in blocks:
|
|
if lo <= cp < hi:
|
|
return name
|
|
return f'U+{cp:04X}'
|
|
|
|
|
|
# ---- Code ranges (from sheet_config.py) ----
|
|
|
|
CODE_RANGE = [
|
|
list(range(0x00, 0x100)),
|
|
list(range(0x1100, 0x1200)) + list(range(0xA960, 0xA980)) + list(range(0xD7B0, 0xD800)),
|
|
list(range(0x100, 0x180)),
|
|
list(range(0x180, 0x250)),
|
|
list(range(0x3040, 0x3100)) + list(range(0x31F0, 0x3200)),
|
|
list(range(0x3000, 0x3040)),
|
|
list(range(0x3400, 0xA000)),
|
|
list(range(0x400, 0x530)),
|
|
list(range(0xFF00, 0x10000)),
|
|
list(range(0x2000, 0x20A0)),
|
|
list(range(0x370, 0x3CF)),
|
|
list(range(0xE00, 0xE60)),
|
|
list(range(0x530, 0x590)),
|
|
list(range(0x10D0, 0x1100)),
|
|
list(range(0x250, 0x300)),
|
|
list(range(0x16A0, 0x1700)),
|
|
list(range(0x1E00, 0x1F00)),
|
|
list(range(0xE000, 0xE100)),
|
|
list(range(0xF0000, 0xF0060)),
|
|
list(range(0xF0060, 0xF00C0)),
|
|
list(range(0x13A0, 0x13F6)),
|
|
list(range(0x1D00, 0x1DC0)),
|
|
list(range(0x900, 0x980)) + list(range(0xF0100, 0xF0500)),
|
|
list(range(0x1C90, 0x1CC0)),
|
|
list(range(0x300, 0x370)),
|
|
list(range(0x1F00, 0x2000)),
|
|
list(range(0x2C60, 0x2C80)),
|
|
list(range(0xA720, 0xA800)),
|
|
list(range(0x20A0, 0x20D0)),
|
|
list(range(0xFFE00, 0xFFFA0)),
|
|
list(range(0x2100, 0x2200)),
|
|
list(range(0x1F100, 0x1F200)),
|
|
list(range(0x0B80, 0x0C00)) + list(range(0xF00C0, 0xF0100)),
|
|
list(range(0x980, 0xA00)),
|
|
list(range(0x2800, 0x2900)),
|
|
list(range(0x1B80, 0x1BC0)) + list(range(0x1CC0, 0x1CD0)) + list(range(0xF0500, 0xF0510)),
|
|
list(range(0xF0110, 0xF0130)),
|
|
list(range(0xF0520, 0xF0580)),
|
|
list(range(0xFB00, 0xFB18)),
|
|
list(range(0x1B000, 0x1B170)),
|
|
list(range(0x2400, 0x2440)),
|
|
list(range(0x1FB00, 0x1FC00)),
|
|
list(range(0xA640, 0xA6A0)),
|
|
list(range(0x2DE0, 0x2E00)),
|
|
list(range(0x1C80, 0x1C8F)),
|
|
]
|
|
|
|
FILE_LIST = [
|
|
"ascii_variable.tga",
|
|
"hangul_johab.tga",
|
|
"latinExtA_variable.tga",
|
|
"latinExtB_variable.tga",
|
|
"kana_variable.tga",
|
|
"cjkpunct_variable.tga",
|
|
"wenquanyi.tga",
|
|
"cyrilic_variable.tga",
|
|
"halfwidth_fullwidth_variable.tga",
|
|
"unipunct_variable.tga",
|
|
"greek_variable.tga",
|
|
"thai_variable.tga",
|
|
"hayeren_variable.tga",
|
|
"kartuli_variable.tga",
|
|
"ipa_ext_variable.tga",
|
|
"futhark.tga",
|
|
"latinExt_additional_variable.tga",
|
|
"puae000-e0ff.tga",
|
|
"cyrilic_bulgarian_variable.tga",
|
|
"cyrilic_serbian_variable.tga",
|
|
"tsalagi_variable.tga",
|
|
"phonetic_extensions_variable.tga",
|
|
"devanagari_variable.tga",
|
|
"kartuli_allcaps_variable.tga",
|
|
"diacritical_marks_variable.tga",
|
|
"greek_polytonic_xyswap_variable.tga",
|
|
"latinExtC_variable.tga",
|
|
"latinExtD_variable.tga",
|
|
"currencies_variable.tga",
|
|
"internal_variable.tga",
|
|
"letterlike_symbols_variable.tga",
|
|
"enclosed_alphanumeric_supplement_variable.tga",
|
|
"tamil_extrawide_variable.tga",
|
|
"bengali_variable.tga",
|
|
"braille_variable.tga",
|
|
"sundanese_variable.tga",
|
|
"devanagari_internal_extrawide_variable.tga",
|
|
"pua_codestyle_ascii_variable.tga",
|
|
"alphabetic_presentation_forms_extrawide_variable.tga",
|
|
"hentaigana_variable.tga",
|
|
"control_pictures_variable.tga",
|
|
"symbols_for_legacy_computing_variable.tga",
|
|
"cyrilic_extB_variable.tga",
|
|
"cyrilic_extA_variable.tga",
|
|
"cyrilic_extC_variable.tga",
|
|
]
|
|
|
|
|
|
def is_variable(fn):
|
|
return fn.endswith('_variable.tga')
|
|
|
|
|
|
def is_extra_wide(fn):
|
|
return 'extrawide' in fn.lower()
|
|
|
|
|
|
def is_xyswap(fn):
|
|
return 'xyswap' in fn.lower()
|
|
|
|
|
|
# ---- Shape tag formatting ----
|
|
|
|
SHAPE_CHARS = 'ABCDEFGHJK'
|
|
|
|
|
|
def format_shape(mask, is_ytype):
|
|
"""Format kerning mask + ytype as keming_machine tag, e.g. 'ABCDEFGH(B)'."""
|
|
bits = []
|
|
for i, ch in enumerate(SHAPE_CHARS):
|
|
bit_pos = [7, 6, 5, 4, 3, 2, 1, 0, 15, 14][i]
|
|
if (mask >> bit_pos) & 1:
|
|
bits.append(ch)
|
|
chars = ''.join(bits) if bits else '(empty)'
|
|
mode = '(Y)' if is_ytype else '(B)'
|
|
return f'{chars}{mode}'
|
|
|
|
|
|
# ---- Parsing ----
|
|
|
|
def parse_diacritics_anchors(img, tag_x, tag_y):
|
|
"""Return number of defined diacritics anchors (0-6)."""
|
|
count = 0
|
|
for i in range(6):
|
|
y_pos = 13 - (i // 3) * 2
|
|
shift = (3 - (i % 3)) * 8
|
|
y_pixel = tagify(img.get_pixel(tag_x, tag_y + y_pos))
|
|
x_pixel = tagify(img.get_pixel(tag_x, tag_y + y_pos + 1))
|
|
y_used = ((y_pixel >> shift) & 128) != 0
|
|
x_used = ((x_pixel >> shift) & 128) != 0
|
|
if y_used or x_used:
|
|
count += 1
|
|
return count
|
|
|
|
|
|
def parse_variable_sheet(path, code_range, is_xy, is_ew):
|
|
"""Parse a variable-width sheet and yield per-glyph stats dicts."""
|
|
img = read_tga(path)
|
|
cell_w = 32 if is_ew else 16
|
|
cell_h = 20
|
|
cols = img.width // cell_w
|
|
|
|
for index, code in enumerate(code_range):
|
|
if is_xy:
|
|
cell_x = (index // cols) * cell_w
|
|
cell_y = (index % cols) * cell_h
|
|
else:
|
|
cell_x = (index % cols) * cell_w
|
|
cell_y = (index // cols) * cell_h
|
|
|
|
tag_x = cell_x + (cell_w - 1)
|
|
tag_y = cell_y
|
|
|
|
# Width
|
|
width = 0
|
|
for y in range(5):
|
|
if img.get_pixel(tag_x, tag_y + y) & 0xFF:
|
|
width |= (1 << y)
|
|
|
|
if width == 0:
|
|
continue # empty cell
|
|
|
|
# Lowheight
|
|
is_low_height = (img.get_pixel(tag_x, tag_y + 5) & 0xFF) != 0
|
|
|
|
# Kerning data
|
|
kern_pixel = tagify(img.get_pixel(tag_x, tag_y + 6))
|
|
has_kern = (kern_pixel & 0xFF) != 0
|
|
is_ytype = (kern_pixel & 0x80000000) != 0 if has_kern else False
|
|
kern_mask = ((kern_pixel >> 8) & 0xFFFFFF) if has_kern else 0
|
|
|
|
# Dot removal (Y+7)
|
|
dot_pixel = tagify(img.get_pixel(tag_x, tag_y + 7))
|
|
has_dot_removal = dot_pixel != 0
|
|
|
|
# Compiler directive (Y+9)
|
|
dir_pixel = tagify(img.get_pixel(tag_x, tag_y + 9))
|
|
opcode = (dir_pixel >> 24) & 0xFF
|
|
arg1 = (dir_pixel >> 16) & 0xFF
|
|
arg2 = (dir_pixel >> 8) & 0xFF
|
|
|
|
# Nudge (Y+10)
|
|
nudge_pixel = tagify(img.get_pixel(tag_x, tag_y + 10))
|
|
nudge_x = signed_byte((nudge_pixel >> 24) & 0xFF) if nudge_pixel else 0
|
|
nudge_y = signed_byte((nudge_pixel >> 16) & 0xFF) if nudge_pixel else 0
|
|
has_nudge = nudge_x != 0 or nudge_y != 0
|
|
|
|
# Diacritics anchors (Y+11..Y+14)
|
|
n_anchors = parse_diacritics_anchors(img, tag_x, tag_y)
|
|
|
|
# Alignment (Y+15..Y+16)
|
|
align = 0
|
|
for y in range(2):
|
|
if img.get_pixel(tag_x, tag_y + 15 + y) & 0xFF:
|
|
align |= (1 << y)
|
|
|
|
# WriteOnTop (Y+17)
|
|
wot_raw = img.get_pixel(tag_x, tag_y + 17)
|
|
has_write_on_top = (wot_raw & 0xFF) != 0
|
|
|
|
# Stack (Y+18..Y+19)
|
|
s0 = tagify(img.get_pixel(tag_x, tag_y + 18))
|
|
s1 = tagify(img.get_pixel(tag_x, tag_y + 19))
|
|
if s0 == 0x00FF00FF and s1 == 0x00FF00FF:
|
|
stack_where = 4 # STACK_DONT
|
|
else:
|
|
stack_where = 0
|
|
for y in range(2):
|
|
if img.get_pixel(tag_x, tag_y + 18 + y) & 0xFF:
|
|
stack_where |= (1 << y)
|
|
|
|
yield {
|
|
'code': code,
|
|
'width': width,
|
|
'lowheight': is_low_height,
|
|
'has_kern': has_kern,
|
|
'is_ytype': is_ytype,
|
|
'kern_mask': kern_mask,
|
|
'has_dot_removal': has_dot_removal,
|
|
'opcode': opcode,
|
|
'opcode_arg1': arg1,
|
|
'opcode_arg2': arg2,
|
|
'has_nudge': has_nudge,
|
|
'nudge_x': nudge_x,
|
|
'nudge_y': nudge_y,
|
|
'n_anchors': n_anchors,
|
|
'align': align,
|
|
'has_write_on_top': has_write_on_top,
|
|
'stack_where': stack_where,
|
|
}
|
|
|
|
|
|
# ---- Main ----
|
|
|
|
def main():
|
|
assets_dir = sys.argv[1] if len(sys.argv) > 1 else '../src/assets'
|
|
|
|
# Accumulators
|
|
all_glyphs = []
|
|
per_sheet = defaultdict(lambda: {'total': 0, 'kern': 0, 'lowh': 0, 'directives': 0})
|
|
sheets_scanned = 0
|
|
|
|
print(f"Scanning {assets_dir}...\n")
|
|
|
|
for sheet_idx, filename in enumerate(FILE_LIST):
|
|
if not is_variable(filename):
|
|
continue
|
|
if sheet_idx >= len(CODE_RANGE):
|
|
continue
|
|
|
|
path = os.path.join(assets_dir, filename)
|
|
if not os.path.exists(path):
|
|
continue
|
|
|
|
is_xy = is_xyswap(filename)
|
|
is_ew = is_extra_wide(filename)
|
|
code_range = CODE_RANGE[sheet_idx]
|
|
|
|
count = 0
|
|
for g in parse_variable_sheet(path, code_range, is_xy, is_ew):
|
|
g['sheet'] = filename
|
|
all_glyphs.append(g)
|
|
s = per_sheet[filename]
|
|
s['total'] += 1
|
|
if g['has_kern']:
|
|
s['kern'] += 1
|
|
if g['lowheight']:
|
|
s['lowh'] += 1
|
|
if g['opcode'] != 0:
|
|
s['directives'] += 1
|
|
count += 1
|
|
|
|
sheets_scanned += 1
|
|
|
|
total = len(all_glyphs)
|
|
if total == 0:
|
|
print("No glyphs found!")
|
|
return 1
|
|
|
|
print(f"Scanned {sheets_scanned} variable sheets, {total} glyphs with width > 0\n")
|
|
|
|
# ---- 1. Width distribution ----
|
|
width_counter = Counter(g['width'] for g in all_glyphs)
|
|
print("=" * 60)
|
|
print("WIDTH DISTRIBUTION")
|
|
print("=" * 60)
|
|
for w in sorted(width_counter):
|
|
c = width_counter[w]
|
|
bar = '#' * (c * 40 // max(width_counter.values()))
|
|
print(f" w={w:2d}: {c:5d} ({100*c/total:5.1f}%) {bar}")
|
|
print(f" Total: {total}")
|
|
|
|
# ---- 2. Compiler directives ----
|
|
dir_glyphs = [g for g in all_glyphs if g['opcode'] != 0]
|
|
print(f"\n{'=' * 60}")
|
|
print("COMPILER DIRECTIVES")
|
|
print("=" * 60)
|
|
print(f" Total glyphs with directives: {len(dir_glyphs)}/{total} ({100*len(dir_glyphs)/total:.1f}%)")
|
|
|
|
opcode_counter = Counter()
|
|
replace_counts = Counter()
|
|
illegal_count = 0
|
|
for g in dir_glyphs:
|
|
op = g['opcode']
|
|
opcode_counter[op] += 1
|
|
if 0x80 <= op <= 0x87:
|
|
n_replace = op & 0x07
|
|
replace_counts[n_replace] += 1
|
|
if op == 255:
|
|
illegal_count += 1
|
|
|
|
if opcode_counter:
|
|
print(f"\n By opcode:")
|
|
for op in sorted(opcode_counter):
|
|
c = opcode_counter[op]
|
|
if 0x80 <= op <= 0x87:
|
|
label = f'replaceWith (n={op & 0x07})'
|
|
elif op == 255:
|
|
label = 'ILLEGAL (0xFF)'
|
|
else:
|
|
label = f'unknown'
|
|
print(f" 0x{op:02X} ({label}): {c}")
|
|
|
|
if replace_counts:
|
|
print(f"\n replaceWith breakdown:")
|
|
for n in sorted(replace_counts):
|
|
print(f" {n} replacement char(s): {replace_counts[n]}")
|
|
|
|
if illegal_count:
|
|
print(f" Illegal glyphs: {illegal_count}")
|
|
|
|
# ---- 3. Kerning shapes ----
|
|
kern_glyphs = [g for g in all_glyphs if g['has_kern']]
|
|
print(f"\n{'=' * 60}")
|
|
print("KERNING SHAPES")
|
|
print("=" * 60)
|
|
print(f" Glyphs with kern data: {len(kern_glyphs)}/{total} ({100*len(kern_glyphs)/total:.1f}%)")
|
|
|
|
shape_counter = Counter()
|
|
for g in kern_glyphs:
|
|
tag = format_shape(g['kern_mask'], g['is_ytype'])
|
|
shape_counter[tag] += 1
|
|
|
|
n_unique = len(shape_counter)
|
|
n_kern = len(kern_glyphs)
|
|
ytype_count = sum(1 for g in kern_glyphs if g['is_ytype'])
|
|
btype_count = n_kern - ytype_count
|
|
print(f" Unique shapes: {n_unique}")
|
|
print(f" B-type: {btype_count} ({100*btype_count/n_kern:.1f}%)")
|
|
print(f" Y-type: {ytype_count} ({100*ytype_count/n_kern:.1f}%)")
|
|
|
|
# Per-bit occurrences
|
|
bit_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K']
|
|
bit_positions = [7, 6, 5, 4, 3, 2, 1, 0, 15, 14]
|
|
print(f"\n Per-bit occurrences ({n_kern} glyphs with kern):")
|
|
for name, pos in zip(bit_names, bit_positions):
|
|
c = sum(1 for g in kern_glyphs if (g['kern_mask'] >> pos) & 1)
|
|
bar = '#' * (c * 30 // n_kern)
|
|
print(f" {name}: {c:5d}/{n_kern} ({100*c/n_kern:5.1f}%) {bar}")
|
|
|
|
print(f"\n Top shapes (of {n_unique} unique):")
|
|
for tag, c in shape_counter.most_common(30):
|
|
bar = '#' * (c * 30 // shape_counter.most_common(1)[0][1])
|
|
print(f" {tag:<22s} {c:4d} ({100*c/len(kern_glyphs):5.1f}%) {bar}")
|
|
if n_unique > 30:
|
|
remaining = sum(c for _, c in shape_counter.most_common()[30:])
|
|
print(f" ... {n_unique - 30} more shapes: {remaining} glyphs")
|
|
|
|
# ---- 4. Lowheight ----
|
|
lowh_glyphs = [g for g in all_glyphs if g['lowheight']]
|
|
print(f"\n{'=' * 60}")
|
|
print("LOWHEIGHT")
|
|
print("=" * 60)
|
|
print(f" Lowheight glyphs: {len(lowh_glyphs)}/{total} ({100*len(lowh_glyphs)/total:.1f}%)")
|
|
|
|
# ---- 5. Diacritics / stacking ----
|
|
anchor_glyphs = [g for g in all_glyphs if g['n_anchors'] > 0]
|
|
wot_glyphs = [g for g in all_glyphs if g['has_write_on_top']]
|
|
stack_names = {0: 'STACK_UP', 1: 'STACK_DOWN', 2: 'STACK_BEFORE_N_AFTER',
|
|
3: 'STACK_UP_N_DOWN', 4: 'STACK_DONT'}
|
|
stack_counter = Counter(g['stack_where'] for g in all_glyphs if g['stack_where'] != 0)
|
|
|
|
print(f"\n{'=' * 60}")
|
|
print("DIACRITICS & STACKING")
|
|
print("=" * 60)
|
|
print(f" Glyphs with diacritics anchors: {len(anchor_glyphs)}/{total} ({100*len(anchor_glyphs)/total:.1f}%)")
|
|
anchor_count_dist = Counter(g['n_anchors'] for g in anchor_glyphs)
|
|
for n in sorted(anchor_count_dist):
|
|
print(f" {n} anchor(s): {anchor_count_dist[n]}")
|
|
print(f" Glyphs with writeOnTop: {len(wot_glyphs)}")
|
|
if stack_counter:
|
|
print(f" Stack modes:")
|
|
for sw, c in stack_counter.most_common():
|
|
print(f" {stack_names.get(sw, f'?{sw}')}: {c}")
|
|
|
|
# ---- 6. Dot removal ----
|
|
dot_glyphs = [g for g in all_glyphs if g['has_dot_removal']]
|
|
print(f"\n{'=' * 60}")
|
|
print("DOT REMOVAL")
|
|
print("=" * 60)
|
|
print(f" Glyphs with dot removal directive: {len(dot_glyphs)}/{total} ({100*len(dot_glyphs)/total:.1f}%)")
|
|
|
|
# ---- 7. Nudge ----
|
|
nudge_glyphs = [g for g in all_glyphs if g['has_nudge']]
|
|
print(f"\n{'=' * 60}")
|
|
print("NUDGE")
|
|
print("=" * 60)
|
|
print(f" Glyphs with nudge: {len(nudge_glyphs)}/{total} ({100*len(nudge_glyphs)/total:.1f}%)")
|
|
if nudge_glyphs:
|
|
nudge_x_vals = Counter(g['nudge_x'] for g in nudge_glyphs if g['nudge_x'] != 0)
|
|
nudge_y_vals = Counter(g['nudge_y'] for g in nudge_glyphs if g['nudge_y'] != 0)
|
|
if nudge_x_vals:
|
|
print(f" X nudge values: {dict(sorted(nudge_x_vals.items()))}")
|
|
if nudge_y_vals:
|
|
print(f" Y nudge values: {dict(sorted(nudge_y_vals.items()))}")
|
|
|
|
# ---- 8. Alignment ----
|
|
align_names = {0: 'LEFT', 1: 'RIGHT', 2: 'CENTRE', 3: 'BEFORE'}
|
|
align_counter = Counter(g['align'] for g in all_glyphs if g['align'] != 0)
|
|
print(f"\n{'=' * 60}")
|
|
print("ALIGNMENT")
|
|
print("=" * 60)
|
|
if align_counter:
|
|
for a, c in align_counter.most_common():
|
|
print(f" {align_names.get(a, f'?{a}')}: {c}")
|
|
else:
|
|
print(" All glyphs use default (LEFT) alignment")
|
|
|
|
# ---- 9. Missing kern data ----
|
|
missing = [g for g in all_glyphs
|
|
if not g['has_kern']
|
|
and g['opcode'] == 0
|
|
and not is_excluded_from_kern(g['code'])]
|
|
print(f"\n{'=' * 60}")
|
|
print("MISSING KERNING DATA")
|
|
print("=" * 60)
|
|
print(f" Glyphs without kern (excl. CJK/Hangul/symbols/diacriticals): "
|
|
f"{len(missing)}/{total} ({100*len(missing)/total:.1f}%)")
|
|
if missing:
|
|
by_block = defaultdict(list)
|
|
for g in missing:
|
|
by_block[unicode_block_name(g['code'])].append(g['code'])
|
|
print(f"\n By block:")
|
|
for block in sorted(by_block, key=lambda b: by_block[b][0]):
|
|
cps = by_block[block]
|
|
sample = ', '.join(f'U+{c:04X}' for c in cps[:8])
|
|
more = f' ... +{len(cps)-8}' if len(cps) > 8 else ''
|
|
print(f" {block}: {len(cps)} ({sample}{more})")
|
|
|
|
# ---- 10. Per-sheet summary ----
|
|
print(f"\n{'=' * 60}")
|
|
print("PER-SHEET SUMMARY")
|
|
print("=" * 60)
|
|
print(f" {'Sheet':<52s} {'Total':>5s} {'Kern':>5s} {'LowH':>5s} {'Dir':>4s}")
|
|
print(f" {'-'*52} {'-'*5} {'-'*5} {'-'*5} {'-'*4}")
|
|
for fn in sorted(per_sheet):
|
|
s = per_sheet[fn]
|
|
print(f" {fn:<52s} {s['total']:5d} {s['kern']:5d} {s['lowh']:5d} {s['directives']:4d}")
|
|
|
|
return 0
|
|
|
|
|
|
if __name__ == '__main__':
|
|
sys.exit(main())
|