mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-06-06 14:08:30 +09:00
Compare commits
3 Commits
8daa968d80
...
175fe4edfb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
175fe4edfb | ||
|
|
4d7aa79740 | ||
|
|
9d9efce9d4 |
@@ -2,6 +2,7 @@
|
||||
#include "tga.h"
|
||||
#include "nn.h"
|
||||
#include "safetensor.h"
|
||||
#include "unicode_lm.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -75,7 +76,8 @@ int apply_model(const char *tga_path) {
|
||||
int rows = img->height / cell_h;
|
||||
int total_cells = cols * rows;
|
||||
|
||||
int processed = 0, updated = 0, skipped = 0;
|
||||
int start_code = sheet_start_code(basename);
|
||||
int processed = 0, updated = 0, skipped = 0, fixed_lm = 0;
|
||||
|
||||
for (int index = 0; index < total_cells; index++) {
|
||||
int cell_x, cell_y;
|
||||
@@ -107,6 +109,21 @@ int apply_model(const char *tga_path) {
|
||||
int opcode = (int)((dir_pixel >> 24) & 0xFF);
|
||||
if (opcode != 0) { skipped++; continue; }
|
||||
|
||||
/* Modifier letters: fixed kern pixel, skip inference */
|
||||
if (start_code >= 0 && is_modifier_letter(start_code + index)) {
|
||||
if (is_subscript_modifier(start_code + index)) {
|
||||
/* Subscript: CDEFGHJK(B), lowheight=1 */
|
||||
tga_write_pixel(tga_path, img, tag_x, tag_y + 5, 0xFFFFFFFF);
|
||||
tga_write_pixel(tga_path, img, tag_x, tag_y + 6, 0x00C03FFF);
|
||||
} else {
|
||||
/* Superscript: ABCDEF(B), lowheight=0 */
|
||||
tga_write_pixel(tga_path, img, tag_x, tag_y + 5, 0x00000000);
|
||||
tga_write_pixel(tga_path, img, tag_x, tag_y + 6, 0x0000FCFF);
|
||||
}
|
||||
processed++; updated++; fixed_lm++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Extract 15x20 binary input */
|
||||
float input[300];
|
||||
for (int gy = 0; gy < 20; gy++) {
|
||||
@@ -155,8 +172,8 @@ int apply_model(const char *tga_path) {
|
||||
updated++;
|
||||
}
|
||||
|
||||
printf("Processed: %d cells, Updated: %d, Skipped: %d (of %d total)\n",
|
||||
processed, updated, skipped, total_cells);
|
||||
printf("Processed: %d cells, Updated: %d, Skipped: %d, Fixed Lm: %d (of %d total)\n",
|
||||
processed, updated, skipped, fixed_lm, total_cells);
|
||||
|
||||
tga_free(img);
|
||||
network_free(net);
|
||||
|
||||
BIN
Autokem/autokem.safetensors
LFS
BIN
Autokem/autokem.safetensors
LFS
Binary file not shown.
@@ -2,6 +2,7 @@
|
||||
#include "tga.h"
|
||||
#include "nn.h"
|
||||
#include "safetensor.h"
|
||||
#include "unicode_lm.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -42,7 +43,8 @@ static void extract_shape_bits(int kerning_mask, float *shape) {
|
||||
|
||||
/* ---- Collect samples from one TGA ---- */
|
||||
|
||||
static int collect_from_sheet(const char *path, int is_xyswap, Sample *samples, int max_samples) {
|
||||
static int collect_from_sheet(const char *path, int is_xyswap, int start_code,
|
||||
Sample *samples, int max_samples) {
|
||||
TgaImage *img = tga_read(path);
|
||||
if (!img) {
|
||||
fprintf(stderr, "Warning: cannot read %s\n", path);
|
||||
@@ -76,6 +78,10 @@ static int collect_from_sheet(const char *path, int is_xyswap, Sample *samples,
|
||||
}
|
||||
if (width == 0) continue;
|
||||
|
||||
/* Skip modifier letters (superscripts/subscripts) */
|
||||
if (start_code >= 0 && is_modifier_letter(start_code + index))
|
||||
continue;
|
||||
|
||||
/* Read kerning data pixel at Y+6 */
|
||||
uint32_t kern_pixel = tagify(tga_get_pixel(img, tag_x, tag_y + 6));
|
||||
if ((kern_pixel & 0xFF) == 0) continue; /* no kern data */
|
||||
@@ -170,7 +176,9 @@ int train_model(void) {
|
||||
char fullpath[512];
|
||||
snprintf(fullpath, sizeof(fullpath), "%s/%s", assets_dir, name);
|
||||
|
||||
int got = collect_from_sheet(fullpath, is_xyswap, all_samples + total, max_total - total);
|
||||
int start_code = sheet_start_code(name);
|
||||
int got = collect_from_sheet(fullpath, is_xyswap, start_code,
|
||||
all_samples + total, max_total - total);
|
||||
if (got > 0) {
|
||||
printf(" %s: %d samples\n", name, got);
|
||||
total += got;
|
||||
|
||||
@@ -20,10 +20,26 @@ import json
|
||||
import os
|
||||
import struct
|
||||
import sys
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
# ---- Sheet code ranges (imported from OTFbuild/sheet_config.py) ----
|
||||
|
||||
# Directory holding OTFbuild/sheet_config.py, resolved relative to this file.
_otfbuild = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'OTFbuild')

# Make sheet_config importable only for the duration of the import, and
# always undo the sys.path change -- including when the import fails.
sys.path.insert(0, _otfbuild)
try:
    from sheet_config import FILE_LIST as _FILE_LIST, CODE_RANGE as _CODE_RANGE
except ImportError:
    # sheet_config is optional: with an empty map every lookup yields None,
    # so callers pass code_range=None and no Lm filtering takes place.
    _CODE_RANGE_MAP = {}
else:
    # Pair each sheet filename with the code range at the same position;
    # zip() stops at the shorter list, so unmatched trailing entries are ignored.
    _CODE_RANGE_MAP = dict(zip(_FILE_LIST, _CODE_RANGE))
finally:
    # Remove by value rather than pop(0): the imported module may itself
    # have touched sys.path, so index 0 is not guaranteed to be our entry.
    try:
        sys.path.remove(_otfbuild)
    except ValueError:
        pass
|
||||
|
||||
|
||||
# ---- TGA reader (matches OTFbuild/tga_reader.py and Autokem/tga.c) ----
|
||||
|
||||
class TgaImage:
|
||||
@@ -80,7 +96,7 @@ def tagify(pixel):
|
||||
|
||||
# ---- Data collection (matches Autokem/train.c) ----
|
||||
|
||||
def collect_from_sheet(path, is_xyswap):
|
||||
def collect_from_sheet(path, is_xyswap, code_range=None):
|
||||
"""Extract labelled samples from a single TGA sheet."""
|
||||
img = read_tga(path)
|
||||
cell_w, cell_h = 16, 20
|
||||
@@ -90,6 +106,7 @@ def collect_from_sheet(path, is_xyswap):
|
||||
|
||||
inputs = []
|
||||
labels = []
|
||||
skipped_lm = 0
|
||||
|
||||
for index in range(total_cells):
|
||||
if is_xyswap:
|
||||
@@ -110,6 +127,16 @@ def collect_from_sheet(path, is_xyswap):
|
||||
if width == 0:
|
||||
continue
|
||||
|
||||
# Skip modifier letters (superscripts/subscripts)
|
||||
if code_range is not None and index < len(code_range):
|
||||
cp = code_range[index]
|
||||
try:
|
||||
if unicodedata.category(chr(cp)) == 'Lm':
|
||||
skipped_lm += 1
|
||||
continue
|
||||
except (ValueError, OverflowError):
|
||||
pass
|
||||
|
||||
# Kern data pixel at Y+6
|
||||
kern_pixel = tagify(img.get_pixel(tag_x, tag_y + 6))
|
||||
if (kern_pixel & 0xFF) == 0:
|
||||
@@ -145,7 +172,7 @@ def collect_from_sheet(path, is_xyswap):
|
||||
inputs.append(inp)
|
||||
labels.append(shape + [is_kern_ytype, is_low_height])
|
||||
|
||||
return inputs, labels
|
||||
return inputs, labels, skipped_lm
|
||||
|
||||
|
||||
def collect_all_samples(assets_dir):
|
||||
@@ -153,6 +180,7 @@ def collect_all_samples(assets_dir):
|
||||
all_inputs = []
|
||||
all_labels = []
|
||||
file_count = 0
|
||||
total_skipped_lm = 0
|
||||
|
||||
for name in sorted(os.listdir(assets_dir)):
|
||||
if not name.endswith('_variable.tga'):
|
||||
@@ -161,14 +189,20 @@ def collect_all_samples(assets_dir):
|
||||
continue
|
||||
|
||||
is_xyswap = 'xyswap' in name
|
||||
code_range = _CODE_RANGE_MAP.get(name, None)
|
||||
path = os.path.join(assets_dir, name)
|
||||
inputs, labels = collect_from_sheet(path, is_xyswap)
|
||||
inputs, labels, skipped_lm = collect_from_sheet(path, is_xyswap, code_range)
|
||||
total_skipped_lm += skipped_lm
|
||||
if inputs:
|
||||
print(f" {name}: {len(inputs)} samples")
|
||||
suffix = f" (skipped {skipped_lm} Lm)" if skipped_lm else ""
|
||||
print(f" {name}: {len(inputs)} samples{suffix}")
|
||||
all_inputs.extend(inputs)
|
||||
all_labels.extend(labels)
|
||||
file_count += 1
|
||||
|
||||
if total_skipped_lm:
|
||||
print(f" Total modifier letters filtered: {total_skipped_lm}")
|
||||
|
||||
return np.array(all_inputs), np.array(all_labels, dtype=np.float32), file_count
|
||||
|
||||
|
||||
|
||||
141
Autokem/unicode_lm.h
Normal file
141
Autokem/unicode_lm.h
Normal file
@@ -0,0 +1,141 @@
|
||||
#ifndef UNICODE_LM_H
|
||||
#define UNICODE_LM_H
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Unicode category Lm (Letter, modifier) range checks.
|
||||
* Generated from Python unicodedata (Unicode 16.0).
|
||||
*
|
||||
* is_modifier_letter(cp) — true for all Lm codepoints
|
||||
* is_subscript_modifier(cp) — true for Lm codepoints with <sub> decomposition
|
||||
*/
|
||||
|
||||
static inline int is_modifier_letter(int cp) {
    /*
     * Inclusive [first, last] codepoint ranges of general category Lm,
     * in ascending order: 71 ranges covering 397 codepoints.
     * Single codepoints are stored as a range with first == last.
     */
    static const int lm_ranges[][2] = {
        {0x02B0, 0x02C1}, {0x02C6, 0x02D1}, {0x02E0, 0x02E4},
        {0x02EC, 0x02EC}, {0x02EE, 0x02EE}, {0x0374, 0x0374},
        {0x037A, 0x037A}, {0x0559, 0x0559}, {0x0640, 0x0640},
        {0x06E5, 0x06E6}, {0x07F4, 0x07F5}, {0x07FA, 0x07FA},
        {0x081A, 0x081A}, {0x0824, 0x0824}, {0x0828, 0x0828},
        {0x08C9, 0x08C9}, {0x0971, 0x0971}, {0x0E46, 0x0E46},
        {0x0EC6, 0x0EC6}, {0x10FC, 0x10FC}, {0x17D7, 0x17D7},
        {0x1843, 0x1843}, {0x1AA7, 0x1AA7}, {0x1C78, 0x1C7D},
        {0x1D2C, 0x1D6A}, {0x1D78, 0x1D78}, {0x1D9B, 0x1DBF},
        {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C},
        {0x2C7C, 0x2C7D}, {0x2D6F, 0x2D6F}, {0x2E2F, 0x2E2F},
        {0x3005, 0x3005}, {0x3031, 0x3035}, {0x303B, 0x303B},
        {0x309D, 0x309E}, {0x30FC, 0x30FE}, {0xA015, 0xA015},
        {0xA4F8, 0xA4FD}, {0xA60C, 0xA60C}, {0xA67F, 0xA67F},
        {0xA69C, 0xA69D}, {0xA717, 0xA71F}, {0xA770, 0xA770},
        {0xA788, 0xA788}, {0xA7F2, 0xA7F4}, {0xA7F8, 0xA7F9},
        {0xA9CF, 0xA9CF}, {0xA9E6, 0xA9E6}, {0xAA70, 0xAA70},
        {0xAADD, 0xAADD}, {0xAAF3, 0xAAF4}, {0xAB5C, 0xAB5F},
        {0xAB69, 0xAB69}, {0xFF70, 0xFF70}, {0xFF9E, 0xFF9F},
        {0x10780, 0x10785}, {0x10787, 0x107B0}, {0x107B2, 0x107BA},
        {0x16B40, 0x16B43}, {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1},
        {0x16FE3, 0x16FE3}, {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB},
        {0x1AFFD, 0x1AFFE}, {0x1E030, 0x1E06D}, {0x1E137, 0x1E13D},
        {0x1E4EB, 0x1E4EB}, {0x1E94B, 0x1E94B},
    };
    const int range_count = (int)(sizeof(lm_ranges) / sizeof(lm_ranges[0]));

    /* Report 1 as soon as cp falls inside any listed range, else 0. */
    for (int i = 0; i < range_count; i++) {
        if (cp >= lm_ranges[i][0] && cp <= lm_ranges[i][1])
            return 1;
    }
    return 0;
}
|
||||
|
||||
static inline int is_subscript_modifier(int cp) {
    /*
     * The 49 Lm codepoints carrying a <sub> decomposition, as four
     * inclusive spans: 9 + 13 + 1 + 26 codepoints.
     * The logical-OR expression evaluates to exactly 1 or 0.
     */
    return (cp >= 0x1D62 && cp <= 0x1D6A)      /* 9 */
        || (cp >= 0x2090 && cp <= 0x209C)      /* 13 */
        || (cp == 0x2C7C)                      /* 1 */
        || (cp >= 0x1E051 && cp <= 0x1E06A);   /* 26 */
}
|
||||
|
||||
/*
|
||||
* Map sheet filename to first codepoint of its (contiguous) code range.
|
||||
* Returns -1 if unknown. For non-contiguous sheets (e.g. Devanagari),
|
||||
* returns the start of the first sub-range; cells beyond it won't
|
||||
* collide with Lm codepoints in practice.
|
||||
*/
|
||||
static inline int sheet_start_code(const char *basename) {
    /*
     * Substring -> first codepoint of the sheet. Entries are tested in
     * order and the first match wins, so the order is significant.
     */
    static const struct {
        const char *needle;
        int start;
    } sheets[] = {
        {"ascii_variable",               0x00},
        {"latinExtA_variable",           0x100},
        {"latinExtB_variable",           0x180},
        {"cyrilic_extC_variable",        0x1C80},
        {"cyrilic_extB_variable",        0xA640},
        {"cyrilic_bulgarian_variable",   0xF0000},
        {"cyrilic_serbian_variable",     0xF0060},
        {"cyrilic_variable",             0x400},
        {"halfwidth_fullwidth_variable", 0xFF00},
        {"unipunct_variable",            0x2000},
        {"greek_polytonic",              0x1F00},
        {"greek_variable",               0x370},
        {"thai_variable",                0xE00},
        {"hayeren_variable",             0x530},
        {"kartuli_allcaps_variable",     0x1C90},
        {"kartuli_variable",             0x10D0},
        {"ipa_ext_variable",             0x250},
        {"latinExt_additional_variable", 0x1E00},
        {"tsalagi_variable",             0x13A0},
        {"phonetic_extensions_variable", 0x1D00},
        {"latinExtC_variable",           0x2C60},
        {"latinExtD_variable",           0xA720},
        {"internal_variable",            0xFFE00},
        {"letterlike_symbols_variable",  0x2100},
        {"enclosed_alphanumeric",        0x1F100},
        {"sundanese_variable",           0x1B80},
        {"control_pictures_variable",    0x2400},
        {"latinExtE_variable",           0xAB30},
        {"latinExtF_variable",           0x10780},
        {"latinExtG_variable",           0x1DF00},
    };
    const int sheet_count = (int)(sizeof(sheets) / sizeof(sheets[0]));

    /* strstr() on a null pointer is undefined; treat it as "unknown sheet". */
    if (basename == NULL)
        return -1;

    for (int i = 0; i < sheet_count; i++) {
        if (strstr(basename, sheets[i].needle))
            return sheets[i].start;
    }

    /*
     * Devanagari is matched by its bare name, but not when the filename
     * also mentions "internal".
     * NOTE: a name containing "internal_variable" never reaches this
     * point; it is already resolved to 0xFFE00 by the table above.
     */
    if (strstr(basename, "devanagari") && !strstr(basename, "internal"))
        return 0x900;

    return -1;
}
|
||||
|
||||
#endif /* UNICODE_LM_H */
|
||||
BIN
demo.PNG
BIN
demo.PNG
Binary file not shown.
|
Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 178 KiB |
@@ -114,12 +114,12 @@ How multilingual? Real multilingual!
|
||||
⁃ Basic Latin
|
||||
⁃ Latin-1 Supplement
|
||||
⁃ Latin Extended Additional
|
||||
⁃ Latin Extended-A/B/C/D
|
||||
⁃ Latin Extended-A/B/C/D/E/F/G
|
||||
⁃ Armenian
|
||||
⁃ Arrows
|
||||
⁃ Bengaliᶠⁱ
|
||||
⁃ Braille Patterns
|
||||
⁃ Cherokee⁷
|
||||
⁃ Cherokeeᴬ
|
||||
⁃ CJK Symbols and Punctuation
|
||||
⁃ CJK Unified Ideographs⁶
|
||||
⁃ CJK Unified Ideographs Extension A¹²·¹
|
||||
@@ -161,8 +161,8 @@ How multilingual? Real multilingual!
|
||||
⁃ Tamil
|
||||
⁃ Thai
|
||||
|
||||
ᴱ No support for Coptic
|
||||
ᶠⁱ No support for ligatures ჼ Mkhedruli only
|
||||
⁶ ⁷ ⁹ ¹²·¹ Up to the specified Unicode version
|
||||
ᴱ No support for Coptic ᴬ Uppercase only
|
||||
ᶠⁱ No support for ligatures ჼ Mkhedruli only
|
||||
⁶ ¹²·¹ Up to the specified Unicode version
|
||||
|
||||
GitHub’s issue page is open! You can report any errors or leave suggestions. You can help make this font more versatile (more languages, more frameworks): clone this repo, make changes, and open a pull request! I appreciate any and all support.
|
||||
BIN
src/assets/cyrilic_variable.tga
LFS
BIN
src/assets/cyrilic_variable.tga
LFS
Binary file not shown.
BIN
src/assets/hayeren_variable.tga
LFS
BIN
src/assets/hayeren_variable.tga
LFS
Binary file not shown.
BIN
src/assets/ipa_ext_variable.tga
LFS
BIN
src/assets/ipa_ext_variable.tga
LFS
Binary file not shown.
Binary file not shown.
BIN
src/assets/latinExtF_variable.tga
LFS
Normal file
BIN
src/assets/latinExtF_variable.tga
LFS
Normal file
Binary file not shown.
BIN
src/assets/latinExtG_variable.tga
LFS
Normal file
BIN
src/assets/latinExtG_variable.tga
LFS
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
src/assets/thai_variable.tga
LFS
BIN
src/assets/thai_variable.tga
LFS
Binary file not shown.
BIN
src/assets/tsalagi_variable.tga
LFS
BIN
src/assets/tsalagi_variable.tga
LFS
Binary file not shown.
BIN
work_files/cyrilic_variable.psd
LFS
BIN
work_files/cyrilic_variable.psd
LFS
Binary file not shown.
BIN
work_files/hayeren_variable.psd
LFS
BIN
work_files/hayeren_variable.psd
LFS
Binary file not shown.
BIN
work_files/ipa_ext_variable.psd
LFS
BIN
work_files/ipa_ext_variable.psd
LFS
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
work_files/latinExtG_variable.kra
LFS
Normal file
BIN
work_files/latinExtG_variable.kra
LFS
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
work_files/thai_variable.psd
LFS
BIN
work_files/thai_variable.psd
LFS
Binary file not shown.
BIN
work_files/tsalagi_variable.psd
LFS
BIN
work_files/tsalagi_variable.psd
LFS
Binary file not shown.
Reference in New Issue
Block a user