mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-03-07 20:01:52 +09:00
424 lines
14 KiB
C
424 lines
14 KiB
C
#include "train.h"
|
|
#include "tga.h"
|
|
#include "nn.h"
|
|
#include "safetensor.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <time.h>
|
|
#include <dirent.h>
|
|
|
|
/* ---- Data sample ---- */

/* One labelled training example extracted from a glyph-sheet cell. */
typedef struct {
    float input[300]; /* 15x20 binary glyph bitmap, row-major; 1.0 = ink, 0.0 = empty */
    float shape[10]; /* shape-class bits A,B,C,D,E,F,G,H,J,K (each 0.0 or 1.0) */
    float ytype; /* 1.0 if glyph carries the kern Y-type flag, else 0.0 */
    float lowheight; /* 1.0 if glyph is flagged low-height, else 0.0 */
} Sample;
|
|
|
|
/* ---- Bit extraction from kerning mask ---- */

/* Decode the 10 shape-class bits out of a 24-bit kerning mask into
 * 0.0/1.0 floats.
 *
 * kerningMask = pixel >> 8 & 0xFFFFFF
 * Layout: Red=Y0000000, Green=JK000000, Blue=ABCDEFGH
 * After >> 8: bits 23-16 = Red[7:0], bits 15-8 = Green[7:0], bits 7-0 = Blue[7:0]
 * Y = bit 23 (already extracted separately as isKernYtype)
 * J = bit 15, K = bit 14
 * A = bit 7, B = bit 6, ..., H = bit 0
 *
 * shape[0..9] receives A,B,C,D,E,F,G,H,J,K in that order.
 */
static void extract_shape_bits(int kerning_mask, float *shape) {
    /* Bit position of each output slot: A..H then J, K. */
    static const int bit_pos[10] = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14 };

    for (int i = 0; i < 10; i++)
        shape[i] = (float)((kerning_mask >> bit_pos[i]) & 1);
}
|
|
|
|
/* ---- Collect samples from one TGA ---- */

/*
 * Scan one glyph sheet and append labelled Samples to `samples`.
 *
 * Each cell is cell_w x cell_h (16x20) pixels: columns 0-14 hold the
 * 15x20 glyph bitmap, and the rightmost column ("tag column") encodes
 * per-glyph metadata one pixel per row:
 *   rows Y+0..Y+4  5-bit glyph width, LSB first; width 0 => empty cell
 *   row  Y+5       low-height flag (any nonzero low byte)
 *   row  Y+6       kerning data pixel (layout: see extract_shape_bits)
 *
 * is_xyswap selects the alternative (column-major) cell ordering used
 * by "xyswap" sheets.
 *
 * Returns the number of samples written (capped at max_samples), or 0
 * if the image cannot be read.
 */
static int collect_from_sheet(const char *path, int is_xyswap, Sample *samples, int max_samples) {
    TgaImage *img = tga_read(path);
    if (!img) {
        fprintf(stderr, "Warning: cannot read %s\n", path);
        return 0;
    }

    int cell_w = 16, cell_h = 20;
    int cols = img->width / cell_w;
    int rows = img->height / cell_h;
    int total_cells = cols * rows;
    int count = 0;

    for (int index = 0; index < total_cells && count < max_samples; index++) {
        int cell_x, cell_y;
        if (is_xyswap) {
            /* Column-major ordering.
             * NOTE(review): both terms use `cols`; for a non-square
             * sheet one of them would normally be `rows` - confirm
             * against the xyswap sheet generator. */
            cell_x = (index / cols) * cell_w;
            cell_y = (index % cols) * cell_h;
        } else {
            /* Row-major ordering. */
            cell_x = (index % cols) * cell_w;
            cell_y = (index / cols) * cell_h;
        }

        int tag_x = cell_x + (cell_w - 1); /* rightmost column = tag column */
        int tag_y = cell_y;

        /* Read width (5-bit binary from Y+0..Y+4, LSB first) */
        int width = 0;
        for (int y = 0; y < 5; y++) {
            if (tga_get_pixel(img, tag_x, tag_y + y) & 0xFF)
                width |= (1 << y);
        }
        if (width == 0) continue; /* empty cell: no glyph stored here */

        /* Read kerning data pixel at Y+6.
         * NOTE(review): tagify() is an external helper - presumably it
         * normalises the raw pixel value; confirm its contract in tga.h. */
        uint32_t kern_pixel = tagify(tga_get_pixel(img, tag_x, tag_y + 6));
        if ((kern_pixel & 0xFF) == 0) continue; /* no kern data */

        /* Extract labels */
        int is_kern_ytype = (kern_pixel & 0x80000000u) != 0; /* top bit = Y-type flag */
        int kerning_mask = (int)((kern_pixel >> 8) & 0xFFFFFF); /* 24-bit shape mask */

        int is_low_height = (tga_get_pixel(img, tag_x, tag_y + 5) & 0xFF) != 0;

        Sample *s = &samples[count];
        extract_shape_bits(kerning_mask, s->shape);
        s->ytype = (float)is_kern_ytype;
        s->lowheight = (float)is_low_height;

        /* Extract 15x20 binary input from the glyph area
         * (binarised on bit 7 of the pixel's low byte) */
        for (int gy = 0; gy < 20; gy++) {
            for (int gx = 0; gx < 15; gx++) {
                uint32_t p = tga_get_pixel(img, cell_x + gx, cell_y + gy);
                s->input[gy * 15 + gx] = ((p & 0x80) != 0) ? 1.0f : 0.0f;
            }
        }

        count++;
    }

    tga_free(img);
    return count;
}
|
|
|
|
/* ---- Fisher-Yates shuffle ---- */

/* Uniformly permute the first n entries of arr in place.
 * Walks from the back, swapping each slot with a randomly chosen
 * earlier-or-equal slot (classic inside-out Fisher-Yates via rand()). */
static void shuffle_indices(int *arr, int n) {
    int pos = n - 1;
    while (pos > 0) {
        int pick = rand() % (pos + 1);
        int held = arr[pos];
        arr[pos] = arr[pick];
        arr[pick] = held;
        pos--;
    }
}
|
|
|
|
/* ---- Copy network weights ---- */
|
|
|
|
static void copy_tensor_data(Tensor *dst, Tensor *src) {
|
|
memcpy(dst->data, src->data, (size_t)src->size * sizeof(float));
|
|
}
|
|
|
|
/* Snapshot every learnable tensor of `net` into `best`.
 * Used both to checkpoint the best-so-far weights during training and,
 * with the arguments flipped by the caller, to restore them afterwards. */
static void save_weights(Network *net, Network *best) {
    Tensor *pairs[][2] = {
        { best->conv1.weight,          net->conv1.weight          },
        { best->conv1.bias,            net->conv1.bias            },
        { best->conv2.weight,          net->conv2.weight          },
        { best->conv2.bias,            net->conv2.bias            },
        { best->fc1.weight,            net->fc1.weight            },
        { best->fc1.bias,              net->fc1.bias              },
        { best->head_shape.weight,     net->head_shape.weight     },
        { best->head_shape.bias,       net->head_shape.bias       },
        { best->head_ytype.weight,     net->head_ytype.weight     },
        { best->head_ytype.bias,       net->head_ytype.bias       },
        { best->head_lowheight.weight, net->head_lowheight.weight },
        { best->head_lowheight.bias,   net->head_lowheight.bias   },
    };

    for (size_t i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
        copy_tensor_data(pairs[i][0], pairs[i][1]);
}
|
|
|
|
/* ---- Training ---- */
|
|
|
|
int train_model(void) {
|
|
const char *assets_dir = "../src/assets";
|
|
const int max_total = 16384;
|
|
|
|
Sample *all_samples = calloc((size_t)max_total, sizeof(Sample));
|
|
if (!all_samples) { fprintf(stderr, "Error: out of memory\n"); return 1; }
|
|
|
|
int total = 0;
|
|
|
|
/* Scan for *_variable.tga files */
|
|
DIR *dir = opendir(assets_dir);
|
|
if (!dir) {
|
|
fprintf(stderr, "Error: cannot open %s\n", assets_dir);
|
|
free(all_samples);
|
|
return 1;
|
|
}
|
|
|
|
struct dirent *ent;
|
|
int file_count = 0;
|
|
while ((ent = readdir(dir)) != NULL) {
|
|
const char *name = ent->d_name;
|
|
size_t len = strlen(name);
|
|
|
|
/* Must end with _variable.tga */
|
|
if (len < 14) continue;
|
|
if (strcmp(name + len - 13, "_variable.tga") != 0) continue;
|
|
|
|
/* Skip extrawide */
|
|
if (strstr(name, "extrawide") != NULL) continue;
|
|
|
|
/* Check for xyswap */
|
|
int is_xyswap = (strstr(name, "xyswap") != NULL);
|
|
|
|
char fullpath[512];
|
|
snprintf(fullpath, sizeof(fullpath), "%s/%s", assets_dir, name);
|
|
|
|
int got = collect_from_sheet(fullpath, is_xyswap, all_samples + total, max_total - total);
|
|
if (got > 0) {
|
|
printf(" %s: %d samples\n", name, got);
|
|
total += got;
|
|
file_count++;
|
|
}
|
|
}
|
|
closedir(dir);
|
|
|
|
printf("Collected %d samples from %d sheets\n", total, file_count);
|
|
if (total < 10) {
|
|
fprintf(stderr, "Error: too few samples to train\n");
|
|
free(all_samples);
|
|
return 1;
|
|
}
|
|
|
|
/* Print label distribution */
|
|
{
|
|
const char *bit_names[] = {"A","B","C","D","E","F","G","H","J","K","Ytype","LowH"};
|
|
int counts[12] = {0};
|
|
int nonzero_input = 0;
|
|
for (int i = 0; i < total; i++) {
|
|
for (int b = 0; b < 10; b++)
|
|
counts[b] += (int)all_samples[i].shape[b];
|
|
counts[10] += (int)all_samples[i].ytype;
|
|
counts[11] += (int)all_samples[i].lowheight;
|
|
for (int p = 0; p < 300; p++)
|
|
if (all_samples[i].input[p] > 0.5f) { nonzero_input++; break; }
|
|
}
|
|
printf("Label distribution:\n ");
|
|
for (int b = 0; b < 12; b++)
|
|
printf("%s:%d(%.0f%%) ", bit_names[b], counts[b], 100.0 * counts[b] / total);
|
|
printf("\n Non-empty inputs: %d/%d\n\n", nonzero_input, total);
|
|
}
|
|
|
|
/* Shuffle and split 80/20 */
|
|
srand((unsigned)time(NULL));
|
|
int *indices = malloc((size_t)total * sizeof(int));
|
|
for (int i = 0; i < total; i++) indices[i] = i;
|
|
shuffle_indices(indices, total);
|
|
|
|
int n_train = (int)(total * 0.8);
|
|
int n_val = total - n_train;
|
|
printf("Train: %d, Validation: %d\n\n", n_train, n_val);
|
|
|
|
/* Create network */
|
|
Network *net = network_create();
|
|
Network *best_net = network_create();
|
|
|
|
int batch_size = 32;
|
|
float lr = 0.001f, beta1 = 0.9f, beta2 = 0.999f, eps = 1e-8f;
|
|
int max_epochs = 200;
|
|
int patience = 10;
|
|
float best_val_loss = 1e30f;
|
|
int patience_counter = 0;
|
|
int best_epoch = 0;
|
|
int adam_t = 0;
|
|
|
|
for (int epoch = 0; epoch < max_epochs; epoch++) {
|
|
/* Shuffle training indices */
|
|
shuffle_indices(indices, n_train);
|
|
|
|
float train_loss = 0.0f;
|
|
int n_batches = 0;
|
|
|
|
/* Training loop */
|
|
for (int start = 0; start < n_train; start += batch_size) {
|
|
int bs = (start + batch_size <= n_train) ? batch_size : (n_train - start);
|
|
|
|
/* Build batch tensors */
|
|
int ishape[] = {bs, 1, 20, 15};
|
|
Tensor *input = tensor_alloc(4, ishape);
|
|
|
|
int sshape[] = {bs, 10};
|
|
Tensor *tgt_shape = tensor_alloc(2, sshape);
|
|
|
|
int yshape[] = {bs, 1};
|
|
Tensor *tgt_ytype = tensor_alloc(2, yshape);
|
|
Tensor *tgt_lh = tensor_alloc(2, yshape);
|
|
|
|
for (int i = 0; i < bs; i++) {
|
|
Sample *s = &all_samples[indices[start + i]];
|
|
memcpy(input->data + i * 300, s->input, 300 * sizeof(float));
|
|
memcpy(tgt_shape->data + i * 10, s->shape, 10 * sizeof(float));
|
|
tgt_ytype->data[i] = s->ytype;
|
|
tgt_lh->data[i] = s->lowheight;
|
|
}
|
|
|
|
/* Forward */
|
|
network_zero_grad(net);
|
|
network_forward(net, input, 1);
|
|
|
|
/* Loss */
|
|
float loss = network_bce_loss(net, tgt_shape, tgt_ytype, tgt_lh);
|
|
train_loss += loss;
|
|
n_batches++;
|
|
|
|
/* Backward */
|
|
network_backward(net, tgt_shape, tgt_ytype, tgt_lh);
|
|
|
|
/* Adam step */
|
|
adam_t++;
|
|
network_adam_step(net, lr, beta1, beta2, eps, adam_t);
|
|
|
|
tensor_free(input);
|
|
tensor_free(tgt_shape);
|
|
tensor_free(tgt_ytype);
|
|
tensor_free(tgt_lh);
|
|
}
|
|
|
|
train_loss /= (float)n_batches;
|
|
|
|
/* Validation */
|
|
float val_loss = 0.0f;
|
|
int val_batches = 0;
|
|
for (int start = 0; start < n_val; start += batch_size) {
|
|
int bs = (start + batch_size <= n_val) ? batch_size : (n_val - start);
|
|
|
|
int ishape[] = {bs, 1, 20, 15};
|
|
Tensor *input = tensor_alloc(4, ishape);
|
|
|
|
int sshape[] = {bs, 10};
|
|
Tensor *tgt_shape = tensor_alloc(2, sshape);
|
|
|
|
int yshape[] = {bs, 1};
|
|
Tensor *tgt_ytype = tensor_alloc(2, yshape);
|
|
Tensor *tgt_lh = tensor_alloc(2, yshape);
|
|
|
|
for (int i = 0; i < bs; i++) {
|
|
Sample *s = &all_samples[indices[n_train + start + i]];
|
|
memcpy(input->data + i * 300, s->input, 300 * sizeof(float));
|
|
memcpy(tgt_shape->data + i * 10, s->shape, 10 * sizeof(float));
|
|
tgt_ytype->data[i] = s->ytype;
|
|
tgt_lh->data[i] = s->lowheight;
|
|
}
|
|
|
|
network_forward(net, input, 0);
|
|
val_loss += network_bce_loss(net, tgt_shape, tgt_ytype, tgt_lh);
|
|
val_batches++;
|
|
|
|
tensor_free(input);
|
|
tensor_free(tgt_shape);
|
|
tensor_free(tgt_ytype);
|
|
tensor_free(tgt_lh);
|
|
}
|
|
|
|
val_loss /= (float)val_batches;
|
|
|
|
printf("Epoch %3d: train_loss=%.4f val_loss=%.4f", epoch + 1, (double)train_loss, (double)val_loss);
|
|
|
|
if (val_loss < best_val_loss) {
|
|
best_val_loss = val_loss;
|
|
best_epoch = epoch + 1;
|
|
patience_counter = 0;
|
|
save_weights(net, best_net);
|
|
printf(" *best*");
|
|
} else {
|
|
patience_counter++;
|
|
}
|
|
printf("\n");
|
|
|
|
if (patience_counter >= patience) {
|
|
printf("\nEarly stopping at epoch %d (best epoch: %d)\n", epoch + 1, best_epoch);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Restore best weights and save */
|
|
save_weights(best_net, net);
|
|
safetensor_save("autokem.safetensors", net, total, best_epoch, best_val_loss);
|
|
|
|
/* Compute final per-bit accuracy on validation set */
|
|
{
|
|
const char *bit_names[] = {"A","B","C","D","E","F","G","H","J","K","Ytype","LowH"};
|
|
int correct_per_bit[12] = {0};
|
|
int total_per_bit = n_val;
|
|
int n_examples = 0;
|
|
const int max_examples = 8;
|
|
|
|
printf("\nGlyph Tags — validation predictions:\n");
|
|
|
|
for (int i = 0; i < n_val; i++) {
|
|
Sample *s = &all_samples[indices[n_train + i]];
|
|
float output[12];
|
|
network_infer(net, s->input, output);
|
|
|
|
int pred_bits[12], tgt_bits[12];
|
|
int any_mismatch = 0;
|
|
for (int b = 0; b < 10; b++) {
|
|
pred_bits[b] = output[b] >= 0.5f ? 1 : 0;
|
|
tgt_bits[b] = (int)s->shape[b];
|
|
if (pred_bits[b] == tgt_bits[b]) correct_per_bit[b]++;
|
|
else any_mismatch = 1;
|
|
}
|
|
pred_bits[10] = output[10] >= 0.5f ? 1 : 0;
|
|
tgt_bits[10] = (int)s->ytype;
|
|
if (pred_bits[10] == tgt_bits[10]) correct_per_bit[10]++;
|
|
else any_mismatch = 1;
|
|
pred_bits[11] = output[11] >= 0.5f ? 1 : 0;
|
|
tgt_bits[11] = (int)s->lowheight;
|
|
if (pred_bits[11] == tgt_bits[11]) correct_per_bit[11]++;
|
|
else any_mismatch = 1;
|
|
|
|
/* Print a few examples (mix of correct and mismatched) */
|
|
if (n_examples < max_examples && (any_mismatch || i < 4)) {
|
|
/* Build tag string: e.g. "ABCDEFGH(B)" or "AB(Y)" */
|
|
char actual[32] = "", predicted[32] = "";
|
|
int ap = 0, pp = 0;
|
|
const char shape_chars[] = "ABCDEFGHJK";
|
|
for (int b = 0; b < 10; b++) {
|
|
if (tgt_bits[b]) actual[ap++] = shape_chars[b];
|
|
if (pred_bits[b]) predicted[pp++] = shape_chars[b];
|
|
}
|
|
actual[ap] = '\0'; predicted[pp] = '\0';
|
|
|
|
char actual_tag[48], pred_tag[48];
|
|
snprintf(actual_tag, sizeof(actual_tag), "%s%s%s",
|
|
ap > 0 ? actual : "(empty)",
|
|
tgt_bits[10] ? "(Y)" : "(B)",
|
|
tgt_bits[11] ? " low" : "");
|
|
snprintf(pred_tag, sizeof(pred_tag), "%s%s%s",
|
|
pp > 0 ? predicted : "(empty)",
|
|
pred_bits[10] ? "(Y)" : "(B)",
|
|
pred_bits[11] ? " low" : "");
|
|
|
|
printf(" actual=%-20s pred=%-20s %s\n", actual_tag, pred_tag,
|
|
any_mismatch ? "MISMATCH" : "ok");
|
|
n_examples++;
|
|
}
|
|
}
|
|
|
|
printf("\nPer-bit accuracy (%d val samples):\n ", n_val);
|
|
int total_correct = 0;
|
|
for (int b = 0; b < 12; b++) {
|
|
printf("%s:%.1f%% ", bit_names[b], 100.0 * correct_per_bit[b] / total_per_bit);
|
|
total_correct += correct_per_bit[b];
|
|
}
|
|
printf("\n Overall: %d/%d (%.2f%%)\n",
|
|
total_correct, n_val * 12, 100.0 * total_correct / (n_val * 12));
|
|
}
|
|
|
|
network_free(net);
|
|
network_free(best_net);
|
|
free(all_samples);
|
|
free(indices);
|
|
return 0;
|
|
}
|