Autokem: CNN-based glyph labeller for Keming Machine

This commit is contained in:
minjaesong
2026-03-06 15:43:47 +09:00
parent adab8fa0ef
commit 0c99a27ffe
15 changed files with 1748 additions and 1 deletions

22
Autokem/Makefile Normal file
View File

@@ -0,0 +1,22 @@
# Build configuration for the Autokem glyph labeller.
CC = gcc
CFLAGS = -Ofast -Wall -Wextra -std=c11
LDFLAGS = -lm
SRC = main.c tga.c nn.c safetensor.c train.c apply.c
OBJ = $(SRC:.c=.o)

# Default target: optimized build.
all: autokem

autokem: $(OBJ)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

# Debug build: target-specific CFLAGS override plus a forced full
# rebuild via 'clean'.
# NOTE(review): 'debug: clean autokem' relies on prerequisite ordering,
# which is not guaranteed under 'make -j'.
debug: CFLAGS = -g -Wall -Wextra -std=c11 -fsanitize=address,undefined
debug: LDFLAGS += -fsanitize=address,undefined
debug: clean autokem

clean:
	rm -f *.o autokem

.PHONY: all debug clean

164
Autokem/apply.c Normal file
View File

@@ -0,0 +1,164 @@
#include "apply.h"
#include "tga.h"
#include "nn.h"
#include "safetensor.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Copy file for backup */
/* Byte-for-byte copy of src into dst (used to create the .bak backup).
 * Returns 0 on success, -1 if either file cannot be opened or a write
 * comes up short. */
static int copy_file(const char *src, const char *dst) {
    FILE *in = fopen(src, "rb");
    if (in == NULL)
        return -1;
    FILE *out = fopen(dst, "wb");
    if (out == NULL) {
        fclose(in);
        return -1;
    }
    int rc = 0;
    char chunk[4096];
    for (;;) {
        size_t got = fread(chunk, 1, sizeof chunk, in);
        if (got == 0)
            break;
        if (fwrite(chunk, 1, got, out) != got) {
            rc = -1;
            break;
        }
    }
    fclose(in);
    fclose(out);
    return rc;
}
/* Run the trained model over every glyph cell of a variable-width
 * spritesheet and write the predicted kerning-class bits back into the
 * sheet's tag pixels.  A .bak copy of the file is made first.
 * Returns 0 on success, 1 on any validation or load failure.
 *
 * Cell geometry: 16x20 px cells; the rightmost pixel column of each
 * cell (x = cell_x + 15) holds per-glyph tag pixels, leaving a 15x20
 * glyph area that is fed to the network. */
int apply_model(const char *tga_path) {
    /* Validate filename: only "variable" sheets are labelled; the
     * "extrawide" layout is explicitly unsupported. */
    const char *basename = strrchr(tga_path, '/');
    basename = basename ? basename + 1 : tga_path;
    if (strstr(basename, "variable") == NULL) {
        fprintf(stderr, "Error: %s does not appear to be a variable sheet\n", tga_path);
        return 1;
    }
    if (strstr(basename, "extrawide") != NULL) {
        fprintf(stderr, "Error: extrawide sheets are not supported\n");
        return 1;
    }
    /* "xyswap" sheets enumerate glyph cells column-major. */
    int is_xyswap = (strstr(basename, "xyswap") != NULL);
    /* Create backup before any in-place pixel writes.
     * NOTE(review): snprintf truncation (paths longer than ~507 chars)
     * is not detected; the backup would then go to a clipped name. */
    char bakpath[512];
    snprintf(bakpath, sizeof(bakpath), "%s.bak", tga_path);
    if (copy_file(tga_path, bakpath) != 0) {
        fprintf(stderr, "Error: failed to create backup %s\n", bakpath);
        return 1;
    }
    printf("Backup: %s\n", bakpath);
    /* Load model weights from the fixed file in the working directory. */
    Network *net = network_create();
    if (safetensor_load("autokem.safetensors", net) != 0) {
        fprintf(stderr, "Error: failed to load model\n");
        network_free(net);
        return 1;
    }
    /* Load TGA */
    TgaImage *img = tga_read(tga_path);
    if (!img) {
        fprintf(stderr, "Error: cannot read %s\n", tga_path);
        network_free(net);
        return 1;
    }
    int cell_w = 16, cell_h = 20;
    int cols = img->width / cell_w;
    int rows = img->height / cell_h;
    int total_cells = cols * rows;
    int processed = 0, updated = 0, skipped = 0;
    for (int index = 0; index < total_cells; index++) {
        int cell_x, cell_y;
        if (is_xyswap) {
            /* Column-major traversal.
             * NOTE(review): both quotient and remainder use `cols`; for
             * non-square sheets confirm the modulus shouldn't be `rows`. */
            cell_x = (index / cols) * cell_w;
            cell_y = (index % cols) * cell_h;
        } else {
            cell_x = (index % cols) * cell_w;
            cell_y = (index / cols) * cell_h;
        }
        /* Tag column starts at the cell's top-right pixel. */
        int tag_x = cell_x + (cell_w - 1);
        int tag_y = cell_y;
        /* Read glyph width: 5-bit value, one bit per pixel down the tag
         * column (any nonzero low byte = bit set).  Width 0 = empty cell. */
        int width = 0;
        for (int y = 0; y < 5; y++) {
            if (tga_get_pixel(img, tag_x, tag_y + y) & 0xFF)
                width |= (1 << y);
        }
        if (width == 0) { skipped++; continue; }
        /* Check writeOnTop at Y+17 — skip if defined */
        uint32_t wot = tga_get_pixel(img, tag_x, tag_y + 17);
        if ((wot & 0xFF) != 0) { skipped++; continue; }
        /* Check compiler directive at Y+9 — skip if opcode != 0.
         * NOTE(review): tagify() is not visible here (presumably tga.h);
         * confirm it normalizes the pixel so the opcode sits in the top
         * byte as assumed below. */
        uint32_t dir_pixel = tagify(tga_get_pixel(img, tag_x, tag_y + 9));
        int opcode = (int)((dir_pixel >> 24) & 0xFF);
        if (opcode != 0) { skipped++; continue; }
        /* Extract the 15x20 glyph as a binary image; a pixel counts as
         * "on" when bit 7 of its low byte is set. */
        float input[300];
        for (int gy = 0; gy < 20; gy++) {
            for (int gx = 0; gx < 15; gx++) {
                uint32_t p = tga_get_pixel(img, cell_x + gx, cell_y + gy);
                input[gy * 15 + gx] = ((p & 0x80) != 0) ? 1.0f : 0.0f;
            }
        }
        /* Inference: 12 sigmoid outputs in [0,1]
         * (A..H, J, K, ytype, lowheight). */
        float output[12];
        network_infer(net, input, output);
        /* Threshold at 0.5 */
        int A = output[0] >= 0.5f;
        int B = output[1] >= 0.5f;
        int C = output[2] >= 0.5f;
        int D = output[3] >= 0.5f;
        int E = output[4] >= 0.5f;
        int F = output[5] >= 0.5f;
        int G = output[6] >= 0.5f;
        int H = output[7] >= 0.5f;
        int J = output[8] >= 0.5f;
        int K = output[9] >= 0.5f;
        int ytype = output[10] >= 0.5f;
        int lowheight = output[11] >= 0.5f;
        /* Compose Y+5 pixel: lowheight (alpha=0xFF when set) */
        uint32_t lh_pixel = lowheight ? 0x000000FF : 0x00000000;
        tga_write_pixel(tga_path, img, tag_x, tag_y + 5, lh_pixel);
        /* Compose Y+6 pixel:
         * Red byte: Y0000000 -> bit 31
         * Green byte: JK000000 -> bits 23,22
         * Blue byte: ABCDEFGH -> bits 15-8
         * Alpha: 0xFF = hasKernData */
        uint32_t pixel = 0;
        pixel |= (uint32_t)(ytype ? 0x80 : 0) << 24;
        pixel |= (uint32_t)((J ? 0x80 : 0) | (K ? 0x40 : 0)) << 16;
        pixel |= (uint32_t)(A<<7 | B<<6 | C<<5 | D<<4 | E<<3 | F<<2 | G<<1 | H) << 8;
        pixel |= 0xFF;
        tga_write_pixel(tga_path, img, tag_x, tag_y + 6, pixel);
        processed++;
        updated++;
    }
    printf("Processed: %d cells, Updated: %d, Skipped: %d (of %d total)\n",
           processed, updated, skipped, total_cells);
    tga_free(img);
    network_free(net);
    return 0;
}

8
Autokem/apply.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef APPLY_H
#define APPLY_H

/* Apply the trained model to a spritesheet in place.
 * Creates a .bak backup first, then writes predicted kerning bits into
 * the sheet's tag pixels.  Returns 0 on success, nonzero on any
 * validation, model-load, or image-read failure. */
int apply_model(const char *tga_path);

#endif

BIN
Autokem/autokem.safetensors LFS Normal file

Binary file not shown.

40
Autokem/main.c Normal file
View File

@@ -0,0 +1,40 @@
#include <stdio.h>
#include <string.h>
#include "train.h"
#include "apply.h"
#include "safetensor.h"
/* Write the command summary to stdout. */
static void print_usage(void) {
    fputs("Usage: autokem <command> [args]\n", stdout);
    fputs("Commands:\n", stdout);
    fputs(" train Train model on existing spritesheets\n", stdout);
    fputs(" apply <file.tga> Apply trained model to a spritesheet\n", stdout);
    fputs(" stats Print model statistics\n", stdout);
    fputs(" help Print this message\n", stdout);
}
/* Entry point: dispatch on the first command-line argument.
 * Exit status is the subcommand's return value, or 1 on usage errors. */
int main(int argc, char **argv) {
    if (argc < 2) {
        print_usage();
        return 1;
    }
    const char *cmd = argv[1];
    if (strcmp(cmd, "train") == 0)
        return train_model();
    if (strcmp(cmd, "apply") == 0) {
        if (argc < 3) {
            fprintf(stderr, "Error: apply requires a TGA file path\n");
            return 1;
        }
        return apply_model(argv[2]);
    }
    if (strcmp(cmd, "stats") == 0)
        return safetensor_stats("autokem.safetensors");
    if (strcmp(cmd, "help") == 0) {
        print_usage();
        return 0;
    }
    fprintf(stderr, "Unknown command: %s\n", cmd);
    print_usage();
    return 1;
}

556
Autokem/nn.c Normal file
View File

@@ -0,0 +1,556 @@
#define _GNU_SOURCE
#include "nn.h"
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
/* ---- Tensor ---- */
/* Allocate an uninitialized tensor with the given shape.
 * ndim must be in 1..4 — Tensor::shape has exactly 4 slots, so any
 * larger ndim would write out of bounds (UB in the previous version).
 * Aborts on invalid ndim or allocation failure so callers never see a
 * partially constructed tensor (no caller in this file checks for NULL). */
Tensor *tensor_alloc(int ndim, const int *shape) {
    if (ndim < 1 || ndim > 4)
        abort(); /* would overflow t->shape[4] otherwise */
    Tensor *t = malloc(sizeof *t);
    if (!t)
        abort();
    t->ndim = ndim;
    t->size = 1;
    for (int i = 0; i < ndim; i++) {
        t->shape[i] = shape[i];
        t->size *= shape[i];
    }
    for (int i = ndim; i < 4; i++) t->shape[i] = 0;
    t->data = malloc((size_t)t->size * sizeof(float));
    if (!t->data)
        abort();
    return t;
}
/* Allocate a tensor and clear every element to 0.0f. */
Tensor *tensor_zeros(int ndim, const int *shape) {
    Tensor *result = tensor_alloc(ndim, shape);
    memset(result->data, 0, sizeof(float) * (size_t)result->size);
    return result;
}
/* Release a tensor and its data buffer; NULL is a safe no-op. */
void tensor_free(Tensor *t) {
    if (t != NULL) {
        free(t->data);
        free(t);
    }
}
/* ---- RNG (Box-Muller) ---- */
static uint64_t rng_state = 0;
static void rng_seed(uint64_t s) { rng_state = s; }
static uint64_t xorshift64(void) {
uint64_t x = rng_state;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
rng_state = x;
return x;
}
static float rand_uniform(void) {
return (float)(xorshift64() & 0x7FFFFFFF) / (float)0x7FFFFFFF;
}
static float rand_normal(void) {
float u1, u2;
do { u1 = rand_uniform(); } while (u1 < 1e-10f);
u2 = rand_uniform();
return sqrtf(-2.0f * logf(u1)) * cosf(2.0f * (float)M_PI * u2);
}
/* He initialization: draws from N(0, sqrt(2/fan_in)), the usual choice
 * for ReLU-family activations. */
static void he_init(Tensor *w, int fan_in) {
    const float stddev = sqrtf(2.0f / (float)fan_in);
    for (int i = 0; i < w->size; i++) {
        w->data[i] = stddev * rand_normal();
    }
}
/* ---- Activations ---- */
/* LeakyReLU with negative slope 0.01. */
static inline float leaky_relu(float x) {
    if (x < 0.0f)
        return 0.01f * x;
    return x;
}
/* Derivative of leaky_relu with respect to its input. */
static inline float leaky_relu_grad(float x) {
    if (x >= 0.0f)
        return 1.0f;
    return 0.01f;
}
static inline float sigmoid_f(float x) {
if (x >= 0.0f) {
float ez = expf(-x);
return 1.0f / (1.0f + ez);
} else {
float ez = expf(x);
return ez / (1.0f + ez);
}
}
/* ---- Conv2D forward/backward ---- */
/* Set up a same-padding Conv2D layer: parameter tensors, gradient
 * accumulators and Adam moment buffers.  Weights get He init; biases
 * and all moment buffers start at zero. */
static void conv2d_init(Conv2D *c, int in_ch, int out_ch, int kh, int kw) {
    int wdims[4] = {out_ch, in_ch, kh, kw};
    int bdims[1] = {out_ch};
    c->in_ch = in_ch;
    c->out_ch = out_ch;
    c->kh = kh;
    c->kw = kw;
    c->weight = tensor_alloc(4, wdims);
    c->grad_weight = tensor_zeros(4, wdims);
    c->m_weight = tensor_zeros(4, wdims);
    c->v_weight = tensor_zeros(4, wdims);
    c->bias = tensor_zeros(1, bdims);
    c->grad_bias = tensor_zeros(1, bdims);
    c->m_bias = tensor_zeros(1, bdims);
    c->v_bias = tensor_zeros(1, bdims);
    c->input_cache = NULL;
    he_init(c->weight, in_ch * kh * kw);
}
/* Free every tensor owned by a Conv2D layer (input_cache may be NULL). */
static void conv2d_free(Conv2D *c) {
    Tensor *owned[] = {
        c->weight, c->bias, c->grad_weight, c->grad_bias,
        c->m_weight, c->v_weight, c->m_bias, c->v_bias,
        c->input_cache,
    };
    for (size_t i = 0; i < sizeof owned / sizeof owned[0]; i++)
        tensor_free(owned[i]);
}
/* Forward: input [batch, in_ch, H, W] -> output [batch, out_ch, H, W] (same padding).
 * Direct (non-im2col) convolution; taps that fall outside the image
 * behave as zero padding.  When `training` is nonzero the input is
 * deep-copied into c->input_cache for conv2d_backward. */
static Tensor *conv2d_forward(Conv2D *c, Tensor *input, int training) {
    int batch = input->shape[0];
    int in_ch = c->in_ch, out_ch = c->out_ch;
    int H = input->shape[2], W = input->shape[3];
    int kh = c->kh, kw = c->kw;
    int ph = kh / 2, pw = kw / 2; /* "same" padding for odd kernel sizes */
    if (training) {
        tensor_free(c->input_cache); /* drop cache from the previous step */
        c->input_cache = tensor_alloc(input->ndim, input->shape);
        memcpy(c->input_cache->data, input->data, (size_t)input->size * sizeof(float));
    }
    int oshape[] = {batch, out_ch, H, W};
    Tensor *out = tensor_alloc(4, oshape);
    for (int b = 0; b < batch; b++) {
        for (int oc = 0; oc < out_ch; oc++) {
            for (int oh = 0; oh < H; oh++) {
                for (int ow = 0; ow < W; ow++) {
                    float sum = c->bias->data[oc];
                    for (int ic = 0; ic < in_ch; ic++) {
                        for (int fh = 0; fh < kh; fh++) {
                            for (int fw = 0; fw < kw; fw++) {
                                int ih = oh + fh - ph;
                                int iw = ow + fw - pw;
                                /* skip out-of-bounds taps (zero padding) */
                                if (ih >= 0 && ih < H && iw >= 0 && iw < W) {
                                    float inp = input->data[((b * in_ch + ic) * H + ih) * W + iw];
                                    float wt = c->weight->data[((oc * in_ch + ic) * kh + fh) * kw + fw];
                                    sum += inp * wt;
                                }
                            }
                        }
                    }
                    out->data[((b * out_ch + oc) * H + oh) * W + ow] = sum;
                }
            }
        }
    }
    return out;
}
/* Backward: grad_output [batch, out_ch, H, W] -> grad_input [batch, in_ch, H, W].
 * ACCUMULATES into c->grad_weight / c->grad_bias (zeroed externally via
 * network_zero_grad).  Requires the forward pass to have run with
 * training != 0 so c->input_cache is populated. */
static Tensor *conv2d_backward(Conv2D *c, Tensor *grad_output) {
    Tensor *input = c->input_cache;
    int batch = input->shape[0];
    int in_ch = c->in_ch, out_ch = c->out_ch;
    int H = input->shape[2], W = input->shape[3];
    int kh = c->kh, kw = c->kw;
    int ph = kh / 2, pw = kw / 2;
    Tensor *grad_input = tensor_zeros(input->ndim, input->shape);
    for (int b = 0; b < batch; b++) {
        for (int oc = 0; oc < out_ch; oc++) {
            for (int oh = 0; oh < H; oh++) {
                for (int ow = 0; ow < W; ow++) {
                    float go = grad_output->data[((b * out_ch + oc) * H + oh) * W + ow];
                    c->grad_bias->data[oc] += go; /* dL/db = sum over positions */
                    for (int ic = 0; ic < in_ch; ic++) {
                        for (int fh = 0; fh < kh; fh++) {
                            for (int fw = 0; fw < kw; fw++) {
                                int ih = oh + fh - ph;
                                int iw = ow + fw - pw;
                                if (ih >= 0 && ih < H && iw >= 0 && iw < W) {
                                    /* dL/dW += go * x;  dL/dx += go * W */
                                    float inp = input->data[((b * in_ch + ic) * H + ih) * W + iw];
                                    c->grad_weight->data[((oc * in_ch + ic) * kh + fh) * kw + fw] += go * inp;
                                    grad_input->data[((b * in_ch + ic) * H + ih) * W + iw] +=
                                        go * c->weight->data[((oc * in_ch + ic) * kh + fh) * kw + fw];
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    return grad_input;
}
/* ---- Dense forward/backward ---- */
/* Set up a fully-connected layer: parameters, gradient accumulators
 * and Adam moment buffers.  Weights get He init; everything else zero. */
static void dense_init(Dense *d, int in_f, int out_f) {
    int wdims[2] = {out_f, in_f};
    int bdims[1] = {out_f};
    d->in_features = in_f;
    d->out_features = out_f;
    d->weight = tensor_alloc(2, wdims);
    d->grad_weight = tensor_zeros(2, wdims);
    d->m_weight = tensor_zeros(2, wdims);
    d->v_weight = tensor_zeros(2, wdims);
    d->bias = tensor_zeros(1, bdims);
    d->grad_bias = tensor_zeros(1, bdims);
    d->m_bias = tensor_zeros(1, bdims);
    d->v_bias = tensor_zeros(1, bdims);
    d->input_cache = NULL;
    he_init(d->weight, in_f);
}
/* Free every tensor owned by a Dense layer (input_cache may be NULL). */
static void dense_free(Dense *d) {
    Tensor *owned[] = {
        d->weight, d->bias, d->grad_weight, d->grad_bias,
        d->m_weight, d->v_weight, d->m_bias, d->v_bias,
        d->input_cache,
    };
    for (size_t i = 0; i < sizeof owned / sizeof owned[0]; i++)
        tensor_free(owned[i]);
}
/* Forward: input [batch, in_f] -> output [batch, out_f].
 * out[b][o] = bias[o] + sum_i in[b][i] * W[o][i].  When `training` is
 * nonzero the input is deep-copied for dense_backward. */
static Tensor *dense_forward(Dense *d, Tensor *input, int training) {
    const int batch = input->shape[0];
    const int in_f = d->in_features;
    const int out_f = d->out_features;
    if (training) {
        tensor_free(d->input_cache);
        d->input_cache = tensor_alloc(input->ndim, input->shape);
        memcpy(d->input_cache->data, input->data, (size_t)input->size * sizeof(float));
    }
    int oshape[] = {batch, out_f};
    Tensor *out = tensor_alloc(2, oshape);
    for (int b = 0; b < batch; b++) {
        const float *row = input->data + (size_t)b * (size_t)in_f;
        float *orow = out->data + (size_t)b * (size_t)out_f;
        for (int o = 0; o < out_f; o++) {
            const float *wrow = d->weight->data + (size_t)o * (size_t)in_f;
            float acc = d->bias->data[o];
            for (int i = 0; i < in_f; i++)
                acc += row[i] * wrow[i]; /* same accumulation order as before */
            orow[o] = acc;
        }
    }
    return out;
}
/* Backward: grad_output [batch, out_f] -> grad_input [batch, in_f].
 * ACCUMULATES into d->grad_weight / d->grad_bias (zeroed externally via
 * network_zero_grad).  Requires dense_forward to have run with
 * training != 0 so d->input_cache is populated. */
static Tensor *dense_backward(Dense *d, Tensor *grad_output) {
    Tensor *input = d->input_cache;
    int batch = input->shape[0];
    int in_f = d->in_features, out_f = d->out_features;
    int gshape[] = {batch, in_f};
    Tensor *grad_input = tensor_zeros(2, gshape);
    for (int b = 0; b < batch; b++) {
        for (int o = 0; o < out_f; o++) {
            float go = grad_output->data[b * out_f + o];
            d->grad_bias->data[o] += go;
            for (int i = 0; i < in_f; i++) {
                /* dL/dW[o][i] += go * x[i];  dL/dx[i] += go * W[o][i] */
                d->grad_weight->data[o * in_f + i] += go * input->data[b * in_f + i];
                grad_input->data[b * in_f + i] += go * d->weight->data[o * in_f + i];
            }
        }
    }
    return grad_input;
}
/* ---- LeakyReLU helpers on tensors ---- */
/* Elementwise LeakyReLU into a freshly allocated tensor of the same shape. */
static Tensor *apply_leaky_relu(Tensor *input) {
    Tensor *result = tensor_alloc(input->ndim, input->shape);
    const int n = input->size;
    for (int i = 0; i < n; i++) {
        result->data[i] = leaky_relu(input->data[i]);
    }
    return result;
}
/* Elementwise LeakyReLU gradient: grad_out scaled by the activation's
 * derivative evaluated at the cached pre-activation values. */
static Tensor *apply_leaky_relu_backward(Tensor *grad_output, Tensor *pre_activation) {
    Tensor *result = tensor_alloc(grad_output->ndim, grad_output->shape);
    const int n = grad_output->size;
    for (int i = 0; i < n; i++) {
        result->data[i] = grad_output->data[i] * leaky_relu_grad(pre_activation->data[i]);
    }
    return result;
}
/* ---- Sigmoid on tensor ---- */
/* Elementwise sigmoid into a freshly allocated tensor of the same shape. */
static Tensor *apply_sigmoid(Tensor *input) {
    Tensor *result = tensor_alloc(input->ndim, input->shape);
    const int n = input->size;
    for (int i = 0; i < n; i++) {
        result->data[i] = sigmoid_f(input->data[i]);
    }
    return result;
}
/* ---- Adam step for a single parameter tensor ----
 * Standard Adam: exponential moving averages of the gradient (m) and
 * squared gradient (v), with bias correction compensating for their
 * zero initialization.  `t` is the 1-based global step count. */
static void adam_update(Tensor *param, Tensor *grad, Tensor *m, Tensor *v,
float lr, float beta1, float beta2, float eps, int t) {
    /* bias-correction denominators: 1 - beta^t */
    float bc1 = 1.0f - powf(beta1, (float)t);
    float bc2 = 1.0f - powf(beta2, (float)t);
    for (int i = 0; i < param->size; i++) {
        m->data[i] = beta1 * m->data[i] + (1.0f - beta1) * grad->data[i];
        v->data[i] = beta2 * v->data[i] + (1.0f - beta2) * grad->data[i] * grad->data[i];
        float m_hat = m->data[i] / bc1;
        float v_hat = v->data[i] / bc2;
        param->data[i] -= lr * m_hat / (sqrtf(v_hat) + eps);
    }
}
/* ---- Network ---- */
/* Build an untrained network:
 *   conv1 1->12 3x3, conv2 12->16 3x3, fc1 4800->24,
 *   heads: shape 24->10, ytype 24->1, lowheight 24->1.
 * (Flattened conv2 output is 16*20*15 = 4800, matching fc1's input.)
 * Also reseeds the global RNG from the clock.  Returns NULL if the
 * Network struct cannot be allocated — the previous version would have
 * dereferenced NULL inside conv2d_init; callers should check. */
Network *network_create(void) {
    rng_seed((uint64_t)time(NULL) ^ 0xDEADBEEF);
    Network *net = calloc(1, sizeof *net);
    if (!net)
        return NULL;
    conv2d_init(&net->conv1, 1, 12, 3, 3);
    conv2d_init(&net->conv2, 12, 16, 3, 3);
    dense_init(&net->fc1, 4800, 24);
    dense_init(&net->head_shape, 24, 10);
    dense_init(&net->head_ytype, 24, 1);
    dense_init(&net->head_lowheight, 24, 1);
    return net;
}
/* Free the network: all layers, any live activation caches, then the
 * struct itself.  NULL is a safe no-op. */
void network_free(Network *net) {
    if (net == NULL)
        return;
    conv2d_free(&net->conv1);
    conv2d_free(&net->conv2);
    dense_free(&net->fc1);
    dense_free(&net->head_shape);
    dense_free(&net->head_ytype);
    dense_free(&net->head_lowheight);
    Tensor *acts[] = {
        net->act_conv1, net->act_relu1, net->act_conv2, net->act_relu2,
        net->act_flat, net->act_fc1, net->act_relu3,
        net->out_shape, net->out_ytype, net->out_lowheight,
    };
    for (size_t i = 0; i < sizeof acts / sizeof acts[0]; i++)
        tensor_free(acts[i]);
    free(net);
}
/* Release every cached activation and reset its slot to NULL, so the
 * next forward pass starts clean and double frees are impossible. */
static void free_activations(Network *net) {
    Tensor **slots[] = {
        &net->act_conv1, &net->act_relu1, &net->act_conv2, &net->act_relu2,
        &net->act_flat, &net->act_fc1, &net->act_relu3,
        &net->out_shape, &net->out_ytype, &net->out_lowheight,
    };
    for (size_t i = 0; i < sizeof slots / sizeof slots[0]; i++) {
        tensor_free(*slots[i]);
        *slots[i] = NULL;
    }
}
/* Full forward pass.  input: [batch, 1, 20, 15].
 * Outputs land in net->out_*; every intermediate activation (needed by
 * network_backward) is kept on the net.  Previous activations are
 * released first so repeated calls do not leak. */
void network_forward(Network *net, Tensor *input, int training) {
    free_activations(net);
    /* Conv1 -> LeakyReLU */
    net->act_conv1 = conv2d_forward(&net->conv1, input, training);
    net->act_relu1 = apply_leaky_relu(net->act_conv1);
    /* Conv2 -> LeakyReLU */
    net->act_conv2 = conv2d_forward(&net->conv2, net->act_relu1, training);
    net->act_relu2 = apply_leaky_relu(net->act_conv2);
    /* Flatten: [batch, 16, 20, 15] -> [batch, 4800]; a raw memcpy is
     * valid because both layouts are contiguous row-major. */
    int batch = net->act_relu2->shape[0];
    int flat_size = net->act_relu2->size / batch;
    int fshape[] = {batch, flat_size};
    net->act_flat = tensor_alloc(2, fshape);
    memcpy(net->act_flat->data, net->act_relu2->data, (size_t)net->act_relu2->size * sizeof(float));
    /* FC1 -> LeakyReLU */
    net->act_fc1 = dense_forward(&net->fc1, net->act_flat, training);
    net->act_relu3 = apply_leaky_relu(net->act_fc1);
    /* Three sigmoid heads share act_relu3; logits are temporaries. */
    Tensor *logit_shape = dense_forward(&net->head_shape, net->act_relu3, training);
    Tensor *logit_ytype = dense_forward(&net->head_ytype, net->act_relu3, training);
    Tensor *logit_lowheight = dense_forward(&net->head_lowheight, net->act_relu3, training);
    net->out_shape = apply_sigmoid(logit_shape);
    net->out_ytype = apply_sigmoid(logit_ytype);
    net->out_lowheight = apply_sigmoid(logit_lowheight);
    tensor_free(logit_shape);
    tensor_free(logit_ytype);
    tensor_free(logit_lowheight);
}
/* Full backward pass for one batch (run network_forward with
 * training=1 first).  Gradients ACCUMULATE into each layer's grad_*
 * tensors; call network_zero_grad between optimizer steps. */
void network_backward(Network *net, Tensor *target_shape, Tensor *target_ytype, Tensor *target_lowheight) {
    int batch = net->out_shape->shape[0];
    /* BCE gradient at sigmoid: d_logit = pred - target (sigmoid and BCE
     * derivatives cancel), averaged over the batch. */
    /* Head: shape (10 outputs) */
    int gs[] = {batch, 10};
    Tensor *grad_logit_shape = tensor_alloc(2, gs);
    for (int i = 0; i < batch * 10; i++)
        grad_logit_shape->data[i] = (net->out_shape->data[i] - target_shape->data[i]) / (float)batch;
    int gy[] = {batch, 1};
    Tensor *grad_logit_ytype = tensor_alloc(2, gy);
    for (int i = 0; i < batch; i++)
        grad_logit_ytype->data[i] = (net->out_ytype->data[i] - target_ytype->data[i]) / (float)batch;
    Tensor *grad_logit_lh = tensor_alloc(2, gy);
    for (int i = 0; i < batch; i++)
        grad_logit_lh->data[i] = (net->out_lowheight->data[i] - target_lowheight->data[i]) / (float)batch;
    /* Backward through heads */
    Tensor *grad_relu3_s = dense_backward(&net->head_shape, grad_logit_shape);
    Tensor *grad_relu3_y = dense_backward(&net->head_ytype, grad_logit_ytype);
    Tensor *grad_relu3_l = dense_backward(&net->head_lowheight, grad_logit_lh);
    /* Sum gradients from the three heads (they all read act_relu3) */
    int r3shape[] = {batch, 24};
    Tensor *grad_relu3 = tensor_zeros(2, r3shape);
    for (int i = 0; i < batch * 24; i++)
        grad_relu3->data[i] = grad_relu3_s->data[i] + grad_relu3_y->data[i] + grad_relu3_l->data[i];
    tensor_free(grad_logit_shape);
    tensor_free(grad_logit_ytype);
    tensor_free(grad_logit_lh);
    tensor_free(grad_relu3_s);
    tensor_free(grad_relu3_y);
    tensor_free(grad_relu3_l);
    /* LeakyReLU backward (fc1 output) */
    Tensor *grad_fc1_out = apply_leaky_relu_backward(grad_relu3, net->act_fc1);
    tensor_free(grad_relu3);
    /* Dense fc1 backward */
    Tensor *grad_flat = dense_backward(&net->fc1, grad_fc1_out);
    tensor_free(grad_fc1_out);
    /* Unflatten: [batch, 4800] -> [batch, 16, 20, 15]; memcpy is valid
     * because both layouts are contiguous row-major. */
    int ushape[] = {batch, 16, 20, 15};
    Tensor *grad_relu2 = tensor_alloc(4, ushape);
    memcpy(grad_relu2->data, grad_flat->data, (size_t)grad_flat->size * sizeof(float));
    tensor_free(grad_flat);
    /* LeakyReLU backward (conv2 output) */
    Tensor *grad_conv2_out = apply_leaky_relu_backward(grad_relu2, net->act_conv2);
    tensor_free(grad_relu2);
    /* Conv2 backward */
    Tensor *grad_relu1 = conv2d_backward(&net->conv2, grad_conv2_out);
    tensor_free(grad_conv2_out);
    /* LeakyReLU backward (conv1 output) */
    Tensor *grad_conv1_out = apply_leaky_relu_backward(grad_relu1, net->act_conv1);
    tensor_free(grad_relu1);
    /* Conv1 backward; the gradient w.r.t. the input image is unused. */
    Tensor *grad_input = conv2d_backward(&net->conv1, grad_conv1_out);
    tensor_free(grad_conv1_out);
    tensor_free(grad_input);
}
/* One Adam update over every parameter tensor in the network, in the
 * same fixed layer order as serialization. */
void network_adam_step(Network *net, float lr, float beta1, float beta2, float eps, int t) {
#define STEP(layer) do { \
    adam_update(net->layer.weight, net->layer.grad_weight, net->layer.m_weight, net->layer.v_weight, lr, beta1, beta2, eps, t); \
    adam_update(net->layer.bias, net->layer.grad_bias, net->layer.m_bias, net->layer.v_bias, lr, beta1, beta2, eps, t); \
} while (0)
    STEP(conv1);
    STEP(conv2);
    STEP(fc1);
    STEP(head_shape);
    STEP(head_ytype);
    STEP(head_lowheight);
#undef STEP
}
/* Reset every gradient accumulator to zero (call between Adam steps). */
void network_zero_grad(Network *net) {
#define ZERO(layer) do { \
    memset(net->layer.grad_weight->data, 0, (size_t)net->layer.grad_weight->size * sizeof(float)); \
    memset(net->layer.grad_bias->data, 0, (size_t)net->layer.grad_bias->size * sizeof(float)); \
} while (0)
    ZERO(conv1);
    ZERO(conv2);
    ZERO(fc1);
    ZERO(head_shape);
    ZERO(head_ytype);
    ZERO(head_lowheight);
#undef ZERO
}
/* Clamped binary cross-entropy for one (prediction, target) pair;
 * predictions are clipped to [1e-7, 1-1e-7] so logf never sees 0. */
static float bce_term(float p, float t) {
    p = fmaxf(1e-7f, fminf(1.0f - 1e-7f, p));
    return -(t * logf(p) + (1.0f - t) * logf(1.0f - p));
}

/* BCE summed across all three heads, averaged over the batch (call
 * after network_forward so net->out_* are populated). */
float network_bce_loss(Network *net, Tensor *target_shape, Tensor *target_ytype, Tensor *target_lowheight) {
    const int batch = net->out_shape->shape[0];
    float loss = 0.0f;
    for (int i = 0; i < batch * 10; i++)
        loss += bce_term(net->out_shape->data[i], target_shape->data[i]);
    for (int i = 0; i < batch; i++)
        loss += bce_term(net->out_ytype->data[i], target_ytype->data[i]);
    for (int i = 0; i < batch; i++)
        loss += bce_term(net->out_lowheight->data[i], target_lowheight->data[i]);
    return loss / (float)batch;
}
/* Single-sample inference: run one 20x15 glyph (row-major float[300])
 * through the net and copy the 12 sigmoid outputs into output12 in the
 * order A,B,C,D,E,F,G,H,J,K, ytype, lowheight. */
void network_infer(Network *net, const float *input300, float *output12) {
    int ishape[] = {1, 1, 20, 15};
    Tensor *x = tensor_alloc(4, ishape);
    memcpy(x->data, input300, 300 * sizeof(float));
    network_forward(net, x, 0);
    memcpy(output12, net->out_shape->data, 10 * sizeof(float));
    output12[10] = net->out_ytype->data[0];
    output12[11] = net->out_lowheight->data[0];
    tensor_free(x);
}

90
Autokem/nn.h Normal file
View File

@@ -0,0 +1,90 @@
#ifndef NN_H
#define NN_H
#include <stdint.h>
/* ---- Tensor ---- */
typedef struct {
float *data;
int shape[4]; /* up to 4 dims */
int ndim;
int size; /* total number of elements */
} Tensor;
Tensor *tensor_alloc(int ndim, const int *shape);
Tensor *tensor_zeros(int ndim, const int *shape);
void tensor_free(Tensor *t);
/* ---- Layers ---- */
typedef struct {
int in_ch, out_ch, kh, kw;
Tensor *weight; /* [out_ch, in_ch, kh, kw] */
Tensor *bias; /* [out_ch] */
Tensor *grad_weight;
Tensor *grad_bias;
/* Adam moments */
Tensor *m_weight, *v_weight;
Tensor *m_bias, *v_bias;
/* cached input for backward */
Tensor *input_cache;
} Conv2D;
typedef struct {
int in_features, out_features;
Tensor *weight; /* [out_features, in_features] */
Tensor *bias; /* [out_features] */
Tensor *grad_weight;
Tensor *grad_bias;
Tensor *m_weight, *v_weight;
Tensor *m_bias, *v_bias;
Tensor *input_cache;
} Dense;
/* ---- Network ---- */
typedef struct {
Conv2D conv1; /* 1->12, 3x3 */
Conv2D conv2; /* 12->16, 3x3 */
Dense fc1; /* 4800->24 */
Dense head_shape; /* 24->10 (bits A-H, J, K) */
Dense head_ytype; /* 24->1 */
Dense head_lowheight;/* 24->1 */
/* activation caches (allocated per forward) */
Tensor *act_conv1;
Tensor *act_relu1;
Tensor *act_conv2;
Tensor *act_relu2;
Tensor *act_flat;
Tensor *act_fc1;
Tensor *act_relu3;
Tensor *out_shape;
Tensor *out_ytype;
Tensor *out_lowheight;
} Network;
/* Init / free */
Network *network_create(void);
void network_free(Network *net);
/* Forward pass. input: [batch, 1, 20, 15]. Outputs stored in net->out_* */
void network_forward(Network *net, Tensor *input, int training);
/* Backward pass. targets: shape[batch,10], ytype[batch,1], lowheight[batch,1] */
void network_backward(Network *net, Tensor *target_shape, Tensor *target_ytype, Tensor *target_lowheight);
/* Adam update step */
void network_adam_step(Network *net, float lr, float beta1, float beta2, float eps, int t);
/* Zero all gradients */
void network_zero_grad(Network *net);
/* Compute BCE loss (sum of all heads) */
float network_bce_loss(Network *net, Tensor *target_shape, Tensor *target_ytype, Tensor *target_lowheight);
/* Single-sample inference: input float[300], output float[12] (A-H,J,K,ytype,lowheight) */
void network_infer(Network *net, const float *input300, float *output12);
#endif

269
Autokem/safetensor.c Normal file
View File

@@ -0,0 +1,269 @@
#include "safetensor.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/* Tensor registry entry: a borrowed, named view of one parameter tensor.
 * `data` points into the live Network — nothing here owns memory. */
typedef struct {
    const char *name;
    float *data;
    int size;
    int ndim;
    int shape[4];
} TensorEntry;

/* Fill `entries` with a name->tensor view of every parameter in the
 * network, in the fixed serialization order.  Exactly 12 entries are
 * produced; callers size their arrays accordingly. */
static void collect_tensors(Network *net, TensorEntry *entries, int *count) {
    int n = 0;
    /* ADD(name, layer, field): append one view row for net->layer.field */
#define ADD(nm, layer, field) do { \
    entries[n].name = nm; \
    entries[n].data = net->layer.field->data; \
    entries[n].size = net->layer.field->size; \
    entries[n].ndim = net->layer.field->ndim; \
    for (int i = 0; i < net->layer.field->ndim; i++) \
        entries[n].shape[i] = net->layer.field->shape[i]; \
    n++; \
} while(0)
    ADD("conv1.weight", conv1, weight);
    ADD("conv1.bias", conv1, bias);
    ADD("conv2.weight", conv2, weight);
    ADD("conv2.bias", conv2, bias);
    ADD("fc1.weight", fc1, weight);
    ADD("fc1.bias", fc1, bias);
    ADD("head_shape.weight", head_shape, weight);
    ADD("head_shape.bias", head_shape, bias);
    ADD("head_ytype.weight", head_ytype, weight);
    ADD("head_ytype.bias", head_ytype, bias);
    ADD("head_lowheight.weight", head_lowheight, weight);
    ADD("head_lowheight.bias", head_lowheight, bias);
#undef ADD
    *count = n;
}
/* Serialize all network parameters to `path` in safetensors format:
 * [u64 LE header length][JSON header, space-padded to 8 bytes][raw F32 data].
 * Training metadata (samples, epochs, val_loss) goes in "__metadata__"
 * as strings, per the safetensors convention.
 * Returns 0 on success, -1 on header overflow or any I/O failure.
 * NOTE(review): the raw fwrite of `hlen` and float data assumes a
 * little-endian host, as the original did.
 * Fixes vs. the previous version: the header builder no longer calls
 * snprintf with a wrapped (cap - pos) size once pos passes the buffer
 * end (UB), and every fwrite/fclose result is checked so write failures
 * are reported instead of silently producing a corrupt file. */
int safetensor_save(const char *path, Network *net, int total_samples, int epochs, float val_loss) {
    TensorEntry entries[12];
    int count;
    collect_tensors(net, entries, &count);

    /* Build the JSON header in a fixed buffer, failing on truncation. */
    char header[8192];
    int pos = 0;
#define APPEND(...) do { \
    if ((size_t)pos >= sizeof(header)) goto overflow; \
    int wrote_ = snprintf(header + pos, sizeof(header) - (size_t)pos, __VA_ARGS__); \
    if (wrote_ < 0 || (size_t)pos + (size_t)wrote_ >= sizeof(header)) goto overflow; \
    pos += wrote_; \
} while (0)

    APPEND("{");
    /* metadata (values stored as strings per safetensors convention) */
    APPEND("\"__metadata__\":{\"samples\":\"%d\",\"epochs\":\"%d\",\"val_loss\":\"%.6f\"},",
           total_samples, epochs, (double)val_loss);
    /* tensor entries with running byte offsets into the data section */
    size_t data_offset = 0;
    for (int i = 0; i < count; i++) {
        size_t byte_size = (size_t)entries[i].size * sizeof(float);
        APPEND("\"%s\":{\"dtype\":\"F32\",\"shape\":[", entries[i].name);
        for (int d = 0; d < entries[i].ndim; d++) {
            if (d > 0) APPEND(",");
            APPEND("%d", entries[i].shape[d]);
        }
        APPEND("],\"data_offsets\":[%zu,%zu]}", data_offset, data_offset + byte_size);
        if (i < count - 1) APPEND(",");
        data_offset += byte_size;
    }
    APPEND("}");
#undef APPEND

    /* Pad with spaces to an 8-byte boundary. */
    size_t header_len = (size_t)pos;
    size_t padded = (header_len + 7) & ~(size_t)7;
    if (padded > sizeof(header))
        goto overflow;
    while (header_len < padded)
        header[header_len++] = ' ';

    FILE *f = fopen(path, "wb");
    if (!f) {
        fprintf(stderr, "Error: cannot open %s for writing\n", path);
        return -1;
    }
    /* 8-byte LE header length */
    uint64_t hlen = (uint64_t)header_len;
    if (fwrite(&hlen, 8, 1, f) != 1) goto write_error;
    /* JSON header */
    if (fwrite(header, 1, header_len, f) != header_len) goto write_error;
    /* Raw tensor data */
    for (int i = 0; i < count; i++) {
        if (fwrite(entries[i].data, sizeof(float), (size_t)entries[i].size, f)
                != (size_t)entries[i].size)
            goto write_error;
    }
    if (fclose(f) != 0) { /* fclose flushes; a failure here loses data */
        fprintf(stderr, "Error: write to %s failed\n", path);
        return -1;
    }
    printf("Saved model to %s (%zu bytes)\n", path, 8 + header_len + data_offset);
    return 0;

overflow:
    fprintf(stderr, "Error: safetensors header too large\n");
    return -1;
write_error:
    fprintf(stderr, "Error: write to %s failed\n", path);
    fclose(f);
    return -1;
}
/* Minimal JSON parser: locate `"name"` as a quoted key in the header
 * and read the two integers of its "data_offsets" array into
 * *off_start / *off_end.  Returns 0 on success, -1 if the key or the
 * offsets are missing.
 * NOTE(review): this matches the first occurrence of `name` as a quoted
 * string anywhere in the header — fine for the fixed tensor names used
 * here, and it relies on the caller having NUL-terminated `json` for
 * the strstr/strchr/sscanf calls below. */
static int find_tensor_offsets(const char *json, size_t json_len, const char *name,
size_t *off_start, size_t *off_end) {
    /* Search for "name": */
    size_t nlen = strlen(name);
    for (size_t i = 0; i + nlen + 3 < json_len; i++) {
        if (json[i] == '"' && strncmp(json + i + 1, name, nlen) == 0 && json[i + 1 + nlen] == '"') {
            /* Found the key, now find data_offsets */
            const char *doff = strstr(json + i, "\"data_offsets\"");
            if (!doff || (size_t)(doff - json) > json_len) return -1;
            const char *bracket = strchr(doff, '[');
            if (!bracket) return -1;
            if (sscanf(bracket, "[%zu,%zu]", off_start, off_end) != 2) return -1;
            return 0;
        }
    }
    return -1;
}
/* Load all parameter tensors from a safetensors file into `net`.
 * Each expected tensor must exist and match the exact byte size implied
 * by the network architecture.  Returns 0 on success, -1 on any error.
 * Fixes vs. the previous version: the header malloc is checked (NULL
 * would have been passed to fread), fseek failures are detected, and
 * the repeated free/fclose/return sequences are collapsed into a single
 * goto-based cleanup path. */
int safetensor_load(const char *path, Network *net) {
    int rc = -1;
    char *json = NULL;
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "Error: cannot open %s\n", path);
        return -1;
    }
    uint64_t header_len;
    if (fread(&header_len, 8, 1, f) != 1)
        goto out;
    json = malloc((size_t)header_len + 1);
    if (!json) {
        fprintf(stderr, "Error: out of memory reading %s\n", path);
        goto out;
    }
    if (fread(json, 1, (size_t)header_len, f) != (size_t)header_len)
        goto out;
    json[header_len] = '\0'; /* terminate for the strstr-based parser */
    long data_start = 8 + (long)header_len;
    TensorEntry entries[12];
    int count;
    collect_tensors(net, entries, &count);
    for (int i = 0; i < count; i++) {
        size_t off_start, off_end;
        if (find_tensor_offsets(json, (size_t)header_len, entries[i].name, &off_start, &off_end) != 0) {
            fprintf(stderr, "Error: tensor '%s' not found in %s\n", entries[i].name, path);
            goto out;
        }
        size_t byte_size = off_end - off_start;
        if (byte_size != (size_t)entries[i].size * sizeof(float)) {
            fprintf(stderr, "Error: size mismatch for '%s': expected %zu, got %zu\n",
                    entries[i].name, (size_t)entries[i].size * sizeof(float), byte_size);
            goto out;
        }
        /* offsets are relative to the start of the data section */
        if (fseek(f, data_start + (long)off_start, SEEK_SET) != 0 ||
            fread(entries[i].data, 1, byte_size, f) != byte_size) {
            fprintf(stderr, "Error: failed to read tensor '%s'\n", entries[i].name);
            goto out;
        }
    }
    rc = 0;
out:
    free(json);
    fclose(f);
    return rc;
}
/* Print a human-readable summary of a .safetensors model file.
 *
 * Shows the header size, any training metadata found in "__metadata__"
 * (samples / epochs / val_loss, stored as JSON string values), and for each
 * known tensor its shape and parameter count, plus the total.
 *
 * Returns 0 on success, -1 on open/read failure.
 */
int safetensor_stats(const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "Error: cannot open %s\n", path);
        return -1;
    }
    uint64_t header_len;
    if (fread(&header_len, 8, 1, f) != 1) { fclose(f); return -1; }
    char *json = malloc((size_t)header_len + 1);
    if (!json) { fclose(f); return -1; }  /* fix: malloc result was unchecked */
    if (fread(json, 1, (size_t)header_len, f) != (size_t)header_len) {
        free(json);
        fclose(f);
        return -1;
    }
    json[header_len] = '\0';
    fclose(f);
    printf("Model: %s\n", path);
    printf("Header length: %lu bytes\n", (unsigned long)header_len);
    /* Pull selected metadata values out of the "__metadata__" block.
       Values are JSON strings, so each is delimited by double quotes. */
    const char *meta = strstr(json, "\"__metadata__\"");
    if (meta) {
        const char *keys[] = {"samples", "epochs", "val_loss"};
        const char *labels[] = {"Training samples", "Epochs", "Validation loss"};
        for (int k = 0; k < 3; k++) {
            char search[64];
            snprintf(search, sizeof(search), "\"%s\"", keys[k]);
            const char *found = strstr(meta, search);
            if (!found) continue;
            /* skip past key and colon to opening quote of value */
            const char *colon = strchr(found + strlen(search), ':');
            if (!colon) continue;
            const char *vstart = strchr(colon, '"');
            if (!vstart) continue;
            vstart++;
            const char *vend = strchr(vstart, '"');
            if (!vend) continue;
            printf("%s: %.*s\n", labels[k], (int)(vend - vstart), vstart);
        }
    }
    /* List tensors */
    const char *tensor_names[] = {
        "conv1.weight", "conv1.bias", "conv2.weight", "conv2.bias",
        "fc1.weight", "fc1.bias",
        "head_shape.weight", "head_shape.bias",
        "head_ytype.weight", "head_ytype.bias",
        "head_lowheight.weight", "head_lowheight.bias"
    };
    int total_params = 0;
    printf("\nTensors:\n");
    for (int i = 0; i < 12; i++) {
        size_t off_start, off_end;
        if (find_tensor_offsets(json, (size_t)header_len, tensor_names[i], &off_start, &off_end) == 0) {
            /* params = byte span / sizeof(float32) */
            int params = (int)(off_end - off_start) / 4;
            total_params += params;
            /* Extract shape array text for display */
            const char *key = strstr(json, tensor_names[i]);
            if (key) {
                const char *shp = strstr(key, "\"shape\"");
                if (shp) {
                    const char *br = strchr(shp, '[');
                    const char *bre = strchr(shp, ']');
                    if (br && bre) {
                        printf("  %-28s shape=[%.*s] params=%d\n",
                               tensor_names[i], (int)(bre - br - 1), br + 1, params);
                    }
                }
            }
        }
    }
    printf("\nTotal parameters: %d (%.1f KB as float32)\n", total_params, (float)total_params * 4.0f / 1024.0f);
    free(json);
    return 0;
}

16
Autokem/safetensor.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef SAFETENSOR_H
#define SAFETENSOR_H
#include "nn.h"
/* Save network weights to .safetensors format.
   total_samples/epochs/val_loss are presumably stored in the file's
   __metadata__ block (safetensor_stats reads "samples"/"epochs"/"val_loss"
   from there) — confirm against safetensor_save's definition. */
int safetensor_save(const char *path, Network *net, int total_samples, int epochs, float val_loss);
/* Load network weights from .safetensors file. Returns 0 on success, -1 on error. */
int safetensor_load(const char *path, Network *net);
/* Print model stats from .safetensors file. Returns 0 on success, -1 on error. */
int safetensor_stats(const char *path);
#endif

105
Autokem/tga.c Normal file
View File

@@ -0,0 +1,105 @@
#include "tga.h"
#include <stdlib.h>
#include <string.h>
/* Read an uncompressed 32-bit true-colour TGA (type 2) into RGBA8888.
 *
 * Only colour_map_type==0, image_type==2, 32 bpp files are accepted.
 * Rows are normalised so pixels[0] is the top-left pixel regardless of the
 * file's origin bit (descriptor bit 5).
 *
 * Returns a heap-allocated TgaImage (free with tga_free) or NULL on error.
 */
TgaImage *tga_read(const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) return NULL;
    uint8_t header[18];
    if (fread(header, 1, 18, f) != 18) { fclose(f); return NULL; }
    uint8_t id_length = header[0];
    uint8_t colour_map_type = header[1];
    uint8_t image_type = header[2];
    /* colour map spec (bytes 3-7) is ignored; image spec starts at byte 8 */
    uint16_t width = (uint16_t)(header[12] | (header[13] << 8));
    uint16_t height = (uint16_t)(header[14] | (header[15] << 8));
    uint8_t bpp = header[16];
    uint8_t descriptor = header[17];
    if (colour_map_type != 0 || image_type != 2 || bpp != 32) {
        fclose(f);
        return NULL;
    }
    int top_to_bottom = (descriptor & 0x20) != 0;
    /* skip the optional image ID block; fix: fseek result was ignored */
    if (id_length > 0 && fseek(f, id_length, SEEK_CUR) != 0) {
        fclose(f);
        return NULL;
    }
    long pixel_data_offset = 18 + id_length;
    TgaImage *img = malloc(sizeof *img);
    if (!img) { fclose(f); return NULL; }
    img->width = width;
    img->height = height;
    img->pixel_data_offset = pixel_data_offset;
    img->top_to_bottom = top_to_bottom;
    img->pixels = malloc((size_t)width * height * sizeof(uint32_t));
    /* fix: read one row per fread instead of one 4-byte fread per pixel */
    uint8_t *rowbuf = malloc((size_t)width * 4);
    if (!img->pixels || !rowbuf) {
        free(rowbuf);
        free(img->pixels);
        free(img);
        fclose(f);
        return NULL;
    }
    for (int ri = 0; ri < height; ri++) {
        /* bottom-up files store the last image row first */
        int y = top_to_bottom ? ri : (height - 1 - ri);
        if (fread(rowbuf, 1, (size_t)width * 4, f) != (size_t)width * 4) {
            free(rowbuf);
            free(img->pixels);
            free(img);
            fclose(f);
            return NULL;
        }
        for (int x = 0; x < width; x++) {
            const uint8_t *bgra = rowbuf + (size_t)x * 4;
            /* TGA stores BGRA, convert to RGBA8888 */
            uint32_t r = bgra[2], g = bgra[1], b = bgra[0], a = bgra[3];
            img->pixels[y * width + x] = (r << 24) | (g << 16) | (b << 8) | a;
        }
    }
    free(rowbuf);
    fclose(f);
    return img;
}
/* Fetch the RGBA8888 pixel at (x,y); out-of-range coordinates yield 0. */
uint32_t tga_get_pixel(const TgaImage *img, int x, int y) {
    int inside = (x >= 0) && (y >= 0) && (x < img->width) && (y < img->height);
    return inside ? img->pixels[y * img->width + x] : 0;
}
/* Write one pixel (RGBA8888) at (x,y) to the TGA file on disk, and mirror
 * the change into the in-memory pixel array.
 *
 * The file row is recomputed from the image origin so top-left (x,y)
 * coordinates land on the correct byte offset (reverse of tga_read's
 * row mapping).
 *
 * Returns 0 on success, -1 on bounds/open/seek/write failure.
 * Fix: fseek result is now checked, and the in-memory pixel is only
 * updated after the disk write succeeds (previously the cache could
 * diverge from the file on a failed write).
 */
int tga_write_pixel(const char *path, TgaImage *img, int x, int y, uint32_t rgba) {
    if (x < 0 || x >= img->width || y < 0 || y >= img->height) return -1;
    int file_row = img->top_to_bottom ? y : (img->height - 1 - y);
    long offset = img->pixel_data_offset + ((long)file_row * img->width + x) * 4;
    FILE *f = fopen(path, "r+b");
    if (!f) return -1;
    if (fseek(f, offset, SEEK_SET) != 0) {
        fclose(f);
        return -1;
    }
    /* convert RGBA8888 to TGA BGRA byte order */
    uint8_t bgra[4];
    bgra[2] = (rgba >> 24) & 0xFF; /* R */
    bgra[1] = (rgba >> 16) & 0xFF; /* G */
    bgra[0] = (rgba >> 8) & 0xFF;  /* B */
    bgra[3] = rgba & 0xFF;         /* A */
    size_t written = fwrite(bgra, 1, 4, f);
    fclose(f);
    if (written != 4) return -1;
    img->pixels[y * img->width + x] = rgba; /* keep cache coherent with disk */
    return 0;
}
/* Release a TgaImage and its pixel buffer; NULL is a safe no-op. */
void tga_free(TgaImage *img) {
    if (img != NULL) {
        free(img->pixels);
        free(img);
    }
}

33
Autokem/tga.h Normal file
View File

@@ -0,0 +1,33 @@
#ifndef TGA_H
#define TGA_H
#include <stdint.h>
#include <stdio.h>

/* In-memory view of an uncompressed 32-bit TGA sheet. */
typedef struct {
    int width;
    int height;
    uint32_t *pixels; /* RGBA8888: R<<24 | G<<16 | B<<8 | A */
    long pixel_data_offset; /* byte offset of pixel data in file */
    int top_to_bottom; /* nonzero when file rows run top-down (descriptor bit 5) */
} TgaImage;

/* Read an uncompressed 32-bit TGA file. Returns NULL on error. */
TgaImage *tga_read(const char *path);

/* Get pixel at (x,y) as RGBA8888. Returns 0 for out-of-bounds. */
uint32_t tga_get_pixel(const TgaImage *img, int x, int y);

/* Write a single pixel (RGBA8888) to TGA file on disk at (x,y).
   Opens/closes the file internally. */
int tga_write_pixel(const char *path, TgaImage *img, int x, int y, uint32_t rgba);

/* Free a TgaImage. */
void tga_free(TgaImage *img);

/* tagify: a pixel whose alpha (low byte) is zero collapses to 0;
   any other pixel passes through unchanged. */
static inline uint32_t tagify(uint32_t pixel) {
    return (pixel & 0xFFu) ? pixel : 0u;
}
#endif

423
Autokem/train.c Normal file
View File

@@ -0,0 +1,423 @@
#include "train.h"
#include "tga.h"
#include "nn.h"
#include "safetensor.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <dirent.h>
/* ---- Data sample ---- */
/* One labelled training example extracted from a spritesheet cell. */
typedef struct {
    float input[300]; /* 15x20 glyph bitmap, row-major, values 0.0/1.0 */
    float shape[10]; /* shape-class bits A,B,C,D,E,F,G,H,J,K (0.0/1.0) */
    float ytype; /* 1.0 when the glyph is kern-Y-type, else 0.0 */
    float lowheight; /* 1.0 when the glyph is low-height, else 0.0 */
} Sample;
/* ---- Bit extraction from kerning mask ---- */
/* kerningMask = pixel >> 8 & 0xFFFFFF
* Layout: Red=Y0000000, Green=JK000000, Blue=ABCDEFGH
* After >> 8: bits 23-16 = Red[7:0], bits 15-8 = Green[7:0], bits 7-0 = Blue[7:0]
* Y = bit 23 (already extracted separately as isKernYtype)
* J = bit 15, K = bit 14
* A = bit 7, B = bit 6, ..., H = bit 0
*/
/* Unpack the ten shape-class bits from a 24-bit kerning mask into shape[0..9].
 * A..H sit in bits 7..0 (MSB-first within the blue byte); J is bit 15 and
 * K is bit 14 (top bits of the green byte). */
static void extract_shape_bits(int kerning_mask, float *shape) {
    for (int i = 0; i < 8; i++)
        shape[i] = (float)((kerning_mask >> (7 - i)) & 1); /* A..H */
    shape[8] = (float)((kerning_mask >> 15) & 1); /* J */
    shape[9] = (float)((kerning_mask >> 14) & 1); /* K */
}
/* ---- Collect samples from one TGA ---- */
/* Scan one spritesheet TGA and append labelled samples to `samples`.
 *
 * Each 16x20 cell carries tag pixels in its rightmost column: rows Y+0..Y+4
 * encode the glyph width as a 5-bit number (alpha-bit per row), row Y+5 is
 * the low-height flag, and row Y+6 holds the kerning-data pixel. Cells with
 * zero width or no kerning data are skipped. The 15x20 glyph bitmap (the
 * cell minus the tag column) becomes the network input.
 *
 * Returns the number of samples written (0 if the sheet can't be read). */
static int collect_from_sheet(const char *path, int is_xyswap, Sample *samples, int max_samples) {
    TgaImage *img = tga_read(path);
    if (!img) {
        fprintf(stderr, "Warning: cannot read %s\n", path);
        return 0;
    }
    int cell_w = 16, cell_h = 20;
    int cols = img->width / cell_w;
    int rows = img->height / cell_h;
    int total_cells = cols * rows;
    int count = 0;
    for (int index = 0; index < total_cells && count < max_samples; index++) {
        int cell_x, cell_y;
        /* xyswap sheets enumerate cells column-major.
           NOTE(review): both / and % use `cols` here; for non-square sheets
           (rows != cols) column-major traversal would normally use `rows` —
           verify against how xyswap sheets are laid out. */
        if (is_xyswap) {
            cell_x = (index / cols) * cell_w;
            cell_y = (index % cols) * cell_h;
        } else {
            cell_x = (index % cols) * cell_w;
            cell_y = (index / cols) * cell_h;
        }
        int tag_x = cell_x + (cell_w - 1); /* rightmost column holds tag pixels */
        int tag_y = cell_y;
        /* Read width (5-bit binary from Y+0..Y+4, LSB first, alpha != 0 => bit set) */
        int width = 0;
        for (int y = 0; y < 5; y++) {
            if (tga_get_pixel(img, tag_x, tag_y + y) & 0xFF)
                width |= (1 << y);
        }
        if (width == 0) continue; /* empty cell */
        /* Read kerning data pixel at Y+6; tagify zeroes it when alpha == 0 */
        uint32_t kern_pixel = tagify(tga_get_pixel(img, tag_x, tag_y + 6));
        if ((kern_pixel & 0xFF) == 0) continue; /* no kern data */
        /* Extract labels: Y-type flag is the top bit (red MSB), the
           24-bit kerning mask is the RGB part. */
        int is_kern_ytype = (kern_pixel & 0x80000000u) != 0;
        int kerning_mask = (int)((kern_pixel >> 8) & 0xFFFFFF);
        int is_low_height = (tga_get_pixel(img, tag_x, tag_y + 5) & 0xFF) != 0;
        Sample *s = &samples[count];
        extract_shape_bits(kerning_mask, s->shape);
        s->ytype = (float)is_kern_ytype;
        s->lowheight = (float)is_low_height;
        /* Extract 15x20 binary input from the glyph area (bit 7 of the
           alpha byte decides on/off) */
        for (int gy = 0; gy < 20; gy++) {
            for (int gx = 0; gx < 15; gx++) {
                uint32_t p = tga_get_pixel(img, cell_x + gx, cell_y + gy);
                s->input[gy * 15 + gx] = ((p & 0x80) != 0) ? 1.0f : 0.0f;
            }
        }
        count++;
    }
    tga_free(img);
    return count;
}
/* ---- Fisher-Yates shuffle ---- */
/* Fisher-Yates shuffle: walk from the tail, swapping each slot with a
 * uniformly chosen earlier (or same) slot. Uses rand(); seed with srand()
 * for reproducibility. */
static void shuffle_indices(int *arr, int n) {
    for (int k = n - 1; k > 0; k--) {
        int pick = rand() % (k + 1);
        int held = arr[k];
        arr[k] = arr[pick];
        arr[pick] = held;
    }
}
/* ---- Copy network weights ---- */
/* Raw float copy from src into dst; dst must be allocated with the same
 * element count as src. */
static void copy_tensor_data(Tensor *dst, Tensor *src) {
    size_t nbytes = (size_t)src->size * sizeof(float);
    memcpy(dst->data, src->data, nbytes);
}
/* Snapshot every learnable tensor of `net` into `best` (used to keep the
 * best-so-far weights during early stopping, and to restore them later). */
static void save_weights(Network *net, Network *best) {
    Tensor *src[] = {
        net->conv1.weight, net->conv1.bias,
        net->conv2.weight, net->conv2.bias,
        net->fc1.weight, net->fc1.bias,
        net->head_shape.weight, net->head_shape.bias,
        net->head_ytype.weight, net->head_ytype.bias,
        net->head_lowheight.weight, net->head_lowheight.bias
    };
    Tensor *dst[] = {
        best->conv1.weight, best->conv1.bias,
        best->conv2.weight, best->conv2.bias,
        best->fc1.weight, best->fc1.bias,
        best->head_shape.weight, best->head_shape.bias,
        best->head_ytype.weight, best->head_ytype.bias,
        best->head_lowheight.weight, best->head_lowheight.bias
    };
    for (size_t i = 0; i < sizeof(src) / sizeof(src[0]); i++)
        copy_tensor_data(dst[i], src[i]);
}
/* ---- Training ---- */
/* Train the CNN on all *_variable.tga sheets under ../src/assets.
 *
 * Pipeline: scan sheets -> collect labelled samples -> shuffle + 80/20
 * split -> mini-batch Adam training with early stopping on validation
 * loss -> restore best weights -> save to autokem.safetensors -> report
 * per-bit validation accuracy with a few example predictions.
 *
 * Returns 0 on success, 1 on error (missing assets dir / too few samples).
 */
int train_model(void) {
    const char *assets_dir = "../src/assets";
    const int max_total = 16384; /* hard cap on the dataset size */
    Sample *all_samples = calloc((size_t)max_total, sizeof(Sample));
    if (!all_samples) { fprintf(stderr, "Error: out of memory\n"); return 1; }
    int total = 0;
    /* Scan for *_variable.tga files */
    DIR *dir = opendir(assets_dir);
    if (!dir) {
        fprintf(stderr, "Error: cannot open %s\n", assets_dir);
        free(all_samples);
        return 1;
    }
    struct dirent *ent;
    int file_count = 0;
    while ((ent = readdir(dir)) != NULL) {
        const char *name = ent->d_name;
        size_t len = strlen(name);
        /* Must end with _variable.tga (13 chars), with at least one char before it */
        if (len < 14) continue;
        if (strcmp(name + len - 13, "_variable.tga") != 0) continue;
        /* Skip extrawide */
        if (strstr(name, "extrawide") != NULL) continue;
        /* Check for xyswap */
        int is_xyswap = (strstr(name, "xyswap") != NULL);
        char fullpath[512];
        snprintf(fullpath, sizeof(fullpath), "%s/%s", assets_dir, name);
        int got = collect_from_sheet(fullpath, is_xyswap, all_samples + total, max_total - total);
        if (got > 0) {
            printf(" %s: %d samples\n", name, got);
            total += got;
            file_count++;
        }
    }
    closedir(dir);
    printf("Collected %d samples from %d sheets\n", total, file_count);
    if (total < 10) {
        fprintf(stderr, "Error: too few samples to train\n");
        free(all_samples);
        return 1;
    }
    /* Print label distribution (per-bit positive counts and non-empty inputs) */
    {
        const char *bit_names[] = {"A","B","C","D","E","F","G","H","J","K","Ytype","LowH"};
        int counts[12] = {0};
        int nonzero_input = 0;
        for (int i = 0; i < total; i++) {
            for (int b = 0; b < 10; b++)
                counts[b] += (int)all_samples[i].shape[b];
            counts[10] += (int)all_samples[i].ytype;
            counts[11] += (int)all_samples[i].lowheight;
            for (int p = 0; p < 300; p++)
                if (all_samples[i].input[p] > 0.5f) { nonzero_input++; break; }
        }
        printf("Label distribution:\n ");
        for (int b = 0; b < 12; b++)
            printf("%s:%d(%.0f%%) ", bit_names[b], counts[b], 100.0 * counts[b] / total);
        printf("\n Non-empty inputs: %d/%d\n\n", nonzero_input, total);
    }
    /* Shuffle and split 80/20 */
    srand((unsigned)time(NULL));
    /* NOTE(review): indices malloc and the two network_create calls below
       are not NULL-checked — consider adding OOM handling. */
    int *indices = malloc((size_t)total * sizeof(int));
    for (int i = 0; i < total; i++) indices[i] = i;
    shuffle_indices(indices, total);
    int n_train = (int)(total * 0.8);
    int n_val = total - n_train; /* total >= 10 guarantees n_val >= 2 */
    printf("Train: %d, Validation: %d\n\n", n_train, n_val);
    /* Create network (plus a shadow copy holding the best-so-far weights) */
    Network *net = network_create();
    Network *best_net = network_create();
    int batch_size = 32;
    float lr = 0.001f, beta1 = 0.9f, beta2 = 0.999f, eps = 1e-8f; /* Adam hyperparams */
    int max_epochs = 200;
    int patience = 10; /* early-stopping window (epochs without improvement) */
    float best_val_loss = 1e30f;
    int patience_counter = 0;
    int best_epoch = 0;
    int adam_t = 0; /* Adam timestep, shared across epochs for bias correction */
    for (int epoch = 0; epoch < max_epochs; epoch++) {
        /* Shuffle training indices (only the first n_train entries) */
        shuffle_indices(indices, n_train);
        float train_loss = 0.0f;
        int n_batches = 0;
        /* Training loop over mini-batches; the last batch may be smaller */
        for (int start = 0; start < n_train; start += batch_size) {
            int bs = (start + batch_size <= n_train) ? batch_size : (n_train - start);
            /* Build batch tensors: input NCHW (bs,1,20,15), targets per head */
            int ishape[] = {bs, 1, 20, 15};
            Tensor *input = tensor_alloc(4, ishape);
            int sshape[] = {bs, 10};
            Tensor *tgt_shape = tensor_alloc(2, sshape);
            int yshape[] = {bs, 1};
            Tensor *tgt_ytype = tensor_alloc(2, yshape);
            Tensor *tgt_lh = tensor_alloc(2, yshape);
            for (int i = 0; i < bs; i++) {
                Sample *s = &all_samples[indices[start + i]];
                memcpy(input->data + i * 300, s->input, 300 * sizeof(float));
                memcpy(tgt_shape->data + i * 10, s->shape, 10 * sizeof(float));
                tgt_ytype->data[i] = s->ytype;
                tgt_lh->data[i] = s->lowheight;
            }
            /* Forward (training mode: second arg = 1) */
            network_zero_grad(net);
            network_forward(net, input, 1);
            /* Loss (binary cross-entropy across the three heads) */
            float loss = network_bce_loss(net, tgt_shape, tgt_ytype, tgt_lh);
            train_loss += loss;
            n_batches++;
            /* Backward */
            network_backward(net, tgt_shape, tgt_ytype, tgt_lh);
            /* Adam step */
            adam_t++;
            network_adam_step(net, lr, beta1, beta2, eps, adam_t);
            tensor_free(input);
            tensor_free(tgt_shape);
            tensor_free(tgt_ytype);
            tensor_free(tgt_lh);
        }
        train_loss /= (float)n_batches;
        /* Validation pass (inference mode, no grad/Adam updates) */
        float val_loss = 0.0f;
        int val_batches = 0;
        for (int start = 0; start < n_val; start += batch_size) {
            int bs = (start + batch_size <= n_val) ? batch_size : (n_val - start);
            int ishape[] = {bs, 1, 20, 15};
            Tensor *input = tensor_alloc(4, ishape);
            int sshape[] = {bs, 10};
            Tensor *tgt_shape = tensor_alloc(2, sshape);
            int yshape[] = {bs, 1};
            Tensor *tgt_ytype = tensor_alloc(2, yshape);
            Tensor *tgt_lh = tensor_alloc(2, yshape);
            for (int i = 0; i < bs; i++) {
                /* validation samples live past the training split in `indices` */
                Sample *s = &all_samples[indices[n_train + start + i]];
                memcpy(input->data + i * 300, s->input, 300 * sizeof(float));
                memcpy(tgt_shape->data + i * 10, s->shape, 10 * sizeof(float));
                tgt_ytype->data[i] = s->ytype;
                tgt_lh->data[i] = s->lowheight;
            }
            network_forward(net, input, 0);
            val_loss += network_bce_loss(net, tgt_shape, tgt_ytype, tgt_lh);
            val_batches++;
            tensor_free(input);
            tensor_free(tgt_shape);
            tensor_free(tgt_ytype);
            tensor_free(tgt_lh);
        }
        val_loss /= (float)val_batches;
        printf("Epoch %3d: train_loss=%.4f val_loss=%.4f", epoch + 1, (double)train_loss, (double)val_loss);
        /* Early stopping bookkeeping: snapshot weights on improvement */
        if (val_loss < best_val_loss) {
            best_val_loss = val_loss;
            best_epoch = epoch + 1;
            patience_counter = 0;
            save_weights(net, best_net);
            printf(" *best*");
        } else {
            patience_counter++;
        }
        printf("\n");
        if (patience_counter >= patience) {
            printf("\nEarly stopping at epoch %d (best epoch: %d)\n", epoch + 1, best_epoch);
            break;
        }
    }
    /* Restore best weights into `net` and save */
    save_weights(best_net, net);
    safetensor_save("autokem.safetensors", net, total, best_epoch, best_val_loss);
    /* Compute final per-bit accuracy on validation set */
    {
        const char *bit_names[] = {"A","B","C","D","E","F","G","H","J","K","Ytype","LowH"};
        int correct_per_bit[12] = {0};
        int total_per_bit = n_val;
        int n_examples = 0;
        const int max_examples = 8;
        printf("\nGlyph Tags — validation predictions:\n");
        for (int i = 0; i < n_val; i++) {
            Sample *s = &all_samples[indices[n_train + i]];
            float output[12];
            network_infer(net, s->input, output);
            int pred_bits[12], tgt_bits[12];
            int any_mismatch = 0;
            /* shape bits: threshold sigmoid outputs at 0.5 */
            for (int b = 0; b < 10; b++) {
                pred_bits[b] = output[b] >= 0.5f ? 1 : 0;
                tgt_bits[b] = (int)s->shape[b];
                if (pred_bits[b] == tgt_bits[b]) correct_per_bit[b]++;
                else any_mismatch = 1;
            }
            pred_bits[10] = output[10] >= 0.5f ? 1 : 0;
            tgt_bits[10] = (int)s->ytype;
            if (pred_bits[10] == tgt_bits[10]) correct_per_bit[10]++;
            else any_mismatch = 1;
            pred_bits[11] = output[11] >= 0.5f ? 1 : 0;
            tgt_bits[11] = (int)s->lowheight;
            if (pred_bits[11] == tgt_bits[11]) correct_per_bit[11]++;
            else any_mismatch = 1;
            /* Print a few examples (mix of correct and mismatched) */
            if (n_examples < max_examples && (any_mismatch || i < 4)) {
                /* Build tag string: e.g. "ABCDEFGH(B)" or "AB(Y)" */
                char actual[32] = "", predicted[32] = "";
                int ap = 0, pp = 0;
                const char shape_chars[] = "ABCDEFGHJK";
                for (int b = 0; b < 10; b++) {
                    if (tgt_bits[b]) actual[ap++] = shape_chars[b];
                    if (pred_bits[b]) predicted[pp++] = shape_chars[b];
                }
                actual[ap] = '\0'; predicted[pp] = '\0';
                char actual_tag[48], pred_tag[48];
                snprintf(actual_tag, sizeof(actual_tag), "%s%s%s",
                         ap > 0 ? actual : "(empty)",
                         tgt_bits[10] ? "(Y)" : "(B)",
                         tgt_bits[11] ? " low" : "");
                snprintf(pred_tag, sizeof(pred_tag), "%s%s%s",
                         pp > 0 ? predicted : "(empty)",
                         pred_bits[10] ? "(Y)" : "(B)",
                         pred_bits[11] ? " low" : "");
                printf(" actual=%-20s pred=%-20s %s\n", actual_tag, pred_tag,
                       any_mismatch ? "MISMATCH" : "ok");
                n_examples++;
            }
        }
        printf("\nPer-bit accuracy (%d val samples):\n ", n_val);
        int total_correct = 0;
        for (int b = 0; b < 12; b++) {
            printf("%s:%.1f%% ", bit_names[b], 100.0 * correct_per_bit[b] / total_per_bit);
            total_correct += correct_per_bit[b];
        }
        printf("\n Overall: %d/%d (%.2f%%)\n",
               total_correct, n_val * 12, 100.0 * total_correct / (n_val * 12));
    }
    network_free(net);
    network_free(best_net);
    free(all_samples);
    free(indices);
    return 0;
}

8
Autokem/train.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef TRAIN_H
#define TRAIN_H
/* Train the glyph-tag model on existing spritesheets in ../src/assets/
   and save the weights to autokem.safetensors.
   Returns 0 on success, nonzero on failure. */
int train_model(void);
#endif