revised autokem model

2026-06-10 16:04:04 +09:00 · 2026-03-08 20:34:45 +09:00
parent 39603d897b
commit 244371aa9d
8 changed files with 774 additions and 235 deletions
--- a/Autokem/train_torch.py
+++ b/Autokem/train_torch.py
@@ -0,0 +1,496 @@
+#!/usr/bin/env python3
+"""
+PyTorch training script for Autokem — drop-in replacement for `autokem train`.
+
+Reads the same *_variable.tga sprite sheets, trains the same architecture,
+and saves weights in safetensors format loadable by the C inference code.
+
+Usage:
+    python train_keras.py                  # train with defaults
+    python train_keras.py --epochs 300     # override max epochs
+    python train_keras.py --lr 0.0005      # override learning rate
+    python train_keras.py --save model.safetensors
+
+Requirements:
+    pip install torch numpy
+"""
+
+import argparse
+import json
+import os
+import struct
+import sys
+from pathlib import Path
+
+import numpy as np
+
+# ---- TGA reader (matches OTFbuild/tga_reader.py and Autokem/tga.c) ----
+
+class TgaImage:
+    __slots__ = ('width', 'height', 'pixels')
+
+    def __init__(self, width, height, pixels):
+        self.width = width
+        self.height = height
+        self.pixels = pixels  # flat list of RGBA8888 ints
+
+    def get_pixel(self, x, y):
+        if x < 0 or x >= self.width or y < 0 or y >= self.height:
+            return 0
+        return self.pixels[y * self.width + x]
+
+
+def read_tga(path):
+    with open(path, 'rb') as f:
+        data = f.read()
+
+    pos = 0
+    id_length = data[pos]; pos += 1
+    _colour_map_type = data[pos]; pos += 1
+    image_type = data[pos]; pos += 1
+    pos += 5  # colour map spec
+    pos += 2  # x_origin
+    pos += 2  # y_origin
+    width = struct.unpack_from('<H', data, pos)[0]; pos += 2
+    height = struct.unpack_from('<H', data, pos)[0]; pos += 2
+    bits_per_pixel = data[pos]; pos += 1
+    descriptor = data[pos]; pos += 1
+    top_to_bottom = (descriptor & 0x20) != 0
+    bpp = bits_per_pixel // 8
+    pos += id_length
+
+    if image_type != 2 or bpp not in (3, 4):
+        raise ValueError(f"Unsupported TGA: type={image_type}, bpp={bits_per_pixel}")
+
+    pixels = [0] * (width * height)
+    for row in range(height):
+        y = row if top_to_bottom else (height - 1 - row)
+        for x in range(width):
+            b = data[pos]; g = data[pos+1]; r = data[pos+2]
+            a = data[pos+3] if bpp == 4 else 0xFF
+            pos += bpp
+            pixels[y * width + x] = (r << 24) | (g << 16) | (b << 8) | a
+
+    return TgaImage(width, height, pixels)
+
+
+def tagify(pixel):
+    return 0 if (pixel & 0xFF) == 0 else pixel
+
+
+# ---- Data collection (matches Autokem/train.c) ----
+
+def collect_from_sheet(path, is_xyswap):
+    """Extract labelled samples from a single TGA sheet."""
+    img = read_tga(path)
+    cell_w, cell_h = 16, 20
+    cols = img.width // cell_w
+    rows = img.height // cell_h
+    total_cells = cols * rows
+
+    inputs = []
+    labels = []
+
+    for index in range(total_cells):
+        if is_xyswap:
+            cell_x = (index // cols) * cell_w
+            cell_y = (index % cols) * cell_h
+        else:
+            cell_x = (index % cols) * cell_w
+            cell_y = (index // cols) * cell_h
+
+        tag_x = cell_x + (cell_w - 1)
+        tag_y = cell_y
+
+        # Width (5-bit)
+        width = 0
+        for y in range(5):
+            if img.get_pixel(tag_x, tag_y + y) & 0xFF:
+                width |= (1 << y)
+        if width == 0:
+            continue
+
+        # Kern data pixel at Y+6
+        kern_pixel = tagify(img.get_pixel(tag_x, tag_y + 6))
+        if (kern_pixel & 0xFF) == 0:
+            continue  # no kern data
+
+        # Extract labels
+        is_kern_ytype = 1.0 if (kern_pixel & 0x80000000) != 0 else 0.0
+        kerning_mask = (kern_pixel >> 8) & 0xFFFFFF
+        is_low_height = 1.0 if (img.get_pixel(tag_x, tag_y + 5) & 0xFF) != 0 else 0.0
+
+        # Shape bits: A(7) B(6) C(5) D(4) E(3) F(2) G(1) H(0) J(15) K(14)
+        shape = [
+            float((kerning_mask >> 7) & 1),  # A
+            float((kerning_mask >> 6) & 1),  # B
+            float((kerning_mask >> 5) & 1),  # C
+            float((kerning_mask >> 4) & 1),  # D
+            float((kerning_mask >> 3) & 1),  # E
+            float((kerning_mask >> 2) & 1),  # F
+            float((kerning_mask >> 1) & 1),  # G
+            float((kerning_mask >> 0) & 1),  # H
+            float((kerning_mask >> 15) & 1), # J
+            float((kerning_mask >> 14) & 1), # K
+        ]
+
+        # 15x20 binary input
+        inp = np.zeros((20, 15), dtype=np.float32)
+        for gy in range(20):
+            for gx in range(15):
+                p = img.get_pixel(cell_x + gx, cell_y + gy)
+                if (p & 0x80) != 0:
+                    inp[gy, gx] = 1.0
+
+        inputs.append(inp)
+        labels.append(shape + [is_kern_ytype, is_low_height])
+
+    return inputs, labels
+
+
+def collect_all_samples(assets_dir):
+    """Scan assets_dir for *_variable.tga, collect all labelled samples."""
+    all_inputs = []
+    all_labels = []
+    file_count = 0
+
+    for name in sorted(os.listdir(assets_dir)):
+        if not name.endswith('_variable.tga'):
+            continue
+        if 'extrawide' in name:
+            continue
+
+        is_xyswap = 'xyswap' in name
+        path = os.path.join(assets_dir, name)
+        inputs, labels = collect_from_sheet(path, is_xyswap)
+        if inputs:
+            print(f"  {name}: {len(inputs)} samples")
+            all_inputs.extend(inputs)
+            all_labels.extend(labels)
+            file_count += 1
+
+    return np.array(all_inputs), np.array(all_labels, dtype=np.float32), file_count
+
+
+# ---- Model (matches Autokem/nn.c architecture) ----
+
+def build_model():
+    """
+    Conv2D(1->32, 7x7, padding=1) -> SiLU
+    Conv2D(32->64, 7x7, padding=1) -> SiLU
+    GlobalAveragePooling2D -> [64]
+    Dense(256) -> SiLU
+    Dense(12) -> sigmoid
+    """
+    import torch
+    import torch.nn as nn
+
+    class Keminet(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = nn.Conv2d(1, 32, 7, padding=1)
+            self.conv2 = nn.Conv2d(32, 64, 7, padding=1)
+            self.fc1 = nn.Linear(64, 256)
+            # self.fc2 = nn.Linear(256, 48)
+            self.output = nn.Linear(256, 12)
+            self.tf = nn.SiLU()
+
+            # He init
+            for m in self.modules():
+                if isinstance(m, (nn.Conv2d, nn.Linear)):
+                    nn.init.kaiming_normal_(m.weight, a=0.01, nonlinearity='leaky_relu')
+                    if m.bias is not None:
+                        nn.init.zeros_(m.bias)
+
+        def forward(self, x):
+            x = self.tf(self.conv1(x))
+            x = self.tf(self.conv2(x))
+            x = x.mean(dim=(2, 3))  # global average pool
+            x = self.tf(self.fc1(x))
+            # x = self.tf(self.fc2(x))
+            x = torch.sigmoid(self.output(x))
+            return x
+
+    return Keminet()
+
+
+# ---- Safetensors export (matches Autokem/safetensor.c layout) ----
+
+def export_safetensors(model, path, total_samples, epochs, val_loss):
+    """
+    Save model weights in safetensors format compatible with the C code.
+
+    C code expects these tensor names with these shapes:
+      conv1.weight  [out_ch, in_ch, kh, kw]  — PyTorch matches this layout
+      conv1.bias    [out_ch]
+      conv2.weight  [out_ch, in_ch, kh, kw]
+      conv2.bias    [out_ch]
+      fc1.weight    [out_features, in_features]  — PyTorch matches this layout
+      fc1.bias      [out_features]
+      fc2.weight    [out_features, in_features]
+      fc2.bias      [out_features]
+      output.weight [out_features, in_features]
+      output.bias   [out_features]
+    """
+    tensor_names = [
+        'conv1.weight', 'conv1.bias',
+        'conv2.weight', 'conv2.bias',
+        'fc1.weight', 'fc1.bias',
+        # 'fc2.weight', 'fc2.bias',
+        'output.weight', 'output.bias',
+    ]
+
+    state = model.state_dict()
+
+    header = {}
+    header['__metadata__'] = {
+        'samples': str(total_samples),
+        'epochs': str(epochs),
+        'val_loss': f'{val_loss:.6f}',
+    }
+
+    data_parts = []
+    offset = 0
+    for name in tensor_names:
+        arr = state[name].detach().cpu().numpy().astype(np.float32)
+        raw = arr.tobytes()
+        header[name] = {
+            'dtype': 'F32',
+            'shape': list(arr.shape),
+            'data_offsets': [offset, offset + len(raw)],
+        }
+        data_parts.append(raw)
+        offset += len(raw)
+
+    header_json = json.dumps(header, separators=(',', ':')).encode('utf-8')
+    padded_len = (len(header_json) + 7) & ~7
+    header_json = header_json + b' ' * (padded_len - len(header_json))
+
+    with open(path, 'wb') as f:
+        f.write(struct.pack('<Q', len(header_json)))
+        f.write(header_json)
+        for part in data_parts:
+            f.write(part)
+
+    total_bytes = 8 + len(header_json) + offset
+    print(f"Saved model to {path} ({total_bytes} bytes)")
+
+
+def load_safetensors(model, path):
+    """Load weights from safetensors file into the PyTorch model."""
+    import torch
+
+    with open(path, 'rb') as f:
+        header_len = struct.unpack('<Q', f.read(8))[0]
+        header_json = f.read(header_len)
+        header = json.loads(header_json)
+        data_start = 8 + header_len
+
+        state = model.state_dict()
+        for name in state:
+            if name not in header:
+                print(f"  Warning: tensor '{name}' not in safetensors")
+                continue
+            entry = header[name]
+            off_start, off_end = entry['data_offsets']
+            f.seek(data_start + off_start)
+            raw = f.read(off_end - off_start)
+            arr = np.frombuffer(raw, dtype=np.float32).reshape(entry['shape'])
+            state[name] = torch.from_numpy(arr.copy())
+
+        model.load_state_dict(state)
+    print(f"Loaded weights from {path}")
+
+
+# ---- Pretty-print helpers ----
+
+BIT_NAMES = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'Ytype', 'LowH']
+SHAPE_CHARS = 'ABCDEFGHJK'
+
+
+def format_tag(bits_12):
+    """Format 12 binary bits as keming_machine tag string, e.g. 'ABCDEFGH(B)'."""
+    chars = ''.join(SHAPE_CHARS[i] for i in range(10) if bits_12[i])
+    if not chars:
+        chars = '(empty)'
+    mode = '(Y)' if bits_12[10] else '(B)'
+    low = ' low' if bits_12[11] else ''
+    return f'{chars}{mode}{low}'
+
+
+def print_label_distribution(labels, total):
+    counts = labels.sum(axis=0).astype(int)
+    parts = [f'{BIT_NAMES[b]}:{counts[b]}({100*counts[b]/total:.0f}%)' for b in range(12)]
+    print(f"Label distribution:\n  {' '.join(parts)}")
+
+
+def print_examples_and_accuracy(model, X_val, y_val, max_examples=8):
+    """Print example predictions and per-bit accuracy on validation set."""
+    import torch
+
+    model.eval()
+    with torch.no_grad():
+        preds = model(X_val).cpu().numpy()
+
+    y_np = y_val.cpu().numpy() if hasattr(y_val, 'cpu') else y_val
+    pred_bits = (preds >= 0.5).astype(int)
+    tgt_bits = y_np.astype(int)
+
+    n_val = len(y_np)
+    n_examples = 0
+
+    print("\nGlyph Tags — validation predictions:")
+    for i in range(n_val):
+        mismatch = not np.array_equal(pred_bits[i], tgt_bits[i])
+        if n_examples < max_examples and (mismatch or i < 4):
+            actual_tag = format_tag(tgt_bits[i])
+            pred_tag = format_tag(pred_bits[i])
+            status = 'MISMATCH' if mismatch else 'ok'
+            print(f"  actual={actual_tag:<20s} pred={pred_tag:<20s} {status}")
+            n_examples += 1
+
+    correct = (pred_bits == tgt_bits)
+    per_bit = correct.sum(axis=0)
+    total_correct = correct.sum()
+
+    print(f"\nPer-bit accuracy ({n_val} val samples):")
+    parts = [f'{BIT_NAMES[b]}:{100*per_bit[b]/n_val:.1f}%' for b in range(12)]
+    print(f"  {' '.join(parts)}")
+    print(f"  Overall: {total_correct}/{n_val*12} ({100*total_correct/(n_val*12):.2f}%)")
+
+
+# ---- Main ----
+
+def main():
+    parser = argparse.ArgumentParser(description='Train Autokem model (PyTorch)')
+    parser.add_argument('--assets', default='../src/assets',
+                        help='Path to assets directory (default: ../src/assets)')
+    parser.add_argument('--save', default='autokem.safetensors',
+                        help='Output safetensors path (default: autokem.safetensors)')
+    parser.add_argument('--load', default=None,
+                        help='Load weights from safetensors before training')
+    parser.add_argument('--epochs', type=int, default=200, help='Max epochs (default: 200)')
+    parser.add_argument('--batch-size', type=int, default=32, help='Batch size (default: 32)')
+    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate (default: 0.001)')
+    parser.add_argument('--patience', type=int, default=10,
+                        help='Early stopping patience (default: 10)')
+    parser.add_argument('--val-split', type=float, default=0.2,
+                        help='Validation split (default: 0.2)')
+    args = parser.parse_args()
+
+    import torch
+    import torch.nn as nn
+
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"Device: {device}")
+
+    # Collect data
+    print("Collecting samples...")
+    X, y, file_count = collect_all_samples(args.assets)
+
+    if len(X) < 10:
+        print(f"Error: too few samples ({len(X)})", file=sys.stderr)
+        return 1
+
+    total = len(X)
+    print(f"Collected {total} samples from {file_count} sheets")
+    print_label_distribution(y, total)
+
+    nonzero = np.any(X.reshape(total, -1) > 0.5, axis=1).sum()
+    print(f"  Non-empty inputs: {nonzero}/{total}\n")
+
+    # Shuffle and split
+    rng = np.random.default_rng(42)
+    perm = rng.permutation(total)
+    X, y = X[perm], y[perm]
+
+    n_train = int(total * (1 - args.val_split))
+    X_train, X_val = X[:n_train], X[n_train:]
+    y_train, y_val = y[:n_train], y[n_train:]
+    print(f"Train: {n_train}, Validation: {total - n_train}\n")
+
+    # Convert to tensors — PyTorch conv expects [N, C, H, W]
+    X_train_t = torch.from_numpy(X_train[:, np.newaxis, :, :]).to(device)  # [N,1,20,15]
+    y_train_t = torch.from_numpy(y_train).to(device)
+    X_val_t = torch.from_numpy(X_val[:, np.newaxis, :, :]).to(device)
+    y_val_t = torch.from_numpy(y_val).to(device)
+
+    # Build model
+    model = build_model().to(device)
+
+    if args.load:
+        load_safetensors(model, args.load)
+
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f"Model parameters: {total_params} ({total_params * 4 / 1024:.1f} KB)\n")
+
+    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
+    loss_fn = nn.BCELoss()
+
+    best_val_loss = float('inf')
+    best_epoch = 0
+    patience_counter = 0
+    best_state = None
+
+    for epoch in range(1, args.epochs + 1):
+        # Training
+        model.train()
+        perm_train = torch.randperm(n_train, device=device)
+        train_loss = 0.0
+        n_batches = 0
+
+        for start in range(0, n_train, args.batch_size):
+            end = min(start + args.batch_size, n_train)
+            idx = perm_train[start:end]
+
+            optimizer.zero_grad()
+            pred = model(X_train_t[idx])
+            loss = loss_fn(pred, y_train_t[idx])
+            loss.backward()
+            optimizer.step()
+
+            train_loss += loss.item()
+            n_batches += 1
+
+        train_loss /= n_batches
+
+        # Validation
+        model.eval()
+        with torch.no_grad():
+            val_pred = model(X_val_t)
+            val_loss = loss_fn(val_pred, y_val_t).item()
+
+        marker = ''
+        if val_loss < best_val_loss:
+            best_val_loss = val_loss
+            best_epoch = epoch
+            patience_counter = 0
+            best_state = {k: v.clone() for k, v in model.state_dict().items()}
+            marker = '  *best*'
+        else:
+            patience_counter += 1
+
+        print(f"Epoch {epoch:3d}: train_loss={train_loss:.4f}  val_loss={val_loss:.4f}{marker}")
+
+        if patience_counter >= args.patience:
+            print(f"\nEarly stopping at epoch {epoch} (best epoch: {best_epoch})")
+            break
+
+    # Restore best weights
+    if best_state is not None:
+        model.load_state_dict(best_state)
+
+    print(f"\nBest epoch: {best_epoch}, val_loss: {best_val_loss:.6f}")
+
+    # Print accuracy
+    model.eval()
+    print_examples_and_accuracy(model, X_val_t, y_val, max_examples=8)
+
+    # Save
+    export_safetensors(model, args.save, total, best_epoch, best_val_loss)
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())