diff --git a/Autokem/apply.c b/Autokem/apply.c
index 43455f1..fe8a97f 100644
--- a/Autokem/apply.c
+++ b/Autokem/apply.c
@@ -2,6 +2,7 @@
 #include "tga.h"
 #include "nn.h"
 #include "safetensor.h"
+#include "unicode_lm.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -75,7 +76,8 @@ int apply_model(const char *tga_path) {
     int rows = img->height / cell_h;
     int total_cells = cols * rows;
 
-    int processed = 0, updated = 0, skipped = 0;
+    int start_code = sheet_start_code(basename);
+    int processed = 0, updated = 0, skipped = 0, fixed_lm = 0;
 
     for (int index = 0; index < total_cells; index++) {
         int cell_x, cell_y;
@@ -107,6 +109,21 @@ int apply_model(const char *tga_path) {
         int opcode = (int)((dir_pixel >> 24) & 0xFF);
         if (opcode != 0) { skipped++; continue; }
 
+        /* Modifier letters: fixed kern pixel, skip inference */
+        if (start_code >= 0 && is_modifier_letter(start_code + index)) {
+            if (is_subscript_modifier(start_code + index)) {
+                /* Subscript: CDEFGHJK(B), lowheight=1 */
+                tga_write_pixel(tga_path, img, tag_x, tag_y + 5, 0xFFFFFFFF);
+                tga_write_pixel(tga_path, img, tag_x, tag_y + 6, 0x00C03FFF);
+            } else {
+                /* Superscript: ABCDEF(B), lowheight=0 */
+                tga_write_pixel(tga_path, img, tag_x, tag_y + 5, 0x00000000);
+                tga_write_pixel(tga_path, img, tag_x, tag_y + 6, 0x0000FCFF);
+            }
+            processed++; updated++; fixed_lm++;
+            continue;
+        }
+
         /* Extract 15x20 binary input */
         float input[300];
         for (int gy = 0; gy < 20; gy++) {
@@ -155,8 +172,8 @@ int apply_model(const char *tga_path) {
         updated++;
     }
 
-    printf("Processed: %d cells, Updated: %d, Skipped: %d (of %d total)\n",
-           processed, updated, skipped, total_cells);
+    printf("Processed: %d cells, Updated: %d, Skipped: %d, Fixed Lm: %d (of %d total)\n",
+           processed, updated, skipped, fixed_lm, total_cells);
 
     tga_free(img);
     network_free(net);
diff --git a/Autokem/autokem.safetensors b/Autokem/autokem.safetensors
index d0e8e83..8143cc1 100644
--- a/Autokem/autokem.safetensors
+++ b/Autokem/autokem.safetensors
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c86449f1fdd0a57c22b4d2c80ab8b574429d325ba952b27f2a87837873b5118
+oid sha256:c20b8357be6f8464c62884fc8a477696324fce1d46c6ed86b816015d3101072c
 size 487640
diff --git a/Autokem/train.c b/Autokem/train.c
index c647296..418a44e 100644
--- a/Autokem/train.c
+++ b/Autokem/train.c
@@ -2,6 +2,7 @@
 #include "tga.h"
 #include "nn.h"
 #include "safetensor.h"
+#include "unicode_lm.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -42,7 +43,8 @@ static void extract_shape_bits(int kerning_mask, float *shape) {
 
 /* ---- Collect samples from one TGA ---- */
 
-static int collect_from_sheet(const char *path, int is_xyswap, Sample *samples, int max_samples) {
+static int collect_from_sheet(const char *path, int is_xyswap, int start_code,
+                              Sample *samples, int max_samples) {
     TgaImage *img = tga_read(path);
     if (!img) {
         fprintf(stderr, "Warning: cannot read %s\n", path);
@@ -76,6 +78,10 @@ static int collect_from_sheet(const char *path, int is_xyswap, Sample *samples,
         }
         if (width == 0) continue;
 
+        /* Skip modifier letters (superscripts/subscripts) */
+        if (start_code >= 0 && is_modifier_letter(start_code + index))
+            continue;
+
         /* Read kerning data pixel at Y+6 */
         uint32_t kern_pixel = tagify(tga_get_pixel(img, tag_x, tag_y + 6));
         if ((kern_pixel & 0xFF) == 0) continue; /* no kern data */
@@ -170,7 +176,9 @@ int train_model(void) {
         char fullpath[512];
         snprintf(fullpath, sizeof(fullpath), "%s/%s", assets_dir, name);
 
-        int got = collect_from_sheet(fullpath, is_xyswap, all_samples + total, max_total - total);
+        int start_code = sheet_start_code(name);
+        int got = collect_from_sheet(fullpath, is_xyswap, start_code,
+                                     all_samples + total, max_total - total);
         if (got > 0) {
             printf("  %s: %d samples\n", name, got);
             total += got;
diff --git a/Autokem/train_torch.py b/Autokem/train_torch.py
index a915440..425c980 100644
--- a/Autokem/train_torch.py
+++ b/Autokem/train_torch.py
@@ -20,10 +20,26 @@ import json
 import os
 import struct
 import sys
+import unicodedata
 from pathlib import Path
 
 import numpy as np
 
+# ---- Sheet code ranges (imported from OTFbuild/sheet_config.py) ----
+
+_otfbuild = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'OTFbuild')
+sys.path.insert(0, _otfbuild)
+try:
+    from sheet_config import FILE_LIST as _FILE_LIST, CODE_RANGE as _CODE_RANGE
+    # zip() stops at the shorter list: sheets past the end of CODE_RANGE get no entry
+    _CODE_RANGE_MAP = dict(zip(_FILE_LIST, _CODE_RANGE))
+except ImportError:
+    _CODE_RANGE_MAP = {}  # sheet_config unavailable: every lookup yields no code range
+finally:
+    # Always drop the temporary search-path entry, even when the import failed
+    sys.path.remove(_otfbuild)
+
+
 # ---- TGA reader (matches OTFbuild/tga_reader.py and Autokem/tga.c) ----
 
 class TgaImage:
@@ -80,7 +96,7 @@ def tagify(pixel):
 
 # ---- Data collection (matches Autokem/train.c) ----
 
-def collect_from_sheet(path, is_xyswap):
+def collect_from_sheet(path, is_xyswap, code_range=None):
     """Extract labelled samples from a single TGA sheet."""
     img = read_tga(path)
     cell_w, cell_h = 16, 20
@@ -90,6 +106,7 @@ def collect_from_sheet(path, is_xyswap):
 
     inputs = []
     labels = []
+    skipped_lm = 0
 
     for index in range(total_cells):
         if is_xyswap:
@@ -110,6 +127,16 @@ def collect_from_sheet(path, is_xyswap):
         if width == 0:
             continue
 
+        # Skip modifier letters (superscripts/subscripts)
+        if code_range is not None and index < len(code_range):
+            cp = code_range[index]
+            try:
+                if unicodedata.category(chr(cp)) == 'Lm':
+                    skipped_lm += 1
+                    continue
+            except (ValueError, OverflowError):
+                pass
+
         # Kern data pixel at Y+6
         kern_pixel = tagify(img.get_pixel(tag_x, tag_y + 6))
         if (kern_pixel & 0xFF) == 0:
@@ -145,7 +172,7 @@ def collect_from_sheet(path, is_xyswap):
         inputs.append(inp)
         labels.append(shape + [is_kern_ytype, is_low_height])
 
-    return inputs, labels
+    return inputs, labels, skipped_lm
 
 
 def collect_all_samples(assets_dir):
@@ -153,6 +180,7 @@ def collect_all_samples(assets_dir):
     all_inputs = []
     all_labels = []
     file_count = 0
+    total_skipped_lm = 0
 
     for name in sorted(os.listdir(assets_dir)):
         if not name.endswith('_variable.tga'):
@@ -161,14 +189,20 @@ def collect_all_samples(assets_dir):
             continue
 
         is_xyswap = 'xyswap' in name
+        code_range = _CODE_RANGE_MAP.get(name, None)
         path = os.path.join(assets_dir, name)
-        inputs, labels = collect_from_sheet(path, is_xyswap)
+        inputs, labels, skipped_lm = collect_from_sheet(path, is_xyswap, code_range)
+        total_skipped_lm += skipped_lm
         if inputs:
-            print(f"  {name}: {len(inputs)} samples")
+            suffix = f" (skipped {skipped_lm} Lm)" if skipped_lm else ""
+            print(f"  {name}: {len(inputs)} samples{suffix}")
             all_inputs.extend(inputs)
             all_labels.extend(labels)
             file_count += 1
 
+    if total_skipped_lm:
+        print(f"  Total modifier letters filtered: {total_skipped_lm}")
+
     return np.array(all_inputs), np.array(all_labels, dtype=np.float32), file_count
 
 
diff --git a/Autokem/unicode_lm.h b/Autokem/unicode_lm.h
new file mode 100644
index 0000000..095144d
--- /dev/null
+++ b/Autokem/unicode_lm.h
@@ -0,0 +1,141 @@
+#ifndef UNICODE_LM_H
+#define UNICODE_LM_H
+
+#include <string.h>
+
+/*
+ * Unicode category Lm (Letter, modifier) range checks.
+ * Generated from Python unicodedata (Unicode 16.0).
+ *
+ * is_modifier_letter(cp)    — true for all Lm codepoints
+ * is_subscript_modifier(cp) — true for Lm codepoints with <sub> decomposition
+ */
+
+static inline int is_modifier_letter(int cp) {
+    /* Membership test over the 71 contiguous runs that together hold all
+     * 397 Lm codepoints: evaluates to 1 on a hit, 0 otherwise. */
+    return (cp >= 0x02B0 && cp <= 0x02C1)
+        || (cp >= 0x02C6 && cp <= 0x02D1)
+        || (cp >= 0x02E0 && cp <= 0x02E4)
+        || (cp == 0x02EC)
+        || (cp == 0x02EE)
+        || (cp == 0x0374)
+        || (cp == 0x037A)
+        || (cp == 0x0559)
+        || (cp == 0x0640)
+        || (cp >= 0x06E5 && cp <= 0x06E6)
+        || (cp >= 0x07F4 && cp <= 0x07F5)
+        || (cp == 0x07FA)
+        || (cp == 0x081A)
+        || (cp == 0x0824)
+        || (cp == 0x0828)
+        || (cp == 0x08C9)
+        || (cp == 0x0971)
+        || (cp == 0x0E46)
+        || (cp == 0x0EC6)
+        || (cp == 0x10FC)
+        || (cp == 0x17D7)
+        || (cp == 0x1843)
+        || (cp == 0x1AA7)
+        || (cp >= 0x1C78 && cp <= 0x1C7D)
+        || (cp >= 0x1D2C && cp <= 0x1D6A)
+        || (cp == 0x1D78)
+        || (cp >= 0x1D9B && cp <= 0x1DBF)
+        || (cp == 0x2071)
+        || (cp == 0x207F)
+        || (cp >= 0x2090 && cp <= 0x209C)
+        || (cp >= 0x2C7C && cp <= 0x2C7D)
+        || (cp == 0x2D6F)
+        || (cp == 0x2E2F)
+        || (cp == 0x3005)
+        || (cp >= 0x3031 && cp <= 0x3035)
+        || (cp == 0x303B)
+        || (cp >= 0x309D && cp <= 0x309E)
+        || (cp >= 0x30FC && cp <= 0x30FE)
+        || (cp == 0xA015)
+        || (cp >= 0xA4F8 && cp <= 0xA4FD)
+        || (cp == 0xA60C)
+        || (cp == 0xA67F)
+        || (cp >= 0xA69C && cp <= 0xA69D)
+        || (cp >= 0xA717 && cp <= 0xA71F)
+        || (cp == 0xA770)
+        || (cp == 0xA788)
+        || (cp >= 0xA7F2 && cp <= 0xA7F4)
+        || (cp >= 0xA7F8 && cp <= 0xA7F9)
+        || (cp == 0xA9CF)
+        || (cp == 0xA9E6)
+        || (cp == 0xAA70)
+        || (cp == 0xAADD)
+        || (cp >= 0xAAF3 && cp <= 0xAAF4)
+        || (cp >= 0xAB5C && cp <= 0xAB5F)
+        || (cp == 0xAB69)
+        || (cp == 0xFF70)
+        || (cp >= 0xFF9E && cp <= 0xFF9F)
+        || (cp >= 0x10780 && cp <= 0x10785)
+        || (cp >= 0x10787 && cp <= 0x107B0)
+        || (cp >= 0x107B2 && cp <= 0x107BA)
+        || (cp >= 0x16B40 && cp <= 0x16B43)
+        || (cp >= 0x16F93 && cp <= 0x16F9F)
+        || (cp >= 0x16FE0 && cp <= 0x16FE1)
+        || (cp == 0x16FE3)
+        || (cp >= 0x1AFF0 && cp <= 0x1AFF3)
+        || (cp >= 0x1AFF5 && cp <= 0x1AFFB)
+        || (cp >= 0x1AFFD && cp <= 0x1AFFE)
+        || (cp >= 0x1E030 && cp <= 0x1E06D)
+        || (cp >= 0x1E137 && cp <= 0x1E13D)
+        || (cp == 0x1E4EB)
+        || (cp == 0x1E94B);
+}
+
+static inline int is_subscript_modifier(int cp) {
+    /* The 49 Lm codepoints that carry a <sub> decomposition, grouped into
+     * four runs; evaluates to 1 when cp lies in any of them, else 0. */
+    return (cp >= 0x1D62  && cp <= 0x1D6A)      /*  9 */
+        || (cp >= 0x2090  && cp <= 0x209C)      /* 13 */
+        || (cp == 0x2C7C)                       /*  1 */
+        || (cp >= 0x1E051 && cp <= 0x1E06A);    /* 26 */
+}
+
+/*
+ * Map a sheet filename (substring match) to the first codepoint of its code
+ * range; returns -1 for NULL or an unrecognised name. Non-contiguous sheets
+ * (e.g. Devanagari) yield the start of their first sub-range only; cells
+ * beyond it won't collide with Lm codepoints in practice.
+ */
+static inline int sheet_start_code(const char *basename) {
+    if (!basename)                                         return -1;  /* strstr(NULL, ...) is undefined */
+    if (strstr(basename, "ascii_variable"))                return 0x00;
+    if (strstr(basename, "latinExtA_variable"))            return 0x100;
+    if (strstr(basename, "latinExtB_variable"))            return 0x180;
+    if (strstr(basename, "cyrilic_extC_variable"))         return 0x1C80;
+    if (strstr(basename, "cyrilic_extB_variable"))         return 0xA640;
+    if (strstr(basename, "cyrilic_bulgarian_variable"))    return 0xF0000;
+    if (strstr(basename, "cyrilic_serbian_variable"))      return 0xF0060;
+    if (strstr(basename, "cyrilic_variable"))              return 0x400;
+    if (strstr(basename, "halfwidth_fullwidth_variable"))  return 0xFF00;
+    if (strstr(basename, "unipunct_variable"))             return 0x2000;
+    if (strstr(basename, "greek_polytonic"))               return 0x1F00;
+    if (strstr(basename, "greek_variable"))                return 0x370;
+    if (strstr(basename, "thai_variable"))                 return 0xE00;
+    if (strstr(basename, "hayeren_variable"))              return 0x530;
+    if (strstr(basename, "kartuli_allcaps_variable"))      return 0x1C90;
+    if (strstr(basename, "kartuli_variable"))              return 0x10D0;
+    if (strstr(basename, "ipa_ext_variable"))              return 0x250;
+    if (strstr(basename, "latinExt_additional_variable"))  return 0x1E00;
+    if (strstr(basename, "tsalagi_variable"))              return 0x13A0;
+    if (strstr(basename, "phonetic_extensions_variable"))  return 0x1D00;
+    if (strstr(basename, "latinExtC_variable"))            return 0x2C60;
+    if (strstr(basename, "latinExtD_variable"))            return 0xA720;
+    if (strstr(basename, "internal_variable"))             return 0xFFE00;
+    if (strstr(basename, "letterlike_symbols_variable"))   return 0x2100;
+    if (strstr(basename, "enclosed_alphanumeric"))         return 0x1F100;
+    if (strstr(basename, "sundanese_variable"))            return 0x1B80;
+    if (strstr(basename, "control_pictures_variable"))     return 0x2400;
+    if (strstr(basename, "latinExtE_variable"))            return 0xAB30;
+    if (strstr(basename, "latinExtF_variable"))            return 0x10780;
+    if (strstr(basename, "latinExtG_variable"))            return 0x1DF00;
+    if (strstr(basename, "devanagari") && !strstr(basename, "internal")) return 0x900;
+    return -1;
+}
+
+#endif /* UNICODE_LM_H */
diff --git a/demo.PNG b/demo.PNG
index 3b14a54..803bef3 100644
Binary files a/demo.PNG and b/demo.PNG differ
diff --git a/demotext_unaligned.txt b/demotext_unaligned.txt
index 847ec9b..c1bb571 100755
--- a/demotext_unaligned.txt
+++ b/demotext_unaligned.txt
@@ -114,7 +114,7 @@ How multilingual? Real multilingual!
 ⁃ Basic Latin
 ⁃ Latin-1 Supplement
 ⁃ Latin Extended Additional
-⁃ Latin Extended-A/B/C/D
+⁃ Latin Extended-A/B/C/D/E/F/G
 ⁃ Armenian
 ⁃ Arrows
 ⁃ Bengali􏿆ᶠⁱ􀀀
diff --git a/src/assets/ipa_ext_variable.tga b/src/assets/ipa_ext_variable.tga
index 46c0002..19c9f07 100755
--- a/src/assets/ipa_ext_variable.tga
+++ b/src/assets/ipa_ext_variable.tga
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72a58fa1974770b16f1daed856319914514388c93c33ccbfd62822540d5a32d4
+oid sha256:bbc1d05ede81a0a1d98344edd3893bc6b092a5a3e7587466fcc42c3df48dc4cb
 size 225298
diff --git a/src/assets/latinExtD_variable.tga b/src/assets/latinExtD_variable.tga
index 4c3c401..b541c1b 100644
--- a/src/assets/latinExtD_variable.tga
+++ b/src/assets/latinExtD_variable.tga
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9e2af1ae604cadc8459aec306cd5325f4dc1bfae7b63ffd6f9346b7299d76ff
+oid sha256:bb60f2fc6af0b0b5d9d2757cabd601570405018a5248bc85c74c00747bcb7596
 size 286738
diff --git a/src/assets/latinExtF_variable.tga b/src/assets/latinExtF_variable.tga
new file mode 100644
index 0000000..acbf7c8
--- /dev/null
+++ b/src/assets/latinExtF_variable.tga
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa128782b9b5b9641a879723e23227a9e6bc6f292792cf751bb4c5da3c81f4d6
+size 81938
diff --git a/src/assets/latinExtG_variable.tga b/src/assets/latinExtG_variable.tga
new file mode 100644
index 0000000..1c9d475
--- /dev/null
+++ b/src/assets/latinExtG_variable.tga
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fd993ad3a979988ada0e7253c01190ab27bbc87f1339014320fd3e5015d8b50
+size 327698
diff --git a/src/assets/phonetic_extensions_variable.tga b/src/assets/phonetic_extensions_variable.tga
index 1d282cb..a734334 100644
--- a/src/assets/phonetic_extensions_variable.tga
+++ b/src/assets/phonetic_extensions_variable.tga
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:823083e1b8f00dd3b39f6249845a2eaad5ddf00bb9a27fe98bf3d273d3c0c17f
+oid sha256:2fca366fa083c07ebe17decc6537a6040f552c1f841f7639a4b7745a4837a56f
 size 245778
diff --git a/work_files/ipa_ext_variable.psd b/work_files/ipa_ext_variable.psd
index ececc39..812c1e5 100644
--- a/work_files/ipa_ext_variable.psd
+++ b/work_files/ipa_ext_variable.psd
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9d30b45f78d4c1b45fb7597df61490073ff781df6941c34cc837c13d81d641f
-size 227492
+oid sha256:83ae5847833cf2a62a56e76d7752567309473d99fbd49b640877ed97efa1d586
+size 217290
diff --git a/work_files/latinExtD_variable.psd b/work_files/latinExtD_variable.psd
index 3581720..39c0bf8 100644
--- a/work_files/latinExtD_variable.psd
+++ b/work_files/latinExtD_variable.psd
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c978e28c87b130f7f243f6b60269bb61930cb5457cb10ffea4ecca2982fc2f47
-size 328120
+oid sha256:11c589309af5d955e383a13bed4778c8f440ded0ac23ce8bdf73e746037b1430
+size 328092
diff --git a/work_files/latinExtF_variable.kra b/work_files/latinExtF_variable.kra
index 30b4b6c..1d8aa0a 100644
--- a/work_files/latinExtF_variable.kra
+++ b/work_files/latinExtF_variable.kra
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a3791ea9d40745fb6114f33b6918e0bc1f5251a80990c68b7815685e60b89cb
-size 41672
+oid sha256:5c24aa0777412faf4aab7ae567b75e98b17dc17e2eec31561ea26cf9e2985e0f
+size 48467
diff --git a/work_files/latinExtG_variable.kra b/work_files/latinExtG_variable.kra
new file mode 100644
index 0000000..755ee82
--- /dev/null
+++ b/work_files/latinExtG_variable.kra
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bf71d461ed69ca112cef36c9d5ba24f835f473f90518dcc85d863c648742e8a
+size 43820
diff --git a/work_files/phonetic_extensions_variable.psd b/work_files/phonetic_extensions_variable.psd
index 1536ac5..a2e20b2 100644
--- a/work_files/phonetic_extensions_variable.psd
+++ b/work_files/phonetic_extensions_variable.psd
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37b022f69eb885f38b4edaf0345f8540d821e2bb9af68d510ce24f677f548739
-size 268717
+oid sha256:aabb7b8c2be78c5f08c87b6f2cc8bd22e837b2e84b130e05683932c995d06bc6
+size 238793