From 60b07a325a720ff00082408c6cd6c5d8ac991567 Mon Sep 17 00:00:00 2001
From: minjaesong <alswo9628@gmail.com>
Date: Wed, 6 May 2026 05:31:55 +0900
Subject: [PATCH] xm2taud (wip), separate sustain and loop def

---
 2taud.sh                                      |    2 +
 TAUD_NOTE_EFFECTS.md                          |    7 +-
 assets/disk0/tvdos/bin/taut.js                |   35 +-
 assets/disk0/tvdos/bin/tauthdr.png            |  Bin 385 -> 490 bytes
 assets/disk0/tvdos/bin/tauthdr.r8             |  Bin 1260 -> 1288 bytes
 it2taud.py                                    |  295 ++--
 mod2taud.py                                   |   12 +-
 s3m2taud.py                                   |   22 +-
 taud_common.py                                |   43 +-
 terranmon.txt                                 |  151 +-
 .../torvald/tsvm/peripheral/AudioAdapter.kt   |  389 +++--
 xm2taud.py                                    | 1376 +++++++++++++++++
 12 files changed, 1954 insertions(+), 378 deletions(-)
 create mode 100644 xm2taud.py

diff --git a/2taud.sh b/2taud.sh
index 5a94774..6266dff 100755
--- a/2taud.sh
+++ b/2taud.sh
@@ -4,3 +4,5 @@ for f in *.mod; python3 mod2taud.py $f assets/disk0/(basename $f .mod).taud; end
 for f in *.s3m; python3 s3m2taud.py $f assets/disk0/(basename $f .s3m).taud; end
 for f in *.it; python3 it2taud.py $f assets/disk0/(basename $f .it).taud; end
 for f in *.xm; python3 xm2taud.py $f assets/disk0/(basename $f .xm).taud; end
+for f in *.mon; python3 mon2taud.py $f assets/disk0/(basename $f .mon).taud; end
+for f in *.MON; python3 mon2taud.py $f assets/disk0/(basename $f .MON).taud; end
diff --git a/TAUD_NOTE_EFFECTS.md b/TAUD_NOTE_EFFECTS.md
index 67ac3ed..13c338b 100644
--- a/TAUD_NOTE_EFFECTS.md
+++ b/TAUD_NOTE_EFFECTS.md
@@ -956,13 +956,14 @@ Effects in this section modifies the behaviour of the mixer. Primary intention o
 
 **Plain.** Sets mixer-wide behaviour flags. Available flags are:
 
-    0b 0000 0mfp
+    0b 0000 Fmfp
 
 - p unset: Linear panning mode (tracker-accurate). Centre panning gets 3 dB boost. Default setting.
 - p set: Equal-power panning mode. L/R amplitude is at 0.707 when centre-panned.
 
-- f unset: Linear tone mode. Pitch shift will behave like MIDI/ImpulseTracker/ScreamTracker linear mode. **Coarse and fine E/F arguments are stored as 4096-TET pitch units** and subtracted/added directly from the stored pitch.
-- f set: Amiga (cycle-based) tone mode. Pitch shift will behave like ProTracker/ScreamTracker default mode. **Coarse and fine E/F arguments are stored as raw tracker period units** (the unscaled byte/nibble from the source PT/S3M/IT file) and applied in Amiga period space. Tone portamento (G) remains linear regardless of mode.
+- Ff = 0 (`Ff` is the 2-bit value with F as the high bit and f as the low bit): Linear tone mode. Pitch shift will behave like MIDI/ImpulseTracker/ScreamTracker linear mode. **Coarse and fine E/F arguments are stored as 4096-TET pitch units** and subtracted/added directly from the stored pitch.
+- Ff = 1: Amiga (cycle-based) tone mode. Pitch shift will behave like ProTracker/ScreamTracker default mode. **Coarse and fine E/F arguments are stored as raw tracker period units** (the unscaled byte/nibble from the source PT/S3M/IT file) and applied in Amiga period space. Tone portamento (G) remains linear regardless of mode.
+- Ff = 2: Linear frequency mode. Pitch shift operates on the frequency value, i.e. linearly in frequency rather than in pitch or period.
 
 - m unset: IT fadeout-zero policy. An instrument with stored volume fadeout = 0 does **not** fade out on key-off; the voice plays through until the volume envelope ends it (or never, if there is no envelope).
 - m set: FT2 fadeout-zero policy. An instrument with stored volume fadeout = 0 is **cut** on the first tick after key-off (or NNA Note-Fade). Nonzero fadeouts behave identically in both modes — the per-tick decrement is always `fadeout / 65536` in unity-volume units.
diff --git a/assets/disk0/tvdos/bin/taut.js b/assets/disk0/tvdos/bin/taut.js
index 0531a16..00af359 100644
--- a/assets/disk0/tvdos/bin/taut.js
+++ b/assets/disk0/tvdos/bin/taut.js
@@ -275,7 +275,7 @@ function noteToStr(note) {
     if (note === 0x0000) return sym.keyoff
     if (pitchTablePresets[PITCH_PRESET_IDX].table.length === 0) return note.hex04()
     const [s, o] = pitchSymLut[note & 0xFFF]
-    return s + ((note >> 12) - 1 + o)
+    return s + ((note >> 12) - 1 + o).toString(16) // octave 10 -> 'a'
 }
 
 /**
@@ -1109,7 +1109,7 @@ function drawVoiceDetail(isVerticalLayout = false, ptn = null, activeRow = -1, c
         let cumLines = []
         if (cumState !== null && lowerH > 0) {
             const _apo  = Math.abs(cumState.pitchOff)
-            const _psgn = cumState.pitchOff > 0 ? '+' : cumState.pitchOff < 0 ? '-' : '='
+            const _psgn = cumState.pitchOff > 0 ? '+' : cumState.pitchOff < 0 ? '-' : ' '
             const _absN = (cumState.lastNote !== 0xFFFF && cumState.pitchOff !== 0)
                 ? noteToStr(Math.max(0, Math.min(0xFFFE, cumState.lastNote + cumState.pitchOff))) + ' '
                 : ''
@@ -1319,7 +1319,7 @@ if (fullPathObj === undefined) {
 
 const logofile = files.open("A:/tvdos/bin/tauthdr.r8")
 const logoBytes = logofile.bread(); logofile.close()
-const logoTexture = new gl.Texture(90, 14, logoBytes)
+const logoTexture = new gl.Texture(92, 14, logoBytes)
 const buttonfile = files.open("A:/tvdos/bin/tautbtn.r8")
 const buttonBytes = buttonfile.bread(); buttonfile.close()
 const buttonTexture = new gl.Texture(2, 28, buttonBytes)
@@ -1689,24 +1689,35 @@ function simulateRowState(ptnDat, uptoRow) {
         // Note column
         const isGRow = (effop === OP_G)
         const isNoteDelay = (effop === OP_S) && (((effarg >>> 12) & 0xF) === 0xD)
+        // Track whether this row reloads the channel's default volume.  Engine:
+        // triggerNote() resets channelVolume to 0x3F on fresh triggers, and an
+        // instrument byte on a tone-porta row also reloads default vol (matches
+        // schism csf_instrument_change inst_column branch).
+        let reloadDefaultVol = false
         if (note !== 0xFFFF && note !== 0xFFFE) {
             if (note === 0x0000) {
                 // key-off; sample stays referenced
             } else if (isGRow) {
                 portaTarget = note
+                if (inst !== 0) reloadDefaultVol = true
             } else if (isNoteDelay) {
                 // Delayed trigger: latched but doesn't fire on this row's first tick.
                 // For "state at end of row" treat as if it triggered.
                 lastNote = note
                 pitchOff = 0
                 portaTarget = -1
+                reloadDefaultVol = true
             } else {
                 lastNote = note
                 pitchOff = 0
                 portaTarget = -1
+                reloadDefaultVol = true
             }
         }
         if (inst !== 0) lastInst = inst
+        // Default vol reset must happen before the volume column so a SET selector
+        // can still override on the same row (engine order: triggerNote → applyVolColumn).
+        if (reloadDefaultVol) volAbs = 0x3F
 
         // Pre-scan effect column for S$80xx (8-bit pan SET wins over volcol/pancol SET).
         const rowHasS80 = (effop === OP_S) && (((effarg >>> 12) & 0xF) === 0x8)
@@ -2089,16 +2100,20 @@ function drawProjectContents(wo) {
     for (let y = PTNVIEW_OFFSET_Y; y < SCRH; y++) fillLine(y, colBackPtn, 255)
 
     let mixerflag = initialTrackerMixerflags
-    let flagstrbuf = ''
+    let flagStrSelected = []
     let flagstr = [
-        ['Linear pan','Equal-energy pan'],
-        ['Linear tone','Amiga tone'],
+        ['Linear pan','EquNrg pan'],
+        ['Linear pitch','Amiga pitch', 'Linear freq', ''], // TODO MONOTONE uses linear-freq pitch
+        ['IT fade','FT2 fade'],
     ]
     for (let i = 0; i < flagstr.length; i++) {
-        let s = flagstr[i][(mixerflag >>> i) & 1 != 0]
-        if (i > 0) flagstrbuf += ', ';
-        flagstrbuf += s
+        if (i != 1 && 1 != 3) {
+            let s = flagstr[i][(mixerflag >>> i) & 1 != 0]
+            flagStrSelected.push(s)
+        }
     }
+    let toneMode = (((mixerflag >>> 1) & 1)) | (((mixerflag >>> 3) & 1) << 1)
+    flagStrSelected.splice(1, 0, flagstr[1][toneMode])
 
 
     let projMeta = {
@@ -2106,7 +2121,7 @@ function drawProjectContents(wo) {
         Patterns: `${song.numPats}/4095 ($${song.numPats.hex03()})`,
         Cues: `${song.lastActiveCue}/1024 ($${song.lastActiveCue.hex03()})`,
         Notation: pitchTablePresets[PITCH_PRESET_IDX].name,
-        Flags: `${flagstrbuf} ($${mixerflag.hex02()})`,
+        Flags: `${flagStrSelected.join(', ')} ($${mixerflag.hex02()})`,
         GlobalVol: initialGlobalVolume,
         MixingVol: initialMixingVolume
     }
diff --git a/assets/disk0/tvdos/bin/tauthdr.png b/assets/disk0/tvdos/bin/tauthdr.png
index 6024a4874d7032b8edc34e8ad6e85cdaf3d68915..98dbc0035e2f44358dff0e9528e26c5d1e9c1455 100644
GIT binary patch
delta 478
zcmV<40U`c@1L^}IiBL{Q4GJ0x0000DNk~Le000170000E2nGNE0HUdTq>&*Qf1F7~
zK~z}7?N|#A!Y~Le%}(MOJc7se2%f>KU>94%7+)KsQAw7p4Bl1>uVqm7g5(~GERs#c
z@8^XF1}}%xV{p8lqc$br_m?L1%jbuz^4=~>b^<h$X{EHR{xnL#Cijm4z)z{TY6Y{8
zQQ+td@1t^AnW$xE1Zxv)C-a~Ae<rJHT-;?cvnBqf*`)dryNAuF5;o3YiNRjSB+J#s
zkZiiHa37&+BDixLJ~qsWMMAElKEx)9<z}|^ygx&(lYE$M7<ci4V@)&@Y<`*{CmHk>
z(!(0}tG;E^CP3?$O?>x^>U%XvS*qEh_C9JqY-=WaC7rY&N`RnmuA5+6f3^{<#11M2
z5bn43ewwlMDr1=mcEXQ0M_~AagAZ0eAY|LFiimzk2fRWoN|J0PU$RJ0Ba~q@H?D9V
zmbg1AQBF5bP@Gde&gYBccaqJXU$kQDy2D3kA=AA|9IR}H(^#BlIA@d7hFkJcy0Cl(
zF1_enU)5=v{F-e2vsfC>G@AUI&E#isK`A>({Gjrh9`7Gcv_p+xMVhhMZ<@g^H<)jA
U<@lxE#{d8T07*qoM6N<$g6VbLc>n+a

delta 372
zcmV-)0gL|X1Azk}iBL{Q4GJ0x0000DNk~Le000150000C2nGNE0OEhD=#e29e=|u$
zK~z}7?N<Q~f-nqh2`}*tKEY@E1mECSD#Hy+?YgotLkyP?v3AAXZr6bTK#$7}60XmC
z_&``g%K^u@6A{Bn!Eb<4t%^EJzaI%S;XN_LL;c5pQuIRd5ix4Ppx{ZhBz@T7+u2>9
z<SjYU^J{8JlH+$`%FZr?mJ?$jf5#|0#Z$M7qRCjCL?+1?AH#As*Q!x_S`-=6#)w>Z
z-LWu<W|9rHlA%ynEw=k+_3%ksO4igG?o-^EOPS}b79iygnaLJo9x&o?x}UFHgDN#x
zQLDcDe+WU1*xFo1u&VO$Jb3NaiOt0t4D~drS@GKWa(Zg1Rx2UY<CGFeN=1QQ&G2hm
z8;!2Hmb4VUy<;;Y?z2{-b6SHrCOwlQ&9%p=!}QRNFFRXn<-FAN*oK!Icik7+z6ySD
SEMq?a0000<MNUMnLSTaM(y7P*

diff --git a/assets/disk0/tvdos/bin/tauthdr.r8 b/assets/disk0/tvdos/bin/tauthdr.r8
index a8e171525d18872c85fa502f91c8c03e6e4f2c8d..fdeaeb737a961cf9625e682b454c514502f7f44a 100644
GIT binary patch
literal 1288
zcmbu6$qfTB3<KSNdoCm;I2w{t76_07`?0NSM9Jrzv(~D!A7j~f?_7It!SxvH9yHZM
z`-9z+eiP7a?i%2iovg<rYy1osQcSUfuFEeJTIIM-p~W7LlQPVBs}ST~zBU&$IqB%o
z)g?4=TBr+S$Y7Z9@)jm+T_OgaCexc%LFR7mt54#MHYxJmhcjK6l8inP8=;m9Ee^+U
zcd;s|?uQoB(J07b1PRG2Fv;IAUBZ?gAFU2fv$D80qe;q!m-TCmiZF&1fxYl&e(SCz
ze~MDP@3)mef@o}or)boI%_g)R+npzuAz80cK*Jv4IFt`jn6TvSE=WisF5ZF#Q&KK$
d8MZ4A{hHx**z37;3P`5=IA5jLB!`uzyeF?nd~yH)

delta 171
zcmeC+dc!%9Icj1&5SdJTug1o383-6A%QA-WLzw^nL+QzljLNJG@87>?n7oovnGY;_
znE^^ozQd@@#PEJHBa`}MEv6_&hRN+rkt`s|$pI|llOvdQCTlVaqYH4e1C0ZlID<JF
cY&8SJ<loF8FlIPQC@TX4!~g%2H?xEQ0Bc1>!T<mO

diff --git a/it2taud.py b/it2taud.py
index 9f1512c..b9dce56 100644
--- a/it2taud.py
+++ b/it2taud.py
@@ -54,6 +54,7 @@ from taud_common import (
     J_SEMI_TABLE,
     d_arg_to_col, resample_linear, rescale_offset_effects, encode_cue, deduplicate_patterns,
     normalise_sample, encode_song_entry,
+    CUE_INST_NOP, CUE_INST_HALT, CUE_INST_LEN, cue_instruction_len,
 )
 
 
@@ -467,8 +468,9 @@ def parse_samples(data: bytes, h: ITHeader, decompress: bool) -> list:
 
 class ITInstrument:
     __slots__ = ('name', 'dfp', 'gv', 'canonical_sample', 'canonical_volume',
-                 'vol_envelope', 'pan_envelope', 'vol_env_sustain', 'pan_env_sustain',
-                 'pf_envelope', 'pf_env_sustain', 'pf_is_filter',
+                 'vol_envelope', 'pan_envelope', 'pf_envelope', 'pf_is_filter',
+                 'vol_env_loop', 'pan_env_loop', 'pf_env_loop',
+                 'vol_env_sus', 'pan_env_sus', 'pf_env_sus',
                  'ifc', 'ifr', 'fadeout', 'pps', 'ppc', 'rv', 'rp', 'nna',
                  'dct', 'dca')
     # vol_envelope / pan_envelope / pf_envelope: list of 25 (value, minifloat_idx) tuples, or None
@@ -538,14 +540,14 @@ def parse_instruments(data: bytes, h: ITHeader) -> list:
         # Parse IT envelopes (new-format only, ≥cmwt 0x200)
         # Vol envelope at ptr+0x130; pan envelope at ptr+0x182; pf envelope at ptr+0x1D4
         ticks_per_sec = max(h.initial_tempo * 2.0 / 5.0, 1.0)
-        inst.vol_envelope, inst.vol_env_sustain = _parse_it_envelope(
+        inst.vol_envelope, inst.vol_env_loop, inst.vol_env_sus = _parse_it_envelope(
             data, ptr + 0x130, kind='vol', ticks_per_sec=ticks_per_sec)
-        inst.pan_envelope, inst.pan_env_sustain = _parse_it_envelope(
+        inst.pan_envelope, inst.pan_env_loop, inst.pan_env_sus = _parse_it_envelope(
             data, ptr + 0x182, kind='pan', ticks_per_sec=ticks_per_sec)
         # pf envelope: byte 0 bit 7 distinguishes filter (1) from pitch (0).
         pf_flag_byte = data[ptr + 0x1D4] if ptr + 0x1D4 < len(data) else 0
         inst.pf_is_filter = bool(pf_flag_byte & 0x80)
-        inst.pf_envelope, inst.pf_env_sustain = _parse_it_envelope(
+        inst.pf_envelope, inst.pf_env_loop, inst.pf_env_sus = _parse_it_envelope(
             data, ptr + 0x1D4, kind=('filter' if inst.pf_is_filter else 'pitch'),
             ticks_per_sec=ticks_per_sec)
         insts.append(inst)
@@ -555,31 +557,32 @@ def parse_instruments(data: bytes, h: ITHeader) -> list:
 def _parse_it_envelope(data: bytes, env_ptr: int, kind: str,
                        ticks_per_sec: float) -> tuple:
     """Parse one IT envelope block (vol / pan / pitch / filter) into up to 25
-    Taud (value, minifloat_idx) points + a 16-bit sustain/flags word.
+    Taud (value, minifloat_idx) points + LOOP word + SUSTAIN word.
 
-    Returns (points_list, sus_word). points_list has 25 entries (padded
-    with hold-zeros) or None if the envelope is disabled.
+    Returns (points_list, loop_word, sustain_word).
+    points_list has 25 entries (padded with hold) or None if the envelope is
+    disabled. loop_word and sustain_word are zero when the corresponding
+    region is not enabled.
 
     kind:
-      'vol'    — IT 0..64    →  Taud 0..63  (byte 1 = volume)
+      'vol'    — IT 0..64    →  Taud 0..63
       'pan'    — IT -32..+32 →  Taud 0..255 (0x80 = centre)
-      'pitch'  — IT -32..+32 →  Taud 0..255 (0x80 = unity, 1 unit ≈ 1 semitone)
+      'pitch'  — IT -32..+32 →  Taud 0..255 (0x80 = unity)
       'filter' — IT -32..+32 →  Taud 0..255 (0x80 = unity cutoff)
 
-    sus_word layout (16 bits, 0b 0ut sssss pcb eeeee):
-        bit 14 = u (enable sustain/loop)
-        bit 13 = t (sustain — breaks on key-off when set)
-        bits 12..8 = sustain/loop start (5-bit index 0..24)
-        bit  7 = p   (vol: fadeout-zero; pan: use default pan; pf: filter mode)
-        bit  6 = c   (envelope carry)
-        bit  5 = b   (use envelope at all)
-        bits 4..0 = sustain/loop end (5-bit index 0..24)
+    Word layout (terranmon.txt:2049+ / 2114+):
+      LOOP    word: 0b 000s_ssss_Xcbe_eeee  (X = 'p'/'m' for pan/pf, 0 for vol)
+      SUSTAIN word: 0b 000s_ssss_00be_eeee
+        bits 12..8 = start index, bits 4..0 = end index
+        bit  7 = p (pan: use default pan) / m (pf: pitch=0/filter=1) / 0 (vol)
+        bit  6 = c (envelope carry — placed in the LOOP word)
+        bit  5 = b (enable that region)
     """
     if env_ptr + 82 > len(data):
-        return None, 0
+        return None, 0, 0
     flags = data[env_ptr]
     if not (flags & 0x01):
-        return None, 0          # envelope not enabled
+        return None, 0, 0       # envelope not enabled
 
     num_nodes  = max(1, min(data[env_ptr + 1], 25))
     it_lpb     = data[env_ptr + 2]
@@ -591,20 +594,6 @@ def _parse_it_envelope(data: bytes, env_ptr: int, kind: str,
     carry        = bool(flags & 0x08)
     is_filter    = bool(flags & 0x80) and kind in ('pitch', 'filter')
 
-    # Priority: sus loop > env loop (Taud carries one loop region).
-    if has_sus_loop:
-        use_lb, use_le = it_slb, it_sle
-        has_loop = True
-        is_sustain = True
-    elif has_env_loop:
-        use_lb, use_le = it_lpb, it_lpe
-        has_loop = True
-        is_sustain = False
-    else:
-        use_lb = use_le = -1
-        has_loop = False
-        is_sustain = False
-
     # Read IT nodes: (int8 value, uint16 tick_pos LE)
     nodes = []
     for n in range(num_nodes):
@@ -615,14 +604,13 @@ def _parse_it_envelope(data: bytes, env_ptr: int, kind: str,
         tick = struct.unpack_from('<H', data, nptr + 1)[0]
         nodes.append((val, tick))
     if not nodes:
-        return None, 0
+        return None, 0, 0
 
     def _to_taud_val(it_val: int) -> int:
         if kind == 'vol':
             return min(63, max(0, round(it_val * 63 / 64)))
         if kind == 'pan':
             return min(255, max(0, round((it_val + 32) * 255 / 64)))
-        # pitch / filter: -32..+32 → 0..255 (0x80 = unity)
         return min(255, max(0, round((it_val + 32) * 255 / 64)))
 
     pad_value = (63 if kind == 'vol' else 0x80)
@@ -639,27 +627,34 @@ def _parse_it_envelope(data: bytes, env_ptr: int, kind: str,
                 delta_sec    = max(0.0, (next_tick - tick) / ticks_per_sec)
                 mf_idx       = _nearest_minifloat(delta_sec)
             else:
-                mf_idx = 0          # last real node: hold
+                mf_idx = 0
         else:
-            # Pad: hold at last real node's value.
             taud_val = points[-1][0] if points else pad_value
             mf_idx   = 0
         points.append((taud_val, mf_idx))
 
-    # Build 16-bit sus word.
-    sus_word = 0x0020   # b = 1 (use envelope) — set whenever the envelope is enabled
+    # Build LOOP word (offsets 15/17/19) and SUSTAIN word (offsets 189/191/193).
+    # IT distinguishes envelope loop and sustain loop natively; map both
+    # directly. Bits: 5=b enable, 6=c carry, 7=p (pan default-pan flag) /
+    # m (pf filter mode); 12..8=start, 4..0=end. SUSTAIN word never carries
+    # c/p/m — those live in the LOOP word.
+    loop_word = 0
+    if has_env_loop and 0 <= it_lpb < 25 and 0 <= it_lpe < 25:
+        loop_word |= 0x0020                              # b: enable LOOP
+        loop_word |= (it_lpb & 0x1F) << 8
+        loop_word |= (it_lpe & 0x1F)
     if carry:
-        sus_word |= 0x0040
+        loop_word |= 0x0040                              # c carry — kept in LOOP word
     if is_filter:
-        sus_word |= 0x0080
-    if has_loop and 0 <= use_lb < 25 and 0 <= use_le < 25:
-        sus_word |= 0x4000                      # u
-        if is_sustain:
-            sus_word |= 0x2000                  # t
-        sus_word |= (use_lb & 0x1F) << 8        # sssss
-        sus_word |= (use_le & 0x1F)             # eeeee
+        loop_word |= 0x0080                              # m filter-mode (pf only)
 
-    return points, sus_word
+    sus_word = 0
+    if has_sus_loop and 0 <= it_slb < 25 and 0 <= it_sle < 25:
+        sus_word |= 0x0020                               # b: enable SUSTAIN
+        sus_word |= (it_slb & 0x1F) << 8
+        sus_word |= (it_sle & 0x1F)
+
+    return points, loop_word, sus_word
 
 
 # ── IT pattern parser ─────────────────────────────────────────────────────────
@@ -1008,12 +1003,21 @@ def resolve_it_recalls(patterns_rows: list, order_list: list,
 
 def split_patterns(patterns_rows: list):
     """
-    Returns (chunks, chunk_map).
+    Returns (chunks, chunk_map, chunk_lens).
       chunks: flat list of 64-row grids (list of 64 × 64-channel ITRow arrays)
       chunk_map: list per source pattern of [chunk_idx_0, chunk_idx_1, ...]
+      chunk_lens: list parallel to chunks giving the real row count of each
+                  chunk (64 for full chunks, < 64 for partial-tail chunks).
+                  The cue builder emits a Taud LEN ($02xx) instruction for
+                  any chunk whose length is < 64.
+
+    Patterns ≤ 64 rows produce one chunk of `rows` rows (LEN if rows < 64).
+    Patterns > 64 rows split into ⌊rows/64⌋ full 64-row chunks plus, if
+    `rows % 64 != 0`, a final chunk holding the remainder (which gets LEN).
     """
-    chunks   = []
-    chunk_map = []
+    chunks     = []
+    chunk_map  = []
+    chunk_lens = []
 
     for pi, (grid, rows) in enumerate(patterns_rows):
         if rows == 0:
@@ -1028,7 +1032,10 @@ def split_patterns(patterns_rows: list):
         for k in range(n_chunks):
             r0 = k * PATTERN_ROWS
             r1 = min(r0 + PATTERN_ROWS, rows)
-            # Build a 64-row grid for this chunk
+            chunk_len = r1 - r0
+            # Build a 64-row grid for this chunk (rows past chunk_len are
+            # silent padding; the engine will stop early via LEN when
+            # chunk_len < 64).
             chunk_grid = []
             for ch in range(64):
                 ch_rows = []
@@ -1041,52 +1048,30 @@ def split_patterns(patterns_rows: list):
                         ch_rows.append(ITRow())
                 chunk_grid.append(ch_rows)
 
-            # If this is not the last chunk, add a C $0000 on ch0 row (r1-r0-1)
-            # to immediately break to next order (skip padding rows).
-            # Only needed when the last real row of this chunk is < 63.
-            if k < n_chunks - 1:
-                last_real = r1 - r0 - 1
-                pad_row = chunk_grid[0][last_real]
-                if pad_row.effect == 0:
-                    pad_row.effect     = EFF_C
-                    pad_row.effect_arg = 0
-            elif rows < PATTERN_ROWS and n_chunks == 1:
-                # Single chunk, short pattern → break at last real row
-                last_real = rows - 1
-                if last_real < PATTERN_ROWS - 1:
-                    pad_row = chunk_grid[0][last_real]
-                    if pad_row.effect == 0:
-                        pad_row.effect     = EFF_C
-                        pad_row.effect_arg = 0
-
             idx = len(chunks)
             chunks.append(chunk_grid)
+            chunk_lens.append(chunk_len)
             pat_chunks.append(idx)
 
         chunk_map.append(pat_chunks)
-    return chunks, chunk_map
+    return chunks, chunk_map, chunk_lens
 
 
 def _remap_bc_effects(chunks: list, chunk_map: list,
                       order_list: list, it_ord_to_taud_cue: dict,
                       num_channels: int) -> None:
-    """Rewrite B/C effects using remapped order indices.
+    """Rewrite B (position-jump) effects using remapped order indices.
 
-    B effects in all chunks are rewritten to point to the first chunk
-    of the target IT order.  C effects in non-final chunks of a split
-    pattern get a co-row B to skip remaining chunks.
+    B effects are rewritten to point to the first chunk of the target IT
+    order. C effects (pattern break) are left untouched. Padding rows
+    need no injected C: split_patterns() only ever shortens the final
+    chunk of a source pattern, and the cue for a chunk shorter than 64
+    rows carries a LEN instruction that ends it after its real row
+    count. Non-final chunks are always full 64-row chunks, so they end
+    at the natural pattern boundary and flow into the next chunk; no
+    C-skip injection is required.
     """
-    # For each chunk, record which (it_pat, chunk_k, n_chunks) it came from.
-    # We build this from chunk_map.
-    chunk_info = {}   # chunk_idx → (it_pat_idx, k, n_chunks)
-    for pi, pat_chunks in enumerate(chunk_map):
-        n = len(pat_chunks)
-        for k, ci in enumerate(pat_chunks):
-            chunk_info[ci] = (pi, k, n)
-
     for ci, chunk_grid in enumerate(chunks):
-        pi, k, n = chunk_info.get(ci, (0, 0, 1))
-
         for ch in range(num_channels):
             if ch >= len(chunk_grid): continue
             for row in chunk_grid[ch]:
@@ -1094,25 +1079,6 @@ def _remap_bc_effects(chunks: list, chunk_map: list,
                     it_tgt = row.effect_arg
                     taud_cue = it_ord_to_taud_cue.get(it_tgt, it_tgt)
                     row.effect_arg = taud_cue & 0xFF
-                elif row.effect == EFF_C and k < n - 1:
-                    # C in non-final chunk: need B to skip remaining chunks
-                    # Find the cue index immediately after all chunks of this pat
-                    # (the cue right after the last chunk of pi in the order list)
-                    # We store the B in aux_effect; the Taud builder handles it.
-                    skip_cue = _find_post_pat_cue(pi, order_list, chunk_map,
-                                                  it_ord_to_taud_cue)
-                    if skip_cue is not None:
-                        row.aux_effect = (EFF_B, skip_cue & 0xFF)
-
-def _find_post_pat_cue(pi: int, order_list: list, chunk_map: list,
-                       it_ord_to_taud_cue: dict):
-    """Return the Taud cue index that follows ALL chunks of pattern pi in the order list."""
-    for taud_cue, it_ord in it_ord_to_taud_cue.items():
-        # Find first Taud cue after the last chunk of pi
-        pass
-    # Simpler: walk the Taud cue list (we'll compute it in assemble_taud)
-    # Return None for now — assemble_taud will do a second pass.
-    return None
 
 
 # ── Sample / instrument bin (same as s3m2taud) ────────────────────────────────
@@ -1165,7 +1131,8 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
             s.sus_end  = min(s.sus_end,  n)
         pos += n
 
-    # 192-byte instrument layout (terranmon.txt:1997-2070).
+    # 256-byte instrument layout (terranmon.txt:2001+).
+    INST_STRIDE = 256
     USE_ENV_BIT = 0x0020   # b — set whenever the engine should evaluate the envelope
 
     def _write_env(buf: bytearray, base: int, env_pts):
@@ -1216,7 +1183,7 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
             loop_mode = 0   # no loop
         flags_byte = (loop_mode & 0x3) | sustain_bit
 
-        base = taud_idx * 192
+        base = taud_idx * INST_STRIDE
         struct.pack_into('<I', inst_bin, base + 0,  ptr)
         struct.pack_into('<H', inst_bin, base + 4,  s_len)
         struct.pack_into('<H', inst_bin, base + 6,  c2spd)
@@ -1226,12 +1193,19 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
         inst_bin[base + 14] = flags_byte
 
         idata = (instr_data_by_slot or {}).get(taud_idx) or {}
-        vol_env = idata.get('vol_env')
-        pan_env = idata.get('pan_env')
-        pf_env  = idata.get('pf_env')
-        vol_sus = idata.get('vol_sus', USE_ENV_BIT)
-        pan_sus = idata.get('pan_sus', 0)
-        pf_sus  = idata.get('pf_sus',  0)
+        vol_env       = idata.get('vol_env')
+        pan_env       = idata.get('pan_env')
+        pf_env        = idata.get('pf_env')
+        # LOOP words live at offsets 15/17/19. SUSTAIN words at 189/191/193.
+        # When the source has neither loop nor sustain on the volume envelope
+        # the engine still needs the b flag so the single-point unit envelope
+        # is evaluated — synthesise USE_ENV_BIT into the LOOP word as a fallback.
+        vol_env_loop  = idata.get('vol_env_loop', USE_ENV_BIT)
+        vol_env_sus   = idata.get('vol_env_sus',  0)
+        pan_env_loop  = idata.get('pan_env_loop', 0)
+        pan_env_sus   = idata.get('pan_env_sus',  0)
+        pf_env_loop   = idata.get('pf_env_loop',  0)
+        pf_env_sus    = idata.get('pf_env_sus',   0)
         # Sample-mode default IGV: fold sample default vol (Sv) and sample GV
         # into Taud's IGV. Instrument-mode supplies inst_gv pre-folded.
         if 'inst_gv' in idata:
@@ -1247,9 +1221,10 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
         # effects.c:1261). Clamp defensively to 4095.
         fadeout = min(0xFFF, idata.get('fadeout', 0) & 0xFFFF)
 
-        struct.pack_into('<H', inst_bin, base + 15, vol_sus & 0xFFFF)
-        struct.pack_into('<H', inst_bin, base + 17, pan_sus & 0xFFFF)
-        struct.pack_into('<H', inst_bin, base + 19, pf_sus  & 0xFFFF)
+        # LOOP words at offsets 15/17/19.
+        struct.pack_into('<H', inst_bin, base + 15, vol_env_loop & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 17, pan_env_loop & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 19, pf_env_loop  & 0xFFFF)
 
         if vol_env:
             _write_env(inst_bin, base + 21,  vol_env)
@@ -1258,8 +1233,10 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
             # carried by IGV (byte 171), so the envelope must be a unit multiplier.
             inst_bin[base + 21] = 63
             inst_bin[base + 22] = 0
-            # Force engine to use this single point.
-            struct.pack_into('<H', inst_bin, base + 15, USE_ENV_BIT)
+            # Force engine to use this single point — set the b bit on the LOOP
+            # word so the envelope is evaluated even though no wrap region exists.
+            cur_loop = struct.unpack_from('<H', inst_bin, base + 15)[0]
+            struct.pack_into('<H', inst_bin, base + 15, cur_loop | USE_ENV_BIT)
 
         if pan_env:
             _write_env(inst_bin, base + 71, pan_env)
@@ -1304,13 +1281,16 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
         inst_bin[base + 187] = idata.get('vib_depth', 0) & 0xFF
         # Byte 188: vibrato rate (0..255 full range, IT samplewise Vir).
         inst_bin[base + 188] = idata.get('vib_rate', 0) & 0xFF
-        # Byte 189: duplicate-check / action (IT-only — bits 0-1 = DCT, bits 2-3 = DCA).
-        #   DCT: 0=off, 1=note, 2=sample, 3=instrument.
-        #   DCA: 0=note cut, 1=note off, 2=note fade.
+        # SUSTAIN words at offsets 189/191/193.
+        struct.pack_into('<H', inst_bin, base + 189, vol_env_sus & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 191, pan_env_sus & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 193, pf_env_sus  & 0xFFFF)
+        # Byte 195: duplicate-check / action (IT-only — bits 0-1 = DCT, bits 2-3 = DCA).
+        # Relocated 2026-05-06 from old offset 189 (now part of the vol sustain word).
         dct = idata.get('dct', 0) & 0x03
         dca = idata.get('dca', 0) & 0x03
-        inst_bin[base + 189] = (dca << 2) | dct
-        # Bytes 190-191: reserved (already zeroed).
+        inst_bin[base + 195] = (dca << 2) | dct
+        # Bytes 196..255: reserved (already zeroed).
 
         vprint(f"  instrument[{taud_idx}] '{s.name}' ptr:{ptr} c5spd:{s.c5_speed}")
 
@@ -1331,7 +1311,6 @@ def build_pattern_it(chunk_grid: list, ch_idx: int, default_pan: int,
     """Build a 512-byte Taud pattern for one IT channel from a 64-row chunk grid."""
     out = bytearray(PATTERN_BYTES)
     rows = chunk_grid[ch_idx] if ch_idx < len(chunk_grid) else [ITRow()] * PATTERN_ROWS
-    last_inst = 0
     last_note_it = -1
 
     for r, cell in enumerate(rows[:PATTERN_ROWS]):
@@ -1360,9 +1339,6 @@ def build_pattern_it(chunk_grid: list, ch_idx: int, default_pan: int,
         if cell.note >= 0:
             note_taud = encode_note_it(cell.note)
 
-        if cell.inst > 0:
-            last_inst = cell.inst
-
         note_triggers = (0 <= (cell.note if cell.note >= 0 else -1) <= 119)
 
         # ── Volume column ────────────────────────────────────────────────────
@@ -1392,30 +1368,16 @@ def build_pattern_it(chunk_grid: list, ch_idx: int, default_pan: int,
         else:
             pan_sel, pan_value = SEL_FINE, 0
 
-        # Handle aux_effect (B) stored on a C cell for chunk-skip
-        if cell.aux_effect is not None:
-            aux_cmd, aux_arg = cell.aux_effect
-            if aux_cmd == EFF_B and op == TOP_C:
-                # Encode as B effect; C row break handled by engine's simultaneous B+C
-                op   = TOP_C
-                # We need to emit both; store the B's target in arg16 high byte
-                # Taud simultaneous B+C: B sets order, C sets row. Engine handles.
-                # Encoding: keep op=TOP_C (pattern break), store B target in
-                # a separate "B command on another channel". We can't encode two
-                # effects in one cell, so instead just emit the B effect here
-                # and let the order index point past the remaining chunks.
-                # This is a best-effort; the engine should honour the lowest-channel B.
-                op    = TOP_B
-                arg16 = aux_arg & 0xFF
-
         vol_byte = (vol_value & 0x3F) | ((vol_sel & 0x3) << 6)
         pan_byte = (pan_value & 0x3F) | ((pan_sel & 0x3) << 6)
 
-        taud_inst = last_inst & 0xFF if (note_triggers or cell.inst > 0) else 0
-
+        # Preserve cell.inst==0 verbatim — IT semantics: a note row with no
+        # explicit instrument byte retriggers the channel's currently-loaded
+        # instrument. Filling in last_inst converts that into an explicit
+        # instrument-change, which can break NNA / envelope-reset behaviour.
         base = r * 8
         struct.pack_into('<H', out, base + 0, note_taud)
-        out[base + 2] = taud_inst
+        out[base + 2] = cell.inst & 0xFF
         out[base + 3] = vol_byte
         out[base + 4] = pan_byte
         out[base + 5] = op & 0xFF
@@ -1563,7 +1525,7 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
 
     # ── Split patterns into 64-row chunks ────────────────────────────────────
     vprint("  splitting patterns…")
-    chunks, chunk_map = split_patterns(patterns_rows)
+    chunks, chunk_map, chunk_lens = split_patterns(patterns_rows)
 
     # ── Choose active channels ───────────────────────────────────────────────
     active_channels = _active_channels(h, patterns_rows)
@@ -1657,12 +1619,15 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
             nna_taud = it_to_taud_nna[inst.nna & 0x03]
 
             instr_data_by_slot[taud_slot] = {
-                'vol_env': inst.vol_envelope,
-                'vol_sus': inst.vol_env_sustain,
-                'pan_env': inst.pan_envelope,
-                'pan_sus': inst.pan_env_sustain,
-                'pf_env':  inst.pf_envelope,
-                'pf_sus':  inst.pf_env_sustain,
+                'vol_env':       inst.vol_envelope,
+                'vol_env_loop':  inst.vol_env_loop,
+                'vol_env_sus':   inst.vol_env_sus,
+                'pan_env':       inst.pan_envelope,
+                'pan_env_loop':  inst.pan_env_loop,
+                'pan_env_sus':   inst.pan_env_sus,
+                'pf_env':        inst.pf_envelope,
+                'pf_env_loop':   inst.pf_env_loop,
+                'pf_env_sus':    inst.pf_env_sus,
                 'inst_gv': inst_gv_255,
                 'fadeout': inst.fadeout,
                 'vib_speed':  vib_speed_taud,
@@ -1739,17 +1704,35 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
         sheet[c*CUE_SIZE:c*CUE_SIZE+CUE_SIZE] = encode_cue([], 0)
 
     last_active = -1
+    len_cue_count = 0
     for cue_idx, ci in enumerate(taud_cue_list):
         if cue_idx >= NUM_CUES: break
         base_pat = cue_idx * C
         pats = [pat_remap[base_pat + vi] for vi in range(C)]
-        sheet[cue_idx*CUE_SIZE:(cue_idx+1)*CUE_SIZE] = encode_cue(pats, 0)
+        clen = chunk_lens[ci] if ci < len(chunk_lens) else PATTERN_ROWS
+        if clen < PATTERN_ROWS:
+            instr = cue_instruction_len(clen)
+            len_cue_count += 1
+        else:
+            instr = CUE_INST_NOP
+        sheet[cue_idx*CUE_SIZE:(cue_idx+1)*CUE_SIZE] = encode_cue(pats, instr)
         last_active = cue_idx
 
     if last_active >= 0:
-        sheet[last_active * CUE_SIZE + 30] = 0x01
+        # Halt overlays whatever LEN was on this cue. If both apply
+        # (the song terminates on a partial-tail chunk), the LEN is
+        # mooted by halt — warn so the user is aware.
+        b30_existing = sheet[last_active * CUE_SIZE + 30]
+        if b30_existing == CUE_INST_LEN:
+            vprint(f"  warning: last active cue {last_active} had LEN; "
+                   f"replaced with HALT (partial tail at song terminus)")
+        sheet[last_active * CUE_SIZE + 30] = CUE_INST_HALT
+        sheet[last_active * CUE_SIZE + 31] = 0x00
     else:
-        sheet[30] = 0x01
+        sheet[30] = CUE_INST_HALT
+    if len_cue_count:
+        vprint(f"  emitted {len_cue_count} LEN cue instruction(s) "
+               f"for partial-length patterns")
 
     # ── Header ───────────────────────────────────────────────────────────────
     sig    = (SIGNATURE + b' ' * 14)[:14]
diff --git a/mod2taud.py b/mod2taud.py
index 621da27..48978e8 100644
--- a/mod2taud.py
+++ b/mod2taud.py
@@ -500,7 +500,8 @@ def build_sample_inst_bin(samples: list) -> tuple:
             s.loop_end = min(s.loop_end, n)
         pos += n
 
-    # New 192-byte instrument layout (terranmon.txt:1997-2070).
+    # New 256-byte instrument layout (terranmon.txt:2001+).
+    INST_STRIDE = 256
     inst_bin = bytearray(INSTBIN_SIZE)
     for i, s in enumerate(samples):
         taud_idx = i + 1     # 1-based instrument number
@@ -519,9 +520,11 @@ def build_sample_inst_bin(samples: list) -> tuple:
         # Envelope first point is full-scale; per-sample level is carried by
         # IGV (byte 171) so the envelope must contribute a unit multiplier.
         env_vol   = 63
-        vol_env_flags = 0x0020   # use-envelope bit
+        # MOD has no envelopes; vol LOOP word b=1 just so the engine evaluates
+        # the unit envelope. Pan/PF stay disabled.
+        vol_env_loop = 0x0020   # b enable
 
-        base = taud_idx * 192
+        base = taud_idx * INST_STRIDE
         struct.pack_into('<I', inst_bin, base + 0,  ptr)
         struct.pack_into('<H', inst_bin, base + 4,  s_len)
         struct.pack_into('<H', inst_bin, base + 6,  c2spd)
@@ -529,7 +532,8 @@ def build_sample_inst_bin(samples: list) -> tuple:
         struct.pack_into('<H', inst_bin, base + 10, ls)
         struct.pack_into('<H', inst_bin, base + 12, le)
         inst_bin[base + 14] = flags_byte
-        struct.pack_into('<H', inst_bin, base + 15, vol_env_flags)
+        # LOOP words at 15/17/19; SUSTAIN words at 189/191/193 (left zero).
+        struct.pack_into('<H', inst_bin, base + 15, vol_env_loop)
         struct.pack_into('<H', inst_bin, base + 17, 0)
         struct.pack_into('<H', inst_bin, base + 19, 0)
         inst_bin[base + 21] = env_vol
diff --git a/s3m2taud.py b/s3m2taud.py
index 67cf3b2..aa787d9 100644
--- a/s3m2taud.py
+++ b/s3m2taud.py
@@ -476,9 +476,10 @@ def build_sample_inst_bin(instruments: list) -> tuple:
             inst.loop_end = min(inst.loop_end, n)
         pos += n
 
-    # Build instrument bin (256 × 192 bytes)
-    # New layout (terranmon.txt:1997-2070): u32 sample ptr, ..., 25-point envelopes,
-    # plus a host of optional fields. S3M doesn't supply most of those — they default to 0.
+    # Build instrument bin (256 × 256 bytes)
+    # New layout (terranmon.txt:2001+): LOOP words at 15/17/19, SUSTAIN words at 189/191/193.
+    # S3M has no envelope sustain or loop, so SUSTAIN words stay zero.
+    INST_STRIDE = 256
     inst_bin = bytearray(INSTBIN_SIZE)
     for i, inst in enumerate(instruments):
         taud_idx = i + 1
@@ -498,10 +499,10 @@ def build_sample_inst_bin(instruments: list) -> tuple:
         # Volume envelope first point is full-scale; per-sample level is carried
         # by IGV (byte 171) so the envelope contributes a unit multiplier.
         env_vol = 63
-        # Vol env-flags: enable use-envelope bit (b=1) so engine reads the single point.
-        vol_env_flags = 0x0020   # b=bit 5
+        # Vol LOOP word: only b=1 (use envelope) — no actual loop / sustain.
+        vol_env_loop = 0x0020
 
-        base = taud_idx * 192
+        base = taud_idx * INST_STRIDE
         struct.pack_into('<I', inst_bin, base + 0,  ptr)        # u32 sample pointer
         struct.pack_into('<H', inst_bin, base + 4,  s_len)
         struct.pack_into('<H', inst_bin, base + 6,  c2spd)      # rate at TAUD_C4
@@ -509,9 +510,10 @@ def build_sample_inst_bin(instruments: list) -> tuple:
         struct.pack_into('<H', inst_bin, base + 10, ls)
         struct.pack_into('<H', inst_bin, base + 12, le)
         inst_bin[base + 14] = flags_byte
-        struct.pack_into('<H', inst_bin, base + 15, vol_env_flags)
-        struct.pack_into('<H', inst_bin, base + 17, 0)          # pan env-flags
-        struct.pack_into('<H', inst_bin, base + 19, 0)          # pitch/filter env-flags
+        # LOOP words at 15/17/19; SUSTAIN words at 189/191/193 (left zero).
+        struct.pack_into('<H', inst_bin, base + 15, vol_env_loop)
+        struct.pack_into('<H', inst_bin, base + 17, 0)
+        struct.pack_into('<H', inst_bin, base + 19, 0)
         # Volume env point 0: hold at env_vol indefinitely (offset minifloat = 0 → hold).
         inst_bin[base + 21] = env_vol
         inst_bin[base + 22] = 0
@@ -524,7 +526,7 @@ def build_sample_inst_bin(instruments: list) -> tuple:
         inst_bin[base + 183] = 0xFF # filter resonance = off
         inst_bin[base + 186] = 1 # NNA: note cut
 
-        vprint(f"  instrument[{base // 192}] '{inst.name}' ptr: '{ptr}', sampling rate: '{inst.c2spd}'")
+        vprint(f"  instrument[{base // INST_STRIDE}] '{inst.name}' ptr: '{ptr}', sampling rate: '{inst.c2spd}'")
         if inst.c2spd > 65535:
             vprint(f"  warning: sampling rate of '{inst.name}' exceeds 65535 (got '{inst.c2spd}')")
 
diff --git a/taud_common.py b/taud_common.py
index f7ae0db..a87884c 100644
--- a/taud_common.py
+++ b/taud_common.py
@@ -30,8 +30,9 @@ TAUD_MAGIC       = bytes([0x1F,0x54,0x53,0x56,0x4D,0x61,0x75,0x64])
 TAUD_VERSION     = 1
 TAUD_HEADER_SIZE = 32       # magic(8)+ver(1)+numSongs(1)+compSize(4)+projOff(4)+sig(14)
 TAUD_SONG_ENTRY  = 32       # full spec entry (see encode_song_entry)
-SAMPLEBIN_SIZE   = 737280
-INSTBIN_SIZE     = 49152    # 256 instruments × 192 bytes
+INST_RECORD_SIZE = 256      # widened 2026-05-06 (was 192). 256 inst × 256 = 64K.
+SAMPLEBIN_SIZE   = 720896   # was 737280; 16K reallocated to inst bin (terranmon.txt:1985-1997)
+INSTBIN_SIZE     = INST_RECORD_SIZE * 256   # 65536 = 64K
 SAMPLEINST_SIZE  = SAMPLEBIN_SIZE + INSTBIN_SIZE
 PATTERN_ROWS     = 64
 PATTERN_BYTES    = PATTERN_ROWS * 8     # 512
@@ -46,6 +47,18 @@ NOTE_KEYOFF = 0x0000
 NOTE_CUT    = 0xFFFE
 TAUD_C4     = 0x5000   # The audio engine's Middle C
 
+# Cue sheet instruction byte (cue offset 30; offset 31 = arg byte for 2-byte forms).
+# Per terranmon.txt §"Cue Sheet":
+#   00000010 00xxxxxx (LEN)  pattern length: rows = (xxxxxx) + 1, range 1..64
+#   00000001          (HALT) end of song
+#   00000000          (NOP)  default 64-row cue
+#   1000xxxx yyyyyyyy (BAK)  go back 12-bit arg
+#   1001xxxx yyyyyyyy (FWD)  skip forward 12-bit arg
+#   1111xxxx yyyyyyyy (JMP)  go to absolute pattern
+CUE_INST_NOP  = 0x00
+CUE_INST_HALT = 0x01
+CUE_INST_LEN  = 0x02
+
 # Taud effect opcodes (base-36: 0..9 → 0x00..0x09, A..Z → 0x0A..0x23)
 TOP_NONE = 0x00
 TOP_A    = 0x0A
@@ -152,8 +165,13 @@ def rescale_offset_effects(pat_bin: bytes, ratio: float) -> bytes:
     return bytes(out)
 
 
-def encode_cue(patterns12: list, instruction: int) -> bytearray:
-    """Encode a 32-byte cue entry for up to 20 voices with 12-bit pattern numbers."""
+def encode_cue(patterns12: list, instruction) -> bytearray:
+    """Encode a 32-byte cue entry for up to 20 voices with 12-bit pattern numbers.
+
+    `instruction` is either an int (legacy single-byte value placed at byte 30,
+    byte 31 = 0) or a 2-tuple `(byte30, byte31)` for two-byte forms such as
+    LEN (CUE_INST_LEN with row count - 1).
+    """
     pats = list(patterns12) + [0xFFF] * NUM_VOICES
     pats = pats[:NUM_VOICES]
     entry = bytearray(CUE_SIZE)
@@ -162,10 +180,25 @@ def encode_cue(patterns12: list, instruction: int) -> bytearray:
         entry[i]      = ((v0 & 0xF) << 4) | (v1 & 0xF)               # low nybbles
         entry[10 + i] = (((v0 >> 4) & 0xF) << 4) | ((v1 >> 4) & 0xF) # mid nybbles
         entry[20 + i] = (((v0 >> 8) & 0xF) << 4) | ((v1 >> 8) & 0xF) # high nybbles
-    entry[30] = instruction & 0xFF
+    if isinstance(instruction, tuple):
+        b30, b31 = instruction
+        entry[30] = b30 & 0xFF
+        entry[31] = b31 & 0xFF
+    else:
+        entry[30] = instruction & 0xFF
     return entry
 
 
+def cue_instruction_len(rows: int) -> tuple:
+    """Build the 2-byte LEN cue instruction for `rows` (1..64).
+
+    Returns (byte30, byte31) where byte30 = 0x02 and byte31 = (rows - 1) & 0x3F.
+    """
+    if not 1 <= rows <= 64:
+        raise ValueError(f"LEN row count must be 1..64, got {rows}")
+    return (CUE_INST_LEN, (rows - 1) & 0x3F)
+
+
 def deduplicate_patterns(pat_bin: bytes, num_pats: int) -> tuple:
     """Consolidate identical 512-byte Taud patterns into a single copy.
 
diff --git a/terranmon.txt b/terranmon.txt
index 1c9cd52..86d1602 100644
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -1985,16 +1985,57 @@ Synchronisation between playheads are not guaranteed. Do not play music in multi
 
 Memory Space
 
-0..737279 RW: Sample bin (720k)
-737280..786431 RW: Instrument bin (256 instruments, 192 bytes each; instrument 0 does nothing; 48k)
+0..720895 RW: Sample bin (704k)
+720896..786431 RW: Instrument bin (256 instruments, 256 bytes each; instrument 0 does nothing; 64k)
 786432..851967 RW: Play data 1 (currently exposed bank; 64k)
 851968..917503 RW: Play data 2 (currently exposed bank; 64k)
 917504..983039 RW: TAD Input Buffer (64k)
 983040..1048575 RW: TAD Decode Output (64k)
 
+(Layout note 2026-05-06: sample bin shrunk by 16k and instrument bin widened
+ by the same amount so all downstream dispatch ranges keep their existing
+ anchors at 786432. Total memory space stays at exactly 1 MiB.)
+
 Sample bin: just raw sample data thrown in there. You need to keep track of starting point for each sample
 
 Instrument bin: Registry for 256 instruments, formatted as:
+
+The instrument record is 256 bytes wide. Envelopes are described by THREE
+independent regions per envelope (vol / pan / pitch-filter):
+    1. The 25 envelope nodes (offsets 21 / 71 / 121).
+    2. The LOOP word (offsets 15 / 17 / 19) — defines an always-active
+       wrap region. When enabled (b=1) and the envelope position reaches
+       loop_end, it wraps back to loop_start. Active regardless of key
+       state. This is the IT/FT2 envelope loop.
+    3. The SUSTAIN word (offsets 189 / 191 / 193) — defines a wrap
+       region that is ONLY active while the key is on. When the key
+       goes off the sustain "releases" and the envelope position is
+       free to walk past sus_end. Concretely:
+         - FT2-style "sustain point": store sus_start == sus_end (single
+           index). Engine wraps that index → itself, so the envelope
+           holds at the point until key-off.
+         - IT-style "sustain loop":  store sus_start <= sus_end. Engine
+           wraps sus_end → sus_start while key is on, so the envelope
+           loops within the sustain range until key-off.
+    Note: there is no separate "release loop" region; once sustain releases
+       the envelope walks forward and is captured by the LOOP region if
+       the LOOP region exists and the position enters it.
+
+Priority during playback follows schismtracker player/sndmix.c:480-499:
+    if SUSTAIN.b == 1 and !key_off : wrap (sus_start, sus_end)
+    elif LOOP.b == 1               : wrap (loop_start, loop_end)
+    else                           : hold at last node
+
+This means SUSTAIN takes precedence over LOOP while the key is on; once
+the key is released, LOOP becomes the active wrap region. Setting both
+to b=0 disables envelope wrapping entirely (envelope plays once and holds
+at its last node).
+
+The b flag is the SOLE enable bit for each region; the historical 't'
+(sustain breaks on key-off) and 'u' (sustain/loop enable) flags are NOT
+present in this encoding — sustain vs loop is now a structural
+distinction (different word at a different offset), not a flag bit.
+
 0   Uint32 Sample Pointer
 4   Uint16 Sample length
 6   Uint16 Sampling rate at C4 (note number 0x5000)
@@ -2006,41 +2047,35 @@ Instrument bin: Registry for 256 instruments, formatted as:
            pp: loop mode. 0-no loop, 1-loop, 2-backandforth, 3-oneshot (ignores note length unless overridden by other notes)
            s: loop is sustain (key-off escapes the loop)
             - IT: look for sample's SusLoop flag
-15  Bit16  Volume envelope sustain/loops and flags
-        * Sustain is implemented by enabling 't' flag. FastTracker has no 'Sus Loop' but only 'Sus Point'; use same value for start and end index
-        0b 0ut sssss 0cb eeeee
-            s: sustain/loop start index
-            e: sustain/loop end index
-
-            b: use envelope
-            c: envelope carry
-
-            t: the loop must sustain (key-off escapes the loop)
-            u: set to enable the sustain/loop
-17  Bit16   Panning envelope sustain/loops and flags
-        * Sustain is implemented by enabling 't' flag
-        0b 0ut sssss pcb eeeee
-            s: sustain/loop start index
-            e: sustain/loop end index
-
-            b: use envelope
-            c: envelope carry
-            p: use default pan (see offset 177 "Default pan value" below)
-
-            t: the loop must sustain (key-off escapes the loop)
-            u: set to enable the sustain/loop
-19  Bit16   Pitch/Filter envelope sustain/loops and flags
-        * Sustain is implemented by enabling 't' flag
-        0b 0ut sssss mcb eeeee
-            s: sustain/loop start index
-            e: sustain/loop end index
-
-            b: use envelope
-            c: envelope carry
-            m: mode (0: on pitch, 1: on filter)
-
-            t: the loop must sustain (key-off escapes the loop)
-            u: set to enable the sustain/loop
+15  Bit16  Volume envelope LOOP word
+        * Always-active wrap region for the volume envelope. See SUSTAIN word at offset 189 for the key-on-only wrap.
+        0b 000_sssss_0cb_eeeee
+            s (bits 12..8) : loop start index (0..24)
+            e (bits 4..0)  : loop end   index (0..24)
+            b (bit 5)      : enable the LOOP (0 = no envelope loop)
+            c (bit 6)      : envelope carry (cross-trigger envelope position carry)
+            (bits 7, 13..15 reserved — set to 0)
+17  Bit16  Panning envelope LOOP word
+        * Always-active wrap region for the pan envelope.
+        0b 000_sssss_pcb_eeeee
+            s (bits 12..8) : loop start index
+            e (bits 4..0)  : loop end   index
+            b (bit 5)      : enable the LOOP
+            c (bit 6)      : envelope carry
+            p (bit 7)      : use default pan (see offset 177 "Default pan value" below).
+                              Independent of LOOP enable; the engine reads this bit
+                              from the LOOP word as the canonical home for envelope-
+                              level meta flags.
+            (bits 13..15 reserved)
+19  Bit16  Pitch/Filter envelope LOOP word
+        * Always-active wrap region for the pitch/filter envelope.
+        0b 000_sssss_mcb_eeeee
+            s (bits 12..8) : loop start index
+            e (bits 4..0)  : loop end   index
+            b (bit 5)      : enable the LOOP
+            c (bit 6)      : envelope carry
+            m (bit 7)      : mode — 0 = pitch envelope, 1 = filter envelope
+            (bits 13..15 reserved)
 21  Bit16x25 Volume envelopes
        Byte 1: Volume (00..3F)
        Byte 2: Time until the next point, in seconds (3.5 Unsigned Minifloat). 0 = hold at this point indefinitely.
@@ -2090,7 +2125,29 @@ Instrument bin: Registry for 256 instruments, formatted as:
         * FastTracker2 has range of 0..16; multiply by (255/16) then round to int
 188 Uint8  Vibrato Rate (0..255 full range)
         * ImpulseTracker sample config. The spec follows ImpulseTracker precisely
-189 Byte[3] Reserved
+189 Bit16  Volume envelope SUSTAIN word
+        * Wrap region active ONLY while key is on. Released on key-off.
+        * FT2 single-point sustain: store sus_start == sus_end (the engine
+          wraps that index → itself, so the envelope holds there).
+        * IT sustain loop: store sus_start <= sus_end (engine wraps the range
+          while key is on; same shape as the LOOP word).
+        0b 000_sssss_00b_eeeee
+            s (bits 12..8) : sustain start index (0..24)
+            e (bits 4..0)  : sustain end   index (0..24)
+            b (bit 5)      : enable the SUSTAIN (0 = no sustain wrap)
+            (bits 6..7, 13..15 reserved — the 'c' carry bit lives in the LOOP word)
+191 Bit16  Panning envelope SUSTAIN word
+        * Same encoding as offset 189, applied to the pan envelope.
+        0b 000_sssss_00b_eeeee
+193 Bit16  Pitch/Filter envelope SUSTAIN word
+        * Same encoding as offset 189, applied to the pitch/filter envelope.
+        0b 000_sssss_00b_eeeee
+195 Bit8   Duplicate Check / Action (IT-only; FT2 leaves this 0)
+        0b 0000 dcdt
+            dt (bits 0..1) : Duplicate Check Type. 0=off, 1=note, 2=sample, 3=instrument.
+            dc (bits 2..3) : Duplicate Check Action. 0=note cut, 1=note off, 2=note fade.
+        * Relocated from offset 189 (which is now the volume sustain word) on 2026-05-06.
+196..255 Reserved (60 bytes free for future per-instrument fields)
 
 
 
@@ -2115,6 +2172,10 @@ TODO:
     [ ] low-number voleffs are too quiet (needs elaboration and test cases)
     [x] scale Oxxxx when samples get resampled
     [x] implement bitcrusher and overdrive  (eff sym '8' and '9')
+    [x] a note trigger with inst and note fx set (e.g. portamento) but no volume set does not get its default volume and keeps the previous volume instead (SATELL.taud ptn 23) -- and simulateRowState() of taut.js always shows the old volume instead of the default volume, regardless of whether a note fx exists
+    [ ] implement extended tone mode (MONOTONE compat)
+    [ ] pattern loops stop working after being processed once (test with slumberjack.xm)
+    [ ] how does fadeout=0 work on IT? On XM, the note doesn't decay at all (that's why there's a separate CUT value). Also see what the Global Behaviour 'm' flag actually does on Taud (or, which slop AI had fed me *sigh*)
 
 
 Play Data: play data are series of tracker-like instructions, visualised as:
@@ -2239,10 +2300,12 @@ Play Head Flags
         Byte 11..20: 0b miV1 miV2, 0b miV3 miV4, 0b miV5 miV6, ... 0b miV19 miV20
         Byte 21..30: 0b hiV1 hiV2, 0b hiV3 hiV4, 0b hiV5 hiV6, ... 0b hiV19 hiV20
     Byte 31..32: instruction
-        1000xxxx yyyyyyyy - Go back 0bxxxxyyyyyyyy patterns
-        1001xxxx yyyyyyyy - Skip forward 0bxxxxyyyyyyyy patterns
-        1111xxxx yyyyyyyy - Go to absolute pattern number 0bxxxxyyyyyyyy
-        00000001 - Halt
+        1000xxxx yyyyyyyy (BAK000) - Go back 0bxxxxyyyyyyyy patterns
+        1001xxxx yyyyyyyy (FWD000) - Skip forward 0bxxxxyyyyyyyy patterns
+        1111xxxx yyyyyyyy (JMP000) - Go to absolute pattern number 0bxxxxyyyyyyyy
+        00000010 00xxxxxx (LEN 00) - Pattern length for this cue (0..63), where 0: 1 row, 63: 64 rows (decoded by AudioAdapter as of 2026-05-05; emitted by xm2taud / it2taud for non-multiple-of-64 source patterns)
+        00000001 00000000 - Halt (HALT  )
+        00000001 00111111 - Fadeout (FADOUT) - Gradually decrease global volume such that at row 63 it reaches zero
         00000000 - No operation
 
 65536..131071 RW: PCM Sample buffer
@@ -2329,9 +2392,9 @@ Endianness: Little
     Uint16  Current Tuning base note (1..65533). A4 (western default) is 0x5C00. C9 (tracker default) is 0xA000. If zero, assume the tracker default value
     Float32 Frequency at the base note. Tracker default is 8363.0. If zero, assume the tracker default
     Uint8   Flags for Global Behaviour (effect symbol '1')
-        0b 0000 0mfp
+        0b 0000 Fmfp
             p: panning law         (0=linear, 1=equal-power)
-            f: tone mode           (0=linear pitch slides, 1=Amiga period slides)
+            Ff: tone mode          (0=linear pitch slides, 1=Amiga period slides, 2=linear-frequency slides, 3=reserved)
             m: fadeout-zero policy (0=IT — stored fadeout 0 means no fadeout;
                                     1=FT2 — stored fadeout 0 means cut on key-off)
     Uint8   Song global volume
diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
index e5e99b6..a0491de 100644
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
@@ -135,7 +135,11 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         const val AMIGA_BASE_PERIOD = 428.0
     }
 
-    internal val sampleBin = UnsafeHelper.allocate(737280L, this)
+    // Memory map (terranmon.txt:1985-1997, updated 2026-05-06):
+    //   0..720895       sample bin (704K, was 737280)
+    //   720896..786431  instrument bin (256 inst × 256 bytes = 64K)
+    //   786432..        play data 1 / 2 / TAD blocks (anchors unchanged)
+    internal val sampleBin = UnsafeHelper.allocate(720896L, this)
     internal val instruments = Array(256) { TaudInst(it) }
     internal val playdata = Array(4096) { Array(64) { TaudPlayData(0xFFFF, 0, 0, 0, 32, 0, 0, 0) } }
     internal val playheads: Array<Playhead>
@@ -307,8 +311,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
 
     override fun peek(addr: Long): Byte {
         return when (val adi = addr.toInt()) {
-            in 0..737279 -> sampleBin[addr]
-            in 737280..786431 -> (adi - 737280).let { instruments[it / 192].getByte(it % 192) }
+            in 0..720895 -> sampleBin[addr]
+            in 720896..786431 -> (adi - 720896).let { instruments[it / 256].getByte(it % 256) }
             in 786432..851967 -> { val off = adi - 786432; playdata[playheads[0].patBank1 * 128 + off / 512][(off % 512) / 8].getByte(off % 8) }
             in 851968..917503 -> { val off = adi - 851968; playdata[playheads[0].patBank2 * 128 + off / 512][(off % 512) / 8].getByte(off % 8) }
             in 917504..983039 -> tadInputBin[addr - 917504]   // TAD input buffer (65536 bytes)
@@ -321,8 +325,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         val adi = addr.toInt()
         val bi = byte.toUint()
         when (adi) {
-            in 0..737279 -> { sampleBin[addr] = byte }
-            in 737280..786431 -> (adi - 737280).let { instruments[it / 192].setByte(it % 192, bi) }
+            in 0..720895 -> { sampleBin[addr] = byte }
+            in 720896..786431 -> (adi - 720896).let { instruments[it / 256].setByte(it % 256, bi) }
             in 786432..851967 -> { val off = adi - 786432; playdata[playheads[0].patBank1 * 128 + off / 512][(off % 512) / 8].setByte(off % 8, bi) }
             in 851968..917503 -> { val off = adi - 851968; playdata[playheads[0].patBank2 * 128 + off / 512][(off % 512) / 8].setByte(off % 8, bi) }
             in 917504..983039 -> tadInputBin[addr - 917504] = byte   // TAD input buffer
@@ -1205,33 +1209,60 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         return amigaPeriodToNoteVal(newPeriod)
     }
 
+    /**
+     * Resolve the active wrap region for an envelope based on the LOOP and
+     * SUSTAIN words and key state.
+     *
+     * Encoding (terranmon.txt:2049+, 2114+):
+     *   LOOP word (offset 15/17/19):       0b 000_sssss_Xcb_eeeee  (X = p/m flag; 0 for volume)
+     *   SUSTAIN word (offset 189/191/193): 0b 000_sssss_00b_eeeee
+     *   In both, bit 5 = b (enable). bits 12..8 = start, bits 4..0 = end.
+     *
+     * Priority (matches schismtracker player/sndmix.c:480-499):
+     *   if SUSTAIN.b and !keyOff : wrap (sus_start, sus_end)
+     *   elif LOOP.b              : wrap (loop_start, loop_end)
+     *   else                     : no wrap (envelope walks forward and holds)
+     *
+     * Writes -1 to both `outRange[0]` (start) and `outRange[1]` (end) when no wrap is active.
+     */
+    private inline fun resolveEnvWrap(loopWord: Int, sustainWord: Int, keyOff: Boolean,
+                                       outRange: IntArray) {
+        val susB = (sustainWord ushr 5) and 1 != 0
+        val loopB = (loopWord ushr 5) and 1 != 0
+        if (susB && !keyOff) {
+            outRange[0] = (sustainWord ushr 8) and 0x1F
+            outRange[1] = sustainWord and 0x1F
+        } else if (loopB) {
+            outRange[0] = (loopWord ushr 8) and 0x1F
+            outRange[1] = loopWord and 0x1F
+        } else {
+            outRange[0] = -1
+            outRange[1] = -1
+        }
+    }
+
+    // Reusable per-envelope wrap-range scratch (avoid per-tick allocation).
+    private val volWrap = IntArray(2)
+    private val panWrap = IntArray(2)
+    private val pfWrap  = IntArray(2)
+
     private fun advanceEnvelope(voice: Voice, inst: TaudInst, tickSec: Double) {
-        // 16-bit envelope-flag layout (terranmon.txt:2007-2030):
-        //   0b 0ut sssss pcb eeeee
-        //     bit 14 = u (enable sustain/loop)
-        //     bit 13 = t (sustain — 1=breaks on key-off, 0=loops forever)
-        //     bits 12..8 = sustain/loop start index (0..24)
-        //     bit  7 = p (channel-specific flag — fadeout zero / use default pan)
-        //     bit  6 = c (envelope carry)
-        //     bit  5 = b (use envelope at all)
-        //     bits 4..0 = sustain/loop end index (0..24)
         val maxIdx = 24
 
         // Volume envelope
-        val vSus       = inst.volEnvSustain
-        val vUseEnv    = (vSus ushr 5) and 1 != 0
-        if (vUseEnv && voice.volEnvOn) {
-            val vEnabled   = (vSus ushr 14) and 1 != 0
-            val vIsSustain = (vSus ushr 13) and 1 != 0
-            val vSusOn     = vEnabled && (!vIsSustain || !voice.keyOff)
-            val vSusStart  = (vSus ushr 8) and 0x1F
-            val vSusEnd    = vSus and 0x1F
+        val vEnvActive = (((inst.volEnvLoop ushr 5) and 1) or ((inst.volEnvSustainWord ushr 5) and 1)) != 0
+        if (vEnvActive && voice.volEnvOn) {
+            resolveEnvWrap(inst.volEnvLoop, inst.volEnvSustainWord, voice.keyOff, volWrap)
+            val wStart = volWrap[0]
+            val wEnd   = volWrap[1]
+            val wrapping = wStart >= 0
 
-            if (vSusOn && voice.envIndex == vSusEnd && vSusStart == vSusEnd) {
+            if (wrapping && voice.envIndex == wEnd && wStart == wEnd) {
+                // Hold at the wrap point (FT2 single-point sustain).
                 voice.envVolume = (inst.volEnvelopes[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
-            } else if (vSusOn && voice.envIndex == vSusEnd) {
+            } else if (wrapping && voice.envIndex == wEnd) {
                 voice.envTimeSec = 0.0
-                voice.envIndex = vSusStart
+                voice.envIndex = wStart
                 voice.envVolume = (inst.volEnvelopes[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
             } else if (voice.envIndex >= maxIdx) {
                 voice.envVolume = (inst.volEnvelopes[maxIdx].value / 63.0).coerceIn(0.0, 1.0)
@@ -1243,7 +1274,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                     voice.envTimeSec += tickSec
                     if (voice.envTimeSec >= vOffset) {
                         voice.envTimeSec -= vOffset
-                        val nextIdx = if (vSusOn && voice.envIndex == vSusEnd) vSusStart
+                        val nextIdx = if (wrapping && voice.envIndex == wEnd) wStart
                                       else (voice.envIndex + 1).coerceAtMost(maxIdx)
                         voice.envIndex = nextIdx
                         voice.envVolume = (inst.volEnvelopes[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
@@ -1258,20 +1289,18 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
 
         // Pan envelope (only when active for this instrument)
         if (!voice.hasPanEnv || !voice.panEnvOn) return
-        val pSus       = inst.panEnvSustain
-        val pUseEnv    = (pSus ushr 5) and 1 != 0
-        if (!pUseEnv) return
-        val pEnabled   = (pSus ushr 14) and 1 != 0
-        val pIsSustain = (pSus ushr 13) and 1 != 0
-        val pSusOn     = pEnabled && (!pIsSustain || !voice.keyOff)
-        val pSusStart  = (pSus ushr 8) and 0x1F
-        val pSusEnd    = pSus and 0x1F
+        val pEnvActive = (((inst.panEnvLoop ushr 5) and 1) or ((inst.panEnvSustainWord ushr 5) and 1)) != 0
+        if (!pEnvActive) return
+        resolveEnvWrap(inst.panEnvLoop, inst.panEnvSustainWord, voice.keyOff, panWrap)
+        val pStart = panWrap[0]
+        val pEnd   = panWrap[1]
+        val pWrapping = pStart >= 0
 
-        if (pSusOn && voice.envPanIndex == pSusEnd && pSusStart == pSusEnd) {
+        if (pWrapping && voice.envPanIndex == pEnd && pStart == pEnd) {
             voice.envPan = inst.panEnvelopes[voice.envPanIndex].value / 255.0
-        } else if (pSusOn && voice.envPanIndex == pSusEnd) {
+        } else if (pWrapping && voice.envPanIndex == pEnd) {
             voice.envPanTimeSec = 0.0
-            voice.envPanIndex = pSusStart
+            voice.envPanIndex = pStart
             voice.envPan = inst.panEnvelopes[voice.envPanIndex].value / 255.0
         } else if (voice.envPanIndex >= maxIdx) {
             voice.envPan = inst.panEnvelopes[maxIdx].value / 255.0
@@ -1283,7 +1312,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 voice.envPanTimeSec += tickSec
                 if (voice.envPanTimeSec >= pOffset) {
                     voice.envPanTimeSec -= pOffset
-                    val nextIdx = if (pSusOn && voice.envPanIndex == pSusEnd) pSusStart
+                    val nextIdx = if (pWrapping && voice.envPanIndex == pEnd) pStart
                                   else (voice.envPanIndex + 1).coerceAtMost(maxIdx)
                     voice.envPanIndex = nextIdx
                     voice.envPan = inst.panEnvelopes[voice.envPanIndex].value / 255.0
@@ -1303,14 +1332,12 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
     private fun advancePfEnvelope(voice: Voice, inst: TaudInst, tickSec: Double) {
         if (!voice.hasPfEnv || !voice.pfEnvOn) return
         val maxIdx = 24
-        val pSus       = inst.pfEnvSustain
-        val pUseEnv    = (pSus ushr 5) and 1 != 0
-        if (!pUseEnv) return
-        val pEnabled   = (pSus ushr 14) and 1 != 0
-        val pIsSustain = (pSus ushr 13) and 1 != 0
-        val pSusOn     = pEnabled && (!pIsSustain || !voice.keyOff)
-        val pSusStart  = (pSus ushr 8) and 0x1F
-        val pSusEnd    = pSus and 0x1F
+        val pEnvActive = (((inst.pfEnvLoop ushr 5) and 1) or ((inst.pfEnvSustainWord ushr 5) and 1)) != 0
+        if (!pEnvActive) return
+        resolveEnvWrap(inst.pfEnvLoop, inst.pfEnvSustainWord, voice.keyOff, pfWrap)
+        val pSusStart = pfWrap[0]
+        val pSusEnd   = pfWrap[1]
+        val pSusOn    = pSusStart >= 0
 
         if (pSusOn && voice.envPfIndex == pSusEnd && pSusStart == pSusEnd) {
             voice.envPfValue = inst.pfEnvelopes[voice.envPfIndex].value / 255.0
@@ -1523,7 +1550,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         val sampleLen = inst.sampleLength.coerceAtLeast(1)
         val loopStart = inst.sampleLoopStart.toDouble()
         val loopEnd = inst.sampleLoopEnd.toDouble().coerceAtLeast(1.0)
-        val binMax = 737279  // sampleBin is 737280 bytes (0..737279)
+        val binMax = 720895  // sampleBin is 720896 bytes (0..720895)
 
         val i0 = voice.samplePos.toInt().coerceIn(0, sampleLen - 1)
         val i1 = (i0 + 1).coerceAtMost(sampleLen - 1)
@@ -1578,11 +1605,12 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         voice.envPanIndex = 0
         voice.envPanTimeSec = 0.0
         voice.envPan = inst.panEnvelopes[0].value / 255.0
-        // Pan envelope is active when the `b` (use envelope) flag is set in panEnvSustain.
-        voice.hasPanEnv = (inst.panEnvSustain ushr 5) and 1 != 0
+        // Pan envelope is active when EITHER the LOOP word's b bit OR the SUSTAIN word's b bit is set.
+        voice.hasPanEnv = (((inst.panEnvLoop ushr 5) and 1) or ((inst.panEnvSustainWord ushr 5) and 1)) != 0
         // Pitch/filter envelope state.
-        voice.hasPfEnv      = (inst.pfEnvSustain ushr 5) and 1 != 0
-        voice.envPfIsFilter = (inst.pfEnvSustain ushr 7) and 1 != 0
+        voice.hasPfEnv      = (((inst.pfEnvLoop ushr 5) and 1) or ((inst.pfEnvSustainWord ushr 5) and 1)) != 0
+        // The pf 'm' mode bit (pitch=0, filter=1) lives in the LOOP word at bit 7.
+        voice.envPfIsFilter = (inst.pfEnvLoop ushr 7) and 1 != 0
         voice.envPfIndex    = 0
         voice.envPfTimeSec  = 0.0
         voice.envPfValue    = if (voice.hasPfEnv) inst.pfEnvelopes[0].value / 255.0 else 0.5
@@ -1597,8 +1625,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         voice.randomPanBias = if (inst.panSwing != 0)
             (Math.random() * (2 * inst.panSwing + 1)).toInt() - inst.panSwing else 0
         // Default pan: applied unless the pattern row has already overridden channelPan.
-        // We treat the pan envelope "p" flag (panEnvSustain bit 7) as "use default pan".
-        if ((inst.panEnvSustain ushr 7) and 1 != 0) {
+        // The pan envelope's 'p' flag ("use default pan") lives in the pan LOOP word at bit 7.
+        if ((inst.panEnvLoop ushr 7) and 1 != 0) {
             voice.channelPan = inst.defaultPan
             voice.rowPan = (voice.channelPan ushr 2).coerceIn(0, 63)
         }
@@ -1874,12 +1902,26 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 // call this an "instrument-only retrigger"; in MOD/S3M/IT the sample
                 // keeps playing, but the channel's instrument reference advances.
                 0xFFFF -> { if (row.instrment != 0) voice.instrumentId = row.instrment }
-                0x0000 -> { voice.keyOff = true; voice.active = false }  // key-off; breaks sustain loop
-                0xFFFE -> voice.active = false                  // note cut
+                // Key-off: release sustain; envelope walks past the sustain point and the fadeout
+                // begins (see the foreground-voice "Volume fadeout" block). The voice deactivates when
+                // fadeoutVolume reaches 0, or immediately if FT2-mode fadeStep == 0. Setting
+                // voice.active = false here would defeat both — instruments with sustain points
+                // and non-zero fadeout (FT2 sustain-then-fade idiom) would be cut on the spot.
+                0x0000 -> { voice.keyOff = true }
+                0xFFFE -> voice.active = false                  // note cut (immediate)
                 else -> {
                     if (toneG && voice.active) {
                         // Tone porta: target the note, do not retrigger sample.
                         voice.tonePortaTarget = row.note
+                        // Instrument byte on a porta row reloads the channel's default
+                        // volume even though the sample isn't retriggered. Mirrors schism
+                        // csf_instrument_change (effects.c:1302) which writes
+                        // chan->volume = psmp->volume whenever inst_column is set.
+                        if (row.instrment != 0) {
+                            voice.instrumentId = row.instrment
+                            voice.channelVolume = 0x3F
+                            voice.rowVolume = 0x3F
+                        }
                     } else if ((row.effect == EffectOp.OP_S) && ((row.effectArg ushr 12) and 0xF) == 0xD) {
                         // Note delay: defer trigger to the requested tick. NNA fires when the
                         // deferred trigger actually executes, not now.
@@ -2364,14 +2406,19 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             // Volume fadeout: after key-off OR Note-Fade NNA, decrement per tick.
             // The 12-bit fadeStep is split across volumeFadeoutLow + low nibble of fadeoutHigh.
             // Divisor selects per-tracker semantics:
-            //   FT2 mode (fadeoutCutOnZero=true):  fadeStep / 65536 per tick — matches FT2 .XM (16-bit accumulator, decrement = stored).
-            //   IT  mode (fadeoutCutOnZero=false): fadeStep / 1024  per tick — matches Schism (sndmix.c:331-339 + effects.c:1261:
-            //                                                                  accumulator 65536, decrement = (stored<<5)<<1 = stored·64).
+            //   FT2 mode (fadeoutCutOnZero=true):  fadeStep / 32768 per tick — matches ft2-clone
+            //                                       (ft2_replayer.c:387-390, 1469-1481): the FT2 XM
+            //                                       file format docs claim the accumulator is 16-bit
+            //                                       (65536), but the actual replayer initialises
+            //                                       fadeoutVol to 32768 and decrements by stored.
+            //   IT  mode (fadeoutCutOnZero=false): fadeStep / 1024  per tick — matches Schism
+            //                                       (sndmix.c:331-339 + effects.c:1261: accumulator
+            //                                       65536, decrement = (stored<<5)<<1 = stored·64).
             // Stored 0: FT2 mode cuts on key-off; IT mode leaves voice playing (no fade).
             if (voice.keyOff || voice.noteFading) {
                 val fadeStep = inst.volumeFadeoutLow or ((inst.fadeoutHigh and 0x0F) shl 8)
                 if (fadeStep > 0) {
-                    val divisor = if (ts.fadeoutCutOnZero) 65536.0 else 1024.0
+                    val divisor = if (ts.fadeoutCutOnZero) 32768.0 else 1024.0
                     voice.fadeoutVolume = (voice.fadeoutVolume - fadeStep / divisor).coerceAtLeast(0.0)
                     if (voice.fadeoutVolume <= 0.0) voice.active = false
                 } else if (ts.fadeoutCutOnZero) {
@@ -2427,7 +2474,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             if (bg.keyOff || bg.noteFading) {
                 val fadeStep = inst.volumeFadeoutLow or ((inst.fadeoutHigh and 0x0F) shl 8)
                 if (fadeStep > 0) {
-                    val divisor = if (ts.fadeoutCutOnZero) 65536.0 else 1024.0
+                    // Divisor must mirror the foreground-voice fade path above
+                    // (FT2 mode: 32768 to match ft2_replayer.c:387-390+1469-1481).
+                    val divisor = if (ts.fadeoutCutOnZero) 32768.0 else 1024.0
                     bg.fadeoutVolume = (bg.fadeoutVolume - fadeStep / divisor).coerceAtLeast(0.0)
                 } else if (ts.fadeoutCutOnZero) {
                     bg.active = false
@@ -2625,7 +2674,13 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             }
             else -> {
                 ts.rowIndex++
-                if (ts.rowIndex >= 64) {
+                // LEN cue instruction shortens the effective row count so the
+                // engine wraps to the next cue early. Patterns fed by the
+                // converter are still 64 rows long; rows past `rowLimit` are
+                // silent padding that we skip here.
+                val currentInst = cueSheet[ts.cuePos].instruction
+                val rowLimit = if (currentInst is PlayInstPatLen) currentInst.rows else 64
+                if (ts.rowIndex >= rowLimit) {
                     ts.rowIndex = 0
                     advanceTrackerCue(ts, playhead)
                     resetPatternLoopState(ts)
@@ -2637,14 +2692,38 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
 
     internal data class PlayCue(
         val patterns: IntArray = IntArray(20) { 0xFFF },
-        var instruction: PlayInstruction = PlayInstNop
+        var instruction: PlayInstruction = PlayInstNop,
+        var instByte30: Int = 0,
+        var instByte31: Int = 0,
     ) {
         // Cue layout (32 bytes, 20 voices, 12-bit pattern numbers):
         //   bytes  0-9:  packed low nybbles  (byte i => voice i*2 in hi, voice i*2+1 in lo)
         //   bytes 10-19: packed mid nybbles  (same packing)
         //   bytes 20-29: packed high nybbles (same packing)
-        //   byte  30:    instruction
-        //   byte  31:    unused
+        //   byte  30:    instruction (low byte)
+        //   byte  31:    instruction arg byte (used by 2-byte forms: LEN, BAK, FWD, JMP)
+        // Decoding rules per terranmon.txt §"Cue Sheet":
+        //   00000010 00xxxxxx (LEN)  pattern length: rows = (xxxxxx) + 1, range 1..64
+        //   00000001          (HALT) end of song
+        //   00000000          (NOP)  default 64-row cue
+        //   1000xxxx yyyyyyyy (BAK)  go back 12-bit arg
+        //   1001xxxx yyyyyyyy (FWD)  skip forward 12-bit arg
+        //   1111xxxx yyyyyyyy (JMP)  go to absolute pattern (currently unused)
+        private fun recomputeInstruction() {
+            val b30 = instByte30
+            val b31 = instByte31
+            instruction = when {
+                b30 == 0x02 -> PlayInstPatLen((b31 and 0x3F) + 1)
+                b30 == 0x01 -> PlayInstHalt
+                b30 == 0x00 -> PlayInstNop
+                // BAK: 1000xxxx yyyyyyyy — 12-bit arg combining b30 low nybble + b31.
+                (b30 and 0xF0) == 0x80 -> PlayInstGoBack(((b30 and 0xF) shl 8) or (b31 and 0xFF))
+                // FWD: 1001xxxx yyyyyyyy — 12-bit arg.
+                (b30 and 0xF0) == 0x90 -> PlayInstSkip(((b30 and 0xF) shl 8) or (b31 and 0xFF))
+                // JMP: 1111xxxx yyyyyyyy — reserved (decoder TBD).
+                else -> PlayInstNop
+            }
+        }
         fun write(index: Int, byte: Int) = when (index) {
             in 0..9 -> {
                 val b = index * 2
@@ -2661,13 +2740,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 patterns[b]     = (patterns[b]     and 0x0FF) or (((byte ushr 4) and 0xF) shl 8)
                 patterns[b + 1] = (patterns[b + 1] and 0x0FF) or ((byte and 0xF) shl 8)
             }
-            30 -> { instruction = when {
-                    byte >= 128 -> PlayInstGoBack(byte and 127)
-                    byte in 16..31 -> PlayInstSkip(byte and 15)
-                    byte == 1 -> PlayInstHalt
-                    else -> PlayInstNop
-            } }
-            31 -> {}
+            30 -> { instByte30 = byte and 0xFF; recomputeInstruction() }
+            31 -> { instByte31 = byte and 0xFF; recomputeInstruction() }
             else -> throw InternalError("Bad offset $index")
         }
         fun read(index: Int): Byte = when (index) {
@@ -2683,13 +2757,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 val b = (index - 20) * 2
                 ((((patterns[b] ushr 8) and 0xF) shl 4) or ((patterns[b + 1] ushr 8) and 0xF)).toByte()
             }
-            30 -> when (instruction) {
-                is PlayInstGoBack -> (0b10000000 or instruction.arg).toByte()
-                is PlayInstSkip   -> (0b00010000 or instruction.arg).toByte()
-                is PlayInstHalt   -> 1
-                else              -> 0
-            }
-            31 -> 0
+            30 -> instByte30.toByte()
+            31 -> instByte31.toByte()
             else -> throw InternalError("Bad offset $index")
         }
     }
@@ -2697,6 +2766,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
     internal open class PlayInstruction(val arg: Int)
     internal class PlayInstGoBack(arg: Int) : PlayInstruction(arg)
     internal class PlayInstSkip(arg: Int) : PlayInstruction(arg)
+    internal class PlayInstPatLen(val rows: Int) : PlayInstruction(rows)
     internal object PlayInstHalt : PlayInstruction(0)
     internal object PlayInstNop : PlayInstruction(0)
 
@@ -2768,7 +2838,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         var envPfIndex = 0
         var envPfTimeSec = 0.0
         var envPfValue = 0.5               // 0.0..1.0; 0.5 = unity (no pitch shift / unmodulated cutoff)
-        var envPfIsFilter = false          // mirror of inst.pfEnvSustain bit 7 latched at trigger
+        var envPfIsFilter = false          // mirror of inst.pfEnvLoop bit 7 latched at trigger
 
         // Volume fadeout — engaged after key-off, decays to 0 at rate inst.volumeFadeoutLow.
         var fadeoutVolume = 1.0
@@ -3143,77 +3213,92 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
 
     data class TaudInstEnvPoint(var value: Int, var offset: ThreeFiveMiniUfloat)
     /**
-     * 192-byte instrument record (terranmon.txt:1997-2070).
+     * 256-byte instrument record (terranmon.txt:2001+).
+     *
+     * Each envelope (vol/pan/pf) has THREE independent regions:
+     *   - 25 envelope nodes (offsets 21 / 71 / 121).
+     *   - LOOP word     (offsets 15 / 17 / 19) — always-active wrap region.
+     *   - SUSTAIN word  (offsets 189 / 191 / 193) — wrap region active ONLY
+     *                   while key is on; released on key-off.
+     *
+     * Priority during playback (matches schismtracker player/sndmix.c:480-499):
+     *   if SUSTAIN.b == 1 and !key_off : wrap (sus_start, sus_end)
+     *   elif LOOP.b == 1               : wrap (loop_start, loop_end)
+     *   else                           : hold at last node
+     *
      * Layout:
      *   0..3   u32 sample pointer
      *   4..5   u16 sample length
-     *   6..7   u16 sampling rate at Middle C (0x5000) // NOTE: Taud treats middle C as C4, but some trackers show you C4 even if they are internally C5. Best practice: copy the value as-is.
+     *   6..7   u16 sampling rate at Middle C (0x5000)
      *   8..9   u16 play start
      *   10..11 u16 loop start
      *   12..13 u16 loop end
      *   14     u8  sample flags (low 2 bits = loop mode 0..3)
-     *   15..16 u16 volume envelope flags    (0b 0ut sssss pcb eeeee)
-     *   17..18 u16 panning envelope flags
-     *   19..20 u16 pitch/filter envelope flags
-     *   21..70  Bit16×25 volume envelope points (value 0x00-0x3F + minifloat dt)
-     *   71..120 Bit16×25 panning envelope points (value 0x00-0xFF, 0x80=centre)
+     *   15..16 u16 volume envelope LOOP word    (0b 000 sssss 0cb eeeee)
+     *   17..18 u16 panning envelope LOOP word   (0b 000 sssss pcb eeeee, p=use-default-pan)
+     *   19..20 u16 pitch/filter envelope LOOP word (0b 000 sssss mcb eeeee, m=mode)
+     *   21..70  Bit16×25 volume envelope points
+     *   71..120 Bit16×25 panning envelope points
      *   121..170 Bit16×25 pitch/filter envelope points
      *   171    u8 instrument global volume
      *   172    u8 volume fadeout low bits
      *   173    u8 fadeout high bits (low nibble; 0b 0000 ffff)
      *   174    u8 volume swing
-     *   175    u8 vibrato speed (FT2 instrumentwise; IT Vis rescaled to 0..255)
-     *   176    u8 vibrato sweep (FT2-only ramp ticks; 0 for IT)
+     *   175    u8 vibrato speed
+     *   176    u8 vibrato sweep
      *   177    u8 default pan
-     *   178..179 u16 pitch-pan centre (4096-TET)
+     *   178..179 u16 pitch-pan centre
      *   180    s8 pitch-pan separation
      *   181    u8 pan swing
      *   182    u8 default cutoff
      *   183    u8 default resonance
-     *   184..185 u16 sample detune (4096-TET, signed stored as u16)
-     *   186    u8 instrument flag (0b 000 www nn — NNA bits 0-1, vib waveform bits 2-4)
-     *                   NNA: 00=note off, 01=note cut, 10=continue, 11=note fade
-     *                   waveform: 0=sine, 1=ramp-down, 2=square, 3=random, 4=ramp-up (FT2)
-     *   187    u8 vibrato depth (0..255 full range)
-     *   188    u8 vibrato rate  (0..255 full range — IT samplewise Vir)
-     *   189    u8 duplicate-check / action (IT-only — 0b 0000 aadd)
-     *                   dd  = DCT (Duplicate Check Type) 0=off, 1=note, 2=sample, 3=instrument
-     *                   aa  = DCA (Duplicate Check Action) 0=note cut, 1=note off, 2=note fade
-     *   190..191 byte[2] reserved
+     *   184..185 u16 sample detune (signed)
+     *   186    u8 instrument flag (NNA bits 0-1, vib waveform bits 2-4)
+     *   187    u8 vibrato depth
+     *   188    u8 vibrato rate
+     *   189..190 u16 volume envelope SUSTAIN word   (0b 000 sssss 00b eeeee)
+     *   191..192 u16 panning envelope SUSTAIN word
+     *   193..194 u16 pitch/filter envelope SUSTAIN word
+     *   195    u8 duplicate-check / action (relocated from old offset 189)
+     *                  bits 0-1 = DCT, bits 2-3 = DCA
+     *   196..255 reserved (60 bytes)
      */
     data class TaudInst(
         var index: Int,
 
-        var samplePtr: Int,                 // 32-bit sample bin offset
+        var samplePtr: Int,
         var sampleLength: Int,
-        var samplingRate: Int,              // rate at MIDDLE_C
+        var samplingRate: Int,
         var samplePlayStart: Int,
         var sampleLoopStart: Int,
         var sampleLoopEnd: Int,
-        var loopMode: Int,                  // byte 14, low 3 bits (bits 0-1: loop kind, bit 2: sustain)
-        var volEnvSustain: Int,             // bytes 15-16 (16-bit, see flag layout)
-        var panEnvSustain: Int,             // bytes 17-18
-        var pfEnvSustain: Int,              // bytes 19-20 (pitch/filter)
-        var instGlobalVolume: Int,          // byte 171
-        var volEnvelopes: Array<TaudInstEnvPoint>,   // 25 points
-        var panEnvelopes: Array<TaudInstEnvPoint>,   // 25 points
-        var pfEnvelopes: Array<TaudInstEnvPoint>,    // 25 points (pitch/filter)
-        var volumeFadeoutLow: Int,          // byte 172
-        var fadeoutHigh: Int,               // byte 173 (low nibble — 0b 0000 ffff)
-        var volumeSwing: Int,               // byte 174
-        var vibratoSpeed: Int,              // byte 175
-        var vibratoSweep: Int,              // byte 176 (FT2 ramp ticks)
-        var defaultPan: Int,                // byte 177
-        var pitchPanCentre: Int,            // bytes 178-179
-        var pitchPanSeparation: Int,        // byte 180 (signed)
-        var panSwing: Int,                  // byte 181
-        var defaultCutoff: Int,             // byte 182
-        var defaultResonance: Int,          // byte 183
-        var sampleDetune: Int,              // bytes 184-185 (signed 4096-TET stored as u16)
-        var instrumentFlag: Int,            // byte 186 (NNA + vibrato waveform)
-        var vibratoDepth: Int,              // byte 187 (0..255 full range)
-        var vibratoRate: Int,               // byte 188 (IT samplewise Vir)
-        var dupCheckFlag: Int               // byte 189 (DCT bits 0-1, DCA bits 2-3)
+        var loopMode: Int,
+        var volEnvLoop: Int,                // bytes 15-16 (LOOP word)
+        var panEnvLoop: Int,                // bytes 17-18
+        var pfEnvLoop: Int,                 // bytes 19-20
+        var instGlobalVolume: Int,
+        var volEnvelopes: Array<TaudInstEnvPoint>,
+        var panEnvelopes: Array<TaudInstEnvPoint>,
+        var pfEnvelopes: Array<TaudInstEnvPoint>,
+        var volumeFadeoutLow: Int,
+        var fadeoutHigh: Int,
+        var volumeSwing: Int,
+        var vibratoSpeed: Int,
+        var vibratoSweep: Int,
+        var defaultPan: Int,
+        var pitchPanCentre: Int,
+        var pitchPanSeparation: Int,
+        var panSwing: Int,
+        var defaultCutoff: Int,
+        var defaultResonance: Int,
+        var sampleDetune: Int,
+        var instrumentFlag: Int,
+        var vibratoDepth: Int,
+        var vibratoRate: Int,
+        var volEnvSustainWord: Int,         // bytes 189-190 (SUSTAIN word)
+        var panEnvSustainWord: Int,         // bytes 191-192
+        var pfEnvSustainWord: Int,          // bytes 193-194
+        var dupCheckFlag: Int               // byte 195 (relocated from 189)
     ) {
         constructor(index: Int) : this(
             index, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF,
@@ -3221,7 +3306,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             Array(25) { TaudInstEnvPoint(0x80, ThreeFiveMiniUfloat(0)) },
             Array(25) { TaudInstEnvPoint(0x80, ThreeFiveMiniUfloat(0)) },
             0, 0, 0, 0, 0, 0x80, 0x5000, 0, 0, 0xFF, 0,
-            0, 0, 0, 0, 0
+            0, 0, 0, 0, 0, 0, 0, 0
         )
 
         /** Sample-flag byte 14 bit 2 — when set, the sample loop is a sustain loop:
@@ -3240,8 +3325,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         /** Duplicate Check Action — 0=note cut, 1=note off, 2=note fade. */
         val duplicateCheckAction: Int get() = (dupCheckFlag ushr 2) and 0x03
 
-        // Reserved padding at offsets 190..191 (2 bytes per instrument).
-        private val reserved = ByteArray(2)
+        // Reserved padding at offsets 196..255 (60 bytes per instrument).
+        private val reserved = ByteArray(60)
 
         // Funk repeat (S$Fx00) bit-mask — non-destructive XOR overlay across the loop region.
         // Lazily allocated; a 1-bit flips the byte, a 0-bit leaves it intact.
@@ -3294,12 +3379,12 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             13 -> sampleLoopEnd.ushr(8).toByte()
 
             14 -> (loopMode and 7).toByte()
-            15 -> volEnvSustain.toByte()
-            16 -> volEnvSustain.ushr(8).toByte()
-            17 -> panEnvSustain.toByte()
-            18 -> panEnvSustain.ushr(8).toByte()
-            19 -> pfEnvSustain.toByte()
-            20 -> pfEnvSustain.ushr(8).toByte()
+            15 -> volEnvLoop.toByte()
+            16 -> volEnvLoop.ushr(8).toByte()
+            17 -> panEnvLoop.toByte()
+            18 -> panEnvLoop.ushr(8).toByte()
+            19 -> pfEnvLoop.toByte()
+            20 -> pfEnvLoop.ushr(8).toByte()
 
             in 21..70  -> envPointGet(volEnvelopes, 21,  offset)
             in 71..120 -> envPointGet(panEnvelopes, 71,  offset)
@@ -3323,8 +3408,14 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             186 -> instrumentFlag.toByte()
             187 -> vibratoDepth.toByte()
             188 -> vibratoRate.toByte()
-            189 -> dupCheckFlag.toByte()
-            in 190..191 -> reserved[offset - 190]
+            189 -> volEnvSustainWord.toByte()
+            190 -> volEnvSustainWord.ushr(8).toByte()
+            191 -> panEnvSustainWord.toByte()
+            192 -> panEnvSustainWord.ushr(8).toByte()
+            193 -> pfEnvSustainWord.toByte()
+            194 -> pfEnvSustainWord.ushr(8).toByte()
+            195 -> dupCheckFlag.toByte()
+            in 196..255 -> reserved[offset - 196]
             else -> throw InternalError("Bad offset $offset")
         }
 
@@ -3350,12 +3441,12 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             13 -> { sampleLoopEnd = (sampleLoopEnd and 0x00ff) or (byte shl 8) }
 
             14 -> { loopMode = byte and 7 }
-            15 -> { volEnvSustain = (volEnvSustain and 0xff00) or byte }
-            16 -> { volEnvSustain = (volEnvSustain and 0x00ff) or (byte shl 8) }
-            17 -> { panEnvSustain = (panEnvSustain and 0xff00) or byte }
-            18 -> { panEnvSustain = (panEnvSustain and 0x00ff) or (byte shl 8) }
-            19 -> { pfEnvSustain = (pfEnvSustain and 0xff00) or byte }
-            20 -> { pfEnvSustain = (pfEnvSustain and 0x00ff) or (byte shl 8) }
+            15 -> { volEnvLoop = (volEnvLoop and 0xff00) or byte }
+            16 -> { volEnvLoop = (volEnvLoop and 0x00ff) or (byte shl 8) }
+            17 -> { panEnvLoop = (panEnvLoop and 0xff00) or byte }
+            18 -> { panEnvLoop = (panEnvLoop and 0x00ff) or (byte shl 8) }
+            19 -> { pfEnvLoop = (pfEnvLoop and 0xff00) or byte }
+            20 -> { pfEnvLoop = (pfEnvLoop and 0x00ff) or (byte shl 8) }
 
             in 21..70  -> envPointSet(volEnvelopes, 21,  offset, byte)
             in 71..120 -> envPointSet(panEnvelopes, 71,  offset, byte)
@@ -3363,14 +3454,14 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
 
             171 -> { instGlobalVolume = byte and 0xFF }
             172 -> { volumeFadeoutLow = byte and 0xFF }
-            173 -> { fadeoutHigh = byte and 0x0F }   // low nibble only (0b 0000 ffff)
+            173 -> { fadeoutHigh = byte and 0x0F }
             174 -> { volumeSwing = byte and 0xFF }
             175 -> { vibratoSpeed = byte and 0xFF }
             176 -> { vibratoSweep = byte and 0xFF }
             177 -> { defaultPan = byte and 0xFF }
             178 -> { pitchPanCentre = (pitchPanCentre and 0xff00) or byte }
             179 -> { pitchPanCentre = (pitchPanCentre and 0x00ff) or (byte shl 8) }
-            180 -> { pitchPanSeparation = byte.toByte().toInt() }   // signed
+            180 -> { pitchPanSeparation = byte.toByte().toInt() }
             181 -> { panSwing = byte and 0xFF }
             182 -> { defaultCutoff = byte and 0xFF }
             183 -> { defaultResonance = byte and 0xFF }
@@ -3379,8 +3470,14 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             186 -> { instrumentFlag = byte and 0xFF }
             187 -> { vibratoDepth = byte and 0xFF }
             188 -> { vibratoRate = byte and 0xFF }
-            189 -> { dupCheckFlag = byte and 0x0F }   // DCT (bits 0-1) + DCA (bits 2-3)
-            in 190..191 -> { reserved[offset - 190] = byte.toByte() }
+            189 -> { volEnvSustainWord = (volEnvSustainWord and 0xff00) or byte }
+            190 -> { volEnvSustainWord = (volEnvSustainWord and 0x00ff) or (byte shl 8) }
+            191 -> { panEnvSustainWord = (panEnvSustainWord and 0xff00) or byte }
+            192 -> { panEnvSustainWord = (panEnvSustainWord and 0x00ff) or (byte shl 8) }
+            193 -> { pfEnvSustainWord = (pfEnvSustainWord and 0xff00) or byte }
+            194 -> { pfEnvSustainWord = (pfEnvSustainWord and 0x00ff) or (byte shl 8) }
+            195 -> { dupCheckFlag = byte and 0x0F }
+            in 196..255 -> { reserved[offset - 196] = byte.toByte() }
             else -> throw InternalError("Bad offset $offset")
         }
     }
diff --git a/xm2taud.py b/xm2taud.py
new file mode 100644
index 0000000..4a1c803
--- /dev/null
+++ b/xm2taud.py
@@ -0,0 +1,1376 @@
+#!/usr/bin/env python3
+"""xm2taud.py — Convert FastTracker 2 (.xm) to TSVM Taud (.taud)
+
+Usage:
+    python3 xm2taud.py input.xm output.taud [-v]
+
+Limits:
+    - Up to 20 XM channels (excess unused).
+    - Sample bin is 737280 bytes; if all samples together exceed this,
+      every sample is globally resampled down (with c2spd adjusted) so
+      pitch is preserved, mirroring it2taud / mod2taud.
+    - Multi-sample instruments use the sample selected by the *current
+      note's* keymap entry; the converter materialises one Taud
+      instrument slot per (XM instrument, sample-in-instrument) pair.
+
+Pattern length policy:
+    - XM patterns ≤ 64 rows → 1 Taud cue with the LEN ($02xx)
+      cuesheet instruction (rows < 64) or no instruction (rows == 64).
+    - XM patterns > 64 rows → split into ⌊rows/64⌋ full 64-row cues
+      plus, if rows % 64 != 0, a final cue holding the remainder rows
+      with the LEN instruction. Full 64-row cues emit no instruction.
+    - The cuesheet LEN instruction is decoded by AudioAdapter.kt — the
+      engine wraps to the next cue after `rows` rows instead of always
+      waiting for row 64.
+
+Effect support:
+    Full XM effect dispatch per TAUD_NOTE_EFFECTS.md (FastTracker 2 →
+    Taud conversion table). Volume column commands fold into either
+    the Taud volume column directly or as an aux effect on the main
+    effect slot when free, dropped otherwise (same policy as
+    it2taud's decode_volcol). Position-jump (Bxx) and pattern-break
+    (Dxx) are remapped to Taud cue indices.
+
+Reference:
+    XM format spec — reference_materials/MilkyTracker/resources/reference/xm-form.txt
+    Parser  — reference_materials/MilkyTracker/src/milkyplay/LoaderXM.cpp
+"""
+
+import argparse
+import gzip
+import math
+import struct
+import sys
+
+from taud_common import (
+    set_verbose, vprint,
+    TAUD_MAGIC, TAUD_VERSION, TAUD_HEADER_SIZE, TAUD_SONG_ENTRY,
+    SAMPLEBIN_SIZE, INSTBIN_SIZE, SAMPLEINST_SIZE,
+    PATTERN_ROWS, PATTERN_BYTES, NUM_PATTERNS_MAX, NUM_CUES, CUE_SIZE, NUM_VOICES,
+    NOTE_NOP, NOTE_KEYOFF, NOTE_CUT, TAUD_C4,
+    TOP_NONE, TOP_A, TOP_B, TOP_C, TOP_D, TOP_E, TOP_F, TOP_G, TOP_H, TOP_I,
+    TOP_J, TOP_K, TOP_L, TOP_O, TOP_Q, TOP_R, TOP_S, TOP_T, TOP_U, TOP_V, TOP_W, TOP_Y,
+    SEL_SET, SEL_UP, SEL_DOWN, SEL_FINE,
+    J_SEMI_TABLE,
+    d_arg_to_col, resample_linear, rescale_offset_effects, encode_cue, deduplicate_patterns,
+    normalise_sample, encode_song_entry,
+    CUE_INST_NOP, CUE_INST_HALT, CUE_INST_LEN, cue_instruction_len,
+)
+
+
+# ── XM constants ─────────────────────────────────────────────────────────────
+
+XM_MAGIC      = b'Extended Module: '   # 17 bytes; compared against data[:17]
+XM_NOTE_OFF   = 97                     # XM raw note value for key-off
+XM_RELNOTE_C4 = 49                     # XM note 49 (after relnote applied) = C-4
+
+# Sample type flags (tested with `&` against the sample header's flags byte)
+XM_SMP_LOOP_FWD      = 0x01
+XM_SMP_LOOP_PINGPONG = 0x02
+XM_SMP_LOOP_MASK     = 0x03   # isolates the loop-type bits before comparing
+XM_SMP_16BIT         = 0x10   # set → sample data is 16-bit
+
+# Envelope type flags (tested with `&` against the vol/pan envelope type byte)
+XM_ENV_ON       = 0x01   # clear → envelope is treated as disabled
+XM_ENV_SUSTAIN  = 0x02   # set → the sustain point is honoured
+XM_ENV_LOOP     = 0x04   # set → loop start/end are honoured
+
+SIGNATURE = b"xm2taud/TSVM  "          # 14 bytes
+
+
+# ── Minifloat LUT (must match it2taud / engine) ──────────────────────────────
+
+_MINUFLOAT_LUT = [   # 256 ascending entries; _nearest_minifloat() maps seconds → index
+    0.0, 0.03125, 0.0625, 0.09375, 0.125, 0.15625, 0.1875, 0.21875,
+    0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875,
+    0.5, 0.53125, 0.5625, 0.59375, 0.625, 0.65625, 0.6875, 0.71875,
+    0.75, 0.78125, 0.8125, 0.84375, 0.875, 0.90625, 0.9375, 0.96875,
+    1.0, 1.03125, 1.0625, 1.09375, 1.125, 1.15625, 1.1875, 1.21875,
+    1.25, 1.28125, 1.3125, 1.34375, 1.375, 1.40625, 1.4375, 1.46875,
+    1.5, 1.53125, 1.5625, 1.59375, 1.625, 1.65625, 1.6875, 1.71875,
+    1.75, 1.78125, 1.8125, 1.84375, 1.875, 1.90625, 1.9375, 1.96875,
+    2.0, 2.0625, 2.125, 2.1875, 2.25, 2.3125, 2.375, 2.4375,
+    2.5, 2.5625, 2.625, 2.6875, 2.75, 2.8125, 2.875, 2.9375,
+    3.0, 3.0625, 3.125, 3.1875, 3.25, 3.3125, 3.375, 3.4375,
+    3.5, 3.5625, 3.625, 3.6875, 3.75, 3.8125, 3.875, 3.9375,
+    4.0, 4.125, 4.25, 4.375, 4.5, 4.625, 4.75, 4.875,
+    5.0, 5.125, 5.25, 5.375, 5.5, 5.625, 5.75, 5.875,
+    6.0, 6.125, 6.25, 6.375, 6.5, 6.625, 6.75, 6.875,
+    7.0, 7.125, 7.25, 7.375, 7.5, 7.625, 7.75, 7.875,
+    8.0, 8.25, 8.5, 8.75, 9.0, 9.25, 9.5, 9.75,
+    10.0, 10.25, 10.5, 10.75, 11.0, 11.25, 11.5, 11.75,
+    12.0, 12.25, 12.5, 12.75, 13.0, 13.25, 13.5, 13.75,
+    14.0, 14.25, 14.5, 14.75, 15.0, 15.25, 15.5, 15.75,
+    16.0, 16.5, 17.0, 17.5, 18.0, 18.5, 19.0, 19.5,
+    20.0, 20.5, 21.0, 21.5, 22.0, 22.5, 23.0, 23.5,
+    24.0, 24.5, 25.0, 25.5, 26.0, 26.5, 27.0, 27.5,
+    28.0, 28.5, 29.0, 29.5, 30.0, 30.5, 31.0, 31.5,
+    32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0,
+    40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0,
+    48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0,
+    56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0,
+    64.0, 66.0, 68.0, 70.0, 72.0, 74.0, 76.0, 78.0,
+    80.0, 82.0, 84.0, 86.0, 88.0, 90.0, 92.0, 94.0,
+    96.0, 98.0, 100.0, 102.0, 104.0, 106.0, 108.0, 110.0,
+    112.0, 114.0, 116.0, 118.0, 120.0, 122.0, 124.0, 126.0,
+]
+
+
+def _nearest_minifloat(sec: float) -> int:
+    """Index of the _MINUFLOAT_LUT entry closest to `sec` (a tie picks the larger entry)."""
+    if sec <= 0.0:
+        return 0
+    if sec >= 126.0:
+        return 255
+    left, right = 0, len(_MINUFLOAT_LUT) - 1
+    while left < right:                        # lower-bound search: first entry >= sec
+        centre = (left + right) // 2
+        if _MINUFLOAT_LUT[centre] < sec:
+            left = centre + 1
+        else:
+            right = centre
+    below_is_closer = left > 0 and sec - _MINUFLOAT_LUT[left - 1] < _MINUFLOAT_LUT[left] - sec
+    return left - 1 if below_is_closer else left
+
+
+# ── Data classes ─────────────────────────────────────────────────────────────
+
+class XMHeader:
+    """Slot-only record; parse_xm_header() assigns every field."""
+    __slots__ = tuple('title tracker version header_size order_count restart_pos '
+                      'channels pattern_count instrument_count flags default_speed '
+                      'default_bpm order_list linear_freq'.split())
+
+
+class XMSample:
+    """Slot-only record for one XM sample header plus its decoded sample data."""
+    __slots__ = tuple('name length loop_start loop_length volume finetune flags '
+                      'panning rel_note sample_data is_16bit pingpong'.split())
+
+
+class XMInstrument:
+    """Slot-only record for one XM instrument; `samples` holds its XMSample list."""
+    __slots__ = tuple((
+        'name sample_count keymap '
+        'vol_env_pts pan_env_pts vol_env_count pan_env_count '
+        'vol_sustain vol_loop_start vol_loop_end '
+        'pan_sustain pan_loop_start pan_loop_end '
+        'vol_env_type pan_env_type '
+        'vib_type vib_sweep vib_depth vib_rate fadeout samples').split())
+
+
+class XMRow:
+    """One pattern cell; a freshly built cell has every field at 0 (= empty)."""
+    __slots__ = ('note', 'inst', 'volcol', 'effect', 'effect_arg')
+    def __init__(self):
+        self.note   = 0    # 0=empty, 1..96=pitch, 97=key off
+        self.inst   = 0    # 1-based; 0=none
+        self.volcol = 0    # 0=none; otherwise raw vol-col byte
+        self.effect = self.effect_arg = 0
+
+
+# ── Header parser ─────────────────────────────────────────────────────────────
+
+def _read_u8(data, off):  return data[off]
+def _read_u16(data, off): (value,) = struct.unpack_from('<H', data, off); return value
+def _read_u32(data, off): (value,) = struct.unpack_from('<I', data, off); return value
+
+
+def parse_xm_header(data: bytes) -> XMHeader:
+    """Parse the fixed XM header (bytes 0..79) and the 256-byte order table.
+
+    Calls sys.exit() on a bad magic or when fewer than 80 bytes are present;
+    an unexpected 0x1A marker, version or channel count only gets a vprint.
+    """
+    if data[:17] != XM_MAGIC:
+        sys.exit(f"error: not an XM file (bad magic: {data[:17]!r})")
+    if len(data) < 80:
+        sys.exit(f"error: truncated XM header ({len(data)} bytes, need at least 80)")
+    if data[37] != 0x1A:
+        vprint(f"  warning: expected 0x1A marker at offset 37, got 0x{data[37]:02X}")
+
+    h = XMHeader()
+    h.title   = data[17:37].rstrip(b'\x00 ').decode('latin-1', errors='replace')
+    h.tracker = data[38:58].rstrip(b'\x00 ').decode('latin-1', errors='replace')
+    # u16 version, u32 header_size, then eight u16 fields: offsets 58..79.
+    (h.version, h.header_size, h.order_count, h.restart_pos, h.channels,
+     h.pattern_count, h.instrument_count, h.flags,
+     h.default_speed, h.default_bpm) = struct.unpack_from('<HI8H', data, 58)
+    h.linear_freq = bool(h.flags & 0x01)
+    h.order_list  = list(data[80:80 + 256])
+
+    if h.version not in (0x0102, 0x0103, 0x0104):
+        vprint(f"  warning: unusual XM version 0x{h.version:04X}")
+    if h.channels < 2 or h.channels > 32:
+        vprint(f"  warning: unusual channel count {h.channels}")
+    return h
+
+
+# ── Pattern parser ────────────────────────────────────────────────────────────
+
+def parse_patterns(data: bytes, h: XMHeader, patterns_offset: int) -> tuple:
+    """Returns (patterns_rows, next_offset).
+
+    patterns_rows: list of (grid, rows) where grid is a list of `channels`
+    arrays, each `rows` long, of XMRow.
+    """
+    patterns = []
+    off = patterns_offset
+    for pi in range(h.pattern_count):
+        if off + 9 > len(data):   # the header fields read below span off .. off+8
+            sys.exit(f"error: truncated pattern {pi} header at offset {off}")
+        hdr_len    = _read_u32(data, off)   # packed body starts hdr_len bytes after `off`
+        # packing_type = data[off + 4]   # always 0
+        rows       = _read_u16(data, off + 5)
+        packed_sz  = _read_u16(data, off + 7)
+        body_off   = off + hdr_len
+        if body_off + packed_sz > len(data):
+            sys.exit(f"error: truncated pattern {pi} body")
+
+        grid = [[XMRow() for _ in range(rows)] for _ in range(h.channels)]
+        if packed_sz == 0:   # no packed data: every cell keeps its XMRow() defaults
+            patterns.append((grid, rows))
+            off = body_off + packed_sz
+            continue
+
+        p = body_off               # read cursor inside the packed body
+        end = body_off + packed_sz
+        for r in range(rows):
+            for c in range(h.channels):
+                if p >= end:       # packed data used up; the rest of this row stays empty
+                    break
+                first = data[p]; p += 1
+                cell = grid[c][r]
+                if first & 0x80:
+                    # Packed cell: bits 0..4 of `first` say which fields follow.
+                    if first & 0x01:
+                        cell.note = data[p]; p += 1
+                    if first & 0x02:
+                        cell.inst = data[p]; p += 1
+                    if first & 0x04:
+                        cell.volcol = data[p]; p += 1
+                    if first & 0x08:
+                        cell.effect = data[p]; p += 1
+                    if first & 0x10:
+                        cell.effect_arg = data[p]; p += 1
+                else:
+                    # Uncompressed — `first` is the note byte; 4 more follow
+                    cell.note       = first
+                    cell.inst       = data[p];     p += 1
+                    cell.volcol     = data[p];     p += 1
+                    cell.effect     = data[p];     p += 1
+                    cell.effect_arg = data[p];     p += 1
+
+        patterns.append((grid, rows))
+        off = body_off + packed_sz   # next pattern header follows the packed body
+    return patterns, off
+
+
+# ── Instrument / sample parser ────────────────────────────────────────────────
+
+def parse_instruments(data: bytes, h: XMHeader, off_start: int) -> tuple:
+    """Parse every instrument header, its sample headers and its sample data.
+
+    Returns (instruments, next_offset). Sample data is delta-decoded and run
+    through normalise_sample(); when the file ends inside a sample's data the
+    missing tail is zero-filled (with a warning) instead of raising.
+    """
+    insts = []
+    off = off_start
+    for ii in range(h.instrument_count):
+        if off + 29 > len(data):
+            vprint(f"  warning: truncated instrument {ii} at offset {off}")
+            break
+        hdr_size = _read_u32(data, off)
+        name = data[off + 4:off + 26].rstrip(b'\x00 ').decode('latin-1', errors='replace')
+        # type byte at +26 ignored (almost always 0)
+        n_samples = _read_u16(data, off + 27)
+
+        inst = XMInstrument()
+        inst.name = name
+        inst.sample_count = n_samples
+        inst.keymap = [0] * 96
+        inst.vol_env_pts = []
+        inst.pan_env_pts = []
+        inst.vol_env_count = inst.pan_env_count = 0
+        inst.vol_sustain = inst.vol_loop_start = inst.vol_loop_end = 0
+        inst.pan_sustain = inst.pan_loop_start = inst.pan_loop_end = 0
+        inst.vol_env_type = inst.pan_env_type = 0
+        inst.vib_type = inst.vib_sweep = inst.vib_depth = inst.vib_rate = 0
+        inst.fadeout = 0
+        inst.samples = []
+
+        if n_samples == 0:
+            insts.append(inst)
+            off += hdr_size
+            continue
+
+        # Extended header begins at off + 29 (per LoaderXM.cpp:162)
+        ext = off + 29
+        if ext + 214 > len(data):
+            vprint(f"  warning: truncated extended header for inst {ii}")
+            insts.append(inst)
+            off += hdr_size
+            continue
+
+        sample_hdr_size = _read_u32(data, ext)        # 4 bytes
+        inst.keymap     = list(data[ext + 4:ext + 100])  # 96 bytes
+        # Volume envelope: 12 × (frame:u16, value:u16) = 48 bytes at ext+100
+        for k in range(12):
+            fr  = _read_u16(data, ext + 100 + k * 4)
+            val = _read_u16(data, ext + 100 + k * 4 + 2)
+            inst.vol_env_pts.append((fr, val))
+        # Panning envelope at ext+148
+        for k in range(12):
+            fr  = _read_u16(data, ext + 148 + k * 4)
+            val = _read_u16(data, ext + 148 + k * 4 + 2)
+            inst.pan_env_pts.append((fr, val))
+        inst.vol_env_count  = data[ext + 196]
+        inst.pan_env_count  = data[ext + 197]
+        inst.vol_sustain    = data[ext + 198]
+        inst.vol_loop_start = data[ext + 199]
+        inst.vol_loop_end   = data[ext + 200]
+        inst.pan_sustain    = data[ext + 201]
+        inst.pan_loop_start = data[ext + 202]
+        inst.pan_loop_end   = data[ext + 203]
+        inst.vol_env_type   = data[ext + 204]
+        inst.pan_env_type   = data[ext + 205]
+        inst.vib_type       = data[ext + 206]
+        inst.vib_sweep      = data[ext + 207]
+        inst.vib_depth      = data[ext + 208]
+        inst.vib_rate       = data[ext + 209]
+        inst.fadeout        = _read_u16(data, ext + 210)
+        # 2 reserved bytes at ext+212
+
+        off += hdr_size
+
+        # Sample headers (40 bytes each per xm-form.txt:262-283)
+        sample_hdrs_off = off
+        sample_hdrs = []
+        for si in range(n_samples):
+            sh = sample_hdrs_off + si * sample_hdr_size
+            if sh + 40 > len(data):
+                vprint(f"  warning: truncated sample header inst {ii} sample {si}")
+                break
+            s = XMSample()
+            s.length      = _read_u32(data, sh + 0)
+            s.loop_start  = _read_u32(data, sh + 4)
+            s.loop_length = _read_u32(data, sh + 8)
+            s.volume      = data[sh + 12]
+            s.finetune    = struct.unpack_from('b', data, sh + 13)[0]   # signed
+            s.flags       = data[sh + 14]
+            s.panning     = data[sh + 15]
+            s.rel_note    = struct.unpack_from('b', data, sh + 16)[0]   # signed
+            # reserved byte at +17
+            s.name        = data[sh + 18:sh + 40].rstrip(b'\x00 ').decode('latin-1', errors='replace')
+            s.is_16bit    = bool(s.flags & XM_SMP_16BIT)
+            loop_type     = s.flags & XM_SMP_LOOP_MASK
+            s.pingpong    = (loop_type == XM_SMP_LOOP_PINGPONG)
+            s.sample_data = b''
+            sample_hdrs.append(s)
+        off = sample_hdrs_off + n_samples * sample_hdr_size
+
+        # Sample data follows immediately after all sample headers
+        for s in sample_hdrs:
+            if s.length == 0:
+                continue
+            raw = data[off:off + s.length]
+            if len(raw) < s.length:   # file ends early: zero deltas hold the last value
+                vprint(f"  warning: truncated sample data in inst {ii}; zero-filling {s.length - len(raw)} bytes")
+                raw = raw.ljust(s.length, b'\x00')
+            off += s.length
+            # Integrate delta encoding
+            if s.is_16bit:
+                pcm = bytearray(s.length)
+                last = 0
+                for i in range(0, s.length, 2):
+                    if i + 2 > s.length:
+                        break
+                    delta = struct.unpack_from('<h', raw, i)[0]
+                    last = (last + delta) & 0xFFFF
+                    if last >= 0x8000:
+                        signed = last - 0x10000
+                    else:
+                        signed = last
+                    struct.pack_into('<h', pcm, i, signed)
+                # Update length / loop fields to be in sample units (not byte units)
+                s.length      //= 2
+                s.loop_start  //= 2
+                s.loop_length //= 2
+                s.sample_data = bytes(pcm)
+            else:
+                pcm = bytearray(s.length)
+                last = 0
+                for i in range(s.length):
+                    delta = raw[i]
+                    if delta >= 0x80:
+                        delta -= 0x100
+                    last = (last + delta) & 0xFF
+                    pcm[i] = last  # signed-stored, will be flipped by normalise_sample
+                s.sample_data = bytes(pcm)
+
+            # Normalise to unsigned 8-bit mono
+            s.sample_data = normalise_sample(
+                s.sample_data, signed=True, is_16bit=s.is_16bit,
+                is_stereo=False, name=s.name or '<unnamed>'
+            )
+            # length is now in 8-bit mono samples
+            s.length = len(s.sample_data)
+            s.loop_start  = min(s.loop_start, s.length)
+            s.loop_length = max(0, min(s.loop_length, s.length - s.loop_start))
+
+        inst.samples = sample_hdrs
+        insts.append(inst)
+
+    return insts, off
+
+
+# ── Note / volume column / effect translation ────────────────────────────────
+
+def encode_note_xm(xm_note: int) -> int:
+    """Translate a raw XM note byte into a Taud note word.
+
+    97 → NOTE_KEYOFF; 1..96 → 4096-TET pitch with XM note 49 landing on
+    TAUD_C4 (result clamped to 1..0xFFFD); any other value → NOTE_NOP.
+    """
+    if xm_note == XM_NOTE_OFF:
+        return NOTE_KEYOFF
+    if not 1 <= xm_note <= 96:
+        return NOTE_NOP
+    pitch = round(TAUD_C4 + (xm_note - XM_RELNOTE_C4) * 4096 / 12)
+    return min(0xFFFD, max(1, pitch))
+
+
+def decode_volcol_xm(vc: int):
+    """Translate one XM volume-column byte.
+
+    Result is the 4-tuple (vol_sel, vol_value, pan_set, aux_effect):
+      vol_sel, vol_value : Taud volume-column entry; (SEL_FINE, 0) when the
+                           byte leaves the volume column alone
+      pan_set            : 6-bit pan value to set, else None
+      aux_effect         : (Taud op, arg) meant for the main effect slot
+                           when that slot is unoccupied, else None
+
+    Byte layout (xm-form.txt:958-1030), x = low nybble:
+      0x10..0x50   set volume to byte-0x10 (0..64)
+      0x6x / 0x7x  volume slide down / up
+      0x8x / 0x9x  fine volume slide down / up
+      0xAx         set vibrato speed
+      0xBx         vibrato with depth
+      0xCx         set panning (x × 17)
+      0xDx / 0xEx  panning slide left / right (nothing is returned for
+                   these here; see _xm_volcol_pan_override)
+      0xFx         tone portamento (x × 16)
+    """
+    nothing = (SEL_FINE, 0, None, None)
+    if vc == 0:
+        return nothing
+    if 0x10 <= vc <= 0x50:
+        # 0..64 has to fit the 6-bit column, so 64 clamps to 63.
+        return SEL_SET, min(vc - 0x10, 0x3F), None, None
+
+    kind = vc >> 4
+    x = vc & 0xF
+    if kind == 0x6:
+        return SEL_DOWN, x, None, None
+    if kind == 0x7:
+        return SEL_UP, x, None, None
+
+    if kind == 0x8:
+        # Fine slide down: magnitude only, bit 5 stays clear.
+        return SEL_FINE, x, None, None
+    if kind == 0x9:
+        # Fine slide up: same magnitude with bit 5 set.
+        return SEL_FINE, x | 0x20, None, None
+
+    if kind == 0xA:
+        # Vibrato speed: nybble widened to 8 bits, placed in TOP_H's high byte.
+        return SEL_FINE, 0, None, (TOP_H, (x * 0x11) << 8)
+    if kind == 0xB:
+        # Vibrato depth: nybble widened to 8 bits, placed in TOP_H's low byte.
+        return SEL_FINE, 0, None, (TOP_H, x * 0x11)
+    if kind == 0xC:
+        # x × 17 spans 0..255; rescale that to the 6-bit pan range.
+        pan6 = min(0x3F, round(x * 17 * 63 / 255))
+        return SEL_FINE, 0, pan6, None
+    if kind == 0xF:
+        # x × 16, then scaled by 64/3 for the TOP_G argument.
+        return SEL_FINE, 0, None, (TOP_G, round(x * 16 * 64 / 3) & 0xFFFF)
+
+    # Everything else (including the 0xDx / 0xEx pan slides) yields the
+    # empty result.
+    return nothing
+
+
+def _xm_volcol_pan_override(vc: int):
+    """Pan-column (selector, amount) for a vol-col D/E pan slide, or None
+    when the byte is not a pan slide."""
+    hi = vc >> 4
+    if hi not in (0xD, 0xE):
+        return None
+    return (SEL_DOWN if hi == 0xD else SEL_UP, vc & 0xF)   # D = left, E = right
+
+
+def encode_effect_xm(cmd: int, arg: int, ch: int = 0, row: int = 0,
+                     amiga_mode: bool = False) -> tuple:
+    """Map an XM effect (cmd, arg) → (taud_op, taud_arg16, vol_override, pan_override).
+    ch/row only label the vprint diagnostics; amiga_mode leaves porta up/down args unscaled.
+    XM effect numbers per XModule.cpp:1303 / xm-form.txt:690-743.
+    """
+    # 0 with arg=0 = true no-op; 0 with arg!=0 = arpeggio.
+    if cmd == 0x00:
+        if arg == 0:
+            return (TOP_NONE, 0, None, None)
+        hi = (arg >> 4) & 0xF
+        lo = arg & 0xF
+        return (TOP_J, (J_SEMI_TABLE[hi] << 8) | J_SEMI_TABLE[lo], None, None)
+
+    if cmd == 0x01:
+        # Porta up: arg in period units (Amiga) or 4096-TET-equivalent.
+        if amiga_mode:
+            return (TOP_F, arg & 0xFFFF, None, None)
+        return (TOP_F, round(arg * 64 / 3) & 0xFFFF, None, None)
+
+    if cmd == 0x02:
+        if amiga_mode:
+            return (TOP_E, arg & 0xFFFF, None, None)
+        return (TOP_E, round(arg * 64 / 3) & 0xFFFF, None, None)
+
+    if cmd == 0x03:
+        # Tone portamento: always linear regardless of mode.
+        return (TOP_G, round(arg * 64 / 3) & 0xFFFF, None, None)
+
+    if cmd == 0x04:
+        hi = (arg >> 4) & 0xF
+        lo = arg & 0xF
+        return (TOP_H, ((hi * 0x11) << 8) | (lo * 0x11), None, None)
+
+    if cmd == 0x05:
+        # Tone porta + vol slide → Taud L (G + d_arg vol slide override).
+        return (TOP_G, 0x0000, d_arg_to_col(arg), None)
+
+    if cmd == 0x06:
+        # Vibrato + vol slide → Taud K (H + d_arg vol slide override).
+        return (TOP_H, 0x0000, d_arg_to_col(arg), None)
+
+    if cmd == 0x07:
+        hi = (arg >> 4) & 0xF
+        lo = arg & 0xF
+        return (TOP_R, ((hi * 0x11) << 8) | (lo * 0x11), None, None)
+
+    if cmd == 0x08:
+        # Set panning 0..255 → Taud pan column 0..63.
+        pan6 = min(0x3F, round((arg & 0xFF) * 63 / 255))
+        return (TOP_NONE, 0, None, (SEL_SET, pan6))
+
+    if cmd == 0x09:
+        return (TOP_O, (arg & 0xFF) << 8, None, None)
+
+    if cmd == 0x0A:
+        # Volume slide: high nybble = up, low nybble = down. Taud TOP_D
+        # uses the same nybble-pair layout in the high byte.
+        return (TOP_D, (arg & 0xFF) << 8, None, None)
+
+    if cmd == 0x0B:
+        # Position jump — order index translated to Taud cue at remap time.
+        return (TOP_B, arg & 0xFF, None, None)
+
+    if cmd == 0x0C:
+        # Set volume 0..64 → vol column SEL_SET.
+        return (TOP_NONE, 0, (SEL_SET, min(arg, 0x3F)), None)
+
+    if cmd == 0x0D:
+        # Pattern break: XM stores BCD row number.
+        hi = (arg >> 4) & 0xF
+        lo = arg & 0xF
+        row_num = (hi * 10 + lo) & 0xFF
+        if row_num >= PATTERN_ROWS:
+            row_num = 0
+        return (TOP_C, row_num & 0xFF, None, None)
+
+    if cmd == 0x0E:
+        # Extended commands E0x..EFx — fold into Taud TOP_S sub-codes
+        # where possible.
+        sub = (arg >> 4) & 0xF
+        val = arg & 0xF
+        # Fine porta up E1x / down E2x:
+        if sub == 0x1:
+            # Fine porta up: TOP_F with $Fx layout (engine treats this as fine).
+            if amiga_mode:
+                return (TOP_F, 0xF000 | (val & 0xFFF), None, None)
+            return (TOP_F, 0xF000 | (round(val * 16 / 3) & 0xFFF), None, None)
+        if sub == 0x2:
+            if amiga_mode:
+                return (TOP_E, 0xF000 | (val & 0xFFF), None, None)
+            return (TOP_E, 0xF000 | (round(val * 16 / 3) & 0xFFF), None, None)
+        # E3x glissando control / E4x vibrato wave / E7x tremolo wave /
+        # ECx note cut / EDx note delay / EEx pattern delay keep their
+        # sub-code and nybble as TOP_S; the other E-commands follow below.
+        if sub in (0x3, 0x4, 0x7, 0xC, 0xD, 0xE):
+            return (TOP_S, (sub << 12) | (val << 8), None, None)
+        if sub == 0x5:
+            # Set finetune — convert to S5x sub-effect (4-bit signed nibble).
+            return (TOP_S, 0x5000 | (val << 8), None, None)
+        if sub == 0x6:
+            # Set loop point / loop. Taud S6x = fine pattern delay, so there
+            # is no analogue: E6x is always dropped (reported via vprint).
+            vprint(f"    dropped E6{val:X} (set loop) at ch{ch} row{row}")
+            return (TOP_NONE, 0, None, None)
+        if sub == 0x8:
+            # Pan position 0..15 → set pan column (XM nybble × 17 → 8-bit).
+            pan8 = (val << 4) | val
+            pan6 = min(0x3F, round(pan8 * 63 / 255))
+            return (TOP_NONE, 0, None, (SEL_SET, pan6))
+        if sub == 0x9:
+            # Retrig with vol 0 → multi-retrig speed; map to TOP_Q.
+            return (TOP_Q, (val & 0xF) << 8, None, None)
+        if sub == 0xA:
+            # Fine vol up: vol col fine slide
+            return (TOP_NONE, 0, (SEL_FINE, (val & 0x1F) | 0x20), None)
+        if sub == 0xB:
+            # Fine vol down
+            return (TOP_NONE, 0, (SEL_FINE, val & 0x1F), None)
+        if sub == 0xF:
+            # E$Fx in XM is unused (or "Funk repeat" in old PT) — drop.
+            vprint(f"    dropped EF{val:X} (unused / funk) at ch{ch} row{row}")
+            return (TOP_NONE, 0, None, None)
+        return (TOP_NONE, 0, None, None)
+
+    if cmd == 0x0F:
+        # Set speed if arg < 0x20, else set tempo (BPM).
+        if arg == 0:
+            return (TOP_NONE, 0, None, None)
+        if arg < 0x20:
+            return (TOP_A, (arg & 0xFF) << 8, None, None)
+        # Tempo: Taud T uses bias of -24 in stored form; mirror it2taud:
+        return (TOP_T, ((arg - 0x18) & 0xFF) << 8, None, None)
+
+    if cmd == 0x10:
+        # Set global volume 0..64 → Taud V (×4 to fit 0..255).
+        taud_v = min(arg * 4, 0xFF)
+        return (TOP_V, (taud_v & 0xFF) << 8, None, None)
+
+    if cmd == 0x11:
+        # Global volume slide: high nyb up, low nyb down → TOP_W.
+        return (TOP_W, (arg & 0xFF) << 8, None, None)
+
+    if cmd == 0x14:
+        # Key off (delayed): map to a note-off via SDx-like delay sub-effect.
+        # Taud doesn't have a direct delayed-key-off, so issue a key-off note
+        # immediately (loses delay parameter — most XMs use Kxx with arg=0).
+        if arg > 0:
+            vprint(f"    K{arg:02X} delay parameter lost at ch{ch} row{row}")
+        return (TOP_NONE, 0, None, None)   # caller forces note=NOTE_KEYOFF
+
+    if cmd == 0x15:
+        vprint(f"    dropped L{arg:02X} (set envelope position) at ch{ch} row{row}")
+        return (TOP_NONE, 0, None, None)
+
+    if cmd == 0x19:
+        # Pan slide → TOP_S not appropriate; use pan-column slide via
+        # d_arg_to_col interpreted as pan.
+        return (TOP_NONE, 0, None, d_arg_to_col(arg))
+
+    if cmd == 0x1B:
+        # Multi retrig with volume change → TOP_Q.
+        return (TOP_Q, (arg & 0xFF) << 8, None, None)
+
+    if cmd == 0x1D:
+        # Tremor → TOP_I.
+        return (TOP_I, (arg & 0xFF) << 8, None, None)
+
+    if cmd == 0x21:
+        # Extra-fine porta X1x / X2x.
+        sub = (arg >> 4) & 0xF
+        val = arg & 0xF
+        if sub == 1:
+            if amiga_mode:
+                return (TOP_F, 0xE000 | (val & 0xFFF), None, None)
+            return (TOP_F, 0xE000 | (round(val * 4 / 3) & 0xFFF), None, None)
+        if sub == 2:
+            if amiga_mode:
+                return (TOP_E, 0xE000 | (val & 0xFFF), None, None)
+            return (TOP_E, 0xE000 | (round(val * 4 / 3) & 0xFFF), None, None)
+        return (TOP_NONE, 0, None, None)
+
+    return (TOP_NONE, 0, None, None)
+
+
+# ── Pattern splitting (XM-specific; mirrors it2taud's $02xx policy) ──────────
+
+def split_patterns_xm(patterns: list):
+    """Cut every XM pattern into chunks of at most PATTERN_ROWS rows.
+
+    Returns (chunks, chunk_map, chunk_lens):
+      chunks        — channel-major grids, each padded to PATTERN_ROWS rows
+      chunk_map[pi] — indices into `chunks` made from pattern pi, in order
+                      (empty list for a 0-row pattern)
+      chunk_lens[i] — number of real (unpadded) rows in chunks[i]
+    """
+    chunks, chunk_map, chunk_lens = [], [], []
+
+    for pat_no, (grid, total_rows) in enumerate(patterns):
+        if total_rows == 0:
+            chunk_map.append([])
+            continue
+        # ceil(total_rows / PATTERN_ROWS)
+        pieces = (total_rows + PATTERN_ROWS - 1) // PATTERN_ROWS
+        if pieces > 1:
+            vprint(f"  pattern {pat_no}: {total_rows} rows → {pieces} chunks")
+        indices = []
+        for first in range(0, pieces * PATTERN_ROWS, PATTERN_ROWS):
+            last = min(first + PATTERN_ROWS, total_rows)
+            piece = []
+            for column in grid:
+                taken = column[first:last]
+                # Slicing already stops at the end of a short column; fresh
+                # empty cells then pad the chunk out to PATTERN_ROWS rows.
+                padding = [XMRow() for _ in range(PATTERN_ROWS - len(taken))]
+                piece.append(taken + padding)
+            indices.append(len(chunks))
+            chunks.append(piece)
+            chunk_lens.append(last - first)
+        chunk_map.append(indices)
+
+    return chunks, chunk_map, chunk_lens
+
+
+def remap_b_effects_xm(chunks: list, chunk_map: list,
+                       order_list: list, xm_ord_to_taud_cue: dict,
+                       num_channels: int) -> None:
+    """Rewrite, in place, the argument of every XM Bxx (position jump) cell in
+    the first `num_channels` channels of each chunk: XM order index → Taud cue
+    index. Orders missing from `xm_ord_to_taud_cue` keep their own number. Dxx
+    (pattern break) targets a row, so it is left untouched."""
+    for grid in chunks:
+        for column in grid[:max(num_channels, 0)]:
+            for cell in column:
+                if cell.effect != 0x0B:
+                    continue
+                order_idx = cell.effect_arg & 0xFF
+                cell.effect_arg = xm_ord_to_taud_cue.get(order_idx, order_idx) & 0xFF
+
+
+# ── Sample / instrument bin ───────────────────────────────────────────────────
+
+class _XMSampleProxy:
+    """Per-(XM instrument, sample-in-instrument) adapter handed to the
+    inst-bin builder. The envelope, fadeout, NNA and vibrato slots are
+    filled from the parent XM instrument."""
+    __slots__ = tuple((
+        'name length loop_begin loop_end volume '
+        'finetune rel_note panning pingpong '
+        'sample_data c2spd flags '
+        'fadeout vib_speed vib_depth vib_sweep vib_wave '
+        'vol_env_pts vol_env_loop_word vol_env_sus_word '
+        'pan_env_pts pan_env_loop_word pan_env_sus_word has_pan_env nna').split())
+
+
+def _xm_envelope_to_taud(env_pts: list, num_pts: int, env_type: int,
+                         sustain: int, loop_start: int, loop_end: int,
+                         kind: str, ticks_per_sec: float) -> tuple:
+    """Convert one XM envelope into Taud's 25-point form.
+
+    Returns (points, loop_word, sus_word), or (None, 0, 0) when XM_ENV_ON is
+    clear or num_pts < 1. `points` holds 25 (value, minifloat_index) pairs:
+    up to 12 XM nodes first, then repeats of the last value with index 0.
+    A node's index encodes the time to the following node (frame difference
+    divided by ticks_per_sec, never negative); the final node gets index 0.
+
+    XM values are clamped to 0..64 and rescaled:
+      kind == 'vol' → 0..63
+      any other     → 0..255 (32 = centre → 0x80)
+
+    Both words use one layout: bit 5 = enable, bits 12..8 = start node,
+    bits 4..0 = end node. XM's sustain point and envelope loop are
+    independent, so they are emitted as separate words
+    (terranmon.txt:2049+).
+    """
+    if num_pts < 1 or not (env_type & XM_ENV_ON):
+        return None, 0, 0
+
+    # At most 12 nodes are taken from the list.
+    nodes = env_pts[:max(1, min(num_pts, 12))]
+    count = len(nodes)
+    is_vol = (kind == 'vol')
+
+    def _scale(xm_val: int) -> int:
+        clamped = max(0, min(64, xm_val))
+        if is_vol:
+            # vol: 0..64 → 0..63
+            return min(63, round(clamped * 63 / 64))
+        # pan: 0..64 → 0..255
+        return min(255, max(0, round(clamped * 255 / 64)))
+
+    # Real nodes first ...
+    points = []
+    for idx, (frame, xm_val) in enumerate(nodes):
+        mf_idx = 0
+        if idx + 1 < count:
+            # Negative frame steps are clamped to a zero-length segment.
+            gap_sec = (nodes[idx + 1][0] - frame) / ticks_per_sec
+            mf_idx = _nearest_minifloat(max(0.0, gap_sec))
+        points.append((_scale(xm_val), mf_idx))
+    # ... then hold the last value (or the neutral default when there were
+    # no nodes at all) until all 25 slots are filled.
+    hold = points[-1][0] if points else (63 if is_vol else 0x80)
+    points.extend((hold, 0) for _ in range(25 - count))
+
+    loop_valid = (bool(env_type & XM_ENV_LOOP)
+                  and 0 <= loop_start < count
+                  and loop_start <= loop_end < count)
+    sus_valid = bool(env_type & XM_ENV_SUSTAIN) and 0 <= sustain < count
+
+    # LOOP word (offsets 15/17/19): bit 5 is always set so the envelope stays
+    # active; without a valid XM loop the start/end fields are left at zero.
+    loop_word = 0x0020
+    if loop_valid:
+        loop_word |= (loop_start & 0x1F) << 8
+        loop_word |= (loop_end & 0x1F)
+
+    # SUSTAIN word (offsets 189/191/193): FT2's single sustain point is
+    # written with start == end.
+    sus_word = 0
+    if sus_valid:
+        sus_node = sustain & 0x1F
+        sus_word = 0x0020 | (sus_node << 8) | sus_node
+
+    return points, loop_word, sus_word
+
+
+def _xm_sample_to_proxy(inst: XMInstrument, samp: XMSample,
+                        ticks_per_sec: float) -> _XMSampleProxy:
+    """Build the inst-bin proxy for one XM sample; envelopes, fadeout and
+    auto-vibrato come from the parent instrument `inst`."""
+    p = _XMSampleProxy()
+    p.name        = samp.name
+    p.length      = samp.length
+    p.loop_begin  = samp.loop_start
+    p.loop_end    = samp.loop_start + samp.loop_length
+    p.volume      = min(samp.volume, 64)   # XM 0..64
+    p.finetune    = samp.finetune          # signed -128..+127
+    p.rel_note    = samp.rel_note          # signed semitones
+    p.panning     = samp.panning           # 0..255
+    p.pingpong    = samp.pingpong
+    p.sample_data = samp.sample_data
+    # c2spd: XM uses a per-sample finetune (1/128 semitone units) plus a
+    # rel_note offset. Both are baked into c2spd so the XM "C-4 row" plays
+    # at the correct audible pitch when the Taud note is also C-4.
+    semis = samp.rel_note + samp.finetune / 128.0
+    p.c2spd       = max(1, round(8363.0 * (2.0 ** (semis / 12.0))))
+    loop_type     = samp.flags & XM_SMP_LOOP_MASK
+    p.flags       = 1 if loop_type != 0 else 0   # 1=loop on, 0=off
+    # Fadeout: XM stores 0..4095 (FT2 file format); 0 means "no fadeout"
+    # in FT2 — matches Taud's fadeStep semantics where 0 = held forever.
+    p.fadeout     = min(0xFFF, inst.fadeout & 0xFFFF)
+    p.vib_speed   = inst.vib_rate          # XM rate ↔ Taud "speed"
+    p.vib_depth   = min(0xFF, max(0, inst.vib_depth * 2))  # LoaderXM.cpp:217 scaling; saturate, never wrap
+    p.vib_sweep   = inst.vib_sweep & 0xFF
+    p.vib_wave    = inst.vib_type & 0x07
+
+    # Envelopes (volume + panning).
+    p.vol_env_pts, p.vol_env_loop_word, p.vol_env_sus_word = _xm_envelope_to_taud(
+        inst.vol_env_pts, inst.vol_env_count, inst.vol_env_type,
+        inst.vol_sustain, inst.vol_loop_start, inst.vol_loop_end,
+        kind='vol', ticks_per_sec=ticks_per_sec)
+    p.pan_env_pts, p.pan_env_loop_word, p.pan_env_sus_word = _xm_envelope_to_taud(
+        inst.pan_env_pts, inst.pan_env_count, inst.pan_env_type,
+        inst.pan_sustain, inst.pan_loop_start, inst.pan_loop_end,
+        kind='pan', ticks_per_sec=ticks_per_sec)
+    p.has_pan_env = p.pan_env_pts is not None
+
+    # XM has no NNA: every new note unconditionally retriggers the channel,
+    # replacing whatever was playing. Use Taud NNA=1 (cut) to suppress the
+    # engine's NNA-ghosting path; otherwise the previous voice keeps running
+    # in the background pool under the new note — IT semantics, not FT2.
+    p.nna = 1
+    return p
+
+
+def build_sample_inst_bin_xm(proxies: list) -> tuple:
+    """proxies: list (1-indexed; slot 0 unused) of _XMSampleProxy | None;
+    entries are updated in place when PCM is resampled, truncated or dropped.
+    Returns (sampleinst_bin, offsets_dict, ratio); ratio is 1.0 unless the
+    combined PCM exceeded SAMPLEBIN_SIZE and everything was resampled."""
+    pcm_list = [(i, s) for i, s in enumerate(proxies)
+                if s is not None and s.sample_data]
+
+    total = sum(len(s.sample_data) for _, s in pcm_list)
+    ratio = 1.0
+    if total > SAMPLEBIN_SIZE:
+        ratio = SAMPLEBIN_SIZE / total
+        vprint(f"  info: sample bin overflow ({total} bytes); "
+               f"resampling all by {ratio:.4f}")
+        for _, s in pcm_list:
+            new_data = resample_linear(s.sample_data, ratio)
+            s.sample_data = new_data
+            s.length      = len(new_data)
+            s.loop_begin  = max(0, int(s.loop_begin * ratio))
+            s.loop_end    = max(0, min(int(s.loop_end * ratio), s.length))
+            s.c2spd       = max(1, int(s.c2spd * ratio))   # keep audible pitch after resampling
+
+    sample_bin = bytearray(SAMPLEBIN_SIZE)
+    offsets = {}
+    pos = 0
+    for idx, s in pcm_list:
+        n = min(len(s.sample_data), SAMPLEBIN_SIZE - pos)
+        if n <= 0:
+            vprint(f"  warning: sample bin full, dropping '{s.name}'")
+            offsets[idx] = 0
+            s.length = s.loop_begin = s.loop_end = s.flags = 0   # nothing stored: no loop either
+            continue
+        sample_bin[pos:pos + n] = s.sample_data[:n]
+        offsets[idx] = pos
+        if n < len(s.sample_data):
+            vprint(f"  warning: '{s.name}' truncated {len(s.sample_data)} → {n}")
+            s.length     = n
+            s.loop_begin = min(s.loop_begin, n)   # keep both loop bounds inside the stored PCM
+            s.loop_end   = min(s.loop_end, n)
+        pos += n
+
+    USE_ENV_BIT = 0x0020   # b: engine should evaluate the envelope
+    INST_STRIDE = 256
+
+    def _write_env(buf: bytearray, base: int, env_pts, pad_value: int) -> None:
+        """Write 25 (value, minifloat) pairs. Pads with the previous value
+        (or pad_value) and offset=0 if shorter than 25."""
+        for k in range(25):
+            if env_pts and k < len(env_pts):
+                val, mf = env_pts[k]
+            else:
+                val = (env_pts[-1][0] if env_pts else pad_value)
+                mf  = 0
+            buf[base + k * 2]     = val & 0xFF
+            buf[base + k * 2 + 1] = mf  & 0xFF
+
+    inst_bin = bytearray(INSTBIN_SIZE)
+    for i, s in enumerate(proxies):
+        if i == 0 or i >= 256 or s is None or not s.sample_data:
+            continue
+        ptr   = offsets.get(i, 0) & 0xFFFFFFFF
+        s_len = min(s.length, 65535)   # the fields below are packed as unsigned 16-bit
+        c2spd = min(s.c2spd, 65535)
+        ls    = min(s.loop_begin, 65535)
+        le    = min(s.loop_end,   65535)
+        loop_mode = 0
+        if s.flags & 1:
+            loop_mode = 2 if s.pingpong else 1
+        flags_byte = loop_mode & 0x3
+
+        # Envelope LOOP / SUSTAIN words come from the proxy. With no XM envelope,
+        # fall back to a single-point unit envelope (vol LOOP b=1 only); IGV sets level.
+        if s.vol_env_pts is not None:
+            vol_env_loop = s.vol_env_loop_word
+            vol_env_sus  = s.vol_env_sus_word
+            vol_env      = s.vol_env_pts
+        else:
+            vol_env_loop = USE_ENV_BIT
+            vol_env_sus  = 0
+            vol_env      = None
+        if s.pan_env_pts is not None:
+            pan_env_loop = s.pan_env_loop_word
+            pan_env_sus  = s.pan_env_sus_word
+            pan_env      = s.pan_env_pts
+        else:
+            pan_env_loop = 0
+            pan_env_sus  = 0
+            pan_env      = None
+
+        base = i * INST_STRIDE
+        struct.pack_into('<I', inst_bin, base + 0,  ptr)
+        struct.pack_into('<H', inst_bin, base + 4,  s_len)
+        struct.pack_into('<H', inst_bin, base + 6,  c2spd)
+        struct.pack_into('<H', inst_bin, base + 8,  0)         # play start
+        struct.pack_into('<H', inst_bin, base + 10, ls)
+        struct.pack_into('<H', inst_bin, base + 12, le)
+        inst_bin[base + 14] = flags_byte
+        # LOOP words at 15/17/19.
+        struct.pack_into('<H', inst_bin, base + 15, vol_env_loop & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 17, pan_env_loop & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 19, 0)         # pf envelope: off
+
+        if vol_env:
+            _write_env(inst_bin, base + 21, vol_env, pad_value=63)
+        else:
+            inst_bin[base + 21] = 63   # only point 0 is written; the other 24 stay zero-filled
+            inst_bin[base + 22] = 0
+
+        if pan_env:
+            _write_env(inst_bin, base + 71, pan_env, pad_value=0x80)
+        else:
+            for k in range(25):
+                inst_bin[base + 71 + k * 2]     = 0x80
+                inst_bin[base + 71 + k * 2 + 1] = 0x00
+
+        # pf envelope (pitch/filter): unused — keep at unity centre.
+        for k in range(25):
+            inst_bin[base + 121 + k * 2]     = 0x80
+            inst_bin[base + 121 + k * 2 + 1] = 0x00
+
+        # IGV: XM volume 0..64 → 0..255
+        inst_bin[base + 171] = min(0xFF, round(s.volume * 255 / 64))
+        # Fadeout: 12-bit. Low 8 bits at +172, high 4 bits at +173.
+        inst_bin[base + 172] = s.fadeout & 0xFF
+        inst_bin[base + 173] = (s.fadeout >> 8) & 0x0F
+        # Default pan (XM sample panning 0..255 → Taud direct 0..255)
+        inst_bin[base + 177] = s.panning & 0xFF
+        # Filter cutoff/resonance: XM has no filters → off.
+        inst_bin[base + 182] = 0xFF
+        inst_bin[base + 183] = 0xFF
+        # Auto-vibrato (XM instrument-level)
+        inst_bin[base + 175] = s.vib_speed & 0xFF
+        inst_bin[base + 176] = s.vib_sweep & 0xFF
+        inst_bin[base + 187] = s.vib_depth & 0xFF
+        inst_bin[base + 188] = s.vib_speed & 0xFF   # NOTE(review): same value as +175 — confirm layout
+        # Inst flag byte: bits 4..2 = vib waveform, bits 1..0 = NNA
+        inst_bin[base + 186] = ((s.vib_wave & 0x07) << 2) | (s.nna & 0x03)
+        # SUSTAIN words at 189/191/193.
+        struct.pack_into('<H', inst_bin, base + 189, vol_env_sus & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 191, pan_env_sus & 0xFFFF)
+        struct.pack_into('<H', inst_bin, base + 193, 0)        # pf sustain: off
+        # Byte 195 (DCT/DCA) — XM has no NNA / duplicate-check, leave 0.
+
+        env_tag = ''
+        if vol_env: env_tag += 'V'
+        if pan_env: env_tag += 'P'
+        vprint(f"  instrument[{i}] '{s.name}' ptr={ptr} c2spd={s.c2spd} "
+               f"vol={s.volume} loop=({ls},{le},{'on' if loop_mode else 'off'}) "
+               f"fade={s.fadeout} nna={s.nna} env=[{env_tag or '-'}]")
+
+    return bytes(sample_bin) + bytes(inst_bin), offsets, ratio
+
+
+# ── Pattern bin builder ───────────────────────────────────────────────────────
+
+def build_pattern_xm(chunk_grid: list, ch_idx: int, default_pan: int,
+                     inst_to_taud_slot, amiga_mode: bool = False) -> bytes:
+    """Render one Taud channel's 512-byte pattern from a 64-row chunk grid.
+
+    inst_to_taud_slot is a callable (xm_inst, note_or_None) → Taud slot or
+    None, not a mapping. Cells of chunk_grid are read, never modified.
+    """
+    out = bytearray(PATTERN_BYTES)
+    if ch_idx >= len(chunk_grid):
+        rows = [XMRow()] * PATTERN_ROWS
+    else:
+        rows = chunk_grid[ch_idx]
+
+    for r, cell in enumerate(rows[:PATTERN_ROWS]):
+        # ── Volume column → vol/pan/aux-effect overrides ────────────────────
+        vs, vv, pan_from_vc, aux_eff = decode_volcol_xm(cell.volcol)
+        # Pan slide via vol-col D/E (encoded as pan_override below)
+        vc_pan_override = _xm_volcol_pan_override(cell.volcol)
+
+        # ── Main effect translation ─────────────────────────────────────────
+        op, arg16, vol_override, pan_override = encode_effect_xm(
+            cell.effect, cell.effect_arg, ch_idx, r, amiga_mode=amiga_mode)
+
+        # XM K00 (0x14) = key off — force note to NOTE_KEYOFF (local copy; grids are reused)
+        note = XM_NOTE_OFF if cell.effect == 0x14 else cell.note
+
+        # Fold vol-col aux into main slot if free
+        if aux_eff is not None:
+            if op == TOP_NONE:
+                op, arg16 = aux_eff
+            else:
+                vprint(f"    ch{ch_idx} row{r}: dropped vol-col aux effect "
+                       f"(main effect slot occupied)")
+
+        # ── Note ────────────────────────────────────────────────────────────
+        note_taud = NOTE_NOP
+        if note > 0:
+            note_taud = encode_note_xm(note)
+
+        # XM cell.inst==0 means "no instrument change" — preserve verbatim
+        # so the engine retriggers whatever sample slot is currently loaded.
+        # When cell.inst > 0, look up the Taud slot via the keymap (using
+        # the row's own note if present, else the first sample of the
+        # instrument).
+        if cell.inst > 0:
+            note_for_lookup = note if note > 0 else None
+            taud_slot = inst_to_taud_slot(cell.inst, note_for_lookup) or 0
+        else:
+            taud_slot = 0
+
+        # ── Volume column resolution ────────────────────────────────────────
+        if vs != SEL_FINE or vv != 0:
+            vol_sel, vol_value = vs, vv
+        elif vol_override is not None:
+            vol_sel, vol_value = vol_override
+        else:
+            vol_sel, vol_value = SEL_FINE, 0
+
+        # ── Pan column resolution ───────────────────────────────────────────
+        if pan_from_vc is not None:
+            pan_sel, pan_value = SEL_SET, pan_from_vc
+        elif vc_pan_override is not None:
+            pan_sel, pan_value = vc_pan_override
+        elif pan_override is not None:
+            pan_sel, pan_value = pan_override
+        elif r == 0:
+            pan_sel, pan_value = SEL_SET, default_pan & 0x3F
+        else:
+            pan_sel, pan_value = SEL_FINE, 0
+
+        vol_byte = (vol_value & 0x3F) | ((vol_sel & 0x3) << 6)
+        pan_byte = (pan_value & 0x3F) | ((pan_sel & 0x3) << 6)
+
+        base = r * 8
+        struct.pack_into('<H', out, base + 0, note_taud)
+        out[base + 2] = taud_slot & 0xFF
+        out[base + 3] = vol_byte
+        out[base + 4] = pan_byte
+        out[base + 5] = op & 0xFF
+        struct.pack_into('<H', out, base + 6, arg16 & 0xFFFF)
+
+    return bytes(out)
+
+
+# ── Channel selection ─────────────────────────────────────────────────────────
+
+def _active_channels_xm(h: XMHeader, patterns: list) -> list:
+    """Sorted indices of channels holding any non-empty cell, capped at NUM_VOICES."""
+    used = set()
+    for grid, _row_count in patterns:
+        for ch_no, column in enumerate(grid):
+            if any(c.note != 0 or c.inst != 0 or c.effect != 0 or c.volcol != 0
+                   for c in column):
+                used.add(ch_no)
+    ordered = sorted(used)
+    if len(ordered) <= NUM_VOICES:
+        return ordered
+    vprint(f"  warning: {len(ordered)} active channels; capping at {NUM_VOICES}")
+    return ordered[:NUM_VOICES]
+
+
+# ── Main assembly ─────────────────────────────────────────────────────────────
+
+def assemble_taud(h: XMHeader, patterns: list, instruments: list) -> bytes:
+    """Assemble a complete .taud image (header, gzip'd sample+inst bin, song
+    table, gzip'd pattern bin and cue sheet) from a parsed XM module."""
+    # XM envelope frames advance once per row tick. Tick rate is derived
+    # from BPM the same way ProTracker derives it: ticks_per_sec = BPM × 2/5
+    # (matches MilkyTracker's tick clock and it2taud's ticks_per_sec).
+    tempo_for_envs = max(24, min(280, h.default_bpm if h.default_bpm > 0 else 125))
+    ticks_per_sec  = max(1.0, tempo_for_envs * 2.0 / 5.0)
+
+    # ── XM instrument → Taud slot proxies: one per (xm_inst, sample); slot 0 unused ──
+    proxies = [None]
+    inst_to_slots = {}      # xm_inst (1-based) → list of taud slots, one per sample index
+    for ii, inst in enumerate(instruments, start=1):
+        if not inst.samples:
+            inst_to_slots[ii] = []
+            continue
+        slots = []
+        for samp in inst.samples:
+            if not samp.sample_data:
+                slots.append(0)
+                continue
+            taud_slot = len(proxies)
+            if taud_slot >= 256:
+                vprint(f"  warning: >255 sample slots; clipping at instrument {ii}")
+                slots.append(0)
+                continue
+            proxies.append(_xm_sample_to_proxy(inst, samp, ticks_per_sec))
+            slots.append(taud_slot)
+        inst_to_slots[ii] = slots
+
+    # Closure resolving (xm_inst, note) → taud slot via per-instrument keymap.
+    def resolve_inst_slot(xm_inst: int, note: int):
+        slots = inst_to_slots.get(xm_inst, [])
+        if not slots:
+            return None
+        if note is None or note < 1 or note > 96:
+            # No note context; fall back to first sample of the instrument.
+            for s in slots:
+                if s != 0:
+                    return s
+            return None
+        inst = instruments[xm_inst - 1] if xm_inst - 1 < len(instruments) else None
+        if inst is None:
+            return slots[0] if slots[0] else None
+        sample_idx = inst.keymap[(note - 1) % 96] if inst.keymap else 0
+        if 0 <= sample_idx < len(slots) and slots[sample_idx]:
+            return slots[sample_idx]
+        return slots[0] if slots[0] else None
+
+    # ── Sample / instrument bin ─────────────────────────────────────────────
+    vprint(f"  building sample/inst bin… ({len(proxies) - 1} sample slots used)")
+    sampleinst_raw, _, sample_ratio = build_sample_inst_bin_xm(proxies)
+    compressed = gzip.compress(sampleinst_raw, compresslevel=9, mtime=0)
+    comp_size  = len(compressed)
+    vprint(f"  sample+inst bin: {SAMPLEINST_SIZE} → {comp_size} bytes (gzip)")
+
+    # ── Tempo / speed ───────────────────────────────────────────────────────
+    speed = h.default_speed if h.default_speed > 0 else 6
+    tempo = tempo_for_envs   # already defaulted to 125 and clamped to 24..280 above
+    bpm_stored = (tempo - 24) & 0xFF
+    vprint(f"  initial speed={speed}, tempo={tempo} BPM")
+
+    # ── Channels / cue list ─────────────────────────────────────────────────
+    active_channels = _active_channels_xm(h, patterns)
+    C = len(active_channels)
+    if C == 0:
+        sys.exit("error: no active channels found")
+
+    chunks, chunk_map, chunk_lens = split_patterns_xm(patterns)
+
+    taud_cue_list = []
+    xm_ord_to_taud_cue = {}
+    for oi, order in enumerate(h.order_list[:h.order_count]):
+        if order >= h.pattern_count:
+            continue
+        if order >= len(chunk_map):
+            continue
+        xm_ord_to_taud_cue.setdefault(oi, len(taud_cue_list))
+        for ci in chunk_map[order]:
+            taud_cue_list.append(ci)
+
+    if not taud_cue_list:
+        sys.exit("error: order list resolved to no playable cues")
+
+    remap_b_effects_xm(chunks, chunk_map, h.order_list, xm_ord_to_taud_cue, C)
+
+    # ── Pattern bin ─────────────────────────────────────────────────────────
+    total_taud_pats = len(taud_cue_list) * C
+    if total_taud_pats > NUM_PATTERNS_MAX:
+        sys.exit(f"error: {len(taud_cue_list)} cues × {C} channels = "
+                 f"{total_taud_pats} > {NUM_PATTERNS_MAX} Taud pattern limit.")
+
+    # Default pan per active channel: L,R,R,L repeating (6-bit pan values 16 / 47).
+    def _xm_default_pan(idx: int) -> int:
+        side = idx % 4
+        return 16 if side in (0, 3) else 47
+    default_pans = [_xm_default_pan(i) for i in range(C)]
+
+    pat_bin = bytearray()
+    for ci in taud_cue_list:
+        cg = chunks[ci]
+        for vi, ch in enumerate(active_channels):
+            pat_bin += build_pattern_xm(cg, ch, default_pans[vi],
+                                        resolve_inst_slot,
+                                        amiga_mode=not h.linear_freq)
+    pat_bin = rescale_offset_effects(bytes(pat_bin), sample_ratio)
+
+    pat_bin, pat_remap, num_taud_pats = deduplicate_patterns(pat_bin, total_taud_pats)
+    vprint(f"  patterns: {total_taud_pats} → {num_taud_pats} unique "
+           f"({total_taud_pats - num_taud_pats} deduplicated)")
+
+    # ── Cue sheet ───────────────────────────────────────────────────────────
+    sheet = bytearray(NUM_CUES * CUE_SIZE)
+    for c in range(NUM_CUES):
+        sheet[c * CUE_SIZE:c * CUE_SIZE + CUE_SIZE] = encode_cue([], 0)
+
+    if len(taud_cue_list) > NUM_CUES:
+        vprint(f"  warning: {len(taud_cue_list)} cues exceed the {NUM_CUES}-entry cue sheet; song truncated")
+    last_active = -1
+    len_cue_count = 0
+    for cue_idx, ci in enumerate(taud_cue_list):
+        if cue_idx >= NUM_CUES:
+            break
+        base_pat = cue_idx * C
+        pats = [pat_remap[base_pat + vi] for vi in range(C)]
+        clen = chunk_lens[ci] if ci < len(chunk_lens) else PATTERN_ROWS
+        if clen < PATTERN_ROWS:
+            instr = cue_instruction_len(clen)
+            len_cue_count += 1
+        else:
+            instr = CUE_INST_NOP
+        sheet[cue_idx * CUE_SIZE:(cue_idx + 1) * CUE_SIZE] = encode_cue(pats, instr)
+        last_active = cue_idx
+
+    if last_active >= 0:
+        if sheet[last_active * CUE_SIZE + 30] == CUE_INST_LEN:
+            vprint(f"  warning: last active cue {last_active} had LEN; "
+                   f"replaced with HALT (partial tail at song terminus)")
+        sheet[last_active * CUE_SIZE + 30] = CUE_INST_HALT
+        sheet[last_active * CUE_SIZE + 31] = 0x00
+    else:
+        sheet[30] = CUE_INST_HALT
+    if len_cue_count:
+        vprint(f"  emitted {len_cue_count} LEN cue instruction(s) "
+               f"for partial-length patterns")
+
+    # ── Header / song table ─────────────────────────────────────────────────
+    song_offset = TAUD_HEADER_SIZE + comp_size + TAUD_SONG_ENTRY
+    sig    = (SIGNATURE + b' ' * 14)[:14]
+    header = (
+        TAUD_MAGIC +
+        bytes([TAUD_VERSION, 1]) +
+        struct.pack('<I', comp_size) +
+        b'\x00\x00\x00\x00' +
+        sig
+    )
+    assert len(header) == TAUD_HEADER_SIZE
+
+    pat_comp = gzip.compress(bytes(pat_bin), compresslevel=9, mtime=0)
+    cue_comp = gzip.compress(bytes(sheet),   compresslevel=9, mtime=0)
+    vprint(f"  pattern bin: {len(pat_bin)} → {len(pat_comp)} bytes (gzip)")
+    vprint(f"  cue sheet:   {len(sheet)} → {len(cue_comp)} bytes (gzip)")
+
+    # Flags byte:
+    #   bit 1 (f) = Amiga pitch-slide mode (set when XM uses Amiga period table).
+    #   bit 2 (m) = FT2 fadeout-zero policy (stored 0 ⇒ cut on key-off; fadeStep
+    #               divisor 65536 — XM convention). Without it the engine uses
+    #               the IT divisor (1024), i.e. fadeout ~64× faster than FT2, so
+    #               voices with non-zero fadeout die within a few ticks of key-off.
+    flags_byte = (0x00 if h.linear_freq else 0x02) | 0x04
+    song_table = encode_song_entry(
+        song_offset=song_offset,
+        num_voices=C,
+        num_patterns=num_taud_pats,
+        bpm_stored=bpm_stored,
+        tick_rate=speed,
+        base_note=0xA000,
+        base_freq=8363.0,
+        flags_byte=flags_byte,
+        pat_bin_comp_size=len(pat_comp),
+        cue_sheet_comp_size=len(cue_comp),
+        global_vol=0xFF,
+        mixing_vol=0x80,
+    )
+    assert len(song_table) == TAUD_SONG_ENTRY
+
+    return header + compressed + song_table + pat_comp + cue_comp
+
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+def main():
+    """CLI entry point: parse an .xm file and write the converted .taud file."""
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('input',  help='Input .xm file')
+    parser.add_argument('output', help='Output .taud file')
+    parser.add_argument('-v', '--verbose', action='store_true')
+    args = parser.parse_args()
+    set_verbose(args.verbose)
+
+    with open(args.input, 'rb') as src:
+        raw = src.read()
+
+    vprint(f"parsing '{args.input}' ({len(raw)} bytes)…")
+    hdr = parse_xm_header(raw)
+    vprint(f"  title:    '{hdr.title}'")
+    vprint(f"  tracker:  '{hdr.tracker}'  version=0x{hdr.version:04X}")
+    vprint(f"  channels={hdr.channels} patterns={hdr.pattern_count} "
+           f"insts={hdr.instrument_count} orders={hdr.order_count}")
+    vprint(f"  freq table: {'linear' if hdr.linear_freq else 'Amiga'}")
+
+    # Pattern data begins at offset 60 + header_size; instruments follow the patterns.
+    patterns, pat_end = parse_patterns(raw, hdr, 60 + hdr.header_size)
+    instruments, _unused_end = parse_instruments(raw, hdr, pat_end)
+
+    image = assemble_taud(hdr, patterns, instruments)
+
+    with open(args.output, 'wb') as dst:
+        dst.write(image)
+
+    print(f"wrote {len(image)} bytes to '{args.output}'")
+    if not args.verbose:
+        return
+    print(f"  magic ok: {image[:8].hex()}", file=sys.stderr)
+    sig_off = TAUD_HEADER_SIZE - 14
+    print(f"  signature: {image[sig_off:sig_off + 14]}", file=sys.stderr)
+
+if __name__ == '__main__':   # run the converter only when executed as a script, not on import
+    main()