From db44b6b523b9d07955032eeecaa4214f177d64fd Mon Sep 17 00:00:00 2001
From: minjaesong <alswo9628@gmail.com>
Date: Sun, 14 Jun 2026 00:53:33 +0900
Subject: [PATCH] taud: midi and sf2 WIP

---
 assets/disk0/tvdos/bin/taut.js                |  177 +-
 assets/disk0/tvdos/include/taud.mjs           |   65 +-
 midi2taud.py                                  | 2043 +++++++++++++++++
 taud_common.py                                |  130 +-
 terranmon.txt                                 |   53 +-
 .../net/torvald/tsvm/AudioJSR223Delegate.kt   |  139 +-
 .../torvald/tsvm/peripheral/AudioAdapter.kt   |  862 +++++--
 7 files changed, 3243 insertions(+), 226 deletions(-)
 create mode 100644 midi2taud.py

diff --git a/assets/disk0/tvdos/bin/taut.js b/assets/disk0/tvdos/bin/taut.js
index 7fd461c..29b5876 100644
--- a/assets/disk0/tvdos/bin/taut.js
+++ b/assets/disk0/tvdos/bin/taut.js
@@ -3239,26 +3239,76 @@ function decodeInstRecord(rec) {
 // usedBy[], name }. usedBy is a list of instrument slot numbers (1..255).
 let samplesCache = null
 
+// Feature probe for Ixmp ("instrument extra samples") patch read-back. True only
+// when the host `audio` object provides both getters; on a host without them the
+// Samples tab falls back to listing just the base-record samples.
+const hasIxmpAPI = typeof audio !== 'undefined' &&
+    ['getInstrumentPatchCount', 'getInstrumentPatches']
+        .every((fn) => typeof audio[fn] === 'function')
+
+// Byte length of one Ixmp patch, derived from its version/feature byte: 31 fixed bytes
+// plus each optional block flagged present (terranmon.txt §Ixmp; same rule as taud.mjs#patchLen).
+function ixmpPatchLen(ver) {
+    const ENV_BYTES = 54                        // every envelope block is the same size
+    let total = 31
+    if (ver & 0x80) total += 15                 // x: extra-base-info (flags1+flags2+fadeout+cutoff+reson+atten)
+    for (const bit of [0x02, 0x04, 0x08, 0x10]) // v, p, f, P: volume / panning / filter / pitch envelopes
+        if (ver & bit) total += ENV_BYTES
+    return total
+}
+
+// Walk instrument `slot`'s Ixmp patches; invoke cb(samplePtr, sampleLen, extra) per
+// patch. Patch common-byte layout (terranmon.txt §Ixmp): u32 ptr@7, u16 len@11,
+// u16 playStart@13, loopStart@15, loopEnd@17, rate@19, u8 loopMode@23. No-op without API.
+function forEachIxmpPatchSample(slot, cb) {
+    if (!hasIxmpAPI) return
+    if (audio.getInstrumentPatchCount(slot) <= 0) return
+    const b = audio.getInstrumentPatches(slot)
+    if (!b || b.length < 31) return
+    const u16 = (o) => (b[o] & 0xFF) | ((b[o+1] & 0xFF) << 8)
+    // `>>> 0` keeps the pointer unsigned: a bare `|` chain yields a signed int32,
+    // so a top byte >= 0x80 would turn into a negative ptr (bad Map key / sort order).
+    const u32 = (o) => (u16(o) | (u16(o+2) << 16)) >>> 0
+    let o = 0
+    while (o + 31 <= b.length) {
+        const ver = b[o] & 0xFF
+        const len = ixmpPatchLen(ver)           // patch size depends on its version byte
+        if (o + len > b.length) break           // incomplete trailing patch: stop walking
+        cb(u32(o+7), u16(o+11), {
+            c4Rate: u16(o+19), playStart: u16(o+13), loopStart: u16(o+15),
+            loopEnd: u16(o+17), sampleFlags: b[o+23] & 0xFF
+        })
+        o += len
+    }
+}
+
 function buildSampleIndex() {
     const byPtr = new Map()
-    for (let i = 1; i < TAUT_INST_COUNT; i++) {
-        const d = decodeInstRecord(readInstRecord(i))
-        if (d.sampleLen === 0) continue
-        const key = d.samplePtr + ':' + d.sampleLen
+    const addSample = (slot, ptr, len, extra) => {
+        if (len === 0) return
+        const key = ptr + ':' + len
         if (!byPtr.has(key)) {
-            byPtr.set(key, {
-                ptr:        d.samplePtr,
-                len:        d.sampleLen,
-                c4Rate:     d.c4Rate,
-                playStart:  d.playStart,
-                loopStart:  d.loopStart,
-                loopEnd:    d.loopEnd,
-                sampleFlags:d.sampleFlags,
-                usedBy:     [],
-                name:       ''
+            byPtr.set(key, Object.assign({
+                ptr: ptr, len: len, c4Rate: 0, playStart: 0,
+                loopStart: 0, loopEnd: 0, sampleFlags: 0, usedBy: [], name: ''
+            }, extra || {}))
+        }
+        const e = byPtr.get(key)
+        if (e.usedBy.indexOf(slot) < 0) e.usedBy.push(slot)
+    }
+    for (let i = 1; i < TAUT_INST_COUNT; i++) {
+        const rec = readInstRecord(i)
+        // Metainstruments (samplePtr high 16 bits == 0xFFFF) carry no sample of their
+        // own — only a layer table — so skip their bogus base pointer here.
+        if (((rec[2] | (rec[3] << 8)) & 0xFFFF) !== 0xFFFF) {
+            const d = decodeInstRecord(rec)
+            addSample(i, d.samplePtr, d.sampleLen, {
+                c4Rate: d.c4Rate, playStart: d.playStart, loopStart: d.loopStart,
+                loopEnd: d.loopEnd, sampleFlags: d.sampleFlags
             })
         }
-        byPtr.get(key).usedBy.push(i)
+        // Ixmp patch samples (extra multisamples that velocity/key layers reference).
+        forEachIxmpPatchSample(i, (ptr, slen, ex) => addSample(i, ptr, slen, ex))
     }
     const list = Array.from(byPtr.values()).sort((a, b) => a.ptr - b.ptr)
     const names = (songsMeta && songsMeta.sampleNames) || []
@@ -3820,9 +3870,37 @@ function decodeEnvelope(rec, kind) {
     }
 }
 
+// Decode a Metainstrument record (terranmon.txt §"Metainstrument definition"):
+// byte0 = type (0 = layered), byte1 = layer count, bytes2-3 = 0xFFFF identifier,
+// then `count` 10-byte layer descriptors from byte4. Each: u8 instIdx, u8 mixOctet
+// (Perceptually-Significant-Octet dB, 159 = unity), s16 detune (4096-TET), u16 pitchStart,
+// u16 pitchEnd, u8 volStart, u8 volEnd (0..63). All bytes are masked to 0..255 before use.
+function decodeMetaRecord(rec) {
+    const u8  = (i) => rec[i] & 0xFF
+    const u16 = (i) => u8(i) | (u8(i+1) << 8)
+    const s16 = (i) => (u16(i) << 16) >> 16      // sign-extend the 16-bit word
+    const layers = []
+    for (let i = 0, o = 4, count = u8(1); i < count && o + 10 <= 256; i++, o += 10) {
+        layers.push({
+            instIdx:    u8(o),
+            mixOctet:   u8(o+1),
+            detune:     s16(o+2),
+            pitchStart: u16(o+4),
+            pitchEnd:   u16(o+6),
+            volStart:   u8(o+8) & 0x3F,
+            volEnd:     u8(o+9) & 0x3F
+        })
+    }
+    return { isMeta: true, metaType: u8(0), layers }
+}
+
+// True when a 256-byte record is a Metainstrument (samplePtr high 16 bits == 0xFFFF); each byte is masked so signed input works.
+function recordIsMeta(rec) { return (rec[2] & 0xFF) === 0xFF && (rec[3] & 0xFF) === 0xFF }
+
 // Decode the full 256-byte instrument record into a structured object suitable
 // for display. Field offsets/encodings track terranmon.txt §"Instrument bin".
 function decodeInstFull(rec) {
+    if (recordIsMeta(rec)) return decodeMetaRecord(rec)
     const samplePtr      = (rec[0]) | (rec[1] << 8) | (rec[2] << 16) | (rec[3] * 0x1000000)
     const sampleLen      = rec[4]  | (rec[5]  << 8)
     const c4Rate         = rec[6]  | (rec[7]  << 8)
@@ -3845,7 +3923,9 @@ function decodeInstFull(rec) {
     const defReso        = rec[183]
     let   detune         = rec[184] | (rec[185] << 8); if (detune >= 0x8000) detune -= 0x10000
     const instFlag       = rec[186]
-    const nna            = instFlag & 3
+    // NNA UI value: 0..3 = traditional (bits 0-1); 4 = Key Lift (bit 5 set,
+    // bits 0-1 = 00 — the 0b100 "Nnn" pattern, terranmon byte 186).
+    const nna            = ((instFlag >>> 5) & 1) ? 4 : (instFlag & 3)
     const vibWaveform    = (instFlag >>> 2) & 7
     const vibDepth       = rec[187]
     const vibRate        = rec[188]
@@ -4051,11 +4131,13 @@ function loopModeNameInst(flags) {
     const names = ['None', 'Forward', 'Pingpong', 'Oneshot']
     return names[lp] + (sus ? ' (sustain)' : '')
 }
-// Clickable button-group option lists. NNA/DCT use every value; DCA's 4th slot
-// is reserved (dropped); vibrato exposes the 5 engine-supported waves
-// (sine/ramp-dn/square/random/ramp-up — see AudioAdapter.advanceAutoVibrato).
-const NNA_NAMES      = ['Cut', 'Off', 'Continue', 'Fade']
-const DCT_NAMES      = ['Off', 'Note', 'Sample', 'Inst.']
+// Clickable button-group option lists. NNA's 5th option is Key Lift (flag bit 5,
+// the 0b100 pattern: MIDI-exact key-up — envelope jumps to the release nodes);
+// DCT uses every value; DCA's 4th slot is reserved (dropped); vibrato exposes
+// the 5 engine-supported waves (sine/ramp-dn/square/random/ramp-up — see
+// AudioAdapter.advanceAutoVibrato).
+const NNA_NAMES      = ['Off', 'Cut', 'Cont.', 'Fade', 'Lift']
+const DCT_NAMES      = ['Never', 'Note', 'Sample', 'Inst.']
 const DCA_OPTIONS    = ['Cut', 'Off', 'Fade']
 const VIB_WF_OPTIONS = ['\u00D8\u00D9', '\u00A5\u00A6', '\u00B4\u00B4', '\u00F3\u00F3', '\u00B5\u00B6']//['Sine', 'Ramp-dn', 'Square', 'Random', 'Ramp-up']
 
@@ -4464,7 +4546,10 @@ function drawInstTabGeneral2(e) {
     y++
     drawGroupHeader(y++, 'Note actions')
     // NNA — instFlag (byte 186) bits 0..1; DCT/DCA — dcByte (byte 195) bits 0..1 / 2..3.
-    y += buttonGroupRow(y, '  NNA:', NNA_NAMES,   d.nna & 3, (v) => instWriteField(e, 186, 0, 2, v))
+    y += buttonGroupRow(y, '  NNA:', NNA_NAMES,   d.nna, (v) => {
+        instWriteField(e, 186, 5, 1, v === 4 ? 1 : 0)        // Key Lift bit
+        instWriteField(e, 186, 0, 2, v === 4 ? 0 : v)        // traditional nn
+    })
     y += buttonGroupRow(y, '  DCT:', DCT_NAMES,   d.dct & 3, (v) => instWriteField(e, 195, 0, 2, v))
     y += buttonGroupRow(y, '  DCA:', DCA_OPTIONS, d.dca & 3, (v) => instWriteField(e, 195, 2, 2, v))
 
@@ -4707,13 +4792,54 @@ function drawInstTabPitch(e) {
     })
 }
 
+// Metainstrument view (terranmon.txt §"Metainstrument definition"): the record
+// carries no sample of its own — only a layer table fanned out at trigger time.
+// One row per layer: target instrument, mix volume (Perceptually-Significant
+// octet; 159 = unity), sample detune (4096-TET → whole cents, signed), and the
+// pitch × velocity rectangle that gates the layer. Excess layers are summarised.
+function drawInstTabMeta(e) {
+    const d = e.decoded
+    const total = d.layers.length
+    let y = INST_BODY_Y
+    drawGroupHeader(y++, 'Metainstrument  (' + total + ' layer' + (total === 1 ? '' : 's') + ')')
+    drawLabelRow(y++, '  Type:', d.metaType === 0 ? 'layered (0)' : '$' + _hex(d.metaType, 2))
+    y++
+    // Column header.
+    con.move(y, INST_RIGHT_X); con.color_pair(colInstGroupHdr, colBackPtn)
+    print(' #  Inst  Mix     Detune    Pitch        Vel'.substring(0, INST_RIGHT_W))
+    y++
+    // Rows available above the button row; clamped so a cramped layout cannot go negative.
+    const maxRows = Math.max(0, INST_BTN_Y - y - 1)
+    for (let i = 0; i < total && i < maxRows; i++) {
+        const L = d.layers[i]
+        // Round the magnitude and attach the sign afterwards, so a detune that rounds
+        // to zero prints "+0c" (toFixed on a small negative value would give "-0c").
+        const cents = L.detune * 1200 / 4096
+        const absCents = Math.round(Math.abs(cents))
+        const det = ((cents < 0 && absCents > 0) ? '-' : '+') + absCents + 'c'
+        const mix = (L.mixOctet === 159) ? '$9F=1x' : ('$' + _hex(L.mixOctet, 2))
+        const pit = noteToStr(L.pitchStart) + sym.doubledot + noteToStr(L.pitchEnd)
+        const vel = L.volStart + sym.doubledot + L.volEnd
+        const num = (i + 1).toString().padStart(2)
+        const row = ' ' + num + '  $' + _hex(L.instIdx, 2) + '  ' + mix.padEnd(7) +
+                    ' ' + det.padEnd(8) + '  ' + pit.padEnd(11) + '  ' + vel
+        con.move(y, INST_RIGHT_X); con.color_pair(colInstValue, colBackPtn)
+        print(row.length > INST_RIGHT_W ? row.substring(0, INST_RIGHT_W) : row)
+        y++
+    }
+    if (total > maxRows) {
+        con.move(y, INST_RIGHT_X); con.color_pair(colInstGroupHdr, colBackPtn)
+        print('  … ' + (total - maxRows) + ' more layer(s)')
+    }
+}
+
 // ── Edit button (bottom row) ───────────────────────────────────────────────
 function drawInstrumentsEditButton() {
     const y = INST_BTN_Y
     con.move(y, INST_RIGHT_X)
     con.color_pair(colInstGroupHdr, colBackPtn); print('[ E ]')
     con.color_pair(colInstValue,    colBackPtn)
-    const label = ' Edit instrument'
+    const label = ' Advanced Edit'
     print(label)
     const rest = INST_RIGHT_W - (5 + label.length)
     if (rest > 0) print(' '.repeat(rest))
@@ -4749,7 +4875,10 @@ function drawInstrumentsContents(wo) {
     // until after the text tabs are drawn — otherwise plotRect-555 fill at the
     // end of the body redraw would erase the graph again.
     clearInstrumentsEnvelopeArea()
-    if      (instSubTab === INST_TAB_GEN1) drawInstTabGeneral1(e)
+    // Metainstruments have no sample/envelopes — show their layer table on every
+    // sub-tab (the Gen/env drawers would read absent fields and mis-render).
+    if (e.decoded.isMeta)                  drawInstTabMeta(e)
+    else if (instSubTab === INST_TAB_GEN1) drawInstTabGeneral1(e)
     else if (instSubTab === INST_TAB_GEN2) drawInstTabGeneral2(e)
     else if (instSubTab === INST_TAB_VOL)  drawInstTabVolume(e)
     else if (instSubTab === INST_TAB_PAN)  drawInstTabPanning(e)
diff --git a/assets/disk0/tvdos/include/taud.mjs b/assets/disk0/tvdos/include/taud.mjs
index e177d76..c0200e4 100644
--- a/assets/disk0/tvdos/include/taud.mjs
+++ b/assets/disk0/tvdos/include/taud.mjs
@@ -169,7 +169,14 @@ function uploadTaudFile(inFile, songIndex, playhead) {
             if ((sys.peek(filePtr + projOff + i) & 0xFF) !== projMagic[i]) { prjOk = false; break }
         }
         if (prjOk) {
-            const PATCH_SIZE = 31
+            // Patches are VARIABLE LENGTH (since 2026-06-13): a version byte (feature
+            // bit-flags 0b x00Pfpvi) + 30 common bytes, then optional x/v/p/f/P blocks.
+            const patchLen = (ver) => 31
+                + ((ver & 0x80) ? 15 : 0)   // x: extra-base-info (u32 flags1 + u32 flags2 + u16 fadeout + u16 cutoff + u16 reson + u8 initialAttenuation octet)
+                + ((ver & 0x02) ? 54 : 0)   // v: volume envelope
+                + ((ver & 0x04) ? 54 : 0)   // p: panning envelope
+                + ((ver & 0x08) ? 54 : 0)   // f: filter envelope
+                + ((ver & 0x10) ? 54 : 0)   // P: pitch envelope
             let p = projOff + 16  // skip magic(8) + reserved(8)
             while (p + 8 <= fileSize) {
                 const fc = String.fromCharCode(
@@ -179,7 +186,7 @@ function uploadTaudFile(inFile, songIndex, playhead) {
                 const payload = p + 8
                 if (payload + secLen > fileSize) break
                 if (fc === 'Ixmp') {
-                    // Each entry: Uint8 instId + Uint24 patchCount + (patchCount × PATCH_SIZE) bytes.
+                    // Each entry: Uint8 instId + Uint24 patchCount + variable-length patches.
                     let q = payload
                     const qEnd = payload + secLen
                     while (q + 4 <= qEnd) {
@@ -188,8 +195,15 @@ function uploadTaudFile(inFile, songIndex, playhead) {
                         const cntMid   = sys.peek(filePtr + q) & 0xFF; q++
                         const cntHi    = sys.peek(filePtr + q) & 0xFF; q++
                         const patchCnt = cntLo | (cntMid << 8) | (cntHi << 16)
-                        const blobLen  = patchCnt * PATCH_SIZE
-                        if (q + blobLen > qEnd) break
+                        // Walk the patches to find the blob length (each depends on its version byte).
+                        let blobLen = 0, scan = q, ok = true
+                        for (let i = 0; i < patchCnt; i++) {
+                            if (scan + 31 > qEnd) { ok = false; break }
+                            const len = patchLen(sys.peek(filePtr + scan) & 0xFF)
+                            if (scan + len > qEnd) { ok = false; break }
+                            scan += len; blobLen += len
+                        }
+                        if (!ok) break
                         let buf = new Array(blobLen)
                         for (let k = 0; k < blobLen; k++) buf[k] = sys.peek(filePtr + q + k) & 0xFF
                         audio.uploadInstrumentPatches(instId, buf)
@@ -291,6 +305,35 @@ function captureTrackerDataToFile(outFile) {
     // Layout: header(32) + compressed(compressedSize) + songTable(1 × TAUD_SONG_ENTRY)
     let songOffset = TAUD_HEADER_SIZE + compressedSize + 1 * TAUD_SONG_ENTRY
 
+    // -- 6.5 Build Ixmp project-data block (preserves multi-sample instruments)
+    // Without this, saving a song whose instruments carry Ixmp patches (IT/XM
+    // keyboard tables, SF2 imports) would silently collapse every instrument to
+    // its base sample on the next load. Section format per terranmon.txt
+    // §"Project Data" / §"Ixmp": magic(8) + reserved(8) + FourCC + Uint32 len +
+    // repetition of { Uint8 instId, Uint24 count, count × variable-length patches }.
+    let ixmpPayload = []
+    for (let s = 0; s < 256; s++) {
+        const cnt = audio.getInstrumentPatchCount(s)
+        if (cnt <= 0) continue
+        const blob = audio.getInstrumentPatches(s)   // flat variable-length patch bytes
+        ixmpPayload.push(s & 0xFF, cnt & 0xFF, (cnt >>> 8) & 0xFF, (cnt >>> 16) & 0xFF)
+        for (let k = 0; k < blob.length; k++) ixmpPayload.push(blob[k] & 0xFF)
+    }
+    let projData = []
+    let projOff  = 0
+    if (ixmpPayload.length > 0) {
+        projData = [
+            0x1E, 0x54, 0x61, 0x75, 0x64, 0x50, 0x72, 0x4A,   // \x1ETaudPrJ
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,   // reserved
+            0x49, 0x78, 0x6D, 0x70,                            // 'Ixmp'
+            (ixmpPayload.length        ) & 0xFF,
+            (ixmpPayload.length >>>  8) & 0xFF,
+            (ixmpPayload.length >>> 16) & 0xFF,
+            (ixmpPayload.length >>> 24) & 0xFF,
+        ].concat(ixmpPayload)
+        projOff = songOffset + patCompSize + cueCompSize
+    }
+
     // -- 7. Build header byte array (32 bytes) --------------------------------
     let sigBytes = new Array(14)
     for (let i = 0; i < 14; i++)
@@ -306,8 +349,11 @@ function captureTrackerDataToFile(outFile) {
         (compressedSize >>>  8) & 0xFF,
         (compressedSize >>> 16) & 0xFF,
         (compressedSize >>> 24) & 0xFF,
-        // project data offset (4) -- not emitted
-        0x00, 0x00, 0x00, 0x00,
+        // project data offset (4) -- zero when no Ixmp/etc. to carry
+        (projOff        ) & 0xFF,
+        (projOff >>>  8) & 0xFF,
+        (projOff >>> 16) & 0xFF,
+        (projOff >>> 24) & 0xFF,
     ].concat(sigBytes)  // 8 + 2 + 4 + 4 + 14 = 32 bytes
 
     // -- 8. Build song-table row (32 bytes) -----------------------------------
@@ -360,6 +406,13 @@ function captureTrackerDataToFile(outFile) {
         TAUD_HEADER_SIZE + compressedSize + songTable.length + patCompSize)
     sys.free(cueCompBuf)
 
+    // -- 14. Append project data (Ixmp) at projOff ----------------------------
+    if (projData.length > 0) {
+        let projBuf = sys.malloc(projData.length)
+        for (let k = 0; k < projData.length; k++) sys.poke(projBuf + k, projData[k])
+        fileHandle.pwrite(projBuf, projData.length, projOff)
+        sys.free(projBuf)
+    }
 
     fileHandle.flush(); fileHandle.close()
 }
diff --git a/midi2taud.py b/midi2taud.py
new file mode 100644
index 0000000..cfd9ee0
--- /dev/null
+++ b/midi2taud.py
@@ -0,0 +1,2043 @@
+#!/usr/bin/env python3
+"""midi2taud.py — Convert Standard MIDI (.mid) + SoundFont 2 (.sf2) to TSVM Taud (.taud)
+
+Usage:
+    python3 midi2taud.py song.mid soundfont.sf2 [output.taud]
+                         [--perc-force-mapping BANK INST]
+                         [--rpb N] [--speed N] [--fadeout N]
+                         [--bend-epsilon CENTS] [--drum-keyoff]
+                         [-v] [--no-project-data]
+
+Behaviour (per midi2taud.md):
+  * Pitch bends are preserved as much as possible. A note starting under a
+    non-zero bend triggers directly at the bent 4096-TET pitch (Taud notes
+    are 4096-TET, so the trigger encodes the exact shifted pitch). Bend
+    movement during a note is approximated as linear segments: each segment
+    is one row carrying the exact 4096-TET target note plus tone portamento
+    (G $xxxx, units/tick) sized to land on the target by row end. Jittery
+    curves are simplified via --bend-epsilon (cents). RPN 0,0 pitch-bend
+    range messages are honoured; bend values are computed as floats from
+    the full 14-bit word (MIDIs that only drive the MSB work transparently).
+  * Both MIDI key-off idioms — real note-off messages and note-on with
+    velocity 0 — are translated into Taud KEY_OFF. Percussion-channel
+    key-offs are dropped by default (GM percussion ignores note-off, and
+    emitting them would chop one-shot drum tails); --drum-keyoff re-enables.
+  * The SF2 key/velocity sample-layering model is recreated faithfully. Each
+    preset's zones are partitioned into the fewest mutually-DISJOINT layers
+    (--max-layers cap, default 4); each layer becomes one normal Taud instrument
+    with its zones as Ixmp patches (velocity axis round(vel × 63/127)). A preset
+    needing >1 layer is emitted as a Metainstrument (terranmon.txt "Metainstrument
+    definition"): the note references the meta slot and the engine fans out one
+    voice per matching layer, so SF2's simultaneous layering (and detune-stacks)
+    now sound — overlapping zones are no longer dropped. Single-layer presets stay
+    plain instruments. Stereo SF2 samples are mixed to mono. Unused instruments,
+    patches, and samples are trimmed.
+  * The SF2 volume-envelope ADSR is preserved on the (instrument-scope) Taud
+    volume envelope: delay/attack/hold/decay nodes, a sustain region held
+    while the key is on, and release nodes walked after key-off, plus a
+    safety fadeout (~4× release) that guarantees voices die. The canonical
+    zone's ADSR represents the instrument (Taud envelopes are instrument-
+    scope; differing zone ADSRs are warned). Per the Ixmp per-patch import
+    rule, only initialAttenuation and filters are ignored.
+  * Polyphony rides the engine's New Note Action (matching MIDI semantics):
+    melodic instruments get NNA = note-off, drums NNA = continue, so a voice
+    column is reusable the moment its note releases — the release/ring tail
+    moves to a background ghost on the next trigger. Voice budget defaults
+    to 16 columns (--max-voices); overflow releases the oldest pedal-held or
+    soonest-ending note early instead of cutting it.
+  * Sub-row timing is carried by S $Dx note delays (one row = `--speed`
+    ticks, default 6; one beat = `--rpb` rows, default 4 → 1/24-beat grid).
+    MIDI tempo changes map to T $xx00 set-tempo effects; channel volume /
+    expression (CC7 × CC11) map to M $xx00 channel-volume effects so they
+    never disturb the velocity-driven patch selection axis.
+"""
+
+import argparse
+import array
+import bisect
+import math
+import os
+import struct
+import sys
+
+from taud_common import (
+    set_verbose, vprint,
+    TAUD_MAGIC, TAUD_VERSION, TAUD_HEADER_SIZE, TAUD_SONG_ENTRY,
+    SAMPLEBIN_SIZE, INSTBIN_SIZE, SAMPLEINST_SIZE, SAMPLE_LEN_LIMIT,
+    PATTERN_ROWS, PATTERN_BYTES, NUM_PATTERNS_MAX, NUM_CUES, CUE_SIZE, NUM_VOICES,
+    NOTE_NOP, NOTE_KEYOFF, TAUD_C4,
+    TOP_G, TOP_M, TOP_S, TOP_T,
+    SEL_SET, SEL_FINE,
+    CUE_INST_NOP, CUE_INST_HALT,
+    resample_linear, encode_cue, deduplicate_patterns, encode_song_entry,
+    compress_blob, build_project_data, cue_instruction_len, nearest_minifloat,
+    IXMP_PAN_NO_OVERRIDE, atten_cb_to_octet,
+)
+
+SIGNATURE = b'midi2taud/TSVM'   # 14 bytes
+UNITS_PER_SEMI = 4096.0 / 12.0  # 4096-TET units per 12-TET semitone
+
+# Effect priorities for the shared per-cell effect slot. Higher wins when a
+# later pass needs the slot: M only takes free slots; T tempo is global and may
+# evict G/M; SD note delays carry trigger timing and are never overwritten.
+# NOTE(review): PRIO_TEMPO outranks PRIO_DELAY — confirm T never evicts an SD.
+PRIO_FREE  = 0
+PRIO_M     = 1
+PRIO_PORTA = 2
+PRIO_DELAY = 3
+PRIO_TEMPO = 4
+
+
+def key_to_noteval(key: float) -> int:
+    """Convert a MIDI key (float, 60 = middle C) to a Taud 4096-TET noteVal (C4 = 0x5000), clamped to 0x20..0xFFFF."""
+    return min(0xFFFF, max(0x20, round((key - 60.0) * UNITS_PER_SEMI + TAUD_C4)))
+
+
+# ── MIDI parser ───────────────────────────────────────────────────────────────
+
+def _read_varlen(data: bytes, pos: int):
+    """Decode the SMF variable-length quantity at `pos` → (value, next_pos); a truncated tail ends the read."""
+    val = 0
+    while pos < len(data):                       # bounded: no IndexError on cut-off input
+        val = (val << 7) | (data[pos] & 0x7F); pos += 1
+        if data[pos - 1] < 0x80: break           # high bit clear marks the final byte
+    return val, pos
+
+
+def _parse_track(data: bytes, pos: int, end: int) -> list:
+    """Parse one MTrk body → list of (abs_tick, event_tuple); a track cut mid-event keeps the events before it."""
+    evs = []
+    tick = status = 0
+    while pos < end:
+        delta, pos = _read_varlen(data, pos)
+        tick += delta
+        if pos >= end:
+            break
+        b = data[pos]
+        if b & 0x80:
+            status = b
+            pos += 1
+        elif status < 0x80:
+            vprint(f"  warning: corrupt track data at {pos:#x}, truncating track")
+            break
+
+        if status == 0xFF:                       # meta
+            if pos >= end:                       # meta type byte missing: truncated track
+                break
+            mtype = data[pos]; pos += 1
+            ln, pos = _read_varlen(data, pos)
+            payload = data[pos:min(pos + ln, end)]; pos += ln   # never read past this track
+            if mtype == 0x51 and len(payload) >= 3:
+                uspq = int.from_bytes(payload[:3], 'big')
+                if uspq > 0:
+                    evs.append((tick, ('tempo', 60000000.0 / uspq)))
+            elif mtype == 0x03:
+                txt = payload.decode('latin-1', errors='replace').strip()
+                if txt:
+                    evs.append((tick, ('title', txt)))
+            elif mtype == 0x2F:
+                evs.append((tick, ('eot',)))
+                break
+            status = 0                           # meta cancels running status
+        elif status in (0xF0, 0xF7):             # sysex
+            ln, pos = _read_varlen(data, pos)
+            pos += ln
+            status = 0
+        else:
+            hi, ch = status & 0xF0, status & 0x0F
+            nbytes = 1 if hi in (0xC0, 0xD0) else 2
+            if pos + nbytes > end:               # data bytes missing: truncated track
+                break
+            if nbytes == 1:
+                d1 = data[pos]; pos += 1
+                if hi == 0xC0:
+                    evs.append((tick, ('prog', ch, d1)))
+            else:
+                d1 = data[pos]; d2 = data[pos+1]; pos += 2
+                if hi == 0x90:                   # velocity 0 is the note-on key-off idiom
+                    evs.append((tick, ('on', ch, d1, d2) if d2 > 0 else ('off', ch, d1)))
+                elif hi == 0x80:
+                    evs.append((tick, ('off', ch, d1)))
+                elif hi == 0xB0:
+                    evs.append((tick, ('cc', ch, d1, d2)))
+                elif hi == 0xE0:
+                    evs.append((tick, ('bend', ch, (d2 << 7) | d1)))
+                # 0xA0 polyphonic aftertouch: ignored
+    return evs
+
+
+def parse_midi(path: str):
+    """Read a Standard MIDI File (optionally RIFF/RMID-wrapped) from `path`.
+    Returns (division, merged_events). division: ('ppq', tpq) or
+    ('smpte', fps, tpf). merged_events: [(tick, seq, event_tuple)] sorted by
+    tick, then by parse order. Exits with an error message on a bad header.
+    """
+    with open(path, 'rb') as f:
+        data = f.read()
+
+    if data[:4] == b'RIFF':                      # RMID wrapper: unwrap the 'data' chunk
+        pos = 12
+        while pos + 8 <= len(data):
+            cid = data[pos:pos+4]
+            sz  = struct.unpack_from('<I', data, pos+4)[0]
+            if cid == b'data':
+                data = data[pos+8 : pos+8+sz]
+                break
+            pos += 8 + sz + (sz & 1)             # odd-sized chunks carry one pad byte
+
+    if data[:4] != b'MThd':
+        sys.exit("error: not a MIDI file (bad MThd magic)")
+    if len(data) < 14:                           # magic(4) + length(4) + 3 × u16
+        sys.exit("error: not a MIDI file (truncated MThd header)")
+    hlen = struct.unpack_from('>I', data, 4)[0]
+    fmt, ntrk, div = struct.unpack_from('>HHH', data, 8)
+    if fmt == 2:
+        vprint("  warning: SMF format 2 — tracks merged on a shared timeline")
+
+    if div & 0x8000:                             # SMPTE: high byte is a negated frame rate
+        fps = -struct.unpack_from('b', data, 12)[0]
+        division = ('smpte', fps, div & 0xFF)
+    else:
+        division = ('ppq', max(1, div))
+
+    pos, merged, tracks_found = 8 + hlen, [], 0
+    while pos + 8 <= len(data) and tracks_found < ntrk:
+        cid = data[pos:pos+4]
+        sz  = struct.unpack_from('>I', data, pos+4)[0]
+        body_start = pos + 8
+        pos = body_start + sz
+        if cid != b'MTrk':                       # skip chunks that are not tracks
+            continue
+        tracks_found += 1
+        for tick, ev in _parse_track(data, body_start, min(pos, len(data))):
+            merged.append((tick, len(merged), ev))   # seq = global parse order
+    merged.sort(key=lambda e: (e[0], e[1]))
+    return division, merged
+
+
+# ── Note / controller extraction ──────────────────────────────────────────────
+
+class Note:
+    """One MIDI note instance; slot starts at 0, voice at -1, end_ft and pedal_ft at None."""
+    __slots__ = ('ch', 'key', 'vel', 'start_ft', 'end_ft', 'inst_key',
+                 'bend0', 'slot', 'voice', 'drum', 'pedal_ft')
+    def __init__(self, ch, key, vel, start_ft, inst_key, bend0):
+        self.ch, self.key, self.vel = ch, key, vel
+        self.start_ft = start_ft
+        # No end time yet: it is assigned after construction.
+        self.end_ft   = None
+        self.inst_key = inst_key
+        self.drum     = inst_key[0] == 'd'       # True when the instrument key is tagged 'd'
+        self.bend0    = bend0
+        self.slot     = 0
+        self.voice    = -1
+        self.pedal_ft = None     # physical key-up time when only the pedal holds it
+
+
+class _ChState:
+    """Mutable per-channel state: bank/program, RPN selection, bend range, controller curves, held notes."""
+    __slots__ = ('bank', 'prog', 'rpn_msb', 'rpn_lsb', 'range_semi',
+                 'range_cents', 'cur_bend', 'bend_ft', 'bend_val',
+                 'cc7_ft', 'cc7_val', 'cc11_ft', 'cc11_val',
+                 'cc10_ft', 'cc10_val', 'sus', 'pending', 'active')
+    def __init__(self):
+        self.bank = self.prog = 0
+        self.rpn_msb = self.rpn_lsb = 0x7F
+        self.range_semi  = 2
+        self.range_cents = 0
+        self.cur_bend = 0.0
+        # Each curve is a pair of parallel lists: fine-tick positions and values.
+        self.bend_ft, self.bend_val = [0], [0.0]
+        self.cc7_ft,  self.cc7_val  = [0], [100]     # GM default
+        self.cc11_ft, self.cc11_val = [0], [127]
+        self.cc10_ft, self.cc10_val = [], []         # empty = never set
+        self.sus     = False
+        self.pending = []        # notes held by the sustain pedal
+        self.active  = {}        # key → Note
+
+
+def _curve_at(fts: list, vals: list, ft: int, default):
+    idx = bisect.bisect_right(fts, ft)           # count of breakpoints at or before ft
+    return default if idx == 0 else vals[idx - 1]
+
+
+def _curve_push(fts: list, vals: list, ft: int, val):
+    """Record (ft, val); a repeat of the final ft replaces that point's value instead of appending."""
+    if not fts or fts[-1] != ft:
+        fts.append(ft); vals.append(None)        # open a new point, filled in below
+    vals[-1] = val
+
+
+class Song:  # result record of extract_song(); defines no __init__, so attributes are assigned by the caller
+    __slots__ = ('notes', 'channels', 'tempo_ft', 'tempo_bpm', 'title', 'end_ft')  # fixed attribute set
+
+
+def extract_song(division, merged, rpb: int, speed: int) -> Song:
+    """Walk merged MIDI events, producing note instances (with both key-off
+    idioms resolved to a definite end time), per-channel bend/CC curves, and
+    the tempo map — all on the Taud fine-tick (ft) grid where one row =
+    `speed` fts and one beat = `rpb` rows."""
+    if division[0] == 'ppq':
+        tpq = division[1]
+        def to_ft(tick):
+            return round(tick * rpb * speed / tpq)
+    else:
+        _, fps, tpf = division
+        tps = max(1.0, float(fps * tpf))         # ticks per second
+        # SMPTE timing has no musical beats: pin a 120 BPM equivalent grid.
+        def to_ft(tick):
+            return round((tick / tps) * 2.0 * rpb * speed)
+        vprint("  info: SMPTE division — pinned to a 120 BPM-equivalent grid")
+
+    chs = [_ChState() for _ in range(16)]
+    notes = []
+    tempo_ft, tempo_bpm = [], []
+    title = None
+    max_ft = 0
+
+    def end_note(n: Note, ft: int):
+        if n.end_ft is None:
+            n.end_ft = max(ft, n.start_ft)
+
+    for tick, _seq, ev in merged:
+        ft = to_ft(tick)
+        if ft > max_ft:
+            max_ft = ft
+        kind = ev[0]
+
+        if kind == 'on':
+            _, ch, key, vel = ev
+            st = chs[ch]
+            prev = st.active.pop(key, None)
+            if prev is not None:                 # re-strike: close the old one
+                end_note(prev, ft)
+            ik = ('d', st.prog) if ch == 9 else ('m', st.bank, st.prog)
+            n = Note(ch, key, vel, ft, ik, st.cur_bend)
+            st.active[key] = n
+            notes.append(n)
+
+        elif kind == 'off':
+            _, ch, key = ev
+            st = chs[ch]
+            n = st.active.pop(key, None)
+            if n is not None:
+                if st.sus:
+                    n.pedal_ft = ft
+                    st.pending.append(n)
+                else:
+                    end_note(n, ft)
+
+        elif kind == 'bend':
+            _, ch, val14 = ev
+            st = chs[ch]
+            # MUST be float maths: 14-bit word (or MSB-only 7-bit source,
+            # which simply leaves the low 7 bits zero) → ±range semitones.
+            norm  = (float(val14) - 8192.0) / 8192.0
+            semis = norm * (st.range_semi + st.range_cents / 100.0)
+            st.cur_bend = semis
+            _curve_push(st.bend_ft, st.bend_val, ft, semis)
+
+        elif kind == 'cc':
+            _, ch, num, val = ev
+            st = chs[ch]
+            if num == 0:
+                st.bank = val
+            elif num == 7:
+                _curve_push(st.cc7_ft, st.cc7_val, ft, val)
+            elif num == 10:
+                _curve_push(st.cc10_ft, st.cc10_val, ft, val)
+            elif num == 11:
+                _curve_push(st.cc11_ft, st.cc11_val, ft, val)
+            elif num == 64:
+                if val >= 64:
+                    st.sus = True
+                else:
+                    st.sus = False
+                    for n in st.pending:
+                        end_note(n, ft)
+                    st.pending.clear()
+            elif num == 100:
+                st.rpn_lsb = val
+            elif num == 101:
+                st.rpn_msb = val
+            elif num in (98, 99):                # NRPN deselects RPN
+                st.rpn_msb = st.rpn_lsb = 0x7F
+            elif num == 6:
+                if st.rpn_msb == 0 and st.rpn_lsb == 0:
+                    st.range_semi = val
+            elif num == 38:
+                if st.rpn_msb == 0 and st.rpn_lsb == 0:
+                    st.range_cents = val
+            elif num in (120, 123):              # all sound / notes off
+                for n in list(st.active.values()):
+                    end_note(n, ft)
+                st.active.clear()
+                for n in st.pending:
+                    end_note(n, ft)
+                st.pending.clear()
+            elif num == 121:                     # reset all controllers
+                st.cur_bend = 0.0
+                _curve_push(st.bend_ft, st.bend_val, ft, 0.0)
+                _curve_push(st.cc11_ft, st.cc11_val, ft, 127)
+                st.sus = False
+                for n in st.pending:
+                    end_note(n, ft)
+                st.pending.clear()
+                st.rpn_msb = st.rpn_lsb = 0x7F
+
+        elif kind == 'prog':
+            _, ch, val = ev
+            chs[ch].prog = val
+
+        elif kind == 'tempo':
+            tempo_ft.append(ft); tempo_bpm.append(ev[1])
+
+        elif kind == 'title':
+            if title is None:
+                title = ev[1]
+
+    # Close anything still ringing at end-of-file.
+    for st in chs:
+        for n in list(st.active.values()):
+            end_note(n, max_ft)
+        st.active.clear()
+        for n in st.pending:
+            end_note(n, max_ft)
+        st.pending.clear()
+
+    dropped = [n for n in notes if n.end_ft <= n.start_ft]
+    if dropped:
+        vprint(f"  info: dropped {len(dropped)} zero-length note(s)")
+    notes = [n for n in notes if n.end_ft > n.start_ft]
+    notes.sort(key=lambda n: (n.start_ft, n.ch, n.key))
+
+    song = Song()
+    song.notes     = notes
+    song.channels  = chs
+    song.tempo_ft  = tempo_ft
+    song.tempo_bpm = tempo_bpm
+    song.title     = title
+    song.end_ft    = max_ft
+    return song
+
+
+# ── SF2 parser ────────────────────────────────────────────────────────────────
+
+# SF2 generator operator numbers used by this converter (only the subset the
+# parser reads). The *_COARSE address offsets are multiplied by 32768 frames
+# where they are applied in parse_sf2().
+GEN_START_OFF        = 0
+GEN_END_OFF          = 1
+GEN_STARTLOOP_OFF    = 2
+GEN_ENDLOOP_OFF      = 3
+GEN_START_COARSE     = 4
+GEN_MODENV2PITCH     = 7      # modEnvToPitch (signed cents at full mod-env)
+GEN_FILTERFC         = 8      # initialFilterFc (absolute cents; default 13500 = open)
+GEN_FILTERQ          = 9      # initialFilterQ (cB of resonance; default 0)
+GEN_MODENV2FILT      = 11     # modEnvToFilterFc (signed cents at full mod-env)
+GEN_END_COARSE       = 12
+GEN_PAN              = 17
+GEN_DELAY_MODENV     = 25
+GEN_ATTACK_MODENV    = 26
+GEN_HOLD_MODENV      = 27
+GEN_DECAY_MODENV     = 28
+GEN_SUSTAIN_MODENV   = 29     # 0.1% units of full-scale DECREASE (0..1000)
+GEN_RELEASE_MODENV   = 30
+GEN_DELAY_VOLENV     = 33
+GEN_ATTACK_VOLENV    = 34
+GEN_HOLD_VOLENV      = 35
+GEN_DECAY_VOLENV     = 36
+GEN_SUSTAIN_VOLENV   = 37     # centibels of attenuation, 0..1440
+GEN_RELEASE_VOLENV   = 38
+GEN_INSTRUMENT       = 41     # terminal generator of a preset zone
+GEN_KEYRANGE         = 43     # low byte = lowest key, high byte = highest key
+GEN_VELRANGE         = 44     # low byte = lowest velocity, high byte = highest
+GEN_STARTLOOP_COARSE = 45
+GEN_INITATTEN        = 48     # initialAttenuation (cB; per-zone static gain)
+GEN_ENDLOOP_COARSE   = 50
+GEN_COARSETUNE       = 51
+GEN_FINETUNE         = 52
+GEN_SAMPLEID         = 53     # terminal generator of an instrument zone
+GEN_SAMPLEMODES      = 54
+GEN_SCALETUNING      = 56
+GEN_ROOTKEY          = 58
+
+# Operators whose raw 16-bit amount is read as two's-complement signed by
+# _gen_amount(); everything not listed here is kept unsigned.
+_SIGNED_GENS = frozenset({GEN_START_OFF, GEN_END_OFF, GEN_STARTLOOP_OFF,
+                          GEN_ENDLOOP_OFF, GEN_START_COARSE, GEN_END_COARSE,
+                          GEN_STARTLOOP_COARSE, GEN_ENDLOOP_COARSE,
+                          GEN_PAN, GEN_COARSETUNE, GEN_FINETUNE,
+                          GEN_DELAY_VOLENV, GEN_ATTACK_VOLENV, GEN_HOLD_VOLENV,
+                          GEN_DECAY_VOLENV, GEN_RELEASE_VOLENV,
+                          GEN_MODENV2PITCH, GEN_MODENV2FILT,
+                          GEN_DELAY_MODENV, GEN_ATTACK_MODENV, GEN_HOLD_MODENV,
+                          GEN_DECAY_MODENV, GEN_RELEASE_MODENV,
+                          # cB/cents value-generators that are ADDITIVE (and so may be
+                          # NEGATIVE) at the preset level. Their instrument-level absolutes
+                          # all sit well under 0x8000 (atten≤1440, filterFc≤13500, Q≤960,
+                          # sustain≤1440/1000), so reading them signed is lossless there and
+                          # correct for relative preset deltas. Without this a preset zone
+                          # carrying e.g. initialAttenuation 0xFFFE (a −2 cB boost) was read
+                          # as 65534 cB → ≈ −6553 dB → the whole instrument went silent
+                          # (SGM 'Synth Strings 1' vol-env nodes stuck at 0).
+                          GEN_INITATTEN, GEN_FILTERFC, GEN_FILTERQ,
+                          GEN_SUSTAIN_VOLENV, GEN_SUSTAIN_MODENV})
+
+
+def _timecents_to_sec(tc: int) -> float:
+    """SF2 timecents → seconds (2^(tc/1200)); default -12000 ≈ 1 ms."""
+    return 2.0 ** (max(-12000, min(8000, tc)) / 1200.0)
+
+
+class SFSampleHdr:
+    # One 46-byte 'shdr' record as decoded by parse_sf2():
+    #   name                  NUL-terminated text from the first 20 bytes
+    #   start / end           frame positions, later offset by the zone's
+    #   loopstart / loopend   address-offset generators
+    #   rate                  sample rate; a stored 0 is replaced by 8363
+    #   origkey               original key byte (values > 127 fall back to 60)
+    #   correction            signed byte, added to the zone's tune_cents
+    #   link / stype          sample link index and type word; bit 0x8000 marks
+    #                         a ROM sample, types 2 and 4 are treated as halves
+    #                         of a linked stereo pair
+    __slots__ = ('name', 'start', 'end', 'loopstart', 'loopend', 'rate',
+                 'origkey', 'correction', 'link', 'stype')
+
+
+class SFZone:
+    """One effective preset×instrument zone (post combination).
+
+    Built by parse_sf2(): key/velocity ranges are the intersection of the
+    preset and instrument ranges, and additive generators are the sum of the
+    instrument-level and preset-level amounts."""
+    # keylo..keyhi / vello..velhi   inclusive MIDI key and velocity ranges
+    # sample                        index into SF2.shdrs
+    # tune_cents                    coarse*100 + fine + sample pitch correction
+    # modes                         sampleModes & 3
+    # pan                           clamped to [-500, 500]; None once merged to mono
+    # a_start / a_end               absolute frame window inside the smpl chunk
+    # loop_abs_start / loop_abs_end absolute loop points (same frame space)
+    # pair                          None, or (idxL, idxR, b_start) after stereo merge
+    # env_* / m_* times             seconds (converted from timecents)
+    __slots__ = ('keylo', 'keyhi', 'vello', 'velhi', 'sample', 'rootkey',
+                 'tune_cents', 'modes', 'pan', 'scale', 'a_start', 'a_end',
+                 'loop_abs_start', 'loop_abs_end', 'pair', 'rate', 'name',
+                 'env_delay', 'env_attack', 'env_hold', 'env_decay',
+                 'env_sustain_cb', 'env_release',
+                 # initialAttenuation (cB static per-zone gain) + static filter.
+                 'atten_cb', 'filter_fc', 'filter_q',
+                 # modulation envelope (drives pitch and/or filter) + its targets.
+                 'm_delay', 'm_attack', 'm_hold', 'm_decay', 'm_sustain_pc',
+                 'm_release', 'me2pitch', 'me2filt')
+
+
+class SF2:
+    __slots__ = ('presets', 'shdrs', 'file', 'smpl_off', 'smpl_size')
+
+    def read_frames(self, start_frame: int, n_frames: int) -> array.array:
+        """Read n_frames of 16-bit PCM starting at absolute frame index."""
+        n_avail = max(0, min(n_frames, self.smpl_size // 2 - start_frame))
+        a = array.array('h')
+        if n_avail <= 0:
+            return a
+        self.file.seek(self.smpl_off + start_frame * 2)
+        a.frombytes(self.file.read(n_avail * 2))
+        if sys.byteorder == 'big':
+            a.byteswap()
+        return a
+
+
+def _gen_amount(oper: int, raw: int) -> int:
+    if oper in _SIGNED_GENS:
+        return raw - 0x10000 if raw >= 0x8000 else raw
+    return raw
+
+
+def _parse_bags(bag_data, gen_data, start_bag, end_bag, terminal_gen):
+    """Resolve bags [start_bag, end_bag) into (global_gens, [zone_gens...]).
+    Each zone_gens is {oper: amount}; zones lacking the terminal generator
+    other than a leading global zone are discarded per the SF2 spec."""
+    glob = {}
+    zones = []
+    n_bags = len(bag_data) // 4
+    for bi in range(start_bag, end_bag):
+        g0 = struct.unpack_from('<H', bag_data, bi*4)[0]
+        g1 = (struct.unpack_from('<H', bag_data, (bi+1)*4)[0]
+              if bi + 1 < n_bags else len(gen_data) // 4)
+        gens = {}
+        for gi in range(g0, min(g1, len(gen_data) // 4)):
+            oper, raw = struct.unpack_from('<HH', gen_data, gi*4)
+            gens[oper] = _gen_amount(oper, raw)
+        if terminal_gen in gens:
+            zones.append(gens)
+        elif bi == start_bag and not zones:
+            glob = gens
+    return glob, zones
+
+
+def parse_sf2(path: str) -> SF2:
+    f = open(path, 'rb')
+    hdr = f.read(12)
+    if hdr[:4] != b'RIFF' or hdr[8:12] != b'sfbk':
+        sys.exit("error: not an SF2 file (bad RIFF/sfbk magic)")
+    riff_end = 8 + struct.unpack_from('<I', hdr, 4)[0]
+
+    pdta = {}
+    smpl_off = smpl_size = 0
+    pos = 12
+    while pos + 8 <= riff_end:
+        f.seek(pos)
+        chdr = f.read(8)
+        if len(chdr) < 8:
+            break
+        cid = chdr[:4]
+        sz  = struct.unpack_from('<I', chdr, 4)[0]
+        if cid == b'LIST':
+            ltype = f.read(4)
+            inner = pos + 12
+            inner_end = pos + 8 + sz
+            while inner + 8 <= inner_end:
+                f.seek(inner)
+                shdr_ = f.read(8)
+                scid = shdr_[:4]
+                ssz  = struct.unpack_from('<I', shdr_, 4)[0]
+                if ltype == b'pdta':
+                    pdta[scid.decode('latin-1')] = f.read(ssz)
+                elif ltype == b'sdta' and scid == b'smpl':
+                    smpl_off, smpl_size = inner + 8, ssz
+                inner += 8 + ssz + (ssz & 1)
+        pos += 8 + sz + (sz & 1)
+
+    for need in ('phdr', 'pbag', 'pgen', 'inst', 'ibag', 'igen', 'shdr'):
+        if need not in pdta:
+            sys.exit(f"error: SF2 missing required pdta sub-chunk '{need}'")
+    if not smpl_size:
+        sys.exit("error: SF2 has no smpl chunk (sample data)")
+
+    sf = SF2()
+    sf.file = f
+    sf.smpl_off, sf.smpl_size = smpl_off, smpl_size
+
+    shdr_data = pdta['shdr']
+    sf.shdrs = []
+    for i in range(len(shdr_data) // 46 - 1):    # last record is EOS sentinel
+        s = SFSampleHdr()
+        off = i * 46
+        s.name = shdr_data[off:off+20].split(b'\x00')[0].decode('latin-1',
+                                                                errors='replace')
+        (s.start, s.end, s.loopstart, s.loopend, s.rate) = \
+            struct.unpack_from('<IIIII', shdr_data, off+20)
+        s.origkey    = shdr_data[off+40]
+        s.correction = struct.unpack_from('b', shdr_data, off+41)[0]
+        s.link, s.stype = struct.unpack_from('<HH', shdr_data, off+42)
+        if s.rate == 0:
+            s.rate = 8363
+        sf.shdrs.append(s)
+
+    # Instruments: index → (global_gens, [zone_gens])
+    inst_data, ibag, igen = pdta['inst'], pdta['ibag'], pdta['igen']
+    n_inst = len(inst_data) // 22 - 1
+    inst_zones = []
+    for i in range(n_inst):
+        b0 = struct.unpack_from('<H', inst_data, i*22 + 20)[0]
+        b1 = struct.unpack_from('<H', inst_data, (i+1)*22 + 20)[0]
+        inst_zones.append(_parse_bags(ibag, igen, b0, b1, GEN_SAMPLEID))
+
+    # Presets
+    phdr, pbag, pgen = pdta['phdr'], pdta['pbag'], pdta['pgen']
+    n_pre = len(phdr) // 38 - 1
+    sf.presets = {}
+    scale_warned = False
+    for i in range(n_pre):
+        off = i * 38
+        pname = phdr[off:off+20].split(b'\x00')[0].decode('latin-1',
+                                                          errors='replace')
+        preset, bank, bag0 = struct.unpack_from('<HHH', phdr, off+20)
+        bag1 = struct.unpack_from('<H', phdr, (i+1)*38 + 24)[0]
+        pglob, pzones = _parse_bags(pbag, pgen, bag0, bag1, GEN_INSTRUMENT)
+
+        zones = []
+        for pz_raw in pzones:
+            pz = dict(pglob); pz.update(pz_raw)
+            ii = pz[GEN_INSTRUMENT]
+            if not (0 <= ii < n_inst):
+                continue
+            iglob, izones = inst_zones[ii]
+            pk = pz.get(GEN_KEYRANGE, 0x7F00)
+            pv = pz.get(GEN_VELRANGE, 0x7F00)
+            pklo, pkhi = pk & 0xFF, (pk >> 8) & 0xFF
+            pvlo, pvhi = pv & 0xFF, (pv >> 8) & 0xFF
+            for iz_raw in izones:
+                iz = dict(iglob); iz.update(iz_raw)
+                si = iz[GEN_SAMPLEID]
+                if not (0 <= si < len(sf.shdrs)):
+                    continue
+                s = sf.shdrs[si]
+                if s.stype & 0x8000:             # ROM sample
+                    continue
+                ik = iz.get(GEN_KEYRANGE, 0x7F00)
+                iv = iz.get(GEN_VELRANGE, 0x7F00)
+                klo = max(ik & 0xFF, pklo); khi = min((ik >> 8) & 0xFF, pkhi)
+                vlo = max(iv & 0xFF, pvlo); vhi = min((iv >> 8) & 0xFF, pvhi)
+                if klo > khi or vlo > vhi:
+                    continue
+
+                z = SFZone()
+                z.keylo, z.keyhi = klo, khi
+                z.vello, z.velhi = vlo, vhi
+                z.sample = si
+                rk = iz.get(GEN_ROOTKEY, -1)
+                z.rootkey = rk if 0 <= rk <= 127 else \
+                            (s.origkey if s.origkey <= 127 else 60)
+                z.tune_cents = ((iz.get(GEN_COARSETUNE, 0)
+                                 + pz.get(GEN_COARSETUNE, 0)) * 100
+                                + iz.get(GEN_FINETUNE, 0)
+                                + pz.get(GEN_FINETUNE, 0)
+                                + s.correction)
+                z.modes = iz.get(GEN_SAMPLEMODES, 0) & 3
+                z.pan   = max(-500, min(500, iz.get(GEN_PAN, 0)
+                                        + pz.get(GEN_PAN, 0)))
+                z.scale = iz.get(GEN_SCALETUNING, 100)
+                if z.scale != 100 and klo != khi and not scale_warned:
+                    vprint("  warning: scaleTuning != 100 on a multi-key zone "
+                           "— pitch is exact only at the zone's centre key")
+                    scale_warned = True
+                # Volume-envelope ADSR (timecents at inst level, preset adds).
+                z.env_delay  = _timecents_to_sec(iz.get(GEN_DELAY_VOLENV,  -12000)
+                                                 + pz.get(GEN_DELAY_VOLENV,  0))
+                z.env_attack = _timecents_to_sec(iz.get(GEN_ATTACK_VOLENV, -12000)
+                                                 + pz.get(GEN_ATTACK_VOLENV, 0))
+                z.env_hold   = _timecents_to_sec(iz.get(GEN_HOLD_VOLENV,   -12000)
+                                                 + pz.get(GEN_HOLD_VOLENV,   0))
+                z.env_decay  = _timecents_to_sec(iz.get(GEN_DECAY_VOLENV,  -12000)
+                                                 + pz.get(GEN_DECAY_VOLENV,  0))
+                z.env_sustain_cb = max(0, min(1440, iz.get(GEN_SUSTAIN_VOLENV, 0)
+                                              + pz.get(GEN_SUSTAIN_VOLENV, 0)))
+                z.env_release = _timecents_to_sec(iz.get(GEN_RELEASE_VOLENV, -12000)
+                                                  + pz.get(GEN_RELEASE_VOLENV, 0))
+                # initialAttenuation: per-zone static gain in cB (preset adds to inst).
+                # Clamped to the SF2 spec range [0, 1440] so any out-of-range value can
+                # never collapse the folded vol-env to silence (see _SIGNED_GENS note).
+                z.atten_cb = max(0, min(1440, iz.get(GEN_INITATTEN, 0)
+                                        + pz.get(GEN_INITATTEN, 0)))
+                # Static low-pass filter. initialFilterFc is absolute cents (default
+                # 13500 ≈ open); initialFilterQ is cB of resonance (default 0).
+                z.filter_fc = iz.get(GEN_FILTERFC, 13500) + pz.get(GEN_FILTERFC, 0)
+                z.filter_q  = max(0, iz.get(GEN_FILTERQ, 0) + pz.get(GEN_FILTERQ, 0))
+                # Modulation envelope (drives pitch via modEnvToPitch and/or filter via
+                # modEnvToFilterFc). Times are timecents; sustain is 0.1%-of-full DECREASE.
+                z.m_delay   = _timecents_to_sec(iz.get(GEN_DELAY_MODENV,  -12000)
+                                                + pz.get(GEN_DELAY_MODENV,  0))
+                z.m_attack  = _timecents_to_sec(iz.get(GEN_ATTACK_MODENV, -12000)
+                                                + pz.get(GEN_ATTACK_MODENV, 0))
+                z.m_hold    = _timecents_to_sec(iz.get(GEN_HOLD_MODENV,   -12000)
+                                                + pz.get(GEN_HOLD_MODENV,   0))
+                z.m_decay   = _timecents_to_sec(iz.get(GEN_DECAY_MODENV,  -12000)
+                                                + pz.get(GEN_DECAY_MODENV,  0))
+                z.m_sustain_pc = max(0, min(1000, iz.get(GEN_SUSTAIN_MODENV, 0)
+                                            + pz.get(GEN_SUSTAIN_MODENV, 0)))
+                z.m_release = _timecents_to_sec(iz.get(GEN_RELEASE_MODENV, -12000)
+                                                + pz.get(GEN_RELEASE_MODENV, 0))
+                z.me2pitch  = iz.get(GEN_MODENV2PITCH, 0) + pz.get(GEN_MODENV2PITCH, 0)
+                z.me2filt   = iz.get(GEN_MODENV2FILT,  0) + pz.get(GEN_MODENV2FILT,  0)
+                z.a_start = (s.start + iz.get(GEN_START_OFF, 0)
+                             + 32768 * iz.get(GEN_START_COARSE, 0))
+                z.a_end   = (s.end + iz.get(GEN_END_OFF, 0)
+                             + 32768 * iz.get(GEN_END_COARSE, 0))
+                z.a_start = max(0, z.a_start)
+                z.a_end   = max(z.a_start, min(z.a_end, sf.smpl_size // 2))
+                z.loop_abs_start = (s.loopstart + iz.get(GEN_STARTLOOP_OFF, 0)
+                                    + 32768 * iz.get(GEN_STARTLOOP_COARSE, 0))
+                z.loop_abs_end   = (s.loopend + iz.get(GEN_ENDLOOP_OFF, 0)
+                                    + 32768 * iz.get(GEN_ENDLOOP_COARSE, 0))
+                z.pair = None
+                z.rate = s.rate
+                z.name = s.name
+                zones.append(z)
+        if zones:
+            sf.presets[(bank, preset)] = (pname, zones)
+    return sf
+
+
+# ── Preset resolution / Taud instrument building ──────────────────────────────
+
+def resolve_preset(sf: SF2, inst_key, perc_force):
+    """inst_key: ('m', bank, prog) or ('d', prog). Returns (name, zones) or None."""
+    if inst_key[0] == 'd':
+        prog = inst_key[1]
+        cands = []
+        if perc_force is not None:
+            cands.append(tuple(perc_force))
+        cands += [(128, prog), (128, 0)]
+    else:
+        _, bank, prog = inst_key
+        cands = [(bank, prog), (0, prog)]
+    for c in cands:
+        if c in sf.presets:
+            return sf.presets[c]
+    # Last resort: same program number in any bank, then nothing.
+    prog = inst_key[1] if inst_key[0] == 'd' else inst_key[2]
+    for (b, p) in sorted(sf.presets):
+        if p == prog:
+            return sf.presets[(b, p)]
+    return None
+
+
+def merge_stereo_zones(zones: list, shdrs: list) -> list:
+    """Collapse L/R zone pairs into single mono zones. Two flavours are merged:
+      (1) LINKED stereo — samples are each other's sampleLink with L/R types;
+      (2) PAN stereo — two MONO-typed zones with the same key/vel rect and
+          opposite hard pan (±500). SGM/Timbres store most "stereo" samples this
+          way (e.g. 'VA LGFF C3-L' / '…-R'), NOT as linked L/R.
+    The merged zone mixes both channels to mono and drops the pan override.
+    Merging is essential: an unmerged R zone fully overlaps its L zone, so the
+    disjointify spills it into a SECOND layer that then plays CENTRED alongside
+    the L zone — a spurious +6 dB doubling. Lone L/R zones keep their channel."""
+    out = []
+    used = set()
+    for i, z in enumerate(zones):
+        if i in used:
+            continue
+        s = shdrs[z.sample]
+        partner = None
+        if s.stype in (2, 4) and 0 <= s.link < len(shdrs):
+            for j in range(i + 1, len(zones)):
+                if j in used:
+                    continue
+                z2 = zones[j]
+                if (z2.sample == s.link
+                        and (z2.keylo, z2.keyhi, z2.vello, z2.velhi)
+                            == (z.keylo, z.keyhi, z.vello, z.velhi)
+                        and z2.modes == z.modes
+                        and z2.rootkey == z.rootkey):
+                    partner = j
+                    break
+        if partner is None and z.pan is not None and abs(z.pan) >= 400:
+            for j in range(i + 1, len(zones)):
+                if j in used:
+                    continue
+                z2 = zones[j]
+                if (z2.sample != z.sample
+                        and z2.pan is not None and abs(z2.pan) >= 400
+                        and (z.pan < 0) != (z2.pan < 0)        # opposite sides
+                        and (z2.keylo, z2.keyhi, z2.vello, z2.velhi)
+                            == (z.keylo, z.keyhi, z.vello, z.velhi)
+                        and z2.modes == z.modes
+                        and z2.rootkey == z.rootkey):
+                    partner = j
+                    break
+        if partner is not None:
+            used.add(partner)
+            z2 = zones[partner]
+            z.pair = (z.sample, z2.sample, z2.a_start)
+            z.pan = None                          # mixed to mono → centred
+            z.a_end = z.a_start + min(z.a_end - z.a_start,
+                                      z2.a_end - z2.a_start)
+        out.append(z)
+    return out
+
+
+def _rect_of_zone(z: SFZone):
+    """Zone key/vel ranges → Taud (pitch_lo, pitch_hi, vol_lo, vol_hi).
+    Pitch bounds sit on half-semitone boundaries so triggers carrying an
+    initial pitch bend (< 50 cents) still land inside the right rectangle;
+    adjacent zones stay disjoint. Velocity per Ixmp note 5: round(v·63/127)."""
+    if z.keylo <= 0:
+        plo = 0x0000
+    else:
+        plo = max(0, min(0xFFFF, round(TAUD_C4 + (z.keylo - 0.5 - 60) * UNITS_PER_SEMI)))
+    if z.keyhi >= 127:
+        phi = 0xFFFF
+    else:
+        phi = max(0, min(0xFFFF, round(TAUD_C4 + (z.keyhi + 0.5 - 60) * UNITS_PER_SEMI) - 1))
+    vlo = round(z.vello * 63 / 127)
+    vhi = round(z.velhi * 63 / 127)
+    return (plo, phi, vlo, vhi)
+
+
+def _rect_subtract(r, k):
+    """Pieces of rectangle r not covered by rectangle k (≤ 4 pieces)."""
+    p0, p1, v0, v1 = r
+    q0, q1, w0, w1 = k
+    if p1 < q0 or p0 > q1 or v1 < w0 or v0 > w1:
+        return [r]
+    pieces = []
+    if p0 < q0: pieces.append((p0, q0 - 1, v0, v1))
+    if p1 > q1: pieces.append((q1 + 1, p1, v0, v1))
+    m0, m1 = max(p0, q0), min(p1, q1)
+    if v0 < w0: pieces.append((m0, m1, v0, w0 - 1))
+    if v1 > w1: pieces.append((m0, m1, w1 + 1, v1))
+    return pieces
+
+
+class MonoSample:
+    """One pooled (deduplicated) mono u8 sample slice."""
+    __slots__ = ('pair', 'a_start', 'frames', 'rate', 'name',
+                 'data', 'ratio', 'offset')
+    def __init__(self, z: SFZone):
+        self.pair    = z.pair                    # None or (idxL, idxR, b_start)
+        self.a_start = z.a_start
+        self.frames  = max(0, z.a_end - z.a_start)
+        self.rate    = z.rate
+        self.name    = z.name
+        self.data    = None
+        self.ratio   = 1.0
+        self.offset  = 0
+
+    def key(self):
+        return (self.pair[0], self.pair[1], self.a_start, self.frames) \
+            if self.pair else (-1, -1, self.a_start, self.frames)
+
+    def render(self, sf: SF2):
+        if self.data is not None:
+            return
+        n = min(self.frames, 1 << 24)            # hard sanity cap (16M frames)
+        if self.pair:
+            la = sf.read_frames(self.a_start, n)
+            ra = sf.read_frames(self.pair[2], n)
+            m  = min(len(la), len(ra))
+            self.data = bytes((((la[i] + ra[i]) >> 1) >> 8) + 128 & 0xFF
+                              for i in range(m))
+        else:
+            la = sf.read_frames(self.a_start, n)
+            self.data = bytes(((s >> 8) + 128) & 0xFF for s in la)
+        self.frames = len(self.data)
+
+
+class Patch:
+    """One Ixmp-patch-to-be: a disjoint rect plus the zone's sample fields."""
+    __slots__ = ('rect', 'zone', 'ms', 'loop_start', 'loop_end', 'loop_mode',
+                 'detune', 'pan8', 'hits')
+    def __init__(self, rect, z: SFZone, ms: MonoSample):
+        self.rect = rect
+        self.zone = z
+        self.ms   = ms
+        ls = z.loop_abs_start - z.a_start
+        le = z.loop_abs_end   - z.a_start
+        nf = max(0, z.a_end - z.a_start)
+        ls = max(0, min(ls, nf)); le = max(0, min(le, nf))
+        if z.modes in (1, 3) and le - ls >= 2:
+            self.loop_mode  = 1 | (0x4 if z.modes == 3 else 0)
+            self.loop_start = ls
+            self.loop_end   = le
+        else:
+            self.loop_mode  = 0
+            self.loop_start = 0
+            self.loop_end   = 0
+        # samplingRate = SF2 rate; the rootkey/tuning shift goes into the
+        # signed 4096-TET detune so MIDI key 60 always means noteVal 0x5000.
+        # scaleTuning (cents per key, 0 = fixed-pitch drums) is folded in
+        # around the zone's centre key: exact for single-key zones, exact
+        # everywhere when scale = 100.
+        k_ref = (z.keylo + z.keyhi) / 2.0
+        det = round(((k_ref - z.rootkey) * (z.scale / 100.0)
+                     - (k_ref - 60.0)) * UNITS_PER_SEMI
+                    + z.tune_cents * 4096.0 / 1200.0)
+        self.detune = max(-0x8000, min(0x7FFF, det))
+        if z.pan is None:
+            self.pan8 = IXMP_PAN_NO_OVERRIDE
+        else:
+            self.pan8 = max(0, min(255, round(127.5 + z.pan * 255.0 / 1000.0)))
+        self.hits = 0
+
+    def to_ixmp_dict(self, canonical, bpm0, fadeout_override):
+        r = self.ms.ratio
+        d = {
+            'pitch_start':         self.rect[0],
+            'pitch_end':           self.rect[1],
+            'volume_start':        self.rect[2],
+            'volume_end':          self.rect[3],
+            'sample_ptr':          self.ms.offset,
+            'sample_length':       min(len(self.ms.data), 0xFFFF),
+            'play_start':          0,
+            'loop_start':          min(0xFFFF, round(self.loop_start * r)),
+            'loop_end':            min(0xFFFF, round(self.loop_end   * r)),
+            'sampling_rate':       max(1, min(0xFFFF, round(self.ms.rate * r))),
+            'sample_detune':       self.detune,
+            'loop_mode':           self.loop_mode,
+            'default_pan':         self.pan8,
+            'default_note_volume': 0,            # no override → base DNV
+            'vibrato_speed':       0,
+            'vibrato_sweep':       0,
+            'vibrato_depth':       0,
+            'vibrato_rate':        0,
+            'vibrato_waveform':    0xFF,         # no override
+        }
+        # Per-patch overrides — emitted ONLY when they differ from the canonical
+        # zone (whose envelopes/filter live in the base instrument record, which the
+        # patch falls through to when a block is absent). This is what gives SF2
+        # velocity / key layers their own ADSR + filter while keeping patches lean.
+        z, c = self.zone, canonical.zone
+        vol_self, _, _ = _vol_env_block(z)
+        vol_canon, _, _ = _vol_env_block(c)
+        if vol_self != vol_canon:
+            d['vol_env'] = vol_self
+        # SF-mode filter: mode flag + 16-bit cutoff cents / Q centibels + filter env.
+        sf_s, cut_s, res_s, filt_s = _zone_filter_sf(z)
+        sf_c, cut_c, res_c, filt_c = _zone_filter_sf(c)
+        pit_s = _pitch_env_block(z) if z.me2pitch else None
+        pit_c = _pitch_env_block(c) if c.me2pitch else None
+        # Emit the 'x' block when filter (mode/cutoff/resonance/env) OR initialAttenuation
+        # differs from the canonical (base) zone. initialAttenuation is a per-voice gain (NOT
+        # folded into the env); when 'x' is present it carries this patch's atten, else the
+        # voice inherits the base record's atten. A differing filter ENV must co-emit 'x'
+        # because the env's node ratios scale the patch's OWN peak cutoff (the 'x' cutoff).
+        att_s = atten_cb_to_octet(z.atten_cb)
+        att_c = atten_cb_to_octet(c.atten_cb)
+        filt_differs = (filt_s != filt_c)
+        if (sf_s != sf_c or cut_s != cut_c or res_s != res_c or att_s != att_c or filt_differs):
+            d['extra'] = {'fadeout':            _zone_fadeout(z, bpm0, fadeout_override),
+                          'filter_sf_mode':     sf_s,
+                          'default_cutoff':     cut_s,
+                          'default_resonance':  res_s,
+                          'initial_attenuation': att_s}
+        if filt_s is not None and filt_differs:
+            d['filter_env'] = filt_s
+        if pit_s is not None and pit_s != pit_c:
+            d['pitch_env'] = pit_s
+        return d
+
+
+class TaudInstrument:
+    # Plain record for one instrument built from a layer of patches:
+    #   slot / inst_key  initialised to 0 / None by _build_layer_instrument()
+    #   name             display name
+    #   canonical        the kept Patch with the most trigger hits
+    #   usable           set True when the instrument is built
+    __slots__ = ('slot', 'inst_key', 'name', 'patches', 'canonical', 'usable')
+    # patches: kept Patch list in zone order, canonical Patch INCLUDED
+    # (the Ixmp emitter skips it; the base record carries its fields).
+
+
+def _rect_overlap(a, b) -> bool:
+    """True when two (pitch_lo, pitch_hi, vol_lo, vol_hi) rectangles intersect."""
+    p0, p1, v0, v1 = a
+    q0, q1, w0, w1 = b
+    return not (p1 < q0 or p0 > q1 or v1 < w0 or v0 > w1)
+
+
+def _partition_layers(zones: list, registry: dict, max_layers: int):
+    """Split zones into disjoint layers by ITERATED first-wins disjointify.
+
+    Layer 0 is the classic disjointify result: each zone is rectangle-SUBTRACTED
+    against the rects already placed in the layer, so its non-overlapping pieces
+    tile in. This is essential — the velocity axis quantises 0..127 → 0..63, so
+    adjacent SF2 velocity splits round to ranges that touch/overlap by ~1 unit;
+    subtraction absorbs that boundary sliver into the first zone instead of
+    spawning a spurious extra layer (which would DOUBLE the level at boundary
+    velocities). Only a zone that is *fully* covered by the layer below spills to
+    the next layer, where the same disjointify runs over the spilled set. A pass
+    that places nothing (only empty/inverted rects remain) ends the loop. Returns
+    ([ [(rect, zone, ms), …] per layer ], dropped_zone_count)."""
+    remaining = []
+    for z in zones:
+        ms = MonoSample(z)
+        if ms.frames < 2:
+            continue                                 # fewer than 2 frames: zone skipped
+        ms = registry.setdefault(ms.key(), ms)       # share one MonoSample per key
+        remaining.append((z, ms))
+
+    layers = []
+    while remaining and len(layers) < max_layers:
+        kept_rects, layer, spill = [], [], []
+        for z, ms in remaining:
+            pieces = [_rect_of_zone(z)]
+            for k in kept_rects:
+                pieces = [p2 for p in pieces for p2 in _rect_subtract(p, k)]
+                if not pieces:
+                    break
+            pieces = [p for p in pieces if p[0] <= p[1] and p[2] <= p[3]]
+            if not pieces:
+                spill.append((z, ms))          # fully overlapped (SF2 layering/duplicates) → next layer
+                continue
+            for p in pieces:
+                kept_rects.append(p)
+                layer.append((p, z, ms))
+        if not layer:
+            # Every zone spilled, so spill == remaining: another pass could never end.
+            break
+        layers.append(layer)
+        remaining = spill
+    return layers, len(remaining)              # leftovers are reported as dropped
+
+
+def _build_layer_instrument(name: str, items: list, trig: dict):
+    """Build one normal TaudInstrument from a layer's disjoint (rect, zone, ms) items,
+    keeping only patches hit by a trigger in `trig` ({(nv, v6): count}). Returns None
+    when no patch is hit (the layer is silent for the whole song → dropped)."""
+    all_patches = [Patch(r, z, ms) for (r, z, ms) in items]
+    for (nv, v6), cnt in trig.items():
+        for p in all_patches:
+            r = p.rect
+            if r[0] <= nv <= r[1] and r[2] <= v6 <= r[3]:    # nv in pitch range, v6 in vol range
+                p.hits += cnt
+                break                                        # credit the first matching patch only
+    kept = [p for p in all_patches if p.hits > 0]
+    if not kept:
+        return None
+    ti = TaudInstrument()
+    ti.name = name
+    ti.patches = kept
+    ti.canonical = max(kept, key=lambda p: p.hits)           # most-triggered patch
+    ti.usable = True
+    ti.slot = 0                                              # placeholder — presumably assigned later; confirm
+    ti.inst_key = None                                       # build_presets overwrites this with the preset key
+    return ti
+
+
+def build_presets(sf: SF2, slot_keys: list, triggers: dict, perc_force,
+                  registry: dict, max_layers: int) -> dict:
+    """For each preset (inst_key), partition its SF2 zones into disjoint layers
+    and build one normal TaudInstrument per layer (trimmed to triggered patches).
+    Returns dict[inst_key → (name, [layer TaudInstrument])]. Downstream, a preset
+    with >1 layer becomes a Metainstrument; a single-layer preset stays a plain
+    instrument. `registry` dedupes MonoSamples across all presets/layers."""
+    presets = {}
+    for ik in slot_keys:
+        res = resolve_preset(sf, ik, perc_force)
+        if res is None:
+            vprint(f"  warning: no SF2 preset for {ik!r} — its notes are dropped")
+            presets[ik] = ('(missing preset)', [])          # empty layer list marks the preset unusable
+            continue
+        name, zones = res
+        zones = merge_stereo_zones(zones, sf.shdrs)
+        layer_items, dropped = _partition_layers(zones, registry, max_layers)
+        if dropped:
+            vprint(f"  warning: '{name}': {dropped} zone(s) exceed the "
+                   f"{max_layers}-layer cap and were dropped (raise --max-layers)")
+        trig = triggers.get(ik, {})                         # {(nv, v6): count} for this preset, {} if none
+        layers = [ti for items in layer_items
+                  if (ti := _build_layer_instrument(name, items, trig)) is not None]
+        if not layers and layer_items:
+            # Nothing triggered (out-of-range): keep the single patch nearest the
+            # mean trigger pitch so the preset still sounds (matches the old path).
+            mean_nv = (sum(nv * c for (nv, _), c in trig.items())
+                       / max(1, sum(trig.values()))) if trig else TAUD_C4
+            flat = [Patch(r, z, ms) for items in layer_items for (r, z, ms) in items]
+            best = min(flat, key=lambda p: abs((p.rect[0] + p.rect[1]) / 2 - mean_nv))
+            ti = TaudInstrument()
+            ti.name = name; ti.patches = [best]; ti.canonical = best
+            ti.usable = True; ti.slot = 0; ti.inst_key = ik
+            layers = [ti]
+        for ti in layers:
+            ti.inst_key = ik                                # tag every layer with its preset key
+        presets[ik] = (name, layers)
+        if layers:
+            vprint(f"  preset '{name}': {len(zones)} zone(s) → {len(layers)} layer(s)"
+                   + (" → Metainstrument" if len(layers) > 1 else ""))
+        else:
+            vprint(f"  warning: '{name}': no usable zones — notes dropped")
+    return presets
+
+
+# Metainstrument mix-volume octet for an unmixed layer (159 = 0 dB / unity); the
+# converter folds per-zone level/tune into each layer instrument's patches, so the
+# meta layers stay neutral. (terranmon.txt "Perceptually Significant Octet …".)
+META_UNITY_OCTET = 159            # written as byte 1 of every Metainstrument layer entry
+
+
+def _layer_bbox(ti: 'TaudInstrument'):
+    """Smallest (pitch_lo, pitch_hi, vol_lo, vol_hi) rectangle enclosing every kept
+    patch rect of a layer instrument — the Metainstrument layer's gating rectangle."""
+    p_lo, p_hi, v_lo, v_hi = zip(*(patch.rect for patch in ti.patches))
+    return (min(p_lo), max(p_hi),
+            min(v_lo), max(v_hi))
+
+
+# ── Sample pool + instrument bin ──────────────────────────────────────────────
+
+def _env_seg_count(t_sec: float) -> int:
+    """How many linear segments approximate an exponential (linear-dB) ramp lasting
+    `t_sec` seconds: 2 + round(t_sec / 2), clamped to 3..8. Short ramps sit on the
+    3-segment floor; long ramps (the 5–20 s SF2 decays/releases that a 2-point line
+    collapses badly) get up to 8 so the curve stays smooth (issue 4)."""
+    return sorted((3, 2 + round(t_sec / 2.0), 8))[1]    # median of three == clamp to 3..8
+
+
+def _adsr_to_env(z: SFZone):
+    """SF2 volume-envelope ADSR → (env_points, sustain_idx, release_sec).
+
+    env_points is always 25 (value 0..63, minifloat_idx) pairs (tail-padded); each
+    node's minifloat encodes the time to the NEXT node (engine interpolates values
+    linearly across that span). The engine wraps on the sustain node while
+    the key is held (SUSTAIN word) and walks the trailing release nodes after
+    key-off. SF2's decay and release are LINEAR in dB (exponential in amplitude);
+    per the SF2 spec decayVolEnv is the full-100dB time, truncated by the sustain
+    level. Both legs are sampled at equal-time (= equal-dB) points and emitted as
+    a piecewise-linear-amplitude approximation — segment count scales with the
+    leg's duration (issue 4) so multi-second decays don't collapse to a 2-point
+    line. sustain_idx is capped at 24; release_sec is z.env_release unchanged.
+    """
+    EPS = 0.004                       # below the minifloat resolution (1/256 s)
+    sus_cb = min(z.env_sustain_cb, 1000.0)     # clamp to 100 dB full-scale
+    slevel = 10.0 ** (-z.env_sustain_cb / 200.0)   # NOTE(review): uses the unclamped cB value
+    s63 = max(0, min(63, round(63 * slevel)))
+    pts = []                          # (value, delta_sec_to_next)
+    if z.env_delay >= EPS:
+        pts.append((0, z.env_delay))
+    if z.env_attack >= EPS:
+        pts.append((0, z.env_attack))
+    hold = z.env_hold if z.env_hold >= EPS else 0.0
+    # Decay leg: peak (63) → sustain (s63), exponential amplitude over `edec` seconds.
+    # The peak node carries the hold time. The final decay node is the sustain node
+    # (appended below), so the in-between nodes are f = 1/n .. (n-1)/n.
+    if s63 < 63:
+        edec = z.env_decay * sus_cb / 1000.0
+        if edec >= EPS:
+            n = _env_seg_count(edec)
+            seg = edec / n
+            pts.append((63, hold + seg))                       # peak, held then 1st seg
+            for i in range(1, n):                              # f = 1/n .. (n-1)/n
+                f = i / n
+                v = round(63 * 10.0 ** (-(sus_cb * f) / 200.0))
+                pts.append((max(s63, min(63, v)), seg))
+        else:
+            pts.append((63, hold))
+    sustain_idx = len(pts)            # the node appended next is the sustain node
+    rel = z.env_release
+    if s63 > 0 and rel >= EPS:
+        # Release leg: sustain (s63) → silence, exponential amplitude over `rel`
+        # seconds (a 100 dB drop ≈ to 0). Sampled at equal-time points.
+        n = _env_seg_count(rel)
+        seg = rel / n
+        pts.append((s63, seg))                                 # sustain node
+        for i in range(1, n):                                  # f = 1/n .. (n-1)/n
+            f = i / n
+            v = round(s63 * 10.0 ** (-5.0 * f))                # −100 dB over the leg
+            pts.append((max(0, min(s63, v)), seg))
+        pts.append((0, 0.0))
+    elif s63 > 0:
+        pts.append((s63, 0.0))
+        pts.append((0, 0.0))          # default 1 ms release = cut at key-off
+    else:
+        pts.append((0, 0.0))          # sustain level rounds to 0: a single silent node
+    env = [(v, nearest_minifloat(d)) for v, d in pts[:25]]     # seconds → minifloat index
+    while len(env) < 25:
+        env.append((env[-1][0], 0))   # pad by repeating the last value with minifloat 0
+    return env, min(sustain_idx, 24), rel
+
+
+# Envelope flag bits: two LOOP-word bits (terranmon.txt base byte 15/17/19) and one SUSTAIN-word bit.
+ENV_PRESENT_BIT = 0x2000          # P — envelope present in source (LOOP-word bit 13)
+ENV_SUS_ENABLE  = 0x0020          # b — enable the SUSTAIN wrap (SUSTAIN-word bit 5)
+ENV_PF_FILTER   = 0x0080          # m — pitch/filter LOOP-word bit 7 (1 = filter)
+
+
+def _atten_gain(atten_cb: float) -> float:
+    """Linear amplitude factor (never above 1.0) for an SF2 initialAttenuation in cB."""
+    return 10.0 ** (max(0.0, atten_cb) / -200.0)
+
+
+def _vol_env_block(z: SFZone):
+    """Taud volume-envelope block dict from a zone's SF2 ADSR — the PURE ADSR shape
+    at full 0..63 resolution. initialAttenuation is NO LONGER folded into the node
+    peak (it would crush a heavily-attenuated env to peak ~3 and zero its tail, e.g.
+    SGM 'Fantasia'); it is now carried as a separate per-voice gain — base record
+    byte 251 / Ixmp 'x' block initialAttenuation — applied in the mixer. Returns
+    (block_dict, sustain_idx, release_sec)."""
+    env, sidx, rel = _adsr_to_env(z)
+    nodes = [(max(0, min(63, v)), mf) for (v, mf) in env]    # re-clamp values to 0..63
+    sustain = ENV_SUS_ENABLE | ((sidx & 0x1F) << 8) | (sidx & 0x1F)   # same index in both 5-bit fields
+    return {'loop': ENV_PRESENT_BIT, 'sustain': sustain, 'nodes': nodes}, sidx, rel
+
+
+# SF2 initialFilterFc default ≈ 13500 cents (~20 kHz) means "no filter / fully open".
+SF2_FILTER_OPEN_CENTS = 13500     # a zone whose filter_fc is below this has a static filter
+# Taud SF-mode "filter off" sentinel for the 16-bit cutoff/resonance fields.
+SF_FILTER_OFF = 0xFFFF            # real cutoff/resonance values are clamped to ≤ 0xFFFE
+
+
+def _zone_filter_sf(z: SFZone):
+    """Resolve a zone's filter into Taud SF-mode parameters.
+
+    Taud SF mode (base byte 173 bit 4 / patch 'x' flag) stores the cutoff as
+    SoundFont **absolute cents** and resonance as **centibels above DC gain** —
+    the engine computes freq = 8.176·2^(cents/1200) and dmpfac = 10^(−Qcb/200),
+    so there is no ImpulseTracker ~5 kHz cutoff ceiling. When the zone has a
+    modulation envelope driving the cutoff, the stored cutoff is the PEAK the
+    envelope reaches and the filter-env nodes scale it back down (see
+    [_filter_env_block_sf]); the engine's `currentCutoff = baseCut · envValue`
+    then reproduces the SF2 sweep exactly (linear-in-cents = the right log-Hz
+    sweep).
+
+    Returns (sf_mode, cutoff16, resonance16, filter_env_block_or_None).
+    sf_mode False → no filter (IT-mode 'off'): both words SF_FILTER_OFF, env None."""
+    base_fc = z.filter_fc
+    amt     = z.me2filt
+    has_static = base_fc < SF2_FILTER_OPEN_CENTS               # static cutoff below "fully open"
+    has_env    = bool(amt)                                     # any non-zero mod-env → filter amount
+    if not has_static and not has_env:
+        return False, SF_FILTER_OFF, SF_FILTER_OFF, None
+    peak = max(1, min(0xFFFE, round(base_fc + max(0, amt))))   # engine baseCut
+    qcb  = max(0, min(0xFFFE, round(z.filter_q)))              # cB above DC gain
+    env  = _filter_env_block_sf(z, base_fc, amt, peak) if has_env else None
+    return True, peak, qcb, env
+
+
+def _filter_env_block_sf(z: SFZone, base_fc: float, amt: float, peak: int) -> dict:
+    """Filter envelope in SF-cents domain. Each node value = cutoff_cents(u)/peak·255
+    following the SF2 modulation-envelope DAHDSR (u walks 0→1→sustain), where
+    cutoff_cents(u) = base_fc + amt·u. 0xFF (255) = fully open at `peak`; the release
+    returns to the base cutoff over m_release from whichever node is the sustain node.
+    The engine multiplies `peak` (= baseCut) by node/255, reproducing the SF2 sweep."""
+    EPS   = 0.004
+    sus_u = 1.0 - z.m_sustain_pc / 1000.0          # mod-env sustain level (0..1)
+
+    def nodeval(u: float) -> int:
+        cents = base_fc + amt * u
+        return max(0, min(255, round(255.0 * cents / peak)))
+
+    pts = []                                        # (value_byte, secs_to_next)
+    if z.m_delay >= EPS:
+        pts.append((nodeval(0.0), z.m_delay))
+    pts.append((nodeval(0.0), z.m_attack if z.m_attack >= EPS else 0.0))
+    hold = z.m_hold if z.m_hold >= EPS else 0.0
+    if sus_u < 1.0 and z.m_decay >= EPS:
+        pts.append((nodeval(1.0), hold + z.m_decay))
+        sustain_idx = len(pts)
+        pts.append((nodeval(sus_u), z.m_release if z.m_release >= EPS else 0.0))
+    else:
+        # No decay leg: the peak node IS the sustain node, so its time-to-next is the
+        # release time (it previously carried `hold`, which dropped m_release entirely).
+        pts.append((nodeval(1.0), z.m_release if z.m_release >= EPS else 0.0)); sustain_idx = len(pts) - 1
+    pts.append((nodeval(0.0), 0.0))                 # release returns to base cutoff
+    nodes = [(v, nearest_minifloat(d)) for v, d in pts[:25]]
+    while len(nodes) < 25:
+        nodes.append((nodes[-1][0], 0))             # pad to 25 nodes with the last value
+    sustain_idx = min(sustain_idx, 24)
+    loop    = ENV_PRESENT_BIT | ENV_PF_FILTER       # m-bit set = filter role
+    sustain = ENV_SUS_ENABLE | ((sustain_idx & 0x1F) << 8) | (sustain_idx & 0x1F)
+    return {'loop': loop, 'sustain': sustain, 'nodes': nodes}
+
+
+def _zone_fadeout(z: SFZone, bpm0: int, fadeout_override) -> int:
+    """Safety fadeout value (12-bit): an explicit override clamped to 0..0xFFF, else one
+    sized to ~4× the zone's SF2 release (min 0.05 s). Mirrors the base-record computation."""
+    if fadeout_override is None:
+        seconds = 4.0 * max(z.env_release, 0.05)
+        return max(1, min(0xFFF, round(2560.0 / (seconds * bpm0))))
+    return min(0xFFF, max(0, fadeout_override))
+
+
+def _extra_block(z: SFZone, bpm0: int, fadeout_override) -> dict:
+    """Assemble the 'x' block dict: safety fadeout, the SF filter-mode flag and the
+    static SF-mode cutoff/resonance words resolved for zone `z`."""
+    mode, cut, res = _zone_filter_sf(z)[:3]           # the filter-env element is not needed here
+    block = {'fadeout': _zone_fadeout(z, bpm0, fadeout_override)}
+    block.update(filter_sf_mode=mode, default_cutoff=cut, default_resonance=res)
+    return block
+
+
+def _pitch_env_block(z: SFZone) -> dict:
+    """Pitch ('P') envelope block from the SF2 modulation envelope (DAHDSR),
+    scaled by modEnvToPitch. Engine value mapping (byte/255; 0.5 = 0x80 = unity):
+    envValue 1.0 → +16 semitones, so value = 0.5 + semis/32. The mod-env is
+    unipolar 0→1; release returns to unity (0x80) over m_release. (Filter envelopes
+    are built separately in cents domain by [_filter_env_block_sf].)"""
+    EPS = 0.004
+    amount_cents = z.me2pitch
+    sus_lvl = 1.0 - z.m_sustain_pc / 1000.0          # mod-env sustain level (0..1)
+
+    def mapval(u: float) -> int:
+        val = 0.5 + (amount_cents * u / 100.0) / 32.0
+        return max(0, min(255, round(255 * max(0.0, min(1.0, val)))))
+
+    pts = []                                          # (value_byte, secs_to_next)
+    if z.m_delay >= EPS:
+        pts.append((mapval(0.0), z.m_delay))
+    pts.append((mapval(0.0), z.m_attack if z.m_attack >= EPS else 0.0))
+    hold = z.m_hold if z.m_hold >= EPS else 0.0
+    if sus_lvl < 1.0 and z.m_decay >= EPS:
+        pts.append((mapval(1.0), hold + z.m_decay))
+        sustain_idx = len(pts)
+        pts.append((mapval(sus_lvl), z.m_release if z.m_release >= EPS else 0.0))
+    else:
+        # No decay leg: the peak node IS the sustain node, so its time-to-next is the
+        # release time (it previously carried `hold`, which dropped m_release entirely).
+        pts.append((mapval(1.0), z.m_release if z.m_release >= EPS else 0.0)); sustain_idx = len(pts) - 1
+    pts.append((mapval(0.0), 0.0))                    # release returns to unity (0x80)
+    nodes = [(v, nearest_minifloat(d)) for v, d in pts[:25]]
+    while len(nodes) < 25:
+        nodes.append((nodes[-1][0], 0))               # pad to 25 nodes with the last value
+    sustain_idx = min(sustain_idx, 24)
+    loop = ENV_PRESENT_BIT                            # m-bit clear = pitch role
+    sustain = ENV_SUS_ENABLE | ((sustain_idx & 0x1F) << 8) | (sustain_idx & 0x1F)
+    return {'loop': loop, 'sustain': sustain, 'nodes': nodes}
+
+
+def _zone_pf_envs(z: SFZone):
+    """Build the (filter_env_block_or_None, pitch_env_block_or_None) pair that a zone's
+    modulation envelope produces. One SF2 mod-env may feed both targets; the filter
+    block comes from [_zone_filter_sf] (SF-cents domain), the pitch block only if me2pitch."""
+    filter_env = _zone_filter_sf(z)[3]
+    pitch_env = None if not z.me2pitch else _pitch_env_block(z)
+    return filter_env, pitch_env
+
+
+def build_sample_inst_bin(sf: SF2, pool: list, layer_insts: list, meta_records: list,
+                          fadeout_override, bpm0: int):
+    """Render & pool every used MonoSample (with the 65535-byte per-sample
+    and 8 MB global caps), write the 256-byte normal-instrument records for every
+    layer instrument, then the Metainstrument records. Returns the raw
+    SAMPLEINST_SIZE image."""
+    for ms in pool:
+        ms.render(sf)
+
+    # Per-sample u16 cap.
+    for ms in pool:
+        if len(ms.data) > SAMPLE_LEN_LIMIT:
+            r = SAMPLE_LEN_LIMIT / len(ms.data)
+            vprint(f"  info: '{ms.name}' {len(ms.data)} bytes > 64K cap; "
+                   f"resampling by {r:.4f}")
+            old = len(ms.data)
+            ms.data = resample_linear(ms.data, r)
+            ms.ratio *= len(ms.data) / old               # track the length ratio actually achieved
+
+    # Global 8 MB pool cap.
+    total = sum(len(ms.data) for ms in pool)
+    if total > SAMPLEBIN_SIZE:
+        g = SAMPLEBIN_SIZE / total
+        vprint(f"  info: sample pool overflow ({total} bytes); "
+               f"resampling all by {g:.4f}")
+        for ms in pool:
+            old = len(ms.data)
+            ms.data = resample_linear(ms.data, g)
+            ms.ratio *= len(ms.data) / old
+
+    sample_bin = bytearray(SAMPLEBIN_SIZE)
+    pos = 0
+    for ms in pool:
+        n = min(len(ms.data), SAMPLEBIN_SIZE - pos)      # bytes that still fit in the pool
+        if n < len(ms.data):
+            vprint(f"  warning: pool full, truncating '{ms.name}'")
+            ms.data = ms.data[:n]
+        sample_bin[pos:pos+n] = ms.data
+        ms.offset = pos
+        pos += n
+    vprint(f"  sample pool: {len(pool)} sample(s), {pos} bytes")
+
+    inst_bin = bytearray(INSTBIN_SIZE)
+    for ti in layer_insts:
+        if not ti.usable:
+            continue
+        c  = ti.canonical
+        ms = c.ms
+        r  = ms.ratio                                    # cumulative resample ratio for this sample
+        base = ti.slot * 256                             # each instrument record is 256 bytes
+        struct.pack_into('<I', inst_bin, base + 0, ms.offset)
+        struct.pack_into('<H', inst_bin, base + 4, min(len(ms.data), 0xFFFF))
+        struct.pack_into('<H', inst_bin, base + 6,
+                         max(1, min(0xFFFF, round(ms.rate * r))))
+        struct.pack_into('<H', inst_bin, base + 8, 0)            # play start
+        struct.pack_into('<H', inst_bin, base + 10,
+                         min(0xFFFF, round(c.loop_start * r)))
+        struct.pack_into('<H', inst_bin, base + 12,
+                         min(0xFFFF, round(c.loop_end * r)))
+        inst_bin[base + 14] = c.loop_mode
+
+        def wenv(loop_off, sus_off, nodes_off, blk):
+            struct.pack_into('<H', inst_bin, base + loop_off, blk['loop'] & 0xFFFF)
+            struct.pack_into('<H', inst_bin, base + sus_off,  blk['sustain'] & 0xFFFF)
+            nodes = list(blk['nodes'])
+            for k in range(25):
+                v, mf = nodes[k] if k < len(nodes) else (nodes[-1][0] if nodes else 0, 0)
+                inst_bin[base + nodes_off + k*2]     = v & 0xFF
+                inst_bin[base + nodes_off + k*2 + 1] = mf & 0xFF
+
+        # Volume envelope from the canonical zone's SF2 ADSR (D/A/H/D shape, single-
+        # node sustain held while key is on, release nodes after key-off). The zone's
+        # initialAttenuation is NOT folded in here; it is written to byte 251 below.
+        # Non-canonical zones with a different ADSR carry their own per-patch vol_env
+        # (see Patch.to_ixmp_dict); the base record is the canonical / fall-through.
+        vol_blk, _, rel = _vol_env_block(c.zone)
+        wenv(15, 189, 21, vol_blk)
+        # Pan envelope: none (default unity nodes; P bit clear in LOOP word).
+        struct.pack_into('<H', inst_bin, base + 17, 0)
+        for k in range(25):
+            inst_bin[base + 71 + k*2] = 0x80
+        # Pitch/filter envelopes — SEPARATE, fixed slots (issue 2): slot #1 (bytes
+        # 19/121) is the FILTER envelope (m-bit set), defaulting flat to 0xFF
+        # (fully OPEN — the engine's filter-env neutral, since currentCutoff =
+        # baseCut·envValue and 1.0 = open); slot #2 (bytes 197/201) is the PITCH
+        # envelope (m-bit clear), defaulting flat to 0x80 (unity, no transpose). A
+        # flat slot keeps its LOOP word at 0 (P-bit clear) so the engine ignores it.
+        sf_mode, cut16, res16, filt_env = _zone_filter_sf(c.zone)
+        pit_env = _pitch_env_block(c.zone) if c.zone.me2pitch else None
+        for k in range(25):
+            inst_bin[base + 121 + k*2] = 0xFF                    # filter-env (slot 1) flat = open
+            inst_bin[base + 201 + k*2] = 0x80                    # pitch-env (slot 2) flat = unity
+        if filt_env is not None:
+            wenv(19, 193, 121, filt_env)
+        if pit_env is not None:
+            wenv(197, 199, 201, pit_env)
+
+        # Fadeout: safety net that guarantees released voices (and NNA ghosts)
+        # eventually die. Sized ~4× the SF2 release so the envelope's release
+        # ramp dominates what you hear.
+        if fadeout_override is not None:
+            fo = min(0xFFF, max(0, fadeout_override))
+        else:
+            fade_sec = max(rel, 0.05) * 4.0                      # same formula as _zone_fadeout
+            fo = max(1, min(0xFFF, round(2560.0 / (fade_sec * bpm0))))
+        inst_bin[base + 171] = 0xFF                              # IGV (unit)
+        inst_bin[base + 172] = fo & 0xFF
+        # byte 173: bits 0-3 = fadeout high nibble, bit 4 = SF filter mode (cutoff/resonance
+        # are 16-bit SoundFont cents/centibels in bytes 182<<8|252 / 183<<8|253).
+        inst_bin[base + 173] = ((fo >> 8) & 0x0F) | (0x10 if sf_mode else 0)
+        inst_bin[base + 177] = (0x80 if c.pan8 == IXMP_PAN_NO_OVERRIDE
+                                else c.pan8)                     # default pan
+        struct.pack_into('<H', inst_bin, base + 178, TAUD_C4)    # PPC
+        inst_bin[base + 182] = (cut16 >> 8) & 0xFF               # cutoff high (SF cents / IT byte)
+        inst_bin[base + 252] = cut16 & 0xFF                      # cutoff low  (SF mode)
+        inst_bin[base + 183] = (res16 >> 8) & 0xFF               # resonance high
+        inst_bin[base + 253] = res16 & 0xFF                      # resonance low (SF mode)
+        struct.pack_into('<H', inst_bin, base + 184, c.detune & 0xFFFF)
+        # NNA: melodic = Key Lift (flag bit 5, the 0b100 pattern) — MIDI-exact
+        # key release: key-off jumps the envelope playhead to the sustain-end
+        # node so the release nodes play immediately, instead of walking the
+        # remaining hold/decay first (which rings like a held sustain pedal on
+        # SF2 instruments with multi-second hold/decay). Applies to pattern
+        # KEY_OFFs and NNA ghosts alike. Drums = continue (one-shots ring to
+        # their natural end).
+        inst_bin[base + 186] = 0b10 if ti.inst_key[0] == 'd' else 0b100000   # inst_key[0] == 'd' marks a drum
+        inst_bin[base + 196] = 255                               # default note vol
+        # initialAttenuation (byte 251, dB-table octet) — the canonical zone's static gain,
+        # applied per-voice by the mixer (no longer folded into the vol-env). Per-patch zones
+        # with a different attenuation carry their own octet in the Ixmp 'x' block.
+        inst_bin[base + 251] = atten_cb_to_octet(c.zone.atten_cb) & 0xFF
+        if ti.inst_key[0] == 'd' and (c.loop_mode & 3) != 0:
+            vprint(f"  warning: '{ti.name}': looped drum sample with NNA "
+                   f"continue — ghosts only die via the background-pool cap")
+
+    # Metainstrument records: a 0xFFFF-sentinel sample pointer (high 16 bits) plus a
+    # layer table (terranmon.txt "Metainstrument definition"). Layers stay neutral
+    # (unity mix, zero detune); per-zone level/tune already live in each layer
+    # instrument's patches. The note references the meta slot; the engine fans out.
+    for meta_slot, _name, layer_descs in meta_records:
+        base = meta_slot * 256
+        inst_bin[base + 0] = 0                                  # type 0 = layered
+        inst_bin[base + 1] = len(layer_descs) & 0xFF            # layer count
+        inst_bin[base + 2] = 0xFF; inst_bin[base + 3] = 0xFF    # identifier (hi 16 bits)
+        o = base + 4                                            # first 10-byte layer entry
+        for layer_slot, rect in layer_descs:
+            plo, phi, vlo, vhi = rect
+            inst_bin[o]     = layer_slot & 0xFF
+            inst_bin[o + 1] = META_UNITY_OCTET
+            struct.pack_into('<h', inst_bin, o + 2, 0)          # sample detune (neutral)
+            struct.pack_into('<H', inst_bin, o + 4, plo & 0xFFFF)
+            struct.pack_into('<H', inst_bin, o + 6, phi & 0xFFFF)
+            inst_bin[o + 8] = vlo & 0x3F
+            inst_bin[o + 9] = vhi & 0x3F
+            o += 10
+
+    return bytes(sample_bin) + bytes(inst_bin)
+
+
+# ── Cell grid (voices × rows) ────────────────────────────────────────────────
+
+def _cell(cells: dict, v: int, row: int) -> dict:
+    """Fetch the grid cell at (voice v, row), first creating a blank one if absent."""
+    key = (v, row)
+    if cells.get(key) is None:                        # missing (or stored None) → fresh blank cell
+        cells[key] = {'note': NOTE_NOP, 'inst': 0, 'vol': (SEL_FINE, 0),
+                      'pan': (SEL_FINE, 0), 'eff': None, 'prio': PRIO_FREE}
+    return cells[key]
+
+
+def allocate_voices(notes: list, speed: int, max_voices: int) -> int:
+    """Greedy per-row interval scheduling onto as few columns as possible.
+
+    The engine's New Note Action does the heavy lifting (matching MIDI
+    polyphony semantics): a fresh trigger on an occupied voice migrates the
+    old note into the mixer's background-ghost pool, so a voice is reusable
+    the moment its note is *released* — the release/ring tail rides the
+    ghost. Melodic voices free at their key-off row; drum voices (NNA
+    continue, no key-off) free on the very next row. Stealing is therefore
+    graceful: the victim is released early, not cut.
+
+    Mutates note.voice (and truncates stolen notes' end_ft). Returns the
+    number of voices used."""
+    cap = max(1, min(max_voices, NUM_VOICES))        # at least 1 voice, at most NUM_VOICES
+    v_end  = []     # voice → first row at which it is free again
+    v_slot = []     # voice → last instrument slot (affinity only)
+    v_note = []     # voice → currently scheduled note
+    stolen = 0
+    for n in notes:                                  # notes are taken in the order given
+        srow = n.start_ft // speed
+        free = [v for v in range(len(v_end)) if v_end[v] <= srow]
+        v = next((x for x in free if v_slot[x] == n.slot),      # prefer a free voice that last used this slot
+                 free[0] if free else -1)
+        if v < 0:
+            if len(v_end) < cap:
+                v = len(v_end)                       # open a new voice column
+                v_end.append(0); v_slot.append(0); v_note.append(None)
+            else:
+                # Steal preference: notes held only by the sustain pedal lose
+                # least (their key is already up); otherwise the note ending
+                # soonest. Either way NNA turns the steal into an early release.
+                pedal = [x for x in range(len(v_end))
+                         if v_note[x] is not None
+                         and v_note[x].pedal_ft is not None
+                         and v_note[x].pedal_ft <= n.start_ft]
+                cand = pedal if pedal else range(len(v_end))
+                v = min(cand, key=lambda x: v_end[x])
+                victim = v_note[v]
+                if victim is not None and victim.end_ft > n.start_ft:
+                    victim.end_ft = n.start_ft       # truncate the stolen note at the new trigger
+                stolen += 1
+        if n.drum:
+            end_row = srow + 1                       # ghost carries the ring
+        else:
+            end_row = max(srow + 1, n.end_ft // speed)   # free at key-off row
+        n.voice = v
+        v_end[v], v_slot[v], v_note[v] = end_row, n.slot, n
+    if stolen:
+        vprint(f"  info: polyphony exceeded {cap} voices; {stolen} note(s) "
+               f"released early (NNA ghost keeps the tail)")
+    return len(v_end)
+
+
+def emit_cells(song: Song, insts: dict, speed: int, rpb: int,
+               eps_units: float, drum_keyoff: bool, shift_ft: int,
+               max_voices: int) -> tuple:
+    """Place triggers, key-offs, portamento bend segments, M channel-volume
+    and T tempo effects into the (voice,row) cell grid.
+    Returns (cells, n_voices, total_rows, taud_bpm0)."""
+    notes = [n for n in song.notes if n.slot > 0]
+
+    def midi_bpm_at(ft):
+        i = bisect.bisect_right(song.tempo_ft, ft) - 1
+        return song.tempo_bpm[i] if i >= 0 else 120.0
+
+    scale = rpb * speed / 24.0
+
+    def taud_bpm(b):
+        t = round(b * scale)
+        if not (25 <= t <= 280):
+            vprint(f"  warning: tempo {b:.1f} BPM maps to Taud {t}, "
+                   f"clamped to 25..280 (try a different --rpb/--speed)")
+        return max(25, min(280, t))
+
+    n_voices = allocate_voices(notes, speed, max_voices)
+    if n_voices == 0:
+        sys.exit("error: no playable notes")
+    vprint(f"  voices: {n_voices} used (cap {max_voices}; NNA carries tails)")
+
+    cells = {}
+
+    # ── Pass 1: triggers ──
+    for n in notes:
+        row, tick = n.start_ft // speed, n.start_ft % speed
+        c = _cell(cells, n.voice, row)
+        nv = key_to_noteval(n.key + n.bend0)
+        c['note'] = nv
+        c['inst'] = n.slot
+        c['vol']  = (SEL_SET, round(n.vel * 63 / 127))
+        st = song.channels[n.ch]
+        if st.cc10_ft:
+            pan = _curve_at(st.cc10_ft, st.cc10_val, n.start_ft + shift_ft, 64)
+            c['pan'] = (SEL_SET, round(pan * 63 / 127))
+        if tick > 0:
+            c['eff']  = (TOP_S, 0xD000 | (tick << 8))
+            c['prio'] = PRIO_DELAY
+
+    # ── Pass 2: key-offs (both MIDI idioms arrive here as note.end_ft) ──
+    skipped_offs = 0
+    for n in notes:
+        if n.drum and not drum_keyoff:
+            continue
+        row, tick = n.end_ft // speed, n.end_ft % speed
+        c = cells.get((n.voice, row))
+        if c is None:
+            c = _cell(cells, n.voice, row)
+            c['note'] = NOTE_KEYOFF
+            if tick > 0:
+                c['eff']  = (TOP_S, 0xD000 | (tick << 8))
+                c['prio'] = PRIO_DELAY
+        elif c['note'] == NOTE_NOP:
+            c['note'] = NOTE_KEYOFF
+            if tick > 0 and c['eff'] is None:
+                c['eff']  = (TOP_S, 0xD000 | (tick << 8))
+                c['prio'] = PRIO_DELAY
+        else:
+            skipped_offs += 1    # row taken by a retrigger — which cuts anyway
+    if skipped_offs:
+        vprint(f"  info: {skipped_offs} key-off(s) absorbed by same-row retriggers")
+
+    # ── Pass 3: pitch-bend portamento segments ──
+    # One linear segment per row: the cell carries the exact 4096-TET target
+    # plus G at units/tick sized to land on it by row end (G slides on the
+    # speed-1 non-first ticks). Targets within eps_units are skipped (jitter
+    # simplification).
+    seg_count = 0
+    if speed >= 2:
+        for n in notes:
+            st = song.channels[n.ch]
+            if len(st.bend_ft) <= 1 and n.bend0 == 0.0:
+                continue
+            start_row = n.start_ft // speed
+            end_row   = n.end_ft   // speed
+            cur = key_to_noteval(n.key + n.bend0)
+            for r in range(start_row + 1, end_row):
+                ftr = min((r + 1) * speed, n.end_ft) + shift_ft
+                target = key_to_noteval(
+                    n.key + _curve_at(st.bend_ft, st.bend_val, ftr, 0.0))
+                if abs(target - cur) < eps_units:
+                    continue
+                if (n.voice, r) in cells:
+                    continue
+                step = -(-abs(target - cur) // (speed - 1))
+                c = _cell(cells, n.voice, r)
+                c['note'] = target
+                c['eff']  = (TOP_G, min(0xFFFF, step))
+                c['prio'] = PRIO_PORTA
+                cur = target
+                seg_count += 1
+    elif any(len(st.bend_ft) > 1 for st in song.channels):
+        vprint("  warning: --speed 1 cannot express portamento; "
+               "pitch-bend movement dropped")
+    if seg_count:
+        vprint(f"  bend: {seg_count} portamento segment(s) emitted")
+
+    # ── Pass 4: M channel volume (CC7 × CC11), per voice chronologically ──
+    by_voice = {}
+    for n in notes:
+        by_voice.setdefault(n.voice, []).append(n)
+    m_emitted = 0
+    for v, vnotes in by_voice.items():
+        vnotes.sort(key=lambda n: n.start_ft)
+        m_state = 0x3F                            # engine channel_vol default
+        for n in vnotes:
+            st = song.channels[n.ch]
+            for r in range(n.start_ft // speed, n.end_ft // speed + 1):
+                ftr = r * speed + shift_ft
+                m = round(_curve_at(st.cc7_ft,  st.cc7_val,  ftr, 100) / 127
+                          * _curve_at(st.cc11_ft, st.cc11_val, ftr, 127) / 127
+                          * 63)
+                if m == m_state:
+                    continue
+                c = _cell(cells, v, r)
+                if c['eff'] is not None:
+                    continue                      # slot busy — retry next row
+                c['eff']  = (TOP_M, (m & 0x3F) << 8)
+                c['prio'] = PRIO_M
+                m_state = m
+                m_emitted += 1
+    if m_emitted:
+        vprint(f"  cc: {m_emitted} M channel-volume effect(s) emitted")
+
+    total_rows = max(r for (_v, r) in cells) + 1
+
+    # ── Pass 5: T tempo changes ──
+    bpm0 = midi_bpm_at(shift_ft)                  # tempo in effect at row 0
+    last = taud_bpm(bpm0)
+    t_emitted = t_evict = 0
+    for ft, b in zip(song.tempo_ft, song.tempo_bpm):
+        row = (ft - shift_ft) // speed
+        if row < 0:
+            continue
+        if row >= total_rows:
+            break
+        tb = taud_bpm(b)
+        if tb == last:
+            continue
+        placed = False
+        victim = None
+        for v in range(n_voices):
+            c = cells.get((v, row))
+            if c is None or c['eff'] is None:
+                c = _cell(cells, v, row)
+                c['eff']  = (TOP_T, ((tb - 25) & 0xFF) << 8)
+                c['prio'] = PRIO_TEMPO
+                placed = True
+                break
+            if c['prio'] < PRIO_DELAY and (victim is None
+                                           or c['prio'] < victim['prio']):
+                victim = c
+        if not placed and victim is not None:
+            if victim['prio'] == PRIO_PORTA:
+                victim['note'] = NOTE_NOP         # orphan G note would retrigger
+            victim['eff']  = (TOP_T, ((tb - 25) & 0xFF) << 8)
+            victim['prio'] = PRIO_TEMPO
+            placed = True
+            t_evict += 1
+        if placed:
+            last = tb
+            t_emitted += 1
+    if t_emitted:
+        vprint(f"  tempo: {t_emitted} T effect(s)"
+               + (f" ({t_evict} evicted a lesser effect)" if t_evict else ""))
+
+    return cells, n_voices, total_rows, taud_bpm(bpm0)
+
+
+# ── Pattern / cue emission and final assembly ────────────────────────────────
+
+def build_pattern_bin(cells: dict, n_voices: int, n_cues: int) -> bytes:
+    """Flatten the sparse cell map into consecutive pattern images, cue-major then
+    voice-minor (pattern index = cue * n_voices + voice), PATTERN_BYTES apart.
+
+    Each row is written as 8 bytes: u16 LE note, u8 inst, vol byte, pan byte, u8 effect
+    opcode, u16 LE effect argument. The vol/pan bytes put the pair's first element (2 bits)
+    in bits 6-7 over its second element (6 bits). A (voice, row) key missing from `cells`
+    yields an all-zero row whose vol and pan bytes are 0xC0."""
+    image = bytearray(n_cues * n_voices * PATTERN_BYTES)
+    for cue in range(n_cues):
+        for voice in range(n_voices):
+            pat_off = (cue * n_voices + voice) * PATTERN_BYTES
+            for row in range(PATTERN_ROWS):
+                off = pat_off + 8 * row
+                cell = cells.get((voice, cue * PATTERN_ROWS + row))
+                if cell is None:
+                    image[off + 3] = image[off + 4] = 0xC0
+                    continue
+                (vol_sel, vol_val), (pan_sel, pan_val) = cell['vol'], cell['pan']
+                eff_op, eff_arg = (0, 0) if cell['eff'] is None else cell['eff']
+                struct.pack_into('<HBBBBH', image, off, cell['note'] & 0xFFFF, cell['inst'] & 0xFF,
+                                 (vol_val & 0x3F) | ((vol_sel & 3) << 6),
+                                 (pan_val & 0x3F) | ((pan_sel & 3) << 6), eff_op & 0xFF, eff_arg & 0xFFFF)
+    return bytes(image)
+
+
+def assemble_taud(sf: SF2, song: Song, layer_insts: list, meta_records: list,
+                  slot_name: dict, pool: list, args) -> bytes:
+    speed, rpb = args.speed, args.rpb
+    # Assembles and returns the full .taud byte image from the resolved song, instruments and pool.
+    # Leading-silence trim: shift the grid so the first trigger is row 0.
+    first_row = min(n.start_ft // speed for n in song.notes if n.slot > 0)
+    shift_ft = first_row * speed
+    if shift_ft:
+        vprint(f"  info: trimming {first_row} leading silent row(s)")
+        for n in song.notes:                      # mutates the caller's note objects in place
+            n.start_ft -= shift_ft
+            n.end_ft   -= shift_ft
+    # --bend-epsilon is given in cents; convert at 4096 note units per 1200 cents.
+    eps_units = args.bend_epsilon * 4096.0 / 1200.0
+    cells, n_voices, total_rows, bpm0 = emit_cells(
+        song, None, speed, rpb, eps_units, args.drum_keyoff, shift_ft,
+        args.max_voices)
+    # One cue per PATTERN_ROWS rows (ceiling division); abort when a format limit is exceeded.
+    n_cues = (total_rows + PATTERN_ROWS - 1) // PATTERN_ROWS
+    if n_cues > NUM_CUES:
+        sys.exit(f"error: song needs {n_cues} cues > {NUM_CUES} limit "
+                 f"(try a smaller --rpb)")
+    if n_cues * n_voices > NUM_PATTERNS_MAX:
+        sys.exit(f"error: {n_cues} cues × {n_voices} voices "
+                 f"> {NUM_PATTERNS_MAX} pattern limit")
+
+    pat_bin = build_pattern_bin(cells, n_voices, n_cues)
+    pat_bin, remap, n_unique = deduplicate_patterns(pat_bin, n_cues * n_voices)
+    vprint(f"  patterns: {n_cues * n_voices} → {n_unique} unique; "
+           f"{n_cues} cue(s), {n_voices} voice(s), {total_rows} rows")
+    # Cue sheet: fill every cue with encode_cue([], 0) first, then overwrite the cues in use.
+    sheet = bytearray(NUM_CUES * CUE_SIZE)
+    for ci in range(NUM_CUES):
+        sheet[ci*CUE_SIZE:(ci+1)*CUE_SIZE] = encode_cue([], 0)
+    for ci in range(n_cues):
+        pats = [remap[ci * n_voices + v] for v in range(n_voices)]
+        tail = total_rows - ci * PATTERN_ROWS     # rows remaining from the start of this cue
+        if ci == n_cues - 1:
+            instr = CUE_INST_HALT
+        elif tail < PATTERN_ROWS:                 # NOTE(review): never true here — n_cues is a ceiling,
+            instr = cue_instruction_len(tail)     # so only the last cue is short and HALT above claims it
+        else:
+            instr = CUE_INST_NOP
+        sheet[ci*CUE_SIZE:(ci+1)*CUE_SIZE] = encode_cue(pats, instr)
+
+    # ── Sample + instrument bin ──
+    sampleinst_raw = build_sample_inst_bin(sf, pool, layer_insts, meta_records,
+                                           args.fadeout, bpm0)
+    assert len(sampleinst_raw) == SAMPLEINST_SIZE
+    compressed = compress_blob(sampleinst_raw, "sample+inst bin")
+    comp_size  = len(compressed)
+
+    pat_comp = compress_blob(pat_bin,      "pattern bin")
+    cue_comp = compress_blob(bytes(sheet), "cue sheet")
+    # Output order: header | sample+inst bin | one song entry | pattern bin | cue sheet [| project data]
+    song_table_off = TAUD_HEADER_SIZE + comp_size
+    song_off       = song_table_off + TAUD_SONG_ENTRY
+    entry = encode_song_entry(
+        song_offset=song_off,
+        num_voices=n_voices,
+        num_patterns=n_unique,
+        bpm_stored=(bpm0 - 25) & 0xFF,
+        tick_rate=speed,
+        base_note=0xA000,
+        base_freq=8363.0,
+        flags_byte=0x00,                          # linear pitch mode
+        pat_bin_comp_size=len(pat_comp),
+        cue_sheet_comp_size=len(cue_comp),
+        global_vol=0xFF,
+        mixing_vol=0xFF,
+    )
+
+    # ── Project data: names + the Ixmp section recreating SF2 layering ──
+    proj_data = b''
+    proj_off  = 0                                 # header field stays 0 when no project data is written
+    if not args.no_project_data:
+        # Names indexed by slot (0 = unused). Layer slots carry the (suffixed) layer
+        # instrument name; meta slots carry the bare preset name.
+        max_slot = max([0] + list(slot_name))
+        inst_names = ['' for _ in range(max_slot + 1)]
+        for s, nm in slot_name.items():
+            inst_names[s] = nm
+        smp_names  = [''] + [ms.name for ms in pool]
+        ixmp = {}
+        for ti in layer_insts:
+            if not ti.usable:
+                continue
+            pl = [p.to_ixmp_dict(ti.canonical, bpm0, args.fadeout)
+                  for p in ti.patches if p is not ti.canonical]
+            if pl:
+                ixmp[ti.slot] = pl
+        if ixmp:
+            vprint(f"  ixmp: {sum(len(p) for p in ixmp.values())} patch(es) "
+                   f"across {len(ixmp)} instrument(s)")
+        title = song.title or os.path.splitext(os.path.basename(args.input))[0]
+        proj_data = build_project_data(
+            project_name=title,
+            instrument_names=inst_names,
+            sample_names=smp_names,
+            ixmp_patches=ixmp or None,
+        )
+
+    header = (TAUD_MAGIC
+              + bytes([TAUD_VERSION, 1])
+              + struct.pack('<I', comp_size)
+              + struct.pack('<I', 0)              # patched below if proj data
+              + (SIGNATURE + b' ' * 14)[:14])
+    assert len(header) == TAUD_HEADER_SIZE
+
+    out = bytearray()
+    out += header
+    out += compressed
+    out += entry
+    out += pat_comp
+    out += cue_comp
+    if proj_data:
+        proj_off = len(out)
+        struct.pack_into('<I', out, 14, proj_off)  # 14 = placeholder offset; assumes 8-byte TAUD_MAGIC — TODO confirm
+        out += proj_data
+        vprint(f"  project data: {len(proj_data)} bytes @ {proj_off}")
+    return bytes(out)
+
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+def main():  # CLI entry point: parse args, read MIDI + SF2, allocate instrument slots, write the .taud
+    ap = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument('input',     help='Input .mid file')
+    ap.add_argument('soundfont', help='SoundFont 2 (.sf2) sample library')
+    ap.add_argument('output', nargs='?', default=None,
+                    help='Output .taud (default: input stem + .taud)')
+    ap.add_argument('--perc-force-mapping', nargs=2, type=int, default=None,
+                    metavar=('BANK', 'INST'),
+                    help='Force the percussion channel to this SF2 preset '
+                         '(default: bank 128, channel program)')
+    ap.add_argument('--rpb', type=int, default=4, choices=(2, 4, 8, 16),
+                    help='Rows per beat (default 4 = 16th-note rows)')
+    ap.add_argument('--speed', type=int, default=6,
+                    help='Ticks per row, 1..15 (default 6)')
+    ap.add_argument('--fadeout', type=int, default=None,
+                    help='Override the computed fadeout step (0..4095). By '
+                         'default each instrument gets a safety fadeout '
+                         'sized ~4× its SF2 release time so the envelope '
+                         'release dominates and NNA ghosts always die')
+    ap.add_argument('--max-voices', type=int, default=20,
+                    help='Voice-column budget, 1..20 (default 20). NNA '
+                         'background ghosts carry release/ring tails, so '
+                         'few foreground voices are needed; songs exceeding '
+                         'the budget release the oldest pedal-held or '
+                         'soonest-ending note early')
+    ap.add_argument('--max-layers', type=int, default=4,
+                    help='Max simultaneous layers per note (default 4). Each SF2 '
+                         'preset is split into this many disjoint layers; presets '
+                         'needing >1 layer become a Metainstrument. 1 disables '
+                         'layering (first-zone-wins, like the old behaviour). '
+                         'Covers ~93%% of big-bank presets at 4, ~98%% at 5')
+    ap.add_argument('--bend-epsilon', type=float, default=4.0,
+                    help='Pitch-bend simplification threshold in cents '
+                         '(default 4.0); smaller = more faithful')
+    ap.add_argument('--drum-keyoff', action='store_true',
+                    help='Emit KEY_OFF for percussion-channel notes too '
+                         '(GM drums normally ignore note-off)')
+    ap.add_argument('--no-project-data', action='store_true',
+                    help='Omit the Project Data section — NOTE: this also '
+                         'omits Ixmp, collapsing every instrument to its '
+                         'canonical sample')
+    ap.add_argument('-v', '--verbose', action='store_true')
+    args = ap.parse_args()
+    set_verbose(args.verbose)
+    # Manual range checks. NOTE(review): --fadeout's documented 0..4095 range is not checked here.
+    if not (1 <= args.speed <= 15):
+        sys.exit("error: --speed must be 1..15")
+    if not (1 <= args.max_voices <= 20):
+        sys.exit("error: --max-voices must be 1..20")
+    if not (1 <= args.max_layers <= 25):
+        sys.exit("error: --max-layers must be 1..25")
+    if args.output is None:
+        args.output = os.path.splitext(args.input)[0] + '.taud'
+
+    vprint(f"parsing MIDI '{args.input}'…")
+    division, merged = parse_midi(args.input)
+    song = extract_song(division, merged, args.rpb, args.speed)
+    vprint(f"  {len(song.notes)} note(s), {len(song.tempo_ft)} tempo event(s)")
+    if not song.notes:
+        sys.exit("error: MIDI contains no playable notes")
+
+    vprint(f"parsing SF2 '{args.soundfont}'…")
+    sf = parse_sf2(args.soundfont)
+    vprint(f"  {len(sf.presets)} preset(s), {len(sf.shdrs)} sample header(s)")
+
+    # Presets in first-use order; triggers keyed by the exact (noteVal-with-initial-
+    # bend, vol6) pair the patterns will carry, so layer trimming sees precisely what
+    # the engine matches at runtime.
+    slot_keys = []
+    seen_keys = set()
+    triggers  = {}        # inst_key → {(noteVal, vol6): occurrence count}
+    for n in song.notes:
+        if n.inst_key not in seen_keys:
+            seen_keys.add(n.inst_key)
+            slot_keys.append(n.inst_key)
+        t = triggers.setdefault(n.inst_key, {})
+        k = (key_to_noteval(n.key + n.bend0), round(n.vel * 63 / 127))
+        t[k] = t.get(k, 0) + 1
+    vprint(f"  {len(slot_keys)} preset(s) in use")
+
+    registry = {}
+    presets = build_presets(sf, slot_keys, triggers, args.perc_force_mapping,
+                            registry, args.max_layers)
+
+    # Allocate instrument-bin slots: each layer is a normal instrument; a preset with
+    # >1 layer also takes a Metainstrument slot the note references. Single-layer
+    # presets stay plain instruments (no meta, no extra slot).
+    next_slot   = 1
+    layer_insts = []      # all normal instruments, .slot assigned
+    meta_records = []     # (meta_slot, name, [(layer_slot, bbox_rect)])
+    slot_name   = {}      # slot → display name
+    note_slot   = {}      # inst_key → slot a note triggers (0 = unplayable)
+    for ik in slot_keys:
+        name, layers = presets[ik]
+        if not layers:
+            note_slot[ik] = 0
+            continue
+        need = len(layers) + (1 if len(layers) > 1 else 0)   # layer slots + one meta slot if layered
+        if next_slot + need - 1 > 255:            # the whole preset must fit within slots 1..255
+            vprint(f"  warning: 255-slot budget exhausted — preset '{name}' dropped")
+            note_slot[ik] = 0
+            continue
+        for li, ti in enumerate(layers):
+            ti.slot = next_slot; next_slot += 1
+            layer_insts.append(ti)
+            slot_name[ti.slot] = name if len(layers) == 1 else f"{name} L{li}"
+        if len(layers) == 1:
+            note_slot[ik] = layers[0].slot
+        else:
+            meta_slot = next_slot; next_slot += 1
+            meta_records.append((meta_slot, name,
+                                 [(ti.slot, _layer_bbox(ti)) for ti in layers]))
+            slot_name[meta_slot] = name
+            note_slot[ik] = meta_slot
+    vprint(f"  slots: {next_slot - 1} used — {len(layer_insts)} instrument(s), "
+           f"{len(meta_records)} Metainstrument(s)")
+
+    # Tag notes with their trigger slot; notes whose preset failed to resolve drop.
+    unplayable = 0
+    for n in song.notes:
+        n.slot = note_slot.get(n.inst_key, 0)
+        if n.slot == 0:
+            unplayable += 1
+    if unplayable:
+        vprint(f"  warning: {unplayable} note(s) dropped (unresolvable preset)")
+    song.notes = [n for n in song.notes if n.slot > 0]
+    if not song.notes:
+        sys.exit("error: no notes survived preset resolution")
+
+    # Pool = every sample referenced by a kept patch (canonical included), in
+    # deterministic first-reference order. Everything else is trimmed.
+    pool = []
+    seen = set()
+    for ti in layer_insts:
+        for p in ti.patches:
+            if id(p.ms) not in seen:              # dedupe by object identity, not equality
+                seen.add(id(p.ms))
+                pool.append(p.ms)
+
+    taud = assemble_taud(sf, song, layer_insts, meta_records, slot_name, pool, args)
+    sf.file.close()       # NOTE(review): skipped on any earlier sys.exit/exception — consider try/finally
+
+    with open(args.output, 'wb') as f:
+        f.write(taud)
+    print(f"wrote {len(taud)} bytes to '{args.output}'")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/taud_common.py b/taud_common.py
index 482ef72..6a9b707 100644
--- a/taud_common.py
+++ b/taud_common.py
@@ -545,18 +545,94 @@ def _name_table_blob(names) -> bytes:
 
 # ── Ixmp encoder (terranmon.txt §Project Data → Ixmp) ───────────────────────
 
-# Per-patch byte layout. Field offsets must match AudioJSR223Delegate.uploadInstrumentPatches
-# (Kotlin parser) and terranmon.txt "Ixmp. Instrument extra samples".
-IXMP_PATCH_SIZE     = 31
+# Per-patch byte layout. Field offsets / version flags must match
+# AudioJSR223Delegate.uploadInstrumentPatches (Kotlin parser) and terranmon.txt
+# "Ixmp. Instrument extra samples". Patches are VARIABLE LENGTH since 2026-06-13:
+# a version byte (feature bit-flags) + 30 common bytes + optional blocks. A
+# version byte with only the 'i' bit set yields the legacy 31-byte record.
+IXMP_COMMON_SIZE    = 31     # version byte + 30 common bytes (legacy record size)
 IXMP_PAN_NO_OVERRIDE = 0xFF
 IXMP_DNV_NO_OVERRIDE = 0
 IXMP_VIBWAVE_NO_OVERRIDE = 0xFF
 
+# Version byte feature bits (terranmon.txt 0b x00Pfpvi).
+IXMP_VER_I = 0x01            # always set (version 1)
+IXMP_VER_V = 0x02            # has volume envelope block
+IXMP_VER_P = 0x04            # has panning envelope block
+IXMP_VER_F = 0x08            # has filter envelope block
+IXMP_VER_PITCH = 0x10        # has pitch envelope block ('P')
+IXMP_VER_X = 0x80            # has extra-base-info block
+
+
+# "Perceptually Significant Octet to Decibel Table" → linear gain (octet → amplitude).
+# The canonical perceptual loudness curve shared by the engine (AudioAdapter.META_MIX_GAIN),
+# the Metainstrument layer mix volume, and the base/patch initialAttenuation octet.
+# Octet 0 = silence, 159 = unity (0 dB), 255 = +24 dB.
+META_GAIN = (
+    0.0, 5e-05, 5.6e-05, 6.3e-05, 7.1e-05, 7.9e-05, 8.9e-05, 0.0001,
+    0.000112, 0.000126, 0.000141, 0.000158, 0.000178, 0.0002, 0.000224, 0.000251,
+    0.000282, 0.000316, 0.000355, 0.000398, 0.000447, 0.000501, 0.000562, 0.000631,
+    0.000708, 0.000794, 0.000891, 0.001, 0.001122, 0.001259, 0.001413, 0.001585,
+    0.001778, 0.001995, 0.002239, 0.002512, 0.002818, 0.003162, 0.003548, 0.003981,
+    0.004467, 0.005012, 0.005623, 0.00631, 0.007079, 0.007943, 0.008913, 0.01,
+    0.01122, 0.012589, 0.014125, 0.015849, 0.017783, 0.019953, 0.022387, 0.025119,
+    0.028184, 0.031623, 0.035481, 0.039811, 0.044668, 0.050119, 0.056234, 0.063096,
+    0.066834, 0.070795, 0.074989, 0.079433, 0.08414, 0.089125, 0.094406, 0.1,
+    0.105925, 0.112202, 0.11885, 0.125893, 0.133352, 0.141254, 0.149624, 0.158489,
+    0.16788, 0.177828, 0.188365, 0.199526, 0.211349, 0.223872, 0.237137, 0.251189,
+    0.258523, 0.266073, 0.273842, 0.281838, 0.290068, 0.298538, 0.307256, 0.316228,
+    0.325462, 0.334965, 0.344747, 0.354813, 0.365174, 0.375837, 0.386812, 0.398107,
+    0.409732, 0.421697, 0.43401, 0.446684, 0.459727, 0.473151, 0.486968, 0.501187,
+    0.508452, 0.515822, 0.523299, 0.530884, 0.53858, 0.546387, 0.554307, 0.562341,
+    0.570493, 0.578762, 0.587151, 0.595662, 0.604296, 0.613056, 0.621942, 0.630957,
+    0.640103, 0.649382, 0.658795, 0.668344, 0.678032, 0.68786, 0.697831, 0.707946,
+    0.718208, 0.728618, 0.73918, 0.749894, 0.760764, 0.771792, 0.782979, 0.794328,
+    0.805842, 0.817523, 0.829373, 0.841395, 0.853591, 0.865964, 0.878517, 0.891251,
+    0.90417, 0.917276, 0.930572, 0.944061, 0.957745, 0.971628, 0.985712, 1.0,
+    1.014495, 1.029201, 1.044119, 1.059254, 1.074608, 1.090184, 1.105987, 1.122018,
+    1.138282, 1.154782, 1.171521, 1.188502, 1.20573, 1.223207, 1.240938, 1.258925,
+    1.277174, 1.295687, 1.314468, 1.333521, 1.352851, 1.372461, 1.392355, 1.412538,
+    1.433013, 1.453784, 1.474857, 1.496236, 1.517924, 1.539927, 1.562248, 1.584893,
+    1.607867, 1.631173, 1.654817, 1.678804, 1.703139, 1.727826, 1.752871, 1.778279,
+    1.804056, 1.830206, 1.856735, 1.883649, 1.910953, 1.938653, 1.966754, 1.995262,
+    2.053525, 2.113489, 2.175204, 2.238721, 2.304093, 2.371374, 2.440619, 2.511886,
+    2.585235, 2.660725, 2.73842, 2.818383, 2.900681, 2.985383, 3.072557, 3.162278,
+    3.254618, 3.349654, 3.447466, 3.548134, 3.651741, 3.758374, 3.868121, 3.981072,
+    4.216965, 4.466836, 4.731513, 5.011872, 5.308844, 5.623413, 5.956621, 6.309573,
+    6.683439, 7.079458, 7.498942, 7.943282, 8.413951, 8.912509, 9.440609, 10.0,
+    10.592537, 11.220185, 11.885022, 12.589254, 13.335214, 14.125375, 14.962357, 15.848932,
+)
+
+
+def atten_cb_to_octet(atten_cb: float) -> int:
+    """SF2 initialAttenuation (centibels, ≥0) → nearest [META_GAIN] octet in 1..159 (159 = 0 dB /
+    unity). Returns 159 for ≤0 attenuation and never 0 — octet 0 is the engine's "unset" sentinel
+    (treated as unity), so emitting it for a heavy attenuation would play the voice at full level."""
+    if atten_cb <= 0:
+        return 159
+    g = 10.0 ** (-atten_cb / 200.0)               # centibels → linear amplitude (200 cB = ×0.1)
+    return min(range(1, 160), key=lambda o: abs(META_GAIN[o] - g))
+
+
+def _encode_env_block(env: dict) -> bytes:
+    """Serialise one v/p/f/P envelope block (54 bytes): u16 LOOP word, u16 SUSTAIN word,
+    then exactly 25 (value, minifloat) byte pairs from env['nodes']. Missing 'loop' /
+    'sustain' default to 0; short node lists are padded with (last value, 0), long ones cut."""
+    head = struct.pack('<HH', int(env.get('loop', 0)) & 0xFFFF,
+                       int(env.get('sustain', 0)) & 0xFFFF)
+    pairs = list(env.get('nodes', []))[:25]
+    if len(pairs) < 25:
+        # Repeat the final value with a zero time byte (plain zeros when no node was given).
+        filler = (pairs[-1][0] if pairs else 0, 0)
+        pairs.extend([filler] * (25 - len(pairs)))
+    flat = [int(x) & 0xFF for value, mf in pairs for x in (value, mf)]
+    return head + bytes(flat)
+
 
 def encode_ixmp_patch(p: dict) -> bytes:
-    """Encode a single patch dict into 31 bytes.
+    """Encode one variable-length patch.
 
-    Expected keys (numeric values; defaults are applied for missing optional fields):
+    Common keys (numeric; defaults applied for missing optionals):
         pitch_start, pitch_end        : Taud 4096-TET noteVal (Uint16)
         volume_start, volume_end      : 0..63 (Uint8)
         sample_ptr                    : Uint32 (sample bin offset)
@@ -569,6 +645,16 @@ def encode_ixmp_patch(p: dict) -> bytes:
         default_note_volume           : Uint8 IT-scaled (0 = no override, default 0)
         vibrato_speed/sweep/depth/rate: Uint8 (default 0)
         vibrato_waveform              : Uint8 (0..7 or 0xFF for no override, default 0xFF)
+
+    Optional blocks (presence sets the version flag; appended in spec order x,v,p,f,P):
+        extra      : dict {fadeout (u16), default_cutoff (u16), default_resonance (u16),
+                           initial_attenuation (u8 dB-table octet),
+                           filter_sf_mode (bool — flag1 bit 0; SoundFont filter params)}
+                     → 'x' block (15 bytes)
+        vol_env    : env-block dict → 'v' block (54 bytes)
+        pan_env    : env-block dict → 'p' block
+        filter_env : env-block dict → 'f' block
+        pitch_env  : env-block dict → 'P' block
     """
     pitch_start = max(0, min(0xFFFF, int(p['pitch_start'])))
     pitch_end   = max(0, min(0xFFFF, int(p['pitch_end'])))
@@ -581,9 +667,23 @@ def encode_ixmp_patch(p: dict) -> bytes:
     loop_end    = max(0, min(0xFFFF, int(p.get('loop_end',   0))))
     rate        = max(0, min(0xFFFF, int(p.get('sampling_rate', 0))))
     detune      = max(-0x8000, min(0x7FFF, int(p.get('sample_detune', 0))))
-    return struct.pack(
+
+    extra  = p.get('extra')
+    vol_e  = p.get('vol_env')
+    pan_e  = p.get('pan_env')
+    filt_e = p.get('filter_env')
+    pit_e  = p.get('pitch_env')
+
+    ver = IXMP_VER_I
+    if extra  is not None: ver |= IXMP_VER_X
+    if vol_e  is not None: ver |= IXMP_VER_V
+    if pan_e  is not None: ver |= IXMP_VER_P
+    if filt_e is not None: ver |= IXMP_VER_F
+    if pit_e  is not None: ver |= IXMP_VER_PITCH
+
+    common = struct.pack(
         '<BHHBBIHHHHHhBBBBBBBB',
-        1,                                       # patch version
+        ver,                                     # patch version / feature flags
         pitch_start, pitch_end,
         vol_start,   vol_end,
         sample_ptr,
@@ -600,6 +700,22 @@ def encode_ixmp_patch(p: dict) -> bytes:
         int(p.get('vibrato_rate',  0))        & 0xFF,
         int(p.get('vibrato_waveform', IXMP_VIBWAVE_NO_OVERRIDE)) & 0xFF,
     )
+    out = bytearray(common)
+    if extra is not None:                        # 'x' block (15 bytes), spec order
+        # flags1 bit 0 (m): 0 = IT filter params, 1 = SoundFont (Fc cents / Q centibels).
+        flags1 = 0x01 if extra.get('filter_sf_mode') else 0x00
+        out += struct.pack('<I', flags1)         # Bit32 extra-feature-flags 1..32
+        out += struct.pack('<I', 0)              # Bit32 extra-feature-flags 33..64 (reserved)
+        out += struct.pack('<H', int(extra.get('fadeout', 0)) & 0xFFFF)
+        out += struct.pack('<H', int(extra.get('default_cutoff', 0xFFFF)) & 0xFFFF)
+        out += struct.pack('<H', int(extra.get('default_resonance', 0xFFFF)) & 0xFFFF)
+        # per-patch initialAttenuation as a dB-table octet (159 = unity); 0 = unset sentinel.
+        out.append(int(extra.get('initial_attenuation', 0)) & 0xFF)
+    if vol_e  is not None: out += _encode_env_block(vol_e)
+    if pan_e  is not None: out += _encode_env_block(pan_e)
+    if filt_e is not None: out += _encode_env_block(filt_e)
+    if pit_e  is not None: out += _encode_env_block(pit_e)
+    return bytes(out)
 
 
 def encode_ixmp_payload(patches_by_inst: dict) -> bytes:
diff --git a/terranmon.txt b/terranmon.txt
index 19e0976..285d2e2 100644
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -2170,9 +2170,11 @@ from source.
           beyond it; multiply by (255/64) and round. The XM samplewise
           volume goes into byte 196.
 172 Uint8  Volume Fadeout low bits
-173 Bit8   Volume Fadeout high bits
-        0b 0000 ffff
+173 Bit8   Volume Fadeout high bits & Filter interpretation mode
+        0b 000m ffff
             f: Volume Fadeout high bits (low nibble of byte 173; high nibble reserved, must be zero)
+            m-unset: Default Cutoff (offset 182) and Default Resonance (offset 183) work like ImpulseTracker
+            m-set: Default Cutoff (offset 182 << 8 | offset 252) and Default Resonance (offset 183 << 8 | offset 253) work like SoundFont (absolute cents for Fc, height above DC gain in centibels for Q)
         * Combined 12-bit unsigned value (range 0..4095). The engine maintains
           a per-voice fadeoutVolume ∈ [0, 1] initialised to 1.0 on note-on, and
           while the voice is in key-off or NNA Note-Fade state applies once per
@@ -2346,7 +2348,15 @@ from source.
 201 Bit16x25 Filter/Pitch envelopes ; if offset 19 specified 'Pitch', this field is automatically 'filter', and vice-versa
       Byte 1: Value (00..FF)
       Byte 2: Time until the next point, in seconds (3.5 Unsigned Minifloat, biased; range 0..15.75 s, smallest non-zero step 1/256 s ≈ 3.91 ms — chosen so single tracker ticks resolve at every supported BPM). 0 = hold at this point indefinitely.
-251..255 Reserved (5 bytes free for future per-instrument fields)
+251 Uint8  initialAttenuation ; static per-instrument gain as a "Perceptually Significant Octet
+        to Decibel Table" octet (octet 159 = 0 dB / unity; −6 dB = octet 111), same table as the
+        Metainstrument layer mix volume. **0 = unity (the unset sentinel)** so legacy files
+        (these bytes were reserved/zero) are unaffected. Applied by the mixer as a
+        velocity-INDEPENDENT amplitude multiplier; NOT folded into the volume envelope, so the
+        envelope keeps full 0..63 resolution. A per-patch Ixmp 'x' block carries its own override.
+252 Uint8  Default Cutoff low bits (SoundFont mode; see offset 173)
+253 Uint8  Default Resonance low bits (SoundFont mode; see offset 173)
+254..255 Reserved (2 bytes free for future per-instrument fields)
 
 ### Metainstrument definition
 
@@ -2369,7 +2379,7 @@ instrument record). Layer records begin at byte 4.
     Uint8  Mix volume according to "Perceptually Significant Octet to Decibel Table" (octet 159 = 0 dB / unity)
     Sint16 Sample detune (in 4096-TET unit)
     Uint16 Pitch start                 ; note-range low,  4096-TET noteVal (same scale as pattern-cell note); full range = 0x0000
-    Uint16 Pitch end (inclusive)       ; note-range high;                                                     full range = 0xFFFF
+    Uint16 Pitch end (inclusive)       ; note-range high; full range = 0xFFFF
     Uint8  Volume start                ; velocity/volume-range low,  0..0x3F; full range = 0x00
     Uint8  Volume end (inclusive)      ; velocity/volume-range high, 0..0x3F; full range = 0x3F
 
@@ -2766,7 +2776,30 @@ TODO:
         For UI concerns, taut_instredit.js will take care of it (aka problem for later)
     [x] .sf2 import module (for generic use, including "Import instrument from soundfont" and midi2taud conversion)
         [x] Midi2Taud using .mid and .sf2 as input, trim unused samples and Ixmp patches
-        [ ] auto-set optimal-ish Tickspeed and RPB by MIDI analysis?
+        [ ] .sf2 specific resample handling
+            1. If length exceeds 65535 samples, calculate resampling.
+            2. If calculated resampling >= 32000, use that.
+            3. If not, resample at 32000. If there is no loop defined, then loop the last 8192 samples (converter SHOULD NOT take that number at face value; perform waveform analysis to derive a smoother loop; converter MAY use that number as a starting number) and modify the fade value such that it decays to zero after 10 or so seconds of firing.
+        [ ] Faithful .sf2 "release segment": Set NNA to 'Note Fade' (incl. drumkits), and make sure Volume Fadeout has the correct value derived from the SF2 timecent unit (it seems SF2 defines the envelope floor as 100 dB; needs checking)
+        [ ] auto-set optimal-ish Tickspeed and RPB using MIDI Time Signature events and note analysis. Break pattern when Time Signature changes.
+
+            Time Signature
+
+            FF 58 04 nn dd cc bb
+
+            Time signature is expressed as 4 numbers. nn and dd represent the "numerator" and "denominator" of the signature as notated on sheet music. The denominator is a negative power of 2: 2 = quarter note, 3 = eighth, etc.
+
+            The cc expresses the number of MIDI clocks in a metronome click.
+
+            The bb parameter expresses the number of notated 32nd notes in a MIDI quarter note (24 MIDI clocks). This event allows a program to relate what MIDI thinks of as a quarter, to something entirely different.
+
+            For example, 6/8 time with a metronome click every 3 eighth notes and 24 clocks per quarter note would be the following event:
+
+            FF 58 04 06 03 18 08
+
+            NOTE: If there are no time signature events in a MIDI file, then the time signature is assumed to be 4/4.
+
+            In a format 0 file, the time signature changes are scattered throughout the one MTrk. In format 1, the very first MTrk should consist of only the time signature (and tempo) events so that it could be read by some device capable of generating a "tempo map". It is best not to place MIDI events in this MTrk. In format 2, each MTrk should begin with at least one initial time signature (and tempo) event.
 
 TODO - list of demo songs that MUST ship with Microtone:
     * 4THSYM (rename to Fourth Symmetriad) — excellent piece for demonstrating NNAs and filter envelopes
@@ -3164,11 +3197,15 @@ prefixes:
         Uint8  vibratoRate                 ; per-sample auto-vibrato (mirrors base inst byte 188)
         Uint8  vibratoWaveform             ; bits 0-2 only (mirrors instrumentFlag bits 2-4); 0xFF = "no override"
         * Patch definition flag 'x'
-        Bit32  Extra feature flags 1..32 (reserved; keep it as all-unset)
+        Bit32  Extra feature flags 1..32
+            0b 0000 000m ; 0b 0000 0000 ; 0b 0000 0000 ; 0b 0000 0000
+                m-unset: filter params (Fc and Q) work like ImpulseTracker
+                m-set: filter params (Fc and Q) work like SoundFont
         Bit32  Extra feature flags 33..64 (reserved; keep it as all-unset)
         Uint16 Volume Fadeout    ; same encoding as identical base instrument byte 172-173
-        Uint8  Default cutoff     ; identical to base instrument byte 182
-        Uint8  Default resonance  ; identical to base instrument byte 183
+        Uint16 Default cutoff     ; identical to base instrument byte 182 and 252
+        Uint16 Default resonance  ; identical to base instrument byte 183 and 253
+        Uint8  SF2 Initial Attenuation according to "Perceptually Significant Octet to Decibel Table" (octet 159 = 0 dB, attenuation by 6 dB = octet 111). 0 = unity (unset sentinel). Overrides the base record's byte-251 attenuation for this patch; applied as a velocity-independent gain, NOT folded into the envelope.
         * Patch definition flag 'v'
         Bit16  Volume envelope LOOP word    ; identical to base instrument byte 15..16
         Bit16  Volume envelope SUSTAIN word ; identical to base instrument byte 189..190
diff --git a/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
index e3190e3..16a7555 100644
--- a/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/AudioJSR223Delegate.kt
@@ -243,17 +243,17 @@ class AudioJSR223Delegate(private val vm: VM) {
         return v.envPanTimeSec
     }
 
-    /** Pitch/filter-envelope segment index — see [getVoiceEnvVolIndex]. */
+    /** Pitch-envelope segment index — see [getVoiceEnvVolIndex]. */
     fun getVoiceEnvPitchIndex(playhead: Int, voice: Int): Int {
         val v = getPlayhead(playhead)?.trackerState?.voices?.getOrNull(voice.coerceIn(0, 19)) ?: return -1
         if (!v.active) return -1
-        return v.envPfIndex
+        return v.envPitchIndex
     }
-    /** Seconds elapsed into the current pitch/filter-envelope segment. */
+    /** Seconds elapsed into the current pitch-envelope segment. */
     fun getVoiceEnvPitchTime(playhead: Int, voice: Int): Double {
         val v = getPlayhead(playhead)?.trackerState?.voices?.getOrNull(voice.coerceIn(0, 19)) ?: return 0.0
         if (!v.active) return 0.0
-        return v.envPfTimeSec
+        return v.envPitchTimeSec
     }
 
     /** Set the starting row for the next play call, resetting per-row timing and silencing active voices. */
@@ -269,26 +269,26 @@ class AudioJSR223Delegate(private val vm: VM) {
         }
     }
 
-    /** Upload up to 192 bytes defining instrument `slot` (0-255). */
+    /** Upload up to 256 bytes defining instrument `slot` (0-255). (The record was
+     *  widened from 192 to 256 bytes on 2026-05-06; the old cap silently dropped
+     *  the pan/pf SUSTAIN-word tails, DCT/DCA and the Default Note Volume byte.) */
     fun uploadInstrument(slot: Int, bytes: IntArray) {
         getFirstSnd()?.instruments?.get(slot and 0xFF)?.let { inst ->
-            for (i in 0 until minOf(192, bytes.size)) inst.setByte(i, bytes[i] and 0xFF)
+            val rec = IntArray(256)
+            for (i in 0 until minOf(256, bytes.size)) rec[i] = bytes[i] and 0xFF
+            inst.loadRecord(rec)   // detects the Metainstrument sentinel; else per-byte fields
         }
     }
 
-    /** Upload an Ixmp "extra samples" block for instrument [slot] (0-255). The payload is
-     *  a flat byte array of `count × 31` patch records — see terranmon.txt "Ixmp. Instrument
-     *  extra samples" for the on-wire field layout. Passing an empty array clears any
-     *  previously-installed patches on this instrument. */
+    /** Upload an Ixmp "extra samples" block for instrument [slot] (0-255). Patches are
+     *  VARIABLE LENGTH (since 2026-06-13): each begins with a version byte (feature
+     *  bit-flags 0b x00Pfpvi) + 30 common bytes, optionally followed by the x/v/p/f/P
+     *  blocks in that order — see terranmon.txt "Ixmp. Instrument extra samples". A
+     *  version byte with only the 'i' bit set is the legacy 31-byte record. Passing an
+     *  empty array clears any previously-installed patches on this instrument. */
     fun uploadInstrumentPatches(slot: Int, bytes: IntArray) {
         val inst = getFirstSnd()?.instruments?.get(slot and 0xFF) ?: return
-        val recordSize = 31
-        if (bytes.isEmpty() || bytes.size < recordSize) {
-            inst.extraPatches = null
-            return
-        }
-        val count = bytes.size / recordSize
-        if (count == 0) { inst.extraPatches = null; return }
+        if (bytes.size < 31) { inst.extraPatches = null; return }
         fun u8 (o: Int) = bytes[o] and 0xFF
         fun u16(o: Int) = (bytes[o] and 0xFF) or ((bytes[o + 1] and 0xFF) shl 8)
         fun s16(o: Int): Int { val v = u16(o); return if (v >= 0x8000) v - 0x10000 else v }
@@ -296,11 +296,39 @@ class AudioJSR223Delegate(private val vm: VM) {
                           ((bytes[o + 1] and 0xFF) shl 8) or
                           ((bytes[o + 2] and 0xFF) shl 16) or
                           ((bytes[o + 3] and 0xFF) shl 24)
-        val patches = Array(count) { i ->
-            val o = i * recordSize
-            // Patch version byte at offset 0 is parsed but only version 1 is recognised;
-            // a future version bump would gate alternate field layouts here.
-            AudioAdapter.TaudInstPatch(
+        val patches = ArrayList<AudioAdapter.TaudInstPatch>()
+        var o = 0
+        while (o + 31 <= bytes.size) {
+            val ver = u8(o)
+            var p = o + 31                       // version byte + 30 common bytes
+            // Optional blocks, walked in the canonical on-wire order x, v, p, f, P.
+            var hasExtra = false; var fadeoutStep = 0; var extraCutoff = 0xFF; var extraResonance = 0xFF
+            var extraAttenOctet = 0; var filterSfMode = false
+            if (ver and 0x80 != 0) {             // 'x' block (15 bytes): u32 flags1 + u32 flags2 + u16 fadeout + u16 cutoff + u16 reson + u8 initialAttenuation octet
+                if (p + 15 > bytes.size) break
+                filterSfMode = (u8(p) and 0x01) != 0           // flags1 bit 0: 0 = IT filter, 1 = SoundFont
+                fadeoutStep = u16(p + 8); extraCutoff = u16(p + 10); extraResonance = u16(p + 12)
+                extraAttenOctet = u8(p + 14)
+                hasExtra = true; p += 15
+            }
+            fun readEnv(): Triple<Array<AudioAdapter.TaudInstEnvPoint>, Int, Int>? {
+                if (p + 54 > bytes.size) return null
+                val loop = u16(p); val sus = u16(p + 2)
+                val arr = Array(25) { k ->
+                    AudioAdapter.TaudInstEnvPoint(u8(p + 4 + 2 * k), ThreeFiveMiniUfloat(u8(p + 5 + 2 * k)))
+                }
+                p += 54
+                return Triple(arr, loop, sus)
+            }
+            var volEnv: Array<AudioAdapter.TaudInstEnvPoint>? = null; var volLoop = 0; var volSus = 0
+            var panEnv: Array<AudioAdapter.TaudInstEnvPoint>? = null; var panLoop = 0; var panSus = 0
+            var filEnv: Array<AudioAdapter.TaudInstEnvPoint>? = null; var filLoop = 0; var filSus = 0
+            var pitEnv: Array<AudioAdapter.TaudInstEnvPoint>? = null; var pitLoop = 0; var pitSus = 0
+            if (ver and 0x02 != 0) { val e = readEnv() ?: break; volEnv = e.first; volLoop = e.second; volSus = e.third }
+            if (ver and 0x04 != 0) { val e = readEnv() ?: break; panEnv = e.first; panLoop = e.second; panSus = e.third }
+            if (ver and 0x08 != 0) { val e = readEnv() ?: break; filEnv = e.first; filLoop = e.second; filSus = e.third }
+            if (ver and 0x10 != 0) { val e = readEnv() ?: break; pitEnv = e.first; pitLoop = e.second; pitSus = e.third }
+            patches.add(AudioAdapter.TaudInstPatch(
                 pitchStart        = u16(o + 1),
                 pitchEnd          = u16(o + 3),
                 volumeStart       = u8 (o + 5),
@@ -319,16 +347,65 @@ class AudioJSR223Delegate(private val vm: VM) {
                 vibratoSweep      = u8 (o + 27),
                 vibratoDepth      = u8 (o + 28),
                 vibratoRate       = u8 (o + 29),
-                vibratoWaveform   = u8 (o + 30)
-            )
+                vibratoWaveform   = u8 (o + 30),
+                volEnv = volEnv, volEnvLoop = volLoop, volEnvSustain = volSus,
+                panEnv = panEnv, panEnvLoop = panLoop, panEnvSustain = panSus,
+                filterEnv = filEnv, filterEnvLoop = filLoop, filterEnvSustain = filSus,
+                pitchEnv = pitEnv, pitchEnvLoop = pitLoop, pitchEnvSustain = pitSus,
+                hasExtra = hasExtra, fadeoutStep = fadeoutStep, filterSfMode = filterSfMode,
+                extraCutoff = extraCutoff, extraResonance = extraResonance,
+                extraInitialAttenOctet = extraAttenOctet
+            ))
+            o = p
         }
-        inst.extraPatches = patches
+        inst.extraPatches = if (patches.isEmpty()) null else patches.toTypedArray()
     }
 
     /** Number of Ixmp patches currently installed on instrument [slot], or 0 if none. */
     fun getInstrumentPatchCount(slot: Int): Int =
         getFirstSnd()?.instruments?.get(slot and 0xFF)?.extraPatches?.size ?: 0
 
+    /** Read back instrument [slot]'s Ixmp patches as a flat variable-length byte array in
+     *  the upload wire format (exact inverse of [uploadInstrumentPatches]) so capture
+     *  code can re-emit the Ixmp project-data section. Empty array when none. */
+    fun getInstrumentPatches(slot: Int): IntArray {
+        val patches = getFirstSnd()?.instruments?.get(slot and 0xFF)?.extraPatches
+            ?: return IntArray(0)
+        val out = ArrayList<Int>(patches.size * 31)
+        fun w8(v: Int)  { out.add(v and 0xFF) }
+        fun w16(v: Int) { out.add(v and 0xFF); out.add((v ushr 8) and 0xFF) }
+        fun w32(v: Int) { w16(v); w16(v ushr 16) }
+        fun wEnv(env: Array<AudioAdapter.TaudInstEnvPoint>, loop: Int, sus: Int) {
+            w16(loop); w16(sus)
+            for (k in 0 until 25) { w8(env[k].value); w8(env[k].offset.index) }
+        }
+        patches.forEach { p ->
+            // Reconstruct the version byte from which optional blocks are present.
+            var ver = 0x01
+            if (p.hasExtra)         ver = ver or 0x80
+            if (p.volEnv != null)   ver = ver or 0x02
+            if (p.panEnv != null)   ver = ver or 0x04
+            if (p.filterEnv != null) ver = ver or 0x08
+            if (p.pitchEnv != null) ver = ver or 0x10
+            w8(ver)
+            w16(p.pitchStart); w16(p.pitchEnd)
+            w8(p.volumeStart); w8(p.volumeEnd)
+            w32(p.samplePtr)
+            w16(p.sampleLength); w16(p.playStart); w16(p.loopStart); w16(p.loopEnd)
+            w16(p.samplingRate); w16(p.sampleDetune)     // two's complement round-trips
+            w8(p.loopMode); w8(p.defaultPan); w8(p.defaultNoteVolume)
+            w8(p.vibratoSpeed); w8(p.vibratoSweep); w8(p.vibratoDepth)
+            w8(p.vibratoRate); w8(p.vibratoWaveform)
+            // Blocks in the canonical on-wire order x, v, p, f, P.
+            if (p.hasExtra) { w32(if (p.filterSfMode) 1 else 0); w32(0); w16(p.fadeoutStep); w16(p.extraCutoff); w16(p.extraResonance); w8(p.extraInitialAttenOctet) }
+            p.volEnv?.let    { wEnv(it, p.volEnvLoop, p.volEnvSustain) }
+            p.panEnv?.let    { wEnv(it, p.panEnvLoop, p.panEnvSustain) }
+            p.filterEnv?.let { wEnv(it, p.filterEnvLoop, p.filterEnvSustain) }
+            p.pitchEnv?.let  { wEnv(it, p.pitchEnvLoop, p.pitchEnvSustain) }
+        }
+        return out.toIntArray()
+    }
+
     /** Clear any Ixmp patches previously uploaded to instrument [slot]. */
     fun clearInstrumentPatches(slot: Int) {
         getFirstSnd()?.instruments?.get(slot and 0xFF)?.extraPatches = null
@@ -430,9 +507,17 @@ class AudioJSR223Delegate(private val vm: VM) {
             null, snd.sampleBin.ptr,
             sampleSize.toLong()
         )
-        for (i in 0 until instSize) {
-            snd.instruments[i / 256].setByte(i % 256, bytes[sampleSize + i].toInt() and 0xFF)
+        val rec = IntArray(256)
+        for (instIdx in 0 until (instSize / 256)) {
+            val base = sampleSize + instIdx * 256
+            for (k in 0 until 256) rec[k] = bytes[base + k].toInt() and 0xFF
+            snd.instruments[instIdx].loadRecord(rec)   // meta-aware
         }
+        // The blob replaces the entire sample+instrument image, so any Ixmp patches
+        // installed for the previous song are now stale (they point into the old
+        // sample pool). Drop them all; the loader re-uploads the new song's Ixmp
+        // section (if any) after this call.
+        snd.instruments.forEach { it.extraPatches = null }
         return bytes.size
     }
 
diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
index 82577c7..e519a54 100644
--- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
+++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt
@@ -131,7 +131,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         const val SCOPE_BUFFER_SIZE = 2048
         // Mixer-private background-voice pool size per playhead. NNA "Continue/Note Off/Note Fade"
         // ghosts displaced foreground voices into this pool; oldest is evicted on overflow.
-        const val MAX_BG_VOICES = 64
+        const val MAX_BG_VOICES = 256
         const val MIDDLE_C = 0x5000   // reference C for instrument samplingRate (terranmon.txt:2000)
         // Amiga period at MIDDLE_C for a standard 8363 Hz instrument (NTSC clock 3579545 Hz).
         // PT "C-2" period 428 ↔ TSVM MIDDLE_C ↔ 8363 Hz; mod2taud uses the same convention.
@@ -1360,6 +1360,44 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
     // Active-sample-aware playback rate. Reads from the Voice's snapshotted sample
     // view (set by [applyActiveSample]) so Ixmp-overlaid instruments use the patch's
     // samplingRate / detune, not the base inst's.
+    // Metainstrument layer mix-gain lookup — the "Perceptually Significant Octet to
+    // Decibel Table" (terranmon.txt) converted to linear amplitude (10^(dB/20)).
+    // Octet 0 = silence, 159 = unity (0 dB), 255 = +24 dB.
+    private val META_MIX_GAIN = doubleArrayOf(
+        0.0, 5e-05, 5.6e-05, 6.3e-05, 7.1e-05, 7.9e-05, 8.9e-05, 0.0001,
+        0.000112, 0.000126, 0.000141, 0.000158, 0.000178, 0.0002, 0.000224, 0.000251,
+        0.000282, 0.000316, 0.000355, 0.000398, 0.000447, 0.000501, 0.000562, 0.000631,
+        0.000708, 0.000794, 0.000891, 0.001, 0.001122, 0.001259, 0.001413, 0.001585,
+        0.001778, 0.001995, 0.002239, 0.002512, 0.002818, 0.003162, 0.003548, 0.003981,
+        0.004467, 0.005012, 0.005623, 0.00631, 0.007079, 0.007943, 0.008913, 0.01,
+        0.01122, 0.012589, 0.014125, 0.015849, 0.017783, 0.019953, 0.022387, 0.025119,
+        0.028184, 0.031623, 0.035481, 0.039811, 0.044668, 0.050119, 0.056234, 0.063096,
+        0.066834, 0.070795, 0.074989, 0.079433, 0.08414, 0.089125, 0.094406, 0.1,
+        0.105925, 0.112202, 0.11885, 0.125893, 0.133352, 0.141254, 0.149624, 0.158489,
+        0.16788, 0.177828, 0.188365, 0.199526, 0.211349, 0.223872, 0.237137, 0.251189,
+        0.258523, 0.266073, 0.273842, 0.281838, 0.290068, 0.298538, 0.307256, 0.316228,
+        0.325462, 0.334965, 0.344747, 0.354813, 0.365174, 0.375837, 0.386812, 0.398107,
+        0.409732, 0.421697, 0.43401, 0.446684, 0.459727, 0.473151, 0.486968, 0.501187,
+        0.508452, 0.515822, 0.523299, 0.530884, 0.53858, 0.546387, 0.554307, 0.562341,
+        0.570493, 0.578762, 0.587151, 0.595662, 0.604296, 0.613056, 0.621942, 0.630957,
+        0.640103, 0.649382, 0.658795, 0.668344, 0.678032, 0.68786, 0.697831, 0.707946,
+        0.718208, 0.728618, 0.73918, 0.749894, 0.760764, 0.771792, 0.782979, 0.794328,
+        0.805842, 0.817523, 0.829373, 0.841395, 0.853591, 0.865964, 0.878517, 0.891251,
+        0.90417, 0.917276, 0.930572, 0.944061, 0.957745, 0.971628, 0.985712, 1.0,
+        1.014495, 1.029201, 1.044119, 1.059254, 1.074608, 1.090184, 1.105987, 1.122018,
+        1.138282, 1.154782, 1.171521, 1.188502, 1.20573, 1.223207, 1.240938, 1.258925,
+        1.277174, 1.295687, 1.314468, 1.333521, 1.352851, 1.372461, 1.392355, 1.412538,
+        1.433013, 1.453784, 1.474857, 1.496236, 1.517924, 1.539927, 1.562248, 1.584893,
+        1.607867, 1.631173, 1.654817, 1.678804, 1.703139, 1.727826, 1.752871, 1.778279,
+        1.804056, 1.830206, 1.856735, 1.883649, 1.910953, 1.938653, 1.966754, 1.995262,
+        2.053525, 2.113489, 2.175204, 2.238721, 2.304093, 2.371374, 2.440619, 2.511886,
+        2.585235, 2.660725, 2.73842, 2.818383, 2.900681, 2.985383, 3.072557, 3.162278,
+        3.254618, 3.349654, 3.447466, 3.548134, 3.651741, 3.758374, 3.868121, 3.981072,
+        4.216965, 4.466836, 4.731513, 5.011872, 5.308844, 5.623413, 5.956621, 6.309573,
+        6.683439, 7.079458, 7.498942, 7.943282, 8.413951, 8.912509, 9.440609, 10.0,
+        10.592537, 11.220185, 11.885022, 12.589254, 13.335214, 14.125375, 14.962357, 15.848932
+    )
+
     private fun computePlaybackRate(voice: Voice, noteVal: Int): Double =
         voice.activeSamplingRate.toDouble() / SAMPLING_RATE *
         2.0.pow((noteVal - MIDDLE_C + voice.activeSampleDetune) / 4096.0)
@@ -1405,8 +1443,78 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             voice.activeVibratoWaveform  =
                 if (patch.vibratoWaveform == 0xFF) inst.vibratoWaveform else patch.vibratoWaveform
         }
+        resolveActiveEnvelopes(voice, inst, patch)
     }
 
+    /**
+     * Snapshot the active volume / pan / pitch / filter envelopes and the fadeout +
+     * cutoff + resonance scalars onto [voice] from either the base instrument or a
+     * resolved Ixmp patch. Called by [applyActiveSample] (every trigger).
+     *
+     * The base instrument exposes two pf-envelope slots (bytes 19.. and 197..); each is
+     * routed into the pitch or filter role by its m-bit (LOOP-word bit 7). A patch's 'P'
+     * (pitch) / 'f' (filter) blocks override the corresponding role; its 'v' / 'p' / 'x'
+     * blocks override the volume / pan envelopes and the fadeout/cutoff/resonance. Any
+     * block the patch does not carry defers to the base instrument.
+     */
+    private fun resolveActiveEnvelopes(voice: Voice, inst: TaudInst, patch: TaudInstPatch?) {
+        val volEnv = patch?.volEnv
+        if (volEnv != null) {
+            voice.activeVolEnv = volEnv; voice.activeVolEnvLoop = patch.volEnvLoop; voice.activeVolEnvSustain = patch.volEnvSustain
+        } else {
+            voice.activeVolEnv = inst.volEnvelopes; voice.activeVolEnvLoop = inst.volEnvLoop; voice.activeVolEnvSustain = inst.volEnvSustainWord
+        }
+        val panEnv = patch?.panEnv
+        if (panEnv != null) {
+            voice.activePanEnv = panEnv; voice.activePanEnvLoop = patch.panEnvLoop; voice.activePanEnvSustain = patch.panEnvSustain
+        } else {
+            voice.activePanEnv = inst.panEnvelopes; voice.activePanEnvLoop = inst.panEnvLoop; voice.activePanEnvSustain = inst.panEnvSustainWord
+        }
+
+        // Pitch + filter: route the base inst's two pf-slots by their m-bit, then let the
+        // patch override the matching role. m-bit (LOOP-word bit 7): 0 = pitch, 1 = filter.
+        var pitEnv = inst.pfEnvelopes;  var pitLoop = 0; var pitSus = 0; var pitOn = false
+        var filEnv = inst.pfEnvelopes;  var filLoop = 0; var filSus = 0; var filOn = false
+        // base slot 1 (bytes 19..)
+        if (envPresent(inst.pfEnvLoop)) {
+            if ((inst.pfEnvLoop ushr 7) and 1 != 0) { filEnv = inst.pfEnvelopes; filLoop = inst.pfEnvLoop; filSus = inst.pfEnvSustainWord; filOn = true }
+            else                                    { pitEnv = inst.pfEnvelopes; pitLoop = inst.pfEnvLoop; pitSus = inst.pfEnvSustainWord; pitOn = true }
+        }
+        // base slot 2 (bytes 197..)
+        if (envPresent(inst.pf2EnvLoop)) {
+            if ((inst.pf2EnvLoop ushr 7) and 1 != 0) { filEnv = inst.pf2Envelopes; filLoop = inst.pf2EnvLoop; filSus = inst.pf2EnvSustainWord; filOn = true }
+            else                                     { pitEnv = inst.pf2Envelopes; pitLoop = inst.pf2EnvLoop; pitSus = inst.pf2EnvSustainWord; pitOn = true }
+        }
+        // patch overrides by role
+        val pPit = patch?.pitchEnv
+        if (pPit != null)  { pitEnv = pPit; pitLoop = patch.pitchEnvLoop; pitSus = patch.pitchEnvSustain; pitOn = envPresent(patch.pitchEnvLoop) }
+        val pFil = patch?.filterEnv
+        if (pFil != null)  { filEnv = pFil; filLoop = patch.filterEnvLoop; filSus = patch.filterEnvSustain; filOn = envPresent(patch.filterEnvLoop) }
+        voice.activePitchEnv = pitEnv;  voice.activePitchEnvLoop = pitLoop;  voice.activePitchEnvSustain = pitSus;  voice.hasPitchEnv = pitOn
+        voice.activeFilterEnv = filEnv; voice.activeFilterEnvLoop = filLoop; voice.activeFilterEnvSustain = filSus; voice.hasFilterEnv = filOn
+
+        // Fadeout / cutoff / resonance / initialAttenuation / filter mode (patch 'x' block,
+        // else base inst). In SF mode the cutoff/resonance are 16-bit (cents / centibels).
+        if (patch != null && patch.hasExtra) {
+            voice.activeFadeoutStep = patch.fadeoutStep
+            voice.filterSfMode = patch.filterSfMode
+            voice.activeDefaultCutoff = patch.extraCutoff
+            voice.activeDefaultResonance = patch.extraResonance
+            voice.activeAttenGain = attenGainOf(patch.extraInitialAttenOctet)
+        } else {
+            voice.activeFadeoutStep = inst.volumeFadeoutLow or ((inst.fadeoutHigh and 0x0F) shl 8)
+            voice.filterSfMode = inst.filterSfMode
+            voice.activeDefaultCutoff = inst.defaultCutoff16
+            voice.activeDefaultResonance = inst.defaultResonance16
+            voice.activeAttenGain = attenGainOf(inst.initialAttenOctet)
+        }
+    }
+
+    /** initialAttenuation octet ("Perceptually Significant Octet to Decibel Table") → linear
+     *  amplitude multiplier. Octet 0 is the unset sentinel (= unity); 159 = 0 dB; 111 = −6 dB. */
+    private fun attenGainOf(octet: Int): Double =
+        if (octet <= 0) 1.0 else META_MIX_GAIN[octet and 0xFF]
+
     // Convert a 4096-TET noteVal to its Amiga-period equivalent (Double, no rounding).
     private fun noteValToAmigaPeriod(noteVal: Int): Double =
         AMIGA_BASE_PERIOD * 2.0.pow(-(noteVal - MIDDLE_C).toDouble() / 4096.0)
@@ -1509,8 +1617,41 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
     private val volWrap = IntArray(2)
     private val panWrap = IntArray(2)
     private val pfWrap  = IntArray(2)
+    // Scratch in/out boxes for advancePfRole (shared pitch+filter walk). Single-threaded
+    // per playhead; pitch and filter advance sequentially within one tick.
+    private val pfIdxBox  = IntArray(1)
+    private val pfTimeBox = DoubleArray(1)
 
-    private fun advanceEnvelope(voice: Voice, inst: TaudInst, tickSec: Double) {
+    /**
+     * "Key Lift" (instrument flag byte 186 bit 5; NNA pattern 0b100): MIDI-exact
+     * key release. IT key-off semantics only release the SUSTAIN wrap — the
+     * envelope playhead still walks the remainder of the pre-sustain nodes
+     * (hold/decay) before it ever reaches the release nodes, which makes
+     * held-style instruments (SF2 imports with multi-second hold/decay) ring
+     * long past the key-up like a depressed sustain pedal. A Key Lift
+     * instrument instead jumps the volume-envelope playhead straight to the
+     * sustain-end node on key-off, so the post-sustain (release) nodes play
+     * immediately — exactly what a MIDI synth does when the key is lifted.
+     *
+     * Call wherever `keyOff = true` is applied to a voice (pattern KEY_OFF,
+     * NNA Note Off ghosts, DCA Note Off, past-note S$71). The level step from
+     * the current envelope value to the sustain node is absorbed by the
+     * per-sample envVolMix smoothing.
+     */
+    private fun applyKeyLift(voice: Voice, inst: TaudInst) {
+        if (!inst.nnaKeyLift) return
+        // The volume envelope and its sustain word are the ACTIVE (patch-or-base) ones,
+        // so per-patch SF2 ADSR layers each jump to their own sustain-end node on key-off.
+        val sus = voice.activeVolEnvSustain
+        if ((sus ushr 5) and 1 == 0) return        // no sustain region — nothing to jump to
+        val susEnd = sus and 0x1F
+        if (voice.envIndex >= susEnd) return       // already at/past the release boundary
+        voice.envIndex = susEnd
+        voice.envTimeSec = 0.0
+        voice.envVolume = (voice.activeVolEnv[susEnd].value / 63.0).coerceIn(0.0, 1.0)
+    }
+
+    private fun advanceEnvelope(voice: Voice, tickSec: Double) {
         val maxIdx = 24
 
         // Volume envelope. Evaluation is gated only by voice.volEnvOn (toggled by S$7/$8);
@@ -1523,21 +1664,23 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         // rule below would never fire — voices would hang forever on key-off / NNA-Continue.
         // Default-only envelopes (single full-volume point at value 63 with offset 0) are
         // safe to evaluate: the engine just holds at envVolume = 1.0, no audible effect.
+        // The envelope read is the ACTIVE (patch-or-base) one — see resolveActiveEnvelopes.
+        val volEnv = voice.activeVolEnv
         if (voice.volEnvOn) {
-            resolveEnvWrap(inst.volEnvLoop, inst.volEnvSustainWord, voice.keyOff, volWrap)
+            resolveEnvWrap(voice.activeVolEnvLoop, voice.activeVolEnvSustain, voice.keyOff, volWrap)
             val wStart = volWrap[0]
             val wEnd   = volWrap[1]
             val wrapping = wStart >= 0
 
             if (wrapping && voice.envIndex == wEnd && wStart == wEnd) {
                 // Hold at the wrap point (FT2 single-point sustain).
-                voice.envVolume = (inst.volEnvelopes[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
+                voice.envVolume = (volEnv[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
             } else if (wrapping && voice.envIndex == wEnd) {
                 voice.envTimeSec = 0.0
                 voice.envIndex = wStart
-                voice.envVolume = (inst.volEnvelopes[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
+                voice.envVolume = (volEnv[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
             } else if (voice.envIndex >= maxIdx) {
-                val vEnd = inst.volEnvelopes[maxIdx].value
+                val vEnd = volEnv[maxIdx].value
                 voice.envVolume = (vEnd / 63.0).coerceIn(0.0, 1.0)
                 // Schism's "envelope-end + last-value-0 ⇒ cut" rule (player/sndmix.c:493-498):
                 // applies only in fall-through (no active sustain or loop wrap) since Schism
@@ -1549,8 +1692,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 // fires.
                 if (vEnd == 0 && !wrapping) startRampOut(voice)
             } else {
-                val vOffset = inst.volEnvelopes[voice.envIndex].offset.toDouble()
-                val vCurValue = inst.volEnvelopes[voice.envIndex].value
+                val vOffset = volEnv[voice.envIndex].offset.toDouble()
+                val vCurValue = volEnv[voice.envIndex].value
                 if (vOffset == 0.0) {
                     // Reached a terminator point — envelope holds here.
                     voice.envVolume = (vCurValue / 63.0).coerceIn(0.0, 1.0)
@@ -1563,10 +1706,10 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                         val nextIdx = if (wrapping && voice.envIndex == wEnd) wStart
                                       else (voice.envIndex + 1).coerceAtMost(maxIdx)
                         voice.envIndex = nextIdx
-                        voice.envVolume = (inst.volEnvelopes[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
+                        voice.envVolume = (volEnv[voice.envIndex].value / 63.0).coerceIn(0.0, 1.0)
                     } else {
                         val cur = (vCurValue / 63.0).coerceIn(0.0, 1.0)
-                        val nxt = (inst.volEnvelopes[(voice.envIndex + 1).coerceAtMost(maxIdx)].value / 63.0).coerceIn(0.0, 1.0)
+                        val nxt = (volEnv[(voice.envIndex + 1).coerceAtMost(maxIdx)].value / 63.0).coerceIn(0.0, 1.0)
                         voice.envVolume = cur + (nxt - cur) * (voice.envTimeSec / vOffset)
                     }
                 }
@@ -1579,23 +1722,24 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         // envelope marked "present but no wrap" still walks forward, matching the IT
         // idiom (pan-env flag=0x01) and Schism player/sndmix.c:470-502.
         if (!voice.hasPanEnv || !voice.panEnvOn) return
-        resolveEnvWrap(inst.panEnvLoop, inst.panEnvSustainWord, voice.keyOff, panWrap)
+        val panEnv = voice.activePanEnv
+        resolveEnvWrap(voice.activePanEnvLoop, voice.activePanEnvSustain, voice.keyOff, panWrap)
         val pStart = panWrap[0]
         val pEnd   = panWrap[1]
         val pWrapping = pStart >= 0
 
         if (pWrapping && voice.envPanIndex == pEnd && pStart == pEnd) {
-            voice.envPan = inst.panEnvelopes[voice.envPanIndex].value / 255.0
+            voice.envPan = panEnv[voice.envPanIndex].value / 255.0
         } else if (pWrapping && voice.envPanIndex == pEnd) {
             voice.envPanTimeSec = 0.0
             voice.envPanIndex = pStart
-            voice.envPan = inst.panEnvelopes[voice.envPanIndex].value / 255.0
+            voice.envPan = panEnv[voice.envPanIndex].value / 255.0
         } else if (voice.envPanIndex >= maxIdx) {
-            voice.envPan = inst.panEnvelopes[maxIdx].value / 255.0
+            voice.envPan = panEnv[maxIdx].value / 255.0
         } else {
-            val pOffset = inst.panEnvelopes[voice.envPanIndex].offset.toDouble()
+            val pOffset = panEnv[voice.envPanIndex].offset.toDouble()
             if (pOffset == 0.0) {
-                voice.envPan = inst.panEnvelopes[voice.envPanIndex].value / 255.0
+                voice.envPan = panEnv[voice.envPanIndex].value / 255.0
             } else {
                 voice.envPanTimeSec += tickSec
                 if (voice.envPanTimeSec >= pOffset) {
@@ -1603,10 +1747,10 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                     val nextIdx = if (pWrapping && voice.envPanIndex == pEnd) pStart
                                   else (voice.envPanIndex + 1).coerceAtMost(maxIdx)
                     voice.envPanIndex = nextIdx
-                    voice.envPan = inst.panEnvelopes[voice.envPanIndex].value / 255.0
+                    voice.envPan = panEnv[voice.envPanIndex].value / 255.0
                 } else {
-                    val cur = inst.panEnvelopes[voice.envPanIndex].value / 255.0
-                    val nxt = inst.panEnvelopes[(voice.envPanIndex + 1).coerceAtMost(maxIdx)].value / 255.0
+                    val cur = panEnv[voice.envPanIndex].value / 255.0
+                    val nxt = panEnv[(voice.envPanIndex + 1).coerceAtMost(maxIdx)].value / 255.0
                     voice.envPan = cur + (nxt - cur) * (voice.envPanTimeSec / pOffset)
                 }
             }
@@ -1614,49 +1758,65 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
     }
 
     /**
-     * Advance the pitch/filter envelope by `tickSec`. Same loop / sustain semantics
-     * as advanceEnvelope. Result is stored in `voice.envPfValue` (0.0..1.0; 0.5 = unity).
+     * Generic 25-node envelope walk shared by the pitch and filter envelopes. Reads the
+     * active env array + LOOP/SUSTAIN words and the supplied playhead state, returns the
+     * new value (0.0..1.0; 0.5 = unity) and writes the advanced playhead back through
+     * element 0 of [idxBox] (node index) and element 0 of [timeBox] (time elapsed within
+     * the current node). [wrapScratch] receives the resolved wrap range. Allocation-free.
      */
-    private fun advancePfEnvelope(voice: Voice, inst: TaudInst, tickSec: Double) {
-        // Same gate semantics as the pan envelope above: presence (voice.hasPfEnv) is
-        // latched at trigger time from LOOP.P; evaluation is unconditional once
-        // present, so an enabled-no-wrap envelope animates.
-        if (!voice.hasPfEnv || !voice.pfEnvOn) return
+    private fun advancePfRole(
+        env: Array<TaudInstEnvPoint>, loopWord: Int, susWord: Int, keyOff: Boolean,
+        tickSec: Double, wrapScratch: IntArray, idxBox: IntArray, timeBox: DoubleArray
+    ): Double {
         val maxIdx = 24
-        resolveEnvWrap(inst.pfEnvLoop, inst.pfEnvSustainWord, voice.keyOff, pfWrap)
-        val pSusStart = pfWrap[0]
-        val pSusEnd   = pfWrap[1]
-        val pSusOn    = pSusStart >= 0
-
-        if (pSusOn && voice.envPfIndex == pSusEnd && pSusStart == pSusEnd) {
-            voice.envPfValue = inst.pfEnvelopes[voice.envPfIndex].value / 255.0
-        } else if (pSusOn && voice.envPfIndex == pSusEnd) {
-            voice.envPfTimeSec = 0.0
-            voice.envPfIndex = pSusStart
-            voice.envPfValue = inst.pfEnvelopes[voice.envPfIndex].value / 255.0
-        } else if (voice.envPfIndex >= maxIdx) {
-            voice.envPfValue = inst.pfEnvelopes[maxIdx].value / 255.0
+        resolveEnvWrap(loopWord, susWord, keyOff, wrapScratch)
+        val susStart = wrapScratch[0]
+        val susEnd   = wrapScratch[1]
+        val susOn    = susStart >= 0
+        var idx = idxBox[0]
+        if (susOn && idx == susEnd && susStart == susEnd) {
+            return env[idx].value / 255.0
+        } else if (susOn && idx == susEnd) {
+            timeBox[0] = 0.0; idx = susStart; idxBox[0] = idx
+            return env[idx].value / 255.0
+        } else if (idx >= maxIdx) {
+            return env[maxIdx].value / 255.0
         } else {
-            val pOffset = inst.pfEnvelopes[voice.envPfIndex].offset.toDouble()
-            if (pOffset == 0.0) {
-                voice.envPfValue = inst.pfEnvelopes[voice.envPfIndex].value / 255.0
-            } else {
-                voice.envPfTimeSec += tickSec
-                if (voice.envPfTimeSec >= pOffset) {
-                    voice.envPfTimeSec -= pOffset
-                    val nextIdx = if (pSusOn && voice.envPfIndex == pSusEnd) pSusStart
-                                  else (voice.envPfIndex + 1).coerceAtMost(maxIdx)
-                    voice.envPfIndex = nextIdx
-                    voice.envPfValue = inst.pfEnvelopes[voice.envPfIndex].value / 255.0
-                } else {
-                    val cur = inst.pfEnvelopes[voice.envPfIndex].value / 255.0
-                    val nxt = inst.pfEnvelopes[(voice.envPfIndex + 1).coerceAtMost(maxIdx)].value / 255.0
-                    voice.envPfValue = cur + (nxt - cur) * (voice.envPfTimeSec / pOffset)
-                }
+            val offset = env[idx].offset.toDouble()
+            if (offset == 0.0) {
+                return env[idx].value / 255.0
             }
+            timeBox[0] += tickSec
+            if (timeBox[0] >= offset) {
+                timeBox[0] -= offset
+                idx = if (susOn && idx == susEnd) susStart else (idx + 1).coerceAtMost(maxIdx)
+                idxBox[0] = idx
+                return env[idx].value / 255.0
+            }
+            val cur = env[idx].value / 255.0
+            val nxt = env[(idx + 1).coerceAtMost(maxIdx)].value / 255.0
+            return cur + (nxt - cur) * (timeBox[0] / offset)
         }
     }
 
+    /** Advance the pitch envelope (drives playback rate; 0.5 = unity). */
+    private fun advancePitchEnvelope(voice: Voice, tickSec: Double) {
+        if (!voice.hasPitchEnv || !voice.pfEnvOn) return
+        pfIdxBox[0] = voice.envPitchIndex; pfTimeBox[0] = voice.envPitchTimeSec
+        voice.envPitchValue = advancePfRole(voice.activePitchEnv, voice.activePitchEnvLoop,
+            voice.activePitchEnvSustain, voice.keyOff, tickSec, pfWrap, pfIdxBox, pfTimeBox)
+        voice.envPitchIndex = pfIdxBox[0]; voice.envPitchTimeSec = pfTimeBox[0]
+    }
+
+    /** Advance the filter envelope (drives cutoff; 0.5 = unity). */
+    private fun advanceFilterEnvelope(voice: Voice, tickSec: Double) {
+        if (!voice.hasFilterEnv || !voice.pfEnvOn) return
+        pfIdxBox[0] = voice.envFilterIndex; pfTimeBox[0] = voice.envFilterTimeSec
+        voice.envFilterValue = advancePfRole(voice.activeFilterEnv, voice.activeFilterEnvLoop,
+            voice.activeFilterEnvSustain, voice.keyOff, tickSec, pfWrap, pfIdxBox, pfTimeBox)
+        voice.envFilterIndex = pfIdxBox[0]; voice.envFilterTimeSec = pfTimeBox[0]
+    }
+
     /**
      * Recompute the IT-compatible 2-pole resonant low-pass coefficients for
      * `voice` when its cutoff or resonance has changed since the last refresh.
@@ -1682,24 +1842,33 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
      *   y[n]      = A0 × x[n] + B0 × y[n−1] + B1 × y[n−2]
      */
     private fun refreshVoiceFilter(voice: Voice) {
-        val cut = voice.currentCutoff.coerceIn(0, 255)
-        val res = voice.currentResonance.coerceIn(0, 255)
+        val cut = voice.currentCutoff
+        val res = voice.currentResonance
         if (cut == voice.filterCutoffCached && res == voice.filterResonanceCached) return
         voice.filterCutoffCached = cut
         voice.filterResonanceCached = res
 
-        if (cut >= 255) {
-            voice.filterActive = false
-            return
+        val nyquist = SAMPLING_RATE * 0.5 - 1.0
+        val frequency: Double
+        val dmpfac: Double
+        if (voice.filterSfMode) {
+            // SoundFont mode: cutoff = absolute cents, resonance = centibels above DC gain.
+            //   freq = 8.176 Hz × 2^(cents/1200)   (cents are relative to 8.176 Hz = MIDI 0)
+            // SF2 Q is the resonant-peak height in dB×10. For this IT-style 2-pole IIR the
+            // peak gain ≈ −20·log10(dmpfac) dB, so dmpfac = 10^(−dB/20) = 10^(−Qcb/200).
+            if (cut >= 0xFFFF) { voice.filterActive = false; return }
+            frequency = (8.176 * 2.0.pow(cut / 1200.0)).coerceIn(1.0, nyquist)
+            val qcb = if (res >= 0xFFFF) 0 else res
+            // Clamp to the IT filter's max resonance (≈24 dB) to keep the IIR stable / unclipped.
+            dmpfac = 10.0.pow(-qcb / 200.0).coerceIn(0.0625, 1.0)
+        } else {
+            if (cut.coerceIn(0, 255) >= 255) { voice.filterActive = false; return }
+            val itCutoff    = cut.coerceIn(0, 254) * 0.5                 // 0..127
+            val itResonance = if (res >= 255) 0.0 else res.coerceIn(0, 254) * 0.5
+            frequency = (110.0 * 2.0.pow(itCutoff / 24.0 + 0.25)).coerceAtMost(nyquist)
+            dmpfac    = 10.0.pow(-itResonance * (24.0 / 128.0) / 20.0)
         }
 
-        val itCutoff    = cut * 0.5                                     // 0..127
-        val itResonance = if (res >= 255) 0.0 else res * 0.5            // 0..127
-
-        val nyquist   = SAMPLING_RATE * 0.5 - 1.0
-        val frequency = (110.0 * 2.0.pow(itCutoff / 24.0 + 0.25)).coerceAtMost(nyquist)
-        val dmpfac    = 10.0.pow(-itResonance * (24.0 / 128.0) / 20.0)
-
         val r = SAMPLING_RATE / (2.0 * PI * frequency)
         val d = dmpfac * r + dmpfac - 1.0
         val e = r * r
@@ -2036,6 +2205,90 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         return if (dnv == 0) 0x3F else (dnv * 63 + 127) / 255
     }
 
+    /** Cap [TrackerState.backgroundVoices] to [MAX_BG_VOICES], preferring to evict the
+     *  oldest NON-layer ghost so a live Metainstrument note never loses one of its layers. */
+    private fun capBackgroundVoices(ts: TrackerState) {
+        while (ts.backgroundVoices.size > MAX_BG_VOICES) {
+            val idx = ts.backgroundVoices.indexOfFirst { !it.isLayerChild }
+            if (idx >= 0) ts.backgroundVoices.removeAt(idx) else ts.backgroundVoices.removeFirst()
+        }
+    }
+
+    /** Release the layer children of channel [vi] (from a previous Metainstrument note):
+     *  detach them and apply each layer instrument's own NNA so the displaced note's tail
+     *  rides on as an ordinary background ghost. Called at the start of a fresh trigger. */
+    private fun releaseLayerChildren(ts: TrackerState, vi: Int) {
+        for (bg in ts.backgroundVoices) {
+            if (!bg.isLayerChild || bg.sourceChannel != vi) continue
+            bg.isLayerChild = false
+            when (instruments[bg.instrumentId].newNoteAction) {
+                0 -> if (!bg.keyOff) { bg.keyOff = true; applyKeyLift(bg, instruments[bg.instrumentId]) }
+                1 -> bg.active = false        // note cut
+                3 -> bg.noteFading = true     // note fade
+                // 2 = continue
+            }
+        }
+    }
+
+    /** Hard-cut the layer children of channel [vi] (pattern note-cut 0x0002 on the channel). */
+    private fun cutLayerChildren(ts: TrackerState, vi: Int) {
+        for (bg in ts.backgroundVoices) if (bg.isLayerChild && bg.sourceChannel == vi) bg.active = false
+    }
+
+    /**
+     * Trigger [noteVal]/[instId] on the foreground [voice] of channel [vi]. When [instId]
+     * is a Metainstrument (terranmon.txt "Metainstrument definition"), fan out: the first
+     * layer whose (pitch × volume) rectangle contains the trigger plays on the foreground
+     * voice; every other matching layer spawns a tracked background "layer child". Old
+     * layer children of the channel are released first (per their own NNA). For a normal
+     * instrument this is exactly the historical [triggerNote] call (volOverride = -1), so
+     * non-meta playback is byte-identical.
+     *
+     * [rowVolOverride] is the V-column-derived trigger volume (or -1). For metas it is the
+     * velocity used to resolve velocity-conditional layers and the layers' note volume;
+     * the normal path ignores it to preserve legacy patch-seed semantics.
+     */
+    private fun triggerMetaOrNote(ts: TrackerState, voice: Voice, vi: Int,
+                                  noteVal: Int, instId: Int, rowVolOverride: Int) {
+        releaseLayerChildren(ts, vi)
+        val inst = if (instId != 0) instruments[instId] else instruments[voice.instrumentId]
+        if (!inst.isMeta) {
+            triggerNote(voice, noteVal, instId, -1)   // legacy path, unchanged
+            voice.layerMixGain = 1.0
+            voice.layerRelDetune = 0
+            voice.isLayerChild = false
+            return
+        }
+        val seedVol = if (rowVolOverride in 0..0x3F) rowVolOverride else 0x3F
+        val layers = inst.resolveMetaLayers(noteVal, seedVol)
+        if (layers.isEmpty()) {                       // no layer covers this note: silence
+            voice.active = false
+            voice.layerMixGain = 1.0
+            voice.layerRelDetune = 0
+            return
+        }
+        val l0 = layers[0]
+        triggerNote(voice, (noteVal + l0.detune).coerceIn(0x20, 0xFFFF), l0.instIdx, rowVolOverride)
+        voice.layerMixGain   = META_MIX_GAIN[l0.mixOctet and 0xFF]
+        voice.layerRelDetune = 0
+        voice.isLayerChild   = false
+        for (k in 1 until layers.size) {
+            val lk = layers[k]
+            val child = Voice()
+            triggerNote(child, (noteVal + lk.detune).coerceIn(0x20, 0xFFFF), lk.instIdx, rowVolOverride)
+            child.isLayerChild   = true
+            child.sourceChannel  = vi
+            child.layerRelDetune = lk.detune - l0.detune
+            child.layerMixGain   = META_MIX_GAIN[lk.mixOctet and 0xFF]
+            // Match layer 0's channel context so M/pan and the first tick agree.
+            child.channelVolume = voice.channelVolume
+            child.channelPan    = voice.channelPan
+            child.rowPan        = voice.rowPan
+            ts.backgroundVoices.addLast(child)
+        }
+        capBackgroundVoices(ts)
+    }
+
     private fun triggerNote(voice: Voice, noteVal: Int, instId: Int, volOverride: Int) {
         if (instId != 0) voice.instrumentId = instId
         val inst = instruments[voice.instrumentId]
@@ -2056,7 +2309,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         voice.keyOff = false
         voice.envIndex = 0
         voice.envTimeSec = 0.0
-        voice.envVolume = (inst.volEnvelopes[0].value / 63.0).coerceIn(0.0, 1.0)
+        voice.envVolume = (voice.activeVolEnv[0].value / 63.0).coerceIn(0.0, 1.0)
         // Snap the per-sample-smoothed envelope to the fresh starting value so attack
         // transients land at the envelope's node-0 value immediately. Per-tick step is
         // recomputed by applyTrackerTick on the next tick boundary.
@@ -2064,20 +2317,20 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         voice.envVolStep = 0.0
         voice.envPanIndex = 0
         voice.envPanTimeSec = 0.0
-        voice.envPan = inst.panEnvelopes[0].value / 255.0
+        voice.envPan = voice.activePanEnv[0].value / 255.0
         // Envelope-present gate (added 2026-05-06). Driven by the P bit at LOOP-word
         // bit 13 (high byte's bit 5; offsets 16/18/20 bit 5), set by converters
-        // whenever they emit envelope nodes. See terranmon.txt at byte 15/17/19 for
-        // the bit layout and the file-header preamble for the presence-vs-wrap
-        // distinction.
-        voice.hasPanEnv = envPresent(inst.panEnvLoop)
-        // Pitch/filter envelope state.
-        voice.hasPfEnv      = envPresent(inst.pfEnvLoop)
-        // The pf 'm' mode bit (pitch=0, filter=1) lives in the LOOP word at bit 7.
-        voice.envPfIsFilter = (inst.pfEnvLoop ushr 7) and 1 != 0
-        voice.envPfIndex    = 0
-        voice.envPfTimeSec  = 0.0
-        voice.envPfValue    = if (voice.hasPfEnv) inst.pfEnvelopes[0].value / 255.0 else 0.5
+        // whenever they emit envelope nodes. The active LOOP word is the patch-or-base
+        // one (resolveActiveEnvelopes); hasPitchEnv/hasFilterEnv are already latched there.
+        voice.hasPanEnv = envPresent(voice.activePanEnvLoop)
+        // Pitch / filter envelope playhead seeds (the role split + presence were resolved
+        // by resolveActiveEnvelopes from the base inst's two pf-slots and any patch override).
+        voice.envPitchIndex   = 0
+        voice.envPitchTimeSec = 0.0
+        voice.envPitchValue   = if (voice.hasPitchEnv) voice.activePitchEnv[0].value / 255.0 else 0.5
+        voice.envFilterIndex   = 0
+        voice.envFilterTimeSec = 0.0
+        voice.envFilterValue   = if (voice.hasFilterEnv) voice.activeFilterEnv[0].value / 255.0 else 0.5
         // Fadeout starts at unity; advances only after key-off.
         voice.fadeoutVolume = 1.0
         // Cancel any sample-end ramp left over from the previous note — a fresh trigger's
@@ -2108,7 +2361,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             // The pan envelope's 'p' flag ("use default pan") lives in the pan LOOP word at bit 7.
             // An Ixmp patch's defaultPan (when non-sentinel, i.e. != 0xFF) takes precedence over
             // the base instrument's defaultPan.
-            if ((inst.panEnvLoop ushr 7) and 1 != 0) {
+            if ((voice.activePanEnvLoop ushr 7) and 1 != 0) {
                 val patchPan = patch?.defaultPan?.takeIf { it != 0xFF }
                 voice.channelPan = patchPan ?: inst.defaultPan
                 voice.rowPan = (voice.channelPan ushr 2).coerceIn(0, 63)
@@ -2122,10 +2375,11 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 voice.rowPan = (voice.channelPan ushr 2).coerceIn(0, 63)
             }
         }
-        // Filter cutoff/resonance defaults — adjusted per-tick by the pf envelope when in filter mode.
+        // Filter cutoff/resonance defaults — adjusted per-tick by the filter envelope. Uses
+        // the ACTIVE values (patch 'x' block overrides the base inst's defaultCutoff/Resonance).
         // 255 = filter off (IT high-bit-clear); 0..254 = active range matching IT 0..127 at double resolution.
-        voice.currentCutoff = inst.defaultCutoff
-        voice.currentResonance = inst.defaultResonance
+        voice.currentCutoff = voice.activeDefaultCutoff
+        voice.currentResonance = voice.activeDefaultResonance
         voice.filterY1 = 0.0; voice.filterY2 = 0.0
         voice.filterCutoffCached = -1   // force coefficient refresh on first tick
         voice.filterResonanceCached = -1
@@ -2218,7 +2472,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             val existInst = instruments[v.instrumentId]
             when (existInst.duplicateCheckAction) {
                 0 -> { v.fadeoutVolume = 0.0; v.active = false }
-                1 -> v.keyOff = true
+                1 -> { v.keyOff = true; applyKeyLift(v, existInst) }
                 2 -> v.noteFading = true
             }
         }
@@ -2254,14 +2508,15 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
 
         val bg = ghostVoice(voice, channel)
         when (nna) {
-            0 -> bg.keyOff = true       // Note Off — release sustain; fadeout starts naturally.
+            0 -> {                      // Note Off — release sustain; fadeout starts naturally.
+                bg.keyOff = true
+                applyKeyLift(bg, instruments[bg.instrumentId])
+            }
             3 -> bg.noteFading = true   // Note Fade — fadeout immediately, sustain still loops.
             // 2 (Continue) — ghost continues unchanged.
         }
         ts.backgroundVoices.addLast(bg)
-        while (ts.backgroundVoices.size > MAX_BG_VOICES) {
-            ts.backgroundVoices.removeFirst()
-        }
+        capBackgroundVoices(ts)
     }
 
     /** Snapshot the playback-relevant state of [src] into a fresh Voice tagged for [channel]. */
@@ -2293,16 +2548,20 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         v.envPanTimeSec = src.envPanTimeSec
         v.envPan = src.envPan
         v.hasPanEnv = src.hasPanEnv
-        v.hasPfEnv = src.hasPfEnv
-        v.envPfIndex = src.envPfIndex
-        v.envPfTimeSec = src.envPfTimeSec
-        v.envPfValue = src.envPfValue
-        v.envPfIsFilter = src.envPfIsFilter
+        v.hasPitchEnv = src.hasPitchEnv
+        v.envPitchIndex = src.envPitchIndex
+        v.envPitchTimeSec = src.envPitchTimeSec
+        v.envPitchValue = src.envPitchValue
+        v.hasFilterEnv = src.hasFilterEnv
+        v.envFilterIndex = src.envFilterIndex
+        v.envFilterTimeSec = src.envFilterTimeSec
+        v.envFilterValue = src.envFilterValue
         v.fadeoutVolume = src.fadeoutVolume
         v.autoVibPhase = src.autoVibPhase
         v.autoVibTicksSinceTrigger = src.autoVibTicksSinceTrigger
         v.currentCutoff = src.currentCutoff
         v.currentResonance = src.currentResonance
+        v.filterSfMode = src.filterSfMode
         v.filterActive = src.filterActive
         v.filterA0 = src.filterA0
         v.filterB0 = src.filterB0
@@ -2321,6 +2580,10 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         v.panEnvOn = src.panEnvOn
         v.pfEnvOn = src.pfEnvOn
         v.noteFading = src.noteFading
+        // Keep the source's Metainstrument layer-0 mix gain on the ghost so an NNA tail of
+        // a layered note fades at the same level it was sounding (isLayerChild stays false:
+        // a ghost is a free-running tail, not a tracked child of the live note).
+        v.layerMixGain = src.layerMixGain
         // Voice-FX state (effects 8/9): preserve so the NNA-ghosted tail keeps the same timbre.
         v.clipMode = src.clipMode
         v.bitcrusherDepth = src.bitcrusherDepth
@@ -2345,6 +2608,24 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         v.activeVibratoDepth     = src.activeVibratoDepth
         v.activeVibratoRate      = src.activeVibratoRate
         v.activeVibratoWaveform  = src.activeVibratoWaveform
+        // Active-envelope view must follow too, so the ghost keeps its patch's ADSR /
+        // pan / pitch / filter envelopes + fadeout/cutoff/resonance (not the base inst's).
+        v.activeVolEnv           = src.activeVolEnv
+        v.activeVolEnvLoop       = src.activeVolEnvLoop
+        v.activeVolEnvSustain    = src.activeVolEnvSustain
+        v.activePanEnv           = src.activePanEnv
+        v.activePanEnvLoop       = src.activePanEnvLoop
+        v.activePanEnvSustain    = src.activePanEnvSustain
+        v.activePitchEnv         = src.activePitchEnv
+        v.activePitchEnvLoop     = src.activePitchEnvLoop
+        v.activePitchEnvSustain  = src.activePitchEnvSustain
+        v.activeFilterEnv        = src.activeFilterEnv
+        v.activeFilterEnvLoop    = src.activeFilterEnvLoop
+        v.activeFilterEnvSustain = src.activeFilterEnvSustain
+        v.activeFadeoutStep      = src.activeFadeoutStep
+        v.activeDefaultCutoff    = src.activeDefaultCutoff
+        v.activeDefaultResonance = src.activeDefaultResonance
+        v.activeAttenGain        = src.activeAttenGain
         return v
     }
 
@@ -2356,7 +2637,10 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 while (iter.hasNext()) if (iter.next().sourceChannel == channel) iter.remove()
             }
             1 -> ts.backgroundVoices.forEach { bg ->  // Past Note Off — sustain release.
-                if (bg.sourceChannel == channel) bg.keyOff = true
+                if (bg.sourceChannel == channel) {
+                    bg.keyOff = true
+                    applyKeyLift(bg, instruments[bg.instrumentId])
+                }
             }
             2 -> ts.backgroundVoices.forEach { bg ->  // Past Note Fade — start fadeout.
                 if (bg.sourceChannel == channel) bg.noteFading = true
@@ -2522,7 +2806,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 // silences after the first row because the slide saturates at 0 and there's
                 // nothing to lift the volume back up before the next slide starts.
                 0x0000 -> {
-                    if (row.instrment != 0) {
+                    if (row.instrment != 0 && !instruments[row.instrment].isMeta) {
                         voice.instrumentId = row.instrment
                         // Re-resolve the patch on the new instrument against the voice's
                         // current note so multi-sample IT/XM instruments pick up the right
@@ -2545,8 +2829,28 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 // fadeoutVolume reaches 0, or immediately if FT2-mode fadeStep == 0. Setting
                 // voice.active = false here would defeat both — instruments with sustain points
                 // and non-zero fadeout (FT2 sustain-then-fade idiom) would be cut on the spot.
-                0x0001 -> { voice.keyOff = true }
-                0x0002 -> voice.active = false                  // note cut (immediate)
+                0x0001 -> {
+                    // A sub-row key-off (KEY_OFF + S$Dx) defers to the requested tick instead of
+                    // firing at tick 0 — otherwise the note is released early and, on instruments
+                    // that rely on the release leg to end, can ring on / cut short (issues 3 & 5).
+                    val dTick = if ((row.effect == EffectOp.OP_S) && ((row.effectArg ushr 12) and 0xF) == 0xD)
+                                (row.effectArg ushr 8) and 0xF else 0
+                    if (dTick > 0) {
+                        voice.noteDelayTick = dTick; voice.delayedNote = 0x0001
+                        voice.delayedInst = 0; voice.delayedVol = -1
+                    } else {
+                        voice.keyOff = true
+                        applyKeyLift(voice, instruments[voice.instrumentId])
+                    }
+                }
+                0x0002 -> {
+                    val dTick = if ((row.effect == EffectOp.OP_S) && ((row.effectArg ushr 12) and 0xF) == 0xD)
+                                (row.effectArg ushr 8) and 0xF else 0
+                    if (dTick > 0) {
+                        voice.noteDelayTick = dTick; voice.delayedNote = 0x0002
+                        voice.delayedInst = 0; voice.delayedVol = -1
+                    } else { voice.active = false; cutLayerChildren(ts, vi) }  // note cut (immediate)
+                }
                 in 0x0003..0x000F -> { /* reserved sentinel range, no engine handler */ }
                 in 0x0010..0x001F -> { /* Int0..IntF: reserved interrupt slots, no engine handler yet */ }
                 else -> {
@@ -2567,7 +2871,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                         // to full level on every porta-with-inst row (e.g.
                         // nearly_there_.mod ord 0x1B ch 4 r49 jumped from ~35 to 63
                         // and the bump persisted through the following vibrato rows).
-                        if (row.instrment != 0) {
+                        if (row.instrment != 0 && !instruments[row.instrment].isMeta) {
                             voice.instrumentId = row.instrment
                             // Porta + inst-byte: re-resolve the patch on the new instrument
                             // against the voice's current note (Schism evaluates the keyboard
@@ -2596,7 +2900,11 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                     } else {
                         applyDuplicateCheck(ts, vi, row.instrment, row.note)
                         maybeSpawnBackgroundForNNA(ts, voice, vi)
-                        triggerNote(voice, row.note, row.instrment, -1)
+                        // V-column SET value (selector 0) is the trigger velocity; passed so a
+                        // Metainstrument resolves velocity-conditional layers correctly. The
+                        // non-meta path inside triggerMetaOrNote ignores it (legacy semantics).
+                        val trigVol = if (row.volumeEff == 0) row.volume else -1
+                        triggerMetaOrNote(ts, voice, vi, row.note, row.instrment, trigVol)
                     }
                 }
             }
@@ -3023,12 +3331,23 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 voice.noteWasCut = true
             }
 
-            // Note delay — fire deferred trigger when the requested tick arrives.
-            // NNA fires now (not at row parse) so that delayed retriggers ghost correctly.
+            // Note delay — fire the deferred event when the requested tick arrives. A delayed
+            // KEY_OFF / note-cut (converters emit sub-row key-offs as KEY_OFF + S$Dx) applies the
+            // release/cut here instead of at tick 0; a delayed NOTE triggers with NNA now (not at
+            // row parse) so that delayed retriggers ghost correctly.
             if (voice.noteDelayTick == ts.tickInRow) {
-                applyDuplicateCheck(ts, vi, voice.delayedInst, voice.delayedNote)
-                maybeSpawnBackgroundForNNA(ts, voice, vi)
-                triggerNote(voice, voice.delayedNote, voice.delayedInst, voice.delayedVol)
+                when (voice.delayedNote) {
+                    0x0001 -> {                                   // delayed KEY_OFF
+                        voice.keyOff = true
+                        applyKeyLift(voice, instruments[voice.instrumentId])
+                    }
+                    0x0002 -> { voice.active = false; cutLayerChildren(ts, vi) }  // delayed note cut
+                    else -> {
+                        applyDuplicateCheck(ts, vi, voice.delayedInst, voice.delayedNote)
+                        maybeSpawnBackgroundForNNA(ts, voice, vi)
+                        triggerMetaOrNote(ts, voice, vi, voice.delayedNote, voice.delayedInst, voice.delayedVol)
+                    }
+                }
                 voice.noteDelayTick = -1
                 // triggerNote may have swapped in a new instrument; re-bind so the rest of this
                 // tick's per-voice work (playbackRate at L3090, envelope/fadeout/auto-vibrato)
@@ -3040,7 +3359,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             }
 
             if (!voice.active) {
-                advanceEnvelope(voice, inst, tickSec)
+                advanceEnvelope(voice, tickSec)
                 voice.envVolStep = if (spt > 0.0) (voice.envVolume - voice.envVolMix) / spt else 0.0
                 continue
             }
@@ -3174,7 +3493,6 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 voice.retrigCounter++
                 if (voice.retrigCounter >= voice.retrigInterval) {
                     voice.retrigCounter = 0
-                    val retrigInst = instruments[voice.instrumentId]
                     // Use the voice's active sample's playStart (patch-aware) — without this
                     // a Q retrigger on a multi-sample instrument would jump to the base sample
                     // even though the voice is bound to a patch.
@@ -3182,9 +3500,11 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                     voice.keyOff = false
                     voice.envIndex = 0; voice.envTimeSec = 0.0
                     voice.envPanIndex = 0; voice.envPanTimeSec = 0.0
-                    voice.envPan = retrigInst.panEnvelopes[0].value / 255.0
-                    voice.envPfIndex = 0; voice.envPfTimeSec = 0.0
-                    voice.envPfValue = if (voice.hasPfEnv) retrigInst.pfEnvelopes[0].value / 255.0 else 0.5
+                    voice.envPan = voice.activePanEnv[0].value / 255.0
+                    voice.envPitchIndex = 0; voice.envPitchTimeSec = 0.0
+                    voice.envPitchValue = if (voice.hasPitchEnv) voice.activePitchEnv[0].value / 255.0 else 0.5
+                    voice.envFilterIndex = 0; voice.envFilterTimeSec = 0.0
+                    voice.envFilterValue = if (voice.hasFilterEnv) voice.activeFilterEnv[0].value / 255.0 else 0.5
                     voice.fadeoutVolume = 1.0
                     voice.autoVibPhase = 0
                     voice.autoVibTicksSinceTrigger = 0
@@ -3201,24 +3521,33 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             // IT pitch envelope max is ±16 semitones (Schism sndmix.c:455-462 indexes
             // linear_slide_up_table[abs(envpitch)] where envpitch ∈ [-256,+256] and
             // table[255] = 65536·2^(255/192) ≈ 2.504, i.e. 15.94 semitones).
-            val pitchEnvDelta = if (voice.hasPfEnv && voice.pfEnvOn && !voice.envPfIsFilter)
-                ((voice.envPfValue - 0.5) * 2.0 * 16.0 * 4096.0 / 12.0).toInt()
+            val pitchEnvDelta = if (voice.hasPitchEnv && voice.pfEnvOn)
+                ((voice.envPitchValue - 0.5) * 2.0 * 16.0 * 4096.0 / 12.0).toInt()
             else 0
 
             val finalPitch = (pitchToMixer + autoVibDelta + pitchEnvDelta).coerceIn(0x20, 0xFFFF)
             voice.playbackRate = computePlaybackRate(voice, finalPitch)
 
-            // Filter envelope (filter mode): scale baseCut by envValue (0..1, 0.5 = unity).
+            // Filter envelope: scale baseCut by envValue (0..1, 0.5 = unity).
             // Schism filters.c:80-86 computes `cutoff_used = chan->cutoff * (flt_modifier+256)/256`
             // where flt_modifier = (env_value_0..64 - 32) * 8. Mapping TSVM's [0..1] env to Schism's
             // [-256..+256] modifier and accounting for our pre-doubled defaultCutoff (it2taud.py
-            // stores IFC*2 in 0..254) gives `currentCutoff = baseCut * envPfValue` — at unity (0.5)
+            // stores IFC*2 in 0..254) gives `currentCutoff = baseCut * envFilterValue` — at unity (0.5)
             // the filter sits at IFC, at max (1.0) it opens to 2*IFC, at min (0.0) it closes.
             // If the instrument has no initial cutoff (255 = off), the envelope drives the filter
             // from the maximum active value (254) so the filter can become audible during the note.
-            if (voice.hasPfEnv && voice.pfEnvOn && voice.envPfIsFilter) {
-                val baseCut = if (inst.defaultCutoff < 255) inst.defaultCutoff else 254
-                voice.currentCutoff = (baseCut * voice.envPfValue).toInt().coerceIn(0, 254)
+            // baseCut is the ACTIVE cutoff (patch 'x' override or base inst).
+            if (voice.hasFilterEnv && voice.pfEnvOn) {
+                if (voice.filterSfMode) {
+                    // SF mode: activeDefaultCutoff is the PEAK cutoff in cents; the env scales it
+                    // down (envFilterValue 1.0 = peak/open, 0 = closed). Converter sets node values
+                    // = targetCents/peakCents so the SF2 mod-env sweep is reproduced exactly.
+                    val baseCut = if (voice.activeDefaultCutoff < 0xFFFF) voice.activeDefaultCutoff else 13500
+                    voice.currentCutoff = (baseCut * voice.envFilterValue).toInt().coerceIn(0, 0xFFFF)
+                } else {
+                    val baseCut = if (voice.activeDefaultCutoff < 255) voice.activeDefaultCutoff else 254
+                    voice.currentCutoff = (baseCut * voice.envFilterValue).toInt().coerceIn(0, 254)
+                }
             }
 
             // Refresh biquad filter coefficients once per tick (only recomputes when changed).
@@ -3235,21 +3564,22 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             // of the same field; converters scale source values into Taud's 0..4095 unit so the engine
             // sees one consistent encoding.
             if (voice.keyOff || voice.noteFading) {
-                val fadeStep = inst.volumeFadeoutLow or ((inst.fadeoutHigh and 0x0F) shl 8)
+                val fadeStep = voice.activeFadeoutStep
                 if (fadeStep > 0) {
                     voice.fadeoutVolume = (voice.fadeoutVolume - fadeStep / 1024.0).coerceAtLeast(0.0)
                     if (voice.fadeoutVolume <= 0.0) voice.active = false
                 }
             }
 
-            advanceEnvelope(voice, inst, tickSec)
+            advanceEnvelope(voice, tickSec)
             // Compute per-sample slope so envVolMix walks smoothly to the new envVolume
             // across the next tick interval; this turns the mixer's view of the envelope
             // from a stair-step into a continuous ramp and removes the per-tick clicks
             // that are otherwise audible on steep envelope slopes (e.g., XM volume
             // envelopes with fast attack/decay nodes — the slumberjack.xm symptom).
             voice.envVolStep = if (spt > 0.0) (voice.envVolume - voice.envVolMix) / spt else 0.0
-            advancePfEnvelope(voice, inst, tickSec)
+            advancePitchEnvelope(voice, tickSec)
+            advanceFilterEnvelope(voice, tickSec)
         }
 
         // Tempo slide — applied once per tick at the playhead level (any channel that armed it).
@@ -3293,12 +3623,34 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         while (bgIt.hasNext()) {
             val bg = bgIt.next()
             if (!bg.active) { bgIt.remove(); continue }
+            // Metainstrument layer child: re-sync pitch / key-off / volume / pan from the
+            // parent foreground voice each tick so tone-portamento, slides, KEY_OFF, M
+            // channel-volume and panning carry to every layer of the note. When the parent
+            // note ends, detach so this layer finishes its own release as a plain ghost.
+            if (bg.isLayerChild) {
+                val parent = ts.voices.getOrNull(bg.sourceChannel)
+                if (parent == null || !parent.active) {
+                    bg.isLayerChild = false
+                } else {
+                    bg.noteVal   = (parent.noteVal + bg.layerRelDetune).coerceIn(0x20, 0xFFFF)
+                    bg.basePitch = bg.noteVal
+                    bg.amigaPeriod = -1.0; bg.linearFreq = -1.0
+                    if (parent.keyOff && !bg.keyOff) { bg.keyOff = true; applyKeyLift(bg, instruments[bg.instrumentId]) }
+                    if (parent.noteFading && !bg.noteFading) bg.noteFading = true
+                    bg.channelVolume = parent.channelVolume
+                    bg.noteVolume    = parent.noteVolume
+                    bg.rowVolume     = parent.rowVolume
+                    bg.channelPan    = parent.channelPan
+                    bg.rowPan        = parent.rowPan
+                }
+            }
             val inst = instruments[bg.instrumentId]
-            advanceEnvelope(bg, inst, tickSec)
+            advanceEnvelope(bg, tickSec)
             bg.envVolStep = if (spt > 0.0) (bg.envVolume - bg.envVolMix) / spt else 0.0
-            advancePfEnvelope(bg, inst, tickSec)
+            advancePitchEnvelope(bg, tickSec)
+            advanceFilterEnvelope(bg, tickSec)
             if (bg.keyOff || bg.noteFading) {
-                val fadeStep = inst.volumeFadeoutLow or ((inst.fadeoutHigh and 0x0F) shl 8)
+                val fadeStep = bg.activeFadeoutStep
                 if (fadeStep > 0) {
                     // Mirrors the foreground-voice fade path above — single divisor of 1024.
                     bg.fadeoutVolume = (bg.fadeoutVolume - fadeStep / 1024.0).coerceAtLeast(0.0)
@@ -3306,15 +3658,15 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             }
             // Auto-vibrato keeps running on backgrounds — it's an instrument-intrinsic LFO.
             val autoVibDelta = advanceAutoVibrato(bg, inst)
-            val pitchEnvDelta = if (bg.hasPfEnv && bg.pfEnvOn && !bg.envPfIsFilter)
-                ((bg.envPfValue - 0.5) * 2.0 * 16.0 * 4096.0 / 12.0).toInt()
+            val pitchEnvDelta = if (bg.hasPitchEnv && bg.pfEnvOn)
+                ((bg.envPitchValue - 0.5) * 2.0 * 16.0 * 4096.0 / 12.0).toInt()
             else 0
             val finalPitch = (bg.noteVal + autoVibDelta + pitchEnvDelta).coerceIn(0x20, 0xFFFF)
             bg.playbackRate = computePlaybackRate(bg, finalPitch)
-            // Filter-mode pf envelope: same scaling rule as foreground.
-            if (bg.hasPfEnv && bg.pfEnvOn && bg.envPfIsFilter) {
-                val baseCut = if (inst.defaultCutoff < 255) inst.defaultCutoff else 254
-                bg.currentCutoff = (baseCut * bg.envPfValue).toInt().coerceIn(0, 254)
+            // Filter envelope: same scaling rule as foreground, using the active cutoff. Honours filterSfMode (SF cents, 0xFFFF = off, 13500 fallback) as well as IT bytes (255 = off, 254 fallback).
+            if (bg.hasFilterEnv && bg.pfEnvOn) {
+                val baseCut = if (bg.filterSfMode) { if (bg.activeDefaultCutoff < 0xFFFF) bg.activeDefaultCutoff else 13500 } else { if (bg.activeDefaultCutoff < 255) bg.activeDefaultCutoff else 254 }
+                bg.currentCutoff = (baseCut * bg.envFilterValue).toInt().coerceIn(0, if (bg.filterSfMode) 0xFFFF else 254)
             }
             refreshVoiceFilter(bg)
             // Reap fully-faded ghosts so the pool stays drained.
@@ -3419,7 +3771,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 // Split the gain stack so the soundscope can see the voice amplitude independently
                 // of the playhead-wide faders (master / mixing / global volume).
                 val perVoiceGain = effEnvVol * voice.fadeoutVolume * voice.currentMixVolume *
-                                   swingScale * instGv * faderGain
+                                   swingScale * instGv * faderGain * voice.layerMixGain * voice.activeAttenGain
                 val globalGain = gvol * mvol * playhead.masterVolume / 255.0
                 val vol = perVoiceGain * globalGain
                 val pan = if (voice.hasPanEnv && voice.panEnvOn) {
@@ -3464,7 +3816,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                 // changes to the source slot's fader don't affect already-ghosted voices.
                 val faderGain = (255 - bg.fader) / 255.0
                 val vol = effEnvVol * bg.fadeoutVolume * bg.currentMixVolume *
-                          swingScale * gvol * mvol * instGv * faderGain * playhead.masterVolume / 255.0
+                          swingScale * gvol * mvol * instGv * faderGain * bg.layerMixGain * bg.activeAttenGain * playhead.masterVolume / 255.0
                 val pan = if (bg.hasPanEnv && bg.panEnvOn) {
                     val envPanRaw = (bg.envPan * 255.0).roundToInt().coerceIn(0, 255)
                     (bg.channelPan + envPanRaw - 128 + bg.randomPanBias).coerceIn(0, 255)
@@ -3715,6 +4067,19 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         // -1 for live foreground voices held by TrackerState.voices[]; 0..19 for background
         // (mixer-private) ghosts spawned by NNA on the matching channel index.
         var sourceChannel = -1
+
+        // ── Metainstrument layering ──
+        // A meta trigger plays its first matching layer on the foreground voice and spawns
+        // the remaining matching layers as background voices tagged [isLayerChild]. Each
+        // tick those children re-sync pitch / key-off / volume / pan from their parent
+        // foreground voice (ts.voices[sourceChannel]); [layerRelDetune] is the child's
+        // 4096-TET offset relative to the parent (layer 0), so tone-portamento and slides
+        // on the channel carry to every layer. [layerMixGain] is the per-layer static mix
+        // multiplier (from the layer's mix-volume octet) applied in the mixer; it is set on
+        // the foreground voice too (layer 0's gain) and defaults to 1.0 for normal notes.
+        var isLayerChild = false
+        var layerRelDetune = 0
+        var layerMixGain = 1.0
         // -1 = use instrument-default NNA; otherwise overrides the next NNA event on this voice
         // (see S $73..$76). Cleared on every fresh trigger.
         var nnaOverride = -1
@@ -3780,12 +4145,19 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         var envPan = 0.5                   // 0.0=full-left, 1.0=full-right, 0.5=centre
         var hasPanEnv = false
 
-        // Pitch / filter envelope (instrument-supplied, byte 19-20 + bytes 121-170).
-        var hasPfEnv = false
-        var envPfIndex = 0
-        var envPfTimeSec = 0.0
-        var envPfValue = 0.5               // 0.0..1.0; 0.5 = unity (no pitch shift / unmodulated cutoff)
-        var envPfIsFilter = false          // mirror of inst.pfEnvLoop bit 7 latched at trigger
+        // Pitch and filter envelopes. The Taud instrument carries two pf-env slots
+        // (base bytes 19.. and 197..); they are routed by their m-bit into the pitch
+        // and filter roles here so SF2's single mod-env can drive both at once. IT/XM
+        // instruments populate only one role (pitch XOR filter). Per-patch Ixmp 'P'/'f'
+        // blocks override the corresponding role. 0.5 = unity (no shift / unmodulated cutoff).
+        var hasPitchEnv = false
+        var envPitchIndex = 0
+        var envPitchTimeSec = 0.0
+        var envPitchValue = 0.5
+        var hasFilterEnv = false
+        var envFilterIndex = 0
+        var envFilterTimeSec = 0.0
+        var envFilterValue = 0.5
 
         // Volume fadeout — engaged after key-off, decays to 0 at rate inst.volumeFadeoutLow.
         var fadeoutVolume = 1.0
@@ -3823,6 +4195,37 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         var activeVibratoWaveform  = 0     // bits 0-2 only
         val activeSampleLoopSustain: Boolean get() = (activeLoopMode and 0x04) != 0
 
+        // Active-envelope view — snapshot of the base instrument's envelopes or, when an
+        // Ixmp patch overrides them (v/p/f/P/x blocks), the patch's. The advance / key-lift
+        // / fadeout / filter code reads these instead of inst.* so per-patch SF2 ADSR works.
+        // Set by [resolveActiveEnvelopes] from every trigger (alongside the active sample).
+        var activeVolEnv: Array<TaudInstEnvPoint> = Array(25) { TaudInstEnvPoint(0x3F, ThreeFiveMiniUfloat(0)) }
+        var activeVolEnvLoop = 0
+        var activeVolEnvSustain = 0
+        var activePanEnv: Array<TaudInstEnvPoint> = Array(25) { TaudInstEnvPoint(0x80, ThreeFiveMiniUfloat(0)) }
+        var activePanEnvLoop = 0
+        var activePanEnvSustain = 0
+        var activePitchEnv: Array<TaudInstEnvPoint> = Array(25) { TaudInstEnvPoint(0x80, ThreeFiveMiniUfloat(0)) }
+        var activePitchEnvLoop = 0
+        var activePitchEnvSustain = 0
+        var activeFilterEnv: Array<TaudInstEnvPoint> = Array(25) { TaudInstEnvPoint(0x80, ThreeFiveMiniUfloat(0)) }
+        var activeFilterEnvLoop = 0
+        var activeFilterEnvSustain = 0
+        var activeFadeoutStep = 0          // combined 12-bit fadeout (base byte 172-173 or patch x)
+        var activeDefaultCutoff = 0xFF
+        var activeDefaultResonance = 0xFF
+        // Filter interpretation mode (base byte 173 bit 4 / patch 'x' flag bit 0):
+        //   false (IT) : activeDefaultCutoff/Resonance + currentCutoff/Resonance are IT bytes
+        //                0..254 (255 = off); refreshVoiceFilter uses the IT cutoff/dmpfac maths.
+        //   true  (SF) : they are 16-bit — cutoff = SoundFont absolute cents, resonance =
+        //                centibels above DC gain (0xFFFF = off); refreshVoiceFilter uses the
+        //                SF maths (freq = 8.176·2^(cents/1200), dmpfac = 10^(−Qcb/200)).
+        var filterSfMode = false
+        // SF2 initialAttenuation as a linear amplitude multiplier (1.0 = unity), resolved from
+        // the active patch's 'x' block or the base instrument. Applied in the mixer alongside —
+        // and independently of — velocity / channel volume / instGlobalVolume.
+        var activeAttenGain = 1.0
+
         // NES 2A03 DMC counter for INTERP_NES_DPCM (interpolation mode 5).
         // 7-bit unsigned (0..127), slews ±2 per output sample as the sigma-delta
         // bitstream is generated on the fly. Seeded to mid-rail (63) on every
@@ -3830,9 +4233,10 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         // from 0 to reach a typical instrument's DC level.
         var nesDpcmCounter = 63
 
-        // Filter / cutoff state — drives the per-voice IT-compatible 2-pole resonant LPF.
-        // Convention: 255 = filter off (matches IT's high-bit-clear sentinel);
-        //             0..254 = active range mirroring IT 0..127 at double resolution.
+        // Filter / cutoff state — drives the per-voice 2-pole resonant LPF.
+        // IT mode: 255 = off, 0..254 = IT 0..127 at double resolution.
+        // SF mode (filterSfMode): cutoff = SoundFont absolute cents, resonance = centibels
+        //   above DC gain (0xFFFF = off). See [refreshVoiceFilter].
         var currentCutoff = 0xFF
         var currentResonance = 0xFF
         // IT 2-pole IIR-only state (updated per output sample) and cached coefficients
@@ -4192,6 +4596,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                     it.nnaOverride = -1
                     it.volEnvOn = true; it.panEnvOn = true; it.pfEnvOn = true
                     it.noteFading = false
+                    it.layerMixGain = 1.0; it.isLayerChild = false; it.layerRelDetune = 0
                     // "What's playing" state — must be cleared alongside the volume reset
                     // above, otherwise a voice can carry a stale instrumentId from a prior
                     // session into a freshly-reset volume slot. Concretely: end of session
@@ -4212,8 +4617,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
                     it.envIndex = 0; it.envTimeSec = 0.0; it.envVolume = 1.0
                     it.envPanIndex = 0; it.envPanTimeSec = 0.0; it.envPan = 0.5
                     it.hasPanEnv = false
-                    it.envPfIndex = 0; it.envPfTimeSec = 0.0; it.envPfValue = 0.5
-                    it.hasPfEnv = false; it.envPfIsFilter = false
+                    it.envPitchIndex = 0; it.envPitchTimeSec = 0.0; it.envPitchValue = 0.5
+                    it.envFilterIndex = 0; it.envFilterTimeSec = 0.0; it.envFilterValue = 0.5
+                    it.hasPitchEnv = false; it.hasFilterEnv = false
                     it.fadeoutVolume = 1.0
                     it.rampOutSamples = 0; it.rampOutGain = 0.0; it.rampOutStep = 0.0
                     it.noteVal = 0x0000; it.basePitch = 0x4000
@@ -4310,9 +4716,11 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
     /**
      * One Ixmp "extra sample" patch — overlays sample-scope state on a base instrument
      * for a (noteVal, rowVolume) rectangle. See terranmon.txt "Ixmp. Instrument extra
-     * samples" for the on-wire layout. Envelopes, fadeout, NNA / DCT / DCA, filter,
-     * pitch-pan, IGV and other instrument-scope fields stay on the base TaudInst —
-     * only the fields below override.
+     * samples" for the on-wire layout. Sample-scope fields always override; the optional
+     * v / p / f / P / x blocks (since 2026-06-13) additionally override the base
+     * instrument's volume / pan / filter / pitch envelopes and fadeout+cutoff+resonance.
+     * Any block left absent (null env / hasExtra == false) defers to the base TaudInst,
+     * along with NNA / DCT / DCA, pitch-pan, IGV and other instrument-scope fields.
      *
      * Sentinels: defaultPan == 0xFF, defaultNoteVolume == 0, vibratoWaveform == 0xFF
      * all mean "inherit the base instrument's value". samplingRate == 0 would silence
@@ -4337,10 +4745,48 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         val vibratoSweep: Int,
         val vibratoDepth: Int,
         val vibratoRate: Int,
-        val vibratoWaveform: Int          // 0..7; 0xFF = no override
+        val vibratoWaveform: Int,         // 0..7; 0xFF = no override
+        // Optional per-patch envelope/scalar overrides (null/false = defer to base inst).
+        // Each env carries its own LOOP and SUSTAIN words (same encoding as the base inst).
+        val volEnv: Array<TaudInstEnvPoint>? = null,   // 'v' block
+        val volEnvLoop: Int = 0,
+        val volEnvSustain: Int = 0,
+        val panEnv: Array<TaudInstEnvPoint>? = null,   // 'p' block
+        val panEnvLoop: Int = 0,
+        val panEnvSustain: Int = 0,
+        val filterEnv: Array<TaudInstEnvPoint>? = null,// 'f' block → drives cutoff
+        val filterEnvLoop: Int = 0,
+        val filterEnvSustain: Int = 0,
+        val pitchEnv: Array<TaudInstEnvPoint>? = null, // 'P' block → drives pitch
+        val pitchEnvLoop: Int = 0,
+        val pitchEnvSustain: Int = 0,
+        val hasExtra: Boolean = false,                 // 'x' block present
+        val fadeoutStep: Int = 0,                      // combined 12-bit fadeout
+        val filterSfMode: Boolean = false,             // 'x' flag bit 0: false = IT, true = SoundFont
+        val extraCutoff: Int = 0xFF,                   // default cutoff — IT byte (255=off) or 16-bit SF cents (0xFFFF=off)
+        val extraResonance: Int = 0xFF,                // default resonance — IT byte (255=off) or 16-bit SF centibels (0xFFFF=off)
+        val extraInitialAttenOctet: Int = 0            // 'x' block per-patch initialAttenuation (dB-table octet; 0 = unity sentinel)
     ) {
         val sampleLoopSustain: Boolean get() = (loopMode and 0x04) != 0
     }
+
+    /**
+     * One layer of a Metainstrument (terranmon.txt "Metainstrument definition").
+     * References a NORMAL instrument sounded simultaneously with the other layers,
+     * gated by its (pitch × volume) rectangle, pitch-shifted by [detune] (added to
+     * the trigger noteVal) and mixed at [mixOctet] (Perceptually-Significant-Octet
+     * dB; 159 = unity). The raw octet is kept; the engine converts it to a linear
+     * gain via [META_MIX_GAIN] at trigger time. */
+    data class MetaLayer(
+        val instIdx: Int,
+        val mixOctet: Int,
+        val detune: Int,            // signed 4096-TET
+        val pitchStart: Int,
+        val pitchEnd: Int,
+        val volStart: Int,
+        val volEnd: Int,
+    )
+
     /**
      * 256-byte instrument record (terranmon.txt:2001+).
      *
@@ -4433,7 +4879,13 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         var panEnvSustainWord: Int,         // bytes 191-192
         var pfEnvSustainWord: Int,          // bytes 193-194
         var dupCheckFlag: Int,              // byte 195 (relocated from 189)
-        var defaultNoteVolume: Int          // byte 196 — per-trigger rowVolume default
+        var defaultNoteVolume: Int,         // byte 196 — per-trigger rowVolume default
+        // 2nd pitch/filter envelope (bytes 197-250) — the mandatory complement of the
+        // byte 19.. pf-env (one pitch, one filter). Lets SF2's single modulation
+        // envelope drive both targets at once; IT/XM leave this absent (LOOP-word P=0).
+        var pf2EnvLoop: Int,                // bytes 197-198 (LOOP word, m-bit complements byte 19)
+        var pf2EnvSustainWord: Int,         // bytes 199-200
+        var pf2Envelopes: Array<TaudInstEnvPoint>  // bytes 201-250
     ) {
         constructor(index: Int) : this(
             index, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF,
@@ -4442,12 +4894,20 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             Array(25) { TaudInstEnvPoint(0x80, ThreeFiveMiniUfloat(0)) },
             0, 0, 0, 0, 0, 0x80, 0x5000, 0, 0, 0xFF, 0,
             0, 0, 0, 0, 0, 0, 0, 0,
-            0
+            0,
+            0, 0,
+            Array(25) { TaudInstEnvPoint(0x80, ThreeFiveMiniUfloat(0)) }
         )
 
         /** Sample-flag byte 14 bit 2 — when set, the sample loop is a sustain loop:
          *  it loops while the note is held and is escaped on key-off. */
         val sampleLoopSustain: Boolean get() = (loopMode and 0x04) != 0
+        /** Key Lift — instrumentFlag bit 5 (terranmon byte 186, NNA pattern 0b100).
+         *  MIDI-exact key release: on key-off the volume-envelope playhead jumps
+         *  straight to the sustain-end node so the post-sustain (release) nodes
+         *  play immediately, instead of IT's walk through the remaining
+         *  hold/decay nodes first. See [applyKeyLift]. */
+        val nnaKeyLift: Boolean get() = (instrumentFlag ushr 5) and 1 != 0
         /** New note action — instrumentFlag bits 0-1.
          *  0=note off, 1=note cut, 2=continue, 3=note fade. */
         val newNoteAction: Int get() = instrumentFlag and 0x03
@@ -4461,9 +4921,22 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         /** Duplicate Check Action — 0=note cut, 1=note off, 2=note fade. */
         val duplicateCheckAction: Int get() = (dupCheckFlag ushr 2) and 0x03
 
-        // Reserved padding at offsets 197..255 (59 bytes per instrument).
-        // Byte 196 is the new "default note volume" field — see triggerNote.
-        private val reserved = ByteArray(59)
+        /** Filter interpretation mode — byte 173 bit 4 (`0b 000m_ffff`). false = ImpulseTracker
+         *  (8-bit cutoff/resonance in bytes 182/183), true = SoundFont (16-bit: cutoff cents in
+         *  byte 182<<8|252, resonance centibels in byte 183<<8|253). See [refreshVoiceFilter]. */
+        val filterSfMode: Boolean get() = (fadeoutHigh ushr 4) and 1 != 0
+        /** Default cutoff resolved for the active filter mode: 8-bit IT byte, or the 16-bit
+         *  SF absolute-cents value (high byte 182, low byte 252). */
+        val defaultCutoff16: Int get() =
+            if (filterSfMode) ((defaultCutoff and 0xFF) shl 8) or (reserved[1].toInt() and 0xFF) else defaultCutoff
+        /** Default resonance resolved for the active filter mode: 8-bit IT byte, or the 16-bit
+         *  SF centibel value (high byte 183, low byte 253). */
+        val defaultResonance16: Int get() =
+            if (filterSfMode) ((defaultResonance and 0xFF) shl 8) or (reserved[2].toInt() and 0xFF) else defaultResonance
+
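+        // Backing bytes for offsets 251..255, indexed by (offset - 251): slot 0 is unused (byte 251 is
+        // initialAttenOctet), slots 1-2 hold the SF cutoff/resonance low bytes (252/253). Bytes 197..250 are now the 2nd pf-envelope (pf2EnvLoop/pf2EnvSustainWord/pf2Envelopes).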
+        private val reserved = ByteArray(5)
 
         // Optional Ixmp "extra sample" patches — non-null when an Ixmp block was uploaded
         // for this instrument. Patches are scanned in order at trigger time; first hit on
@@ -4481,6 +4954,68 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             return null
         }
 
+        // ── Metainstrument (terranmon.txt "Metainstrument definition") ──
+        // Non-null when this slot's u32 sample pointer has its high 16 bits == 0xFFFF.
+        // The instrument then carries NO sample of its own; a trigger fans out into one
+        // voice per matching layer. metaRaw retains the verbatim 256-byte record so
+        // [getByte]/capture round-trips losslessly (parsing octet→gain is one-way).
+        var metaLayers: Array<MetaLayer>? = null
+        var metaRaw: IntArray? = null
+        val isMeta: Boolean get() = metaLayers != null
+
+        // initialAttenuation — a static per-instrument gain as a "Perceptually Significant
+        // Octet to Decibel Table" octet (byte 251; 159 = unity, 111 = −6 dB; same table as the
+        // Metainstrument layer mix). 0 = unity (unset sentinel) so legacy files (byte 251 was
+        // reserved/zero) are unaffected. Applied as a velocity-INDEPENDENT amplitude multiplier
+        // in the mixer, NOT folded into the volume envelope (so the envelope keeps full 0..63
+        // resolution). The per-patch 'x' block carries its own override. See [attenGainOf].
+        var initialAttenOctet: Int = 0
+
+        /** All layers whose (pitch × volume) rectangle contains the trigger, in record
+         *  order. Empty when none match (the trigger then sounds nothing). */
+        fun resolveMetaLayers(noteVal: Int, rowVolume: Int): List<MetaLayer> {
+            val layers = metaLayers ?: return emptyList()
+            return layers.filter {
+                noteVal in it.pitchStart..it.pitchEnd && rowVolume in it.volStart..it.volEnd
+            }
+        }
+
+        /** Load a full 256-byte instrument record. Detects the Metainstrument sentinel
+         *  (u32 sample-pointer high 16 bits == 0xFFFF) and parses its layer table;
+         *  otherwise falls back to the per-byte [setByte] field assignment. */
+        fun loadRecord(b: IntArray) {
+            val sp = (b[0] and 0xFF) or ((b[1] and 0xFF) shl 8) or
+                     ((b[2] and 0xFF) shl 16) or ((b[3] and 0xFF) shl 24)
+            if ((sp ushr 16) and 0xFFFF == 0xFFFF) {
+                // byte 1 = layer count, clamped to the whole 10-byte entries that actually fit in b
+                val count = minOf((sp ushr 8) and 0xFF, (b.size - 4) / 10)
+                val layers = ArrayList<MetaLayer>(count)
+                var o = 4
+                repeat(count) {
+                    val instIdx = b[o] and 0xFF
+                    val mixOctet = b[o + 1] and 0xFF
+                    val detRaw = (b[o + 2] and 0xFF) or ((b[o + 3] and 0xFF) shl 8)
+                    val detune = if (detRaw >= 0x8000) detRaw - 0x10000 else detRaw
+                    val pStart = (b[o + 4] and 0xFF) or ((b[o + 5] and 0xFF) shl 8)
+                    val pEnd   = (b[o + 6] and 0xFF) or ((b[o + 7] and 0xFF) shl 8)
+                    val vStart = b[o + 8] and 0xFF
+                    val vEnd   = b[o + 9] and 0xFF
+                    // Skip self-, zero- and out-of-range references. Whether the target is itself
+                    // a meta is NOT checked here; the trigger path guards against that recursion.
+                    if (instIdx in 1..255 && instIdx != index)
+                        layers.add(MetaLayer(instIdx, mixOctet, detune, pStart, pEnd, vStart, vEnd))
+                    o += 10
+                }
+                metaLayers = if (layers.isEmpty()) null else layers.toTypedArray()
+                metaRaw = if (metaLayers != null) b.copyOf(256) else null
+                extraPatches = null
+            } else {
+                metaLayers = null
+                metaRaw = null
+                for (i in 0 until minOf(256, b.size)) setByte(i, b[i] and 0xFF)
+            }
+        }
+
         // Funk repeat (S$Fx00) bit-mask — non-destructive XOR overlay across the loop region.
         // Lazily allocated; a 1-bit flips the byte, a 0-bit leaves it intact.
         // Mask is sized for the loop length at allocation time; if the loop bounds change
@@ -4523,6 +5058,13 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
         }
 
         fun getByte(offset: Int): Byte = when (offset) {
+            // Metainstrument records read back verbatim from the stored raw bytes so
+            // capture (captureSampleInstBlob) round-trips them losslessly.
+            in 0..255 -> metaRaw?.let { (it[offset] and 0xFF).toByte() } ?: getByteNormal(offset)
+            else -> throw InternalError("Bad offset $offset")
+        }
+
+        private fun getByteNormal(offset: Int): Byte = when (offset) {
             0 -> samplePtr.toByte()
             1 -> samplePtr.ushr(8).toByte()
             2 -> samplePtr.ushr(16).toByte()
@@ -4581,7 +5123,13 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             194 -> pfEnvSustainWord.ushr(8).toByte()
             195 -> dupCheckFlag.toByte()
             196 -> defaultNoteVolume.toByte()
-            in 197..255 -> reserved[offset - 197]
+            197 -> pf2EnvLoop.toByte()
+            198 -> pf2EnvLoop.ushr(8).toByte()
+            199 -> pf2EnvSustainWord.toByte()
+            200 -> pf2EnvSustainWord.ushr(8).toByte()
+            in 201..250 -> envPointGet(pf2Envelopes, 201, offset)
+            251 -> initialAttenOctet.toByte()
+            in 252..255 -> reserved[offset - 251]
             else -> throw InternalError("Bad offset $offset")
         }
 
@@ -4620,7 +5168,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
 
             171 -> { instGlobalVolume = byte and 0xFF }
             172 -> { volumeFadeoutLow = byte and 0xFF }
-            173 -> { fadeoutHigh = byte and 0x0F }
+            173 -> { fadeoutHigh = byte and 0x1F }   // bits 0-3 = fadeout high, bit 4 = SF filter mode
             174 -> { volumeSwing = byte and 0xFF }
             175 -> { vibratoSpeed = byte and 0xFF }
             176 -> { vibratoSweep = byte and 0xFF }
@@ -4644,7 +5192,13 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
             194 -> { pfEnvSustainWord = (pfEnvSustainWord and 0x00ff) or (byte shl 8) }
             195 -> { dupCheckFlag = byte and 0x0F }
             196 -> { defaultNoteVolume = byte and 0xFF }
-            in 197..255 -> { reserved[offset - 197] = byte.toByte() }
+            197 -> { pf2EnvLoop = (pf2EnvLoop and 0xff00) or byte }
+            198 -> { pf2EnvLoop = (pf2EnvLoop and 0x00ff) or (byte shl 8) }
+            199 -> { pf2EnvSustainWord = (pf2EnvSustainWord and 0xff00) or byte }
+            200 -> { pf2EnvSustainWord = (pf2EnvSustainWord and 0x00ff) or (byte shl 8) }
+            in 201..250 -> envPointSet(pf2Envelopes, 201, offset, byte)
+            251 -> { initialAttenOctet = byte and 0xFF }
+            in 252..255 -> { reserved[offset - 251] = byte.toByte() }
             else -> throw InternalError("Bad offset $offset")
         }
     }