#!/usr/bin/env python3 """midi2taud.py — Convert Standard MIDI (.mid) + SoundFont 2 (.sf2) to TSVM Taud (.taud) Usage: python3 midi2taud.py song.mid soundfont.sf2 [output.taud] [--perc-force-mapping BANK INST] [--rpb N] [--speed N] [--fadeout N] [--bend-epsilon CENTS] [--drum-keyoff] [-v] [--no-project-data] Behaviour (per midi2taud.md): * Pitch bends are preserved as much as possible. A note starting under a non-zero bend triggers directly at the bent 4096-TET pitch (Taud notes are 4096-TET, so the trigger encodes the exact shifted pitch). Bend movement during a note is approximated as linear segments: each segment is one row carrying the exact 4096-TET target note plus tone portamento (G $xxxx, units/tick) sized to land on the target by row end. Jittery curves are simplified via --bend-epsilon (cents). RPN 0,0 pitch-bend range messages are honoured; bend values are computed as floats from the full 14-bit word (MIDIs that only drive the MSB work transparently). * Both MIDI key-off idioms — real note-off messages and note-on with velocity 0 — are translated into Taud KEY_OFF. Percussion-channel key-offs are dropped by default (GM percussion ignores note-off, and emitting them would chop one-shot drum tails); --drum-keyoff re-enables. * The SF2 key/velocity sample-layering model is recreated faithfully. Each preset's zones are partitioned into the fewest mutually-DISJOINT layers (--max-layers cap, default 4); each layer becomes one normal Taud instrument with its zones as Ixmp patches (velocity axis round(vel × 63/127)). A preset needing >1 layer is emitted as a Metainstrument (terranmon.txt "Metainstrument definition"): the note references the meta slot and the engine fans out one voice per matching layer, so SF2's simultaneous layering (and detune-stacks) now sound — overlapping zones are no longer dropped. Single-layer presets stay plain instruments. Stereo SF2 samples are mixed to mono. Unused instruments, patches, and samples are trimmed. * The SF2 volume-envelope ADSR is preserved on the (instrument-scope) Taud volume envelope: delay/attack/hold/decay nodes and a sustain region held while the key is on. There is NO release leg — the SF2 *release segment* is the Volume Fadeout (with NNA Note Fade): on key-off the voice holds at the sustain node and fades to silence over the SF2 releaseVolEnv time (measured against the 100 dB envelope floor: releaseVolEnv·(1000−sus_cb)/ 1000 seconds). Per-layer Ixmp patches carry their own fadeout when their release differs. The canonical zone's ADSR represents the instrument. * Polyphony rides the engine's New Note Action (matching MIDI semantics): every instrument (drum kits included) gets NNA = Note Fade, so a voice column is reusable the moment its note releases — the release/fade tail moves to a background ghost on the next trigger and dies over its own release time. Voice budget defaults to 16 columns (--max-voices); overflow releases the oldest pedal-held or soonest-ending note early, not cut. * Sub-row timing is carried by S $Dx note delays (one row = `--speed` ticks, default 6; one beat = `--rpb` rows, default 4 → 1/24-beat grid). MIDI tempo changes map to T $xx00 set-tempo effects; channel volume / expression (CC7 × CC11) map to M $xx00 channel-volume effects so they never disturb the velocity-driven patch selection axis. """ import argparse import array import bisect import math import os import struct import sys from taud_common import ( set_verbose, vprint, TAUD_MAGIC, TAUD_VERSION, TAUD_HEADER_SIZE, TAUD_SONG_ENTRY, SAMPLEBIN_SIZE, INSTBIN_SIZE, SAMPLEINST_SIZE, SAMPLE_LEN_LIMIT, PATTERN_ROWS, PATTERN_BYTES, NUM_PATTERNS_MAX, NUM_CUES, CUE_SIZE, NUM_VOICES, NOTE_NOP, NOTE_KEYOFF, TAUD_C4, TOP_G, TOP_M, TOP_S, TOP_T, SEL_SET, SEL_FINE, CUE_INST_NOP, CUE_INST_HALT, resample_linear, encode_cue, deduplicate_patterns, encode_song_entry, compress_blob, build_project_data, cue_instruction_len, nearest_minifloat, IXMP_PAN_NO_OVERRIDE, atten_cb_to_octet, ) SIGNATURE = b'midi2taud/TSVM' # 14 bytes UNITS_PER_SEMI = 4096.0 / 12.0 # 4096-TET units per 12-TET semitone # Effect priorities for the shared per-cell effect slot. Higher wins when a # later pass needs the slot: SD note delays carry trigger timing and are # never overwritten; T tempo is global and may evict G/M; M only takes free # slots. PRIO_FREE = 0 PRIO_M = 1 PRIO_PORTA = 2 PRIO_DELAY = 3 PRIO_TEMPO = 4 def key_to_noteval(key: float) -> int: """MIDI key (float, 60 = middle C) → Taud 4096-TET noteVal (C4 = 0x5000).""" return max(0x20, min(0xFFFF, round(TAUD_C4 + (key - 60.0) * UNITS_PER_SEMI))) # ── MIDI parser ─────────────────────────────────────────────────────────────── def _read_varlen(data: bytes, pos: int): val = 0 while True: b = data[pos]; pos += 1 val = (val << 7) | (b & 0x7F) if not (b & 0x80): return val, pos def _parse_track(data: bytes, pos: int, end: int) -> list: """Parse one MTrk body → list of (abs_tick, event_tuple).""" evs = [] tick = 0 status = 0 while pos < end: delta, pos = _read_varlen(data, pos) tick += delta if pos >= end: break b = data[pos] if b & 0x80: status = b pos += 1 elif status < 0x80: vprint(f" warning: corrupt track data at {pos:#x}, truncating track") break if status == 0xFF: # meta mtype = data[pos]; pos += 1 ln, pos = _read_varlen(data, pos) payload = data[pos:pos+ln]; pos += ln if mtype == 0x51 and ln >= 3: uspq = int.from_bytes(payload[:3], 'big') if uspq > 0: evs.append((tick, ('tempo', 60000000.0 / uspq))) elif mtype == 0x03: txt = payload.decode('latin-1', errors='replace').strip() if txt: evs.append((tick, ('title', txt))) elif mtype == 0x2F: evs.append((tick, ('eot',))) break status = 0 # meta cancels running status elif status in (0xF0, 0xF7): # sysex ln, pos = _read_varlen(data, pos) pos += ln status = 0 else: hi = status & 0xF0 ch = status & 0x0F if hi in (0xC0, 0xD0): d1 = data[pos]; pos += 1 if hi == 0xC0: evs.append((tick, ('prog', ch, d1))) else: d1 = data[pos]; d2 = data[pos+1]; pos += 2 if hi == 0x90: if d2 > 0: evs.append((tick, ('on', ch, d1, d2))) else: evs.append((tick, ('off', ch, d1))) # vel-0 idiom elif hi == 0x80: evs.append((tick, ('off', ch, d1))) elif hi == 0xB0: evs.append((tick, ('cc', ch, d1, d2))) elif hi == 0xE0: evs.append((tick, ('bend', ch, (d2 << 7) | d1))) # 0xA0 polyphonic aftertouch: ignored return evs def parse_midi(path: str): """Returns (division, merged_events). division: ('ppq', tpq) or ('smpte', fps, tpf). merged_events: [(tick, seq, event_tuple)] sorted.""" with open(path, 'rb') as f: data = f.read() if data[:4] == b'RIFF': # RMID wrapper pos = 12 while pos + 8 <= len(data): cid = data[pos:pos+4] sz = struct.unpack_from('I', data, 4)[0] fmt, ntrk, div = struct.unpack_from('>HHH', data, 8) if fmt == 2: vprint(" warning: SMF format 2 — tracks merged on a shared timeline") if div & 0x8000: fps = -struct.unpack_from('b', data, 12)[0] tpf = div & 0xFF division = ('smpte', fps, tpf) else: division = ('ppq', max(1, div)) pos = 8 + hlen merged = [] seq = 0 tracks_found = 0 while pos + 8 <= len(data) and tracks_found < ntrk: cid = data[pos:pos+4] sz = struct.unpack_from('>I', data, pos+4)[0] body_start = pos + 8 pos = body_start + sz if cid != b'MTrk': continue tracks_found += 1 for tick, ev in _parse_track(data, body_start, min(pos, len(data))): merged.append((tick, seq, ev)) seq += 1 merged.sort(key=lambda e: (e[0], e[1])) return division, merged # ── Note / controller extraction ────────────────────────────────────────────── class Note: __slots__ = ('ch', 'key', 'vel', 'start_ft', 'end_ft', 'inst_key', 'bend0', 'slot', 'voice', 'drum', 'pedal_ft') def __init__(self, ch, key, vel, start_ft, inst_key, bend0): self.ch = ch self.key = key self.vel = vel self.start_ft = start_ft self.end_ft = None self.inst_key = inst_key self.bend0 = bend0 self.slot = 0 self.voice = -1 self.drum = (inst_key[0] == 'd') self.pedal_ft = None # physical key-up time when only the pedal holds it class _ChState: __slots__ = ('bank', 'prog', 'rpn_msb', 'rpn_lsb', 'range_semi', 'range_cents', 'cur_bend', 'bend_ft', 'bend_val', 'cc7_ft', 'cc7_val', 'cc11_ft', 'cc11_val', 'cc10_ft', 'cc10_val', 'sus', 'pending', 'active') def __init__(self): self.bank = 0 self.prog = 0 self.rpn_msb = 0x7F self.rpn_lsb = 0x7F self.range_semi = 2 self.range_cents = 0 self.cur_bend = 0.0 self.bend_ft = [0]; self.bend_val = [0.0] self.cc7_ft = [0]; self.cc7_val = [100] # GM default self.cc11_ft = [0]; self.cc11_val = [127] self.cc10_ft = []; self.cc10_val = [] # empty = never set self.sus = False self.pending = [] # notes held by the sustain pedal self.active = {} # key → Note def _curve_at(fts: list, vals: list, ft: int, default): i = bisect.bisect_right(fts, ft) - 1 return vals[i] if i >= 0 else default def _curve_push(fts: list, vals: list, ft: int, val): if fts and fts[-1] == ft: vals[-1] = val else: fts.append(ft); vals.append(val) class Song: __slots__ = ('notes', 'channels', 'tempo_ft', 'tempo_bpm', 'title', 'end_ft') def extract_song(division, merged, rpb: int, speed: int) -> Song: """Walk merged MIDI events, producing note instances (with both key-off idioms resolved to a definite end time), per-channel bend/CC curves, and the tempo map — all on the Taud fine-tick (ft) grid where one row = `speed` fts and one beat = `rpb` rows.""" if division[0] == 'ppq': tpq = division[1] def to_ft(tick): return round(tick * rpb * speed / tpq) else: _, fps, tpf = division tps = max(1.0, float(fps * tpf)) # ticks per second # SMPTE timing has no musical beats: pin a 120 BPM equivalent grid. def to_ft(tick): return round((tick / tps) * 2.0 * rpb * speed) vprint(" info: SMPTE division — pinned to a 120 BPM-equivalent grid") chs = [_ChState() for _ in range(16)] notes = [] tempo_ft, tempo_bpm = [], [] title = None max_ft = 0 def end_note(n: Note, ft: int): if n.end_ft is None: n.end_ft = max(ft, n.start_ft) for tick, _seq, ev in merged: ft = to_ft(tick) if ft > max_ft: max_ft = ft kind = ev[0] if kind == 'on': _, ch, key, vel = ev st = chs[ch] prev = st.active.pop(key, None) if prev is not None: # re-strike: close the old one end_note(prev, ft) ik = ('d', st.prog) if ch == 9 else ('m', st.bank, st.prog) n = Note(ch, key, vel, ft, ik, st.cur_bend) st.active[key] = n notes.append(n) elif kind == 'off': _, ch, key = ev st = chs[ch] n = st.active.pop(key, None) if n is not None: if st.sus: n.pedal_ft = ft st.pending.append(n) else: end_note(n, ft) elif kind == 'bend': _, ch, val14 = ev st = chs[ch] # MUST be float maths: 14-bit word (or MSB-only 7-bit source, # which simply leaves the low 7 bits zero) → ±range semitones. norm = (float(val14) - 8192.0) / 8192.0 semis = norm * (st.range_semi + st.range_cents / 100.0) st.cur_bend = semis _curve_push(st.bend_ft, st.bend_val, ft, semis) elif kind == 'cc': _, ch, num, val = ev st = chs[ch] if num == 0: st.bank = val elif num == 7: _curve_push(st.cc7_ft, st.cc7_val, ft, val) elif num == 10: _curve_push(st.cc10_ft, st.cc10_val, ft, val) elif num == 11: _curve_push(st.cc11_ft, st.cc11_val, ft, val) elif num == 64: if val >= 64: st.sus = True else: st.sus = False for n in st.pending: end_note(n, ft) st.pending.clear() elif num == 100: st.rpn_lsb = val elif num == 101: st.rpn_msb = val elif num in (98, 99): # NRPN deselects RPN st.rpn_msb = st.rpn_lsb = 0x7F elif num == 6: if st.rpn_msb == 0 and st.rpn_lsb == 0: st.range_semi = val elif num == 38: if st.rpn_msb == 0 and st.rpn_lsb == 0: st.range_cents = val elif num in (120, 123): # all sound / notes off for n in list(st.active.values()): end_note(n, ft) st.active.clear() for n in st.pending: end_note(n, ft) st.pending.clear() elif num == 121: # reset all controllers st.cur_bend = 0.0 _curve_push(st.bend_ft, st.bend_val, ft, 0.0) _curve_push(st.cc11_ft, st.cc11_val, ft, 127) st.sus = False for n in st.pending: end_note(n, ft) st.pending.clear() st.rpn_msb = st.rpn_lsb = 0x7F elif kind == 'prog': _, ch, val = ev chs[ch].prog = val elif kind == 'tempo': tempo_ft.append(ft); tempo_bpm.append(ev[1]) elif kind == 'title': if title is None: title = ev[1] # Close anything still ringing at end-of-file. for st in chs: for n in list(st.active.values()): end_note(n, max_ft) st.active.clear() for n in st.pending: end_note(n, max_ft) st.pending.clear() dropped = [n for n in notes if n.end_ft <= n.start_ft] if dropped: vprint(f" info: dropped {len(dropped)} zero-length note(s)") notes = [n for n in notes if n.end_ft > n.start_ft] notes.sort(key=lambda n: (n.start_ft, n.ch, n.key)) song = Song() song.notes = notes song.channels = chs song.tempo_ft = tempo_ft song.tempo_bpm = tempo_bpm song.title = title song.end_ft = max_ft return song # ── SF2 parser ──────────────────────────────────────────────────────────────── GEN_START_OFF = 0 GEN_END_OFF = 1 GEN_STARTLOOP_OFF = 2 GEN_ENDLOOP_OFF = 3 GEN_START_COARSE = 4 GEN_MODENV2PITCH = 7 # modEnvToPitch (signed cents at full mod-env) GEN_FILTERFC = 8 # initialFilterFc (absolute cents; default 13500 = open) GEN_FILTERQ = 9 # initialFilterQ (cB of resonance; default 0) GEN_MODENV2FILT = 11 # modEnvToFilterFc (signed cents at full mod-env) GEN_END_COARSE = 12 GEN_PAN = 17 GEN_DELAY_MODENV = 25 GEN_ATTACK_MODENV = 26 GEN_HOLD_MODENV = 27 GEN_DECAY_MODENV = 28 GEN_SUSTAIN_MODENV = 29 # 0.1% units of full-scale DECREASE (0..1000) GEN_RELEASE_MODENV = 30 GEN_DELAY_VOLENV = 33 GEN_ATTACK_VOLENV = 34 GEN_HOLD_VOLENV = 35 GEN_DECAY_VOLENV = 36 GEN_SUSTAIN_VOLENV = 37 # centibels of attenuation, 0..1440 GEN_RELEASE_VOLENV = 38 GEN_INSTRUMENT = 41 GEN_KEYRANGE = 43 GEN_VELRANGE = 44 GEN_STARTLOOP_COARSE = 45 GEN_INITATTEN = 48 # initialAttenuation (cB; per-zone static gain) GEN_ENDLOOP_COARSE = 50 GEN_COARSETUNE = 51 GEN_FINETUNE = 52 GEN_SAMPLEID = 53 GEN_SAMPLEMODES = 54 GEN_SCALETUNING = 56 GEN_ROOTKEY = 58 _SIGNED_GENS = frozenset({GEN_START_OFF, GEN_END_OFF, GEN_STARTLOOP_OFF, GEN_ENDLOOP_OFF, GEN_START_COARSE, GEN_END_COARSE, GEN_STARTLOOP_COARSE, GEN_ENDLOOP_COARSE, GEN_PAN, GEN_COARSETUNE, GEN_FINETUNE, GEN_DELAY_VOLENV, GEN_ATTACK_VOLENV, GEN_HOLD_VOLENV, GEN_DECAY_VOLENV, GEN_RELEASE_VOLENV, GEN_MODENV2PITCH, GEN_MODENV2FILT, GEN_DELAY_MODENV, GEN_ATTACK_MODENV, GEN_HOLD_MODENV, GEN_DECAY_MODENV, GEN_RELEASE_MODENV, # cB/cents value-generators that are ADDITIVE (and so may be # NEGATIVE) at the preset level. Their instrument-level absolutes # all sit well under 0x8000 (atten≤1440, filterFc≤13500, Q≤960, # sustain≤1440/1000), so reading them signed is lossless there and # correct for relative preset deltas. Without this a preset zone # carrying e.g. initialAttenuation 0xFFFE (a −2 cB boost) was read # as 65534 cB → ~−6575 dB → the whole instrument went silent # (SGM 'Synth Strings 1' vol-env nodes stuck at 0). GEN_INITATTEN, GEN_FILTERFC, GEN_FILTERQ, GEN_SUSTAIN_VOLENV, GEN_SUSTAIN_MODENV}) def _timecents_to_sec(tc: int) -> float: """SF2 timecents → seconds (2^(tc/1200)); default -12000 ≈ 1 ms.""" return 2.0 ** (max(-12000, min(8000, tc)) / 1200.0) class SFSampleHdr: __slots__ = ('name', 'start', 'end', 'loopstart', 'loopend', 'rate', 'origkey', 'correction', 'link', 'stype') class SFZone: """One effective preset×instrument zone (post combination).""" __slots__ = ('keylo', 'keyhi', 'vello', 'velhi', 'sample', 'rootkey', 'tune_cents', 'modes', 'pan', 'scale', 'a_start', 'a_end', 'loop_abs_start', 'loop_abs_end', 'pair', 'rate', 'name', 'env_delay', 'env_attack', 'env_hold', 'env_decay', 'env_sustain_cb', 'env_release', # initialAttenuation (cB static per-zone gain) + static filter. 'atten_cb', 'filter_fc', 'filter_q', # modulation envelope (drives pitch and/or filter) + its targets. 'm_delay', 'm_attack', 'm_hold', 'm_decay', 'm_sustain_pc', 'm_release', 'me2pitch', 'me2filt') class SF2: __slots__ = ('presets', 'shdrs', 'file', 'smpl_off', 'smpl_size') def read_frames(self, start_frame: int, n_frames: int) -> array.array: """Read n_frames of 16-bit PCM starting at absolute frame index.""" n_avail = max(0, min(n_frames, self.smpl_size // 2 - start_frame)) a = array.array('h') if n_avail <= 0: return a self.file.seek(self.smpl_off + start_frame * 2) a.frombytes(self.file.read(n_avail * 2)) if sys.byteorder == 'big': a.byteswap() return a def _gen_amount(oper: int, raw: int) -> int: if oper in _SIGNED_GENS: return raw - 0x10000 if raw >= 0x8000 else raw return raw def _parse_bags(bag_data, gen_data, start_bag, end_bag, terminal_gen): """Resolve bags [start_bag, end_bag) into (global_gens, [zone_gens...]). Each zone_gens is {oper: amount}; zones lacking the terminal generator other than a leading global zone are discarded per the SF2 spec.""" glob = {} zones = [] n_bags = len(bag_data) // 4 for bi in range(start_bag, end_bag): g0 = struct.unpack_from(' SF2: f = open(path, 'rb') hdr = f.read(12) if hdr[:4] != b'RIFF' or hdr[8:12] != b'sfbk': sys.exit("error: not an SF2 file (bad RIFF/sfbk magic)") riff_end = 8 + struct.unpack_from('> 8) & 0xFF pvlo, pvhi = pv & 0xFF, (pv >> 8) & 0xFF for iz_raw in izones: iz = dict(iglob); iz.update(iz_raw) si = iz[GEN_SAMPLEID] if not (0 <= si < len(sf.shdrs)): continue s = sf.shdrs[si] if s.stype & 0x8000: # ROM sample continue ik = iz.get(GEN_KEYRANGE, 0x7F00) iv = iz.get(GEN_VELRANGE, 0x7F00) klo = max(ik & 0xFF, pklo); khi = min((ik >> 8) & 0xFF, pkhi) vlo = max(iv & 0xFF, pvlo); vhi = min((iv >> 8) & 0xFF, pvhi) if klo > khi or vlo > vhi: continue z = SFZone() z.keylo, z.keyhi = klo, khi z.vello, z.velhi = vlo, vhi z.sample = si rk = iz.get(GEN_ROOTKEY, -1) z.rootkey = rk if 0 <= rk <= 127 else \ (s.origkey if s.origkey <= 127 else 60) z.tune_cents = ((iz.get(GEN_COARSETUNE, 0) + pz.get(GEN_COARSETUNE, 0)) * 100 + iz.get(GEN_FINETUNE, 0) + pz.get(GEN_FINETUNE, 0) + s.correction) z.modes = iz.get(GEN_SAMPLEMODES, 0) & 3 z.pan = max(-500, min(500, iz.get(GEN_PAN, 0) + pz.get(GEN_PAN, 0))) z.scale = iz.get(GEN_SCALETUNING, 100) if z.scale != 100 and klo != khi and not scale_warned: vprint(" warning: scaleTuning != 100 on a multi-key zone " "— pitch is exact only at the zone's centre key") scale_warned = True # Volume-envelope ADSR (timecents at inst level, preset adds). z.env_delay = _timecents_to_sec(iz.get(GEN_DELAY_VOLENV, -12000) + pz.get(GEN_DELAY_VOLENV, 0)) z.env_attack = _timecents_to_sec(iz.get(GEN_ATTACK_VOLENV, -12000) + pz.get(GEN_ATTACK_VOLENV, 0)) z.env_hold = _timecents_to_sec(iz.get(GEN_HOLD_VOLENV, -12000) + pz.get(GEN_HOLD_VOLENV, 0)) z.env_decay = _timecents_to_sec(iz.get(GEN_DECAY_VOLENV, -12000) + pz.get(GEN_DECAY_VOLENV, 0)) z.env_sustain_cb = max(0, min(1440, iz.get(GEN_SUSTAIN_VOLENV, 0) + pz.get(GEN_SUSTAIN_VOLENV, 0))) z.env_release = _timecents_to_sec(iz.get(GEN_RELEASE_VOLENV, -12000) + pz.get(GEN_RELEASE_VOLENV, 0)) # initialAttenuation: per-zone static gain in cB (preset adds to inst). # Clamped to the SF2 spec range [0, 1440] so any out-of-range value can # never collapse the folded vol-env to silence (see _SIGNED_GENS note). z.atten_cb = max(0, min(1440, iz.get(GEN_INITATTEN, 0) + pz.get(GEN_INITATTEN, 0))) # Static low-pass filter. initialFilterFc is absolute cents (default # 13500 ≈ open); initialFilterQ is cB of resonance (default 0). z.filter_fc = iz.get(GEN_FILTERFC, 13500) + pz.get(GEN_FILTERFC, 0) z.filter_q = max(0, iz.get(GEN_FILTERQ, 0) + pz.get(GEN_FILTERQ, 0)) # Modulation envelope (drives pitch via modEnvToPitch and/or filter via # modEnvToFilterFc). Times are timecents; sustain is 0.1%-of-full DECREASE. z.m_delay = _timecents_to_sec(iz.get(GEN_DELAY_MODENV, -12000) + pz.get(GEN_DELAY_MODENV, 0)) z.m_attack = _timecents_to_sec(iz.get(GEN_ATTACK_MODENV, -12000) + pz.get(GEN_ATTACK_MODENV, 0)) z.m_hold = _timecents_to_sec(iz.get(GEN_HOLD_MODENV, -12000) + pz.get(GEN_HOLD_MODENV, 0)) z.m_decay = _timecents_to_sec(iz.get(GEN_DECAY_MODENV, -12000) + pz.get(GEN_DECAY_MODENV, 0)) z.m_sustain_pc = max(0, min(1000, iz.get(GEN_SUSTAIN_MODENV, 0) + pz.get(GEN_SUSTAIN_MODENV, 0))) z.m_release = _timecents_to_sec(iz.get(GEN_RELEASE_MODENV, -12000) + pz.get(GEN_RELEASE_MODENV, 0)) z.me2pitch = iz.get(GEN_MODENV2PITCH, 0) + pz.get(GEN_MODENV2PITCH, 0) z.me2filt = iz.get(GEN_MODENV2FILT, 0) + pz.get(GEN_MODENV2FILT, 0) z.a_start = (s.start + iz.get(GEN_START_OFF, 0) + 32768 * iz.get(GEN_START_COARSE, 0)) z.a_end = (s.end + iz.get(GEN_END_OFF, 0) + 32768 * iz.get(GEN_END_COARSE, 0)) z.a_start = max(0, z.a_start) z.a_end = max(z.a_start, min(z.a_end, sf.smpl_size // 2)) z.loop_abs_start = (s.loopstart + iz.get(GEN_STARTLOOP_OFF, 0) + 32768 * iz.get(GEN_STARTLOOP_COARSE, 0)) z.loop_abs_end = (s.loopend + iz.get(GEN_ENDLOOP_OFF, 0) + 32768 * iz.get(GEN_ENDLOOP_COARSE, 0)) z.pair = None z.rate = s.rate z.name = s.name zones.append(z) if zones: sf.presets[(bank, preset)] = (pname, zones) return sf # ── Preset resolution / Taud instrument building ────────────────────────────── def resolve_preset(sf: SF2, inst_key, perc_force): """inst_key: ('m', bank, prog) or ('d', prog). Returns (name, zones) or None.""" if inst_key[0] == 'd': prog = inst_key[1] cands = [] if perc_force is not None: cands.append(tuple(perc_force)) cands += [(128, prog), (128, 0)] else: _, bank, prog = inst_key cands = [(bank, prog), (0, prog)] for c in cands: if c in sf.presets: return sf.presets[c] # Last resort: same program number in any bank, then nothing. prog = inst_key[1] if inst_key[0] == 'd' else inst_key[2] for (b, p) in sorted(sf.presets): if p == prog: return sf.presets[(b, p)] return None def merge_stereo_zones(zones: list, shdrs: list) -> list: """Collapse L/R zone pairs into single mono zones. Two flavours are merged: (1) LINKED stereo — samples are each other's sampleLink with L/R types; (2) PAN stereo — two MONO-typed zones with the same key/vel rect and opposite hard pan (±500). SGM/Timbres store most "stereo" samples this way (e.g. 'VA LGFF C3-L' / '…-R'), NOT as linked L/R. The merged zone mixes both channels to mono and drops the pan override. Merging is essential: an unmerged R zone fully overlaps its L zone, so the disjointify spills it into a SECOND layer that then plays CENTRED alongside the L zone — a spurious +6 dB doubling. Lone L/R zones keep their channel.""" out = [] used = set() for i, z in enumerate(zones): if i in used: continue s = shdrs[z.sample] partner = None if s.stype in (2, 4) and 0 <= s.link < len(shdrs): for j in range(i + 1, len(zones)): if j in used: continue z2 = zones[j] if (z2.sample == s.link and (z2.keylo, z2.keyhi, z2.vello, z2.velhi) == (z.keylo, z.keyhi, z.vello, z.velhi) and z2.modes == z.modes and z2.rootkey == z.rootkey): partner = j break if partner is None and z.pan is not None and abs(z.pan) >= 400: for j in range(i + 1, len(zones)): if j in used: continue z2 = zones[j] if (z2.sample != z.sample and z2.pan is not None and abs(z2.pan) >= 400 and (z.pan < 0) != (z2.pan < 0) # opposite sides and (z2.keylo, z2.keyhi, z2.vello, z2.velhi) == (z.keylo, z.keyhi, z.vello, z.velhi) and z2.modes == z.modes and z2.rootkey == z.rootkey): partner = j break if partner is not None: used.add(partner) z2 = zones[partner] z.pair = (z.sample, z2.sample, z2.a_start) z.pan = None # mixed to mono → centred z.a_end = z.a_start + min(z.a_end - z.a_start, z2.a_end - z2.a_start) out.append(z) return out def _rect_of_zone(z: SFZone): """Zone key/vel ranges → Taud (pitch_lo, pitch_hi, vol_lo, vol_hi). Pitch bounds sit on half-semitone boundaries so triggers carrying an initial pitch bend (< 50 cents) still land inside the right rectangle; adjacent zones stay disjoint. Velocity per Ixmp note 5: round(v·63/127).""" if z.keylo <= 0: plo = 0x0000 else: plo = max(0, min(0xFFFF, round(TAUD_C4 + (z.keylo - 0.5 - 60) * UNITS_PER_SEMI))) if z.keyhi >= 127: phi = 0xFFFF else: phi = max(0, min(0xFFFF, round(TAUD_C4 + (z.keyhi + 0.5 - 60) * UNITS_PER_SEMI) - 1)) vlo = round(z.vello * 63 / 127) vhi = round(z.velhi * 63 / 127) return (plo, phi, vlo, vhi) def _rect_subtract(r, k): """Pieces of rectangle r not covered by rectangle k (≤ 4 pieces).""" p0, p1, v0, v1 = r q0, q1, w0, w1 = k if p1 < q0 or p0 > q1 or v1 < w0 or v0 > w1: return [r] pieces = [] if p0 < q0: pieces.append((p0, q0 - 1, v0, v1)) if p1 > q1: pieces.append((q1 + 1, p1, v0, v1)) m0, m1 = max(p0, q0), min(p1, q1) if v0 < w0: pieces.append((m0, m1, v0, w0 - 1)) if v1 > w1: pieces.append((m0, m1, w1 + 1, v1)) return pieces class MonoSample: """One pooled (deduplicated) mono u8 sample slice.""" __slots__ = ('pair', 'a_start', 'frames', 'rate', 'name', 'data', 'ratio', 'offset', 'loop_native', 'synth_loop', 'synth_decay') def __init__(self, z: SFZone): self.pair = z.pair # None or (idxL, idxR, b_start) self.a_start = z.a_start self.frames = max(0, z.a_end - z.a_start) self.rate = z.rate self.name = z.name self.data = None self.ratio = 1.0 self.offset = 0 # SF2 loop in NATIVE frames (mirrors the Patch loop test), or None when this # slice has no loop. Used by build_sample_inst_bin to decide how to fit an # over-length sample: a no-loop sample gets a synthesized loop, a looped one # is preserved (kept at 32 kHz when its loop fits, else fit-to-cap). Dedup # keeps the first zone's loop (same slice ⇒ same loop in practice). ls_n = max(0, min(z.loop_abs_start - z.a_start, self.frames)) le_n = max(0, min(z.loop_abs_end - z.a_start, self.frames)) self.loop_native = (ls_n, le_n) if (z.modes in (1, 3) and le_n - ls_n >= 2) else None # Set when a too-long, originally UN-looped sample is resampled to the 32 kHz # floor and given a synthesized sustain loop (see _synth_sustain_loop): a # (loop_start, loop_end) pair in the FINAL output-frame domain (already scaled # by every resample) and the seconds over which a peak->0 vol-envelope fades # the looped note to silence (_synth_decay_vol_env). When set, the loop points # and vol-envelope of EVERY record/patch using this sample are overridden. self.synth_loop = None self.synth_decay = None def key(self): return (self.pair[0], self.pair[1], self.a_start, self.frames) \ if self.pair else (-1, -1, self.a_start, self.frames) def render(self, sf: SF2): if self.data is not None: return n = min(self.frames, 1 << 24) # hard sanity cap (16M frames) if self.pair: la = sf.read_frames(self.a_start, n) ra = sf.read_frames(self.pair[2], n) m = min(len(la), len(ra)) self.data = bytes((((la[i] + ra[i]) >> 1) >> 8) + 128 & 0xFF for i in range(m)) else: la = sf.read_frames(self.a_start, n) self.data = bytes(((s >> 8) + 128) & 0xFF for s in la) self.frames = len(self.data) class Patch: """One Ixmp-patch-to-be: a disjoint rect plus the zone's sample fields.""" __slots__ = ('rect', 'zone', 'ms', 'loop_start', 'loop_end', 'loop_mode', 'detune', 'pan8', 'hits') def __init__(self, rect, z: SFZone, ms: MonoSample): self.rect = rect self.zone = z self.ms = ms ls = z.loop_abs_start - z.a_start le = z.loop_abs_end - z.a_start nf = max(0, z.a_end - z.a_start) ls = max(0, min(ls, nf)); le = max(0, min(le, nf)) if z.modes in (1, 3) and le - ls >= 2: self.loop_mode = 1 | (0x4 if z.modes == 3 else 0) self.loop_start = ls self.loop_end = le else: self.loop_mode = 0 self.loop_start = 0 self.loop_end = 0 # samplingRate = SF2 rate; the rootkey/tuning shift goes into the # signed 4096-TET detune so MIDI key 60 always means noteVal 0x5000. # scaleTuning (cents per key, 0 = fixed-pitch drums) is folded in # around the zone's centre key: exact for single-key zones, exact # everywhere when scale = 100. k_ref = (z.keylo + z.keyhi) / 2.0 det = round(((k_ref - z.rootkey) * (z.scale / 100.0) - (k_ref - 60.0)) * UNITS_PER_SEMI + z.tune_cents * 4096.0 / 1200.0) self.detune = max(-0x8000, min(0x7FFF, det)) if z.pan is None: self.pan8 = IXMP_PAN_NO_OVERRIDE else: self.pan8 = max(0, min(255, round(127.5 + z.pan * 255.0 / 1000.0))) self.hits = 0 def to_ixmp_dict(self, canonical, bpm0, fadeout_override): r = self.ms.ratio # Synthesized-loop samples carry their loop in the final output-frame domain # (already resampled) and force a plain forward loop; otherwise the zone's SF2 # loop scaled by this sample's resample ratio. if self.ms.synth_loop is not None: ls_w, le_w, lm_w = self.ms.synth_loop[0], self.ms.synth_loop[1], 1 else: ls_w = round(self.loop_start * r) le_w = round(self.loop_end * r) lm_w = self.loop_mode d = { 'pitch_start': self.rect[0], 'pitch_end': self.rect[1], 'volume_start': self.rect[2], 'volume_end': self.rect[3], 'sample_ptr': self.ms.offset, 'sample_length': min(len(self.ms.data), 0xFFFF), 'play_start': 0, 'loop_start': min(0xFFFF, ls_w), 'loop_end': min(0xFFFF, le_w), 'sampling_rate': max(1, min(0xFFFF, round(self.ms.rate * r))), 'sample_detune': self.detune, 'loop_mode': lm_w, 'default_pan': self.pan8, 'default_note_volume': 0, # no override → base DNV 'vibrato_speed': 0, 'vibrato_sweep': 0, 'vibrato_depth': 0, 'vibrato_rate': 0, 'vibrato_waveform': 0xFF, # no override } # Per-patch overrides — emitted ONLY when they differ from the canonical # zone (whose envelopes/filter live in the base instrument record, which the # patch falls through to when a block is absent). This is what gives SF2 # velocity / key layers their own ADSR + filter while keeping patches lean. z, c = self.zone, canonical.zone # Effective vol-env: a synthesized-loop sample uses a peak->0 decay (no sustain), # else the zone's SF2 ADSR. Emitted only when it differs from the canonical's. vol_self = _effective_vol_env(z, self.ms) vol_canon = _effective_vol_env(c, canonical.ms) if vol_self != vol_canon: d['vol_env'] = vol_self # SF-mode filter: mode flag + 16-bit cutoff cents / Q centibels + filter env. sf_s, cut_s, res_s, filt_s = _zone_filter_sf(z) sf_c, cut_c, res_c, filt_c = _zone_filter_sf(c) pit_s = _pitch_env_block(z) if z.me2pitch else None pit_c = _pitch_env_block(c) if c.me2pitch else None # Emit the 'x' block when filter (mode/cutoff/resonance/env) OR initialAttenuation # differs from the canonical (base) zone. initialAttenuation is a per-voice gain (NOT # folded into the env); when 'x' is present it carries this patch's atten, else the # voice inherits the base record's atten. A differing filter ENV must co-emit 'x' # because the env's node ratios scale the patch's OWN peak cutoff (the 'x' cutoff). att_s = atten_cb_to_octet(z.atten_cb) att_c = atten_cb_to_octet(c.atten_cb) # Volume Fadeout = this patch's own SF2 release segment; emit 'x' when it (or any # filter / atten field) differs from the canonical zone so the per-layer release # time is faithful (an absent 'x' falls through to the base record's fadeout). A # synthesized-loop sample disables its key-off fadeout (its decay is the vol-env, # which runs from note-on regardless of key state). fo_s = 0 if self.ms.synth_loop is not None else _zone_fadeout(z, bpm0, fadeout_override) fo_c = 0 if canonical.ms.synth_loop is not None else _zone_fadeout(c, bpm0, fadeout_override) filt_differs = (filt_s != filt_c) if (sf_s != sf_c or cut_s != cut_c or res_s != res_c or att_s != att_c or filt_differs or fo_s != fo_c): d['extra'] = {'fadeout': fo_s, 'filter_sf_mode': sf_s, 'default_cutoff': cut_s, 'default_resonance': res_s, 'initial_attenuation': att_s} if filt_s is not None and filt_differs: d['filter_env'] = filt_s if pit_s is not None and pit_s != pit_c: d['pitch_env'] = pit_s return d class TaudInstrument: __slots__ = ('slot', 'inst_key', 'name', 'patches', 'canonical', 'usable') # patches: kept Patch list in zone order, canonical Patch INCLUDED # (the Ixmp emitter skips it; the base record carries its fields). def _rect_overlap(a, b) -> bool: """True when two (pitch_lo, pitch_hi, vol_lo, vol_hi) rectangles intersect.""" p0, p1, v0, v1 = a q0, q1, w0, w1 = b return not (p1 < q0 or p0 > q1 or v1 < w0 or v0 > w1) def _partition_layers(zones: list, registry: dict, max_layers: int): """Split zones into disjoint layers by ITERATED first-wins disjointify. Layer 0 is the classic disjointify result: each zone is rectangle-SUBTRACTED against the rects already placed in the layer, so its non-overlapping pieces tile in. This is essential — the velocity axis quantises 0..127 → 0..63, so adjacent SF2 velocity splits round to ranges that touch/overlap by ~1 unit; subtraction absorbs that boundary sliver into the first zone instead of spawning a spurious extra layer (which would DOUBLE the level at boundary velocities). Only a zone that is *fully* covered by the layer below — SF2's real simultaneous layering, detune-stacks, duplicate zones — spills down to the next layer, where the same disjointify runs over the spilled set. Returns ([ [(rect, zone, ms), …] per layer ], dropped_zone_count).""" remaining = [] for z in zones: ms = MonoSample(z) if ms.frames < 2: continue ms = registry.setdefault(ms.key(), ms) remaining.append((z, ms)) layers = [] while remaining and len(layers) < max_layers: kept_rects = [] layer = [] spill = [] for z, ms in remaining: pieces = [_rect_of_zone(z)] for k in kept_rects: pieces = [p2 for p in pieces for p2 in _rect_subtract(p, k)] if not pieces: break pieces = [p for p in pieces if p[0] <= p[1] and p[2] <= p[3]] if not pieces: spill.append((z, ms)) # fully overlapped → next layer continue for p in pieces: kept_rects.append(p) layer.append((p, z, ms)) if layer: layers.append(layer) remaining = spill return layers, len(remaining) def _build_layer_instrument(name: str, items: list, trig: dict): """One normal TaudInstrument from a layer's disjoint (rect, zone, ms) items, trimmed to patches actually hit by a trigger. None when no patch is hit (the layer is silent for the whole song → dropped).""" all_patches = [Patch(r, z, ms) for (r, z, ms) in items] for (nv, v6), cnt in trig.items(): for p in all_patches: r = p.rect if r[0] <= nv <= r[1] and r[2] <= v6 <= r[3]: p.hits += cnt break kept = [p for p in all_patches if p.hits > 0] if not kept: return None ti = TaudInstrument() ti.name = name ti.patches = kept ti.canonical = max(kept, key=lambda p: p.hits) ti.usable = True ti.slot = 0 ti.inst_key = None return ti def build_presets(sf: SF2, slot_keys: list, triggers: dict, perc_force, registry: dict, max_layers: int) -> dict: """For each preset (inst_key), partition its SF2 zones into disjoint layers and build one normal TaudInstrument per layer (trimmed to triggered patches). Returns dict[inst_key → (name, [layer TaudInstrument])]. Downstream, a preset with >1 layer becomes a Metainstrument; a single-layer preset stays a plain instrument. `registry` dedupes MonoSamples across all presets/layers.""" presets = {} for ik in slot_keys: res = resolve_preset(sf, ik, perc_force) if res is None: vprint(f" warning: no SF2 preset for {ik!r} — its notes are dropped") presets[ik] = ('(missing preset)', []) continue name, zones = res zones = merge_stereo_zones(zones, sf.shdrs) layer_items, dropped = _partition_layers(zones, registry, max_layers) if dropped: vprint(f" warning: '{name}': {dropped} zone(s) exceed the " f"{max_layers}-layer cap and were dropped (raise --max-layers)") trig = triggers.get(ik, {}) layers = [ti for items in layer_items if (ti := _build_layer_instrument(name, items, trig)) is not None] if not layers and layer_items: # Nothing triggered (out-of-range): keep the single patch nearest the # mean trigger pitch so the preset still sounds (matches the old path). mean_nv = (sum(nv * c for (nv, _), c in trig.items()) / max(1, sum(trig.values()))) if trig else TAUD_C4 flat = [Patch(r, z, ms) for items in layer_items for (r, z, ms) in items] best = min(flat, key=lambda p: abs((p.rect[0] + p.rect[1]) / 2 - mean_nv)) ti = TaudInstrument() ti.name = name; ti.patches = [best]; ti.canonical = best ti.usable = True; ti.slot = 0; ti.inst_key = ik layers = [ti] for ti in layers: ti.inst_key = ik presets[ik] = (name, layers) if layers: vprint(f" preset '{name}': {len(zones)} zone(s) → {len(layers)} layer(s)" + (" → Metainstrument" if len(layers) > 1 else "")) else: vprint(f" warning: '{name}': no usable zones — notes dropped") return presets # Metainstrument mix-volume octet for an unmixed layer (159 = 0 dB / unity); the # converter folds per-zone level/tune into each layer instrument's patches, so the # meta layers stay neutral. (terranmon.txt "Perceptually Significant Octet …".) META_UNITY_OCTET = 159 def _layer_bbox(ti: 'TaudInstrument'): """Bounding (pitch_lo, pitch_hi, vol_lo, vol_hi) over a layer instrument's kept patch rects — the Metainstrument layer's gating rectangle.""" rs = [p.rect for p in ti.patches] return (min(r[0] for r in rs), max(r[1] for r in rs), min(r[2] for r in rs), max(r[3] for r in rs)) # ── Sample pool + instrument bin ────────────────────────────────────────────── def _env_seg_count(t_sec: float) -> int: """Number of linear segments to approximate an exponential (linear-dB) ramp of `t_sec` seconds. Short ramps keep the old 2-segment shape; long ramps (the 5–20 s SF2 decays/releases that a 2-point line collapses badly) get up to 8 segments so the curve stays smooth (issue 4).""" return max(3, min(8, 2 + round(t_sec / 2.0))) def _adsr_to_env(z: SFZone): """SF2 volume-envelope ADSR → (env_points, sustain_idx, release_sec). env_points is up to 25 (value 0..63, minifloat_idx) pairs; each node's minifloat encodes the time to the NEXT node (engine interpolates values linearly across that span). The envelope carries the delay/attack/hold/decay legs and ENDS at the sustain node — there is NO release leg. The engine wraps on the sustain node while the key is held (SUSTAIN word); on key-off it holds at that terminal node and the Volume Fadeout (emitted with NNA Note Fade) is the SF2 *release segment* (see _zone_fadeout). SF2's decay is LINEAR in dB (exponential in amplitude); per the SF2 spec decayVolEnv is the full-100dB time, truncated by the sustain level. The decay leg is sampled at equal-time (= equal-dB) points and emitted as a piecewise-linear-amplitude approximation — segment count scales with its duration (issue 4) so multi-second decays don't collapse to a 2-point line. release_sec (= SF2 releaseVolEnv) is returned only to feed the fadeout calc. """ EPS = 0.004 # below the minifloat resolution (1/256 s) sus_cb = min(z.env_sustain_cb, 1000.0) # clamp to 100 dB full-scale slevel = 10.0 ** (-z.env_sustain_cb / 200.0) s63 = max(0, min(63, round(63 * slevel))) pts = [] # (value, delta_sec_to_next) if z.env_delay >= EPS: pts.append((0, z.env_delay)) if z.env_attack >= EPS: pts.append((0, z.env_attack)) hold = z.env_hold if z.env_hold >= EPS else 0.0 # Decay leg: peak (63) → sustain (s63), exponential amplitude over `edec` seconds. # The peak node carries the hold time. The final decay node is the sustain node # (appended below), so the in-between nodes are f = 1/n .. (n-1)/n. if s63 < 63: edec = z.env_decay * sus_cb / 1000.0 if edec >= EPS: n = _env_seg_count(edec) seg = edec / n pts.append((63, hold + seg)) # peak, held then 1st seg for i in range(1, n): # f = 1/n .. (n-1)/n f = i / n v = round(63 * 10.0 ** (-(sus_cb * f) / 200.0)) pts.append((max(s63, min(63, v)), seg)) else: pts.append((63, hold)) sustain_idx = len(pts) # the node appended next is the sustain node rel = z.env_release # No release leg: the sustain node is the terminal node. While the key is held the # engine wraps on it (SUSTAIN word); after key-off it holds there and the Volume # Fadeout (NNA Note Fade) performs the SF2 release segment (see _zone_fadeout). A # zero sustain leaves a terminal 0 node, so the engine retires the voice naturally # at the end of decay. pts.append((s63, 0.0)) # sustain node = terminator env = [(v, nearest_minifloat(d)) for v, d in pts[:25]] while len(env) < 25: env.append((env[-1][0], 0)) return env, min(sustain_idx, 24), rel # Envelope LOOP-word bits (terranmon.txt base byte 15/17/19). ENV_PRESENT_BIT = 0x2000 # P — envelope present in source (LOOP-word bit 13) ENV_SUS_ENABLE = 0x0020 # b — enable the SUSTAIN wrap (SUSTAIN-word bit 5) ENV_PF_FILTER = 0x0080 # m — pitch/filter LOOP-word bit 7 (1 = filter) def _atten_gain(atten_cb: float) -> float: """SF2 initialAttenuation (cB) → linear amplitude multiplier (≤ 1.0).""" return 10.0 ** (-max(0.0, atten_cb) / 200.0) def _vol_env_block(z: SFZone): """Taud volume-envelope block dict from a zone's SF2 ADSR — the PURE ADSR shape at full 0..63 resolution. initialAttenuation is NO LONGER folded into the node peak (it would crush a heavily-attenuated env to peak ~3 and zero its tail, e.g. SGM 'Fantasia'); it is now carried as a separate per-voice gain — base record bytes 251-252 / Ixmp 'x' block initialAttenuation — applied in the mixer. Returns (block_dict, sustain_idx, release_sec).""" env, sidx, rel = _adsr_to_env(z) nodes = [(max(0, min(63, v)), mf) for (v, mf) in env] sustain = ENV_SUS_ENABLE | ((sidx & 0x1F) << 8) | (sidx & 0x1F) return {'loop': ENV_PRESENT_BIT, 'sustain': sustain, 'nodes': nodes}, sidx, rel # SF2 initialFilterFc default ≈ 13500 cents (~20 kHz) means "no filter / fully open". SF2_FILTER_OPEN_CENTS = 13500 # Taud SF-mode "filter off" sentinel for the 16-bit cutoff/resonance fields. SF_FILTER_OFF = 0xFFFF def _zone_filter_sf(z: SFZone): """Resolve a zone's filter into Taud SF-mode parameters. Taud SF mode (base byte 173 bit 4 / patch 'x' flag) stores the cutoff as SoundFont **absolute cents** and resonance as **centibels above DC gain** — the engine computes freq = 8.176·2^(cents/1200) and dmpfac = 10^(−Qcb/200), so there is no ImpulseTracker ~5 kHz cutoff ceiling. When the zone has a modulation envelope driving the cutoff, the stored cutoff is the PEAK the envelope reaches and the filter-env nodes scale it back down (see [_filter_env_block_sf]); the engine's `currentCutoff = baseCut · envValue` then reproduces the SF2 sweep exactly (linear-in-cents = the right log-Hz sweep). Returns (sf_mode, cutoff16, resonance16, filter_env_block_or_None). sf_mode False → no filter (IT-mode 'off').""" base_fc = z.filter_fc amt = z.me2filt has_static = base_fc < SF2_FILTER_OPEN_CENTS has_env = bool(amt) if not has_static and not has_env: return False, SF_FILTER_OFF, SF_FILTER_OFF, None peak = max(1, min(0xFFFE, round(base_fc + max(0, amt)))) # engine baseCut qcb = max(0, min(0xFFFE, round(z.filter_q))) # cB above DC gain env = _filter_env_block_sf(z, base_fc, amt, peak) if has_env else None return True, peak, qcb, env def _filter_env_block_sf(z: SFZone, base_fc: float, amt: float, peak: int) -> dict: """Filter envelope in SF-cents domain. Each node value = cutoff_cents(u)/peak·255 following the SF2 modulation-envelope DAHDSR (u walks 0→1→sustain), where cutoff_cents(u) = base_fc + amt·u. 0xFF (255) = fully open at `peak`; the release returns to the base cutoff. The engine multiplies `peak` (= baseCut) by node/255 each tick, so the node ratios reproduce the SF2 cutoff sweep.""" EPS = 0.004 sus_u = 1.0 - z.m_sustain_pc / 1000.0 # mod-env sustain level (0..1) def nodeval(u: float) -> int: cents = base_fc + amt * u return max(0, min(255, round(255.0 * cents / peak))) pts = [] # (value_byte, secs_to_next) if z.m_delay >= EPS: pts.append((nodeval(0.0), z.m_delay)) pts.append((nodeval(0.0), z.m_attack if z.m_attack >= EPS else 0.0)) hold = z.m_hold if z.m_hold >= EPS else 0.0 if sus_u < 1.0 and z.m_decay >= EPS: pts.append((nodeval(1.0), hold + z.m_decay)) sustain_idx = len(pts) pts.append((nodeval(sus_u), z.m_release if z.m_release >= EPS else 0.0)) else: pts.append((nodeval(1.0), hold)) sustain_idx = len(pts) - 1 pts.append((nodeval(0.0), 0.0)) # release returns to base cutoff nodes = [(v, nearest_minifloat(d)) for v, d in pts[:25]] while len(nodes) < 25: nodes.append((nodes[-1][0], 0)) sustain_idx = min(sustain_idx, 24) loop = ENV_PRESENT_BIT | ENV_PF_FILTER # m-bit set = filter role sustain = ENV_SUS_ENABLE | ((sustain_idx & 0x1F) << 8) | (sustain_idx & 0x1F) return {'loop': loop, 'sustain': sustain, 'nodes': nodes} def _zone_fadeout(z: SFZone, bpm0: int, fadeout_override) -> int: """Volume Fadeout step encoding the zone's SF2 release segment (gen 38, releaseVolEnv). With NNA Note Fade the fadeout IS the release: on key-off the voice holds at the sustain level and fades linearly to silence. The SF2 release ramps a constant 100 dB per `releaseVolEnv` seconds (spec sfspec24.txt:1934-1941 — "until 100dB attenuation were reached"), so the time from the sustain level (sus_cb cB of attenuation) down to the 100 dB floor is releaseVolEnv·(1000−sus_cb)/1000. fadeStep makes the fadeout complete in that wall-clock time at bpm0: the engine subtracts fadeStep/1024 of unit volume per song tick, and the tick rate is bpm0·2/5 Hz, giving fadeStep = 2560/(fade_sec·bpm0).""" if fadeout_override is not None: return min(0xFFF, max(0, fadeout_override)) sus_cb = min(max(0.0, z.env_sustain_cb), 1000.0) fade_sec = max(0.02, z.env_release * (1000.0 - sus_cb) / 1000.0) return max(1, min(0xFFF, round(2560.0 / (fade_sec * bpm0)))) def _extra_block(z: SFZone, bpm0: int, fadeout_override) -> dict: """The 'x' block: release-segment fadeout + SF-mode static cutoff/resonance + filter mode.""" sf_mode, cut16, res16, _ = _zone_filter_sf(z) return {'fadeout': _zone_fadeout(z, bpm0, fadeout_override), 'filter_sf_mode': sf_mode, 'default_cutoff': cut16, 'default_resonance': res16} def _pitch_env_block(z: SFZone) -> dict: """Pitch ('P') envelope block from the SF2 modulation envelope (DAHDSR), scaled by modEnvToPitch. Engine value mapping (byte/255; 0.5 = 0x80 = unity): envValue 1.0 → +16 semitones, so value = 0.5 + semis/32. The mod-env is unipolar 0→1; release returns to unity (0x80). (Filter envelopes are built separately in cents domain by [_filter_env_block_sf].)""" EPS = 0.004 amount_cents = z.me2pitch sus_lvl = 1.0 - z.m_sustain_pc / 1000.0 # mod-env sustain level (0..1) def mapval(u: float) -> int: val = 0.5 + (amount_cents * u / 100.0) / 32.0 return max(0, min(255, round(255 * max(0.0, min(1.0, val))))) pts = [] # (value_byte, secs_to_next) if z.m_delay >= EPS: pts.append((mapval(0.0), z.m_delay)) pts.append((mapval(0.0), z.m_attack if z.m_attack >= EPS else 0.0)) hold = z.m_hold if z.m_hold >= EPS else 0.0 if sus_lvl < 1.0 and z.m_decay >= EPS: pts.append((mapval(1.0), hold + z.m_decay)) sustain_idx = len(pts) pts.append((mapval(sus_lvl), z.m_release if z.m_release >= EPS else 0.0)) else: pts.append((mapval(1.0), hold)) sustain_idx = len(pts) - 1 pts.append((mapval(0.0), 0.0)) # release returns to unity (0x80) nodes = [(v, nearest_minifloat(d)) for v, d in pts[:25]] while len(nodes) < 25: nodes.append((nodes[-1][0], 0)) sustain_idx = min(sustain_idx, 24) loop = ENV_PRESENT_BIT # m-bit clear = pitch role sustain = ENV_SUS_ENABLE | ((sustain_idx & 0x1F) << 8) | (sustain_idx & 0x1F) return {'loop': loop, 'sustain': sustain, 'nodes': nodes} def _zone_pf_envs(z: SFZone): """Return (filter_env_block_or_None, pitch_env_block_or_None) for a zone's modulation envelope. SF2's single mod-env can drive both targets at once; the filter leg is built in SF-cents domain (see [_zone_filter_sf]).""" _, _, _, filt = _zone_filter_sf(z) pit = _pitch_env_block(z) if z.me2pitch else None return filt, pit # ── SF2 long-sample resampling + synthesized sustain loop ───────────────────── # # Per-sample handling when a rendered MonoSample exceeds the 65535-frame u16 cap # (terranmon.txt sample_length is u16). Two strategies, by the rate that fitting # the WHOLE sample into 65535 frames would leave: # (1)/(2) rate >= SF2_RESAMPLE_FLOOR_HZ → downsample the whole sample to 65535 # frames (quality stays acceptable, full sample preserved). # (3) rate < SF2_RESAMPLE_FLOOR_HZ → resample to the 32 kHz floor instead # (keeps full bandwidth), keep the first 65535 frames, and — when the # sample has NO loop of its own — synthesize a near-seamless forward # loop near the end so held notes keep sounding, plus a peak->0 decay # vol-envelope (see _synth_decay_vol_env) that retires the voice # ~SF2_SYNTH_DECAY_SEC after the note fires. SF2_RESAMPLE_FLOOR_HZ = 32000 # TSVM native audio rate (= full-bandwidth floor) SF2_SYNTH_DECAY_SEC = 10.0 # looped-note fade-to-silence span (from note-on) SF2_LOOP_HINT = 8192 # spec's "last 8192 samples" → MAX loop period searched SF2_LOOP_MIN_PERIOD = 512 # min loop period (avoid buzzy ultra-short loops) SF2_LOOP_MATCH_WIN = 256 # forward-window length used to score a loop seam SF2_LOOP_MATCH_STEP = 2 # stride within the match window (speed/quality trade) SF2_LOOP_COARSE_STEP = 32 # period stride for the coarse search pass def _synth_sustain_loop(data: bytes, cap: int, hint: int): """Pick a near-seamless forward loop near the end of a resampled, originally UN-looped sample, and truncate it to <= `cap` frames. Returns (body, loop_start, loop_end) with the loop region [loop_start, loop_end) (loop_end exclusive — matches the engine's mode-1 wrap, AudioAdapter.kt:2126). The loop is chosen by minimising the sum-of-squared-difference between the W-frame windows that FOLLOW loop_start and loop_end. Forward playback wraps loop_end -> loop_start, so matching data[loop_start+k] ~= data[loop_end+k] makes the post-wrap texture continue the pre-wrap texture seamlessly (the k=0 term also matches the immediate seam value). `hint` (the spec's "last 8192 samples") is the MAXIMUM loop period searched, NOT taken at face value: the analysis settles on the smoothest-looping period in [SF2_LOOP_MIN_PERIOD, hint] via a coarse sweep refined locally.""" keep = min(len(data), cap) W = SF2_LOOP_MATCH_WIN # loop_end sits W frames before the kept end so the forward match window # [loop_end, loop_end + W) stays within the data. loop_end = keep - W p_max = min(hint, loop_end) p_min = min(SF2_LOOP_MIN_PERIOD, p_max) if loop_end <= p_min: # too short to loop (not expected in case 3) return data[:keep], max(0, keep - 2), keep def seam_err(ls: int) -> int: s = 0 le = loop_end for k in range(0, W, SF2_LOOP_MATCH_STEP): d = data[ls + k] - data[le + k] s += d * d return s best_p = p_min best_e = seam_err(loop_end - best_p) p = p_min + SF2_LOOP_COARSE_STEP while p <= p_max: e = seam_err(loop_end - p) if e < best_e: best_e, best_p = e, p p += SF2_LOOP_COARSE_STEP lo = max(p_min, best_p - SF2_LOOP_COARSE_STEP) hi = min(p_max, best_p + SF2_LOOP_COARSE_STEP) for p in range(lo, hi + 1): e = seam_err(loop_end - p) if e < best_e: best_e, best_p = e, p loop_start = max(0, min(loop_end - 2, loop_end - best_p)) return data[:keep], loop_start, loop_end def _synth_decay_vol_env(decay_sec: float) -> dict: """Volume-envelope block for a synthesized-loop sample: an immediate peak that decays exponentially (linear-dB) to silence over `decay_sec`, with NO sustain or loop wrap. The looped sample would otherwise sound forever; this envelope fades it from the instant the note fires and — because there is no wrap (resolveEnvWrap returns range (-1,-1)) — the engine's fall-through 'envelope ends at 0 => cut' rule (AudioAdapter.kt:1693/1701) retires the voice once it reaches the terminal 0 node, ~decay_sec after firing, regardless of key state. The drop spans the representable 63->1 range (~36 dB); the final node is a true 0 terminator.""" DROP_CB = 360.0 # 63 -> 1 fills the whole decay span n = _env_seg_count(decay_sec) seg = decay_sec / n pts = [(63, seg)] # peak, held one segment then decays for i in range(1, n): v = round(63 * 10.0 ** (-(DROP_CB * (i / n)) / 200.0)) pts.append((max(1, min(63, v)), seg)) pts.append((0, 0.0)) # terminal 0 node => fall-through cut nodes = [(v, nearest_minifloat(d)) for v, d in pts[:25]] while len(nodes) < 25: nodes.append((0, 0)) return {'loop': ENV_PRESENT_BIT, 'sustain': 0, 'nodes': nodes} def _effective_vol_env(z: SFZone, ms: 'MonoSample') -> dict: """Volume-envelope block for a (zone, sample): a synthesized-loop sample fades from note-on via a peak->0 decay (no sustain), overriding the SF2 ADSR; otherwise the zone's SF2 ADSR shape (_vol_env_block).""" if ms is not None and ms.synth_decay is not None: return _synth_decay_vol_env(ms.synth_decay) blk, _, _ = _vol_env_block(z) return blk def build_sample_inst_bin(sf: SF2, pool: list, layer_insts: list, meta_records: list, fadeout_override, bpm0: int): """Render & pool every used MonoSample (with the 65535-byte per-sample and 8 MB global caps), write the 256-byte normal-instrument records for every layer instrument, then the Metainstrument records. Returns the raw SAMPLEINST_SIZE image.""" for ms in pool: ms.render(sf) # Per-sample u16 cap. A sample over the 65535-frame limit is shrunk one of two # ways (see the SF2 long-sample section above): downsample the whole thing when # that keeps the rate >= 32 kHz; otherwise resample to the 32 kHz floor, keep the # first 65535 frames and synthesize a sustain loop + decay (only when the sample # has no loop of its own — a sample with an SF2 loop is left to fall-through, as # its loop already lets it sustain within whatever frames fit). for ms in pool: native_len = len(ms.data) if native_len <= SAMPLE_LEN_LIMIT: continue r_fit = SAMPLE_LEN_LIMIT / native_len rate_fit = ms.rate * r_fit r32 = SF2_RESAMPLE_FLOOR_HZ / ms.rate # loop_end in 32 kHz frames (0 when unlooped) decides whether a 32 kHz render # still contains the loop within the 65535-frame cap. le32 = round(ms.loop_native[1] * r32) if ms.loop_native else 0 def _fit_whole(): """(1)/(2) downsample the WHOLE sample to <= 65535 frames. Used when the fitted rate stays >= 32 kHz, or as the fall-back for a looped sample whose loop sits past the cap at 32 kHz (only fit-to-cap keeps that far loop).""" ms.data = resample_linear(ms.data, r_fit) ms.ratio *= len(ms.data) / native_len if rate_fit >= SF2_RESAMPLE_FLOOR_HZ: _fit_whole() vprint(f" info: '{ms.name}' {native_len} frames > 64K cap; " f"resampling by {r_fit:.4f} (rate {rate_fit:.0f} Hz)") elif ms.loop_native is None: # (3) No loop: resample to the 32 kHz floor (full bandwidth), keep the first # 65535 frames and synthesize a near-seamless sustain loop near the end, plus # a peak->0 decay vol-envelope that fades the looped note to silence from # note-on (the SF2 sample stops on its own otherwise; a loop would ring). resampled = resample_linear(ms.data, r32) ms.ratio *= len(resampled) / native_len # effective rate -> 32 kHz ms.data = resampled body, ls, le = _synth_sustain_loop(ms.data, SAMPLE_LEN_LIMIT, SF2_LOOP_HINT) ms.data = body ms.synth_loop = (ls, le) ms.synth_decay = SF2_SYNTH_DECAY_SEC vprint(f" info: '{ms.name}' {native_len} frames > 64K cap, long & unlooped; " f"32 kHz, kept {len(body)} frames, synth loop [{ls}..{le}] " f"+ {SF2_SYNTH_DECAY_SEC:.0f}s decay") elif le32 <= SAMPLE_LEN_LIMIT - 2: # (3) Looped, and the loop fits at the 32 kHz floor: resample to 32 kHz and # keep the first 65535 frames. The per-patch loop points (native * ratio) # land within the kept data, so the SF2 loop + ADSR are preserved at full # bandwidth (a sustain-loop release tail past loop_end is truncated to fit). resampled = resample_linear(ms.data, r32) ms.ratio *= len(resampled) / native_len ms.data = resampled[:SAMPLE_LEN_LIMIT] vprint(f" info: '{ms.name}' {native_len} frames > 64K cap, long & looped; " f"32 kHz, kept first {len(ms.data)} frames (loop_end {le32})") else: # (3) Looped but the loop sits past the 65535-frame cap at 32 kHz (a far-end # sustain loop on a multi-second sample): the floor rate can't hold it, so # downsample the whole sample to fit — the ratio-scaled loop stays valid, # at a sub-32 kHz rate. (This is the pre-existing fit-to-cap behaviour.) _fit_whole() vprint(f" info: '{ms.name}' {native_len} frames > 64K cap, long, looped, " f"far loop; fit-to-cap by {r_fit:.4f} (rate {ms.rate * r_fit:.0f} Hz)") # Global 8 MB pool cap. Resamples every sample down equally; synthesized loop # points ride the same ratio so the loop stays valid in the shrunken data. total = sum(len(ms.data) for ms in pool) if total > SAMPLEBIN_SIZE: g = SAMPLEBIN_SIZE / total vprint(f" info: sample pool overflow ({total} bytes); " f"resampling all by {g:.4f}") for ms in pool: old = len(ms.data) ms.data = resample_linear(ms.data, g) ms.ratio *= len(ms.data) / old if ms.synth_loop is not None: le = min(len(ms.data) - 1, round(ms.synth_loop[1] * g)) ls = max(0, min(le - 2, round(ms.synth_loop[0] * g))) ms.synth_loop = (ls, le) sample_bin = bytearray(SAMPLEBIN_SIZE) pos = 0 for ms in pool: n = min(len(ms.data), SAMPLEBIN_SIZE - pos) if n < len(ms.data): vprint(f" warning: pool full, truncating '{ms.name}'") ms.data = ms.data[:n] if ms.synth_loop is not None: # keep the synthesized loop inside the data le = min(n - 1, ms.synth_loop[1]) ms.synth_loop = (max(0, min(le - 2, ms.synth_loop[0])), le) sample_bin[pos:pos+n] = ms.data ms.offset = pos pos += n vprint(f" sample pool: {len(pool)} sample(s), {pos} bytes") inst_bin = bytearray(INSTBIN_SIZE) for ti in layer_insts: if not ti.usable: continue c = ti.canonical ms = c.ms r = ms.ratio base = ti.slot * 256 struct.pack_into('0 decay envelope (no sustain) so # its otherwise-infinite loop fades to silence ~SF2_SYNTH_DECAY_SEC after firing. wenv(15, 189, 21, _effective_vol_env(c.zone, ms)) # Pan envelope: none (default unity nodes; P bit clear in LOOP word). struct.pack_into('> 8) & 0x0F) | (0x10 if sf_mode else 0) inst_bin[base + 177] = (0x80 if c.pan8 == IXMP_PAN_NO_OVERRIDE else c.pan8) # default pan struct.pack_into('> 8) & 0xFF # cutoff high (SF cents / IT byte) inst_bin[base + 252] = cut16 & 0xFF # cutoff low (SF mode) inst_bin[base + 183] = (res16 >> 8) & 0xFF # resonance high inst_bin[base + 253] = res16 & 0xFF # resonance low (SF mode) struct.pack_into(' dict: c = cells.get((v, row)) if c is None: c = {'note': NOTE_NOP, 'inst': 0, 'vol': (SEL_FINE, 0), 'pan': (SEL_FINE, 0), 'eff': None, 'prio': PRIO_FREE} cells[(v, row)] = c return c def allocate_voices(notes: list, speed: int, max_voices: int) -> int: """Greedy per-row interval scheduling onto as few columns as possible. The engine's New Note Action does the heavy lifting (matching MIDI polyphony semantics): a fresh trigger on an occupied voice migrates the old note into the mixer's background-ghost pool, so a voice is reusable the moment its note is *released* — the Note-Fade tail rides the ghost (fading over the instrument's SF2 release). Melodic voices free at their key-off row; drum voices (no key-off by default) free on the very next row. Stealing is therefore graceful: the victim is released early, not cut. Mutates note.voice (and truncates stolen notes' end_ft). Returns the number of voices used.""" cap = max(1, min(max_voices, NUM_VOICES)) v_end = [] # voice → first row at which it is free again v_slot = [] # voice → last instrument slot (affinity only) v_note = [] # voice → currently scheduled note stolen = 0 for n in notes: srow = n.start_ft // speed free = [v for v in range(len(v_end)) if v_end[v] <= srow] v = next((x for x in free if v_slot[x] == n.slot), free[0] if free else -1) if v < 0: if len(v_end) < cap: v = len(v_end) v_end.append(0); v_slot.append(0); v_note.append(None) else: # Steal preference: notes held only by the sustain pedal lose # least (their key is already up); otherwise the note ending # soonest. Either way NNA turns the steal into an early release. pedal = [x for x in range(len(v_end)) if v_note[x] is not None and v_note[x].pedal_ft is not None and v_note[x].pedal_ft <= n.start_ft] cand = pedal if pedal else range(len(v_end)) v = min(cand, key=lambda x: v_end[x]) victim = v_note[v] if victim is not None and victim.end_ft > n.start_ft: victim.end_ft = n.start_ft stolen += 1 if n.drum: end_row = srow + 1 # ghost carries the ring else: end_row = max(srow + 1, n.end_ft // speed) # free at key-off row n.voice = v v_end[v], v_slot[v], v_note[v] = end_row, n.slot, n if stolen: vprint(f" info: polyphony exceeded {cap} voices; {stolen} note(s) " f"released early (NNA ghost keeps the tail)") return len(v_end) def emit_cells(song: Song, insts: dict, speed: int, rpb: int, eps_units: float, drum_keyoff: bool, shift_ft: int, max_voices: int) -> tuple: """Place triggers, key-offs, portamento bend segments, M channel-volume and T tempo effects into the (voice,row) cell grid. Returns (cells, n_voices, total_rows, taud_bpm0).""" notes = [n for n in song.notes if n.slot > 0] def midi_bpm_at(ft): i = bisect.bisect_right(song.tempo_ft, ft) - 1 return song.tempo_bpm[i] if i >= 0 else 120.0 scale = rpb * speed / 24.0 def taud_bpm(b): t = round(b * scale) if not (25 <= t <= 280): vprint(f" warning: tempo {b:.1f} BPM maps to Taud {t}, " f"clamped to 25..280 (try a different --rpb/--speed)") return max(25, min(280, t)) n_voices = allocate_voices(notes, speed, max_voices) if n_voices == 0: sys.exit("error: no playable notes") vprint(f" voices: {n_voices} used (cap {max_voices}; NNA carries tails)") cells = {} # ── Pass 1: triggers ── for n in notes: row, tick = n.start_ft // speed, n.start_ft % speed c = _cell(cells, n.voice, row) nv = key_to_noteval(n.key + n.bend0) c['note'] = nv c['inst'] = n.slot c['vol'] = (SEL_SET, round(n.vel * 63 / 127)) st = song.channels[n.ch] if st.cc10_ft: pan = _curve_at(st.cc10_ft, st.cc10_val, n.start_ft + shift_ft, 64) c['pan'] = (SEL_SET, round(pan * 63 / 127)) if tick > 0: c['eff'] = (TOP_S, 0xD000 | (tick << 8)) c['prio'] = PRIO_DELAY # ── Pass 2: key-offs (both MIDI idioms arrive here as note.end_ft) ── skipped_offs = 0 for n in notes: if n.drum and not drum_keyoff: continue row, tick = n.end_ft // speed, n.end_ft % speed srow = n.start_ft // speed if row == srow: # Sub-row note (shorter than one tracker row): its key-off would land on # its OWN trigger row, where the trigger cell already sits — pass 2 would # then skip it ("row taken") and the note would ring forever until the next # trigger on this voice. Push the key-off to the next row (tick 0) so a # staccato note rounds up to ~1 row instead of hanging. If the next row is # itself a fresh trigger, that note cuts/NNAs this one anyway (skip is fine). row = srow + 1 tick = 0 c = cells.get((n.voice, row)) if c is None: c = _cell(cells, n.voice, row) c['note'] = NOTE_KEYOFF if tick > 0: c['eff'] = (TOP_S, 0xD000 | (tick << 8)) c['prio'] = PRIO_DELAY elif c['note'] == NOTE_NOP: c['note'] = NOTE_KEYOFF if tick > 0 and c['eff'] is None: c['eff'] = (TOP_S, 0xD000 | (tick << 8)) c['prio'] = PRIO_DELAY else: skipped_offs += 1 # row taken by a retrigger — which cuts/NNAs anyway if skipped_offs: vprint(f" info: {skipped_offs} key-off(s) absorbed by same-row retriggers") # ── Pass 3: pitch-bend portamento segments ── # One linear segment per row: the cell carries the exact 4096-TET target # plus G at units/tick sized to land on it by row end (G slides on the # speed-1 non-first ticks). Targets within eps_units are skipped (jitter # simplification). seg_count = 0 if speed >= 2: for n in notes: st = song.channels[n.ch] if len(st.bend_ft) <= 1 and n.bend0 == 0.0: continue start_row = n.start_ft // speed end_row = n.end_ft // speed cur = key_to_noteval(n.key + n.bend0) for r in range(start_row + 1, end_row): ftr = min((r + 1) * speed, n.end_ft) + shift_ft target = key_to_noteval( n.key + _curve_at(st.bend_ft, st.bend_val, ftr, 0.0)) if abs(target - cur) < eps_units: continue if (n.voice, r) in cells: continue step = -(-abs(target - cur) // (speed - 1)) c = _cell(cells, n.voice, r) c['note'] = target c['eff'] = (TOP_G, min(0xFFFF, step)) c['prio'] = PRIO_PORTA cur = target seg_count += 1 elif any(len(st.bend_ft) > 1 for st in song.channels): vprint(" warning: --speed 1 cannot express portamento; " "pitch-bend movement dropped") if seg_count: vprint(f" bend: {seg_count} portamento segment(s) emitted") # ── Pass 4: M channel volume (CC7 × CC11), per voice chronologically ── by_voice = {} for n in notes: by_voice.setdefault(n.voice, []).append(n) m_emitted = 0 for v, vnotes in by_voice.items(): vnotes.sort(key=lambda n: n.start_ft) m_state = 0x3F # engine channel_vol default for n in vnotes: st = song.channels[n.ch] for r in range(n.start_ft // speed, n.end_ft // speed + 1): ftr = r * speed + shift_ft m = round(_curve_at(st.cc7_ft, st.cc7_val, ftr, 100) / 127 * _curve_at(st.cc11_ft, st.cc11_val, ftr, 127) / 127 * 63) if m == m_state: continue c = _cell(cells, v, r) if c['eff'] is not None: continue # slot busy — retry next row c['eff'] = (TOP_M, (m & 0x3F) << 8) c['prio'] = PRIO_M m_state = m m_emitted += 1 if m_emitted: vprint(f" cc: {m_emitted} M channel-volume effect(s) emitted") total_rows = max(r for (_v, r) in cells) + 1 # ── Pass 5: T tempo changes ── bpm0 = midi_bpm_at(shift_ft) # tempo in effect at row 0 last = taud_bpm(bpm0) t_emitted = t_evict = 0 for ft, b in zip(song.tempo_ft, song.tempo_bpm): row = (ft - shift_ft) // speed if row < 0: continue if row >= total_rows: break tb = taud_bpm(b) if tb == last: continue placed = False victim = None for v in range(n_voices): c = cells.get((v, row)) if c is None or c['eff'] is None: c = _cell(cells, v, row) c['eff'] = (TOP_T, ((tb - 25) & 0xFF) << 8) c['prio'] = PRIO_TEMPO placed = True break if c['prio'] < PRIO_DELAY and (victim is None or c['prio'] < victim['prio']): victim = c if not placed and victim is not None: if victim['prio'] == PRIO_PORTA: victim['note'] = NOTE_NOP # orphan G note would retrigger victim['eff'] = (TOP_T, ((tb - 25) & 0xFF) << 8) victim['prio'] = PRIO_TEMPO placed = True t_evict += 1 if placed: last = tb t_emitted += 1 if t_emitted: vprint(f" tempo: {t_emitted} T effect(s)" + (f" ({t_evict} evicted a lesser effect)" if t_evict else "")) return cells, n_voices, total_rows, taud_bpm(bpm0) # ── Pattern / cue emission and final assembly ──────────────────────────────── def build_pattern_bin(cells: dict, n_voices: int, n_cues: int) -> bytes: out = bytearray(n_cues * n_voices * PATTERN_BYTES) pos = 0 for cue in range(n_cues): for v in range(n_voices): for r in range(PATTERN_ROWS): base = pos + r * 8 c = cells.get((v, cue * PATTERN_ROWS + r)) if c is None: out[base + 3] = 0xC0 out[base + 4] = 0xC0 continue struct.pack_into(' bytes: speed, rpb = args.speed, args.rpb # Leading-silence trim: shift the grid so the first trigger is row 0. first_row = min(n.start_ft // speed for n in song.notes if n.slot > 0) shift_ft = first_row * speed if shift_ft: vprint(f" info: trimming {first_row} leading silent row(s)") for n in song.notes: n.start_ft -= shift_ft n.end_ft -= shift_ft eps_units = args.bend_epsilon * 4096.0 / 1200.0 cells, n_voices, total_rows, bpm0 = emit_cells( song, None, speed, rpb, eps_units, args.drum_keyoff, shift_ft, args.max_voices) n_cues = (total_rows + PATTERN_ROWS - 1) // PATTERN_ROWS if n_cues > NUM_CUES: sys.exit(f"error: song needs {n_cues} cues > {NUM_CUES} limit " f"(try a smaller --rpb)") if n_cues * n_voices > NUM_PATTERNS_MAX: sys.exit(f"error: {n_cues} cues × {n_voices} voices " f"> {NUM_PATTERNS_MAX} pattern limit") pat_bin = build_pattern_bin(cells, n_voices, n_cues) pat_bin, remap, n_unique = deduplicate_patterns(pat_bin, n_cues * n_voices) vprint(f" patterns: {n_cues * n_voices} → {n_unique} unique; " f"{n_cues} cue(s), {n_voices} voice(s), {total_rows} rows") sheet = bytearray(NUM_CUES * CUE_SIZE) for ci in range(NUM_CUES): sheet[ci*CUE_SIZE:(ci+1)*CUE_SIZE] = encode_cue([], 0) for ci in range(n_cues): pats = [remap[ci * n_voices + v] for v in range(n_voices)] tail = total_rows - ci * PATTERN_ROWS if ci == n_cues - 1: instr = CUE_INST_HALT elif tail < PATTERN_ROWS: instr = cue_instruction_len(tail) else: instr = CUE_INST_NOP sheet[ci*CUE_SIZE:(ci+1)*CUE_SIZE] = encode_cue(pats, instr) # ── Sample + instrument bin ── sampleinst_raw = build_sample_inst_bin(sf, pool, layer_insts, meta_records, args.fadeout, bpm0) assert len(sampleinst_raw) == SAMPLEINST_SIZE compressed = compress_blob(sampleinst_raw, "sample+inst bin") comp_size = len(compressed) pat_comp = compress_blob(pat_bin, "pattern bin") cue_comp = compress_blob(bytes(sheet), "cue sheet") song_table_off = TAUD_HEADER_SIZE + comp_size song_off = song_table_off + TAUD_SONG_ENTRY entry = encode_song_entry( song_offset=song_off, num_voices=n_voices, num_patterns=n_unique, bpm_stored=(bpm0 - 25) & 0xFF, tick_rate=speed, base_note=0xA000, base_freq=8363.0, flags_byte=0x00, # linear pitch mode pat_bin_comp_size=len(pat_comp), cue_sheet_comp_size=len(cue_comp), global_vol=0xFF, mixing_vol=0xFF, ) # ── Project data: names + the Ixmp section recreating SF2 layering ── proj_data = b'' proj_off = 0 if not args.no_project_data: # Names indexed by slot (0 = unused). Layer slots carry the (suffixed) layer # instrument name; meta slots carry the bare preset name. max_slot = max([0] + list(slot_name)) inst_names = ['' for _ in range(max_slot + 1)] for s, nm in slot_name.items(): inst_names[s] = nm smp_names = [''] + [ms.name for ms in pool] ixmp = {} for ti in layer_insts: if not ti.usable: continue pl = [p.to_ixmp_dict(ti.canonical, bpm0, args.fadeout) for p in ti.patches if p is not ti.canonical] if pl: ixmp[ti.slot] = pl if ixmp: vprint(f" ixmp: {sum(len(p) for p in ixmp.values())} patch(es) " f"across {len(ixmp)} instrument(s)") title = song.title or os.path.splitext(os.path.basename(args.input))[0] proj_data = build_project_data( project_name=title, instrument_names=inst_names, sample_names=smp_names, ixmp_patches=ixmp or None, ) header = (TAUD_MAGIC + bytes([TAUD_VERSION, 1]) + struct.pack('1 layer become a Metainstrument. 1 disables ' 'layering (first-zone-wins, like the old behaviour). ' 'Covers ~93%% of big-bank presets at 4, ~98%% at 5') ap.add_argument('--bend-epsilon', type=float, default=4.0, help='Pitch-bend simplification threshold in cents ' '(default 4.0); smaller = more faithful') ap.add_argument('--drum-keyoff', action='store_true', help='Emit KEY_OFF for percussion-channel notes too ' '(GM drums normally ignore note-off)') ap.add_argument('--no-project-data', action='store_true', help='Omit the Project Data section — NOTE: this also ' 'omits Ixmp, collapsing every instrument to its ' 'canonical sample') ap.add_argument('-v', '--verbose', action='store_true') args = ap.parse_args() set_verbose(args.verbose) if not (1 <= args.speed <= 15): sys.exit("error: --speed must be 1..15") if not (1 <= args.max_voices <= 20): sys.exit("error: --max-voices must be 1..20") if not (1 <= args.max_layers <= 25): sys.exit("error: --max-layers must be 1..25") if args.output is None: args.output = os.path.splitext(args.input)[0] + '.taud' vprint(f"parsing MIDI '{args.input}'…") division, merged = parse_midi(args.input) song = extract_song(division, merged, args.rpb, args.speed) vprint(f" {len(song.notes)} note(s), {len(song.tempo_ft)} tempo event(s)") if not song.notes: sys.exit("error: MIDI contains no playable notes") vprint(f"parsing SF2 '{args.soundfont}'…") sf = parse_sf2(args.soundfont) vprint(f" {len(sf.presets)} preset(s), {len(sf.shdrs)} sample header(s)") # Presets in first-use order; triggers keyed by the exact (noteVal-with-initial- # bend, vol6) pair the patterns will carry, so layer trimming sees precisely what # the engine matches at runtime. slot_keys = [] seen_keys = set() triggers = {} for n in song.notes: if n.inst_key not in seen_keys: seen_keys.add(n.inst_key) slot_keys.append(n.inst_key) t = triggers.setdefault(n.inst_key, {}) k = (key_to_noteval(n.key + n.bend0), round(n.vel * 63 / 127)) t[k] = t.get(k, 0) + 1 vprint(f" {len(slot_keys)} preset(s) in use") registry = {} presets = build_presets(sf, slot_keys, triggers, args.perc_force_mapping, registry, args.max_layers) # Allocate instrument-bin slots: each layer is a normal instrument; a preset with # >1 layer also takes a Metainstrument slot the note references. Single-layer # presets stay plain instruments (no meta, no extra slot). next_slot = 1 layer_insts = [] # all normal instruments, .slot assigned meta_records = [] # (meta_slot, name, [(layer_slot, bbox_rect)]) slot_name = {} # slot → display name note_slot = {} # inst_key → slot a note triggers (0 = unplayable) for ik in slot_keys: name, layers = presets[ik] if not layers: note_slot[ik] = 0 continue need = len(layers) + (1 if len(layers) > 1 else 0) if next_slot + need - 1 > 255: vprint(f" warning: 255-slot budget exhausted — preset '{name}' dropped") note_slot[ik] = 0 continue for li, ti in enumerate(layers): ti.slot = next_slot; next_slot += 1 layer_insts.append(ti) slot_name[ti.slot] = name if len(layers) == 1 else f"{name} L{li}" if len(layers) == 1: note_slot[ik] = layers[0].slot else: meta_slot = next_slot; next_slot += 1 meta_records.append((meta_slot, name, [(ti.slot, _layer_bbox(ti)) for ti in layers])) slot_name[meta_slot] = name note_slot[ik] = meta_slot vprint(f" slots: {next_slot - 1} used — {len(layer_insts)} instrument(s), " f"{len(meta_records)} Metainstrument(s)") # Tag notes with their trigger slot; notes whose preset failed to resolve drop. unplayable = 0 for n in song.notes: n.slot = note_slot.get(n.inst_key, 0) if n.slot == 0: unplayable += 1 if unplayable: vprint(f" warning: {unplayable} note(s) dropped (unresolvable preset)") song.notes = [n for n in song.notes if n.slot > 0] if not song.notes: sys.exit("error: no notes survived preset resolution") # Pool = every sample referenced by a kept patch (canonical included), in # deterministic first-reference order. Everything else is trimmed. pool = [] seen = set() for ti in layer_insts: for p in ti.patches: if id(p.ms) not in seen: seen.add(id(p.ms)) pool.append(p.ms) taud = assemble_taud(sf, song, layer_insts, meta_records, slot_name, pool, args) sf.file.close() with open(args.output, 'wb') as f: f.write(taud) print(f"wrote {len(taud)} bytes to '{args.output}'") if __name__ == '__main__': main()