taut: sample dedup

2026-06-12 15:44:05 +09:00 · 2026-05-29 15:01:55 +09:00
parent 43e5baadf4
commit 9e8af96c32
3 changed files with 47 additions and 17 deletions
--- a/assets/disk0/tvdos/bin/taut.js
+++ b/assets/disk0/tvdos/bin/taut.js
@@ -3344,16 +3344,16 @@ function drawSamplesProperties() {
 let smpUsedScroll = 0
 function drawSamplesUsedBy() {
    const rightW = SCRW - SMP_RIGHT_X + 1
    con.move(SMP_USED_Y, SMP_RIGHT_X)
    con.color_pair(colSmpUsedHdr, colBackPtn)
    print('Used by instruments:'.padEnd(rightW))
    const s = (samplesCache && samplesCache[smpListCursor]) || null
    const used = s ? s.usedBy : []
    const names = (songsMeta && songsMeta.instNames) || []
    const visible = SMP_USED_LIST_H
    const rightW = SCRW - SMP_RIGHT_X + 1
    con.move(SMP_USED_Y, SMP_RIGHT_X)
    con.color_pair(colSmpUsedHdr, colBackPtn)
    print(`Used by instruments (${used.length}):`.padEnd(rightW))
    if (smpUsedScroll > Math.max(0, used.length - visible))
        smpUsedScroll = Math.max(0, used.length - visible)
    if (smpUsedScroll < 0) smpUsedScroll = 0
--- a/it2taud.py
+++ b/it2taud.py
@@ -1190,13 +1190,29 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
    sample_bin = bytearray(SAMPLEBIN_SIZE)
    offsets    = {}
    pos        = 0
    # IT use_instruments mode points many Taud instrument slots at the same
    # underlying sample object (e.g. seven "ChipBass.*" instruments all play
    # "ChipBass.looped"). Write each distinct sample's PCM into the pool once and
    # let every referencing slot share the offset, rather than emitting one
    # identical copy per slot. `pool_order` records the distinct samples in
    # ascending-offset order — the order taut.js's sample viewer expects SNam to
    # follow (it dedupes instrument records by (ptr,len), sorts by ptr, and
    # matches SNam[i+1] positionally — see taut.js buildSampleIndex).
    written    = {}     # id(sample) -> pool offset already written
    pool_order = []     # distinct sample objects, in pool (ascending-offset) order
    for idx, s in pcm_list:
        shared = written.get(id(s))
        if shared is not None:
            offsets[idx] = shared
            continue
        n = min(len(s.sample_data), SAMPLEBIN_SIZE - pos)
        if n <= 0:
            vprint(f"  warning: sample bin full, dropping '{s.name}'")
            offsets[idx] = 0; s.length = 0; continue
        sample_bin[pos:pos+n] = s.sample_data[:n]
        offsets[idx] = pos
        written[id(s)] = pos
        pool_order.append(s)
        if n < len(s.sample_data):
            vprint(f"  warning: '{s.name}' truncated {len(s.sample_data)} → {n}")
            s.length = n
@@ -1384,7 +1400,7 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
        vprint(f"  instrument[{taud_idx}] '{s.name}' ptr:{ptr} c5spd:{s.c5_speed}")
-    return bytes(sample_bin) + bytes(inst_bin), offsets, ratio
+    return bytes(sample_bin) + bytes(inst_bin), offsets, ratio, pool_order
 # ── Pattern builder ───────────────────────────────────────────────────────────
@@ -1899,7 +1915,7 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
                'dct':        inst.dct,
                'dca':        inst.dca,
            }
-        sampleinst_raw, _, sample_ratio = build_sample_inst_bin_it(proxy, instr_data_by_slot)
+        sampleinst_raw, _, sample_ratio, pool_order = build_sample_inst_bin_it(proxy, instr_data_by_slot)
    else:
        # Samples referenced directly; proxy is samples list (0-based, slot 0 unused)
        proxy = [None] + list(samples)
@@ -1908,7 +1924,7 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
            for i, s in enumerate(samples)
            if s is not None
        }
-        sampleinst_raw, _, sample_ratio = build_sample_inst_bin_it(proxy)
+        sampleinst_raw, _, sample_ratio, pool_order = build_sample_inst_bin_it(proxy)
    assert len(sampleinst_raw) == SAMPLEINST_SIZE
@@ -1961,8 +1977,14 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
    if with_project_data:
        inst_names = [''] + [(inst.name if inst is not None else '')
                             for inst in instruments[:255]]
-        smp_names  = [''] + [(s.name if s is not None else '')
+        # SNam mirrors the deduplicated sample pool: one entry per distinct
-                             for s in samples[:255]]
+        # sample, in pool order, named after the sample itself. taut.js dedupes
        # instrument records by (ptr,len), sorts ascending by ptr, and matches
        # SNam[i+1] positionally to that list, so this ordering labels every
        # sample correctly and a shared sample (e.g. "ChipBass.looped") appears
        # exactly once instead of once per referencing instrument slot.
        smp_names  = [''] + [(getattr(s, 'name', '') or '')
                             for s in pool_order[:255]]
        proj_data = build_project_data(
            project_name=h.title,
            instrument_names=inst_names,
--- a/s3m2taud.py
+++ b/s3m2taud.py
@@ -138,7 +138,11 @@ def parse_instruments(data: bytes, h: S3MHeader) -> list:
            continue
        inst = S3MInstrument()
        inst.itype    = data[ptr]
-        inst.filename = data[ptr+1:ptr+13].rstrip(b'\x00').decode('latin-1', errors='replace')
+        # 12-byte DOS filename field; null-terminated with possible trailing
        # garbage after the terminator (ST3 doesn't zero the tail). Truncate at
        # the first null. This field carries the per-sample short name (e.g.
        # 'HIT1') as distinct from the 28-byte title at 0x30.
        inst.filename = data[ptr+1:ptr+13].split(b'\x00', 1)[0].decode('latin-1', errors='replace')
        # memseg: 3 bytes at offsets 0x0D,0x0E,0x0F — high byte first (quirk)
        memseg_hi  = data[ptr + 0x0D]
        memseg_lo  = struct.unpack_from('<H', data, ptr + 0x0E)[0]
@@ -939,17 +943,21 @@ def assemble_taud(h: S3MHeader, instruments: list, patterns: list,
        cur_off += len(pat_comp) + len(cue_comp)
    # ── Project Data (optional) ──────────────────────────────────────────────
-    # S3M instruments and samples share the same slot space, so the names go
+    # S3M instruments and samples share the same slot space, but carry two
-    # into both INam and SNam (1-based; slot 0 empty).
+    # distinct name fields: the 28-byte title (inst.name → INam) and the
    # 12-byte DOS filename (inst.filename → SNam). e.g. WHEN.s3m instrument #1
    # is titled "(c) Purple Motion / 1994" with sample name 'HIT1'.
    proj_data = b''
    proj_off  = 0
    if with_project_data:
-        names = [''] + [(inst.name if inst is not None else '')
+        inst_names   = [''] + [(inst.name     if inst is not None else '')
-                        for inst in instruments[:255]]
+                               for inst in instruments[:255]]
        sample_names = [''] + [(inst.filename if inst is not None else '')
                               for inst in instruments[:255]]
        proj_data = build_project_data(
            project_name=h.title,
-            instrument_names=names,
+            instrument_names=inst_names,
-            sample_names=names,
+            sample_names=sample_names,
        )
        if proj_data:
            proj_off = cur_off