taut: sample dedup

This commit is contained in:
minjaesong
2026-05-29 15:01:55 +09:00
parent 43e5baadf4
commit 9e8af96c32
3 changed files with 47 additions and 17 deletions

View File

@@ -3344,16 +3344,16 @@ function drawSamplesProperties() {
let smpUsedScroll = 0 let smpUsedScroll = 0
function drawSamplesUsedBy() { function drawSamplesUsedBy() {
const rightW = SCRW - SMP_RIGHT_X + 1
con.move(SMP_USED_Y, SMP_RIGHT_X)
con.color_pair(colSmpUsedHdr, colBackPtn)
print('Used by instruments:'.padEnd(rightW))
const s = (samplesCache && samplesCache[smpListCursor]) || null const s = (samplesCache && samplesCache[smpListCursor]) || null
const used = s ? s.usedBy : [] const used = s ? s.usedBy : []
const names = (songsMeta && songsMeta.instNames) || [] const names = (songsMeta && songsMeta.instNames) || []
const visible = SMP_USED_LIST_H const visible = SMP_USED_LIST_H
const rightW = SCRW - SMP_RIGHT_X + 1
con.move(SMP_USED_Y, SMP_RIGHT_X)
con.color_pair(colSmpUsedHdr, colBackPtn)
print(`Used by instruments (${used.length}):`.padEnd(rightW))
if (smpUsedScroll > Math.max(0, used.length - visible)) if (smpUsedScroll > Math.max(0, used.length - visible))
smpUsedScroll = Math.max(0, used.length - visible) smpUsedScroll = Math.max(0, used.length - visible)
if (smpUsedScroll < 0) smpUsedScroll = 0 if (smpUsedScroll < 0) smpUsedScroll = 0

View File

@@ -1190,13 +1190,29 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
sample_bin = bytearray(SAMPLEBIN_SIZE) sample_bin = bytearray(SAMPLEBIN_SIZE)
offsets = {} offsets = {}
pos = 0 pos = 0
# IT use_instruments mode points many Taud instrument slots at the same
# underlying sample object (e.g. seven "ChipBass.*" instruments all play
# "ChipBass.looped"). Write each distinct sample's PCM into the pool once and
# let every referencing slot share the offset, rather than emitting one
# identical copy per slot. `pool_order` records the distinct samples in
# ascending-offset order — the order taut.js's sample viewer expects SNam to
# follow (it dedupes instrument records by (ptr,len), sorts by ptr, and
# matches SNam[i+1] positionally — see taut.js buildSampleIndex).
written = {} # id(sample) -> pool offset already written
pool_order = [] # distinct sample objects, in pool (ascending-offset) order
for idx, s in pcm_list: for idx, s in pcm_list:
shared = written.get(id(s))
if shared is not None:
offsets[idx] = shared
continue
n = min(len(s.sample_data), SAMPLEBIN_SIZE - pos) n = min(len(s.sample_data), SAMPLEBIN_SIZE - pos)
if n <= 0: if n <= 0:
vprint(f" warning: sample bin full, dropping '{s.name}'") vprint(f" warning: sample bin full, dropping '{s.name}'")
offsets[idx] = 0; s.length = 0; continue offsets[idx] = 0; s.length = 0; continue
sample_bin[pos:pos+n] = s.sample_data[:n] sample_bin[pos:pos+n] = s.sample_data[:n]
offsets[idx] = pos offsets[idx] = pos
written[id(s)] = pos
pool_order.append(s)
if n < len(s.sample_data): if n < len(s.sample_data):
vprint(f" warning: '{s.name}' truncated {len(s.sample_data)}{n}") vprint(f" warning: '{s.name}' truncated {len(s.sample_data)}{n}")
s.length = n s.length = n
@@ -1384,7 +1400,7 @@ def build_sample_inst_bin_it(samples_or_proxy: list,
vprint(f" instrument[{taud_idx}] '{s.name}' ptr:{ptr} c5spd:{s.c5_speed}") vprint(f" instrument[{taud_idx}] '{s.name}' ptr:{ptr} c5spd:{s.c5_speed}")
return bytes(sample_bin) + bytes(inst_bin), offsets, ratio return bytes(sample_bin) + bytes(inst_bin), offsets, ratio, pool_order
# ── Pattern builder ─────────────────────────────────────────────────────────── # ── Pattern builder ───────────────────────────────────────────────────────────
@@ -1899,7 +1915,7 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
'dct': inst.dct, 'dct': inst.dct,
'dca': inst.dca, 'dca': inst.dca,
} }
sampleinst_raw, _, sample_ratio = build_sample_inst_bin_it(proxy, instr_data_by_slot) sampleinst_raw, _, sample_ratio, pool_order = build_sample_inst_bin_it(proxy, instr_data_by_slot)
else: else:
# Samples referenced directly; proxy is samples list (0-based, slot 0 unused) # Samples referenced directly; proxy is samples list (0-based, slot 0 unused)
proxy = [None] + list(samples) proxy = [None] + list(samples)
@@ -1908,7 +1924,7 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
for i, s in enumerate(samples) for i, s in enumerate(samples)
if s is not None if s is not None
} }
sampleinst_raw, _, sample_ratio = build_sample_inst_bin_it(proxy) sampleinst_raw, _, sample_ratio, pool_order = build_sample_inst_bin_it(proxy)
assert len(sampleinst_raw) == SAMPLEINST_SIZE assert len(sampleinst_raw) == SAMPLEINST_SIZE
@@ -1961,8 +1977,14 @@ def assemble_taud(h: ITHeader, samples: list, instruments: list,
if with_project_data: if with_project_data:
inst_names = [''] + [(inst.name if inst is not None else '') inst_names = [''] + [(inst.name if inst is not None else '')
for inst in instruments[:255]] for inst in instruments[:255]]
smp_names = [''] + [(s.name if s is not None else '') # SNam mirrors the deduplicated sample pool: one entry per distinct
for s in samples[:255]] # sample, in pool order, named after the sample itself. taut.js dedupes
# instrument records by (ptr,len), sorts ascending by ptr, and matches
# SNam[i+1] positionally to that list, so this ordering labels every
# sample correctly and a shared sample (e.g. "ChipBass.looped") appears
# exactly once instead of once per referencing instrument slot.
smp_names = [''] + [(getattr(s, 'name', '') or '')
for s in pool_order[:255]]
proj_data = build_project_data( proj_data = build_project_data(
project_name=h.title, project_name=h.title,
instrument_names=inst_names, instrument_names=inst_names,

View File

@@ -138,7 +138,11 @@ def parse_instruments(data: bytes, h: S3MHeader) -> list:
continue continue
inst = S3MInstrument() inst = S3MInstrument()
inst.itype = data[ptr] inst.itype = data[ptr]
inst.filename = data[ptr+1:ptr+13].rstrip(b'\x00').decode('latin-1', errors='replace') # 12-byte DOS filename field; null-terminated with possible trailing
# garbage after the terminator (ST3 doesn't zero the tail). Truncate at
# the first null. This field carries the per-sample short name (e.g.
# 'HIT1') as distinct from the 28-byte title at 0x30.
inst.filename = data[ptr+1:ptr+13].split(b'\x00', 1)[0].decode('latin-1', errors='replace')
# memseg: 3 bytes at offsets 0x0D,0x0E,0x0F — high byte first (quirk) # memseg: 3 bytes at offsets 0x0D,0x0E,0x0F — high byte first (quirk)
memseg_hi = data[ptr + 0x0D] memseg_hi = data[ptr + 0x0D]
memseg_lo = struct.unpack_from('<H', data, ptr + 0x0E)[0] memseg_lo = struct.unpack_from('<H', data, ptr + 0x0E)[0]
@@ -939,17 +943,21 @@ def assemble_taud(h: S3MHeader, instruments: list, patterns: list,
cur_off += len(pat_comp) + len(cue_comp) cur_off += len(pat_comp) + len(cue_comp)
# ── Project Data (optional) ────────────────────────────────────────────── # ── Project Data (optional) ──────────────────────────────────────────────
# S3M instruments and samples share the same slot space, so the names go # S3M instruments and samples share the same slot space, but carry two
# into both INam and SNam (1-based; slot 0 empty). # distinct name fields: the 28-byte title (inst.name → INam) and the
# 12-byte DOS filename (inst.filename → SNam). e.g. WHEN.s3m instrument #1
# is titled "(c) Purple Motion / 1994" with sample name 'HIT1'.
proj_data = b'' proj_data = b''
proj_off = 0 proj_off = 0
if with_project_data: if with_project_data:
names = [''] + [(inst.name if inst is not None else '') inst_names = [''] + [(inst.name if inst is not None else '')
for inst in instruments[:255]] for inst in instruments[:255]]
sample_names = [''] + [(inst.filename if inst is not None else '')
for inst in instruments[:255]]
proj_data = build_project_data( proj_data = build_project_data(
project_name=h.title, project_name=h.title,
instrument_names=names, instrument_names=inst_names,
sample_names=names, sample_names=sample_names,
) )
if proj_data: if proj_data:
proj_off = cur_off proj_off = cur_off