Files
Tharo 970af5600a Audio: Fix note values (#2716)
Due to the layout of the pitch_frequencies lookup table, note values
computed in extraction were reflected around middle C (MIDI note
number 60). This didn't matter for matching, since values would still
round-trip successfully. However, when using samples in external programs or
converting soundfonts to standard formats, the note values would lead
to incorrect playback of sounds. This change corrects the note values
so that external programs correctly infer the pitch of the sound when
played at a particular MIDI key.
2026-03-10 06:22:28 +09:00

685 lines
26 KiB
Python

# SPDX-FileCopyrightText: © 2024 ZeldaRET
# SPDX-License-Identifier: CC0-1.0
#
#
#
import math, struct
from typing import Dict, Optional
from .audio_tables import AudioCodeTableEntry
from .audiobank_structs import AudioSampleCodec, SoundFontSample, AdpcmBook, AdpcmLoop
from .extraction_xml import SampleBankExtractionDescription
from .tuning import pitch_names, note_z64_to_midi, recalc_tuning, rate_from_tuning, rank_rates_notes, BAD_FLOATS
from .util import align, XMLWriter, f32_to_u32
class AIFCFile:
    """
    Accumulates AIFC chunks in memory and writes them out as one FORM/AIFC file.

    Chunks are queued with add_section() / add_custom_section() and written, in insertion order, by commit().
    """

    def __init__(self):
        # Ordered list of (chunk id, chunk data) tuples
        self.sections = []
        # Running total over all queued chunks of: 8 (chunk id + length field) + align(len(data), 2)
        self.total_size = 0

    @staticmethod
    def pstring(data):
        """
        Encode data as a length-prefixed string: one length byte, the data itself, then a single NUL byte if that
        is required to make the total encoded length even.
        """
        return bytes([len(data)]) + data + (b"" if len(data) % 2 else b"\0")

    @staticmethod
    def serialize_f80(num):
        """
        Convert num to a big-endian 80-bit float (sign bit, 15-bit exponent, 64-bit mantissa with explicit leading
        one). Does not accept denormal/infinity/nan but these should never appear anyway.
        """
        num = float(num)
        if num == 0.0:
            # -0.0 == 0.0 is True, so the sign of a zero can only be recovered with copysign, not by comparison
            if math.copysign(1.0, num) < 0.0:
                return b"\x80" + b"\0" * 9
            return b"\0" * 10

        # Pull the f64 apart into sign / exponent / mantissa
        f64_bits, = struct.unpack(">Q", struct.pack(">d", num))
        f64_sign_bit = f64_bits & (2 ** 63)
        f64_exponent = (f64_bits ^ f64_sign_bit) >> 52
        assert f64_exponent != 0, "can't handle denormals"
        assert f64_exponent != 0x7FF, "can't handle infinity/nan"
        f64_exponent -= 1023
        f64_mantissa = f64_bits & (2 ** 52 - 1)

        # Reassemble as f80: sign moves from bit 63 to bit 79, exponent is re-biased, and the leading one of the
        # mantissa (implicit in f64) is stored explicitly in bit 63
        f80_sign_bit = f64_sign_bit << (80 - 64)
        f80_exponent = (f64_exponent + 0x3FFF) << 64
        f80_mantissa = (2 ** 63) | (f64_mantissa << (63 - 52))
        f80 = f80_sign_bit | f80_exponent | f80_mantissa
        return struct.pack(">HQ", f80 >> 64, f80 & (2 ** 64 - 1))

    def add_section(self, tp, data):
        """
        Queue a chunk with 4-byte id tp and payload data (both bytes) and account for it in total_size.
        """
        assert isinstance(tp, bytes)
        assert isinstance(data, bytes)
        self.sections.append((tp, data))
        self.total_size += align(len(data),2) + 8

    def add_custom_section(self, tp, data):
        """
        Queue an application-specific chunk: an APPL chunk whose payload is the "stoc" signature, the chunk name tp
        as a pstring, and then data.
        """
        self.add_section(b"APPL", b"stoc" + self.pstring(tp) + data)

    def remove_section(self, tp):
        """
        Remove the first queued chunk whose id equals tp, if there is one, and subtract its size from total_size.
        """
        assert isinstance(tp, bytes)
        for index, (s_tp, s_data) in enumerate(self.sections):
            if s_tp == tp:
                del self.sections[index]
                self.total_size -= align(len(s_data),2) + 8
                return

    def commit(self, outpath):
        """
        Write the FORM header followed by every queued chunk to outpath.

        The FORM size is computed locally (total_size plus 4 for the "AIFC" form type) rather than accumulated into
        total_size, so calling commit() more than once produces the same file each time.
        """
        form_size = self.total_size + 4
        with open(outpath, "wb") as outfile:
            outfile.write(b"FORM" + struct.pack(">I", form_size) + b"AIFC")
            for tp, data in self.sections:
                outfile.write(tp + struct.pack(">I", len(data)))
                outfile.write(data)
                # chunks are padded to an even length; the pad byte is not counted in the chunk's length field
                if len(data) % 2:
                    outfile.write(b"\0")
class AudioTableData:
    """
    Unaccounted data in the Audiotable
    """

    def __init__(self, start, end, data):
        """
        start and end are the offsets reported for this range, data is the byte content. The data length must be
        even. name and filename start out unset.
        """
        self.start : int = start
        self.end : int = end
        self.data = data
        assert len(self.data) % 2 == 0

        self.name : Optional[str] = None
        self.filename : Optional[str] = None

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _byte_directives(data, per_line=32):
        """
        Format data as newline-separated .byte directives holding at most per_line values each. There is no
        trailing newline; empty data gives an empty string.
        """
        return "\n".join(
            " .byte " + ", ".join(f"0x{b:02X}" for b in data[offset:offset + per_line])
            for offset in range(0, len(data), per_line)
        )

    def to_asm(self, name):
        """
        Return assembly source for this data: a comment line carrying name and the covered range, a blank line,
        then the data as .byte directives followed by a blank line. Empty data emits only the comment header
        instead of a truncated directive.
        """
        out = f"# {name} [0x{self.start:X}:0x{self.end:X}](0x{self.end-self.start:X})\n\n"
        if len(self.data) != 0:
            out += self._byte_directives(self.data) + "\n\n"
        return out

    def to_file(self, outpath : str):
        """
        Write the raw data to outpath.
        """
        # Output as binary blob
        with open(outpath, "wb") as outfile:
            outfile.write(self.data)
# Used by AudioTableSample in two places: as the multiplier relating encoded data length / frame_size() to the
# frame count, and as the third field packed into the AIFC COMM chunk.
# NOTE(review): presumably 16 is both the bit depth of the decoded PCM and the number of samples per encoded
# frame - confirm before changing either use independently.
PCM16_SAMPLE_SIZE = 16
class AudioTableSample(AudioTableData):
    """
    Sample in the Audiotable
    """

    def __init__(self, start : int, end : int, header : SoundFontSample, data, book : AdpcmBook, loop : AdpcmLoop, padding=None):
        """
        header, book and loop are the structures describing this sample; padding is the data between the end of
        the sample and the next aligned offset, if the caller collected it.
        """
        super().__init__(start, end, data)

        self.header : SoundFontSample = header
        self.book : AdpcmBook = book
        self.loop : AdpcmLoop = loop
        self.padding = padding

        # Set of (candidates, tuning) pairs collected from every use of this sample, where candidates is a sequence
        # of (note, rate) pairs. Consumed and cleared by resolve_basenote_rate().
        self.notes_rates = set()
        # Filled in by resolve_basenote_rate()
        self.sample_rate = None
        self.base_note = None
        self.tuning_map = None

        if self.loop.count == 0:
            # If a count is 0 the loop end must be the (bugged, vadpcm_enc computed it wrong originally) frame count
            num_frames_bugged = (len(self.data) * PCM16_SAMPLE_SIZE) // self.frame_size()
            assert self.loop.end == num_frames_bugged, f"{self.loop.end}, {num_frames_bugged}"

    def clone(self, start, end, padding):
        """
        Create a new sample at [start:end] with the given padding that shares this sample's header, data, book,
        loop and notes_rates.
        """
        new_sample = AudioTableSample(start, end, self.header, self.data, self.book, self.loop, padding)
        new_sample.notes_rates = self.notes_rates
        return new_sample

    def frame_size(self):
        """
        Per-codec frame size, used together with PCM16_SAMPLE_SIZE to convert the data length to a frame count.
        """
        return {
            AudioSampleCodec.CODEC_ADPCM : 9,
            AudioSampleCodec.CODEC_S8 : 16,
            AudioSampleCodec.CODEC_S16_INMEMORY : 32,
            AudioSampleCodec.CODEC_SMALL_ADPCM : 5,
            AudioSampleCodec.CODEC_REVERB : 0,
            AudioSampleCodec.CODEC_S16 : 32
        }[self.header.codec]

    def codec_id(self):
        """
        4-byte compression type identifier written to the COMM chunk for this sample's codec.
        """
        return {
            AudioSampleCodec.CODEC_ADPCM : b'ADP9',
            AudioSampleCodec.CODEC_S8 : b'HPCM',
            AudioSampleCodec.CODEC_S16_INMEMORY : b'NONE',
            AudioSampleCodec.CODEC_SMALL_ADPCM : b'ADP5',
            AudioSampleCodec.CODEC_REVERB : b'RVRB',
            AudioSampleCodec.CODEC_S16 : b'NONE',
        }[self.header.codec]

    def codec_name(self):
        """
        Human-readable compression name written (as a pstring) to the COMM chunk for this sample's codec.
        """
        return {
            AudioSampleCodec.CODEC_ADPCM : b"Nintendo/SGI VADPCM 9-bytes/frame",
            AudioSampleCodec.CODEC_S8 : b"Half-frame PCM",
            AudioSampleCodec.CODEC_S16_INMEMORY : b"Uncompressed",
            AudioSampleCodec.CODEC_SMALL_ADPCM : b"Nintendo/SGI VADPCM 5-bytes/frame",
            AudioSampleCodec.CODEC_REVERB : b"Nintendo Reverb format",
            AudioSampleCodec.CODEC_S16 : b"Uncompressed"
        }[self.header.codec]

    def codec_file_extension_compressed(self):
        """
        File extension for the extracted (still encoded) sample. Asserts for codecs that have no extension assigned.
        """
        ext = {
            AudioSampleCodec.CODEC_ADPCM : ".aifc",
            AudioSampleCodec.CODEC_S8 : None,
            AudioSampleCodec.CODEC_S16_INMEMORY : None,
            AudioSampleCodec.CODEC_SMALL_ADPCM : ".half.aifc",
            AudioSampleCodec.CODEC_REVERB : None,
            AudioSampleCodec.CODEC_S16 : ".aiff",
        }[self.header.codec]
        assert ext is not None
        return ext

    def codec_file_extension_decompressed(self):
        """
        File extension for the decoded sample. Asserts for codecs that have no extension assigned.
        """
        ext = {
            AudioSampleCodec.CODEC_ADPCM : ".wav",
            AudioSampleCodec.CODEC_S8 : None,
            AudioSampleCodec.CODEC_S16_INMEMORY : None,
            AudioSampleCodec.CODEC_SMALL_ADPCM : ".half.wav",
            AudioSampleCodec.CODEC_REVERB : None,
            AudioSampleCodec.CODEC_S16 : ".wav",
        }[self.header.codec]
        assert ext is not None
        return ext

    def base_note_number(self):
        """
        The resolved base note converted with note_z64_to_midi, as written to the INST chunk.
        """
        return note_z64_to_midi(self.base_note)

    def resolve_basenote_rate(self, extraction_sample_info : Optional[Dict[str,str]]):
        """
        Choose one samplerate and base note for this sample from the candidates gathered in notes_rates, and record
        the (rate, note) chosen for each individual tuning value in tuning_map.

        If extraction_sample_info is given, its "SampleRate" and "BaseNote" entries replace the automatically
        selected samplerate and base note (tuning_map is not affected by the override).

        On return notes_rates is None and sample_rate, base_note and tuning_map are set.
        """
        assert len(self.notes_rates) != 0

        # rate_3ds = None
        # if SAMPLERATES_3DS is not None:
        #     rate_3ds = SAMPLERATES_3DS[self.bank_num].get(i, None)

        tuning_map = {}

        def update_tuning_map(tuning, rate, note):
            tuning_map[tuning] = (rate, note)
            # check that the chosen rate and note reproduce the tuning value, except for known bad floats
            tuning_bits = f32_to_u32(tuning)
            ntuning = recalc_tuning(rate, note)
            assert ntuning == tuning or tuning_bits in BAD_FLOATS, \
                f"Got: {ntuning}(0x{f32_to_u32(ntuning):X}), Expected: {tuning}(0x{f32_to_u32(tuning):X})"

        if len(self.notes_rates) == 1:
            # only need to match one tuning value
            notes_rates,tuning = self.notes_rates.pop()

            # if rate_3ds is not None and rate_3ds not in [rate for _,rate in notes_rates]:
            #     print(f"NONMATCHING: 3DS={rate_3ds} N64={[rate for _,rate in notes_rates]}")

            if len(notes_rates) == 1:
                # only one possible combination of samplerate and basenote
                final_note,final_rate = notes_rates[0]
            else:
                # Several possible combinations of samplerate and basenote that result in the same tuning value,
                # choose just one by arbitrary ranking
                final_rate,(final_note,) = rank_rates_notes(tuple((rate, (note,)) for note,rate in notes_rates))

            update_tuning_map(tuning, final_rate, final_note)
        else:
            # need to match for multiple tuning values

            # produce a list of samplerates that are common to all entries, the correct samplerate is most likely in
            # this intersection
            rate_cands = set.intersection(*(set(rate for note,rate in nrs) for nrs,t in self.notes_rates))

            # if rate_3ds is not None and rate_3ds not in rate_cands:
            #     print(f"NONMATCHING: 3DS={rate_3ds} N64={rate_cands}")

            if len(rate_cands) == 0:
                # no common samplerates, arbitrarily rank each separately to get best candidate for each tuning, then
                # rank those again to find the one we should associate with the sample itself
                finalists = []

                for all_layout,tuning in self.notes_rates:
                    best_rate,(best_note,) = rank_rates_notes([(rate, (note,)) for note, rate in all_layout])
                    update_tuning_map(tuning, best_rate, best_note)
                    finalists.append((best_rate,(best_note,)))

                final_rate,(final_note,) = rank_rates_notes(finalists)
            else:
                tunings = [t for nrs,t in self.notes_rates]

                # Found one or more common samplerate, select just one by arbitrary ranking

                # build a map from samplerate -> note value for each entry
                dicts = tuple(dict((rate,note) for note,rate in nrs) for nrs,t in self.notes_rates)
                # list of tuples (rate, (notes for each entry)) for each candidate samplerate
                final_rate,final_notes = rank_rates_notes([(rate, tuple(D[rate] for D in dicts)) for rate in rate_cands])

                finalists = []

                # map the result of this stage to the tunings
                for tuning,note in zip(tunings,final_notes):
                    update_tuning_map(tuning, final_rate, note)
                    finalists.append((final_rate,(note,)))

                # select best note to go in the sample
                final_rate,(final_note,) = rank_rates_notes(finalists)

        if extraction_sample_info is not None:
            # the extraction xml takes priority over the automatic selection
            assert "SampleRate" in extraction_sample_info and "BaseNote" in extraction_sample_info
            final_rate = int(extraction_sample_info["SampleRate"])
            final_note = pitch_names.index(extraction_sample_info["BaseNote"])

        # print("    ",len(FINAL_NOTES_RATES), FINAL_NOTES_RATES)
        # if rate_3ds is not None and len(FINAL_NOTES_RATES) == 1:
        #     print(f"3DS : {rate_3ds} N64 : {FINAL_NOTES_RATES[0][0]}")
        #     if rate_3ds != FINAL_NOTES_RATES[0][0]:
        #         print("NONMATCHING AFTER RANKING")
        # else:
        #     print("No 3DS comparison")

        self.notes_rates = None
        self.sample_rate = final_rate
        self.base_note = final_note
        self.tuning_map = tuning_map

    def to_file(self, outpath : str):
        """
        Write this sample to outpath as an AIFC file with COMM, INST, VADPCMCODES, optionally VADPCMLOOPS, and SSND
        chunks. The samplerate and base note must already have been resolved.
        """
        assert self.sample_rate is not None and self.base_note is not None,\
            f"The sample must have been assigned a samplerate and basenote to be extracted to AIFC: [0x{self.start:X}:0x{self.end:X}]\n{self.header}"

        NUM_CHANNELS = 1

        # Note this computes the correct number of frames, The original sdk tool vadpcm_enc contained a bug where aifc
        # files would sometimes be 1-off in the reported number of frames. We do not reproduce this.
        num_frames = (len(self.data) // self.frame_size()) * PCM16_SAMPLE_SIZE

        aifc = AIFCFile()
        aifc.add_section(b"COMM",
            struct.pack(">hIh", NUM_CHANNELS, num_frames, PCM16_SAMPLE_SIZE)
            + AIFCFile.serialize_f80(self.sample_rate)
            + self.codec_id()
            + AIFCFile.pstring(self.codec_name())
        )
        aifc.add_section(b"INST",
            struct.pack(">bbbbbbhhhhhhh",
                self.base_note_number(),
                0,      # detune
                # TODO fill in the rest? with what?
                0,      # lownote
                0,      # highnote
                0,      # lowvel
                0,      # highvel
                0,      # gain
                0,0,0,  # sustain(mode,start,end)
                0,0,0,  # release(mode,start,end)
            )
        )
        aifc.add_custom_section(b"VADPCMCODES", self.book.serialize())
        if self.loop.count != 0:
            # We don't need to write a VADPCMLOOPS chunk if the count is 0 as we can represent these by the absence of
            # a VADPCMLOOPS chunk; a count of 0 indicates the sample has no loop, the start and end of a loop with
            # count=0 are always 0 and the end of the sample respectively.
            aifc.add_custom_section(b"VADPCMLOOPS", self.loop.serialize())
        aifc.add_section(b"SSND", struct.pack(">II", 0, 0) + bytes(self.data))
        aifc.commit(outpath)

    def to_asm(self, name):
        """
        Return assembly source for this sample: a comment header, the global label name, a name_OFF symbol holding
        the offset from $start, the data as .byte directives (32 values per line), and then either an alignment
        directive or the original padding bytes if any of them are nonzero.

        A padding of None (the constructor default) is treated the same as empty padding, and empty data emits no
        .byte directive rather than a truncated one.
        """
        out = f"# {name} [0x{self.start:X}:0x{self.end:X}](0x{self.end-self.start:X})\n"
        out += "\n"
        out += f".global {name}\n"
        out += f"{name}:\n"
        out += f".global {name}_OFF\n"
        out += f".set {name}_OFF, . - $start\n"
        out += "\n"

        if len(self.data) != 0:
            out += "\n".join(
                " .byte " + ", ".join(f"0x{b:02X}" for b in self.data[offset:offset + 32])
                for offset in range(0, len(self.data), 32)
            ) + "\n"

        padding = self.padding if self.padding is not None else b""
        if all(b == 0 for b in padding):
            # absent or all-zero padding is reproduced by alignment alone
            out += " .balign 16\n"
        else:
            out += "# PADDING\n"
            out += " .byte " + ", ".join(f"0x{b:02X}" for b in padding) + "\n"
        out += "\n"
        return out
class AudioTableFile:
    """
    Single sample bank in the Audiotable
    """

    def __init__(self, bank_num : int, audiotable_seg : memoryview, table_entry : AudioCodeTableEntry,
                 seg_offset : int, buffer_bug : bool = False,
                 extraction_desc : Optional[SampleBankExtractionDescription] = None):
        """
        Read this bank's data out of audiotable_seg via table_entry and take names and per-sample/per-blob info
        from extraction_desc when one is provided, otherwise fall back to generated names.
        """
        self.bank_num = bank_num
        self.table_entry : AudioCodeTableEntry = table_entry
        self.data = self.table_entry.data(audiotable_seg, seg_offset)
        self.buffer_bug = buffer_bug

        # Sorted list of samples and blobs, None until finalize_samples() has been called
        self.samples_final = None

        if extraction_desc is None:
            self.file_name = f"SampleBank_{self.bank_num}"
            self.name = f"SampleBank_{self.bank_num}"
            self.extraction_sample_info_versions = []
            self.extraction_sample_info = None
            self.extraction_blob_info = None
        else:
            self.file_name = extraction_desc.file_name
            self.name = extraction_desc.name
            self.extraction_sample_info_versions = extraction_desc.sample_info_versions
            self.extraction_sample_info = extraction_desc.sample_info
            self.extraction_blob_info = extraction_desc.blob_info

        # Indices registered through register_ptr(), written as Pointer elements by to_xml()
        self.pointer_indices = []

        # Map of sample start offset -> AudioTableSample
        self.samples = {}
        # Set of (start, aligned end, end) for every added sample; replaced by a merged list in finalize_coverage()
        self.coverage = set()

    def register_ptr(self, index):
        self.pointer_indices.append(index)

    def dump_bin(self, path):
        """
        Write the raw bank data to path.
        """
        with open(path, "wb") as outfile:
            outfile.write(self.data)

    def __len__(self):
        return len(self.data)

    def add_sample(self, sample_header : SoundFontSample, book : AdpcmBook, loop : AdpcmLoop, tuning : float, ob):
        """
        Record a use of the sample described by sample_header with the given tuning. The first use of a start
        offset creates the AudioTableSample; later uses are checked for consistency against it and only contribute
        another set of note/samplerate candidates. ob is not used.
        """
        # collect sample data
        sample_start = sample_header.sample_addr
        sample_end = sample_header.sample_addr + sample_header.size
        sample_end_aligned = align(sample_end, 16)

        sample_data = self.data[sample_start:sample_end]
        sample_padding = self.data[sample_end:sample_end_aligned]

        notes_rates = rate_from_tuning(tuning)

        # update coverage
        self.coverage.add((sample_start, sample_end_aligned, sample_end))

        if sample_start in self.samples:
            # if this sample start was already recorded, compare with previous
            prev_sample : AudioTableSample = self.samples[sample_start]

            # check data integrity, these should not change if the sample is the same
            assert prev_sample.end == sample_end
            assert prev_sample.header.codec == sample_header.codec
            assert prev_sample.book == book
            assert prev_sample.loop == loop

            # add notes/rates candidates
            prev_sample.notes_rates.add((notes_rates, tuning))
        else:
            # if this sample start was not recorded, add it
            new_sample = AudioTableSample(sample_start, sample_end, sample_header, sample_data, book, loop, sample_padding)
            new_sample.notes_rates.add((notes_rates, tuning))
            self.samples[sample_start] = new_sample

    def lookup_sample(self, offset : int) -> AudioTableSample:
        return self.samples[offset]

    def sample_name(self, sample : AudioTableSample, index : int):
        """
        Name for the index-th sample: taken from the extraction info when it has an entry for index, otherwise
        generated from the bank number and index.
        """
        if self.extraction_sample_info is not None and index < len(self.extraction_sample_info):
            return self.extraction_sample_info[index]["Name"]
        return f"SAMPLE_{self.bank_num}_{index}"

    def sample_filename(self, sample : AudioTableSample, index : int):
        """
        File name (with codec extension) for the index-th sample: taken from the extraction info when it has an
        entry for index, otherwise "Sample" plus the index zero-padded to the digit count of the number of samples.
        """
        ext = sample.codec_file_extension_compressed()

        if self.extraction_sample_info is not None and index < len(self.extraction_sample_info):
            return self.extraction_sample_info[index]["FileName"] + ext

        # number of decimal digits in the sample count, computed without going through floating point
        num_samples = len(self.samples)
        npad = len(str(num_samples)) if num_samples != 0 else 0
        return f"Sample{index:0{npad}}{ext}"

    def blob_filename(self, start, end, index):
        """
        Name for the index-th blob: taken from the extraction info when it has an entry for index, otherwise
        generated from the covered range.
        """
        if self.extraction_blob_info is not None and index < len(self.extraction_blob_info):
            return self.extraction_blob_info[index]["Name"]
        return f"UNACCOUNTED_{start:X}_{end:X}"

    def finalize_samples(self):
        """
        Build samples_final (all samples sorted by start offset) and resolve the samplerate and base note of each.
        Samples without a corresponding extraction info entry are resolved automatically, matching how
        sample_name() and sample_filename() treat indices past the end of the extraction info.
        """
        self.samples_final = sorted(self.samples.values(), key = lambda sample : sample.start)

        info = self.extraction_sample_info
        for i,sample in enumerate(self.samples_final):
            sample_info = info[i] if info is not None and i < len(info) else None
            sample.resolve_basenote_rate(sample_info)

    def _find_sample_match(self, start, all_sample_banks):
        """
        Search the finalized samples of every bank in all_sample_banks for one whose data equals this bank's data
        beginning at start. Returns (bank index, sample) for the first match, or None.
        """
        for j,bank in enumerate(all_sample_banks):
            if not isinstance(bank, AudioTableFile):
                # Ignore pointer entries
                continue

            for sample in bank.samples_final:
                if not isinstance(sample, AudioTableSample):
                    # samples_final can also hold blobs, which cannot be cloned as samples
                    continue
                if self.data[start:start + len(sample)] == sample.data:
                    return j, sample
        return None

    def finalize_coverage(self, all_sample_banks):
        """
        Account for any part of the bank data not covered by an added sample. Each unaccounted range is matched
        against the samples of all banks; matches are cloned into samples_final, and whatever cannot be matched is
        added to samples_final as an AudioTableData blob. Requires samples_final to be populated for this bank and
        every bank in all_sample_banks.
        """
        if len(self.coverage) != 0:
            # merge ranges if there are any
            ordered = sorted(self.coverage)
            merged = [list(ordered[0])]

            for rng in ordered[1:]:
                if merged[-1][1] == rng[0]:
                    # starts where the previous (aligned) range ended, extend it
                    merged[-1][1] = rng[1]
                    merged[-1][2] = rng[2]
                else:
                    merged.append(list(rng))

            self.coverage = merged

        # check fully covered
        if len(self.coverage) == 1 and self.coverage[0][0] == 0 and self.coverage[0][1] == len(self.data):
            return # all accounted

        # not fully covered, determine ranges of unaccounted data
        if len(self.coverage) == 0:
            # absolutely nothing is accounted for
            unaccounted_ranges = [(0, len(self))]
        else:
            unaccounted_ranges = []

            # deal with gap at the start
            if self.coverage[0][0] != 0:
                unaccounted_ranges.append((0, self.coverage[0][0]))

            # deal with gaps in the middle
            for j,cvg in enumerate(self.coverage[:-1]):
                start = cvg[1]
                end = self.coverage[j + 1][0]
                if start != end:
                    unaccounted_ranges.append((start, end))

            # deal with gap at the end
            if self.coverage[-1][1] != len(self):
                unaccounted_ranges.append((self.coverage[-1][1], len(self)))

        # TODO if an unaccounted range is in the extraction xml, trust it before searching other banks

        unaccounted_str = "[" + ", ".join(f"(0x{start:06X}, 0x{end:06X})" for start,end in unaccounted_ranges) + "]"
        print(f"Sample Bank {self.bank_num} has incomplete coverage. Unaccounted: {unaccounted_str}")

        # search other banks for matches
        for start,end in unaccounted_ranges:
            # "<" rather than "!=" so that a match whose aligned end passes the end of the range stops the search
            while start < end:
                match = self._find_sample_match(start, all_sample_banks)

                if match is None:
                    # found no matches, blob it
                    print(f" No match found in other banks for range [0x{start:X}:0x{end:X}], leaving as binary blob")
                    self.samples_final.append(AudioTableData(start, end, self.data[start:end]))
                    break

                j,sample = match
                sample_end = start + len(sample)
                sample_end_aligned = align(sample_end, 16)

                print(f" Located match for range [0x{start:X}:0x{sample_end:X}] in bank {j} at 0x{sample.start:X}")

                new_sample = sample.clone(start, sample_end, self.data[sample_end:sample_end_aligned])
                new_sample.sample_rate = sample.sample_rate
                new_sample.base_note = sample.base_note
                self.samples_final.append(new_sample)

                # continue searching after the matched sample and its padding
                start = sample_end_aligned

        # Final sort
        self.samples_final.sort(key = lambda sample : sample.start)

    def assign_names(self):
        """
        Assign name and filename to every entry of samples_final. Samples and blobs are indexed separately.
        """
        i = 0
        j = 0
        for sample in self.samples_final:
            if isinstance(sample, AudioTableSample):
                sample.name = self.sample_name(sample, i)
                sample.filename = self.sample_filename(sample, i)
                i += 1
            else:
                name = self.blob_filename(sample.start, sample.end, j)
                sample.name = name
                sample.filename = f"{name}.bin"
                j += 1

    def to_xml(self, base_path):
        """
        Return the SampleBank xml for this bank as a string, with one Pointer element per registered pointer index
        and one Sample or Blob element per entry of samples_final, with paths located under base_path.
        """
        xml = XMLWriter()

        start = {
            "Name" : self.name,
            "Index" : self.bank_num,
            "Medium" : self.table_entry.medium.name,
            "CachePolicy" : self.table_entry.cache_policy.name,
        }
        if self.buffer_bug:
            start["BufferBug"] = "true"

        xml.write_start_tag("SampleBank", start)

        # write pointers
        for index in self.pointer_indices:
            xml.write_element("Pointer", { "Index" : index })

        # write samples/blobs
        for sample in self.samples_final:
            element = "Sample" if isinstance(sample, AudioTableSample) else "Blob"
            xml.write_element(element, {
                "Name" : sample.name,
                "Path" : f"$(BUILD_DIR)/{base_path}/{sample.filename}",
            })

        xml.write_end_tag()

        return str(xml)

    def write_extraction_xml(self, path):
        """
        Write the extraction xml for this bank to path: entries carried over from the existing extraction xml are
        written verbatim, followed by new entries for the remaining samples and blobs.
        """
        xml = XMLWriter()

        xml.write_comment("This file is only for extraction of vanilla data. For other purposes see assets/audio/samplebanks/")

        xml.write_start_tag("SampleBank", {
            "Name" : self.name,
            "Index" : self.bank_num,
        })

        # Write elements from the old xml version verbatim
        i = 0
        for entry_name,entry_attrs,in_version in self.extraction_sample_info_versions:
            xml.write_element(entry_name, entry_attrs)
            i += in_version

        # Write any new elements
        for sample in self.samples_final[i:]:
            if isinstance(sample, AudioTableSample):
                # FileName is stored without the codec extension. Only the trailing extension is stripped so that
                # this is the exact inverse of sample_filename() even if the name itself contains the extension.
                ext = sample.codec_file_extension_compressed()
                file_name = sample.filename
                if file_name.endswith(ext):
                    file_name = file_name[:-len(ext)]

                attrs = {
                    "Name" : sample.name,
                    "FileName" : file_name,
                    "SampleRate" : sample.sample_rate,
                    "BaseNote" : pitch_names[sample.base_note],
                }
                xml.write_element("Sample", attrs)
            else:
                attrs = {
                    "Name" : sample.name,
                }
                xml.write_element("Blob", attrs)

        xml.write_end_tag()

        with open(path, "w") as outfile:
            outfile.write(str(xml))

    def write_s_file(self, name, path):
        """
        Write the bank to path as an assembly file: a header defining the name_Start and $start labels followed by
        the to_asm() output of every sample and blob in offset order.

        Entries are taken from samples_final when the bank has been finalized, otherwise from the samples recorded
        so far sorted by start offset.
        """
        # NOTE(review): the header declares `name` global but defines the label `name_Start`; left as-is, confirm
        # which symbol is intended.
        if self.samples_final is not None:
            entries = self.samples_final
        else:
            entries = sorted(self.samples.values(), key = lambda sample : sample.start)

        with open(path, "w") as outfile:
            out = ".rdata\n"
            out += "\n"
            out += ".balign 16\n"
            out += "\n"
            out += f".global {name}\n"
            out += f"{name}_Start:\n"
            out += "$start:\n"
            out += "\n"

            outfile.write(out)

            i = 0
            for sample in entries:
                if isinstance(sample, AudioTableSample):
                    outfile.write(sample.to_asm(self.sample_name(sample, i)))
                    i += 1
                else:
                    outfile.write(sample.to_asm("__UNACCOUNTED__"))