mirror of https://github.com/ACreTeam/ac-decomp
1243 lines
65 KiB
Python
1243 lines
65 KiB
Python
import argparse
|
|
import struct
|
|
import re
|
|
from typing import List, Dict, Any, Set, Tuple
|
|
|
|
# Argument types
#
# String tags stored in MMLCommand.args. Each tag tells the disassembler how
# one operand of a command is encoded in the sequence bytes.
ARG_U8 = "u8"  # one byte
ARG_S8 = "s8"  # one byte
ARG_U16 = "u16"  # two bytes
ARG_S16 = "s16"  # two bytes
ARG_VARLEN = "varlen"  # one or two bytes, decoded by MMLDisassembler.read_varlen
ARG_NOTE = "note"  # NOTE(review): not referenced in the visible part of this file
ARG_ABS_ADDRESS = "abs_address"  # two-byte big-endian offset into the sequence data
ARG_REL_ADDRESS = "rel_address"  # one signed byte, relative to the end of the instruction
ARG_DATA_ADDRESS = "data_address"  # two-byte big-endian offset that is labelled as data
|
|
|
|
class MMLCommand:
    """Static description of one MML opcode.

    Holds the mnemonic, the opcode value, the list of operand type tags and
    the script context ("group", "subtrack", "note", "common", ...) the
    opcode belongs to.
    """

    def __init__(self, name: str, opcode: int, args: List[str] = None, context: str = "unknown"):
        self.name, self.opcode = name, opcode
        # A missing or empty operand list becomes a fresh empty list; a
        # non-empty list is stored as passed in (it is not copied).
        self.args = args or []
        self.context = context

    def __repr__(self):
        # Opcode is rendered as upper-case hex without zero padding.
        return f'MMLCommand(name="{self.name}", opcode=0x{self.opcode:X}, args={self.args}, context="{self.context}")'
|
|
|
|
# ==============================================================================
# STATIC COMMAND DEFINITIONS
# ==============================================================================
# This data is generated from parsing audiocommon.h and track.c
#
# SCOM_TABLE_ARGS gives the operand layout of subtrack commands >= 0xA0.
# Keys are the absolute opcodes (0xA0-0xFF), not "opcode - 0xA0" indices.
# Values are tuples of argument type tags, one per operand in encoding order.
# The tags used here are 'u8', 's8', 's16' and 'abs_address'; an empty tuple
# means the command takes no operands.
# get_args_for_subtrack_cmd() looks opcodes >= 0xA0 up in this table.
SCOM_TABLE_ARGS = {
    0xA0: ('s16',), 0xA1: (), 0xA2: ('s16',), 0xA3: (), 0xA4: ('u8',), 0xA5: (), 0xA6: ('u8', 's16',), 0xA7: ('u8',),
    0xA8: (), 0xA9: (), 0xAA: (), 0xAB: (), 0xAC: (), 0xAD: (), 0xAE: (), 0xAF: (),
    0xB0: ('s16',), 0xB1: (), 0xB2: ('s16',), 0xB3: ('u8',), 0xB4: (), 0xB5: (), 0xB6: (), 0xB7: ('s16',),
    0xB8: ('u8',), 0xB9: ('u8',), 0xBA: ('u8',), 0xBB: ('u8', 's16',), 0xBC: ('s16',), 0xBD: ('s16',), 0xBE: ('u8',), 0xBF: (),
    0xC0: (), 0xC1: ('u8',), 0xC2: ('abs_address',), 0xC3: (), 0xC4: (), 0xC5: (), 0xC6: ('u8',), 0xC7: ('u8', 's16',),
    0xC8: ('s8',), 0xC9: ('u8',), 0xCA: ('u8',), 0xCB: ('s16',), 0xCC: ('u8',), 0xCD: ('u8',), 0xCE: ('s16',), 0xCF: ('s16',),
    0xD0: ('u8',), 0xD1: ('u8',), 0xD2: ('u8',), 0xD3: ('u8',), 0xD4: ('u8',), 0xD5: ('u8',), 0xD6: ('u8',), 0xD7: ('u8',),
    0xD8: ('u8',), 0xD9: ('u8',), 0xDA: ('s16',), 0xDB: ('u8',), 0xDC: ('u8',), 0xDD: ('u8',), 0xDE: ('s16',), 0xDF: ('u8',),
    0xE0: ('u8',), 0xE1: ('u8', 'u8', 'u8'), 0xE2: ('u8', 'u8', 'u8'), 0xE3: ('u8',), 0xE4: (), 0xE5: ('u8',), 0xE6: ('u8',),
    0xE7: ('s16',), 0xE8: ('u8', 'u8', 'u8'), 0xE9: ('u8',), 0xEA: (), 0xEB: ('u8', 'u8',), 0xEC: (), 0xED: ('u8',), 0xEE: ('u8',),
    0xEF: ('s16', 'u8',), 0xF0: (), 0xF1: ('u8',),
    # Common commands (F2-FF use Convert_Com which uses same table)
    0xF2: ('u8',), 0xF3: ('u8',), 0xF4: ('u8',), 0xF5: ('s16',), 0xF6: (), 0xF7: (), 0xF8: ('u8',),
    0xF9: ('s16',), 0xFA: ('s16',), 0xFB: ('s16',), 0xFC: ('s16',), 0xFD: (), 0xFE: (), 0xFF: (),
}
|
|
|
|
def get_args_for_subtrack_cmd(opcode):
    """Return the operand type tags of a subtrack command as a new list.

    Opcodes >= 0xA0 are looked up in SCOM_TABLE_ARGS (keyed by absolute
    opcode); an opcode missing from the table yields an empty list.
    Opcodes below 0xA0 have a fixed layout: only the ranges listed in
    ``fixed_layouts`` carry an operand, every other opcode takes none.
    """
    if opcode >= 0xA0:
        return list(SCOM_TABLE_ARGS.get(opcode, ()))

    # (first opcode, last opcode, operand tags) for the low ranges that
    # actually read operands. Delay, voiceload/synthload, the macro/port
    # groups, read-note-finished, note stop and note-start-dyntbl take none.
    fixed_layouts = (
        (0x20, 0x2F, ('abs_address',)),  # start subtrack
        (0x30, 0x4F, ('u8',)),           # write / read subtrack port
        (0x78, 0x7B, ('s16',)),          # note start
        (0x88, 0x8B, ('abs_address',)),  # note set pc
    )
    for first, last, operand_tags in fixed_layouts:
        if first <= opcode <= last:
            return list(operand_tags)
    return []
|
|
|
|
# Mnemonics of the subtrack commands in the 0xA0-0xF1 range. Their operand
# layout comes from get_args_for_subtrack_cmd(); opcodes missing here are
# named "sub_unknown_XX". Kept as one table so it is built once rather than
# once per opcode while COMMAND_MAPS is constructed.
_SUBTRACK_SCOM_NAMES = {
    0xA0: "sub_macro_load_from_sfx_state", 0xA1: "sub_macro_load_from_sfx_state_dynval",
    0xA2: "sub_sfx_state_set", 0xA3: "sub_sfx_state_set_dynval", 0xA4: "sub_surround_effect_idx_set",
    0xA5: "sub_macro_add_subtrack_idx", 0xA6: "sub_write_goup_seq_offset_by_subtrack",
    0xA7: "sub_macro_bit_mod", 0xA8: "sub_dynval_special", 0xB0: "sub_set_filter",
    0xB1: "sub_clear_filter", 0xB2: "sub_load_dynval_from_group_seq", 0xB3: "sub_load_filter",
    0xB4: "sub_set_dyntbl_from_group_seq", 0xB5: "sub_load_dynval_from_dyntbl",
    0xB6: "sub_macro_load_from_dyntbl", 0xB7: "sub_random_dynval", 0xB8: "sub_macro_random_val",
    0xB9: "sub_set_vel_random_variance", 0xBA: "sub_set_gate_time_random_variance",
    0xBB: "sub_set_comb_filter_size_gain", 0xBC: "sub_add_dynval", 0xBD: "sub_set_sample_start_pos",
    0xBE: "sub_macro_set_from_callback", 0xC1: "sub_voice_set", 0xC2: "sub_set_dyntbl",
    0xC3: "sub_large_note_on", 0xC4: "sub_large_note_off", 0xC5: "sub_jmp_dyntbl",
    0xC6: "sub_set_instrument_bank", 0xC7: "sub_write_group_seq", 0xC8: "sub_macro_subtract",
    0xC9: "sub_macro_and", 0xCA: "sub_set_mute_flags", 0xCB: "sub_macro_load_from_group_seq",
    0xCC: "sub_macro_set", 0xCD: "sub_disable_subtrack", 0xCE: "sub_set_dynval",
    0xCF: "sub_write_dynval_to_group_seq", 0xD0: "sub_stereo_phase_set",
    0xD1: "sub_set_note_alloc_policy", 0xD2: "sub_set_sustain", 0xD3: "sub_large_bend_pitch",
    0xD4: "sub_set_reverb_vol", 0xD7: "sub_set_vibrato_rate", 0xD8: "sub_set_vibrato_depth",
    0xD9: "sub_set_decay_idx", 0xDA: "sub_set_envelope", 0xDB: "sub_set_transposition",
    0xDC: "sub_set_pan_weight", 0xDD: "sub_set_pan", 0xDE: "sub_set_freq_scale",
    0xDF: "sub_set_vol", 0xE0: "sub_set_vol_scale", 0xE1: "sub_set_vibrato_rate_linear",
    0xE2: "sub_set_vibrato_depth_linear", 0xE3: "sub_set_vibrato_delay", 0xE4: "sub_dyntbl_call",
    0xE5: "sub_set_reverb_idx", 0xE6: "sub_set_book_ofs", 0xE7: "sub_set_env_params_from_group_seq",
    0xE8: "sub_set_env_params", 0xE9: "sub_set_priority", 0xEA: "sub_stop",
    0xEB: "sub_init_instruments", 0xEC: "sub_reset_vibrato", 0xED: "sub_set_gain",
    0xEE: "sub_small_bend_pitch", 0xF0: "sub_dealloc_voices", 0xF1: "sub_alloc_voices",
}

# Opcode tables, one per script context. Each inner dict maps an opcode to the
# MMLCommand describing its mnemonic and operand layout. The 'common' table
# holds the 0xF2-0xFF commands, which MMLDisassembler._get_command uses for
# every mode.
COMMAND_MAPS = {
    'group': {
        # 0x00-0x0F: macro value load subtrack disabled
        **{i: MMLCommand(name=f"grp_macro_value_load_subtrack{i}_disabled",
                         opcode=i, args=[], context="group") for i in range(0x00, 0x10)},

        # 0x40-0x4F: disable subtrack
        **{i: MMLCommand(name=f"grp_disable_subtrack{i - 0x40}",
                         opcode=i, args=[], context="group") for i in range(0x40, 0x50)},

        # 0x50-0x57: macro value subtract from port
        **{i: MMLCommand(name=f"grp_macro_value_subtract_by_port{i - 0x50}",
                         opcode=i, args=[], context="group") for i in range(0x50, 0x58)},

        # 0x60-0x67: async load
        **{i: MMLCommand(name=f"grp_async_load_port{i - 0x60}",
                         opcode=i, args=['u8', 'u8'], context="group") for i in range(0x60, 0x68)},

        # 0x70-0x77: macro value write port
        **{i: MMLCommand(name=f"grp_macro_value_write_port{i - 0x70}",
                         opcode=i, args=[], context="group") for i in range(0x70, 0x78)},

        # 0x80-0x87: macro value read port
        **{i: MMLCommand(name=f"grp_macro_value_read_port{i - 0x80}",
                         opcode=i, args=[], context="group") for i in range(0x80, 0x88)},

        # 0x90-0x9F: start subtrack (absolute address)
        **{i: MMLCommand(name=f"grp_start_subtrack{i - 0x90}",
                         opcode=i, args=['abs_address'], context="group") for i in range(0x90, 0xA0)},

        # 0xA0-0xAF: start relative subtrack (reads s16 for relative offset)
        **{i: MMLCommand(name=f"grp_start_relative_subtrack{i - 0xA0}",
                         opcode=i, args=['s16'], context="group") for i in range(0xA0, 0xB0)},

        # 0xB0-0xB7: seq load (reads u8 seq_id, u16 data address)
        **{i: MMLCommand(name=f"grp_seq_load_port{i - 0xB0}",
                         opcode=i, args=['u8', 'data_address'], context="group") for i in range(0xB0, 0xB8)},

        # 0xBE: callback
        0xBE: MMLCommand(name="grp_callback", opcode=0xBE, args=['u8'], context="group"),

        # 0xC1: c1
        0xC1: MMLCommand(name="grp_c1", opcode=0xC1, args=[], context="group"),

        # 0xC2: dynamic branch
        0xC2: MMLCommand(name="grp_dynamic_branch", opcode=0xC2, args=['abs_address'], context="group"),

        # 0xC3: mute subtracks
        0xC3: MMLCommand(name="grp_mute_subtracks", opcode=0xC3, args=['abs_address'], context="group"),

        # 0xC4: start seq
        0xC4: MMLCommand(name="grp_start_seq", opcode=0xC4, args=['u8', 'u8'], context="group"),

        # 0xC5: update counter
        0xC5: MMLCommand(name="grp_update_counter", opcode=0xC5, args=['u16'], context="group"),

        # 0xC6: stop seq
        0xC6: MMLCommand(name="grp_stop_seq", opcode=0xC6, args=[], context="group"),

        # 0xC7: macro value store
        0xC7: MMLCommand(name="grp_macro_value_store", opcode=0xC7, args=['u8', 'abs_address'], context="group"),

        # 0xC8: macro value subtract
        0xC8: MMLCommand(name="grp_macro_value_subtract", opcode=0xC8, args=['u8'], context="group"),

        # 0xC9: macro value bit and
        0xC9: MMLCommand(name="grp_macro_value_bit_and", opcode=0xC9, args=['u8'], context="group"),

        # 0xCA: ca
        0xCA: MMLCommand(name="grp_ca", opcode=0xCA, args=[], context="group"),

        # 0xCB: cb
        0xCB: MMLCommand(name="grp_cb", opcode=0xCB, args=[], context="group"),

        # 0xCC: macro value load
        0xCC: MMLCommand(name="grp_macro_value_load", opcode=0xCC, args=['u8'], context="group"),

        # 0xCD: dyn tbl call
        0xCD: MMLCommand(name="grp_dyn_tbl_call", opcode=0xCD, args=['abs_address'], context="group"),

        # 0xCE: macro value random
        0xCE: MMLCommand(name="grp_macro_value_random", opcode=0xCE, args=['u8'], context="group"),

        # 0xCF: cf
        0xCF: MMLCommand(name="grp_cf", opcode=0xCF, args=[], context="group"),

        # 0xD0: set note alloc policy
        0xD0: MMLCommand(name="grp_set_note_alloc_policy", opcode=0xD0, args=['u8'], context="group"),

        # 0xD1: set short note gate time tbl
        0xD1: MMLCommand(name="grp_set_short_note_gate_time_tbl", opcode=0xD1, args=['data_address'], context="group"),

        # 0xD2: set short note velocity tbl
        0xD2: MMLCommand(name="grp_set_short_note_velocity_tbl", opcode=0xD2, args=['data_address'], context="group"),

        # 0xD3: set mute behavior
        0xD3: MMLCommand(name="grp_set_mute_behavior", opcode=0xD3, args=['u8'], context="group"),

        # 0xD4: mute
        0xD4: MMLCommand(name="grp_mute", opcode=0xD4, args=[], context="group"),

        # 0xD5: set mute scale
        0xD5: MMLCommand(name="grp_set_mute_scale", opcode=0xD5, args=['u8'], context="group"),

        # 0xD6: disable subtracks
        0xD6: MMLCommand(name="grp_disable_subtracks", opcode=0xD6, args=['u16'], context="group"),

        # 0xD7: alloc subtracks
        0xD7: MMLCommand(name="grp_alloc_subtracks", opcode=0xD7, args=['u16'], context="group"),

        # 0xD8: d8
        0xD8: MMLCommand(name="grp_d8", opcode=0xD8, args=[], context="group"),

        # 0xD9: set volume scale
        0xD9: MMLCommand(name="grp_set_volume_scale", opcode=0xD9, args=['u8'], context="group"),

        # 0xDA: change volume
        0xDA: MMLCommand(name="grp_change_volume", opcode=0xDA, args=['u8', 'u16'], context="group"),

        # 0xDB: set volume
        0xDB: MMLCommand(name="grp_set_volume", opcode=0xDB, args=['u8'], context="group"),

        # 0xDC: set tempo change
        0xDC: MMLCommand(name="grp_set_tempo_change", opcode=0xDC, args=['u8'], context="group"),

        # 0xDD: set tempo
        0xDD: MMLCommand(name="grp_set_tempo", opcode=0xDD, args=['u8'], context="group"),

        # 0xDE: transposition relative
        0xDE: MMLCommand(name="grp_transposition_relative", opcode=0xDE, args=['u8'], context="group"),

        # 0xDF: transposition absolute
        0xDF: MMLCommand(name="grp_transposition_absolute", opcode=0xDF, args=[], context="group"),

        # 0xEF: ef
        0xEF: MMLCommand(name="grp_ef", opcode=0xEF, args=['u16', 'u8'], context="group"),

        # 0xF0: release voices
        0xF0: MMLCommand(name="grp_release_voices", opcode=0xF0, args=[], context="group"),

        # 0xF1: reserve voices
        0xF1: MMLCommand(name="grp_reserve_voices", opcode=0xF1, args=['u8'], context="group"),
    },
    'subtrack': {
        # 0x00-0x0F: delay
        **{i: MMLCommand(name="sub_delay_clear" if i == 0 else f"sub_delay_{i}",
                         opcode=i, args=[], context="subtrack") for i in range(0x00, 0x10)},

        # 0x10-0x17: voiceload
        **{i: MMLCommand(name=f"sub_voiceload_port{i - 0x10}",
                         opcode=i, args=[], context="subtrack") for i in range(0x10, 0x18)},

        # 0x18-0x1F: synthload
        **{i: MMLCommand(name=f"sub_synthload_port{i - 0x18}",
                         opcode=i, args=[], context="subtrack") for i in range(0x18, 0x20)},

        # 0x20-0x2F: start subtrack
        **{i: MMLCommand(name=f"sub_start_subtrack{i - 0x20}",
                         opcode=i, args=['abs_address'], context="subtrack") for i in range(0x20, 0x30)},

        # 0x30-0x3F: write subtrack port
        **{i: MMLCommand(name=f"sub_write_subtrack{i - 0x30}_port",
                         opcode=i, args=['u8'], context="subtrack") for i in range(0x30, 0x40)},

        # 0x40-0x4F: read subtrack port
        **{i: MMLCommand(name=f"sub_read_subtrack{i - 0x40}_port",
                         opcode=i, args=['u8'], context="subtrack") for i in range(0x40, 0x50)},

        # 0x50-0x57: macro subtract port
        **{i: MMLCommand(name=f"sub_macro_subtract_port{i - 0x50}",
                         opcode=i, args=[], context="subtrack") for i in range(0x50, 0x58)},

        # 0x60-0x67: macro read port
        **{i: MMLCommand(name=f"sub_macro_read_port{i - 0x60}",
                         opcode=i, args=[], context="subtrack") for i in range(0x60, 0x68)},

        # 0x70-0x77: port write macro reg
        **{i: MMLCommand(name=f"sub_port{i - 0x70}_write_macro_reg",
                         opcode=i, args=[], context="subtrack") for i in range(0x70, 0x78)},

        # 0x78-0x7B: note start (all read s16 relative offset)
        **{i: MMLCommand(name=f"sub_note{i - 0x78}_start",
                         opcode=i, args=['s16'], context="subtrack") for i in range(0x78, 0x7C)},

        # 0x80-0x83: read note finished
        **{i: MMLCommand(name=f"sub_read_note{i - 0x80}_finished",
                         opcode=i, args=[], context="subtrack") for i in range(0x80, 0x84)},

        # 0x88-0x8B: note set pc
        **{i: MMLCommand(name=f"sub_note{i - 0x88}_set_pc",
                         opcode=i, args=['abs_address'], context="subtrack") for i in range(0x88, 0x8C)},

        # 0x90-0x93: note stop. One opcode per note 0-3, like every other
        # per-note group in this table and like the 0x90-0x93 range that
        # get_args_for_subtrack_cmd() documents for this command. The old
        # range(0x90, 0x97) registered sub_note4_stop..sub_note6_stop as well.
        **{i: MMLCommand(name=f"sub_note{i - 0x90}_stop",
                         opcode=i, args=[], context="subtrack") for i in range(0x90, 0x94)},

        # 0x98-0x9B: note start dyntbl
        **{i: MMLCommand(name=f"sub_note{i - 0x98}_start_dyntbl",
                         opcode=i, args=[], context="subtrack") for i in range(0x98, 0x9C)},

        # 0xA0-0xFF: operand layout from SCOM_TABLE (via
        # get_args_for_subtrack_cmd), mnemonic from _SUBTRACK_SCOM_NAMES.
        **{i: MMLCommand(name=_SUBTRACK_SCOM_NAMES.get(i, f"sub_unknown_{i:02X}"),
                         opcode=i, args=get_args_for_subtrack_cmd(i), context="subtrack")
           for i in range(0xA0, 0x100)},
    },
    'note': {
        # 0xC0: mute
        0xC0: MMLCommand(name="note_mute", opcode=0xC0, args=['varlen'], context="note"),

        # 0xC1: set velocity sq
        0xC1: MMLCommand(name="note_set_velocity_sq", opcode=0xC1, args=['u8'], context="note"),

        # 0xC2: set transposition
        0xC2: MMLCommand(name="note_set_transposition", opcode=0xC2, args=['u8'], context="note"),

        # 0xC3: set short note default delay
        0xC3: MMLCommand(name="note_set_short_note_default_delay", opcode=0xC3, args=['varlen'], context="note"),

        # 0xC4: continuous on
        0xC4: MMLCommand(name="note_continuous_on", opcode=0xC4, args=[], context="note"),

        # 0xC5: continuous off
        0xC5: MMLCommand(name="note_continuous_off", opcode=0xC5, args=[], context="note"),

        # 0xC6: set instrument
        0xC6: MMLCommand(name="note_set_instrument", opcode=0xC6, args=['u8'], context="note"),

        # 0xC7: enable sweep
        0xC7: MMLCommand(name="note_enable_sweep", opcode=0xC7, args=['u8', 'u8', 'varlen'], context="note"),

        # 0xC8: disable sweep
        0xC8: MMLCommand(name="note_disable_sweep", opcode=0xC8, args=[], context="note"),

        # 0xC9: set gate time
        0xC9: MMLCommand(name="note_set_gate_time", opcode=0xC9, args=['u8'], context="note"),

        # 0xCA: set pan
        0xCA: MMLCommand(name="note_set_pan", opcode=0xCA, args=['u8'], context="note"),

        # 0xCB: set adsr envelope decay idx
        0xCB: MMLCommand(name="note_set_adsr_envelope_decay_idx", opcode=0xCB, args=['data_address', 'u8'], context="note"),

        # 0xCC: ignore drum pan
        0xCC: MMLCommand(name="note_ignore_drum_pan", opcode=0xCC, args=[], context="note"),

        # 0xCD: set stereo phase
        0xCD: MMLCommand(name="note_set_stereo_phase", opcode=0xCD, args=['u8'], context="note"),

        # 0xCE: set bend
        0xCE: MMLCommand(name="note_set_bend", opcode=0xCE, args=['u8'], context="note"),

        # 0xCF: set adsr decay idx
        0xCF: MMLCommand(name="note_set_adsr_decay_idx", opcode=0xCF, args=['u8'], context="note"),

        # 0xD0-0xDF: short note velocity sq
        **{i: MMLCommand(name=f"note_short_note_velocity_sq{i - 0xD0}",
                         opcode=i, args=[], context="note") for i in range(0xD0, 0xE0)},

        # 0xE0-0xEF: set short note gate time
        **{i: MMLCommand(name=f"note_set_short_note_gate_time{i - 0xE0}",
                         opcode=i, args=[], context="note") for i in range(0xE0, 0xF0)},

        # 0xF0: disable flags
        0xF0: MMLCommand(name="note_disable_flags", opcode=0xF0, args=['u16'], context="note"),

        # 0xF1: set surround effect idx
        0xF1: MMLCommand(name="note_set_surround_effect_idx", opcode=0xF1, args=['u8'], context="note"),
    },
    'common': {
        # 0xF2: branch rel not eq zero (reads u8 but treated as s8 relative)
        0xF2: MMLCommand(name="common_branch_rel_not_eq_zero", opcode=0xF2, args=['rel_address'], context="common"),

        # 0xF3: branch rel eq zero (reads u8 but treated as s8 relative)
        0xF3: MMLCommand(name="common_branch_rel_eq_zero", opcode=0xF3, args=['rel_address'], context="common"),

        # 0xF4: branch rel (reads u8 but treated as s8 relative)
        0xF4: MMLCommand(name="common_branch_rel", opcode=0xF4, args=['rel_address'], context="common"),

        # 0xF5: branch abs greq zero (reads s16 but treated as u16 absolute address)
        0xF5: MMLCommand(name="common_branch_abs_greq_zero", opcode=0xF5, args=['abs_address'], context="common"),

        # 0xF6: break
        0xF6: MMLCommand(name="common_break", opcode=0xF6, args=[], context="common"),

        # 0xF7: loop end
        0xF7: MMLCommand(name="common_loop_end", opcode=0xF7, args=[], context="common"),

        # 0xF8: loop (reads u8 loop count)
        0xF8: MMLCommand(name="common_loop", opcode=0xF8, args=['u8'], context="common"),

        # 0xF9: branch abs not eq zero (reads s16 but treated as u16 absolute address)
        0xF9: MMLCommand(name="common_branch_abs_not_eq_zero", opcode=0xF9, args=['abs_address'], context="common"),

        # 0xFA: branch abs eq zero (reads s16 but treated as u16 absolute address)
        0xFA: MMLCommand(name="common_branch_abs_eq_zero", opcode=0xFA, args=['abs_address'], context="common"),

        # 0xFB: branch abs (reads s16 but treated as u16 absolute address)
        0xFB: MMLCommand(name="common_branch_abs", opcode=0xFB, args=['abs_address'], context="common"),

        # 0xFC: call (reads s16 but treated as u16 absolute address)
        0xFC: MMLCommand(name="common_call", opcode=0xFC, args=['abs_address'], context="common"),

        # 0xFD: delay n frames (reads varlen)
        0xFD: MMLCommand(name="common_delay_n_frames", opcode=0xFD, args=['varlen'], context="common"),

        # 0xFE: delay 1 frame
        0xFE: MMLCommand(name="common_delay_1_frame", opcode=0xFE, args=[], context="common"),

        # 0xFF: stop script
        0xFF: MMLCommand(name="common_stop_script", opcode=0xFF, args=[], context="common"),
    }
}
|
|
|
|
def get_note_name(note_val: int) -> str:
    """Return the note name for a MIDI note number, e.g. 60 -> "C4".

    Values above 127 are not MIDI notes and are returned as a two-digit
    (or wider) upper-case hex literal such as "0x80".
    """
    if note_val > 127:
        return f"0x{note_val:02X}"

    pitch_classes = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")
    # Twelve semitones per octave; note 0 sits in octave -1.
    octave_block, pitch_class = divmod(note_val, 12)
    return f"{pitch_classes[pitch_class]}{octave_block - 1}"
|
|
|
|
class MMLDisassembler:
|
|
"""A two-pass disassembler for the Animal Crossing MML format."""
|
|
|
|
def __init__(self, data: bytes, mode: str = "group"):
    """Store the input bytes and create empty analysis state.

    Args:
        data: Raw sequence bytes to disassemble.
        mode: Fallback mode for analysing partial sections that do not
            start at offset 0. Full sequences starting at offset 0 are
            always "group" and auto-detected.
    """
    self.data = data
    self.mode = mode
    # Optional base added to displayed addresses/labels (useful in raw bytes mode).
    self.address_base: int = 0

    # Offsets already analysed, so no command is decoded twice.
    self.visited_offsets: Set[int] = set()
    self.reference_map: Dict[int, str] = {}

    # Code labels, typed group / subtrack / note: offset -> (label_name, label_type).
    self.labels: Dict[int, Tuple[str, str]] = {}
    # Data labels: offset -> (label_name, "data").
    self.data_labels: Dict[int, Tuple[str, str]] = {}

    # Mode in effect at each analysed offset: offset -> (mode, context_info).
    #   mode is "group", "subtrack" or "note".
    #   context_info is (subtrack_idx,) for subtrack commands and
    #   (subtrack_idx, note_idx, subtrack_pc) for note commands.
    self.mode_contexts: Dict[int, Tuple[str, Tuple]] = {}

    # Subtrack PCs used for note start calculations.
    # NOTE(review): the original comment describes the key as a
    # (subtrack_start_offset, current_pc) pair while the annotation says int;
    # confirm against the code that fills this map.
    self.subtrack_pcs: Dict[int, int] = {}
    # subtrack_idx -> large_notes flag (treated as False when absent).
    self.subtrack_large_notes: Dict[int, bool] = {}
    # subtrack_idx -> address last loaded by sub_load_dynval_from_group_seq,
    # kept for u16 data marking.
    self.subtrack_last_dynval_addr: Dict[int, int] = {}
|
|
|
|
def read_u8(self, offset: int) -> int:
    """Return the element of ``self.data`` at *offset* (an int for bytes input)."""
    byte_value = self.data[offset]
    return byte_value
|
|
|
|
def read_s8(self, offset: int) -> int:
    """Return the byte at *offset* decoded as a signed 8-bit integer."""
    (signed_byte,) = struct.unpack_from('>b', self.data, offset)
    return signed_byte
|
|
|
|
def read_u16(self, offset: int) -> int:
    """Return the two bytes at *offset* decoded as a big-endian unsigned 16-bit integer."""
    (word,) = struct.unpack_from('>H', self.data, offset)
    return word
|
|
|
|
def read_s16(self, offset: int) -> int:
    """Return the two bytes at *offset* decoded as a big-endian signed 16-bit integer."""
    (signed_word,) = struct.unpack_from('>h', self.data, offset)
    return signed_word
|
|
|
|
def read_varlen(self, offset: int) -> Tuple[int, int]:
    """Decode a 1-2 byte variable-length value (same as Nas_ReadLengthData).

    Returns:
        ``(value, size)`` where *size* is the number of bytes consumed.
        When the first byte has its top bit clear it is the value itself
        (size 1). Otherwise its low 7 bits become the high part of a
        15-bit value and the next byte the low part (size 2).
    """
    lead = self.read_u8(offset)
    if not lead & 0x80:
        return lead, 1
    trail = self.read_u8(offset + 1)
    return ((lead & 0x7F) << 8) | trail, 2
|
|
|
|
def read_length_data(self, offset: int) -> Tuple[int, int]:
    """Decode a length value at *offset*; delegates to read_varlen (Nas_ReadLengthData in track.c)."""
    decoded = self.read_varlen(offset)
    return decoded
|
|
|
|
def _get_command(self, opcode: int, mode: str = None) -> MMLCommand:
    """Look up the command definition for *opcode*.

    Args:
        opcode: Command byte to resolve.
        mode: Table to search ("group", "subtrack", "note"); ``None``
            means use ``self.mode``.

    Returns:
        The matching MMLCommand, or ``None`` when the selected table has
        no entry for the opcode (or the mode has no table at all).
    """
    active_mode = self.mode if mode is None else mode
    # Opcodes 0xF2 and above always resolve through the 'common' table,
    # regardless of the active mode.
    table_key = "common" if opcode >= 0xF2 else active_mode
    return COMMAND_MAPS.get(table_key, {}).get(opcode)
|
|
|
|
def _analyze_at_offset(self, start_offset: int, current_mode: str, context_info: Tuple = ()):
|
|
"""Recursively analyze commands starting at given offset with specified mode."""
|
|
from collections import deque
|
|
q = deque([(start_offset, current_mode, context_info)])
|
|
|
|
while q:
|
|
offset, mode, context = q.popleft()
|
|
if offset >= len(self.data) or offset in self.visited_offsets:
|
|
continue
|
|
|
|
self.visited_offsets.add(offset)
|
|
self.mode_contexts[offset] = (mode, context)
|
|
|
|
addr = offset
|
|
opcode = self.read_u8(addr)
|
|
offset_after_opcode = addr + 1
|
|
|
|
command = self._get_command(opcode, mode)
|
|
if not command:
|
|
if mode == "note" and 0x00 <= opcode <= 0xBF: # It's a note (0x00-0xBF)
|
|
# Get large_notes flag for this subtrack
|
|
subtrack_idx = context[0] if context and len(context) > 0 else 0
|
|
large_notes = self.subtrack_large_notes.get(subtrack_idx, False)
|
|
|
|
# Parse note based on large_notes flag
|
|
if large_notes:
|
|
# Large notes format
|
|
if (opcode & 0xC0) == 0x00: # 0x00-0x3F
|
|
delay, delay_size = self.read_length_data(offset_after_opcode)
|
|
next_offset = offset_after_opcode + delay_size + 2 # delay, u8 vel, u8 gate
|
|
elif (opcode & 0xC0) == 0x40: # 0x40-0x7F
|
|
delay, delay_size = self.read_length_data(offset_after_opcode)
|
|
next_offset = offset_after_opcode + delay_size + 1 # delay, u8 vel (gate=0)
|
|
else: # 0x80-0xBF
|
|
next_offset = offset_after_opcode + 2 # u8 vel, u8 gate (delay=last_delay)
|
|
else:
|
|
# Small notes format
|
|
if (opcode & 0xC0) == 0x00: # 0x00-0x3F
|
|
delay, delay_size = self.read_length_data(offset_after_opcode)
|
|
next_offset = offset_after_opcode + delay_size
|
|
else: # 0x40-0x7F or 0x80-0xBF (no extra bytes)
|
|
next_offset = offset_after_opcode
|
|
|
|
if next_offset not in self.visited_offsets and next_offset < len(self.data):
|
|
q.append((next_offset, mode, context))
|
|
continue
|
|
|
|
arg_offset = offset_after_opcode
|
|
arg_size = 0
|
|
subtrack_start_offset = None
|
|
note_start_info = None
|
|
note_set_pc_addr = None # For sub_noteX_set_pc commands
|
|
dynval_addr = None # For sub_load_dynval_from_group_seq command
|
|
|
|
# Check for mode transitions based on opcode ranges
|
|
is_subtrack_start = mode == "group" and (0x90 <= opcode <= 0x9F)
|
|
is_relative_subtrack_start = mode == "group" and (0xA0 <= opcode <= 0xAF)
|
|
is_note_start = mode == "subtrack" and (0x78 <= opcode <= 0x7B)
|
|
is_note_set_pc = mode == "subtrack" and (0x88 <= opcode <= 0x8B)
|
|
is_load_dynval_from_group_seq = mode == "subtrack" and opcode == 0xB2
|
|
is_set_dyntbl_from_group_seq = mode == "subtrack" and opcode == 0xB4
|
|
|
|
# Commands that use abs_address for data (not code)
|
|
# These commands use abs_address to point to data in seq_data:
|
|
# - sub_set_dyntbl (0xC2): sets dynamic table pointer
|
|
# - grp_macro_value_store (0xC7): stores macro value to data
|
|
# - grp_dyn_tbl_call (0xCD): reads data address from table for call
|
|
is_abs_address_data = False
|
|
if command:
|
|
if command.name in ["sub_set_dyntbl", "grp_macro_value_store", "grp_dyn_tbl_call"]:
|
|
is_abs_address_data = True
|
|
|
|
arg_incomplete = False
|
|
for arg_type in command.args:
|
|
size = 0
|
|
target_addr = -1
|
|
is_data = False
|
|
|
|
if arg_type in [ARG_U8, ARG_S8]:
|
|
# Need 1 byte available
|
|
if arg_offset >= len(self.data):
|
|
arg_incomplete = True
|
|
size = 0
|
|
|
|
else:
|
|
size = 1
|
|
elif arg_type in [ARG_U16, ARG_S16]:
|
|
# Need 2 bytes available
|
|
remaining = len(self.data) - arg_offset
|
|
if remaining < 2:
|
|
arg_incomplete = True
|
|
size = max(0, remaining)
|
|
else:
|
|
size = 2
|
|
elif arg_type == ARG_VARLEN:
|
|
# Need at least 1 byte for varlen
|
|
if arg_offset >= len(self.data):
|
|
arg_incomplete = True
|
|
size = 0
|
|
else:
|
|
_, size = self.read_varlen(arg_offset)
|
|
elif arg_type == ARG_ABS_ADDRESS:
|
|
remaining = len(self.data) - arg_offset
|
|
if remaining < 2:
|
|
arg_incomplete = True
|
|
size = max(0, remaining)
|
|
target_addr = -1
|
|
else:
|
|
size = 2
|
|
target_addr = self.read_u16(arg_offset)
|
|
if is_subtrack_start:
|
|
# Switch to subtrack mode at absolute address
|
|
subtrack_idx = opcode - 0x90
|
|
subtrack_start_offset = target_addr
|
|
elif is_note_set_pc:
|
|
# Note set PC command - mark target as note
|
|
note_set_pc_addr = target_addr
|
|
elif is_abs_address_data:
|
|
# This abs_address refers to data, not code
|
|
is_data = True
|
|
elif arg_type == ARG_REL_ADDRESS:
|
|
if arg_offset >= len(self.data):
|
|
arg_incomplete = True
|
|
size = 0
|
|
else:
|
|
size = 1
|
|
rel_offset = self.read_s8(arg_offset)
|
|
# Calculate target address: relative to address AFTER entire instruction
|
|
# Instruction size = 1 (opcode) + arg_size (previous args) + size (this arg)
|
|
instruction_end_addr = addr + 1 + arg_size + size
|
|
target_addr = instruction_end_addr + rel_offset
|
|
elif arg_type == ARG_DATA_ADDRESS:
|
|
remaining = len(self.data) - arg_offset
|
|
if remaining < 2:
|
|
arg_incomplete = True
|
|
size = max(0, remaining)
|
|
target_addr = -1
|
|
else:
|
|
size = 2
|
|
target_addr = self.read_u16(arg_offset)
|
|
is_data = True
|
|
elif arg_type == 's16':
|
|
remaining = len(self.data) - arg_offset
|
|
if remaining < 2:
|
|
arg_incomplete = True
|
|
size = max(0, remaining)
|
|
s16_val = 0
|
|
else:
|
|
size = 2
|
|
s16_val = self.read_s16(arg_offset)
|
|
if is_relative_subtrack_start:
|
|
# Switch to subtrack mode at relative address
|
|
subtrack_idx = opcode - 0xA0
|
|
subtrack_start_offset = addr + 3 + s16_val # cmd + s16 arg + offset
|
|
elif is_note_start:
|
|
# Switch to note mode at subtrack PC + offset
|
|
note_idx = opcode - 0x78
|
|
subtrack_idx = context[0] if context else 0
|
|
# Calculate note start address: current PC + relative offset
|
|
note_start_addr = addr + 3 + s16_val # cmd + s16 arg + offset
|
|
note_start_info = (subtrack_idx, note_idx, note_start_addr)
|
|
elif is_load_dynval_from_group_seq or is_set_dyntbl_from_group_seq:
|
|
# The s16 argument is actually an unsigned absolute address into the sequence script
|
|
# Track.c casts cmdArgs[0] to u16, so read it as u16 directly
|
|
dynval_addr_u16 = self.read_u16(arg_offset)
|
|
dynval_addr = dynval_addr_u16
|
|
|
|
if arg_incomplete:
|
|
# Not enough bytes to parse this argument; advance to end of available data for this cmd
|
|
# and stop parsing further args for this command.
|
|
arg_offset += size
|
|
arg_size += size
|
|
break
|
|
|
|
if target_addr != -1:
|
|
if is_data:
|
|
if target_addr not in self.data_labels:
|
|
self.data_labels[target_addr] = (f"D_{target_addr:04X}", "data")
|
|
else:
|
|
# Don't create labels here for addresses that will be handled by mode transitions
|
|
# (subtrack/note starts/note set PC will create labels with correct types later)
|
|
# Skip if this is an abs_address for subtrack start or note set PC, or s16 for relative subtrack/note start
|
|
skip_label_creation = (arg_type == ARG_ABS_ADDRESS and (is_subtrack_start or is_note_set_pc)) or \
|
|
(arg_type == 's16' and (is_relative_subtrack_start or is_note_start))
|
|
if not skip_label_creation:
|
|
if target_addr not in self.labels:
|
|
self.labels[target_addr] = (f"L_{target_addr:04X}", mode)
|
|
if target_addr not in self.visited_offsets and target_addr < len(self.data):
|
|
# Keep same mode for regular jumps/calls
|
|
q.append((target_addr, mode, context))
|
|
|
|
arg_offset += size
|
|
arg_size += size
|
|
|
|
if arg_incomplete:
|
|
# Stop processing further args for this command
|
|
break
|
|
|
|
# Calculate next offset after this command
|
|
next_offset = addr + 1 + arg_size # opcode + args
|
|
|
|
# Handle mode transitions - add subtrack/note entry points to queue
|
|
if subtrack_start_offset is not None and subtrack_start_offset < len(self.data):
|
|
subtrack_idx = opcode & 0x0F if opcode >= 0x90 else (opcode - 0xA0) & 0x0F
|
|
# Initialize large_notes flag for this subtrack (default False)
|
|
if subtrack_idx not in self.subtrack_large_notes:
|
|
self.subtrack_large_notes[subtrack_idx] = False
|
|
# Create or update label with correct type (subtrack)
|
|
if subtrack_start_offset in self.labels:
|
|
# Update existing label type if it was created as "group" earlier
|
|
label_name, label_type = self.labels[subtrack_start_offset]
|
|
if label_type == "group":
|
|
self.labels[subtrack_start_offset] = (f"L_{subtrack_start_offset:04X}", "subtrack")
|
|
else:
|
|
self.labels[subtrack_start_offset] = (f"L_{subtrack_start_offset:04X}", "subtrack")
|
|
if subtrack_start_offset not in self.visited_offsets:
|
|
q.append((subtrack_start_offset, "subtrack", (subtrack_idx,)))
|
|
|
|
if note_start_info is not None and note_start_info[2] < len(self.data):
|
|
subtrack_idx, note_idx, note_addr = note_start_info
|
|
# Create or update label with correct type (note)
|
|
if note_addr in self.labels:
|
|
# Update existing label type if it was created as "subtrack" or "group" earlier
|
|
label_name, label_type = self.labels[note_addr]
|
|
if label_type in ["group", "subtrack"]:
|
|
self.labels[note_addr] = (f"L_{note_addr:04X}", "note")
|
|
else:
|
|
self.labels[note_addr] = (f"L_{note_addr:04X}", "note")
|
|
if note_addr not in self.visited_offsets:
|
|
q.append((note_addr, "note", (subtrack_idx, note_idx)))
|
|
|
|
# Handle note set PC - mark target address as note and analyze it
|
|
if note_set_pc_addr is not None and note_set_pc_addr < len(self.data):
|
|
note_idx = opcode - 0x88
|
|
subtrack_idx = context[0] if context else 0
|
|
# Create or update label with correct type (note)
|
|
if note_set_pc_addr in self.labels:
|
|
# Update existing label type if it was created as "subtrack" or "group" earlier
|
|
label_name, label_type = self.labels[note_set_pc_addr]
|
|
if label_type in ["group", "subtrack"]:
|
|
self.labels[note_set_pc_addr] = (f"L_{note_set_pc_addr:04X}", "note")
|
|
else:
|
|
self.labels[note_set_pc_addr] = (f"L_{note_set_pc_addr:04X}", "note")
|
|
# Note: note_set_pc sets the PC for the note layer, so we should analyze from that address
|
|
if note_set_pc_addr not in self.visited_offsets:
|
|
q.append((note_set_pc_addr, "note", (subtrack_idx, note_idx)))
|
|
|
|
# Handle dynval address from sub_load_dynval_from_group_seq - mark as data
|
|
# Track the base address for potential u16 data marking when sub_set_dyntbl_from_group_seq follows
|
|
if dynval_addr is not None and dynval_addr < len(self.data):
|
|
# The s16 argument is actually an unsigned absolute address into the sequence script
|
|
# Mark the address as data (it's an offset into the sequence script)
|
|
if dynval_addr not in self.data_labels:
|
|
self.data_labels[dynval_addr] = (f"D_{dynval_addr:04X}", "data")
|
|
|
|
# Track this base address for this subtrack (if we're in subtrack mode)
|
|
# sub_load_dynval_from_group_seq reads from: base_addr + m->value * 2
|
|
# We'll track this to mark u16 data when sub_set_dyntbl_from_group_seq follows
|
|
if mode == "subtrack":
|
|
subtrack_idx = context[0] if context and len(context) > 0 else 0
|
|
self.subtrack_last_dynval_addr[subtrack_idx] = dynval_addr
|
|
|
|
# Handle sub_set_dyntbl_from_group_seq - if it follows sub_load_dynval_from_group_seq,
|
|
# mark the data at the address that would be loaded (assuming m->value = 0 for simplicity)
|
|
# as u16 data. The actual address depends on m->value at runtime, but we can at least
|
|
# mark the base case.
|
|
if is_set_dyntbl_from_group_seq and mode == "subtrack":
|
|
subtrack_idx = context[0] if context and len(context) > 0 else 0
|
|
if subtrack_idx in self.subtrack_last_dynval_addr:
|
|
# This command uses the value loaded by sub_load_dynval_from_group_seq
|
|
# The address stored in dynamic_value points to u16 data (array of [u16])
|
|
# sub_load_dynval_from_group_seq reads from: base_addr + m->value * 2
|
|
# For marking purposes, we'll check the base address (m->value = 0 case)
|
|
base_addr = self.subtrack_last_dynval_addr[subtrack_idx]
|
|
if base_addr < len(self.data) - 1: # Need at least 2 bytes for u16
|
|
# Read the u16 value from base_addr (assuming m->value = 0)
|
|
# This is the address that sub_set_dyntbl_from_group_seq would use
|
|
target_data_addr = self.read_u16(base_addr)
|
|
if target_data_addr < len(self.data):
|
|
# Mark as u16 data (update if it was just marked as "data")
|
|
if target_data_addr in self.data_labels:
|
|
label_name, _ = self.data_labels[target_data_addr]
|
|
self.data_labels[target_data_addr] = (label_name, "u16")
|
|
else:
|
|
self.data_labels[target_data_addr] = (f"D_{target_data_addr:04X}", "u16")
|
|
|
|
# Handle large_notes flag changes in subtrack mode
|
|
if mode == "subtrack" and command.name in ["sub_large_note_on", "sub_large_note_off"]:
|
|
subtrack_idx = context[0] if context and len(context) > 0 else 0
|
|
# Note: These command names are counterintuitive - ON sets to False, OFF sets to True
|
|
if command.name == "sub_large_note_on":
|
|
self.subtrack_large_notes[subtrack_idx] = False
|
|
elif command.name == "sub_large_note_off":
|
|
self.subtrack_large_notes[subtrack_idx] = True
|
|
|
|
# Continue sequential execution in same mode if not a terminal command
|
|
# Note: delay commands pause execution but don't stop sequential analysis
|
|
# Unconditional branches stop sequential flow, but we still analyze the target
|
|
terminal_commands = ["common_stop_script", "grp_stop_seq", "sub_stop", "common_branch_abs"]
|
|
if command.name not in terminal_commands and next_offset not in self.visited_offsets and next_offset < len(self.data):
|
|
q.append((next_offset, mode, context))
|
|
|
|
def analysis_pass(self, start_offset: int = 0, start_mode: str = None):
    """First pass: find all jump/call targets and data references to create labels.

    Runs the control-flow analysis from ``start_offset`` and then labels the
    bytes that analysis never reached as data.

    Progress messages are written to stderr, not stdout: ``main`` prints the
    finished listing to stdout when no output file is given, and status text
    on the same stream would end up inside the disassembly.

    Args:
        start_offset: Offset to start analysis from (default: 0).
        start_mode: Mode to start in. When None (the default) the
            disassembler's configured ``self.mode`` is used.
    """
    import sys  # local import: only needed for the stderr status messages

    print("Starting analysis pass...", file=sys.stderr)

    # Default to the disassembler's configured mode unless explicitly overridden
    if start_mode is None:
        start_mode = self.mode
    self._analyze_at_offset(start_offset, start_mode, ())

    # Everything the analysis did not visit is treated as data
    self._mark_unreferenced_bytes_as_data()

    print(
        f"Analysis complete. Found {len(self.labels)} code labels and {len(self.data_labels)} data labels.",
        file=sys.stderr,
    )
|
|
|
|
def _mark_unreferenced_bytes_as_data(self):
    """Label the start of every run of bytes the analysis never visited.

    An unvisited byte is left out when its recorded mode (or ``self.mode``
    when none is recorded) is "note" and its value is 0x00-0xBF or 0xC0.
    The remaining offsets are grouped into runs of consecutive addresses and
    the first offset of each run receives a ``D_XXXX`` entry in
    ``self.data_labels`` unless that offset already has one.
    """
    never_visited = sorted(set(range(len(self.data))) - self.visited_offsets)

    # Keep only the offsets that should really be dumped as data
    data_offsets = []
    for offset in never_visited:
        section_mode, _ctx = self.mode_contexts.get(offset, (self.mode, ()))
        byte = self.read_u8(offset)
        looks_like_note = section_mode == "note" and (byte == 0xC0 or 0x00 <= byte <= 0xBF)
        if not looks_like_note:
            data_offsets.append(offset)

    # A run starts wherever an offset does not directly follow the previous one
    previous = None
    for offset in data_offsets:
        starts_run = previous is None or offset != previous + 1
        if starts_run and offset not in self.data_labels:
            self.data_labels[offset] = (f"D_{offset:04X}", "data")
        previous = offset
|
|
|
|
def disassembly_pass(self) -> List[str]:
    """Second pass: generate the disassembled output text.

    Every byte offset of ``self.data`` is walked in ascending order. Offsets
    the analysis pass never visited are emitted as ``.db`` bytes (except
    opcodes 0x00-0xBF in note mode, which are decoded as notes). All other
    offsets are decoded with the mode/context stored in ``self.mode_contexts``,
    falling back to ``self.mode`` and an empty context.

    Signed 16-bit operands get command-specific treatment:

    * ``grp_start_relative_subtrack*`` (group mode) and ``sub_note*_start``
      (subtrack mode): the operand is resolved to ``addr + 3 + operand`` and
      shown as a code label when one exists, otherwise as a hex address.
    * ``sub_load_dynval_from_group_seq`` / ``sub_set_dyntbl_from_group_seq``
      (subtrack mode): the operand is re-read as an unsigned address and shown
      as a data label.

    The original code had this handling in a trailing ``elif arg_type == 's16'``
    branch that could never run, because the earlier ``ARG_S16`` branch matches
    the same string. It is now part of the s16 branch and only applied when the
    operand could be read completely.

    Side effects:
        May add an entry to ``self.data_labels`` for an in-range dynval
        address that has no data label yet.

    Returns:
        The listing lines. Label lines begin with a newline character so a
        blank line precedes each label once the lines are joined.
    """
    lines = []
    processed_offsets = set()  # byte offsets already consumed by an emitted line
    # Large-notes state restarts at False for every known subtrack and is
    # updated as flag-changing commands are met in address order.
    disasm_large_notes = {idx: False for idx in self.subtrack_large_notes}
    data_len = len(self.data)

    # Commands whose abs_address operand points at data rather than code
    data_pointer_commands = ("sub_set_dyntbl", "grp_macro_value_store", "grp_dyn_tbl_call")
    # Commands whose s16 operand is really an unsigned address into the script
    dynval_commands = ("sub_load_dynval_from_group_seq", "sub_set_dyntbl_from_group_seq")

    # Formatters: every displayed address has self.address_base added
    def _fmt_code_label(a: int) -> str:
        return f"L_{(self.address_base + a):04X}"

    def _fmt_data_label(a: int) -> str:
        return f"D_{(self.address_base + a):04X}"

    def _fmt_hex(a: int) -> str:
        return f"0x{(self.address_base + a):04X}"

    def _fmt_code_ref(a: int) -> str:
        # Label when the target is a known code label, plain hex address otherwise
        return _fmt_code_label(a) if a in self.labels else _fmt_hex(a)

    def _decode_note(addr: int, opcode: int, large_notes: bool):
        # Returns (listing text, total size in bytes) for a note opcode 0x00-0xBF
        note_value = opcode & 0x3F  # lower 6 bits
        kind = opcode & 0xC0
        pos = addr + 1
        if large_notes:
            if kind == 0x00:  # 0x00-0x3F: delay, velocity, gate
                delay, delay_size = self.read_length_data(pos)
                pos += delay_size
                velocity = self.read_u8(pos)
                gate = self.read_u8(pos + 1)
                return f"note {get_note_name(note_value)}, {delay}, {velocity}, {gate}", pos + 2 - addr
            if kind == 0x40:  # 0x40-0x7F: delay, velocity
                delay, delay_size = self.read_length_data(pos)
                pos += delay_size
                velocity = self.read_u8(pos)
                return f"note {get_note_name(note_value)}, {delay}, {velocity}, 0", pos + 1 - addr
            # 0x80-0xBF: velocity, gate
            velocity = self.read_u8(pos)
            gate = self.read_u8(pos + 1)
            return f"note {get_note_name(note_value)}, <last>, {velocity}, {gate}", pos + 2 - addr
        if kind == 0x00:  # 0x00-0x3F: delay only
            delay, delay_size = self.read_length_data(pos)
            return f"note {get_note_name(note_value)}, {delay}", 1 + delay_size
        if kind == 0x40:  # 0x40-0x7F
            return f"note {get_note_name(note_value)}, <default>", 1
        return f"note {get_note_name(note_value)}, <last>", 1  # 0x80-0xBF

    for addr in range(data_len):
        if addr in processed_offsets:
            continue

        # Determine mode/context for this address
        mode, context = self.mode_contexts.get(addr, (self.mode, ()))
        opcode = self.read_u8(addr)
        line_prefix = f"/* {(self.address_base + addr):04X} */ "
        offset = addr + 1  # first operand byte
        is_note_opcode = mode == "note" and 0x00 <= opcode <= 0xBF

        # Unvisited bytes are dumped as data, unless they look like notes in note mode
        if addr not in self.visited_offsets and not is_note_opcode:
            if addr in self.data_labels:
                lines.append(f"\n{_fmt_data_label(addr)}: ; data")
            lines.append(f"{line_prefix}.db 0x{opcode:02X}")
            processed_offsets.add(addr)
            continue

        # Emit labels attached to this address
        if addr in self.labels:
            _stored_name, label_type = self.labels[addr]
            lines.append(f"\n{_fmt_code_label(addr)}: ; {label_type}")
        if addr in self.data_labels:
            lines.append(f"\n{_fmt_data_label(addr)}: ; data")

        command = self._get_command(opcode, mode)

        if not command:
            cmd_size = 1  # opcode byte
            if is_note_opcode:
                subtrack_idx = context[0] if context else 0
                large_notes = disasm_large_notes.get(subtrack_idx, False)
                text, cmd_size = _decode_note(addr, opcode, large_notes)
                lines.append(f"{line_prefix}{text}")
            elif mode == "note" and opcode == 0xC0:
                # Mute command
                delay, delay_size = self.read_length_data(offset)
                cmd_size = 1 + delay_size
                lines.append(f"{line_prefix}note_mute {delay}")
            else:
                lines.append(f"{line_prefix}.db 0x{opcode:02X} ; Unknown Opcode")
            # Mark all bytes of this command as processed
            for i in range(cmd_size):
                processed_offsets.add(addr + i)
            continue

        arg_values = []
        arg_size = 0

        for arg_type in command.args:
            pos = offset + arg_size  # where this operand starts
            size = 0
            val = None
            arg_incomplete = False

            if arg_type == ARG_U8:
                if pos >= data_len:
                    arg_incomplete, val = True, "?"
                else:
                    val, size = self.read_u8(pos), 1
            elif arg_type == ARG_S8:
                if pos >= data_len:
                    arg_incomplete, val = True, "?"
                else:
                    val, size = self.read_s8(pos), 1
            elif arg_type == ARG_U16:
                if pos + 1 >= data_len:
                    arg_incomplete, val, size = True, "?", max(0, data_len - pos)
                else:
                    val, size = self.read_u16(pos), 2
            elif arg_type == ARG_S16:
                if pos + 1 >= data_len:
                    arg_incomplete, val, size = True, "?", max(0, data_len - pos)
                else:
                    val, size = self.read_s16(pos), 2
                    name = command.name
                    is_relative_start = (
                        (mode == "group" and name.startswith("grp_start_relative_subtrack"))
                        or (mode == "subtrack" and name.startswith("sub_note") and name.endswith("_start"))
                    )
                    if is_relative_start:
                        # Offset is relative to the end of the 3-byte instruction
                        val = _fmt_code_ref(addr + 3 + val)
                    elif mode == "subtrack" and name in dynval_commands:
                        # The operand is an unsigned absolute address into the
                        # sequence script, so re-read it as u16
                        dynval_addr = self.read_u16(pos)
                        if dynval_addr < data_len and dynval_addr not in self.data_labels:
                            self.data_labels[dynval_addr] = (f"D_{dynval_addr:04X}", "data")
                        val = _fmt_data_label(dynval_addr)
            elif arg_type == ARG_VARLEN:
                if pos >= data_len:
                    arg_incomplete, val = True, "?"
                else:
                    val, size = self.read_varlen(pos)
            elif arg_type == ARG_ABS_ADDRESS:
                if pos + 1 >= data_len:
                    arg_incomplete, val, size = True, "?", max(0, data_len - pos)
                else:
                    val, size = self.read_u16(pos), 2
                    if command.name in data_pointer_commands:
                        val = _fmt_data_label(val)
                    else:
                        val = _fmt_code_ref(val)
            elif arg_type == ARG_REL_ADDRESS:
                if pos >= data_len:
                    arg_incomplete, rel_offset = True, 0
                else:
                    rel_offset, size = self.read_s8(pos), 1
                # Target is relative to the address AFTER this operand:
                # opcode byte + previous operands + this operand
                rel_addr = addr + 1 + arg_size + size + rel_offset
                val = f"{_fmt_code_ref(rel_addr)} /* rel: {rel_offset:+d} */"
            elif arg_type == ARG_DATA_ADDRESS:
                if pos + 1 >= data_len:
                    arg_incomplete, val, size = True, "?", max(0, data_len - pos)
                else:
                    val, size = self.read_u16(pos), 2
                    val = _fmt_data_label(val) if val in self.data_labels else _fmt_hex(val)

            # Plain numbers: negatives and 0-255 in decimal, larger values in hex.
            # Values already turned into label/address strings are left alone.
            if isinstance(val, int):
                if val < 0:
                    val = str(val)
                elif val > 0xFFFF:
                    val = f"0x{val:08X}"
                elif val > 0xFF:
                    val = f"0x{val:04X}"
                else:
                    val = str(val)
            elif val is None:
                val = "?"

            arg_values.append(val)
            arg_size += size

            if arg_incomplete:
                # Data ended inside this operand; nothing further can be decoded
                break

        cmd_size = 1 + arg_size  # opcode byte + operands
        arg_str = ", ".join(map(str, arg_values))
        lines.append(f"{line_prefix}{command.name:<35} {arg_str}")

        # Track large_notes flag changes in subtrack mode.
        # Note: the command names are counterintuitive - ON sets False, OFF sets True
        if mode == "subtrack" and command.name in ("sub_large_note_on", "sub_large_note_off"):
            subtrack_idx = context[0] if context else 0
            disasm_large_notes[subtrack_idx] = command.name == "sub_large_note_off"

        # Mark all bytes of this command as processed
        for i in range(cmd_size):
            processed_offsets.add(addr + i)

    return lines
|
|
|
|
def _parse_raw_bytes(raw: str) -> bytes:
    """Convert the text given to --raw-bytes into bytes.

    Three input shapes are accepted:

    * a ``.db`` listing such as ``/* 0517 */ .db 0xEB`` (one or more entries);
      every ``.db NN`` / ``.db 0xNN`` value is taken in order of appearance,
    * one continuous hex string (``00123F``, optionally prefixed with ``0x``),
    * byte tokens separated by whitespace, commas or underscores, each with an
      optional ``0x`` prefix. All values are read as hexadecimal.

    Raises:
        ValueError: with a user-facing message when the text cannot be parsed.
    """
    if '.db' in raw:
        # Each match yields (hex digits after "0x", bare hex digits); exactly one
        # of the two is non-empty and holds 1-2 hex digits, so it fits in a byte.
        found = re.findall(r"\.db\s+0x([0-9A-Fa-f]{1,2})|\.db\s+([0-9A-Fa-f]{1,2})", raw)
        if not found:
            raise ValueError("could not parse any .db bytes from provided text")
        return bytes(int(prefixed or bare, 16) for prefixed, bare in found)

    # Commas and underscores act as separators; split() handles all whitespace
    tokens = raw.replace(',', ' ').replace('_', ' ').split()

    # A single token made only of hex digits (and x/X) is one continuous hex string
    if len(tokens) == 1 and all(c in '0123456789abcdefABCDEFxX' for c in tokens[0]):
        digits = tokens[0]
        if digits.startswith(('0x', '0X')):
            digits = digits[2:]
        if len(digits) % 2 != 0:
            raise ValueError("raw-bytes hex string must have even length")
        try:
            return bytes.fromhex(digits)
        except ValueError:
            raise ValueError("failed to parse raw-bytes hex string") from None

    # Otherwise every token is one byte (supports '0xNN' or 'NN')
    parsed = bytearray()
    for tok in tokens:
        digits = tok[2:] if tok.startswith(('0x', '0X')) else tok
        try:
            val = int(digits, 16)
        except ValueError:
            raise ValueError(f"invalid byte token '{tok}' in --raw-bytes") from None
        if not (0 <= val <= 0xFF):
            raise ValueError(f"byte value out of range (0-255): {val}")
        parsed.append(val)
    return bytes(parsed)


def main():
    """Command-line entry point of the MML disassembler.

    Loads the input (a binary file or the --raw-bytes text), runs the analysis
    and disassembly passes, and writes the listing to --output or stdout.

    Error messages are printed to stderr so they never mix with a listing
    written to stdout, and all arguments are validated before the
    disassembler is created.

    Returns:
        0 on success, 1 when the arguments or input are invalid or the output
        file cannot be written.
    """
    import sys  # local import: needed only for stderr reporting

    parser = argparse.ArgumentParser(description="A two-pass disassembler for Animal Crossing MML.")
    # Either provide an input file or raw bytes
    parser.add_argument("input_file", nargs='?', help="Path to the compiled MML binary file.")
    parser.add_argument("--raw-bytes", dest="raw_bytes", default=None,
                        help="Hex byte string to disassemble (e.g. '00 12 3F' or '00123F' or '0x00,0x12,0x3F').")
    parser.add_argument("-o", "--output", help="Path to write the disassembled text file to. (Default: stdout)")
    parser.add_argument("-m", "--mode", choices=['group', 'subtrack', 'note'], default='group',
                        help="Initial disassembly mode (default: group). For full sequences starting at offset 0, "
                             "mode is auto-detected as 'group' and transitions are tracked automatically. "
                             "This parameter is only used when analyzing partial sections not starting at offset 0.")
    parser.add_argument("--addr-base", dest="addr_base", default=None,
                        help="Optional base address (hex like 0x500 or decimal) to add to output addresses.")

    args = parser.parse_args()

    # Load data from either raw bytes or file
    if args.raw_bytes is not None:
        try:
            data = _parse_raw_bytes(args.raw_bytes)
        except ValueError as exc:
            print(f"Error: {exc}", file=sys.stderr)
            return 1
    else:
        if not args.input_file:
            print("Error: either provide an input_file or --raw-bytes", file=sys.stderr)
            return 1
        try:
            with open(args.input_file, "rb") as f:
                data = f.read()
        except FileNotFoundError as e:
            print(f"Error: File not found - {e.filename}", file=sys.stderr)
            return 1
        except OSError as e:
            # Permission problems, directories, etc. get a message, not a traceback
            print(f"Error: could not read input file {args.input_file} - {e.strerror}", file=sys.stderr)
            return 1

    # Parse the optional address base: "0x..." is hex, anything else decimal
    address_base = None
    if args.addr_base is not None:
        try:
            if args.addr_base.lower().startswith('0x'):
                address_base = int(args.addr_base, 16)
            else:
                address_base = int(args.addr_base, 10)
        except ValueError:
            print("Error: invalid --addr-base value; expected hex (0x...) or decimal integer", file=sys.stderr)
            return 1

    # Run the disassembler with the mode given on the command line
    disassembler = MMLDisassembler(data, args.mode)
    if address_base is not None:
        disassembler.address_base = address_base
    disassembler.analysis_pass()
    output_str = "\n".join(disassembler.disassembly_pass())

    if args.output:
        try:
            with open(args.output, "w") as f:
                f.write(output_str)
        except IOError:
            print(f"Error: Could not write to output file {args.output}", file=sys.stderr)
            return 1
        print(f"Disassembly complete. Output written to {args.output}")
    else:
        print(output_str)
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so callers and scripts can detect failures
    raise SystemExit(main())
|