From ad361e125d4b0d57b3650e30d377fe1e089366a5 Mon Sep 17 00:00:00 2001
From: Cuyler36 <Cuyler36@users.noreply.github.com>
Date: Thu, 23 Oct 2025 02:52:41 -0400
Subject: [PATCH 1/3] match jaudio_NES/driver Nas_SynthMain

---
 include/jaudio_NES/audiocommon.h         |  19 +-
 include/jaudio_NES/audiostruct.h         |   2 +-
 include/jaudio_NES/driver.h              |   2 +-
 include/jaudio_NES/system.h              |   1 +
 src/static/jaudio_NES/internal/channel.c |   6 +-
 src/static/jaudio_NES/internal/driver.c  | 985 ++++++++++++-----------
 6 files changed, 532 insertions(+), 483 deletions(-)

diff --git a/include/jaudio_NES/audiocommon.h b/include/jaudio_NES/audiocommon.h
index 42c1fa93..0dfa9e9a 100644
--- a/include/jaudio_NES/audiocommon.h
+++ b/include/jaudio_NES/audiocommon.h
@@ -68,6 +68,14 @@ extern "C" {
 #define A_CMD_LOADCACHE     24
 #define A_CMD_EXIT          25
 
+#define aUnkCmd3(pkt, a1, a2, a3)                                       \
+{                                                                       \
+        Acmd *_a = (Acmd *)pkt;                                         \
+                                                                        \
+        _a->words.w0 = _SHIFTL(A_CMD_UNK3, 24, 8) | _SHIFTL(a3, 0, 16);     \
+        _a->words.w1 = _SHIFTL(a1, 16, 16) | _SHIFTL(a2, 0, 16);        \
+}
+
 #define	aHalfCut(pkt, src, dst, len)						\
 {									\
 	Acmd *_a = (Acmd *)pkt;						\
@@ -84,6 +92,14 @@ extern "C" {
 	_a->words.w1 = _SHIFTL(rampL, 16, 16) | _SHIFTL(rampR, 0, 16);		\
 }
 
+#define	aLoadCache(pkt, dst, src, len)						\
+{									\
+	Acmd *_a = (Acmd *)pkt;						\
+									\
+	_a->words.w0 = _SHIFTL(A_CMD_LOADCACHE, 24, 8) | _SHIFTL((len) >> 4, 16, 8) | _SHIFTL(src, 0, 16);    		\
+	_a->words.w1 = (u32)(dst);		\
+}
+
 #define	aLoadBuffer2(pkt, dst, src, len)						\
 {									\
 	Acmd *_a = (Acmd *)pkt;						\
@@ -234,7 +250,8 @@ typedef enum SoundOutputMode {
     /* 0 */ SOUND_OUTPUT_STEREO,
     /* 1 */ SOUND_OUTPUT_HEADSET,
     /* 2 */ SOUND_OUTPUT_SURROUND,
-    /* 3 */ SOUND_OUTPUT_MONO
+    /* 3 */ SOUND_OUTPUT_MONO,
+    /* 4 */ SOUND_OUTPUT_DOLBY_SURROUND
 } SoundOutputMode;
 
 typedef enum SampleCodec {
diff --git a/include/jaudio_NES/audiostruct.h b/include/jaudio_NES/audiostruct.h
index dd3b1bbb..d8a5d7ca 100644
--- a/include/jaudio_NES/audiostruct.h
+++ b/include/jaudio_NES/audiostruct.h
@@ -322,7 +322,7 @@ typedef struct playbackch_ {
     /* 0x54 */ sweep portamento_sweep;
     /* 0x60 */ tmtable vibrato_tmtable;
     /* 0x7C */ s32 _7C;
-    /* 0x80 */ u8 _80;
+    /* 0x80 */ u8 vel_conv_table_idx;
     /* 0x84 */ u32 start_sample_pos;
     /* 0x88 */ u8 _88[0x18];
 } playbackch;
diff --git a/include/jaudio_NES/driver.h b/include/jaudio_NES/driver.h
index 2c4da73f..95171e8f 100644
--- a/include/jaudio_NES/driver.h
+++ b/include/jaudio_NES/driver.h
@@ -19,7 +19,7 @@
 extern Acmd* Nas_smzAudioFrame(Acmd* cmds, s32* processed_cmds, s16* pSamples, s32 nSamples);
 extern Acmd* Nas_DriveRsp(s16* aiBuf, s32 aiBufLen, Acmd* cmd, s32 updateIndex);
 extern Acmd* Nas_SynthMain(s32 chan_id, commonch* common, driverch* driver, s16* samples, s32 samples_per_update, Acmd* cmd, s32 update_idx);
-extern Acmd* Nas_Synth_Resample(Acmd* cmd, driverch* driver, s32 size, u16 frequencyFixedPoint, u16 sampleDmemBeforeResampling, s32 flags);
+extern Acmd* Nas_Synth_Resample(Acmd* cmd, const driverch* driver, s32 size, u16 frequencyFixedPoint, u16 sampleDmemBeforeResampling, s32 flags);
 extern Acmd* Nas_DolbySurround(Acmd* cmd, commonch* common, driverch* driver, s32 samples_per_update, s32 dmem, s32 flags);
 extern Acmd* Nas_Synth_Envelope(Acmd* cmd, commonch* common, driverch* driver, s32 samples_per_update, u16 dmem, s32 haasEffectDelaySide, s32 flags);
 extern Acmd* Nas_Synth_Delay(Acmd* cmd, commonch* common, driverch* driver, s32 size, s32 flags, s32 haasEffectDelaySide);
diff --git a/include/jaudio_NES/system.h b/include/jaudio_NES/system.h
index 68df7203..af94d89f 100644
--- a/include/jaudio_NES/system.h
+++ b/include/jaudio_NES/system.h
@@ -61,6 +61,7 @@ extern void Nas_BgCopyInit(void);
 extern void MK_Init(void);
 extern void Nas_WaveDmaNew(s32 n_channels);
 
+extern Na_SyncProc NA_SYNC_PROC;
 extern BOOL AUDIO_SYSTEM_READY;
 extern Na_DmaProc NA_DMA_PROC;
 extern OSMesgQueue MK_QUEUE;
diff --git a/src/static/jaudio_NES/internal/channel.c b/src/static/jaudio_NES/internal/channel.c
index 52995849..930b30d8 100644
--- a/src/static/jaudio_NES/internal/channel.c
+++ b/src/static/jaudio_NES/internal/channel.c
@@ -805,9 +805,9 @@ extern void Nas_EntryTrack(channel* chan, note* n) {
         }
     }
 
-    playback->_80 = (int)(n->velocity_square2 * 11.5f);
-    if (playback->_80 > 15) {
-        playback->_80 = 15;
+    playback->vel_conv_table_idx = (int)(n->velocity_square2 * 11.5f);
+    if (playback->vel_conv_table_idx > 15) {
+        playback->vel_conv_table_idx = 15;
     }
 
     playback->bank_id = subtrack->bank_id;
diff --git a/src/static/jaudio_NES/internal/driver.c b/src/static/jaudio_NES/internal/driver.c
index 220d051f..4c497d43 100644
--- a/src/static/jaudio_NES/internal/driver.c
+++ b/src/static/jaudio_NES/internal/driver.c
@@ -25,7 +25,7 @@ typedef enum {
 
 static u16 NOISE_TABLE[] = { 0, 1, 2, 4, 8, 12, 16, 20, 24, 32, 36, 40, 46, 52, 56, 53 };
 static dspch_ DSPCH[64];
-static u32 STOP_VELOCONV = 0;
+static s32 STOP_VELOCONV = 0;
 
 static u32 Env_DataH = (A_ENVMIXER << 24) | (0x00 << 16) | (0x00 << 8) | (0x00);
 static u32 Env_DataL1 = 0x58AAC4DE;
@@ -655,7 +655,6 @@ extern Acmd* Nas_DriveRsp(s16* aiBuf, s32 aiBufLen, Acmd* cmd, s32 updateIndex)
     return cmd;
 }
 
-// TODO: this function was taken mostly from MM but needs a lot of work
 extern Acmd* Nas_SynthMain(s32 chan_id, commonch* common, driverch* driver, s16* samples, s32 samples_per_update, Acmd* cmd, s32 update_idx) {
     s32 pad1[1];
     s32 size;
@@ -663,21 +662,22 @@ extern Acmd* Nas_SynthMain(s32 chan_id, commonch* common, driverch* driver, s16*
     smzwavetable* sample;
     adpcmloop* loopInfo;
     s32 numSamplesUntilEnd;
+    s32 flags;
+    s32 skipBytes;
+    s32 dmemUncompressedAddrOffset1;
+    s32 numSamplesToLoadAdj;
+    s32 numSamplesProcessed;
+    s32 numFirstFrameSamplesToIgnore;
+    s32 numSamplesToDecode;
     s32 numSamplesInThisIteration;
     s32 sampleFinished;
     s32 loopToPoint;
-    s32 flags;
     u16 frequencyFixedPoint;
     s32 gain;
     s32 frameIndex;
-    s32 skipBytes;
-    void* combFilterState;
-    s32 numSamplesToDecode;
-    s32 numFirstFrameSamplesToIgnore;
-    u8* sampleAddr;
     u32 numSamplesToLoadFixedPoint;
-    s32 numSamplesToLoadAdj;
-    s32 numSamplesProcessed;
+    s32 numSamplesToLoad;
+    u8* sampleAddr;
     s32 sampleEndPos;
     s32 numSamplesToProcess;
     s32 dmemUncompressedAddrOffset2;
@@ -685,526 +685,557 @@ extern Acmd* Nas_SynthMain(s32 chan_id, commonch* common, driverch* driver, s16*
     s32 numSamplesInFirstFrame;
     s32 numTrailingSamplesToIgnore;
     s32 pad3[3];
+    s16 sampleDataDmemAddr;
     s32 frameSize;
     s32 numFramesToDecode;
-    s32 skipInitialSamples;
-    s32 zeroOffset;
     u8* samplesToLoadAddr;
     s32 numParts;
     s32 curPart;
     s32 sampleDataChunkAlignPad;
     s32 haasEffectDelaySide;
-    s32 numSamplesToLoadFirstPart;
-    u16 sampleDmemBeforeResampling;
+    s32 sampleDataChunkSize;
+    s32 skipInitialSamples;
+    s32 zeroOffset;
     s32 sampleAddrOffset;
     s32 combFilterDmem;
-    s32 dmemUncompressedAddrOffset1;
     channel* chan;
-    u32 numSamplesToLoad;
-    u16 combFilterSize;
-    u16 combFilterGain;
+    u32 combFilterSize;
+    u32 combFilterGain;
+    void* combFilterState;
     s16* filter;
+    s32 numSamplesToLoadFirstPart;
+    u16 sampleDmemBeforeResampling;
+    u16 something_unused_from_sm64;
+    u32 stack_chan_id;
+    u32 stack_update_idx;
+    u8* tmpSamplesToLoadAddr;
     s32 bookOffset = common->book_ofs;
     s32 finished = common->finished;
-    s32 sampleDataChunkSize;
-    s16 sampleDataDmemAddr;
 
+    something_unused_from_sm64 = 0;
     chan = &AG.channels[chan_id];
-    flags = A_CONTINUE;
     DCTouchRange(chan, sizeof(channel));
     size = SAMPLE_SIZE * samples_per_update;
+    
+    // Deviation from MM: ensure that the tuned sample pointer isn't NULL before it is dereferenced below
+    if (common->tuned_sample != NULL) {
+        flags = A_CONTINUE;
 
-    // Initialize the synthesis state
-    if (common->needs_init == true) {
-        flags = A_INIT;
-        driver->at_loop_point = false;
-        driver->stop_loop = false;
-        driver->sample_pos_integer_part = chan->playback_ch.start_sample_pos;
-        driver->sample_pos_fractional_part = 0;
-        driver->current_volume_left = 0;
-        driver->current_volume_right = 0;
-        driver->prev_haas_effect_left_delay_size = 0;
-        driver->prev_haas_effect_right_delay_size = 0;
-        driver->cur_reverb_vol = common->target_reverb_volume;
-        driver->num_parts = 0;
-        driver->comb_filter_needs_init = true;
-        chan->common_ch.finished = false;
-        driver->_18[7] = chan->playback_ch._80; // Never set, never used
-        finished = false;
-    }
+        // Initialize the synthesis state
+        if (common->needs_init == true) {
+            flags = A_INIT;
+            driver->at_loop_point = false;
+            driver->stop_loop = false;
+            driver->sample_pos_integer_part = chan->playback_ch.start_sample_pos;
+            driver->sample_pos_fractional_part = 0;
 
-    // Process the sample in either one or two parts
-    numParts = common->has_two_parts + 1;
-
-    // Determine number of samples to load based on samples_per_update and relative frequency
-    frequencyFixedPoint = common->frequency_fixed_point;
-    numSamplesToLoadFixedPoint = (frequencyFixedPoint * size) + driver->sample_pos_fractional_part;
-    numSamplesToLoad = numSamplesToLoadFixedPoint >> 16;
-
-    if (numSamplesToLoad == 0) {
-        skipBytes = false;
-    }
-
-    driver->sample_pos_fractional_part = numSamplesToLoadFixedPoint & 0xFFFF;
-
-    // Partially-optimized out no-op ifs required for matching. SM64 decomp
-    // makes it clear that this is how it should look.
-    if ((driver->num_parts == 1) && (numParts == 2)) {
-    } else if ((driver->num_parts == 2) && (numParts == 1)) {
-    } else {
-    }
-
-    driver->num_parts = numParts;
-
-    // deviation from MM here, no synth wave check
-
-    sample = common->tuned_sample->wavetable;
-    loopInfo = sample->loop;
-
-    if (chan->playback_ch.status != 0) {
-        driver->stop_loop = true;
-    }
-
-    if ((loopInfo->count == 2) && driver->stop_loop) {
-        sampleEndPos = loopInfo->sample_end;
-    } else {
-        sampleEndPos = loopInfo->loop_end;
-    }
-
-    sampleAddr = sample->sample;
-    numSamplesToLoadFirstPart = 0;
-
-    // If the frequency requested is more than double that of the raw sample,
-    // then the sample processing is split into two parts.
-    for (curPart = 0; curPart < numParts; curPart++) {
-        numSamplesProcessed = 0;
-        dmemUncompressedAddrOffset1 = 0;
-
-        // Adjust the number of samples to load only if there are two parts and an odd number of samples
-        if (numParts == 1) {
-            numSamplesToLoadAdj = numSamplesToLoad;
-        } else if (numSamplesToLoad & 1) {
-            // round down for the first part
-            // round up for the second part
-            numSamplesToLoadAdj = (numSamplesToLoad & ~1) + (curPart * 2);
-        } else {
-            numSamplesToLoadAdj = numSamplesToLoad;
-        }
-
-        // Load the ADPCM codeBook
-        if ((sample->codec == CODEC_ADPCM) || (sample->codec == CODEC_SMALL_ADPCM)) {
-            if (AG.adpcm_code_book != sample->book->codebook) {
-                u32 numEntries;
-
-                switch (bookOffset) {
-                    case 1:
-                        // AG.adpcm_code_book = &gInvalidAdpcmCodeBook[1];
-                        break;
-
-                    case 2:
-                    case 3:
-                    default:
-                        AG.adpcm_code_book = sample->book->codebook;
-                        break;
-                }
-
-                numEntries = SAMPLES_PER_FRAME * sample->book->order * sample->book->n_predictors;
-                aLoadADPCM(cmd++, numEntries, AG.adpcm_code_book);
-            }
-        }
-
-        // Continue processing samples until the number of samples needed to load is reached
-        while (numSamplesProcessed != numSamplesToLoadAdj) {
-            sampleFinished = false;
-            loopToPoint = false;
-            dmemUncompressedAddrOffset2 = 0;
-
-            numFirstFrameSamplesToIgnore = driver->sample_pos_integer_part & 0xF;
-            numSamplesUntilEnd = sampleEndPos - driver->sample_pos_integer_part;
-
-            // Calculate number of samples to process this loop
-            numSamplesToProcess = numSamplesToLoadAdj - numSamplesProcessed;
-
-            if ((numFirstFrameSamplesToIgnore == 0) && !driver->at_loop_point) {
-                numFirstFrameSamplesToIgnore = SAMPLES_PER_FRAME;
-            }
-            numSamplesInFirstFrame = SAMPLES_PER_FRAME - numFirstFrameSamplesToIgnore;
-
-            // Determine the number of samples to decode based on whether the end will be reached or not.
-            if (numSamplesToProcess < numSamplesUntilEnd) {
-                // The end will not be reached.
-                numFramesToDecode =
-                    (s32)(numSamplesToProcess - numSamplesInFirstFrame + SAMPLES_PER_FRAME - 1) / SAMPLES_PER_FRAME;
-                numSamplesToDecode = numFramesToDecode * SAMPLES_PER_FRAME;
-                numTrailingSamplesToIgnore = numSamplesInFirstFrame + numSamplesToDecode - numSamplesToProcess;
+            // deviation from MM
+            if (chan->playback_ch.adsr_envp.state.flags.unused == TRUE) {
+                // previously unused ADSR state bit seems to be used for preserving volume
+                driver->current_volume_left = common->target_volume_left << 4;
+                driver->current_volume_right = common->target_volume_right << 4;
             } else {
-                // The end will be reached.
-                numSamplesToDecode = numSamplesUntilEnd - numSamplesInFirstFrame;
-                numTrailingSamplesToIgnore = 0;
-                if (numSamplesToDecode <= 0) {
-                    numSamplesToDecode = 0;
-                    numSamplesInFirstFrame = numSamplesUntilEnd;
-                }
-                numFramesToDecode = (numSamplesToDecode + SAMPLES_PER_FRAME - 1) / SAMPLES_PER_FRAME;
-                if (loopInfo->count != 0) {
-                    if ((loopInfo->count == 2) && driver->stop_loop) {
-                        sampleFinished = true;
-                    } else {
-                        // Loop around and restart
-                        loopToPoint = true;
+                driver->current_volume_left = 0;
+                driver->current_volume_right = 0;
+            }
+
+            driver->prev_haas_effect_left_delay_size = 0;
+            driver->prev_haas_effect_right_delay_size = 0;
+            driver->cur_reverb_vol = common->target_reverb_volume;
+            driver->num_parts = 0;
+            driver->comb_filter_needs_init = true;
+            chan->common_ch.finished = false;
+            driver->vel_conv_table_idx = chan->playback_ch.vel_conv_table_idx;
+            finished = false;
+        }
+
+        // Process the sample in either one or two parts
+        numParts = common->has_two_parts + 1;
+
+        // Determine number of samples to load based on samples_per_update and relative frequency
+        frequencyFixedPoint = common->frequency_fixed_point;
+        numSamplesToLoadFixedPoint = (frequencyFixedPoint * samples_per_update * SAMPLE_SIZE) + driver->sample_pos_fractional_part;
+        numSamplesToLoad = (u16)(numSamplesToLoadFixedPoint >> 16);
+
+        if (numSamplesToLoad == 0) {
+            skipBytes = false;
+        }
+
+        driver->sample_pos_fractional_part = numSamplesToLoadFixedPoint & 0xFFFF;
+
+        // Partially-optimized out no-op ifs required for matching. SM64 decomp
+        // makes it clear that this is how it should look.
+        if (((driver->num_parts == 1) && (numParts == 2))) {
+        } else if (((driver->num_parts == 2) && (numParts == 1))) {
+        } else {
+            something_unused_from_sm64 = 0;
+        }
+        
+        driver->num_parts = numParts;
+        
+        // deviation from MM here, no synth wave check
+        
+        sample = common->tuned_sample->wavetable;
+        loopInfo = sample->loop;
+
+        if (chan->playback_ch.status != 0) {
+            driver->stop_loop = true;
+        }
+
+        if ((loopInfo->count == 2) && driver->stop_loop) {
+            sampleEndPos = loopInfo->sample_end;
+        } else {
+            sampleEndPos = loopInfo->loop_end;
+        }
+
+        stack_chan_id = chan_id;
+        stack_update_idx = update_idx;
+        sampleAddr = sample->sample;
+        numSamplesToLoadFirstPart = 0;
+
+        // If the frequency requested is more than double that of the raw sample,
+        // then the sample processing is split into two parts.
+        for (curPart = 0; curPart < numParts; curPart++) {
+            numSamplesProcessed = 0;
+            dmemUncompressedAddrOffset1 = 0;
+
+            // Adjust the number of samples to load only if there are two parts and an odd number of samples
+            if (numParts == 1) {
+                numSamplesToLoadAdj = numSamplesToLoad;
+            } else if ((s32)numSamplesToLoad & 1) {
+                // round down for the first part
+                // round up for the second part
+                numSamplesToLoadAdj = (numSamplesToLoad & ~1) + (curPart * 2);
+            } else {
+                numSamplesToLoadAdj = numSamplesToLoad;
+            }
+
+            // Load the ADPCM codeBook
+            if ((sample->codec == CODEC_ADPCM) || (sample->codec == CODEC_SMALL_ADPCM)) {
+                if (AG.adpcm_code_book != sample->book->codebook) {
+                    u32 numEntries;
+
+                    switch (bookOffset) {
+                        case 1:
+                            // AG.adpcm_code_book = &gInvalidAdpcmCodeBook[1];
+                            // break;
+
+                        case 2:
+                        case 3:
+                        default:
+                            AG.adpcm_code_book = sample->book->codebook;
+                            break;
+                    }
+
+                    numEntries = SAMPLE_SIZE * sample->book->order * sample->book->n_predictors;
+                    numEntries *= 8; // deviation from MM: multiply numEntries by 8
+                    if (numEntries != 0) {
+                        aLoadADPCM(cmd++, numEntries, AG.adpcm_code_book);
                     }
-                } else {
-                    sampleFinished = true;
                 }
             }
 
-            // Set parameters based on compression type
-            switch (sample->codec) {
-                case CODEC_ADPCM:
-                    // 16 2-byte samples (32 bytes) compressed into 4-bit samples (8 bytes) + 1 header byte
-                    frameSize = 9;
-                    skipInitialSamples = SAMPLES_PER_FRAME;
-                    zeroOffset = 0;
-                    break;
+            // Continue processing samples until the number of samples needed to load is reached
+            while (numSamplesProcessed != numSamplesToLoadAdj) {
+                sampleFinished = false;
+                loopToPoint = false;
+                dmemUncompressedAddrOffset2 = 0;
 
-                case CODEC_SMALL_ADPCM:
-                    // 16 2-byte samples (32 bytes) compressed into 2-bit samples (4 bytes) + 1 header byte
-                    frameSize = 5;
-                    skipInitialSamples = SAMPLES_PER_FRAME;
-                    zeroOffset = 0;
-                    break;
+                numFirstFrameSamplesToIgnore = driver->sample_pos_integer_part & 0xF;
+                numSamplesUntilEnd = sampleEndPos - driver->sample_pos_integer_part;
 
-                case CODEC_UNK7:
-                    // 2 2-byte samples (4 bytes) processed without decompression
-                    frameSize = 4;
-                    skipInitialSamples = SAMPLES_PER_FRAME;
-                    zeroOffset = 0;
-                    break;
+                // Calculate number of samples to process this loop
+                numSamplesToProcess = numSamplesToLoadAdj - numSamplesProcessed;
 
-                case CODEC_S8:
-                    // 16 2-byte samples (32 bytes) compressed into 8-bit samples (16 bytes)
-                    frameSize = 16;
-                    skipInitialSamples = SAMPLES_PER_FRAME;
-                    zeroOffset = 0;
-                    break;
+                if ((numFirstFrameSamplesToIgnore == 0) && !driver->at_loop_point) {
+                    numFirstFrameSamplesToIgnore = SAMPLES_PER_FRAME;
+                }
+                numSamplesInFirstFrame = SAMPLES_PER_FRAME - numFirstFrameSamplesToIgnore;
 
-                case CODEC_REVERB:
-                    reverbAddrSrc = (void*)0xFFFFFFFF;
-                    if (NA_SOUND_CALLBACK != NULL) {
-                        reverbAddrSrc = NA_SOUND_CALLBACK(sample, numSamplesToLoadAdj, flags, chan_id);
+                // Determine the number of samples to decode based on whether the end will be reached or not.
+                if (numSamplesUntilEnd > numSamplesToProcess) {
+                    // The end will not be reached.
+                    numFramesToDecode =
+                        (s32)(numSamplesToProcess + SAMPLES_PER_FRAME - 1 - numSamplesInFirstFrame) / SAMPLES_PER_FRAME;
+                    numSamplesToDecode = numFramesToDecode * SAMPLES_PER_FRAME;
+                    numTrailingSamplesToIgnore = numSamplesInFirstFrame + numSamplesToDecode - numSamplesToProcess;
+                } else {
+                    // The end will be reached.
+                    numSamplesToDecode = numSamplesUntilEnd - numSamplesInFirstFrame;
+                    if (numSamplesToDecode <= 0) {
+                        numSamplesToDecode = 0;
+                        numSamplesInFirstFrame = numSamplesUntilEnd;
                     }
-
-                    if (reverbAddrSrc == (void*)0xFFFFFFFF) {
-                        sampleFinished = true;
-                    } else if (reverbAddrSrc == NULL) {
-                        return cmd;
+                    numTrailingSamplesToIgnore = 0;
+                    numFramesToDecode = (numSamplesToDecode + SAMPLES_PER_FRAME - 1) / SAMPLES_PER_FRAME;
+                    if (loopInfo->count != 0) {
+                        if ((loopInfo->count == 2) && driver->stop_loop) {
+                            sampleFinished = true;
+                        } else {
+                            // Loop around and restart
+                            loopToPoint = true;
+                        }
                     } else {
-                        Nas_LoadBuffer2(cmd++, DMEM_UNCOMPRESSED_NOTE,
-                                                (numSamplesToLoadAdj + SAMPLES_PER_FRAME) * SAMPLE_SIZE,
-                                                (s32)reverbAddrSrc);
-                        flags = A_CONTINUE;
+                        sampleFinished = true;
+                    }
+                }
+
+                // Set parameters based on compression type
+                switch (sample->codec) {
+                    case CODEC_ADPCM:
+                        // 16 2-byte samples (32 bytes) compressed into 4-bit samples (8 bytes) + 1 header byte
+                        frameSize = 9;
+                        skipInitialSamples = SAMPLES_PER_FRAME;
+                        zeroOffset = 0;
+                        break;
+
+                    case CODEC_SMALL_ADPCM:
+                        // 16 2-byte samples (32 bytes) compressed into 2-bit samples (4 bytes) + 1 header byte
+                        frameSize = 5;
+                        skipInitialSamples = SAMPLES_PER_FRAME;
+                        zeroOffset = 0;
+                        break;
+
+                    case CODEC_UNK7:
+                        // 2 2-byte samples (4 bytes) processed without decompression
+                        frameSize = 4;
+                        skipInitialSamples = SAMPLES_PER_FRAME;
+                        zeroOffset = 0;
+                        break;
+
+                    case CODEC_S8:
+                        // 16 2-byte samples (32 bytes) compressed into 8-bit samples (16 bytes)
+                        frameSize = 16;
+                        skipInitialSamples = SAMPLES_PER_FRAME;
+                        zeroOffset = 0;
+                        break;
+
+                    case CODEC_REVERB:
+                        reverbAddrSrc = (void*)0xFFFFFFFF;
+                        if (NA_SOUND_CALLBACK != NULL) {
+                            // ???: only the low 8 bits of the callback's return value are kept here
+                            reverbAddrSrc = (void*)((u32)NA_SOUND_CALLBACK(sample, numSamplesToLoadAdj, flags, chan_id) & 0xFF);
+                        }
+
+                        if ((s32)reverbAddrSrc == 0xFFFFFFFF) {
+                            sampleFinished = true;
+                        } else if ((s32)reverbAddrSrc == 0) {
+                            return cmd;
+                        } else {
+                            Nas_LoadBuffer2(cmd++, DMEM_UNCOMPRESSED_NOTE,
+                                numSamplesToLoadAdj * SAMPLE_SIZE + SAMPLES_PER_FRAME * SAMPLE_SIZE,
+                                (s32)reverbAddrSrc);
+                            // deviation from MM: jump to the shared tail at codec_continue_and_skip instead of inlining it:
+                            goto codec_continue_and_skip;
+                            // flags = A_CONTINUE;
+                            // skipBytes = 0;
+                            // numSamplesProcessed = numSamplesToLoadAdj;
+                            // dmemUncompressedAddrOffset1 = numSamplesToLoadAdj;
+                        }
+                        goto skip;
+
+                    case CODEC_S16_INMEMORY:
+                    case CODEC_UNK6:
+                        Nas_ClearBuffer(cmd++, DMEM_UNCOMPRESSED_NOTE,
+                            numSamplesToLoadAdj * SAMPLE_SIZE + SAMPLES_PER_FRAME * SAMPLE_SIZE);
+codec_continue_and_skip:
                         skipBytes = 0;
+                        flags = A_CONTINUE;
                         numSamplesProcessed = numSamplesToLoadAdj;
                         dmemUncompressedAddrOffset1 = numSamplesToLoadAdj;
-                    }
-                    goto skip;
+                        goto skip;
 
-                case CODEC_S16_INMEMORY:
-                case CODEC_UNK6:
-                    Nas_ClearBuffer(cmd++, DMEM_UNCOMPRESSED_NOTE,
-                                            (numSamplesToLoadAdj + SAMPLES_PER_FRAME) * SAMPLE_SIZE);
-                    flags = A_CONTINUE;
-                    skipBytes = 0;
-                    numSamplesProcessed = numSamplesToLoadAdj;
-                    dmemUncompressedAddrOffset1 = numSamplesToLoadAdj;
-                    goto skip;
-
-                case CODEC_S16:
-                    Nas_ClearBuffer(cmd++, DMEM_UNCOMPRESSED_NOTE,
-                                            (numSamplesToLoadAdj + SAMPLES_PER_FRAME) * SAMPLE_SIZE);
-                    flags = A_CONTINUE;
-                    skipBytes = 0;
-                    numSamplesProcessed = numSamplesToLoadAdj;
-                    dmemUncompressedAddrOffset1 = numSamplesToLoadAdj;
-                    goto skip;
-
-                default:
-                    break;
-            }
-
-            // Move the compressed raw sample data from ram into the rsp (DMEM)
-            if (numFramesToDecode != 0) {
-                // Get the offset from the start of the sample to where the sample is currently playing from
-                frameIndex = (driver->sample_pos_integer_part + skipInitialSamples - numFirstFrameSamplesToIgnore) /
-                                SAMPLES_PER_FRAME;
-                sampleAddrOffset = frameIndex * frameSize;
-
-                // Get the ram address of the requested sample chunk
-                if (sample->medium == MEDIUM_RAM) {
-                    // Sample is already loaded into ram
-                    samplesToLoadAddr = sampleAddr + (zeroOffset + sampleAddrOffset);
-                } else if (AG._2A14) { // always false
-                    return cmd;
-                } else if (sample->medium == MEDIUM_DISK) {
-                    // This medium is unsupported so terminate processing this chan
-                    return cmd;
-                } else {
-                    // This medium is not in ram, so dma the requested sample into ram
-                    samplesToLoadAddr = (u8*)Nas_WaveDmaCallBack((u32)(sampleAddr + (zeroOffset + sampleAddrOffset)),
-                        ALIGN_PREV((numFramesToDecode * frameSize) + SAMPLES_PER_FRAME, 16), flags,
-                        &driver->sample_dma_idx, sample->medium);
-                }
-
-                if (samplesToLoadAddr == NULL) {
-                    // The ram address was unsuccessfully allocated
-                    return cmd;
-                }
-
-                // Move the raw sample chunk from ram to the rsp
-                // DMEM at the addresses before DMEM_COMPRESSED_ADPCM_DATA
-                sampleDataChunkAlignPad = (u32)samplesToLoadAddr & 0xF;
-                sampleDataChunkSize = ALIGN_PREV((numFramesToDecode * frameSize) + SAMPLES_PER_FRAME, 16);
-                sampleDataDmemAddr = DMEM_COMPRESSED_ADPCM_DATA - sampleDataChunkSize;
-                aLoadBuffer2(cmd++, samplesToLoadAddr - sampleDataChunkAlignPad, sampleDataDmemAddr,
-                            sampleDataChunkSize);
-            } else {
-                numSamplesToDecode = 0;
-                sampleDataChunkAlignPad = 0;
-            }
-
-            if (driver->at_loop_point) {
-                aSetLoop(cmd++, sample->loop->predictor_state);
-                flags = A_LOOP;
-                driver->at_loop_point = false;
-            }
-
-            numSamplesInThisIteration = numSamplesToDecode + numSamplesInFirstFrame - numTrailingSamplesToIgnore;
-
-            if (numSamplesProcessed == 0) {
-                //! FAKE:
-                if (1) {}
-                skipBytes = numFirstFrameSamplesToIgnore * SAMPLE_SIZE;
-            } else {
-                dmemUncompressedAddrOffset2 = ALIGN_PREV(dmemUncompressedAddrOffset1 + 8 * SAMPLE_SIZE, 16);
-            }
-
-            // Decompress the raw sample chunks in the rsp
-            // Goes from adpcm (compressed) sample data to pcm (uncompressed) sample data
-            switch (sample->codec) {
-                case CODEC_ADPCM:
-                    sampleDataChunkSize = ALIGN_PREV((numFramesToDecode * frameSize) + SAMPLES_PER_FRAME, 16);
-                    sampleDataDmemAddr = DMEM_COMPRESSED_ADPCM_DATA - sampleDataChunkSize;
-                    aSetBuffer(cmd++, 0, sampleDataDmemAddr + sampleDataChunkAlignPad,
-                                DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2,
-                                numSamplesToDecode * SAMPLE_SIZE);
-                    aADPCMdec(cmd++, flags, driver->synth_params->adpcm_state);
-                    break;
-
-                case CODEC_SMALL_ADPCM:
-                    sampleDataChunkSize = ALIGN_PREV((numFramesToDecode * frameSize) + SAMPLES_PER_FRAME, 16);
-                    sampleDataDmemAddr = DMEM_COMPRESSED_ADPCM_DATA - sampleDataChunkSize;
-                    aSetBuffer(cmd++, 0, sampleDataDmemAddr + sampleDataChunkAlignPad,
-                                DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2,
-                                numSamplesToDecode * SAMPLE_SIZE);
-                    aADPCMdec(cmd++, flags | A_ADPCM_SHORT, driver->synth_params->adpcm_state);
-                    break;
-
-                case CODEC_S8:
-                    sampleDataChunkSize = ALIGN_PREV((numFramesToDecode * frameSize) + SAMPLES_PER_FRAME, 16);
-                    sampleDataDmemAddr = DMEM_COMPRESSED_ADPCM_DATA - sampleDataChunkSize;
-                    Nas_SetBuffer(cmd++, 0, sampleDataDmemAddr + sampleDataChunkAlignPad,
-                                            DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2,
-                                            numSamplesToDecode * SAMPLE_SIZE);
-                    Nas_PCM8dec(cmd++, flags, (s32)driver->synth_params->adpcm_state);
-                    break;
-
-                case CODEC_UNK7:
-                default:
-                    // No decompression
-                    break;
-            }
-
-            if (numSamplesProcessed != 0) {
-                aDMEMMove(cmd++,
-                            DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2 +
-                                (numFirstFrameSamplesToIgnore * SAMPLE_SIZE),
-                            DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset1,
-                            numSamplesInThisIteration * SAMPLE_SIZE);
-            }
-
-            numSamplesProcessed += numSamplesInThisIteration;
-
-            switch (flags) {
-                case A_INIT:
-                    skipBytes = SAMPLES_PER_FRAME * SAMPLE_SIZE;
-                    dmemUncompressedAddrOffset1 = (numSamplesToDecode + SAMPLES_PER_FRAME) * SAMPLE_SIZE;
-                    break;
-
-                case A_LOOP:
-                    dmemUncompressedAddrOffset1 =
-                        numSamplesInThisIteration * SAMPLE_SIZE + dmemUncompressedAddrOffset1;
-                    break;
-
-                default:
-                    if (dmemUncompressedAddrOffset1 != 0) {
-                        dmemUncompressedAddrOffset1 =
-                            numSamplesInThisIteration * SAMPLE_SIZE + dmemUncompressedAddrOffset1;
-                    } else {
-                        dmemUncompressedAddrOffset1 =
-                            (numFirstFrameSamplesToIgnore + numSamplesInThisIteration) * SAMPLE_SIZE;
-                    }
-                    break;
-            }
-
-            flags = A_CONTINUE;
-
-        skip:
-
-            // Update what to do with the samples next
-            if (sampleFinished) {
-                if ((numSamplesToLoadAdj - numSamplesProcessed) != 0) {
-                    Nas_ClearBuffer(cmd++, DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset1,
-                                            (numSamplesToLoadAdj - numSamplesProcessed) * SAMPLE_SIZE);
-                }
-                finished = true;
-                chan->common_ch.finished = true;
-                __Nas_WaveTerminateProcess(update_idx, chan_id);
-                break; // break out of the for-loop
-            } else if (loopToPoint) {
-                driver->at_loop_point = true;
-                driver->sample_pos_integer_part = loopInfo->loop_start;
-            } else {
-                driver->sample_pos_integer_part += numSamplesToProcess;
-            }
-        }
-
-        switch (numParts) {
-            case 1:
-                sampleDmemBeforeResampling = DMEM_UNCOMPRESSED_NOTE + skipBytes;
-                break;
-
-            case 2:
-                switch (curPart) {
-                    case 0:
-                        Nas_HalfCut(cmd++, DMEM_UNCOMPRESSED_NOTE + skipBytes,
-                                            DMEM_TEMP + (SAMPLES_PER_FRAME * SAMPLE_SIZE),
-                                            ALIGN_PREV(numSamplesToLoadAdj / 2, 8));
-                        numSamplesToLoadFirstPart = numSamplesToLoadAdj;
-                        sampleDmemBeforeResampling = DMEM_TEMP + (SAMPLES_PER_FRAME * SAMPLE_SIZE);
-                        if (finished) {
-                            Nas_ClearBuffer(cmd++, sampleDmemBeforeResampling + numSamplesToLoadFirstPart,
-                                                    numSamplesToLoadAdj + SAMPLES_PER_FRAME);
-                        }
-                        break;
-
-                    case 1:
-                        Nas_HalfCut(cmd++, DMEM_UNCOMPRESSED_NOTE + skipBytes,
-                                            DMEM_TEMP + (SAMPLES_PER_FRAME * SAMPLE_SIZE) + numSamplesToLoadFirstPart,
-                                            ALIGN_PREV(numSamplesToLoadAdj / 2, 8));
-                        break;
+                    case CODEC_S16:
+                        Nas_ClearBuffer(cmd++, DMEM_UNCOMPRESSED_NOTE,
+                            numSamplesToLoadAdj * SAMPLE_SIZE + SAMPLES_PER_FRAME * SAMPLE_SIZE);
+                        skipBytes = 0;
+                        flags = A_CONTINUE;
+                        numSamplesProcessed = numSamplesToLoadAdj;
+                        dmemUncompressedAddrOffset1 = numSamplesToLoadAdj;
+                        goto skip;
 
                     default:
                         break;
                 }
+
+                sampleDataChunkSize = ALIGN_PREV((numFramesToDecode * frameSize) + 31, 16);
+                sampleDataDmemAddr = DMEM_COMPRESSED_ADPCM_DATA - sampleDataChunkSize;
+
+                // Move the compressed raw sample data from ram into the rsp (DMEM)
+                if (numFramesToDecode != 0) {
+                    // Get the offset from the start of the sample to where the sample is currently playing from
+                    frameIndex = (skipInitialSamples + driver->sample_pos_integer_part  - numFirstFrameSamplesToIgnore) /
+                                    SAMPLES_PER_FRAME;
+                    sampleAddrOffset = frameIndex * frameSize;
+                    tmpSamplesToLoadAddr = sampleAddr + sampleAddrOffset;
+                    tmpSamplesToLoadAddr += zeroOffset;
+
+                    // Get the ram address of the requested sample chunk
+                    if (sample->medium == MEDIUM_RAM) {
+                        // Sample is already loaded into ram
+                        if (sample->bit31 == TRUE) {
+                            samplesToLoadAddr = (u8*)(*NA_SYNC_PROC)(tmpSamplesToLoadAddr, numFramesToDecode * frameSize);
+                        } else {
+                            samplesToLoadAddr = tmpSamplesToLoadAddr;
+                        }
+                    } else if (AG._2A14) { // always false
+                        return cmd;
+                    } else if (sample->medium == MEDIUM_DISK) {
+                        // This medium is unsupported so terminate processing this chan
+                        return cmd;
+                    } else {
+                        // This medium is not in ram, so dma the requested sample into ram
+                        samplesToLoadAddr = (u8*)Nas_WaveDmaCallBack((u32)(tmpSamplesToLoadAddr),
+                        sampleDataChunkSize, flags,
+                            &driver->sample_dma_idx, sample->medium);
+                    }
+
+                    if (samplesToLoadAddr == NULL) {
+                        // The ram address was unsuccessfully allocated
+                        return cmd;
+                    }
+
+                    // Move the raw sample chunk from ram to the rsp
+                    // DMEM at the addresses before DMEM_COMPRESSED_ADPCM_DATA
+                    sampleDataChunkAlignPad = (u32)samplesToLoadAddr & 0xF;
+                    aLoadCache(cmd++, samplesToLoadAddr - sampleDataChunkAlignPad, sampleDataDmemAddr,
+                                sampleDataChunkSize);
+                } else {
+                    numSamplesToDecode = 0;
+                    sampleDataChunkAlignPad = 0;
+                }
+
+                if (driver->at_loop_point) {
+                    aSetLoop(cmd++, sample->loop->predictor_state);
+                    flags = A_LOOP;
+                    driver->at_loop_point = false;
+                }
+
+                numSamplesInThisIteration = numSamplesToDecode + numSamplesInFirstFrame - numTrailingSamplesToIgnore;
+
+                if (numSamplesProcessed == 0) {
+                    //! FAKE:
+                    if (1) {}
+                    skipBytes = numFirstFrameSamplesToIgnore * SAMPLE_SIZE;
+                } else {
+                    dmemUncompressedAddrOffset2 = ALIGN_NEXT(dmemUncompressedAddrOffset1 + 8 * SAMPLE_SIZE, 16);
+                }
+
+                // Decompress the raw sample chunks in the rsp
+                // Goes from adpcm (compressed) sample data to pcm (uncompressed) sample data
+                switch (sample->codec) {
+                    case CODEC_ADPCM:
+                        aSetBuffer(cmd++, 0, sampleDataDmemAddr + sampleDataChunkAlignPad,
+                                    DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2,
+                                    numSamplesToDecode * SAMPLE_SIZE);
+                        aADPCMdec(cmd++, flags, driver->synth_params->adpcm_state);
+                        break;
+
+                    case CODEC_SMALL_ADPCM:
+                        aSetBuffer(cmd++, 0, sampleDataDmemAddr + sampleDataChunkAlignPad,
+                                    DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2,
+                                    numSamplesToDecode * SAMPLE_SIZE);
+                        aADPCMdec(cmd++, flags | A_ADPCM_SHORT, driver->synth_params->adpcm_state);
+                        break;
+
+                    case CODEC_S8:
+                        Nas_SetBuffer(cmd++, 0, sampleDataDmemAddr + sampleDataChunkAlignPad,
+                                                DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2,
+                                                numSamplesToDecode * SAMPLE_SIZE);
+                        Nas_PCM8dec(cmd++, flags, (s32)driver->synth_params->adpcm_state);
+                        break;
+
+                    case CODEC_UNK7:
+                    default:
+                        // No decompression
+                        break;
+                }
+
+                if (numSamplesProcessed != 0) {
+                    aDMEMMove(cmd++,
+                                DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset2 +
+                                    (numFirstFrameSamplesToIgnore * SAMPLE_SIZE),
+                                DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset1,
+                                numSamplesInThisIteration * SAMPLE_SIZE);
+                }
+
+                numSamplesProcessed += numSamplesInThisIteration;
+
+                switch (flags) {
+                    case A_INIT:
+                        skipBytes = SAMPLES_PER_FRAME * SAMPLE_SIZE;
+                        dmemUncompressedAddrOffset1 = numSamplesToDecode * SAMPLE_SIZE + SAMPLES_PER_FRAME * SAMPLE_SIZE;
+                        break;
+
+                    case A_LOOP:
+                        dmemUncompressedAddrOffset1 =
+                            numSamplesInThisIteration * SAMPLE_SIZE + dmemUncompressedAddrOffset1;
+                        break;
+
+                    default:
+                        if (dmemUncompressedAddrOffset1 != 0) {
+                            dmemUncompressedAddrOffset1 =
+                                numSamplesInThisIteration * SAMPLE_SIZE + dmemUncompressedAddrOffset1;
+                        } else {
+                            dmemUncompressedAddrOffset1 =
+                                (numFirstFrameSamplesToIgnore + numSamplesInThisIteration) * SAMPLE_SIZE;
+                        }
+                        break;
+                }
+
+                flags = A_CONTINUE;
+
+            skip:
+
+                // Update what to do with the samples next
+                if (sampleFinished) {
+                    s32 dmemClearCount = numSamplesToLoadAdj - numSamplesProcessed;
+                    if (dmemClearCount != 0) {
+                        Nas_ClearBuffer(cmd++, DMEM_UNCOMPRESSED_NOTE + dmemUncompressedAddrOffset1,
+                            dmemClearCount * SAMPLE_SIZE);
+                    }
+                    finished = true;
+                    chan->common_ch.finished = true;
+                    __Nas_WaveTerminateProcess(stack_update_idx, stack_chan_id);
+                    break; // break out of the for-loop
+                } else if (loopToPoint) {
+                    driver->at_loop_point = true;
+                    driver->sample_pos_integer_part = loopInfo->loop_start;
+                } else {
+                    driver->sample_pos_integer_part += numSamplesToProcess;
+                }
+            }
+
+            switch (numParts) {
+                case 1:
+                    sampleDmemBeforeResampling = DMEM_UNCOMPRESSED_NOTE + skipBytes;
+                    break;
+
+                case 2:
+                    switch (curPart) {
+                        case 0:
+                            Nas_HalfCut(cmd++, DMEM_UNCOMPRESSED_NOTE + skipBytes,
+                                                DMEM_TEMP + (SAMPLES_PER_FRAME * SAMPLE_SIZE),
+                                                ALIGN_NEXT(numSamplesToLoadAdj / 2, 8));
+                            numSamplesToLoadFirstPart = numSamplesToLoadAdj;
+                            sampleDmemBeforeResampling = DMEM_TEMP + (SAMPLES_PER_FRAME * SAMPLE_SIZE);
+                            if (finished) {
+                                Nas_ClearBuffer(cmd++, sampleDmemBeforeResampling + numSamplesToLoadFirstPart,
+                                                        numSamplesToLoadAdj + SAMPLES_PER_FRAME);
+                            }
+                            break;
+
+                        case 1:
+                            Nas_HalfCut(cmd++, DMEM_UNCOMPRESSED_NOTE + skipBytes,
+                                                DMEM_TEMP + (SAMPLES_PER_FRAME * SAMPLE_SIZE) + numSamplesToLoadFirstPart,
+                                                ALIGN_NEXT(numSamplesToLoadAdj / 2, 8));
+                            break;
+
+                        default:
+                            break;
+                    }
+                    break;
+
+                default:
+                    break;
+            }
+            if (finished) {
                 break;
-
-            default:
-                break;
+            }
         }
-        if (finished) {
-            break;
+
+        // Update the flags for the signal processing below
+        flags = A_CONTINUE;
+        if (common->needs_init == true) {
+            flags = A_INIT;
+            common->needs_init = false;
         }
-    }
 
-    // Update the flags for the signal processing below
-    flags = A_CONTINUE;
-    if (common->needs_init == true) {
-        common->needs_init = false;
-        flags = A_INIT;
-    }
+        // Resample the decompressed mono-signal to the correct pitch
+        cmd = Nas_Synth_Resample(cmd, driver, size, frequencyFixedPoint,
+                                    sampleDmemBeforeResampling, flags | something_unused_from_sm64);
 
-    // Resample the decompressed mono-signal to the correct pitch
-    cmd = Nas_Synth_Resample(cmd, driver, size, frequencyFixedPoint,
-                                   sampleDmemBeforeResampling, flags);
-
-    // Not present in MM
-    if (bookOffset != 0) {
-        Nas_NoiseFilter(cmd++, DMEM_TEMP, samples_per_update, NOISE_TABLE[bookOffset >> 4], NOISE_TABLE[bookOffset & 0xF]);
-    }
-
-    // Apply the gain to the mono-signal to adjust the volume
-    gain = common->gain;
-    if (gain != 0) {
-        // A gain of 0x10 (a UQ4.4 number) is equivalent to 1.0 and represents no volume change
-        if (gain < 0x10) {
-            gain = 0x10;
+        // Not present in MM
+        if (bookOffset != 0) {
+            Nas_NoiseFilter(cmd++, DMEM_TEMP, samples_per_update, NOISE_TABLE[(bookOffset >> 4) & 0xF], NOISE_TABLE[bookOffset & 0xF]);
         }
-        Nas_DistFilter(cmd++, gain, DMEM_TEMP, 0, size * SAMPLE_SIZE);
-    }
 
-    // Not in MM
-    if (!STOP_VELOCONV) {
-        // Load the velocity convolution table into DMEM_0x800
-        aLoadBuffer2(cmd++, (u32)VELOCONV_TABLE[bookOffset], 0x800, sizeof(VELOCONV_TABLE[bookOffset]));
-    }
+        // Apply the gain to the mono-signal to adjust the volume
+        gain = common->gain;
+        if (gain != 0) {
+            // A gain of 0x10 (a UQ4.4 number) is equivalent to 1.0 and represents no volume change
+            if (gain < 0x10) {
+                gain = 0x10;
+            }
+            Nas_DistFilter(cmd++, gain, DMEM_TEMP, 0, size + SAMPLES_PER_FRAME * SAMPLE_SIZE);
+        }
 
-    // Apply the filter to the mono-signal
-    filter = common->filter;
-    if (filter != 0) {
-        Nas_FirLoadTable(cmd++, size, filter);
-        Nas_FirFilter(cmd++, flags, DMEM_TEMP, driver->synth_params->filter_state);
-    }
+        // Not in MM
+        if (!STOP_VELOCONV) {
+            // Load the velocity convolution table into DMEM_0x800
+            s32 vel_conv_idx = driver->vel_conv_table_idx;
+            aLoadBuffer2(cmd++, (u32)VELOCONV_TABLE[vel_conv_idx], 0x800, sizeof(VELOCONV_TABLE[vel_conv_idx]));
+            aUnkCmd3(cmd++, DMEM_TEMP, 0x800, samples_per_update);
+        }
 
-    // Apply the comb filter to the mono-signal by taking the signal with a small temporal offset,
-    // and adding it back to itself
-    combFilterSize = common->comb_filter_size;
-    combFilterGain = common->comb_filter_gain;
-    combFilterState = driver->synth_params->comb_filter_state;
-    if ((combFilterSize != 0) && (common->comb_filter_gain != 0)) {
-        Nas_DMEMMove(cmd++, DMEM_TEMP, DMEM_COMB_TEMP, size);
-        combFilterDmem = DMEM_COMB_TEMP - combFilterSize;
-        if (driver->comb_filter_needs_init) {
-            Nas_ClearBuffer(cmd++, combFilterDmem, combFilterSize);
-            driver->comb_filter_needs_init = false;
+        // Apply the filter to the mono-signal
+        filter = common->filter;
+        if (filter != 0) {
+            Nas_FirLoadTable(cmd++, size, filter);
+            Nas_FirFilter(cmd++, flags, DMEM_TEMP, driver->synth_params->filter_state);
+        }
+
+        // Apply the comb filter to the mono-signal by taking the signal with a small temporal offset,
+        // and adding it back to itself
+        combFilterSize = common->comb_filter_size;
+        combFilterGain = common->comb_filter_gain;
+        combFilterState = driver->synth_params->comb_filter_state;
+        if ((common->comb_filter_size != 0) && (common->comb_filter_gain != 0)) {
+            Nas_DMEMMove(cmd++, DMEM_TEMP, DMEM_COMB_TEMP, size);
+            // combFilterDmem = DMEM_COMB_TEMP - combFilterSize;
+            if (driver->comb_filter_needs_init) {
+                Nas_ClearBuffer(cmd++, DMEM_COMB_TEMP - combFilterSize, combFilterSize);
+                driver->comb_filter_needs_init = false;
+            } else {
+                Nas_LoadBuffer2(cmd++, DMEM_COMB_TEMP - combFilterSize, combFilterSize, (s32)combFilterState);
+            }
+            Nas_SaveBuffer2(cmd++, DMEM_TEMP - combFilterSize + size, combFilterSize, (s32)combFilterState);
+            Nas_Mix(cmd++, size >> 4, combFilterGain, DMEM_COMB_TEMP, DMEM_COMB_TEMP - combFilterSize);
+            Nas_DMEMMove(cmd++, DMEM_COMB_TEMP - combFilterSize, DMEM_TEMP, size);
         } else {
-            Nas_LoadBuffer2(cmd++, combFilterDmem, combFilterSize, (s32)combFilterState);
+            driver->comb_filter_needs_init = true;
         }
-        Nas_SaveBuffer2(cmd++, DMEM_TEMP + size - combFilterSize, combFilterSize, (s32)combFilterState);
-        Nas_Mix(cmd++, size >> 4, combFilterGain, DMEM_COMB_TEMP, combFilterDmem);
-        Nas_DMEMMove(cmd++, combFilterDmem, DMEM_TEMP, size);
-    } else {
-        driver->comb_filter_needs_init = true;
-    }
 
-    // Determine the behavior of the audio processing that leads to the haas effect
-    if ((common->haas_effect_left_delay_size != 0) || (driver->prev_haas_effect_left_delay_size != 0)) {
-        haasEffectDelaySide = HAAS_EFFECT_DELAY_LEFT;
-    } else if ((common->haas_effect_right_delay_size != 0) || (driver->prev_haas_effect_right_delay_size != 0)) {
-        haasEffectDelaySide = HAAS_EFFECT_DELAY_RIGHT;
-    } else {
-        haasEffectDelaySide = HAAS_EFFECT_DELAY_NONE;
-    }
-
-    // Apply an unknown effect based on the surround sound-mode
-    if (AG.sound_mode == SOUND_OUTPUT_SURROUND) {
-        // common->target_volume_left = common->target_volume_left >> 1;
-        // common->target_volume_right = common->target_volume_right >> 1;
-        if (common->surround_effect_idx != 0xFF) {
-            cmd = Nas_DolbySurround(cmd, common, driver, samples_per_update, DMEM_TEMP, flags);
+        // Determine the behavior of the audio processing that leads to the haas effect
+        if ((common->haas_effect_left_delay_size != 0) || (driver->prev_haas_effect_left_delay_size != 0)) {
+            haasEffectDelaySide = HAAS_EFFECT_DELAY_LEFT;
+        } else if ((common->haas_effect_right_delay_size != 0) || (driver->prev_haas_effect_right_delay_size != 0)) {
+            haasEffectDelaySide = HAAS_EFFECT_DELAY_RIGHT;
+        } else {
+            haasEffectDelaySide = HAAS_EFFECT_DELAY_NONE;
         }
-    }
 
-    // Split the mono-signal into left and right channels:
-    // Both for dry signal (to go to the speakers now)
-    // and for wet signal (to go to a reverb buffer to be stored, and brought back later to produce an echo)
-    cmd = Nas_Synth_Envelope(cmd, common, driver, samples_per_update, DMEM_TEMP, haasEffectDelaySide, flags);
-
-    // Apply the haas effect by delaying either the left or the right channel by a small amount
-    if (common->use_haas_effect) {
-        if (!(flags & A_INIT)) {
-            flags = A_CONTINUE;
+        // Apply an unknown effect based on the surround sound-mode
+        if (AG.sound_mode == SOUND_OUTPUT_DOLBY_SURROUND) {
+            // common->target_volume_left = common->target_volume_left >> 1;
+            // common->target_volume_right = common->target_volume_right >> 1;
+            if (common->surround_effect_idx != 0xFF) {
+                cmd = Nas_DolbySurround(cmd, common, driver, samples_per_update, DMEM_TEMP, flags);
+            }
+        }
+
+        // Split the mono-signal into left and right channels:
+        // Both for dry signal (to go to the speakers now)
+        // and for wet signal (to go to a reverb buffer to be stored, and brought back later to produce an echo)
+        cmd = Nas_Synth_Envelope(cmd, common, driver, samples_per_update, DMEM_TEMP, haasEffectDelaySide, flags);
+
+        // Apply the haas effect by delaying either the left or the right channel by a small amount
+        if (common->use_haas_effect) {
+            if (!(flags & A_INIT)) {
+                flags = A_CONTINUE;
+            }
+            cmd = Nas_Synth_Delay(cmd, common, driver, size, flags, haasEffectDelaySide);
         }
-        cmd = Nas_Synth_Delay(cmd, common, driver, size, flags, haasEffectDelaySide);
     }
 
     return cmd;

From ce56cfc0bc3a13150706357a09bf7559b81603fe Mon Sep 17 00:00:00 2001
From: Cuyler36 <Cuyler36@users.noreply.github.com>
Date: Thu, 23 Oct 2025 04:00:49 -0400
Subject: [PATCH 2/3] jaudio_NES/driver.c 98%

---
 include/jaudio_NES/audiocommon.h        |  30 ++++
 include/jaudio_NES/audiostruct.h        |   2 +-
 include/jaudio_NES/driver.h             |   2 +-
 src/static/jaudio_NES/internal/driver.c | 228 +++++++++++++++++++++++-
 4 files changed, 255 insertions(+), 7 deletions(-)

diff --git a/include/jaudio_NES/audiocommon.h b/include/jaudio_NES/audiocommon.h
index 0dfa9e9a..7c5d2d36 100644
--- a/include/jaudio_NES/audiocommon.h
+++ b/include/jaudio_NES/audiocommon.h
@@ -168,6 +168,36 @@ extern "C" {
 
 #define aFirLoadTable(pkt, size, addr) aFirFilter(pkt, 2, size, addr)
 
+#define aEnvMixer2(pkt, dmemi, count, swapLR, x0, x1, x2, x3, m, bits)   \
+{   /* Fills the Acmd at pkt: flags/count/dmemi in w0, m in w1. */      \
+        Acmd *_a = (Acmd *)pkt;                                         \
+        /* w0 has no opcode term of its own: bits is ORed in as-is. */  \
+        _a->words.w0 = ((bits) | _SHIFTL((dmemi) >> 4, 16, 8) |         \
+                _SHIFTL(count, 8, 8) | _SHIFTL(swapLR, 4, 1) |          \
+                _SHIFTL(x0, 3, 1) | _SHIFTL(x1, 2, 1) |                 \
+                _SHIFTL(x2, 1, 1) | _SHIFTL(x3, 0, 1));                 \
+        _a->words.w1 = (unsigned int)(m);                               \
+}
+
+#define aAddMixer(pkt, count, dmemi, dmemo, a4)                         \
+{   /* Fills the Acmd at pkt with an A_CMD_ADDMIXER command. */         \
+        Acmd *_a = (Acmd *)pkt;                                         \
+        /* w0 keeps count >> 4: the low 4 bits of count are dropped. */ \
+        _a->words.w0 = (_SHIFTL(A_CMD_ADDMIXER, 24, 8) |                    \
+                _SHIFTL((count) >> 4, 16, 8) | _SHIFTL(a4, 0, 16));     \
+        _a->words.w1 = _SHIFTL(dmemi, 16, 16) | _SHIFTL(dmemo, 0, 16);  \
+}
+
+// from MM: writes an A_CMD_RESAMPLE_ZOH command into the Acmd at pkt (pitch goes into w0, pitchAccu into w1)
+#define aResampleZoh(pkt, pitch, pitchAccu)                             \
+{                                                                       \
+        Acmd *_a = (Acmd *)pkt;                                         \
+                                                                        \
+        _a->words.w0 = (_SHIFTL(A_CMD_RESAMPLE_ZOH, 24, 8) |                \
+                _SHIFTL(pitch, 0, 16));                                 \
+        _a->words.w1 = _SHIFTL(pitchAccu, 0, 16);                       \
+}
+
 #define NA_MAKE_COMMAND(a0, a1, a2, a3) \
     (u32)((((a0) & 0xFF) << 24) | (((a1) & 0xFF) << 16) | (((a2) & 0xFF) << 8) | (((a3) & 0xFF) << 0))
 
diff --git a/include/jaudio_NES/audiostruct.h b/include/jaudio_NES/audiostruct.h
index d8a5d7ca..a8de0d20 100644
--- a/include/jaudio_NES/audiostruct.h
+++ b/include/jaudio_NES/audiostruct.h
@@ -278,7 +278,7 @@ typedef struct driverch_ {
     /* 0x06 */ u8 comb_filter_needs_init;
     /* 0x07 */ u8 vel_conv_table_idx;
     /* 0x08 */ u16 sample_pos_fractional_part;
-    /* 0x0A */ u16 sound_effect_gain;
+    /* 0x0A */ u16 surround_effect_gain;
     /* 0x0C */ s32 sample_pos_integer_part;
     /* 0x10 */ synthparams* synth_params;
     /* 0x14 */ s16 current_volume_left;
diff --git a/include/jaudio_NES/driver.h b/include/jaudio_NES/driver.h
index 95171e8f..b09a5292 100644
--- a/include/jaudio_NES/driver.h
+++ b/include/jaudio_NES/driver.h
@@ -19,7 +19,7 @@
 extern Acmd* Nas_smzAudioFrame(Acmd* cmds, s32* processed_cmds, s16* pSamples, s32 nSamples);
 extern Acmd* Nas_DriveRsp(s16* aiBuf, s32 aiBufLen, Acmd* cmd, s32 updateIndex);
 extern Acmd* Nas_SynthMain(s32 chan_id, commonch* common, driverch* driver, s16* samples, s32 samples_per_update, Acmd* cmd, s32 update_idx);
-extern Acmd* Nas_Synth_Resample(Acmd* cmd, const driverch* driver, s32 size, u16 frequencyFixedPoint, u16 sampleDmemBeforeResampling, s32 flags);
+extern Acmd* Nas_Synth_Resample(Acmd* cmd, const driverch* driver, s32 size, u16 pitch, u16 sampleDmemBeforeResampling, s32 flags);
 extern Acmd* Nas_DolbySurround(Acmd* cmd, commonch* common, driverch* driver, s32 samples_per_update, s32 dmem, s32 flags);
 extern Acmd* Nas_Synth_Envelope(Acmd* cmd, commonch* common, driverch* driver, s32 samples_per_update, u16 dmem, s32 haasEffectDelaySide, s32 flags);
 extern Acmd* Nas_Synth_Delay(Acmd* cmd, commonch* common, driverch* driver, s32 size, s32 flags, s32 haasEffectDelaySide);
diff --git a/src/static/jaudio_NES/internal/driver.c b/src/static/jaudio_NES/internal/driver.c
index 4c497d43..16dec490 100644
--- a/src/static/jaudio_NES/internal/driver.c
+++ b/src/static/jaudio_NES/internal/driver.c
@@ -1,5 +1,6 @@
 #include "jaudio_NES/driver.h"
 
+#include "PR/abi.h"
 #include "jaudio_NES/audiocommon.h"
 #include "jaudio_NES/audiostruct.h"
 #include "jaudio_NES/astest.h"
@@ -7,6 +8,7 @@
 #include "jaudio_NES/track.h"
 #include "jaudio_NES/system.h"
 #include "os/OSCache.h"
+#include "types.h"
 
 #define DMEM_TEMP 0x380
 #define DMEM_WET_TEMP 0x3A0
@@ -14,8 +16,11 @@
 #define DMEM_LEFT_CH 0x900
 #define DMEM_RIGHT_CH 0xAA0
 #define DMEM_WET_LEFT_CH 0xC40
+#define DMEM_WET_RIGHT_CH 0xDE0
 #define DMEM_UNCOMPRESSED_NOTE 0x540
 #define DMEM_COMPRESSED_ADPCM_DATA 0x900
+#define DMEM_HAAS_TEMP 0x580
+#define DMEM_SURROUND_TEMP 0x480
 
 typedef enum {
     /* 0 */ HAAS_EFFECT_DELAY_NONE,
@@ -23,14 +28,14 @@ typedef enum {
     /* 2 */ HAAS_EFFECT_DELAY_RIGHT // Delay right channel so that left channel is heard first
 } HaasEffectDelaySide;
 
-static u16 NOISE_TABLE[] = { 0, 1, 2, 4, 8, 12, 16, 20, 24, 32, 36, 40, 46, 52, 56, 53 };
+static u16 NOISE_TABLE[] = { 0, 1, 2, 4, 8, 12, 16, 20, 24, 32, 36, 40, 46, 52, 56, 64 };
 static dspch_ DSPCH[64];
 static s32 STOP_VELOCONV = 0;
 
-static u32 Env_DataH = (A_ENVMIXER << 24) | (0x00 << 16) | (0x00 << 8) | (0x00);
-static u32 Env_DataL1 = 0x58AAC4DE;
-static u32 Env_DataL2 = 0x9058C4DE;
-static u32 Env_DataL3 = 0x9058C4DE;
+static u32 Env_DataH = (A_CMD_ENVMIXER << 24) | (0x00 << 16) | (0x00 << 8) | (0x00);
+static u32 Env_Data_L1 = 0x58AAC4DE;
+static u32 Env_Data_L2 = 0x9058C4DE;
+static u32 Env_Data_L3 = 0x90AAC4DE;
 
 static Acmd* __LoadAuxBuf(Acmd* cmd, u16 ofs, u16 startPos, s32 size, delay* del_p);
 static Acmd* __SaveAuxBuf(Acmd* cmd, u16 ofs, u16 startPos, s32 size, delay* del_p);
@@ -1240,3 +1245,216 @@ codec_continue_and_skip:
 
     return cmd;
 }
+
+Acmd* Nas_DolbySurround(Acmd* cmd, commonch* common, driverch* driver, s32 num_samples_per_update, s32 haas_dmem, s32 flags) { // Appends the surround-effect commands to cmd and returns the advanced pointer
+    s32 size;
+    s32 wetGain;
+    u16 dryGain;
+    s64 dmem = DMEM_SURROUND_TEMP;
+    f32 decayGain;
+
+    size = num_samples_per_update * SAMPLE_SIZE; // presumably the byte length of one update; SAMPLE_SIZE is defined elsewhere
+    Nas_DMEMMove(cmd++, haas_dmem, DMEM_HAAS_TEMP, size); // mirrored by the reverse move just before the return
+    dryGain = driver->surround_effect_gain; // gain left by the previous call; read before it is reset/updated below
+
+    if (flags == A_INIT) {
+        aClearBuffer(cmd++, dmem, sizeof(driver->synth_params->surround_effect_state)); // A_INIT: no saved state is loaded; clear the DMEM area instead
+        driver->surround_effect_gain = 0;
+    } else {
+        wetGain = (driver->surround_effect_gain * driver->cur_reverb_vol) >> 7; // surround gain scaled by cur_reverb_vol / 128
+        
+        aLoadBuffer2(cmd++, driver->synth_params->surround_effect_state, dmem, sizeof(driver->synth_params->surround_effect_state)); // presumably the data stored by the aSaveBuffer2 below on an earlier call
+        
+        aMix(cmd++, size >> 4, dryGain, dmem, DMEM_LEFT_CH);
+        aMix(cmd++, size >> 4, (dryGain ^ 0xFFFF), dmem, DMEM_RIGHT_CH); // same source, bit-inverted gain on the right
+        
+        aMix(cmd++, size >> 4, wetGain, dmem, DMEM_WET_LEFT_CH);
+        aMix(cmd++, size >> 4, (wetGain ^ 0xFFFF), dmem, DMEM_WET_RIGHT_CH); // likewise for the wet pair
+    }
+
+    aSaveBuffer2(cmd++, DMEM_SURROUND_TEMP + size, driver->synth_params->surround_effect_state, sizeof(driver->synth_params->surround_effect_state)); // NOTE(review): confirm argument order against the aSaveBuffer2 macro
+
+    decayGain = (common->target_volume_left + common->target_volume_right) / 8192.0f; // 1.0f / 0x2000
+
+    if (decayGain > 1.0f) {
+        decayGain = 1.0f;
+    }
+
+    decayGain = decayGain * StereoLeft[127 - common->surround_effect_idx]; // table is indexed from the far end (127 - idx)
+    driver->surround_effect_gain = ((decayGain * 0x7FFF) + driver->surround_effect_gain) / 2; // average of the new value and the stored gain
+
+    Nas_DMEMMove(cmd++, DMEM_HAAS_TEMP, haas_dmem, size);
+
+    return cmd;
+}
+
+extern Acmd* Nas_Synth_Resample(Acmd* cmd, const driverch* driver, s32 size, u16 pitch, u16 sampleDmemBeforeResampling, s32 flags) { // Appends the final resample step to cmd; both branches target DMEM_TEMP
+    if (pitch == 0) {
+        Nas_ClearBuffer(cmd++, DMEM_TEMP, size); // zero pitch: no resample command is emitted, DMEM_TEMP is cleared instead
+    } else {
+        aSetBuffer(cmd++, 0, sampleDmemBeforeResampling, DMEM_TEMP, size); // presumably in = sampleDmemBeforeResampling, out = DMEM_TEMP
+        aResample(cmd++, flags, pitch, driver->synth_params->final_resample_state);
+    }
+
+    return cmd;
+}
+
+extern Acmd* Nas_Synth_Envelope(Acmd* cmd, commonch* common, driverch* driver, s32 samples_per_update, u16 dmem, s32 haasEffectDelaySide, s32 flags) { // Appends envelope setup + mixer commands to cmd and advances the stored volumes; flags is not read here
+    u16 targetVolRight;
+    u16 targetVolLeft;
+    u32 dmemDests;
+    u16 curVolLeft;
+    u16 curVolRight;
+    s32 curReverbVolAndFlags;
+    u16 curReverbVol;
+    s32 targetReverbVol;
+    s16 rampLeft;
+    s16 rampRight;
+    s16 rampReverb;
+    f32 defaultPanVolume;
+
+    
+    targetReverbVol = common->target_reverb_volume;
+    curVolLeft = driver->current_volume_left;
+    curVolRight = driver->current_volume_right;
+
+    targetVolLeft = common->target_volume_left << 4; // targets are scaled up 16x before being compared with the current volumes
+    targetVolRight = common->target_volume_right << 4;
+
+    if ((AG.sound_mode == SOUND_OUTPUT_DOLBY_SURROUND)) {
+        u8 idx = common->surround_effect_idx;
+        
+        if (idx != 0xFF) { // 0xFF skips the scaling
+            defaultPanVolume = StereoLeft[idx];
+            targetVolLeft *= defaultPanVolume; // both sides get the same StereoLeft factor
+            targetVolRight *= defaultPanVolume;
+        }
+    }
+
+    if (targetVolLeft != curVolLeft) {
+        rampLeft = (targetVolLeft - curVolLeft) / (samples_per_update >> 3); // step size for samples_per_update / 8 steps
+    } else {
+        rampLeft = 0;
+    }
+
+    if (targetVolRight != curVolRight) {
+        rampRight = (targetVolRight - curVolRight) / (samples_per_update >> 3);
+    } else {
+        rampRight = 0;
+    }
+
+    curReverbVolAndFlags = (s16)driver->cur_reverb_vol; // low 7 bits are used as the volume; bit 7 is handed to aEnvMixer2 separately
+    if (targetReverbVol != curReverbVolAndFlags) {
+        curReverbVol = curReverbVolAndFlags & 0x7F;
+        rampReverb = (((targetReverbVol & 0x7F) - (curReverbVol)) << 9) / (samples_per_update >> 3); // 7-bit volume delta, << 9, spread over the same number of steps
+        driver->cur_reverb_vol = targetReverbVol;
+    } else {
+        rampReverb = 0;
+    }
+
+    driver->current_volume_left = curVolLeft + (rampLeft * (samples_per_update >> 3)); // integer ramp * steps, so this may not land exactly on the target
+    driver->current_volume_right = curVolRight + (rampRight * (samples_per_update >> 3));
+
+    if (common->use_haas_effect) {
+        Nas_ClearBuffer(cmd++, DMEM_HAAS_TEMP, DMEM_1CH_SIZE);
+        curReverbVol = curReverbVolAndFlags & 0x7F;
+        Nas_SetEnvParam(cmd++, curReverbVol * 2, rampReverb, rampLeft, rampRight);
+        Nas_SetEnvParam2(cmd++, curVolLeft, curVolRight); // uses the volumes from before this update's ramp
+
+        switch (haasEffectDelaySide) {
+            case HAAS_EFFECT_DELAY_LEFT:
+                // Store the left dry channel in a temp space to be delayed to produce the haas effect
+                dmemDests = Env_Data_L1;
+                break;
+
+            case HAAS_EFFECT_DELAY_RIGHT:
+                // Store the right dry channel in a temp space to be delayed to produce the haas effect
+                dmemDests = Env_Data_L2;
+                break;
+
+            default: // HAAS_EFFECT_DELAY_NONE
+                dmemDests = Env_Data_L3;
+                break;
+        }
+    } else {
+        curReverbVol = curReverbVolAndFlags & 0x7F;
+        aSetEnvParam(cmd++, curReverbVol * 2, rampReverb, rampLeft, rampRight); // same arguments as the haas branch, via the a* macros instead of the Nas_* helpers
+        aSetEnvParam2(cmd++, curVolLeft, curVolRight);
+        dmemDests = Env_Data_L3;
+    }
+
+    aEnvMixer2(cmd++, dmem, samples_per_update, (curReverbVolAndFlags & 0x80) >> 7,
+              common->strong_reverb_right, common->strong_reverb_left,
+              common->strong_right, common->strong_left, dmemDests, Env_DataH); // bit 7 of cur_reverb_vol travels as its own argument
+
+    return cmd;
+}
+
+extern Acmd* Nas_Synth_Delay(Acmd* cmd, commonch* common, driverch* driver, s32 size, s32 flags, s32 haasEffectDelaySide) {
+    u16 dmemDest;
+    u16 pitch;
+    u16 prevHaasEffectDelaySize;
+    u16 haasEffectDelaySize;
+
+    switch (haasEffectDelaySide) {
+        case HAAS_EFFECT_DELAY_LEFT:
+            // Delay the sample on the left channel
+            // This allows the right channel to be heard first
+            dmemDest = DMEM_LEFT_CH;
+            haasEffectDelaySize = common->haas_effect_left_delay_size;
+            prevHaasEffectDelaySize = driver->prev_haas_effect_left_delay_size;
+            driver->prev_haas_effect_left_delay_size = haasEffectDelaySize; // becomes the "previous" size on the next call
+            driver->prev_haas_effect_right_delay_size = 0; // the side not being delayed keeps no delay history
+            break;
+
+        case HAAS_EFFECT_DELAY_RIGHT:
+            // Delay the sample on the right channel
+            // This allows the left channel to be heard first
+            dmemDest = DMEM_RIGHT_CH;
+            haasEffectDelaySize = common->haas_effect_right_delay_size;
+            prevHaasEffectDelaySize = driver->prev_haas_effect_right_delay_size;
+            driver->prev_haas_effect_right_delay_size = haasEffectDelaySize; // becomes the "previous" size on the next call
+            driver->prev_haas_effect_left_delay_size = 0; // the side not being delayed keeps no delay history
+            break;
+
+        default: // HAAS_EFFECT_DELAY_NONE
+            return cmd; // no side selected: emit no commands
+    }
+
+    if (flags != A_INIT) {
+        // Slightly adjust the sample rate in order to fit a change in sample delay
+        if (haasEffectDelaySize != prevHaasEffectDelaySize) {
+            pitch = (((size << 0xF) / 2) - 1) / ((size + haasEffectDelaySize - prevHaasEffectDelaySize - 2) / 2);
+            aSetBuffer(cmd++, 0, DMEM_HAAS_TEMP, DMEM_TEMP, size + haasEffectDelaySize - prevHaasEffectDelaySize);
+            aResampleZoh(cmd++, pitch, 0);
+        } else {
+            aDMEMMove(cmd++, DMEM_HAAS_TEMP, DMEM_TEMP, size); // delay size unchanged: plain move, no resampling
+        }
+
+        if (prevHaasEffectDelaySize != 0) {
+            aLoadBuffer2(cmd++, driver->synth_params->haas_effect_delay_state, DMEM_HAAS_TEMP,
+                        ALIGN_NEXT(prevHaasEffectDelaySize, 16)); // length rounded up to a multiple of 16 (presumably what ALIGN_NEXT does)
+            aDMEMMove(cmd++, DMEM_TEMP, DMEM_HAAS_TEMP + prevHaasEffectDelaySize,
+                      size + haasEffectDelaySize - prevHaasEffectDelaySize); // placed prevHaasEffectDelaySize bytes into DMEM_HAAS_TEMP, after the loaded state
+        } else {
+            aDMEMMove(cmd++, DMEM_TEMP, DMEM_HAAS_TEMP, size + haasEffectDelaySize);
+        }
+    } else {
+        // Just apply a delay directly
+        aDMEMMove(cmd++, DMEM_HAAS_TEMP, DMEM_TEMP, size);
+        if (haasEffectDelaySize) { // != 0
+            aClearBuffer(cmd++, DMEM_HAAS_TEMP, haasEffectDelaySize); // A_INIT: no state is loaded, so the leading gap is cleared
+        }
+        aDMEMMove(cmd++, DMEM_TEMP, DMEM_HAAS_TEMP + haasEffectDelaySize, size);
+    }
+
+    if (haasEffectDelaySize) { // != 0
+        // Save excessive samples for next iteration
+        aSaveBuffer2(cmd++, DMEM_HAAS_TEMP + size, driver->synth_params->haas_effect_delay_state,
+                    ALIGN_NEXT(haasEffectDelaySize, 16));
+    }
+
+    aAddMixer(cmd++, ALIGN_NEXT(size, 64), DMEM_HAAS_TEMP, dmemDest, 0x7FFF); // mix DMEM_HAAS_TEMP into the channel chosen by the switch above
+
+    return cmd;
+}

From cd77c59ce584e116486e668c0115d91b5e5126ba Mon Sep 17 00:00:00 2001
From: Cuyler36 <Cuyler36@users.noreply.github.com>
Date: Thu, 23 Oct 2025 11:05:29 -0400
Subject: [PATCH 3/3] Match jaudio_NES/driver Nas_Synth_Delay

---
 src/static/jaudio_NES/internal/driver.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/static/jaudio_NES/internal/driver.c b/src/static/jaudio_NES/internal/driver.c
index 16dec490..76ad93fd 100644
--- a/src/static/jaudio_NES/internal/driver.c
+++ b/src/static/jaudio_NES/internal/driver.c
@@ -1391,10 +1391,10 @@ extern Acmd* Nas_Synth_Envelope(Acmd* cmd, commonch* common, driverch* driver, s
 }
 
 extern Acmd* Nas_Synth_Delay(Acmd* cmd, commonch* common, driverch* driver, s32 size, s32 flags, s32 haasEffectDelaySide) {
+    u16 haasEffectDelaySize;
+    u16 prevHaasEffectDelaySize;
     u16 dmemDest;
     u16 pitch;
-    u16 prevHaasEffectDelaySize;
-    u16 haasEffectDelaySize;
 
     switch (haasEffectDelaySide) {
         case HAAS_EFFECT_DELAY_LEFT:
@@ -1424,7 +1424,7 @@ extern Acmd* Nas_Synth_Delay(Acmd* cmd, commonch* common, driverch* driver, s32
     if (flags != A_INIT) {
         // Slightly adjust the sample rate in order to fit a change in sample delay
         if (haasEffectDelaySize != prevHaasEffectDelaySize) {
-            pitch = (((size << 0xF) / 2) - 1) / ((size + haasEffectDelaySize - prevHaasEffectDelaySize - 2) / 2);
+            pitch = pitch = (((size << 0xF) / 2) - 1) / ((size + haasEffectDelaySize - prevHaasEffectDelaySize - 2) / 2);
             aSetBuffer(cmd++, 0, DMEM_HAAS_TEMP, DMEM_TEMP, size + haasEffectDelaySize - prevHaasEffectDelaySize);
             aResampleZoh(cmd++, pitch, 0);
         } else {
@@ -1450,7 +1450,7 @@ extern Acmd* Nas_Synth_Delay(Acmd* cmd, commonch* common, driverch* driver, s32
 
     if (haasEffectDelaySize) { // != 0
         // Save excessive samples for next iteration
-        aSaveBuffer2(cmd++, DMEM_HAAS_TEMP + size, driver->synth_params->haas_effect_delay_state,
+        aSaveBuffer2(cmd++, driver->synth_params->haas_effect_delay_state, DMEM_HAAS_TEMP + size,
                     ALIGN_NEXT(haasEffectDelaySize, 16));
     }