// dusklight/src/dusk/audio/DuskDsp.cpp

#include <ar.h>
#include <dolphin/os.h>

#include "DuskDsp.hpp"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <span>

#include "Adpcm.hpp"
#include "JSystem/JAudio2/JASDriverIF.h"
#include "dusk/audio/DuskAudioSystem.h"
#include "dusk/endian.h"
#include "global.h"

using namespace dusk::audio;

// Host-side auxiliary state for each DSP channel. DspRender indexes this in
// parallel with JASDsp::CH_BUF (same index == same channel).
ChannelAuxData dusk::audio::ChannelAux[DSP_CHANNELS] = {};

// Value of DumpAudio seen by the previous DspRender call; used to detect toggles.
static bool sDumpWasActive = false;
// One raw dump file per DSP channel. Entries are null while dumping is off,
// and stay null if fopen failed for that channel.
static FILE* sChannelDumpFiles[DSP_CHANNELS] = {};

/**
 * Open one "channel_NN.raw" file per DSP channel for binary writing.
 * A failed fopen leaves a null entry, which the dump code in DspRender skips.
 */
static void OpenChannelDumpFiles() {
    int channelIndex = 0;
    for (FILE*& slot : sChannelDumpFiles) {
        char path[32];
        snprintf(path, sizeof(path), "channel_%02d.raw", channelIndex);
        slot = fopen(path, "wb");
        ++channelIndex;
    }
}

/**
 * Close every dump file that is currently open and clear its slot.
 */
static void CloseChannelDumpFiles() {
    for (FILE*& slot : sChannelDumpFiles) {
        if (slot == nullptr) {
            continue;
        }
        fclose(slot);
        slot = nullptr;
    }
}

// Master gain applied to the fully mixed subframe at the end of DspRender.
f32 dusk::audio::MasterVolume = 1.0f;
// Master gain that was in effect for the previous subframe; DspRender ramps
// from this value to MasterVolume and then updates it.
f32 dusk::audio::PrevMasterVolume = 1.0f;
// When true, DspRender feeds every channel through its reverb and adds the result.
bool dusk::audio::EnableReverb = true;
// When true, DspRender writes each channel's rendered subframes to per-channel raw files.
bool dusk::audio::DumpAudio = false;

/**
 * Check whether a DSP channel's wave format is one this renderer can decode.
 *
 * Accepted formats:
 *  - AdpcmSampleCount samples per block with Adpcm4FrameSize bytes per block
 *  - 1 sample per block with a block size value of 16
 *
 * The Adpcm2FrameSize variant and the 1-sample / block-size-8 variant are
 * intentionally not accepted (ReadSampleData rejects them as ADPCM2 / PCM8).
 *
 * @returns true if the format is supported, false otherwise.
 */
static bool ValidateChannelWaveFormat(const JASDsp::TChannel& channel) {
    const bool isAdpcm4 =
        channel.mSamplesPerBlock == AdpcmSampleCount && channel.mBytesPerBlock == Adpcm4FrameSize;
    const bool isPcm16 = channel.mSamplesPerBlock == 1 && channel.mBytesPerBlock == 16;

    return isAdpcm4 || isPcm16;
}

/**
 * Abort via CRASH if the channel uses a wave format we cannot decode.
 * Returns normally when ValidateChannelWaveFormat accepts the channel.
 */
static void ValidateChannel(const JASDsp::TChannel& channel) {
    if (ValidateChannelWaveFormat(channel)) {
        return;
    }

    CRASH("Unable to handle channel format: %02x, %02x\n", channel.mSamplesPerBlock, channel.mBytesPerBlock);
}

/**
 * Convert a sample count into the number of data bytes needed to hold it for
 * the channel's block format.
 *
 * A count that is not a whole number of blocks is rounded UP to the next block
 * (it is deliberately not treated as an error).
 *
 * @param channel Channel whose mSamplesPerBlock / BlockBytes define the block layout.
 * @param samples Number of samples.
 * @returns Byte length covering `samples`, in whole blocks.
 */
static u32 ConvertSamplesToDataLength(const JASDsp::TChannel& channel, u32 samples) {
    const auto samplesPerBlock = channel.mSamplesPerBlock;

    u32 paddedSamples = samples;
    const bool endsMidBlock = (samples % samplesPerBlock) != 0;
    if (endsMidBlock) {
        // Adding one full block before the truncating division rounds up.
        paddedSamples += samplesPerBlock;
    }

    const auto blockCount = paddedSamples / samplesPerBlock;
    return blockCount * BlockBytes(channel);
}

/**
 * Render the audio data contributed by a single DSP channel. Reads & decodes new input samples.
 *
 * Forward declaration only: DspRender calls this before the definition, which
 * appears further down in this file.
 */
static void RenderChannel(
    JASDsp::TChannel& channel,
    ChannelAuxData& channelAux,
    OutputSubframe& subframe);

/**
 * Converts a pitch value on a DSP channel to a sample rate.
 *
 * The pitch is a fixed-point ratio where 4096 means "play at SampleRate";
 * the result is SampleRate * value / 4096, truncated to an integer.
 * The multiplication is done in 64 bits before dividing.
 */
constexpr static int PitchToSampleRate(u16 value) {
    const u64 scaled = static_cast<u64>(SampleRate) * value;
    return static_cast<int>(scaled / 4096);
}

/**
 * Reset state for a DSP channel between independent playbacks.
 *
 * Recomputes the remaining sample count from the channel's current position,
 * clears decoder history, the decode buffer and the resampler state, and
 * acknowledges the request by clearing mResetFlag.
 */
static void ResetChannel(JASDsp::TChannel& channel, ChannelAuxData& aux) {
    ++aux.resetCount;

    // Channel-side state: everything from the current position to the end is still to play.
    channel.mSamplesLeft = channel.mEndSample - channel.mSamplePosition;
    channel.mResetFlag = false;

    // Decoder history.
    aux.hist0 = 0;
    aux.hist1 = 0;

    // Decode buffer and resampler.
    aux.decodeBufCount = 0;
    aux.resamplePos = 0.0;
    aux.resamplePrev = 0;

    // NaN marks "no previous volume yet"; RenderOutputChannel checks for it
    // and seeds the ramp from the channel's initial volume on the next render.
    for (auto& previous : aux.prevVolume) {
        previous = NAN;
    }
}

/**
 * Mix subframe data from src into dst by adding sample-wise.
 * Iterates over dst.size() samples.
 */
static void MixSubframe(DspSubframe& dst, const DspSubframe& src) {
    const size_t sampleCount = dst.size();
    for (size_t n = 0; n < sampleCount; ++n) {
        dst[n] += src[n];
    }
}

/**
 * Render one subframe of audio: render every DSP channel, optionally add its
 * reverb, optionally dump it to disk, mix it into `subframe`, and finally
 * apply the master volume ramp.
 *
 * Channel output is ADDED into `subframe`; the master volume is then applied
 * to whatever `subframe` holds.
 */
void dusk::audio::DspRender(OutputSubframe& subframe) {
    // Open or close the dump files when DumpAudio changed since the last call.
    if (sDumpWasActive != DumpAudio) {
        sDumpWasActive = DumpAudio;
        if (!DumpAudio) {
            CloseChannelDumpFiles();
        } else {
            OpenChannelDumpFiles();
        }
    }

    std::span dspChannels(JASDsp::CH_BUF, DSP_CHANNELS);

    for (size_t index = 0; index < dspChannels.size(); ++index) {
        auto& channel = dspChannels[index];
        auto& channelAux = ChannelAux[index];

        bool shouldRender = false;
        if (!channel.mIsActive || channel.mPauseFlag) {
            // Inactive or paused: produce silence and leave the channel state alone.
            // (The practical difference between pause and deactivation is unclear;
            // pause may exist to avoid clearing state or to avoid popping.)
        } else if (channel.mForcedStop || channel.mWaveAramAddress == 0) {
            // Forced stop, or a channel with no wave data behind it (possibly an
            // oscillator channel, which is not implemented): report it as finished.
            channel.mIsFinished = true;
        } else {
            shouldRender = true;
        }

        OutputSubframe rendered = {};
        if (shouldRender) {
            ValidateChannel(channel);
            RenderChannel(channel, channelAux, rendered);
        }

        if (EnableReverb) {
            // Scale the reverb INPUT instead of doing wet/dry on the output. The
            // reverb's internal buffers then accumulate energy proportional to
            // mAutoMixerFxMix, so a tail keeps decaying at the right level even if
            // mAutoMixerFxMix changes, avoiding transients when the next sound
            // starts with a different reverb level.
            // The 700.0f divisor is an arbitrary value tuned by ear.
            const f32 sendGain = shouldRender ? (channel.mAutoMixerFxMix >> 8) / 700.0f : 0.0f;

            OutputSubframe wet = {};
            auto& wetLeft = wet.channels[0];
            auto& wetRight = wet.channels[1];
            for (int s = 0; s < DSP_SUBFRAME_SIZE; s++) {
                wetLeft[s] = rendered.channels[0][s] * sendGain;
                wetRight[s] = rendered.channels[1][s] * sendGain;
            }

            // Processed in place; skipped channels still run so their tail rings out.
            channelAux.reverb.processreplace(
                wetLeft.data(), wetRight.data(),
                wetLeft.data(), wetRight.data(),
                DSP_SUBFRAME_SIZE, 1
            );

            for (int s = 0; s < DSP_SUBFRAME_SIZE; s++) {
                rendered.channels[0][s] += wetLeft[s];
                rendered.channels[1][s] += wetRight[s];
            }
        }

        if (DumpAudio && sChannelDumpFiles[index]) {
            // Interleaved left/right f32 samples.
            FILE* const dumpFile = sChannelDumpFiles[index];
            for (int s = 0; s < DSP_SUBFRAME_SIZE; s++) {
                fwrite(&rendered.channels[0][s], sizeof(f32), 1, dumpFile);
                fwrite(&rendered.channels[1][s], sizeof(f32), 1, dumpFile);
            }
        }

        for (size_t out = 0; out < subframe.channels.size(); ++out) {
            MixSubframe(subframe.channels[out], rendered.channels[out]);
        }
    }

    // Ramp from last subframe's master volume to the current one, in place.
    for (auto& outputChannel : subframe.channels) {
        ApplyVolume(outputChannel, outputChannel, PrevMasterVolume, MasterVolume);
    }
    PrevMasterVolume = MasterVolume;
}

/**
 * Actually decode samples from memory for the given audio channel.
 *
 * The decoder is chosen from the channel's block description:
 *  - mSamplesPerBlock == 1 and mBytesPerBlock == 0x10: PCM16, read through
 *    BE(s16) and copied sample by sample.
 *  - any other mSamplesPerBlock with mBytesPerBlock == 9: 4-bit ADPCM, decoded
 *    by Adpcm4ToPcm16 with aux.hist1 / aux.hist0 as decoder history.
 * Every other combination aborts via CRASH.
 *
 * @param channel    Channel whose block description selects the decoder.
 * @param aux        Per-channel state holding the ADPCM history values.
 * @param data       Start of the encoded input.
 * @param dataLength Size of the encoded input in bytes.
 * @param pcm        Destination for decoded 16-bit samples.
 * @param pcmLength  Number of samples to write to `pcm`.
 */
static void ReadSampleData(
    const JASDsp::TChannel& channel,
    ChannelAuxData& aux,
    const u8* data,
    size_t dataLength,
    s16* pcm,
    size_t pcmLength) {
    if (channel.mSamplesPerBlock == 1) {
        if (channel.mBytesPerBlock == 0x10) {
            // PCM16
            assert(reinterpret_cast<uintptr_t>(data) % 2 == 0 && "PCM data must be aligned");
            assert(dataLength % 2 == 0 && "Data length must be multiple of 2");
            // dataLength is in bytes and pcmLength in samples, so the input must
            // hold at least sizeof(s16) bytes for every sample we are about to read.
            assert(dataLength >= pcmLength * sizeof(s16) && "Input too small!");

            auto srcPcm = reinterpret_cast<const BE(s16)*>(data);
            for (size_t i = 0; i < pcmLength; i++) {
                pcm[i] = srcPcm[i];
            }
        } else {
            CRASH("Unsupported format: PCM8");
        }
    } else {
        if (channel.mBytesPerBlock == 9) {
            Adpcm4ToPcm16(data, dataLength, pcm, pcmLength, aux.hist1, aux.hist0);
        } else {
            CRASH("Unsupported format: ADPCM2");
        }
    }
}

/**
 * Read a single *contiguous* chunk of sample data from a channel into outBuf
 *
 * The read always starts on a block boundary and covers a whole number of
 * blocks (or whatever is left in the channel). If the channel's current
 * position is in the middle of a block, the block is decoded from its start
 * and the leading samples are discarded instead of being written to outBuf.
 *
 * Side effects: advances channel.mSamplesLeft / channel.mSamplePosition by the
 * number of samples decoded, and passes `aux` on to the decoder.
 *
 * @param channel        Channel to read from.
 * @param aux            Per-channel decoder state, forwarded to ReadSampleData.
 * @param desiredSamples Requested sample count, must be >= 0. It is padded up
 *                       to a block multiple, so MORE than this may be returned.
 * @param outBuf         Destination for decoded samples.
 * @param outBufSize     Capacity of outBuf in samples; output is clamped to it.
 * @returns Amount of samples written to outBuf. May be less than desiredSamples
 */
static int ReadChannelSamplesChunk(
    JASDsp::TChannel& channel,
    ChannelAuxData& aux,
    int desiredSamples,
    s16* outBuf,
    int outBufSize) {

    assert(desiredSamples >= 0);

    // Host address of the start of this channel's wave data.
    auto aramBase = static_cast<u8*>(ARGetStorageAddress()) + channel.mWaveAramAddress;

    // Streaming logic directly modifies mSamplesLeft.
    // So we use that as our tracking of where we are.
    auto curSamplePosition = channel.mEndSample - channel.mSamplesLeft;

    u32 skipSamples = curSamplePosition % channel.mSamplesPerBlock;
    if (skipSamples != 0) {
        // We need to start reading in the middle of a block. This can happen thanks to loops.
        // So we move back to the start of the block and keep track that those samples should
        // *not* be emitted.
        desiredSamples += static_cast<int>(skipSamples);
        curSamplePosition -= skipSamples;

        channel.mSamplesLeft += skipSamples;
        channel.mSamplePosition -= skipSamples;
    }

    // Pad desiredSamples so that we always leave the channel block-aligned.
    desiredSamples = ALIGN_NEXT(desiredSamples, channel.mSamplesPerBlock);

    assert(curSamplePosition % channel.mSamplesPerBlock == 0);
    // Byte offset of the (block-aligned) read position within the wave data.
    auto dataPosition = ConvertSamplesToDataLength(channel, curSamplePosition);

    // Never decode past the end of the channel's data.
    u32 renderSamples = std::min(channel.mSamplesLeft, static_cast<u32>(desiredSamples));

    // Temporary stack buffer for the decoded samples, including any that will be skipped.
    int renderSize = static_cast<int>(sizeof(s16) * renderSamples);
    auto renderData = static_cast<s16*>(alloca(renderSize));
    memset(renderData, 0, renderSize);

    ReadSampleData(
        channel,
        aux,
        aramBase + dataPosition,
        ConvertSamplesToDataLength(channel, renderSamples),
        renderData,
        renderSamples);

    // The channel position advances by everything decoded, even if part of it
    // is skipped or clamped away below.
    channel.mSamplesLeft -= renderSamples;
    channel.mSamplePosition += renderSamples;

    int outputCount = static_cast<int>(renderSamples - skipSamples);

    // This clamp should never take effect given the limits on pitch shift, but
    // it is kept as a safety net. NOTE(review): if it does take effect, the
    // clamped-off samples are dropped, since the channel position already moved.
    outputCount = std::min(outputCount, outBufSize);
    if (outputCount > 0) {
        memcpy(outBuf, renderData + skipSamples, outputCount * sizeof(s16));
    }

    // NOTE(review): curSamplePosition is not updated after the read, so this
    // re-checks the START position — possibly meant to check the new position; confirm.
    assert(curSamplePosition % channel.mSamplesPerBlock == 0 || channel.mSamplesLeft == 0);

    return outputCount;
}

/**
 * Fill aux.decodeBuf until it holds at least `needed` samples.
 *
 * Fewer samples may end up in the buffer when:
 *  - the channel does not loop and its data ends,
 *  - the channel loops but its loop region is empty or inverted
 *    (mLoopStartSample >= mEndSample), so looping can never yield samples,
 *  - the decode buffer is full.
 *
 * When a looping channel reaches its end, playback is rewound to
 * mLoopStartSample and the decoder history is reloaded from
 * channel.mpPenult / channel.mpLast.
 *
 * On return, channel.mAramStreamPosition is updated to the data offset that
 * corresponds to channel.mSamplePosition.
 *
 * @param channel Channel to pull samples from; its position fields are advanced.
 * @param aux     Per-channel state owning the decode buffer and decoder history.
 * @param needed  Number of samples the caller wants available in aux.decodeBuf.
 */
static void FillDecodeBuf(JASDsp::TChannel& channel, ChannelAuxData& aux, int needed) {
    while (aux.decodeBufCount < needed) {
        if (channel.mSamplesLeft == 0) {
            if (!channel.mLoopFlag) {
                // Not a looping channel and no samples left: nothing more to decode.
                break;
            }

            if (channel.mLoopStartSample >= channel.mEndSample) {
                // Degenerate loop. A zero-length loop would produce no samples and
                // spin here forever; an inverted one would wrap the subtraction
                // below into a huge sample count. Treat both as end of data.
                break;
            }

            // Rewind to the loop start and restore the decoder history for it.
            channel.mSamplesLeft = channel.mEndSample - channel.mLoopStartSample;
            channel.mSamplePosition = channel.mLoopStartSample;
            aux.hist1 = channel.mpPenult;
            aux.hist0 = channel.mpLast;
        }

        const int remainingDecodeSpace = ChannelAuxData::DECODE_BUF_SIZE - aux.decodeBufCount;
        if (remainingDecodeSpace <= 0) {
            // Decode buffer is full.
            break;
        }

        aux.decodeBufCount += ReadChannelSamplesChunk(
            channel, aux, std::min(remainingDecodeSpace, needed - aux.decodeBufCount),
            aux.decodeBuf + aux.decodeBufCount, remainingDecodeSpace
        );
    }

    channel.mAramStreamPosition = channel.mWaveAramAddress + ConvertSamplesToDataLength(channel, channel.mSamplePosition);
}

/**
 * Get the expected BusConnect value needed to define the given output channel in a DSP channel.
 *
 * TODO: The returned constants are a guess for now.
 * Any channel other than LEFT / RIGHT aborts via CRASH.
 */
constexpr u16 GetBusConnect(const OutputChannel channel) {
    if (channel == OutputChannel::LEFT) {
        return 0x0D00;
    }
    if (channel == OutputChannel::RIGHT) {
        return 0x0D60;
    }
    CRASH("Invalid output channel!");
}

/**
 * Find the JASDsp::OutputChannelConfig of a DSP channel that targets the given
 * output channel, i.e. the first entry of mOutputChannels whose mBusConnect
 * equals GetBusConnect(channel).
 *
 * @returns Pointer to the matching config, or null if the DSP channel does not
 *          output to this output channel.
 */
static const JASDsp::OutputChannelConfig* GetOutputConfig(
    const JASDsp::TChannel& sourceChannel,
    OutputChannel channel) {

    const auto wantedBus = GetBusConnect(channel);

    const JASDsp::OutputChannelConfig* match = nullptr;
    for (const auto& candidate : sourceChannel.mOutputChannels) {
        if (candidate.mBusConnect == wantedBus) {
            match = &candidate;
            break;
        }
    }

    return match;
}

// Pair of linear volumes computed by GetVolumeForOutputChannel.
struct VolumeValue {
    // Volume the channel should reach (end of the ramp).
    f32 Target;
    // Volume to start from when the channel has no previous volume yet.
    f32 Init;
};

/**
 * Get the volume that the given DSP channel should render to the given output channel at.
 *
 * Two sources are possible:
 *  - Auto mixer (mAutoMixerBeenSet): volumes come from mAutoMixerVolume /
 *    mAutoMixerInitVolume and are scaled by a pan factor derived from the high
 *    byte of mAutoMixerPanDolby divided by 127 (LEFT gets 1 - pan, RIGHT gets pan).
 *  - Explicit output config: volumes come from the config matching the output
 *    channel (mTargetVolume / mCurrentVolume), with no pan scaling. If there is
 *    no such config the channel is silent on this output and {0, 0} is returned.
 *
 * Smoothing between the returned values is left to the caller, which ramps
 * through ApplyVolume.
 *
 * @returns Target and initial volume as linear factors.
 */
static VolumeValue GetVolumeForOutputChannel(
    const JASDsp::TChannel& sourceChannel,
    OutputChannel outputChannel) {

    u16 rawTarget;
    u16 rawInit;
    f32 panScale = 1;

    if (!sourceChannel.mAutoMixerBeenSet) {
        const auto config = GetOutputConfig(sourceChannel, outputChannel);
        if (config == nullptr) {
            return {0, 0};
        }

        rawTarget = config->mTargetVolume;
        rawInit = config->mCurrentVolume;
    } else {
        rawTarget = sourceChannel.mAutoMixerVolume;
        rawInit = sourceChannel.mAutoMixerInitVolume;

        const auto pan = static_cast<f32>(sourceChannel.mAutoMixerPanDolby >> 8) / 127;

        switch (outputChannel) {
            case OutputChannel::LEFT:
                panScale = 1 - pan;
                break;
            case OutputChannel::RIGHT:
                panScale = pan;
                break;
            default:
                CRASH("Unhandled output channel: OutputChannel");
        }
    }

    const f32 targetBase = VolumeFromU16(rawTarget);
    const f32 initBase = VolumeFromU16(rawInit);

    return {targetBase * panScale, initBase * panScale};
}

/**
 * Given decoded & resampled input samples, render a DSP channel to a given output channel.
 *
 * The samples are written to the output channel's subframe with a volume ramp
 * from the volume used last time to the current target volume. On the first
 * render after a reset (previous volume is NaN) the ramp starts from the
 * channel's initial volume instead. If both ends of the ramp are zero the
 * output subframe is left untouched.
 *
 * @param sourceChannel      DSP channel being rendered.
 * @param aux                Per-channel state holding the previous volume per output.
 * @param outputChannel      Which output channel to render to.
 * @param inputSamples       Resampled mono samples; must fit in the output subframe.
 * @param fullOutputSubframe Subframe set whose `outputChannel` entry is written.
 */
static void RenderOutputChannel(
    const JASDsp::TChannel& sourceChannel,
    ChannelAuxData& aux,
    OutputChannel outputChannel,
    const std::span<f32> inputSamples,
    OutputSubframe& fullOutputSubframe) {

    auto& destination = fullOutputSubframe[outputChannel];
    assert(inputSamples.size() <= destination.size());

    const VolumeValue volume = GetVolumeForOutputChannel(sourceChannel, outputChannel);

    auto& lastVolume = aux.PrevVolume(outputChannel);
    if (std::isnan(lastVolume)) {
        // No previous volume yet: start the ramp from the initial volume.
        lastVolume = volume.Init;
    }

    const f32 goalVolume = volume.Target;
    const bool staysSilent = (lastVolume == 0 && goalVolume == 0);
    if (!staysSilent) {
        ApplyVolume(destination, inputSamples, lastVolume, goalVolume);
        lastVolume = goalVolume;
    }
}

/**
 * Fetch, decode, resample, output
 *
 * Renders one subframe of a single DSP channel:
 *  1. Applies a pending reset request (mResetFlag).
 *  2. Works out how many source samples are needed for DSP_SUBFRAME_SIZE output
 *     samples at the channel's pitch and tops up the decode buffer.
 *  3. Linearly resamples the decoded s16 samples to f32.
 *  4. Saves the resampler state and compacts the decode buffer.
 *  5. Renders the resampled samples to the LEFT and RIGHT output channels.
 *
 * @param channel    DSP channel to render; position fields and mIsFinished may change.
 * @param channelAux Per-channel decode / resample / volume state.
 * @param subframe   Receives the channel's output for both output channels.
 */
static void RenderChannel(
    JASDsp::TChannel& channel,
    ChannelAuxData& channelAux,
    OutputSubframe& subframe) {

    if (channel.mResetFlag) {
        ResetChannel(channel, channelAux);
    }

    // how many input samples we step per output sample, aka the resampling ratio
    f32 step = (f32)PitchToSampleRate(channel.mPitch) / SampleRate;

    // how many input samples to resample to DSP_SUBFRAME_SIZE output samples
    // (the + 2 is presumably headroom for the prev/next interpolation pair — TODO confirm)
    int needed = static_cast<int>(channelAux.resamplePos + DSP_SUBFRAME_SIZE * step) + 2;

    FillDecodeBuf(channel, channelAux, needed);

    // source ran dry, channel is finished
    // NOTE(review): FillDecodeBuf also stops early when the decode buffer is full,
    // so a `needed` above the buffer capacity would land here too — confirm that
    // pitch limits keep `needed` within ChannelAuxData::DECODE_BUF_SIZE.
    if(channelAux.decodeBufCount < needed) {
        channel.mIsFinished = true;
    }

    DspSubframe audioLoadBuffer = {};
    // Fractional position between `prev` and `next`, carried over from the last subframe.
    f64 pos = channelAux.resamplePos;
    // Last source sample consumed by the previous subframe.
    s16 prev = channelAux.resamplePrev;
    // Upcoming source sample; holds `prev` when the decode buffer is empty.
    s16 next = channelAux.decodeBufCount > 0 ? channelAux.decodeBuf[0] : prev;
    // Index of `next` within decodeBuf.
    int srcIdx = 0;

    // linear resampling and f32 conversion
    for (int i = 0; i < DSP_SUBFRAME_SIZE; i++) {
        audioLoadBuffer[i] = static_cast<f32>(prev + pos * (next - prev)) / 32768.0f;
        pos += step;
        // Consume whole source samples until the position is fractional again.
        while (pos >= 1.0) {
            pos -= 1.0;
            prev = next;
            srcIdx++;
            // Past the end of the decoded data the last sample is held.
            next = srcIdx < channelAux.decodeBufCount ? channelAux.decodeBuf[srcIdx] : prev;
        }
    }

    // save resampler state for the next subframe, prevents popping on pitch change
    channelAux.resamplePos = pos;
    channelAux.resamplePrev = prev;

    // move any remaining samples in the decode buf to the beginning
    // (decodeBuf[srcIdx], the current `next`, is kept and becomes index 0)
    int remainingDecodeBuf = channelAux.decodeBufCount - srcIdx;
    if (remainingDecodeBuf > 0) {
        memmove(channelAux.decodeBuf, channelAux.decodeBuf + srcIdx, remainingDecodeBuf * sizeof(s16));
    }

    // srcIdx can run past the decoded data when the source ran dry; clamp to empty.
    channelAux.decodeBufCount = std::max(0, remainingDecodeBuf);

    auto hasReadSamples = std::span(audioLoadBuffer).subspan(0, DSP_SUBFRAME_SIZE);

    static_assert(OutputSubframe::NUM_CHANNELS == 2, "Keep RenderChannel in sync!");

    RenderOutputChannel(channel, channelAux, OutputChannel::LEFT, hasReadSamples, subframe);
    RenderOutputChannel(channel, channelAux, OutputChannel::RIGHT, hasReadSamples, subframe);
}

void dusk::audio::DspInit() {
    for (int i = 0; i < DSP_CHANNELS; i++) {
        auto& channelAux = ChannelAux[i];
        channelAux.reverb.setwet(1.0f);
        channelAux.reverb.setdry(0.0f);
        channelAux.reverb.setroomsize(0.4f);
        channelAux.reverb.setdamp(0.7f);
        channelAux.reverb.setwidth(1.0f);
        channelAux.reverb.setmode(0.0f);
        channelAux.reverb.mute();
    }
}

/**
 * Copy `src` into `dst` while applying a volume.
 *
 * With equal start and end volumes every sample is scaled by that constant.
 * Otherwise the volume ramps linearly: sample n is scaled by
 * startVolume + n * (endVolume - startVolume) / src.size(), so endVolume itself
 * is only reached at the start of the following call.
 *
 * Samples are processed index by index, so `dst` and `src` may refer to the
 * same buffer (DspRender does this for the master volume).
 *
 * @param dst         Destination; must hold at least src.size() samples.
 * @param src         Source samples.
 * @param startVolume Volume applied to the first sample.
 * @param endVolume   Volume the ramp heads towards.
 */
void dusk::audio::ApplyVolume(
    std::span<f32> dst,
    const std::span<f32> src,
    const f32 startVolume,
    const f32 endVolume) {
    const size_t count = src.size();
    assert(dst.size() >= count);

    if (startVolume == endVolume) {
        // Constant gain, no ramp needed.
        for (size_t n = 0; n < count; ++n) {
            dst[n] = src[n] * startVolume;
        }
        return;
    }

    const f32 increment = (endVolume - startVolume) / static_cast<f32>(count);
    f32 gain = startVolume;
    for (size_t n = 0; n < count; ++n) {
        dst[n] = src[n] * gain;
        gain += increment;
    }
}