From 807931c9384a822ad0a54b3fbdec5f7af7a83bc1 Mon Sep 17 00:00:00 2001 From: Tom <1568512+tomcl7@users.noreply.github.com> Date: Wed, 11 Mar 2026 17:43:12 -0400 Subject: [PATCH] fix(audio): frame queuing, XMA decoder sync completion, and diagnostics - Clamp queued_frames to 4-64 range, null validation in RegisterRenderDriverClient - XMA work completion event, WriteRegister blocks until contexts finish - XMABlockWhileInUse checks work_buffer_ptr to prevent hangs --- include/rex/audio/audio_system.h | 5 +-- include/rex/audio/xma/context.h | 13 +++++++ src/audio/audio_system.cpp | 44 +++++++++++++++++++--- src/audio/sdl/sdl_audio_driver.cpp | 12 ++++++ src/audio/xma_context.cpp | 3 +- src/audio/xma_decoder.cpp | 35 +++++++++-------- src/kernel/xboxkrnl/xboxkrnl_audio.cpp | 17 +++++++++ src/kernel/xboxkrnl/xboxkrnl_audio_xma.cpp | 4 ++ 8 files changed, 106 insertions(+), 27 deletions(-) diff --git a/include/native/audio/audio_system.h b/include/native/audio/audio_system.h index cf67c344..6c6a72fb 100644 --- a/include/native/audio/audio_system.h +++ b/include/native/audio/audio_system.h @@ -66,13 +66,12 @@ class AudioSystem : public system::IAudioSystem { AudioDriver** out_driver) = 0; virtual void DestroyDriver(AudioDriver* driver) = 0; - // TODO(gibbed): respect XAUDIO2_MAX_QUEUED_BUFFERS somehow (ie min(64, - // XAUDIO2_MAX_QUEUED_BUFFERS)) - // static const size_t kMaximumQueuedFrames = 64; + static constexpr size_t kMaximumQueuedFrames = 64; memory::Memory* memory_ = nullptr; runtime::Processor* processor_ = nullptr; std::unique_ptr xma_decoder_; + uint32_t queued_frames_; std::atomic worker_running_ = {false}; system::object_ref worker_thread_; diff --git a/include/native/audio/xma/context.h b/include/native/audio/xma/context.h index 75581831..47db366e 100644 --- a/include/native/audio/xma/context.h +++ b/include/native/audio/xma/context.h @@ -19,6 +19,7 @@ #include #include +#include // XMA audio format: // From research, XMA appears to be based on WMA Pro with @@ -165,6 +166,17 @@ class XmaContext { void set_is_allocated(bool is_allocated) { is_allocated_ = is_allocated; } void set_is_enabled(bool is_enabled) { is_enabled_ = is_enabled; } + void SignalWorkDone() { + if (work_completion_event_) { + work_completion_event_->Set(); + } + } + void WaitForWorkDone() { + if (work_completion_event_) { + rex::thread::Wait(work_completion_event_.get(), false); + } + } + private: static void SwapInputBuffer(XMA_CONTEXT_DATA* data); static bool TrySetupNextLoop(XMA_CONTEXT_DATA* data, bool ignore_input_buffer_offset); @@ -188,6 +200,7 @@ class XmaContext { int PrepareDecoder(uint8_t* packet, int sample_rate, bool is_two_channel); memory::Memory* memory_ = nullptr; + std::unique_ptr work_completion_event_; uint32_t id_ = 0; uint32_t guest_ptr_ = 0; diff --git a/src/native/audio/audio_system.cpp b/src/native/audio/audio_system.cpp index 7a5042a6..6b9767c9 100644 --- a/src/native/audio/audio_system.cpp +++ b/src/native/audio/audio_system.cpp @@ -24,7 +24,9 @@ #include #include -REXCVAR_DEFINE_INT32(audio_maxqframes, 64, "Audio", "Adjust audio maximum queued frames"); +REXCVAR_DEFINE_INT32( + audio_maxqframes, 8, "Audio", + "Max buffered audio frames (range 4-64). Lower reduces latency but may cause stuttering."); // As with normal Microsoft, there are like twelve different ways to access // the audio APIs. Early games use XMA*() methods almost exclusively to touch @@ -44,8 +46,12 @@ AudioSystem::AudioSystem(runtime::Processor* processor) : memory_(processor->memory()), processor_(processor), worker_running_(false) { std::memset(clients_, 0, sizeof(clients_)); + queued_frames_ = std::min( + static_cast(kMaximumQueuedFrames), + std::max(static_cast(REXCVAR_GET(audio_maxqframes)), static_cast(4))); + for (size_t i = 0; i < kMaximumClientCount; ++i) { - client_semaphores_[i] = rex::thread::Semaphore::Create(0, REXCVAR_GET(audio_maxqframes)); + client_semaphores_[i] = rex::thread::Semaphore::Create(0, queued_frames_); assert_not_null(client_semaphores_[i]); wait_handles_[i] = client_semaphores_[i].get(); } @@ -89,6 +95,7 @@ void AudioSystem::WorkerThreadMain() { Initialize(); // Main run loop. + uint32_t diag_pump_count = 0; while (worker_running_) { // These handles signify the number of submitted samples. Once we reach // 64 samples, we wait until our audio backend releases a semaphore @@ -96,10 +103,16 @@ void AudioSystem::WorkerThreadMain() { auto result = rex::thread::WaitAny(wait_handles_, rex::countof(wait_handles_), true, std::chrono::milliseconds(500)); if (result.first == rex::thread::WaitResult::kFailed) { - // TODO: Assert? + REXAPU_WARN("AudioWorker: WaitAny failed"); continue; } + if (result.first == rex::thread::WaitResult::kTimeout) { + if (diag_pump_count < 5) { + REXAPU_DEBUG("AudioWorker: WaitAny timed out (no semaphore signals)"); + } + } + if (result.first == thread::WaitResult::kSuccess && result.second == kMaximumClientCount) { // Shutdown event signaled. if (paused_) { @@ -121,10 +134,20 @@ void AudioSystem::WorkerThreadMain() { global_lock.unlock(); if (client_callback) { + if (diag_pump_count < 10) { + REXAPU_DEBUG("AudioWorker: dispatching callback {:08X} with arg {:08X} for client {}", + client_callback, client_callback_arg, index); + } SCOPE_profile_cpu_i("apu", "rex::audio::AudioSystem->client_callback"); uint64_t args[] = {client_callback_arg}; processor_->Execute(worker_thread_->thread_state(), client_callback, args, rex::countof(args)); + if (diag_pump_count < 10) { + REXAPU_DEBUG("AudioWorker: callback returned for client {}", index); + } + diag_pump_count++; + } else { + REXAPU_DEBUG("AudioWorker: semaphore signaled for client {} but callback is 0", index); } pumped = true; @@ -188,13 +211,17 @@ void AudioSystem::Shutdown() { } X_STATUS AudioSystem::RegisterClient(uint32_t callback, uint32_t callback_arg, size_t* out_index) { + REXAPU_DEBUG("AudioSystem::RegisterClient: callback={:08X} callback_arg={:08X}", callback, + callback_arg); auto global_lock = global_critical_region_.Acquire(); auto index = FindFreeClient(); assert_true(index >= 0); + REXAPU_DEBUG("AudioSystem::RegisterClient: using client index={} queued_frames={}", index, + queued_frames_); auto client_semaphore = client_semaphores_[index].get(); - auto ret = client_semaphore->Release(REXCVAR_GET(audio_maxqframes), nullptr); + auto ret = client_semaphore->Release(queued_frames_, nullptr); assert_true(ret); AudioDriver* driver; @@ -219,6 +246,13 @@ X_STATUS AudioSystem::RegisterClient(uint32_t callback, uint32_t callback_arg, s void AudioSystem::SubmitFrame(size_t index, uint32_t samples_ptr) { SCOPE_profile_cpu_f("apu"); + static uint32_t submit_count = 0; + if (submit_count < 10) { + REXAPU_DEBUG("AudioSystem::SubmitFrame called: index={} samples_ptr={:08X}", index, + samples_ptr); + submit_count++; + } + auto global_lock = global_critical_region_.Acquire(); assert_true(index < kMaximumClientCount); assert_true(clients_[index].driver != NULL); @@ -296,7 +330,7 @@ bool AudioSystem::Restore(stream::ByteStream* stream) { client.in_use = true; auto client_semaphore = client_semaphores_[id].get(); - auto ret = client_semaphore->Release(REXCVAR_GET(audio_maxqframes), nullptr); + auto ret = client_semaphore->Release(queued_frames_, nullptr); assert_true(ret); AudioDriver* driver = nullptr; diff --git a/src/native/audio/sdl/sdl_audio_driver.cpp b/src/native/audio/sdl/sdl_audio_driver.cpp index 554c03bc..bd56f36c 100644 --- a/src/native/audio/sdl/sdl_audio_driver.cpp +++ b/src/native/audio/sdl/sdl_audio_driver.cpp @@ -110,6 +110,13 @@ void SDLAudioDriver::SubmitFrame(uint32_t frame_ptr) { std::memcpy(output_frame, input_frame, frame_samples_ * sizeof(float)); + static uint32_t sdl_submit_count = 0; + if (sdl_submit_count < 10) { + REXAPU_DEBUG("SDLAudioDriver::SubmitFrame: frame_ptr={:08X} queued_count={}", frame_ptr, + frames_queued_.size() + 1); + sdl_submit_count++; + } + { std::unique_lock guard(frames_mutex_); frames_queued_.push(output_frame); @@ -146,8 +153,13 @@ void SDLAudioDriver::SDLCallback(void* userdata, Uint8* stream, int len) { assert_true(len == static_cast(sizeof(float) * channel_samples_ * driver->sdl_device_channels_)); + static uint32_t sdl_callback_count = 0; std::unique_lock guard(driver->frames_mutex_); if (driver->frames_queued_.empty()) { + if (sdl_callback_count < 10) { + REXAPU_DEBUG("SDLCallback: no frames queued (silence)"); + sdl_callback_count++; + } std::memset(stream, 0, len); } else { auto buffer = driver->frames_queued_.front(); diff --git a/src/native/audio/xma/context.cpp b/src/native/audio/xma/context.cpp index 76e589e9..1b96d707 100644 --- a/src/native/audio/xma/context.cpp +++ b/src/native/audio/xma/context.cpp @@ -39,7 +39,8 @@ namespace rex::audio { using stream::BitStream; -XmaContext::XmaContext() = default; +XmaContext::XmaContext() + : work_completion_event_(rex::thread::Event::CreateAutoResetEvent(false)) {} XmaContext::~XmaContext() { if (av_context_) { diff --git a/src/native/audio/xma/decoder.cpp b/src/native/audio/xma/decoder.cpp index f228692f..7780bac0 100644 --- a/src/native/audio/xma/decoder.cpp +++ b/src/native/audio/xma/decoder.cpp @@ -137,18 +137,16 @@ X_STATUS XmaDecoder::Setup(system::KernelState* kernel_state) { } void XmaDecoder::WorkerThreadMain() { - uint32_t idle_loop_count = 0; while (worker_running_) { // Okay, let's loop through XMA contexts to find ones we need to decode! bool did_work = false; for (uint32_t n = 0; n < kContextCount && worker_running_; n++) { XmaContext& context = contexts_[n]; - did_work = context.Work() || did_work; - - // TODO: Need thread safety to do this. - // Probably not too important though. - // registers_.current_context = n; - // registers_.next_context = (n + 1) % kContextCount; + bool worked = context.Work(); + if (worked) { + context.SignalWorkDone(); + } + did_work = did_work || worked; } if (paused_) { @@ -156,18 +154,11 @@ void XmaDecoder::WorkerThreadMain() { resume_fence_.Wait(); } - if (!did_work) { - idle_loop_count++; - } else { - idle_loop_count = 0; - } - - if (idle_loop_count > 500) { - // Idle for an extended period. Introduce a 20ms wait. - rex::thread::Wait(work_event_.get(), false, std::chrono::milliseconds(20)); + if (did_work) { + continue; } - - rex::thread::MaybeYield(); + // No work done this iteration, block until signaled. + rex::thread::Wait(work_event_.get(), false); } } @@ -292,6 +283,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) { // The context ID is a bit in the range of the entire context array. uint32_t base_context_id = (r - XmaRegister::Context0Kick) * 32; + uint32_t kicked_value = value; for (int i = 0; value && i < 32; ++i, value >>= 1) { if (value & 1) { uint32_t context_id = base_context_id + i; @@ -301,6 +293,13 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) { } // Signal the decoder thread to start processing. work_event_->Set(); + // Block until the worker finishes, so the game sees updated context data. + for (int i = 0; kicked_value && i < 32; ++i, kicked_value >>= 1) { + if (kicked_value & 1) { + uint32_t context_id = base_context_id + i; + contexts_[context_id].WaitForWorkDone(); + } + } } else if (r >= XmaRegister::Context0Lock && r <= XmaRegister::Context9Lock) { // Context lock command. // This requests a lock by flagging the context. diff --git a/src/kernel/xboxkrnl/xboxkrnl_audio.cpp b/src/kernel/xboxkrnl/xboxkrnl_audio.cpp index 31407f26..dfd9db81 100644 --- a/src/kernel/xboxkrnl/xboxkrnl_audio.cpp +++ b/src/kernel/xboxkrnl/xboxkrnl_audio.cpp @@ -54,7 +54,17 @@ ppc_u32_result_t XAudioEnableDucker_entry(ppc_u32_t unk) { ppc_u32_result_t XAudioRegisterRenderDriverClient_entry(ppc_pu32_t callback_ptr, ppc_pu32_t driver_ptr) { + REXKRNL_DEBUG("XAudioRegisterRenderDriverClient called! callback_ptr={:08X} driver_ptr={:08X}", + callback_ptr.guest_address(), driver_ptr.guest_address()); + if (!callback_ptr) { + return X_E_INVALIDARG; + } + uint32_t callback = callback_ptr[0]; + + if (!callback) { + return X_E_INVALIDARG; + } uint32_t callback_arg = callback_ptr[1]; auto* audio_system = static_cast(kernel_state()->emulator()->audio_system()); @@ -82,6 +92,13 @@ ppc_u32_result_t XAudioSubmitRenderDriverFrame_entry(ppc_pvoid_t driver_ptr, ppc_pvoid_t samples_ptr) { assert_true((driver_ptr.guest_address() & 0xFFFF0000) == 0x41550000); + static uint32_t submit_krnl_count = 0; + if (submit_krnl_count < 10) { + REXKRNL_DEBUG("XAudioSubmitRenderDriverFrame: driver={:08X} samples={:08X}", + driver_ptr.guest_address(), samples_ptr.guest_address()); + submit_krnl_count++; + } + auto* audio_system = static_cast(kernel_state()->emulator()->audio_system()); audio_system->SubmitFrame(driver_ptr.guest_address() & 0x0000FFFF, samples_ptr.guest_address()); diff --git a/src/kernel/xboxkrnl/xboxkrnl_audio_xma.cpp b/src/kernel/xboxkrnl/xboxkrnl_audio_xma.cpp index 17a4b4e1..3102f4fc 100644 --- a/src/kernel/xboxkrnl/xboxkrnl_audio_xma.cpp +++ b/src/kernel/xboxkrnl/xboxkrnl_audio_xma.cpp @@ -64,6 +64,7 @@ using rex::audio::XMA_CONTEXT_DATA; // https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.xaudio2.xaudio2_buffer(v=vs.85).aspx ppc_u32_result_t XMACreateContext_entry(ppc_pu32_t context_out_ptr) { + REXKRNL_DEBUG("XMACreateContext called!"); auto xma_decoder = static_cast(kernel_state()->emulator()->audio_system())->xma_decoder(); uint32_t context_ptr = xma_decoder->AllocateContext(); @@ -341,6 +342,9 @@ ppc_u32_result_t XMABlockWhileInUse_entry(ppc_pvoid_t context_ptr) { if (!context.input_buffer_0_valid && !context.input_buffer_1_valid) { break; } + if (!context.work_buffer_ptr) { + break; + } rex::thread::Sleep(std::chrono::milliseconds(1)); } while (true); return 0; -- 2.52.0.windows.1