From a0271eca2604f6fc655eaa0b572e9a4fc3bd97f2 Mon Sep 17 00:00:00 2001 From: Tom <1568512+tomcl7@users.noreply.github.com> Date: Fri, 20 Mar 2026 08:18:02 -0400 Subject: [PATCH] feat(audio): rewrite XMA decoder with loop fixes from Xenia Canary Ports xma_context_new from Xenia Canary, fixing audio looping and improving decoder correctness: - Exact-match loop detection (== not >=) with subframe precision - Decode/Consume split architecture for subframe-level output control - StoreContextMerged to prevent race conditions with game context writes - SwapInputBuffer sets read offset past packet header - Output space uses subframe_decode_count instead of full frame - Consume-only context support, packet skip 0xFF, split frame headers - XMA_CONTEXT_DATA: identified error_status, output_buffer_padding fields - XMA helpers: const correctness, IsPacketXma2Type --- include/rex/audio/xma/context.h | 136 +++-- include/rex/audio/xma/helpers.h | 30 +- src/audio/xma_context.cpp | 994 ++++++++++++++------------------ 3 files changed, 530 insertions(+), 630 deletions(-) diff --git a/include/native/audio/xma/context.h b/include/native/audio/xma/context.h index 8dfd17c2..5298d1eb 100644 --- a/include/native/audio/xma/context.h +++ b/include/native/audio/xma/context.h @@ -14,8 +14,6 @@ #include #include #include -#include -// #include #include #include @@ -69,7 +67,8 @@ struct XMA_CONTEXT_DATA { uint32_t loop_subframe_skip : 3; // +17bit, XMASetLoopData might be // subframe_decode_count uint32_t subframe_decode_count : 4; // +20bit - uint32_t subframe_skip_count : 3; // +24bit + uint32_t output_buffer_padding : 3; // +24bit, extra output buffer blocks + // reserved per decoded frame uint32_t sample_rate : 2; // +27bit enum of sample rates uint32_t is_stereo : 1; // +29bit uint32_t unk_dword_1_c : 1; // +30bit @@ -77,12 +76,14 @@ struct XMA_CONTEXT_DATA { // DWORD 2 uint32_t input_buffer_read_offset : 26; // XMAGetInputBufferReadOffset - uint32_t unk_dword_2 : 6; // ErrorStatus/ErrorSet (?) + uint32_t error_status : 5; // ErrorStatus + uint32_t error_set : 1; // ErrorSet // DWORD 3 - uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset - // frame offset in bits - uint32_t unk_dword_3 : 6; // ? ParserErrorStatus/ParserErrorSet(?) + uint32_t loop_start : 26; // XMASetLoopData LoopStartOffset + // frame offset in bits + uint32_t parser_error_status : 5; // ParserErrorStatus + uint32_t parser_error_set : 1; // ParserErrorSet // DWORD 4 uint32_t loop_end : 26; // XMASetLoopData LoopEndOffset @@ -118,6 +119,32 @@ struct XMA_CONTEXT_DATA { memory::copy_and_swap(reinterpret_cast(ptr), reinterpret_cast(this), sizeof(XMA_CONTEXT_DATA) / 4); } + + bool IsInputBufferValid(uint8_t buffer_index) const { + return buffer_index == 0 ? input_buffer_0_valid : input_buffer_1_valid; + } + + bool IsCurrentInputBufferValid() const { return IsInputBufferValid(current_buffer); } + + bool IsAnyInputBufferValid() const { return input_buffer_0_valid || input_buffer_1_valid; } + + uint32_t GetInputBufferAddress(uint8_t buffer_index) const { + return buffer_index == 0 ? input_buffer_0_ptr : input_buffer_1_ptr; + } + + uint32_t GetCurrentInputBufferAddress() const { return GetInputBufferAddress(current_buffer); } + + uint32_t GetInputBufferPacketCount(uint8_t buffer_index) const { + return buffer_index == 0 ? input_buffer_0_packet_count : input_buffer_1_packet_count; + } + + uint32_t GetCurrentInputBufferPacketCount() const { + return GetInputBufferPacketCount(current_buffer); + } + + bool IsConsumeOnlyContext() const { + return (input_buffer_0_packet_count | input_buffer_1_packet_count) == 0; + } }; static_assert_size(XMA_CONTEXT_DATA, 64); @@ -129,11 +156,26 @@ struct Xma2ExtraData { static_assert_size(Xma2ExtraData, 34); #pragma pack(pop) +struct kPacketInfo { + uint8_t frame_count_ = 0; + uint8_t current_frame_ = 0; + uint32_t current_frame_size_ = 0; + + bool isLastFrameInPacket() const { + return frame_count_ == 0 || current_frame_ == frame_count_ - 1; + } +}; + +static constexpr int kIdToSampleRate[4] = {24000, 32000, 44100, 48000}; + class XmaContext { public: static const uint32_t kBytesPerPacket = 2048; static const uint32_t kBitsPerPacket = kBytesPerPacket * 8; - static const uint32_t kBitsPerHeader = 33; + static const uint32_t kBitsPerPacketHeader = 32; + static const uint32_t kBitsPerFrameHeader = 15; + static const uint32_t kBytesPerPacketHeader = 4; + static const uint32_t kBytesPerPacketData = kBytesPerPacket - kBytesPerPacketHeader; static const uint32_t kBytesPerSample = 2; static const uint32_t kSamplesPerFrame = 512; @@ -141,8 +183,9 @@ class XmaContext { static const uint32_t kBytesPerFrameChannel = kSamplesPerFrame * kBytesPerSample; static const uint32_t kBytesPerSubframeChannel = kSamplesPerSubframe * kBytesPerSample; - // static const uint32_t kOutputBytesPerBlock = 256; - // static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock; + static const uint32_t kOutputBytesPerBlock = 256; + static const uint32_t kOutputMaxSizeBytes = 31 * kOutputBytesPerBlock; + static const uint32_t kMaxFrameSizeinBits = 0x4000 - kBitsPerPacketHeader; explicit XmaContext(); ~XmaContext(); @@ -181,25 +224,32 @@ class XmaContext { private: static void SwapInputBuffer(XMA_CONTEXT_DATA* data); - static bool TrySetupNextLoop(XMA_CONTEXT_DATA* data, bool ignore_input_buffer_offset); - static void NextPacket(XMA_CONTEXT_DATA* data); static int GetSampleRate(int id); - // Get the offset of the next frame. Does not traverse packets. - static size_t GetNextFrame(uint8_t* block, size_t size, size_t bit_offset); - // Get the containing packet number of the frame pointed to by the offset. - static int GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset); - // Get the packet number and the index of the frame inside that packet - static std::tuple GetFrameNumber(uint8_t* block, size_t size, size_t bit_offset); - // Get the number of frames contained in the packet (including truncated) and - // if the last frame is split. - static std::tuple GetPacketFrameCount(uint8_t* packet); - - // Convert sample format and swap bytes - static void ConvertFrame(const uint8_t** samples, bool is_two_channel, uint8_t* output_buffer); + static int16_t GetPacketNumber(size_t size, size_t bit_offset); + static uint32_t GetCurrentInputBufferSize(XMA_CONTEXT_DATA* data); + + kPacketInfo GetPacketInfo(uint8_t* packet, uint32_t frame_offset); + uint32_t GetAmountOfBitsToRead(uint32_t remaining_stream_bits, uint32_t frame_size); + const uint8_t* GetNextPacket(XMA_CONTEXT_DATA* data, uint32_t next_packet_index, + uint32_t current_input_packet_count); + uint32_t GetNextPacketReadOffset(uint8_t* buffer, uint32_t next_packet_index, + uint32_t current_input_packet_count); + uint8_t* GetCurrentInputBuffer(XMA_CONTEXT_DATA* data); - bool ValidFrameOffset(uint8_t* block, size_t size_bytes, size_t frame_offset_bits); void Decode(XMA_CONTEXT_DATA* data); - int PrepareDecoder(uint8_t* packet, int sample_rate, bool is_two_channel); + void Consume(memory::RingBuffer* output_rb, const XMA_CONTEXT_DATA* data); + void UpdateLoopStatus(XMA_CONTEXT_DATA* data); + void ClearLocked(XMA_CONTEXT_DATA* data); + + memory::RingBuffer PrepareOutputRingBuffer(XMA_CONTEXT_DATA* data); + int PrepareDecoder(int sample_rate, bool is_two_channel); + void PreparePacket(uint32_t frame_size, uint32_t frame_padding); + bool DecodePacket(AVCodecContext* av_context, const AVPacket* av_packet, AVFrame* av_frame); + + void StoreContextMerged(const XMA_CONTEXT_DATA& data, const XMA_CONTEXT_DATA& initial_data, + uint8_t* context_ptr); + + static void ConvertFrame(const uint8_t** samples, bool is_two_channel, uint8_t* output_buffer); memory::Memory* memory_ = nullptr; std::unique_ptr work_completion_event_; @@ -209,35 +259,27 @@ class XmaContext { std::mutex lock_; std::atomic is_allocated_ = false; std::atomic is_enabled_ = false; - // bool is_dirty_ = true; // ffmpeg structures AVPacket* av_packet_ = nullptr; AVCodec* av_codec_ = nullptr; AVCodecContext* av_context_ = nullptr; AVFrame* av_frame_ = nullptr; - // uint32_t decoded_consumed_samples_ = 0; // TODO do this dynamically - // int decoded_idx_ = -1; - - // bool partial_frame_saved_ = false; - // bool partial_frame_size_known_ = false; - // size_t partial_frame_total_size_bits_ = 0; - // size_t partial_frame_start_offset_bits_ = 0; - // size_t partial_frame_offset_bits_ = 0; // blah internal don't use this - // std::vector partial_frame_buffer_; - uint32_t packets_skip_ = 0; - - // bool split_frame_pending_ = false; - uint32_t split_frame_len_ = 0; - uint32_t split_frame_len_partial_ = 0; - uint8_t split_frame_padding_start_ = 0; - // first byte contains bit offset information - std::array xma_frame_; - // uint8_t* current_frame_ = nullptr; - // conversion buffer for 2 channel frame + // Packet data buffer (two packets worth for split frame handling) + std::array input_buffer_; + // First byte contains bit offset information + std::array xma_frame_; + // Conversion buffer for up to 2-channel frame std::array raw_frame_; - // std::vector current_frame_ = std::vector(0); + + // Output buffer tracking + int32_t remaining_subframe_blocks_in_output_buffer_ = 0; + uint8_t current_frame_remaining_subframes_ = 0; + + // Loop subframe precision state + uint8_t loop_frame_output_limit_ = 0; + bool loop_start_skip_pending_ = false; }; } // namespace rex::audio diff --git a/include/native/audio/xma/helpers.h b/include/native/audio/xma/helpers.h index b2e0e10e..638e406a 100644 --- a/include/native/audio/xma/helpers.h +++ b/include/native/audio/xma/helpers.h @@ -17,30 +17,30 @@ namespace rex::audio::xma { -static const uint32_t kMaxFrameLength = 0x7FFF; +static constexpr uint32_t kMaxFrameLength = 0x7FFF; -// Get number of frames that /begin/ in this packet. -inline uint32_t GetPacketFrameCount(uint8_t* packet) { - return (uint8_t)(packet[0] >> 2); +// Get number of frames that /begin/ in this packet. Valid only for XMA2 packets. +inline uint8_t GetPacketFrameCount(const uint8_t* packet) { + return packet[0] >> 2; } // Get the first frame offset in bits -inline uint32_t GetPacketFrameOffset(uint8_t* packet) { - uint32_t val = (uint16_t)(((packet[0] & 0x3) << 13) | (packet[1] << 5) | (packet[2] >> 3)); - // if (val > kBitsPerPacket - kBitsPerHeader) { - // // There is no data in this packet - // return -1; - // } else { +inline uint32_t GetPacketFrameOffset(const uint8_t* packet) { + uint32_t val = + static_cast(((packet[0] & 0x3) << 13) | (packet[1] << 5) | (packet[2] >> 3)); return val + 32; - // } } -inline uint32_t GetPacketMetadata(uint8_t* packet) { - return (uint8_t)(packet[2] & 0x7); +inline uint8_t GetPacketMetadata(const uint8_t* packet) { + return packet[2] & 0x7; } -inline uint32_t GetPacketSkipCount(uint8_t* packet) { - return (uint8_t)(packet[3]); +inline bool IsPacketXma2Type(const uint8_t* packet) { + return GetPacketMetadata(packet) == 1; +} + +inline uint8_t GetPacketSkipCount(const uint8_t* packet) { + return packet[3]; } } // namespace rex::audio::xma diff --git a/src/native/audio/xma/context.cpp b/src/native/audio/xma/context.cpp index 49aae593..a6e79733 100644 --- a/src/native/audio/xma/context.cpp +++ b/src/native/audio/xma/context.cpp @@ -52,9 +52,6 @@ XmaContext::~XmaContext() { if (av_frame_) { av_frame_free(&av_frame_); } - // if (current_frame_) { - // delete[] current_frame_; - // } } int XmaContext::Setup(uint32_t id, memory::Memory* memory, uint32_t guest_ptr) { @@ -94,35 +91,66 @@ int XmaContext::Setup(uint32_t id, memory::Memory* memory, uint32_t guest_ptr) { } bool XmaContext::Work() { - std::lock_guard lock(lock_); if (!is_allocated() || !is_enabled()) { return false; } + std::lock_guard lock(lock_); set_is_enabled(false); auto context_ptr = memory()->TranslateVirtual(guest_ptr()); XMA_CONTEXT_DATA data(context_ptr); - Decode(&data); - data.Store(context_ptr); - return true; -} + const XMA_CONTEXT_DATA initial_data = data; -void XmaContext::Enable() { - std::lock_guard lock(lock_); + if (!data.output_buffer_valid) { + return true; + } - auto context_ptr = memory()->TranslateVirtual(guest_ptr()); - XMA_CONTEXT_DATA data(context_ptr); + memory::RingBuffer output_rb = PrepareOutputRingBuffer(&data); - REXAPU_TRACE("XmaContext: kicking context {} (buffer {} {}/{} bits)", id(), - static_cast(data.current_buffer), - static_cast(data.input_buffer_read_offset), - (data.current_buffer == 0 ? data.input_buffer_0_packet_count - : data.input_buffer_1_packet_count) * - kBitsPerPacket); + // Consume-only context: no input, just drain remaining subframes. + if (data.IsConsumeOnlyContext()) { + if (current_frame_remaining_subframes_ == 0) { + return true; + } + Consume(&output_rb, &data); + data.output_buffer_write_offset = output_rb.write_offset() / kOutputBytesPerBlock; + StoreContextMerged(data, initial_data, context_ptr); + return true; + } - data.Store(context_ptr); + // Minimum free blocks needed before attempting a decode. + // Use subframe_decode_count (clamped to 1) instead of full frame size. + const uint32_t effective_sdc = std::max(static_cast(1), data.subframe_decode_count); + const int32_t minimum_subframe_decode_count = + static_cast(effective_sdc) + data.output_buffer_padding; + + if (minimum_subframe_decode_count > remaining_subframe_blocks_in_output_buffer_) { + StoreContextMerged(data, initial_data, context_ptr); + return true; + } + + while (remaining_subframe_blocks_in_output_buffer_ >= minimum_subframe_decode_count) { + Decode(&data); + Consume(&output_rb, &data); + + if (!data.IsAnyInputBufferValid() || data.error_status == 4) { + break; + } + } + data.output_buffer_write_offset = output_rb.write_offset() / kOutputBytesPerBlock; + + if (output_rb.empty()) { + data.output_buffer_valid = 0; + } + + StoreContextMerged(data, initial_data, context_ptr); + return true; +} + +void XmaContext::Enable() { + std::lock_guard lock(lock_); set_is_enabled(true); } @@ -143,685 +171,515 @@ void XmaContext::Clear() { auto context_ptr = memory()->TranslateVirtual(guest_ptr()); XMA_CONTEXT_DATA data(context_ptr); + ClearLocked(&data); + data.Store(context_ptr); +} - data.input_buffer_0_valid = 0; - data.input_buffer_1_valid = 0; - data.output_buffer_valid = 0; +void XmaContext::ClearLocked(XMA_CONTEXT_DATA* data) { + data->input_buffer_0_valid = 0; + data->input_buffer_1_valid = 0; + data->output_buffer_valid = 0; - data.output_buffer_read_offset = 0; - data.output_buffer_write_offset = 0; + data->input_buffer_read_offset = kBitsPerPacketHeader; + data->output_buffer_read_offset = 0; + data->output_buffer_write_offset = 0; - data.Store(context_ptr); + current_frame_remaining_subframes_ = 0; + loop_frame_output_limit_ = 0; + loop_start_skip_pending_ = false; } void XmaContext::Disable() { std::lock_guard lock(lock_); - REXAPU_TRACE("XmaContext: disabling context {}", id()); set_is_enabled(false); } void XmaContext::Release() { - // Lock it in case the decoder thread is working on it now. std::lock_guard lock(lock_); assert_true(is_allocated()); set_is_allocated(false); auto context_ptr = memory()->TranslateVirtual(guest_ptr()); - std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA)); // Zero it. + std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA)); } void XmaContext::SwapInputBuffer(XMA_CONTEXT_DATA* data) { - // No more frames. if (data->current_buffer == 0) { data->input_buffer_0_valid = 0; } else { data->input_buffer_1_valid = 0; } data->current_buffer ^= 1; - data->input_buffer_read_offset = 0; + data->input_buffer_read_offset = kBitsPerPacketHeader; } -bool XmaContext::TrySetupNextLoop(XMA_CONTEXT_DATA* data, bool ignore_input_buffer_offset) { - // Setup the input buffer offset if next loop exists. - // TODO(Pseudo-Kernel): Need to handle loop in the following cases. - // 1. loop_start == loop_end == 0 - // 2. loop_start > loop_end && loop_count > 0 - if (data->loop_count > 0 && data->loop_start < data->loop_end && - (ignore_input_buffer_offset || data->input_buffer_read_offset >= data->loop_end)) { - // Loop back to the beginning. - data->input_buffer_read_offset = data->loop_start; - if (data->loop_count < 255) { - data->loop_count--; - } - return true; +void XmaContext::UpdateLoopStatus(XMA_CONTEXT_DATA* data) { + if (data->loop_count == 0) { + return; } - return false; -} -/* -void XmaContext::NextPacket( - uint8_t* input_buffer, - uint32_t input_size, - uint32_t input_buffer_read_offset) { -*/ -void XmaContext::NextPacket(XMA_CONTEXT_DATA* data) { - // auto packet_idx = GetFramePacketNumber(input_buffer, input_size, - // input_buffer_read_offset); + const uint32_t loop_start = std::max(kBitsPerPacketHeader, data->loop_start); + const uint32_t loop_end = std::max(kBitsPerPacketHeader, data->loop_end); + + if (data->input_buffer_read_offset != loop_end) { + return; + } - // packet_idx++; - // if (packet_idx++ >= input_size) + data->input_buffer_read_offset = loop_start; + loop_start_skip_pending_ = true; + + if (data->loop_count < 255) { + data->loop_count--; + } } int XmaContext::GetSampleRate(int id) { - switch (id) { - case 0: - return 24000; - case 1: - return 32000; - case 2: - return 44100; - case 3: - return 48000; - } - assert_always(); - return 0; + return kIdToSampleRate[std::min(id, 3)]; } -bool XmaContext::ValidFrameOffset(uint8_t* block, size_t size_bytes, size_t frame_offset_bits) { - uint32_t packet_num = GetFramePacketNumber(block, size_bytes, frame_offset_bits); - if (packet_num == -1) { - // Invalid packet number - return false; +int16_t XmaContext::GetPacketNumber(size_t size, size_t bit_offset) { + if (bit_offset < kBitsPerPacketHeader) { + assert_always(); + return -1; + } + if (bit_offset >= (size << 3)) { + assert_always(); + return -1; } + size_t byte_offset = bit_offset >> 3; + size_t packet_number = byte_offset / kBytesPerPacket; + return static_cast(packet_number); +} - uint8_t* packet = block + (packet_num * kBytesPerPacket); - size_t relative_offset_bits = frame_offset_bits % kBitsPerPacket; +uint32_t XmaContext::GetCurrentInputBufferSize(XMA_CONTEXT_DATA* data) { + return data->GetCurrentInputBufferPacketCount() * kBytesPerPacket; +} - uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet); - if (first_frame_offset == -1 || first_frame_offset > kBitsPerPacket) { - // Packet only contains a partial frame, so no frames can start here. - return false; +uint8_t* XmaContext::GetCurrentInputBuffer(XMA_CONTEXT_DATA* data) { + return memory()->TranslatePhysical(data->GetCurrentInputBufferAddress()); +} + +uint32_t XmaContext::GetAmountOfBitsToRead(uint32_t remaining_stream_bits, uint32_t frame_size) { + return std::min(remaining_stream_bits, frame_size); +} + +const uint8_t* XmaContext::GetNextPacket(XMA_CONTEXT_DATA* data, uint32_t next_packet_index, + uint32_t current_input_packet_count) { + if (next_packet_index < current_input_packet_count) { + return memory()->TranslatePhysical(data->GetCurrentInputBufferAddress()) + + next_packet_index * kBytesPerPacket; + } + + const uint8_t next_buffer_index = data->current_buffer ^ 1; + if (!data->IsInputBufferValid(next_buffer_index)) { + return nullptr; + } + + const uint32_t next_buffer_address = data->GetInputBufferAddress(next_buffer_index); + if (!next_buffer_address) { + REXAPU_ERROR("XmaContext {}: Buffer marked valid but has null pointer!", id()); + return nullptr; } + return memory()->TranslatePhysical(next_buffer_address); +} + +uint32_t XmaContext::GetNextPacketReadOffset(uint8_t* buffer, uint32_t next_packet_index, + uint32_t current_input_packet_count) { + while (next_packet_index < current_input_packet_count) { + uint8_t* next_packet = buffer + (next_packet_index * kBytesPerPacket); + const uint32_t packet_frame_offset = xma::GetPacketFrameOffset(next_packet); + + if (packet_frame_offset <= kMaxFrameSizeinBits) { + return (next_packet_index * kBitsPerPacket) + packet_frame_offset; + } + next_packet_index++; + } + + return kBitsPerPacketHeader; +} + +memory::RingBuffer XmaContext::PrepareOutputRingBuffer(XMA_CONTEXT_DATA* data) { + const uint32_t output_capacity = data->output_buffer_block_count * kOutputBytesPerBlock; + const uint32_t output_read_offset = data->output_buffer_read_offset * kOutputBytesPerBlock; + const uint32_t output_write_offset = data->output_buffer_write_offset * kOutputBytesPerBlock; + + if (output_capacity > kOutputMaxSizeBytes) { + REXAPU_WARN( + "XmaContext {}: Output buffer exceeds expected size! " + "(Actual: {} Max: {})", + id(), output_capacity, kOutputMaxSizeBytes); + } + + uint8_t* output_buffer = memory()->TranslatePhysical(data->output_buffer_ptr); + + memory::RingBuffer output_rb(output_buffer, output_capacity); + output_rb.set_read_offset(output_read_offset); + output_rb.set_write_offset(output_write_offset); + remaining_subframe_blocks_in_output_buffer_ = + static_cast(output_rb.write_count()) / kOutputBytesPerBlock; + + return output_rb; +} + +kPacketInfo XmaContext::GetPacketInfo(uint8_t* packet, uint32_t frame_offset) { + kPacketInfo packet_info = {}; + + const uint32_t first_frame_offset = xma::GetPacketFrameOffset(packet); BitStream stream(packet, kBitsPerPacket); stream.SetOffset(first_frame_offset); + + if (frame_offset < first_frame_offset) { + packet_info.current_frame_ = 0; + packet_info.current_frame_size_ = first_frame_offset - frame_offset; + } + while (true) { - if (stream.offset_bits() == relative_offset_bits) { - return true; + if (stream.BitsRemaining() < kBitsPerFrameHeader) { + break; } - if (stream.BitsRemaining() < 15) { - // Not enough room for another frame header. - return false; + const uint64_t frame_size = stream.Peek(kBitsPerFrameHeader); + if (frame_size == 0 || frame_size == xma::kMaxFrameLength) { + break; } - uint64_t size = stream.Read(15); - if ((size - 15) > stream.BitsRemaining()) { - // Last frame. - return false; - } else if (size == 0x7FFF) { - // Invalid frame (and last of this packet) - return false; + if (stream.offset_bits() == frame_offset) { + packet_info.current_frame_ = packet_info.frame_count_; + packet_info.current_frame_size_ = static_cast(frame_size); } - stream.Advance(size - 16); + packet_info.frame_count_++; - // Read the trailing bit to see if frames follow - if (stream.Read(1) == 0) { + if (frame_size > stream.BitsRemaining()) { break; } - } - return false; -} + stream.Advance(frame_size - 1); -static void dump_raw(AVFrame* frame, int id) { - FILE* outfile = fopen(fmt::format("out{}.raw", id).c_str(), "ab"); - if (!outfile) { - return; + if (stream.Read(1) == 0) { + break; + } } - size_t data_size = sizeof(float); - for (int i = 0; i < frame->nb_samples; i++) { - for (int ch = 0; ch < frame->channels; ch++) { - fwrite(frame->data[ch] + data_size * i, 1, data_size, outfile); + + if (xma::IsPacketXma2Type(packet)) { + const uint8_t xma2_frame_count = xma::GetPacketFrameCount(packet); + if (xma2_frame_count > packet_info.frame_count_) { + if (packet_info.current_frame_size_ == 0) { + packet_info.current_frame_ = packet_info.frame_count_; + } + packet_info.frame_count_ = xma2_frame_count; } } - fclose(outfile); + return packet_info; } -void XmaContext::Decode(XMA_CONTEXT_DATA* data) { - SCOPE_profile_cpu_f("apu"); - - // What I see: - // XMA outputs 2 bytes per sample - // 512 samples per frame (128 per subframe) - // Max output size is data.output_buffer_block_count * 256 +void XmaContext::StoreContextMerged(const XMA_CONTEXT_DATA& data, + const XMA_CONTEXT_DATA& initial_data, uint8_t* context_ptr) { + XMA_CONTEXT_DATA fresh(context_ptr); - // This decoder is fed packets (max 4095 per buffer) - // Packets contain "some" frames - // 32bit header (big endian) + fresh.loop_count = data.loop_count; + fresh.output_buffer_write_offset = data.output_buffer_write_offset; + if (initial_data.input_buffer_0_valid && !data.input_buffer_0_valid) { + fresh.input_buffer_0_valid = 0; + } + if (initial_data.input_buffer_1_valid && !data.input_buffer_1_valid) { + fresh.input_buffer_1_valid = 0; + } - // Frames are the smallest thing the SPUs can decode. - // They can and usually will span packets. + if (initial_data.output_buffer_valid && !data.output_buffer_valid) { + fresh.output_buffer_valid = 0; + } - // Sample rates (data.sample_rate): - // 0 - 24 kHz - // 1 - 32 kHz - // 2 - 44.1 kHz - // 3 - 48 kHz + fresh.input_buffer_read_offset = data.input_buffer_read_offset; + fresh.error_status = data.error_status; + fresh.current_buffer = data.current_buffer; + fresh.output_buffer_read_offset = data.output_buffer_read_offset; - // SPUs also support stereo decoding. (data.is_stereo) + fresh.Store(context_ptr); +} - // Check the output buffer - we cannot decode anything else if it's - // unavailable. - if (!data->output_buffer_valid) { +void XmaContext::Consume(memory::RingBuffer* output_rb, const XMA_CONTEXT_DATA* data) { + if (!current_frame_remaining_subframes_) { return; } - // No available data. - if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { - data->output_buffer_valid = 0; - return; + if (loop_frame_output_limit_ > 0) { + const uint8_t total_subframes = (kBytesPerFrameChannel / kOutputBytesPerBlock) + << data->is_stereo; + const uint8_t consumed = total_subframes - current_frame_remaining_subframes_; + if (consumed >= loop_frame_output_limit_) { + remaining_subframe_blocks_in_output_buffer_ -= data->output_buffer_padding; + current_frame_remaining_subframes_ = 0; + loop_frame_output_limit_ = 0; + return; + } } - // XAudio Loops - // loop_count: - // - XAUDIO2_MAX_LOOP_COUNT = 254 - // - XAUDIO2_LOOP_INFINITE = 255 - // loop_start/loop_end are bit offsets to a specific frame - - // Translate pointers for future use. - // Sometimes the game will use rolling input buffers. If they do, we cannot - // assume they form a complete block! In addition, the buffers DO NOT have - // to be contiguous! - uint8_t* in0 = - data->input_buffer_0_valid ? memory()->TranslatePhysical(data->input_buffer_0_ptr) : nullptr; - uint8_t* in1 = - data->input_buffer_1_valid ? memory()->TranslatePhysical(data->input_buffer_1_ptr) : nullptr; - uint8_t* current_input_buffer = data->current_buffer ? in1 : in0; - - REXAPU_TRACE("Processing context {} (offset {}, buffer {}, ptr {:p})", id(), - static_cast(data->input_buffer_read_offset), - static_cast(data->current_buffer), - static_cast(current_input_buffer)); - - size_t input_buffer_0_size = data->input_buffer_0_packet_count * kBytesPerPacket; - size_t input_buffer_1_size = data->input_buffer_1_packet_count * kBytesPerPacket; - size_t input_total_size = input_buffer_0_size + input_buffer_1_size; - - size_t current_input_size = data->current_buffer ? input_buffer_1_size : input_buffer_0_size; - size_t current_input_packet_count = current_input_size / kBytesPerPacket; - - // Output buffers are in raw PCM samples, 256 bytes per block. - // Output buffer is a ring buffer. We need to write from the write offset - // to the read offset. - uint8_t* output_buffer = memory()->TranslatePhysical(data->output_buffer_ptr); - uint32_t output_capacity = data->output_buffer_block_count * kBytesPerSubframeChannel; - uint32_t output_read_offset = data->output_buffer_read_offset * kBytesPerSubframeChannel; - uint32_t output_write_offset = data->output_buffer_write_offset * kBytesPerSubframeChannel; - - memory::RingBuffer output_rb(output_buffer, output_capacity); - output_rb.set_read_offset(output_read_offset); - output_rb.set_write_offset(output_write_offset); + const uint8_t effective_sdc = std::max(static_cast(1), data->subframe_decode_count); + int8_t subframes_to_write = std::min(static_cast(current_frame_remaining_subframes_), + static_cast(effective_sdc)); - // We can only decode an entire frame and write it out at a time, so - // don't save any samples. - // TODO(JoelLinn): subframes when looping - size_t output_remaining_bytes = output_rb.write_count(); - output_remaining_bytes -= output_remaining_bytes % (kBytesPerFrameChannel << data->is_stereo); - - // is_dirty_ = true; // TODO - // is_dirty_ = false; // TODO - assert_false(data->stop_when_done); - assert_false(data->interrupt_when_done); - static int total_samples = 0; - bool reuse_input_buffer = false; - // Decode until we can't write any more data. - while (output_remaining_bytes > 0) { - if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) { - // Out of data. - break; + if (loop_frame_output_limit_ > 0) { + const uint8_t total_subframes = (kBytesPerFrameChannel / kOutputBytesPerBlock) + << data->is_stereo; + const uint8_t consumed = total_subframes - current_frame_remaining_subframes_; + const int8_t remaining_until_limit = static_cast(loop_frame_output_limit_ - consumed); + if (subframes_to_write > remaining_until_limit) { + subframes_to_write = remaining_until_limit; } + } - // Setup the input buffer if we are at loop_end. - // The input buffer must not be swapped out until all loops are processed. - reuse_input_buffer = TrySetupNextLoop(data, false); - - // assert_true(packets_skip_ == 0); - // assert_true(split_frame_len_ == 0); - // assert_true(split_frame_len_partial_ == 0); - - // Where are we in the buffer (in XMA jargon) - int packet_idx, frame_idx, frame_count; - uint8_t* packet; - bool frame_last_split; - - BitStream stream(current_input_buffer, current_input_size * 8); - stream.SetOffset(data->input_buffer_read_offset); - - // if we had a buffer swap try to skip packets first - if (packets_skip_ > 0) { - packet_idx = GetFramePacketNumber(current_input_buffer, current_input_size, - data->input_buffer_read_offset); - while (packets_skip_ > 0) { - packets_skip_--; - packet_idx++; - if (packet_idx >= current_input_packet_count) { - if (!reuse_input_buffer) { - // Last packet. Try setup once more. - reuse_input_buffer = TrySetupNextLoop(data, true); - } - if (!reuse_input_buffer) { - SwapInputBuffer(data); - } - return; - } - } - // invalid frame pointer but needed for us - data->input_buffer_read_offset = packet_idx * kBitsPerPacket; - // continue; - } + const int8_t raw_frame_read_offset = + ((kBytesPerFrameChannel / kOutputBytesPerBlock) << data->is_stereo) - + current_frame_remaining_subframes_; - if (split_frame_len_) { - // handle a frame that was split over two packages - packet_idx = GetFramePacketNumber(current_input_buffer, current_input_size, - data->input_buffer_read_offset); - packet = current_input_buffer + packet_idx * kBytesPerPacket; - std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet); - frame_idx = -1; - - stream = BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket); - stream.SetOffset(packet_idx * kBitsPerPacket + 32); - - if (split_frame_len_ > xma::kMaxFrameLength) { - // TODO write CopyPeekMethod - auto offset = stream.offset_bits(); - stream.Copy( - xma_frame_.data() + 1 + ((split_frame_len_partial_ + split_frame_padding_start_) / 8), - 15 - split_frame_len_partial_); - stream.SetOffset(offset); - BitStream slen(xma_frame_.data() + 1, 15 + split_frame_padding_start_); - slen.Advance(split_frame_padding_start_); - split_frame_len_ = static_cast(slen.Read(15)); - } + output_rb->Write(raw_frame_.data() + (kOutputBytesPerBlock * raw_frame_read_offset), + subframes_to_write * kOutputBytesPerBlock); - if (frame_count > 0) { - assert_true(xma::GetPacketFrameOffset(packet) - 32 == - split_frame_len_ - split_frame_len_partial_); - } - - auto offset = stream.Copy( - xma_frame_.data() + 1 + ((split_frame_len_partial_ + split_frame_padding_start_) / 8), - split_frame_len_ - split_frame_len_partial_); - assert_true(offset == (split_frame_padding_start_ + split_frame_len_partial_) % 8); - } else { - if (data->input_buffer_read_offset % kBitsPerPacket == 0) { - // Invalid offset. Go ahead and set it. - int packet_number = GetFramePacketNumber(current_input_buffer, current_input_size, - data->input_buffer_read_offset); - - if (packet_number == -1) { - return; - } - - auto offset = - xma::GetPacketFrameOffset(current_input_buffer + kBytesPerPacket * packet_number) + - data->input_buffer_read_offset; - if (offset == -1) { - // No more frames. - SwapInputBuffer(data); - // TODO partial frames? end? - REXAPU_ERROR("XmaContext {}: TODO partial frames? end?", id()); - assert_always("TODO"); - return; - } else { - data->input_buffer_read_offset = offset; - } - } + const int8_t headroom = (current_frame_remaining_subframes_ - subframes_to_write == 0) + ? data->output_buffer_padding + : 0; - if (!ValidFrameOffset(current_input_buffer, current_input_size, - data->input_buffer_read_offset)) { - REXAPU_DEBUG("XmaContext {}: Invalid read offset {}!", id(), - static_cast(data->input_buffer_read_offset)); - SwapInputBuffer(data); - return; - } + remaining_subframe_blocks_in_output_buffer_ -= subframes_to_write + headroom; + current_frame_remaining_subframes_ -= subframes_to_write; +} - // Where are we in the buffer (in XMA jargon) - std::tie(packet_idx, frame_idx) = - GetFrameNumber(current_input_buffer, current_input_size, data->input_buffer_read_offset); - // TODO handle - assert_true(packet_idx >= 0); - assert_true(frame_idx >= 0); - packet = current_input_buffer + packet_idx * kBytesPerPacket; - // frames that belong to this packet - std::tie(frame_count, frame_last_split) = GetPacketFrameCount(packet); - assert_true(frame_count >= 0); // TODO end - - PrepareDecoder(packet, data->sample_rate, bool(data->is_stereo)); - - // Current frame is split to next packet: - bool frame_is_split = frame_last_split && (frame_idx >= frame_count - 1); - - stream = BitStream(current_input_buffer, (packet_idx + 1) * kBitsPerPacket); - stream.SetOffset(data->input_buffer_read_offset); - // int frame_len; - // int frame_len_partial - split_frame_len_partial_ = static_cast(stream.BitsRemaining()); - if (split_frame_len_partial_ >= 15) { - split_frame_len_ = static_cast(stream.Peek(15)); - } else { - // assert_always(); - split_frame_len_ = xma::kMaxFrameLength + 1; - } - assert_true(frame_is_split == (split_frame_len_ > split_frame_len_partial_)); +int XmaContext::PrepareDecoder(int sample_rate, bool is_two_channel) { + sample_rate = GetSampleRate(sample_rate); - // TODO fix bitstream copy - std::memset(xma_frame_.data(), 0, xma_frame_.size()); + uint32_t channels = is_two_channel ? 2 : 1; + if (av_context_->sample_rate != sample_rate || + av_context_->channels != static_cast(channels)) { + avcodec_close(av_context_); + av_free(av_context_); + av_context_ = avcodec_alloc_context3(av_codec_); - { - auto offset = stream.Copy(xma_frame_.data() + 1, - std::min(split_frame_len_, split_frame_len_partial_)); - assert_true(offset < 8); - split_frame_padding_start_ = static_cast(offset); - } + av_context_->sample_rate = sample_rate; + av_context_->channels = channels; - if (frame_is_split) { - // go to next xma packet of this stream - packets_skip_ = xma::GetPacketSkipCount(packet) + 1; - while (packets_skip_ > 0) { - packets_skip_--; - packet += kBytesPerPacket; - packet_idx++; - if (packet_idx >= current_input_packet_count) { - if (!reuse_input_buffer) { - // Last packet. Try setup once more. - reuse_input_buffer = TrySetupNextLoop(data, true); - } - if (!reuse_input_buffer) { - SwapInputBuffer(data); - } - return; - } - } - // TODO guest might read this: - data->input_buffer_read_offset = packet_idx * kBitsPerPacket; - continue; - } + if (avcodec_open2(av_context_, av_codec_, NULL) < 0) { + REXAPU_ERROR("XmaContext: Failed to reopen FFmpeg context"); + return -1; } + return 1; + } + return 0; +} - av_packet_->data = xma_frame_.data(); - av_packet_->size = - static_cast(1 + ((split_frame_padding_start_ + split_frame_len_) / 8) + - (((split_frame_padding_start_ + split_frame_len_) % 8) ? 1 : 0)); - - auto padding_end = av_packet_->size * 8 - (8 + split_frame_padding_start_ + split_frame_len_); - assert_true(padding_end < 8); - xma_frame_[0] = ((split_frame_padding_start_ & 7) << 5) | ((padding_end & 7) << 2); +void XmaContext::PreparePacket(uint32_t frame_size, uint32_t frame_padding) { + av_packet_->data = xma_frame_.data(); + av_packet_->size = static_cast(1 + ((frame_padding + frame_size) / 8) + + (((frame_padding + frame_size) % 8) ? 1 : 0)); - split_frame_len_ = 0; - split_frame_len_partial_ = 0; - split_frame_padding_start_ = 0; + auto padding_end = av_packet_->size * 8 - (8 + frame_padding + frame_size); + assert_true(padding_end < 8); + xma_frame_[0] = ((frame_padding & 7) << 5) | ((padding_end & 7) << 2); +} - auto ret = avcodec_send_packet(av_context_, av_packet_); - if (ret < 0) { - REXAPU_ERROR("XmaContext {}: Error sending packet for decoding", id()); - // TODO bail out - assert_always(); - } - ret = avcodec_receive_frame(av_context_, av_frame_); - /* - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) - // TODO AVERROR_EOF??? - break; - else - */ - if (ret < 0) { - REXAPU_ERROR("XmaContext {}: Error during decoding", id()); - assert_always(); - return; // TODO bail out - } - assert_true(ret == 0); - - { - // copy over 1 frame - // update input buffer read offset - - // assert(decoded_consumed_samples_ + kSamplesPerFrame <= - // current_frame_.size()); - assert_true(av_context_->sample_fmt == AV_SAMPLE_FMT_FLTP); - // assert_true(frame_is_split == (frame_idx == -1)); - - // dump_raw(av_frame_, id()); - ConvertFrame((const uint8_t**)av_frame_->data, bool(data->is_stereo), raw_frame_.data()); - // decoded_consumed_samples_ += kSamplesPerFrame; - - auto byte_count = kBytesPerFrameChannel << data->is_stereo; - assert_true(output_remaining_bytes >= byte_count); - output_rb.Write(raw_frame_.data(), byte_count); - output_remaining_bytes -= byte_count; - data->output_buffer_write_offset = output_rb.write_offset() / 256; - - total_samples += id_ == 0 ? kSamplesPerFrame : 0; - - uint32_t offset = data->input_buffer_read_offset; - // if (offset % (kBytesPerSample * 8) == 0) { - // offset = xma::GetPacketFrameOffset(packet); - //} - offset = - static_cast(GetNextFrame(current_input_buffer, current_input_size, offset)); - // assert_true((offset == 0) == - // (frame_is_split || (frame_idx + 1 >= frame_count))); - if (frame_idx + 1 >= frame_count) { - // Skip to next packet (no split frame) - packets_skip_ = xma::GetPacketSkipCount(packet) + 1; - while (packets_skip_ > 0) { - packets_skip_--; - packet_idx++; - if (packet_idx >= current_input_packet_count) { - if (!reuse_input_buffer) { - // Last packet. Try setup once more. - reuse_input_buffer = TrySetupNextLoop(data, true); - } - if (!reuse_input_buffer) { - SwapInputBuffer(data); - } - return; - } - } - packet = current_input_buffer + packet_idx * kBytesPerPacket; - offset = xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket; - } - if (offset == 0 || frame_idx == -1) { - // Next packet but we already skipped to it - if (packet_idx >= current_input_packet_count) { - // Buffer is fully used - if (!reuse_input_buffer) { - // Last packet. Try setup once more. - reuse_input_buffer = TrySetupNextLoop(data, true); - } - if (!reuse_input_buffer) { - SwapInputBuffer(data); - } - break; - } - offset = xma::GetPacketFrameOffset(packet) + packet_idx * kBitsPerPacket; - } - // TODO buffer bounds check - assert_true(data->input_buffer_read_offset < offset); - data->input_buffer_read_offset = offset; - } +bool XmaContext::DecodePacket(AVCodecContext* av_context, const AVPacket* av_packet, + AVFrame* av_frame) { + auto ret = avcodec_send_packet(av_context, av_packet); + if (ret < 0) { + REXAPU_ERROR("XmaContext {}: Error sending packet for decoding ({})", id(), ret); + return false; } + ret = avcodec_receive_frame(av_context, av_frame); - // assert_true((split_frame_len_ != 0) == (data->input_buffer_read_offset == - // 0)); - - // The game will kick us again with a new output buffer later. - // It's important that we only invalidate this if we actually wrote to it!! - if (output_rb.write_offset() == output_rb.read_offset()) { - data->output_buffer_valid = 0; + if (ret == AVERROR(EAGAIN)) { + return false; } + if (ret < 0) { + REXAPU_ERROR("XmaContext {}: Error during decoding ({})", id(), ret); + return false; + } + return true; } -size_t XmaContext::GetNextFrame(uint8_t* block, size_t size, size_t bit_offset) { - // offset = xma::GetPacketFrameOffset(packet); - // TODO meh - // auto next_packet = bit_offset - bit_offset % kBitsPerPacket + - // kBitsPerPacket; - auto packet_idx = GetFramePacketNumber(block, size, bit_offset); - - BitStream stream(block, size * 8); - stream.SetOffset(bit_offset); +void XmaContext::Decode(XMA_CONTEXT_DATA* data) { + SCOPE_profile_cpu_f("apu"); - if (stream.BitsRemaining() < 15) { - return 0; + if (!data->IsAnyInputBufferValid()) { + return; } - uint64_t len = stream.Read(15); - if ((len - 15) > stream.BitsRemaining()) { - // assert_always("TODO"); - // *bit_offset = next_packet; - // return false; - // return next_packet; - return 0; - } else if (len >= xma::kMaxFrameLength) { - // assert_always("TODO"); - // *bit_offset = next_packet; - // return false; - return 0; - // return next_packet; + if (current_frame_remaining_subframes_ > 0) { + return; } - stream.Advance(len - (15 + 1)); - // Read the trailing bit to see if frames follow - if (stream.Read(1) == 0) { - return 0; + if (!data->IsCurrentInputBufferValid()) { + SwapInputBuffer(data); + if (!data->IsCurrentInputBufferValid()) { + return; + } } - bit_offset += len; - if (packet_idx < GetFramePacketNumber(block, size, bit_offset)) { - return 0; + uint8_t* current_input_buffer = GetCurrentInputBuffer(data); + + input_buffer_.fill(0); + + // Detect loop end frame before UpdateLoopStatus resets the offset. + bool is_loop_end_frame = false; + if (data->loop_count > 0) { + const uint32_t loop_end = std::max(kBitsPerPacketHeader, data->loop_end); + is_loop_end_frame = (data->input_buffer_read_offset == loop_end); } - return bit_offset; -} -int XmaContext::GetFramePacketNumber(uint8_t* block, size_t size, size_t bit_offset) { - size *= 8; - if (bit_offset >= size) { - // Not good :( - assert_always(); - return -1; + UpdateLoopStatus(data); + + if (!data->output_buffer_block_count) { + REXAPU_ERROR("XmaContext {}: Error - Received 0 for output_buffer_block_count!", id()); + return; } - size_t byte_offset = bit_offset >> 3; - size_t packet_number = byte_offset / kBytesPerPacket; + if (data->input_buffer_read_offset < kBitsPerPacketHeader) { + data->input_buffer_read_offset = kBitsPerPacketHeader; + } - return (uint32_t)packet_number; -} + const uint32_t current_input_size = GetCurrentInputBufferSize(data); + const uint32_t current_input_packet_count = current_input_size / kBytesPerPacket; -std::tuple XmaContext::GetFrameNumber(uint8_t* block, size_t size, size_t bit_offset) { - auto packet_idx = GetFramePacketNumber(block, size, bit_offset); + const int16_t packet_index = GetPacketNumber(current_input_size, data->input_buffer_read_offset); - if (packet_idx < 0 || (packet_idx + 1) * kBytesPerPacket > size) { - assert_always(); - return {packet_idx, -2}; + if (packet_index == -1) { + REXAPU_ERROR("XmaContext {}: Invalid packet index. Input read offset: {}", id(), + static_cast(data->input_buffer_read_offset)); + return; } - if (bit_offset == 0) { - return {packet_idx, -1}; + uint8_t* packet = current_input_buffer + (packet_index * kBytesPerPacket); + const uint32_t packet_first_frame_offset = xma::GetPacketFrameOffset(packet); + uint32_t relative_offset = data->input_buffer_read_offset % kBitsPerPacket; + + if (relative_offset < packet_first_frame_offset) { + data->input_buffer_read_offset = (packet_index * kBitsPerPacket) + packet_first_frame_offset; + relative_offset = packet_first_frame_offset; } - uint8_t* packet = block + (packet_idx * kBytesPerPacket); - auto first_frame_offset = xma::GetPacketFrameOffset(packet); - BitStream stream(block, size * 8); - stream.SetOffset(packet_idx * kBitsPerPacket + first_frame_offset); + const uint8_t skip_count = xma::GetPacketSkipCount(packet); - int frame_idx = 0; - while (true) { - if (stream.BitsRemaining() < 15) { - break; + // Full packet skip (0xFF) -- no new frames begin in this packet. + if (skip_count == 0xFF) { + uint32_t next_input_offset = + GetNextPacketReadOffset(current_input_buffer, packet_index + 1, current_input_packet_count); + if (next_input_offset == kBitsPerPacketHeader) { + SwapInputBuffer(data); } + data->input_buffer_read_offset = next_input_offset; + return; + } - if (stream.offset_bits() == bit_offset) { - break; - } + kPacketInfo packet_info = GetPacketInfo(packet, relative_offset); + const uint32_t packet_to_skip = skip_count + 1; + const uint32_t next_packet_index = packet_index + packet_to_skip; - uint64_t size = stream.Read(15); - if ((size - 15) > stream.BitsRemaining()) { - // Last frame. - break; - } else if (size == 0x7FFF) { - // Invalid frame (and last of this packet) - break; + // Frame header split across packet boundary. + if (packet_info.current_frame_size_ == 0) { + const uint8_t* next_packet = GetNextPacket(data, next_packet_index, current_input_packet_count); + if (!next_packet) { + SwapInputBuffer(data); + return; } + std::memcpy(input_buffer_.data(), packet + kBytesPerPacketHeader, kBytesPerPacketData); + std::memcpy(input_buffer_.data() + kBytesPerPacketData, next_packet + kBytesPerPacketHeader, + kBytesPerPacketData); - stream.Advance(size - (15 + 1)); + BitStream combined(input_buffer_.data(), (kBitsPerPacket - kBitsPerPacketHeader) * 2); + combined.SetOffset(relative_offset - kBitsPerPacketHeader); - // Read the trailing bit to see if frames follow - if (stream.Read(1) == 0) { - break; + uint64_t frame_size = combined.Peek(kBitsPerFrameHeader); + if (frame_size == xma::kMaxFrameLength) { + data->error_status = 4; + return; } - frame_idx++; + packet_info.current_frame_size_ = static_cast(frame_size); } - return {packet_idx, frame_idx}; -} -std::tuple XmaContext::GetPacketFrameCount(uint8_t* packet) { - auto first_frame_offset = xma::GetPacketFrameOffset(packet); - if (first_frame_offset > kBitsPerPacket - kBitsPerHeader) { - // frame offset is beyond packet end - return {0, false}; - } + BitStream stream(current_input_buffer, (packet_index + 1) * kBitsPerPacket); + stream.SetOffset(data->input_buffer_read_offset); - BitStream stream(packet, kBitsPerPacket); - stream.SetOffset(first_frame_offset); - int frame_count = 0; + const uint64_t bits_to_copy = GetAmountOfBitsToRead(static_cast(stream.BitsRemaining()), + packet_info.current_frame_size_); - while (true) { - frame_count++; - if (stream.BitsRemaining() < 15) { - return {frame_count, true}; - } + if (bits_to_copy == 0) { + REXAPU_ERROR("XmaContext {}: There are no bits to copy!", id()); + SwapInputBuffer(data); + return; + } - uint64_t size = stream.Read(15); - if ((size - 15) > stream.BitsRemaining()) { - return {frame_count, true}; - } else if (size == 0x7FFF) { - assert_always(); - return {frame_count, true}; + if (packet_info.isLastFrameInPacket()) { + if (stream.BitsRemaining() < packet_info.current_frame_size_) { + const uint8_t* next_packet = + GetNextPacket(data, next_packet_index, current_input_packet_count); + if (!next_packet) { + data->error_status = 4; + return; + } + std::memcpy(input_buffer_.data() + kBytesPerPacketData, next_packet + kBytesPerPacketHeader, + kBytesPerPacketData); } + } - stream.Advance(size - (15 + 1)); + std::memcpy(input_buffer_.data(), packet + kBytesPerPacketHeader, kBytesPerPacketData); - if (stream.Read(1) == 0) { - return {frame_count, false}; + stream = BitStream(input_buffer_.data(), (kBitsPerPacket - kBitsPerPacketHeader) * 2); + stream.SetOffset(relative_offset - kBitsPerPacketHeader); + + xma_frame_.fill(0); + + const uint32_t padding_start = + static_cast(stream.Copy(xma_frame_.data() + 1, packet_info.current_frame_size_)); + + raw_frame_.fill(0); + + PrepareDecoder(data->sample_rate, bool(data->is_stereo)); + PreparePacket(packet_info.current_frame_size_, padding_start); + if (DecodePacket(av_context_, av_packet_, av_frame_)) { + ConvertFrame(reinterpret_cast(&av_frame_->data), bool(data->is_stereo), + raw_frame_.data()); + current_frame_remaining_subframes_ = 4 << data->is_stereo; + + // Loop end: limit output to subframes 0..loop_subframe_end. + if (is_loop_end_frame) { + loop_frame_output_limit_ = (data->loop_subframe_end + 1) << data->is_stereo; + } else { + loop_frame_output_limit_ = 0; } - } -} -int XmaContext::PrepareDecoder(uint8_t* packet, int sample_rate, bool is_two_channel) { - // Sanity check: Packet metadata is always 1 for XMA2/0 for XMA - assert_true((packet[2] & 0x7) == 1 || (packet[2] & 0x7) == 0); + // Loop start: skip leading subframes per loop_subframe_skip. + if (loop_start_skip_pending_) { + const uint8_t skip = data->loop_subframe_skip << data->is_stereo; + if (skip < current_frame_remaining_subframes_) { + current_frame_remaining_subframes_ -= skip; + } + loop_start_skip_pending_ = false; + } + } - sample_rate = GetSampleRate(sample_rate); + // Compute where to go next. + if (!packet_info.isLastFrameInPacket()) { + const uint32_t next_frame_offset = + (data->input_buffer_read_offset + bits_to_copy) % kBitsPerPacket; + data->input_buffer_read_offset = (packet_index * kBitsPerPacket) + next_frame_offset; + return; + } - // Re-initialize the context with new sample rate and channels. - uint32_t channels = is_two_channel ? 2 : 1; - if (av_context_->sample_rate != sample_rate || av_context_->channels != channels) { - // We have to reopen the codec so it'll realloc whatever data it needs. - // TODO(DrChat): Find a better way. - avcodec_close(av_context_); + uint32_t next_input_offset = + GetNextPacketReadOffset(current_input_buffer, next_packet_index, current_input_packet_count); - av_context_->sample_rate = sample_rate; - av_context_->channels = channels; + if (next_input_offset == kBitsPerPacketHeader) { + SwapInputBuffer(data); + if (data->IsAnyInputBufferValid()) { + next_input_offset = xma::GetPacketFrameOffset( + memory()->TranslatePhysical(data->GetCurrentInputBufferAddress())); - if (avcodec_open2(av_context_, av_codec_, NULL) < 0) { - REXAPU_ERROR("XmaContext: Failed to reopen FFmpeg context"); - return -1; + if (next_input_offset > kMaxFrameSizeinBits) { + SwapInputBuffer(data); + return; + } } - return 1; } - return 0; + data->input_buffer_read_offset = next_input_offset; } void XmaContext::ConvertFrame(const uint8_t** samples, bool is_two_channel, -- 2.52.0.windows.1