From 882d20f6862844b47380e4ae24c58f13aabe91d8 Mon Sep 17 00:00:00 2001 From: salh Date: Sun, 19 Apr 2026 13:05:43 +0300 Subject: [PATCH] Refactor native graphics backend and fix build errors --- .gitignore | 1 + CMakeLists.txt | 6 + README.md | 154 +-- docs/RENDERER_ARCHITECTURE.txt | 30 + .../ac6_backend_capture_bridge.cpp | 218 ++++ .../ac6_backend_capture_bridge.h | 66 + src/ac6_backend_fixes/ac6_backend_hooks.cpp | 197 +++ src/ac6_backend_fixes/ac6_backend_hooks.h | 102 ++ .../ac6_backend_pass_classifier.cpp | 60 + .../ac6_backend_pass_classifier.h | 10 + src/ac6_native_graphics.cpp | 338 +++-- src/ac6_native_graphics.h | 34 +- src/ac6_native_graphics_overlay.cpp | 163 +-- .../ac6_render_frontend.cpp | 9 +- src/ac6_native_renderer/ac6_render_frontend.h | 1 + .../backends/d3d12_backend.cpp | 1093 ++++++++++++++--- .../backends/d3d12_backend.h | 105 +- .../backends/d3d12_resource_manager.cpp | 258 ++++ .../backends/d3d12_resource_manager.h | 84 ++ .../backends/d3d12_resource_tracker.cpp | 52 + .../backends/d3d12_resource_tracker.h | 33 + .../backends/d3d12_shader_manager.cpp | 283 +++++ .../backends/d3d12_shader_manager.h | 63 + .../backends/metal_backend.cpp | 3 +- .../backends/metal_backend.h | 3 +- .../backends/vulkan_backend.cpp | 3 +- .../backends/vulkan_backend.h | 3 +- src/ac6_native_renderer/execution_plan.cpp | 1 + src/ac6_native_renderer/execution_plan.h | 1 + src/ac6_native_renderer/native_renderer.cpp | 46 +- src/ac6_native_renderer/native_renderer.h | 19 +- src/ac6_native_renderer/render_device.cpp | 23 +- src/ac6_native_renderer/render_device.h | 18 +- src/ac6_native_renderer/replay_executor.cpp | 12 + src/ac6_native_renderer/replay_executor.h | 12 + src/ac6_native_renderer/replay_ir.cpp | 1 + src/ac6_native_renderer/replay_ir.h | 1 + src/ac6_native_renderer/types.h | 20 + src/ac6recomp_app.h | 27 +- src/d3d_hooks.cpp | 58 +- src/d3d_state.h | 3 + src/main.cpp | 61 +- src/render_hooks.cpp | 15 +- src/render_hooks.h | 1 + 
.../include/rex/graphics/graphics_system.h | 21 +- .../include/rex/system/interfaces/graphics.h | 5 + .../src/graphics/d3d12/command_processor.cpp | 149 ++- .../src/graphics/graphics_system.cpp | 62 +- .../src/native/ui/windowed_app_main_win.cpp | 22 +- 49 files changed, 3383 insertions(+), 567 deletions(-) create mode 100644 docs/RENDERER_ARCHITECTURE.txt create mode 100644 src/ac6_backend_fixes/ac6_backend_capture_bridge.cpp create mode 100644 src/ac6_backend_fixes/ac6_backend_capture_bridge.h create mode 100644 src/ac6_backend_fixes/ac6_backend_hooks.cpp create mode 100644 src/ac6_backend_fixes/ac6_backend_hooks.h create mode 100644 src/ac6_backend_fixes/ac6_backend_pass_classifier.cpp create mode 100644 src/ac6_backend_fixes/ac6_backend_pass_classifier.h create mode 100644 src/ac6_native_renderer/backends/d3d12_resource_manager.cpp create mode 100644 src/ac6_native_renderer/backends/d3d12_resource_manager.h create mode 100644 src/ac6_native_renderer/backends/d3d12_resource_tracker.cpp create mode 100644 src/ac6_native_renderer/backends/d3d12_resource_tracker.h create mode 100644 src/ac6_native_renderer/backends/d3d12_shader_manager.cpp create mode 100644 src/ac6_native_renderer/backends/d3d12_shader_manager.h diff --git a/.gitignore b/.gitignore index 7d620b31..f885c641 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ rexglue_sdk_new/ autoresearch/ *.bak *.tmp +*.md tmp_*.py build_*.txt *.log diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b72d389..3448460b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,9 @@ set(AC6RECOMP_SOURCES src/render_hooks.cpp src/ac6_native_graphics.cpp src/ac6_native_graphics_overlay.cpp + src/ac6_backend_fixes/ac6_backend_capture_bridge.cpp + src/ac6_backend_fixes/ac6_backend_hooks.cpp + src/ac6_backend_fixes/ac6_backend_pass_classifier.cpp src/ac6_native_renderer/ac6_render_frontend.cpp src/ac6_native_renderer/execution_plan.cpp src/ac6_native_renderer/frame_plan.cpp @@ -29,6 +32,9 @@ 
set(AC6RECOMP_SOURCES src/ac6_native_renderer/replay_executor.cpp src/ac6_native_renderer/backends/backend_factory.cpp src/ac6_native_renderer/backends/d3d12_backend.cpp + src/ac6_native_renderer/backends/d3d12_resource_manager.cpp + src/ac6_native_renderer/backends/d3d12_resource_tracker.cpp + src/ac6_native_renderer/backends/d3d12_shader_manager.cpp src/ac6_native_renderer/backends/metal_backend.cpp src/ac6_native_renderer/backends/vulkan_backend.cpp src/ac6_native_renderer/frame_scheduler.cpp diff --git a/README.md b/README.md index bded050a..2ca8a0a2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,16 @@ DEV TESTING BRANCH. THINGS WILL BREAK HERE > [!CAUTION] > This project is still work in progress. It can boot and run in-game, but bugs, crashes, and missing functionality should be expected. -A native PC port of **Ace Combat 6: Fires of Liberation** (Xbox 360), built on top of the [ReXGlue SDK](https://github.com/rexglue/rexglue-sdk). The Xbox 360 PowerPC binary is statically recompiled to x86-64 so the original game logic runs natively on your host CPU, with a fully native D3D12/Vulkan renderer replacing the original Xenos GPU pipeline. +A native PC port of **Ace Combat 6: Fires of Liberation** built on top of the [ReXGlue SDK](https://github.com/rexglue/rexglue-sdk). The Xbox 360 PowerPC game code is statically recompiled to x86-64, while visible rendering is currently still produced by the vendored ReXGlue/Xenia graphics backend, which remains the authoritative renderer. + +The AC6-specific graphics layer in this repo is now focused on: + +- frame capture and diagnostics +- swap-path inspection and overlay reporting +- backend-fix routing for AC6-specific rendering issues +- future selective override and modding hooks + +The legacy AC6 replay renderer is still present as experimental tooling, but it is **not** the default render path and it does **not** hijack presentation unless both `ac6_graphics_mode=legacy_replay_experimental` and `ac6_experimental_replay_present=true` are set. 
## Repository policy @@ -20,149 +29,44 @@ Do **not** commit or redistribute: Users must supply their own legally obtained game files locally. ---- - -## Prerequisites - -| Tool | Version | Notes | -|---|---|---| -| [CMake](https://cmake.org/) | 3.25+ | | -| [Ninja](https://ninja-build.org/) | any recent | required generator | -| [Clang/LLVM](https://releases.llvm.org/) | any recent | `clang` / `clang++` must be on `PATH` | -| Windows SDK | 10.0.19041+ | D3D12 headers (Windows only) | - -> [!NOTE] -> The Linux preset uses `clang-20` / `clang++-20` directly. Install the versioned binaries via your distro's package manager (`apt install clang-20`) or via the [LLVM APT repository](https://apt.llvm.org). - ---- - -## Acquiring the game files - -1. Obtain the original Xbox 360 disc image (ISO) by dumping your own disc. - Guides and tools: [consolemods.org – ISO Extraction & Repacking](https://consolemods.org/wiki/Xbox:ISO_Extraction_%26_Repacking) -2. Extract the XEX and game data from the ISO. -3. Place the resulting files inside the `assets/` directory (created manually — it is git-ignored): - -```text -assets/ - default.xex ← required by the codegen step - media/ ← game data (audio, video, maps, …) - … -``` - ---- - -## Clone - -```bash -git clone https://github.com/sal063/AC6_recomp.git -cd AC6_recomp -``` - -> [!NOTE] -> The ReXGlue SDK (`thirdparty/rexglue-sdk/`) is vendored directly in the repository. No submodule init is needed. - ---- - ## Build -### 1 — Configure - ```bash cmake --preset win-amd64-relwithdebinfo -``` - -### 2 — Generate the recompiled code (first time, and after updating `default.xex`) - -```bash cmake --build --preset win-amd64-relwithdebinfo --target ac6recomp_codegen -``` - -This step reads `assets/default.xex`, lifts all PowerPC instructions to C++, and writes the output to `generated/`. It can take a few minutes. 
- -### 3 — Re-run CMake configure - -```bash cmake --preset win-amd64-relwithdebinfo -``` - -Re-run configure after codegen so CMake picks up the generated `generated/sources.cmake` file and adds the generated `.cpp` sources to the target. - -### 4 — Build the runtime - -```bash cmake --build --preset win-amd64-relwithdebinfo ``` -The executable is placed at: +The second `cmake --preset` run is intentional: configure must be re-run after codegen so CMake picks up `generated/sources.cmake`. The executable is placed at `out/build/win-amd64-relwithdebinfo/ac6recomp.exe`. -``` -out/build/win-amd64-relwithdebinfo/ac6recomp.exe -``` +## Runtime Defaults -> [!TIP] -> `RelWithDebInfo` is the recommended preset — it gives near-release performance with symbols intact for debugging. A full `Release` build disables assertions and can be used for distribution. +With the vendored backend as the authoritative renderer, the default AC6 graphics configuration is: -### Available presets +- `ac6_native_graphics_enabled=true` +- `ac6_graphics_mode=hybrid_backend_fixes` +- `ac6_render_capture=true` +- `ac6_experimental_replay_present=false` -| Preset | Platform | Build type | -|---|---|---| -| `win-amd64-debug` | Windows | Debug | -| `win-amd64-release` | Windows | Release | -| `win-amd64-relwithdebinfo` | Windows | RelWithDebInfo ✅ recommended | -| `linux-amd64-debug` | Linux | Debug | -| `linux-amd64-release` | Linux | Release | -| `linux-amd64-relwithdebinfo` | Linux | RelWithDebInfo | - ---- - -## Run - -```bash -./out/build/win-amd64-relwithdebinfo/ac6recomp assets -``` - -The single argument is the path to the directory containing your game files (`assets/` by default). The runtime resolves all paths relative to it. - ---- - -## Linux - -Substitute `win-amd64-relwithdebinfo` with `linux-amd64-relwithdebinfo` in every command above. 
- -```bash -cmake --preset linux-amd64-relwithdebinfo -cmake --build --preset linux-amd64-relwithdebinfo --target ac6recomp_codegen -cmake --preset linux-amd64-relwithdebinfo -cmake --build --preset linux-amd64-relwithdebinfo -./out/build/linux-amd64-relwithdebinfo/ac6recomp assets -``` - ---- +That means the ReXGlue/Xenia D3D12 backend remains the visible renderer by default, while AC6-specific analysis and diagnostics stay active. ## Project layout -``` +```text AC6_recomp/ -├── src/ Host-side runtime & renderer -│ ├── main.cpp -│ ├── ac6_native_graphics.* Xenon → native GPU command translation -│ ├── ac6_native_renderer/ Native rendering backend (D3D12 / Vulkan) -│ │ ├── backends/ Per-API backend implementations -│ │ ├── frame_plan.* Frame dependency graph construction -│ │ ├── frame_scheduler.* CPU/GPU timeline management -│ │ ├── native_renderer.* Top-level renderer orchestration -│ │ └── render_device.* Device abstraction layer -│ └── d3d_hooks.* Low-level D3D intercept layer -├── thirdparty/rexglue-sdk/ ReXGlue SDK (vendored) -├── assets/ ← NOT in repo; place your game files here -├── generated/ ← NOT in repo; output of codegen step -├── CMakeLists.txt -└── CMakePresets.json +|- src/ +| |- ac6_backend_fixes/ AC6-specific backend diagnostics and fix routing +| |- ac6_native_graphics.* AC6 frame-boundary analysis and overlay status +| |- ac6_native_renderer/ Experimental replay renderer and research tooling +| |- d3d_hooks.* Guest D3D capture and shadow-state hooks +| `- render_hooks.* Timing and frame pacing hooks +|- thirdparty/rexglue-sdk/ Vendored ReXGlue SDK +|- generated/ Generated recomp sources +|- assets/ Local game files, not kept in repo +`- docs/RENDERER_ARCHITECTURE.txt ``` ---- - ## License See [LICENSE](LICENSE). 
diff --git a/docs/RENDERER_ARCHITECTURE.txt b/docs/RENDERER_ARCHITECTURE.txt new file mode 100644 index 00000000..ea4b73fd --- /dev/null +++ b/docs/RENDERER_ARCHITECTURE.txt @@ -0,0 +1,30 @@ +AC6 renderer architecture after the pivot +========================================= + +Authoritative renderer +---------------------- +Visible rendering now defaults to the vendored ReXGlue/Xenia graphics backend. +The guest GPU command stream, the ReXGlue D3D12 command processor, and the presenter together form the only authoritative presentation path in the default configuration. + +AC6-specific layer +------------------ +The AC6 code in this repository sits alongside that backend and provides: + +- frame-boundary capture and shadow-state analysis +- swap-path inspection and overlay reporting +- AC6-specific pass classification and repeated-signature tracking +- timing telemetry for host frame pacing, guest vblank cadence, and audio queue state +- narrow extension points for future AC6 fixes and selective modding overrides + +Experimental replay renderer +---------------------------- +The legacy AC6 native replay renderer is retained only as experimental tooling. +It may still be initialized for research or selective override work, but it is not the shipping render path and it does not override presentation unless explicitly opted in at runtime (`ac6_graphics_mode=legacy_replay_experimental` together with `ac6_experimental_replay_present=true`). + +Current default runtime behavior +-------------------------------- +- `ac6_graphics_mode=hybrid_backend_fixes` +- `ac6_experimental_replay_present=false` +- `ac6_render_capture=true` (render capture remains enabled by default) + +This keeps diagnostics and backend-fix infrastructure active without introducing a competing render authority. 
diff --git a/src/ac6_backend_fixes/ac6_backend_capture_bridge.cpp b/src/ac6_backend_fixes/ac6_backend_capture_bridge.cpp new file mode 100644 index 00000000..7e88cf41 --- /dev/null +++ b/src/ac6_backend_fixes/ac6_backend_capture_bridge.cpp @@ -0,0 +1,218 @@ +#include "ac6_backend_capture_bridge.h" + +#include +#include + +namespace ac6::backend { +namespace { + +template +uint32_t CountNonZero(const Container& values) { + uint32_t count = 0; + for (const auto& value : values) { + if (value) { + ++count; + } + } + return count; +} + +uint32_t CountBoundStreams( + const std::array& streams) { + uint32_t count = 0; + for (const auto& stream : streams) { + if (stream.buffer) { + ++count; + } + } + return count; +} + +uint32_t CountBoundSamplers( + const std::array& samplers) { + uint32_t count = 0; + for (const auto& sampler : samplers) { + if (sampler.mag_filter || sampler.min_filter || sampler.mip_filter || + sampler.mip_level || sampler.border_color) { + ++count; + } + } + return count; +} + +void HashU32(uint64_t& hash, uint32_t value) { + constexpr uint64_t kFnvPrime = 1099511628211ull; + hash ^= value; + hash *= kFnvPrime; +} + +void HashU64(uint64_t& hash, uint64_t value) { + HashU32(hash, uint32_t(value & 0xFFFFFFFFull)); + HashU32(hash, uint32_t(value >> 32)); +} + +bool IsHalfResLike(const ac6::d3d::ShadowState& shadow_state, + const rex::system::GraphicsSwapSubmission* swap_submission) { + if (!swap_submission || !swap_submission->frontbuffer_width || + !swap_submission->frontbuffer_height || !shadow_state.viewport.width || + !shadow_state.viewport.height) { + return false; + } + + const uint32_t swap_width = swap_submission->frontbuffer_width; + const uint32_t swap_height = swap_submission->frontbuffer_height; + return shadow_state.viewport.width * 4 <= swap_width * 3 || + shadow_state.viewport.height * 4 <= swap_height * 3; +} + +bool IsLikelyUiPass(const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::d3d::ShadowState& shadow_state, + const 
rex::system::GraphicsSwapSubmission* swap_submission) { + if (shadow_state.depth_stencil != 0 || !swap_submission || + !swap_submission->frontbuffer_width || !swap_submission->frontbuffer_height) { + return false; + } + + const bool viewport_matches_swap = + shadow_state.viewport.width == swap_submission->frontbuffer_width && + shadow_state.viewport.height == swap_submission->frontbuffer_height; + const bool low_complexity = + capture_summary.draw_count > 0 && capture_summary.draw_count <= 96 && + capture_summary.resolve_count == 0; + return viewport_matches_swap && low_complexity; +} + +bool IsLikelyParticlePass(const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::d3d::ShadowState& shadow_state) { + const bool mostly_primitive = + capture_summary.primitive_draw_count > capture_summary.indexed_draw_count && + capture_summary.primitive_draw_count > 0; + const bool light_bindings = + CountBoundSamplers(shadow_state.samplers) <= 4 && + CountBoundStreams(shadow_state.streams) <= 4; + return mostly_primitive || (capture_summary.resolve_count > 0 && light_bindings && + shadow_state.depth_stencil == 0); +} + +bool IsLikelyAdditive(const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::d3d::ShadowState& shadow_state) { + if (shadow_state.depth_stencil != 0) { + return false; + } + return capture_summary.clear_count == 0 && + CountNonZero(shadow_state.textures) > 0 && + CountBoundSamplers(shadow_state.samplers) > 0; +} + +} // namespace + +uint64_t HashSwapTextureFetch(const rex::system::GraphicsSwapSubmission& submission) { + constexpr uint64_t kFnvOffsetBasis = 1469598103934665603ull; + uint64_t hash = kFnvOffsetBasis; + for (uint32_t word : submission.texture_fetch) { + HashU32(hash, word); + } + return hash; +} + +RenderEventSignature BuildRenderEventSignature( + const ac6::d3d::FrameCaptureSnapshot& frame_capture, + const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::d3d::ShadowState& shadow_state, + const 
rex::system::GraphicsSwapSubmission* swap_submission, + const uint64_t active_vertex_shader_hash, + const uint64_t active_pixel_shader_hash) { + constexpr uint64_t kFnvOffsetBasis = 1469598103934665603ull; + (void)frame_capture; + + RenderEventSignature signature; + signature.capture_record_signature = capture_summary.record_signature; + signature.swap_texture_fetch_signature = + swap_submission ? HashSwapTextureFetch(*swap_submission) : 0; + signature.render_target_0 = shadow_state.render_targets[0]; + signature.depth_stencil = shadow_state.depth_stencil; + signature.viewport_width = shadow_state.viewport.width; + signature.viewport_height = shadow_state.viewport.height; + signature.draw_count = capture_summary.draw_count; + signature.clear_count = capture_summary.clear_count; + signature.resolve_count = capture_summary.resolve_count; + signature.indexed_draw_count = capture_summary.indexed_draw_count; + signature.primitive_draw_count = capture_summary.primitive_draw_count; + signature.texture_count = CountNonZero(shadow_state.textures); + signature.sampler_count = CountBoundSamplers(shadow_state.samplers); + signature.stream_count = CountBoundStreams(shadow_state.streams); + signature.fetch_constant_count = CountNonZero(shadow_state.texture_fetch_ptrs); + signature.shader_gpr_alloc = shadow_state.shader_gpr_alloc; + signature.active_vertex_shader_hash = active_vertex_shader_hash; + signature.active_pixel_shader_hash = active_pixel_shader_hash; + signature.has_depth_stencil = shadow_state.depth_stencil != 0; + signature.has_resolve = capture_summary.resolve_count != 0; + signature.half_res_like = IsHalfResLike(shadow_state, swap_submission); + signature.post_process_like = + signature.has_resolve && !signature.has_depth_stencil; + signature.ui_like = + IsLikelyUiPass(capture_summary, shadow_state, swap_submission); + signature.particle_like = + IsLikelyParticlePass(capture_summary, shadow_state); + signature.additive_like = + IsLikelyAdditive(capture_summary, 
shadow_state); + + uint64_t hash = kFnvOffsetBasis; + HashU64(hash, signature.capture_record_signature); + HashU64(hash, signature.swap_texture_fetch_signature); + HashU32(hash, signature.render_target_0); + HashU32(hash, signature.depth_stencil); + HashU32(hash, signature.viewport_width); + HashU32(hash, signature.viewport_height); + HashU32(hash, signature.draw_count); + HashU32(hash, signature.resolve_count); + HashU32(hash, signature.texture_count); + HashU32(hash, signature.sampler_count); + HashU32(hash, signature.stream_count); + HashU32(hash, signature.fetch_constant_count); + HashU32(hash, signature.shader_gpr_alloc); + HashU64(hash, shadow_state.vertex_fetch_layout_signature); + HashU64(hash, shadow_state.texture_fetch_layout_signature); + HashU64(hash, shadow_state.resource_binding_signature); + signature.stable_id = hash; + + return signature; +} + +std::string BuildSignatureTags(const RenderEventSignature& signature) { + std::string tags; + auto append = [&tags](const char* token) { + if (!tags.empty()) { + tags.append(", "); + } + tags.append(token); + }; + + if (signature.has_depth_stencil) { + append("depth"); + } + if (signature.has_resolve) { + append("resolve"); + } + if (signature.half_res_like) { + append("half_res"); + } + if (signature.post_process_like) { + append("post"); + } + if (signature.ui_like) { + append("ui"); + } + if (signature.particle_like) { + append("particles"); + } + if (signature.additive_like) { + append("additive"); + } + if (tags.empty()) { + tags = "unclassified"; + } + return tags; +} + +} // namespace ac6::backend diff --git a/src/ac6_backend_fixes/ac6_backend_capture_bridge.h b/src/ac6_backend_fixes/ac6_backend_capture_bridge.h new file mode 100644 index 00000000..386f6352 --- /dev/null +++ b/src/ac6_backend_fixes/ac6_backend_capture_bridge.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#include + +#include "../d3d_state.h" + +namespace ac6::backend { + +enum class SignatureClass : uint8_t { + kUnknown, + 
kScene, + kPostProcess, + kUiComposite, + kParticles, + kClouds, + kSmoke, + kExplosions, + kMissileTrails, +}; + +struct RenderEventSignature { + uint64_t stable_id = 0; + uint64_t capture_record_signature = 0; + uint64_t swap_texture_fetch_signature = 0; + uint32_t render_target_0 = 0; + uint32_t depth_stencil = 0; + uint32_t viewport_width = 0; + uint32_t viewport_height = 0; + uint32_t draw_count = 0; + uint32_t clear_count = 0; + uint32_t resolve_count = 0; + uint32_t indexed_draw_count = 0; + uint32_t primitive_draw_count = 0; + uint32_t texture_count = 0; + uint32_t sampler_count = 0; + uint32_t stream_count = 0; + uint32_t fetch_constant_count = 0; + uint32_t shader_gpr_alloc = 0; + uint64_t active_vertex_shader_hash = 0; + uint64_t active_pixel_shader_hash = 0; + bool has_depth_stencil = false; + bool has_resolve = false; + bool half_res_like = false; + bool post_process_like = false; + bool ui_like = false; + bool particle_like = false; + bool additive_like = false; + SignatureClass classification = SignatureClass::kUnknown; +}; + +uint64_t HashSwapTextureFetch(const rex::system::GraphicsSwapSubmission& submission); + +RenderEventSignature BuildRenderEventSignature( + const ac6::d3d::FrameCaptureSnapshot& frame_capture, + const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::d3d::ShadowState& shadow_state, + const rex::system::GraphicsSwapSubmission* swap_submission, + uint64_t active_vertex_shader_hash, + uint64_t active_pixel_shader_hash); + +std::string BuildSignatureTags(const RenderEventSignature& signature); + +} // namespace ac6::backend diff --git a/src/ac6_backend_fixes/ac6_backend_hooks.cpp b/src/ac6_backend_fixes/ac6_backend_hooks.cpp new file mode 100644 index 00000000..34b0db8a --- /dev/null +++ b/src/ac6_backend_fixes/ac6_backend_hooks.cpp @@ -0,0 +1,197 @@ +#include "ac6_backend_hooks.h" + +#include +#include + +#include + +#include "ac6_backend_pass_classifier.h" +#include "render_hooks.h" + 
+REXCVAR_DEFINE_BOOL(ac6_backend_debug_swap, false, "AC6/Backend", + "Log AC6 swap-path diagnostics from the authoritative backend"); +REXCVAR_DEFINE_BOOL(ac6_backend_log_signatures, false, "AC6/Backend", + "Log AC6 capture signatures used for backend-fix routing"); + +namespace ac6::backend { +namespace { + +std::mutex g_snapshot_mutex; +BackendDiagnosticsSnapshot g_snapshot{}; +std::unordered_map g_signature_hits; + +bool ShouldLogSignature(const BackendDiagnosticsSnapshot& snapshot) { + if (!REXCVAR_GET(ac6_backend_log_signatures) || snapshot.latest_signature.stable_id == 0) { + return false; + } + return snapshot.repeated_signature_count == 1 || + (snapshot.repeated_signature_count % 32) == 0; +} + +} // namespace + +void AnalyzeFrameBoundary( + const ac6::d3d::FrameCaptureSnapshot& frame_capture, + const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::d3d::ShadowState& shadow_state, + const rex::system::GraphicsSwapSubmission* swap_submission, + const uint64_t swap_submission_sequence, + const uint64_t guest_vblank_interval_ticks, + const uint64_t last_guest_vblank_tick, + const ac6::FrameStats& frame_stats, + const rex::audio::AudioTelemetrySnapshot* audio_telemetry, + const rex::audio::AudioClientTimingSnapshot* audio_timing) { + std::lock_guard lock(g_snapshot_mutex); + + const uint64_t previous_swap_sequence = g_snapshot.swap_submission_sequence; + g_snapshot.valid = true; + g_snapshot.frame_index = capture_summary.frame_index; + g_snapshot.swap_submission_sequence = swap_submission_sequence; + g_snapshot.swap_submission_valid = swap_submission != nullptr; + g_snapshot.guest_vblank_interval_ticks = guest_vblank_interval_ticks; + g_snapshot.last_guest_vblank_tick = last_guest_vblank_tick; + g_snapshot.host_frame_time_ms = frame_stats.frame_time_ms; + g_snapshot.host_fps = frame_stats.fps; + g_snapshot.host_frame_count = frame_stats.frame_count; + g_snapshot.capture_draw_count = capture_summary.draw_count; + g_snapshot.capture_clear_count = 
capture_summary.clear_count; + g_snapshot.capture_resolve_count = capture_summary.resolve_count; + + if (swap_submission) { + g_snapshot.frontbuffer_width = swap_submission->frontbuffer_width; + g_snapshot.frontbuffer_height = swap_submission->frontbuffer_height; + g_snapshot.texture_format = swap_submission->texture_format; + g_snapshot.color_space = swap_submission->color_space; + } else { + g_snapshot.frontbuffer_width = 0; + g_snapshot.frontbuffer_height = 0; + g_snapshot.texture_format = 0; + g_snapshot.color_space = 0; + } + + if (swap_submission_sequence != previous_swap_sequence) { + g_snapshot.swap_source = SwapSourceType::kUnknown; + g_snapshot.active_vertex_shader_hash = 0; + g_snapshot.active_pixel_shader_hash = 0; + } + + if (audio_telemetry) { + g_snapshot.audio_active_clients = audio_telemetry->active_clients; + g_snapshot.audio_queued_frames = audio_telemetry->queued_frames; + g_snapshot.audio_peak_queued_frames = audio_telemetry->peak_queued_frames; + g_snapshot.audio_dropped_frames = audio_telemetry->dropped_frames; + g_snapshot.audio_underruns = audio_telemetry->underruns; + g_snapshot.audio_silence_injections = audio_telemetry->silence_injections; + g_snapshot.audio_backend_name = audio_telemetry->backend_name; + } else { + g_snapshot.audio_active_clients = 0; + g_snapshot.audio_queued_frames = 0; + g_snapshot.audio_peak_queued_frames = 0; + g_snapshot.audio_dropped_frames = 0; + g_snapshot.audio_underruns = 0; + g_snapshot.audio_silence_injections = 0; + g_snapshot.audio_backend_name.clear(); + } + + if (audio_timing) { + g_snapshot.audio_timing_valid = true; + g_snapshot.audio_consumed_frames = audio_timing->consumed_frames; + g_snapshot.audio_submitted_tic = audio_timing->submitted_tic; + g_snapshot.audio_host_elapsed_tic = audio_timing->host_elapsed_tic; + g_snapshot.audio_startup_inflight_frames = audio_timing->startup_inflight_frames; + g_snapshot.audio_callback_dispatch_count = audio_timing->callback_dispatch_count; + 
g_snapshot.audio_callback_throttle_count = audio_timing->callback_throttle_count; + } else { + g_snapshot.audio_timing_valid = false; + g_snapshot.audio_consumed_frames = 0; + g_snapshot.audio_submitted_tic = 0; + g_snapshot.audio_host_elapsed_tic = 0; + g_snapshot.audio_startup_inflight_frames = 0; + g_snapshot.audio_callback_dispatch_count = 0; + g_snapshot.audio_callback_throttle_count = 0; + } + + g_snapshot.latest_signature = BuildRenderEventSignature( + frame_capture, capture_summary, shadow_state, swap_submission, + g_snapshot.active_vertex_shader_hash, g_snapshot.active_pixel_shader_hash); + g_snapshot.latest_signature.classification = + ClassifySignature(g_snapshot.latest_signature); + g_snapshot.latest_signature_tags = BuildSignatureTags(g_snapshot.latest_signature); + g_snapshot.repeated_signature_count = + ++g_signature_hits[g_snapshot.latest_signature.stable_id]; + + if (ShouldLogSignature(g_snapshot)) { + REXLOG_INFO( + "AC6 backend signature frame={} class={} id={:016X} hits={} tags={} draws={} resolves={}", + g_snapshot.frame_index, ToString(g_snapshot.latest_signature.classification), + g_snapshot.latest_signature.stable_id, g_snapshot.repeated_signature_count, + g_snapshot.latest_signature_tags, g_snapshot.capture_draw_count, + g_snapshot.capture_resolve_count); + } +} + +void ReportSwapDecision(const rex::system::GraphicsSwapSubmission& submission, + const uint64_t submission_sequence, + const SwapSourceType swap_source, + const bool swap_source_scaled, + const uint32_t guest_output_width, + const uint32_t guest_output_height, + const uint32_t source_width, + const uint32_t source_height, + const uint64_t active_vertex_shader_hash, + const uint64_t active_pixel_shader_hash) { + std::lock_guard lock(g_snapshot_mutex); + + g_snapshot.valid = true; + g_snapshot.swap_submission_valid = true; + g_snapshot.swap_submission_sequence = submission_sequence; + g_snapshot.swap_source = swap_source; + g_snapshot.swap_source_scaled = swap_source_scaled; + 
g_snapshot.guest_output_width = guest_output_width; + g_snapshot.guest_output_height = guest_output_height; + g_snapshot.source_width = source_width; + g_snapshot.source_height = source_height; + g_snapshot.frontbuffer_width = submission.frontbuffer_width; + g_snapshot.frontbuffer_height = submission.frontbuffer_height; + g_snapshot.texture_format = submission.texture_format; + g_snapshot.color_space = submission.color_space; + g_snapshot.active_vertex_shader_hash = active_vertex_shader_hash; + g_snapshot.active_pixel_shader_hash = active_pixel_shader_hash; + g_snapshot.latest_signature.active_vertex_shader_hash = active_vertex_shader_hash; + g_snapshot.latest_signature.active_pixel_shader_hash = active_pixel_shader_hash; + + if (REXCVAR_GET(ac6_backend_debug_swap)) { + REXLOG_INFO( + "AC6 swap source={} guest={}x{} source={}x{} scaled={} vs={:016X} ps={:016X}", + ToString(swap_source), guest_output_width, guest_output_height, source_width, + source_height, swap_source_scaled ? "yes" : "no", active_vertex_shader_hash, + active_pixel_shader_hash); + } +} + +BackendDiagnosticsSnapshot GetDiagnosticsSnapshot() { + std::lock_guard lock(g_snapshot_mutex); + return g_snapshot; +} + +void ShutdownDiagnostics() { + std::lock_guard lock(g_snapshot_mutex); + g_snapshot = {}; + g_signature_hits.clear(); +} + +const char* ToString(const SwapSourceType swap_source) { + switch (swap_source) { + case SwapSourceType::kGuestSwapTexture: + return "guest_swap_texture"; + case SwapSourceType::kDirectDisplayFallback: + return "direct_display_fallback"; + case SwapSourceType::kExperimentalReplayOverride: + return "experimental_replay_override"; + case SwapSourceType::kUnknown: + default: + return "unknown"; + } +} + +} // namespace ac6::backend diff --git a/src/ac6_backend_fixes/ac6_backend_hooks.h b/src/ac6_backend_fixes/ac6_backend_hooks.h new file mode 100644 index 00000000..7d088e57 --- /dev/null +++ b/src/ac6_backend_fixes/ac6_backend_hooks.h @@ -0,0 +1,102 @@ +#pragma once + 
+#include +#include + +#include +#include +#include + +#include "ac6_backend_capture_bridge.h" +#include "ac6_backend_pass_classifier.h" +#include "../d3d_state.h" + +REXCVAR_DECLARE(bool, ac6_backend_debug_swap); +REXCVAR_DECLARE(bool, ac6_backend_log_signatures); + +namespace ac6 { +struct FrameStats; +} + +namespace ac6::backend { + +enum class SwapSourceType : uint8_t { + kUnknown, + kGuestSwapTexture, + kDirectDisplayFallback, + kExperimentalReplayOverride, +}; + +struct BackendDiagnosticsSnapshot { + bool valid = false; + uint64_t frame_index = 0; + uint64_t swap_submission_sequence = 0; + bool swap_submission_valid = false; + SwapSourceType swap_source = SwapSourceType::kUnknown; + bool swap_source_scaled = false; + uint32_t source_width = 0; + uint32_t source_height = 0; + uint32_t guest_output_width = 0; + uint32_t guest_output_height = 0; + uint32_t frontbuffer_width = 0; + uint32_t frontbuffer_height = 0; + uint32_t texture_format = 0; + uint32_t color_space = 0; + uint32_t audio_active_clients = 0; + uint32_t audio_queued_frames = 0; + uint32_t audio_peak_queued_frames = 0; + uint32_t audio_dropped_frames = 0; + uint32_t audio_underruns = 0; + uint32_t audio_silence_injections = 0; + uint32_t audio_startup_inflight_frames = 0; + uint32_t audio_callback_dispatch_count = 0; + uint32_t audio_callback_throttle_count = 0; + uint64_t active_vertex_shader_hash = 0; + uint64_t active_pixel_shader_hash = 0; + uint64_t guest_vblank_interval_ticks = 0; + uint64_t last_guest_vblank_tick = 0; + uint64_t audio_consumed_frames = 0; + uint64_t audio_submitted_tic = 0; + uint64_t audio_host_elapsed_tic = 0; + double host_frame_time_ms = 0.0; + double host_fps = 0.0; + uint64_t host_frame_count = 0; + uint32_t capture_draw_count = 0; + uint32_t capture_clear_count = 0; + uint32_t capture_resolve_count = 0; + uint32_t repeated_signature_count = 0; + bool audio_timing_valid = false; + std::string audio_backend_name; + std::string latest_signature_tags; + 
RenderEventSignature latest_signature{}; +}; + +void AnalyzeFrameBoundary( + const ac6::d3d::FrameCaptureSnapshot& frame_capture, + const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::d3d::ShadowState& shadow_state, + const rex::system::GraphicsSwapSubmission* swap_submission, + uint64_t swap_submission_sequence, + uint64_t guest_vblank_interval_ticks, + uint64_t last_guest_vblank_tick, + const ac6::FrameStats& frame_stats, + const rex::audio::AudioTelemetrySnapshot* audio_telemetry, + const rex::audio::AudioClientTimingSnapshot* audio_timing); + +void ReportSwapDecision(const rex::system::GraphicsSwapSubmission& submission, + uint64_t submission_sequence, + SwapSourceType swap_source, + bool swap_source_scaled, + uint32_t guest_output_width, + uint32_t guest_output_height, + uint32_t source_width, + uint32_t source_height, + uint64_t active_vertex_shader_hash, + uint64_t active_pixel_shader_hash); + +BackendDiagnosticsSnapshot GetDiagnosticsSnapshot(); +void ShutdownDiagnostics(); + +const char* ToString(SwapSourceType swap_source); + +} // namespace ac6::backend diff --git a/src/ac6_backend_fixes/ac6_backend_pass_classifier.cpp b/src/ac6_backend_fixes/ac6_backend_pass_classifier.cpp new file mode 100644 index 00000000..831a64e6 --- /dev/null +++ b/src/ac6_backend_fixes/ac6_backend_pass_classifier.cpp @@ -0,0 +1,60 @@ +#include "ac6_backend_pass_classifier.h" + +namespace ac6::backend { + +SignatureClass ClassifySignature(const RenderEventSignature& signature) { + if (signature.ui_like) { + return SignatureClass::kUiComposite; + } + if (signature.particle_like && signature.additive_like && + signature.viewport_width && signature.viewport_height && + signature.viewport_width >= signature.viewport_height * 2) { + return SignatureClass::kMissileTrails; + } + if (signature.half_res_like && signature.post_process_like && + signature.sampler_count >= 4 && signature.fetch_constant_count >= 2) { + return SignatureClass::kClouds; + } + if 
(signature.half_res_like && signature.particle_like && signature.additive_like) { + return SignatureClass::kExplosions; + } + if (signature.half_res_like && signature.post_process_like) { + return SignatureClass::kSmoke; + } + if (signature.particle_like) { + return SignatureClass::kParticles; + } + if (signature.post_process_like) { + return SignatureClass::kPostProcess; + } + if (signature.has_depth_stencil) { + return SignatureClass::kScene; + } + return SignatureClass::kUnknown; +} + +const char* ToString(const SignatureClass signature_class) { + switch (signature_class) { + case SignatureClass::kScene: + return "scene"; + case SignatureClass::kPostProcess: + return "post_process"; + case SignatureClass::kUiComposite: + return "ui_composite"; + case SignatureClass::kParticles: + return "particles"; + case SignatureClass::kClouds: + return "clouds"; + case SignatureClass::kSmoke: + return "smoke"; + case SignatureClass::kExplosions: + return "explosions"; + case SignatureClass::kMissileTrails: + return "missile_trails"; + case SignatureClass::kUnknown: + default: + return "unknown"; + } +} + +} // namespace ac6::backend diff --git a/src/ac6_backend_fixes/ac6_backend_pass_classifier.h b/src/ac6_backend_fixes/ac6_backend_pass_classifier.h new file mode 100644 index 00000000..5e8c1324 --- /dev/null +++ b/src/ac6_backend_fixes/ac6_backend_pass_classifier.h @@ -0,0 +1,10 @@ +#pragma once + +#include "ac6_backend_capture_bridge.h" + +namespace ac6::backend { + +SignatureClass ClassifySignature(const RenderEventSignature& signature); +const char* ToString(SignatureClass signature_class); + +} // namespace ac6::backend diff --git a/src/ac6_native_graphics.cpp b/src/ac6_native_graphics.cpp index ed8c2bac..9d4c7e00 100644 --- a/src/ac6_native_graphics.cpp +++ b/src/ac6_native_graphics.cpp @@ -1,148 +1,340 @@ #include "ac6_native_graphics.h" -#include +#include #include +#include #include +#include #include +#include +#include +#include +#include 
"ac6_native_renderer/backends/d3d12_backend.h" #include "ac6_native_renderer/native_renderer.h" #include "d3d_hooks.h" +#include "render_hooks.h" REXCVAR_DEFINE_BOOL(ac6_native_graphics_enabled, true, "AC6/NativeGraphics", - "Enable AC6 native renderer frame-plan execution from captured D3D state"); + "Enable AC6 graphics capture analysis, overlay reporting, and backend fixes"); REXCVAR_DEFINE_BOOL(ac6_native_graphics_require_capture, true, "AC6/NativeGraphics", - "Force render-capture on while native graphics execution is enabled"); + "Keep render capture enabled while AC6 graphics analysis is active"); +REXCVAR_DEFINE_STRING(ac6_graphics_mode, "hybrid_backend_fixes", "AC6/NativeGraphics", + "AC6 graphics runtime mode: disabled, analysis_only, hybrid_backend_fixes, legacy_replay_experimental") + .allowed({"disabled", "analysis_only", "hybrid_backend_fixes", "legacy_replay_experimental"}); +REXCVAR_DEFINE_BOOL(ac6_experimental_replay_present, false, "AC6/NativeGraphics", + "Allow the legacy AC6 replay renderer to override the RexGlue swap source"); REXCVAR_DEFINE_STRING(ac6_native_graphics_backend, "auto", "AC6/NativeGraphics", - "Preferred native backend: auto, d3d12, vulkan, metal") - .allowed({"auto", "d3d12", "vulkan", "metal"}); + "Legacy experimental replay backend preference"); REXCVAR_DEFINE_STRING(ac6_native_graphics_feature_level, "scene_submission", "AC6/NativeGraphics", - "Native renderer feature level: bootstrap, scene_submission, parity_validation, shipping") + "Legacy experimental replay feature level (shipping is a legacy scaffold label)") .allowed({"bootstrap", "scene_submission", "parity_validation", "shipping"}); REXCVAR_DEFINE_INT32(ac6_native_graphics_frames_in_flight, 2, "AC6/NativeGraphics", - "Native renderer max frames in flight") + "Legacy experimental replay max frames in flight") .range(1, 4); namespace ac6::graphics { namespace { -std::mutex g_native_graphics_mutex; ac6::renderer::NativeRenderer g_native_renderer; 
NativeGraphicsRuntimeStatus g_runtime_status{}; +ac6::renderer::D3D12Backend* g_d3d12_backend = nullptr; -ac6::renderer::BackendType ParseBackend(std::string_view value) { - if (value == "d3d12") { - return ac6::renderer::BackendType::kD3D12; +GraphicsRuntimeMode ParseGraphicsMode(std::string_view value) { + if (value == "disabled") { + return GraphicsRuntimeMode::kDisabled; } - if (value == "vulkan") { - return ac6::renderer::BackendType::kVulkan; + if (value == "analysis_only") { + return GraphicsRuntimeMode::kAnalysisOnly; } - if (value == "metal") { - return ac6::renderer::BackendType::kMetal; + if (value == "legacy_replay_experimental") { + return GraphicsRuntimeMode::kLegacyReplayExperimental; } - return ac6::renderer::BackendType::kUnknown; + return GraphicsRuntimeMode::kHybridBackendFixes; } ac6::renderer::FeatureLevel ParseFeatureLevel(std::string_view value) { - if (value == "bootstrap") { - return ac6::renderer::FeatureLevel::kBootstrap; + using ac6::renderer::FeatureLevel; + if (value == "scene_submission") { + return FeatureLevel::kSceneSubmission; } if (value == "parity_validation") { - return ac6::renderer::FeatureLevel::kParityValidation; + return FeatureLevel::kParityValidation; } if (value == "shipping") { - return ac6::renderer::FeatureLevel::kShipping; + return FeatureLevel::kShipping; } - return ac6::renderer::FeatureLevel::kSceneSubmission; + return FeatureLevel::kBootstrap; } -ac6::renderer::NativeRendererConfig BuildRendererConfig() { - ac6::renderer::NativeRendererConfig config; - config.preferred_backend = ParseBackend(REXCVAR_GET(ac6_native_graphics_backend)); - config.feature_level = ParseFeatureLevel(REXCVAR_GET(ac6_native_graphics_feature_level)); - config.max_frames_in_flight = static_cast(REXCVAR_GET(ac6_native_graphics_frames_in_flight)); - config.enable_debug_markers = true; - config.enable_validation = true; - return config; +bool IsReplayMode(const GraphicsRuntimeMode mode) { + return mode == 
GraphicsRuntimeMode::kLegacyReplayExperimental; } -bool EnsureInitialized() { +void ResetReplayStatus() { + g_runtime_status.initialized = false; + g_runtime_status.replay_frames_built = 0; + g_runtime_status.active_backend = ac6::renderer::BackendType::kUnknown; + g_runtime_status.renderer_stats = {}; + g_runtime_status.frontend_summary = {}; + g_runtime_status.replay_summary = {}; + g_runtime_status.execution_summary = {}; + g_runtime_status.executor_summary = {}; + g_runtime_status.backend_executor_status = {}; + g_runtime_status.frame_plan = {}; + g_runtime_status.latest_renderer_frame_index = 0; + g_runtime_status.last_meaningful_renderer_frame_index = 0; + g_runtime_status.showing_latched_snapshot = false; +} + +void SyncRuntimeFlags() { + g_runtime_status.enabled = REXCVAR_GET(ac6_native_graphics_enabled); + g_runtime_status.mode = ParseGraphicsMode(REXCVAR_GET(ac6_graphics_mode)); + g_runtime_status.capture_enabled = REXCVAR_GET(ac6_render_capture); + g_runtime_status.authoritative_renderer_active = + g_runtime_status.enabled && + g_runtime_status.mode != GraphicsRuntimeMode::kDisabled; + g_runtime_status.experimental_replay_present = + g_runtime_status.enabled && + IsReplayMode(g_runtime_status.mode) && + REXCVAR_GET(ac6_experimental_replay_present); +} + +void RefreshRuntimeStatusFromRenderer() { + g_runtime_status.active_backend = g_native_renderer.GetStats().active_backend; + g_runtime_status.feature_level = g_native_renderer.feature_level(); + g_runtime_status.renderer_stats = g_native_renderer.GetStats(); + g_runtime_status.frontend_summary = g_native_renderer.frontend_summary(); + g_runtime_status.replay_summary = g_native_renderer.replay_summary(); + g_runtime_status.execution_summary = g_native_renderer.execution_summary(); + g_runtime_status.executor_summary = g_native_renderer.executor_summary(); + g_runtime_status.backend_executor_status = g_native_renderer.backend_executor_status(); + g_runtime_status.frame_plan = 
g_native_renderer.frame_plan(); +} + +bool IsMeaningfulRendererSnapshot( + const ac6::d3d::FrameCaptureSummary& capture_summary, + const ac6::renderer::FrontendFrameSummary& frontend_summary, + const ac6::renderer::ReplayFrameSummary& replay_summary, + const ac6::renderer::ExecutionFrameSummary& execution_summary, + const ac6::renderer::ReplayExecutorFrameSummary& executor_summary, + const ac6::renderer::BackendExecutorStatus& backend_status) { + return capture_summary.draw_count != 0 || capture_summary.clear_count != 0 || + capture_summary.resolve_count != 0 || + frontend_summary.total_command_count != 0 || + replay_summary.command_count != 0 || + execution_summary.command_count != 0 || + executor_summary.command_count != 0 || + backend_status.draw_attempt_count != 0 || + backend_status.clear_command_count != 0 || + backend_status.resolve_command_count != 0; +} + +void ShutdownReplayRenderer() { + g_d3d12_backend = nullptr; + if (!g_runtime_status.initialized) { + return; + } + g_native_renderer.Shutdown(); + ResetReplayStatus(); +} + +bool EnsureExperimentalReplayInitialized(rex::memory::Memory* memory) { + if (!g_runtime_status.enabled || !IsReplayMode(g_runtime_status.mode)) { + return false; + } if (g_runtime_status.initialized) { return true; } ++g_runtime_status.init_attempts; - const ac6::renderer::NativeRendererConfig config = BuildRendererConfig(); - if (!g_native_renderer.Initialize(config)) { - g_runtime_status.had_init_failure = true; - REXLOG_ERROR("AC6 native graphics failed to initialize backend={}", - ac6::renderer::ToString(ac6::renderer::ResolveBackend(config.preferred_backend))); + auto* ts = rex::runtime::ThreadState::Get(); + if (!ts || !ts->context() || !ts->context()->kernel_state) { return false; } - g_runtime_status.initialized = true; - g_runtime_status.had_init_failure = false; - ++g_runtime_status.init_successes; - g_runtime_status.feature_level = config.feature_level; - return true; -} + auto* graphics_system = 
ts->context()->kernel_state->graphics_system(); + if (!graphics_system || !graphics_system->provider()) { + return false; + } -void UpdateStatusFromRendererUnlocked() { - g_runtime_status.renderer_stats = g_native_renderer.GetStats(); - g_runtime_status.active_backend = g_runtime_status.renderer_stats.active_backend; - g_runtime_status.frontend_summary = g_native_renderer.frontend_summary(); - g_runtime_status.replay_summary = g_native_renderer.replay_summary(); - g_runtime_status.execution_summary = g_native_renderer.execution_summary(); - g_runtime_status.executor_summary = g_native_renderer.executor_summary(); - g_runtime_status.backend_executor_status = - g_native_renderer.backend_executor_status(); - g_runtime_status.frame_plan = g_native_renderer.frame_plan(); + auto* d3d_provider = + dynamic_cast(graphics_system->provider()); + if (!d3d_provider) { + g_runtime_status.had_init_failure = true; + return false; + } + + ID3D12Device* device = d3d_provider->GetDevice(); + ID3D12CommandQueue* queue = d3d_provider->GetDirectQueue(); + if (!device || !queue) { + return false; + } + + ac6::renderer::NativeRendererConfig config; + config.preferred_backend = ac6::renderer::BackendType::kD3D12; + config.feature_level = + ParseFeatureLevel(REXCVAR_GET(ac6_native_graphics_feature_level)); + config.max_frames_in_flight = static_cast( + std::clamp(REXCVAR_GET(ac6_native_graphics_frames_in_flight), 1, 4)); + config.enable_debug_markers = true; + config.enable_validation = false; + + if (!g_native_renderer.InitializeShared(config, memory, device, queue)) { + g_runtime_status.had_init_failure = true; + return false; + } + + g_d3d12_backend = g_native_renderer.GetD3D12Backend(); + ++g_runtime_status.init_successes; + g_runtime_status.initialized = true; + RefreshRuntimeStatusFromRenderer(); + REXLOG_INFO("AC6 graphics: legacy experimental replay renderer initialized"); + return true; } } // namespace -void OnFrameBoundary() { - std::scoped_lock lock(g_native_graphics_mutex); 
+std::string_view ToString(const GraphicsRuntimeMode mode) { + switch (mode) { + case GraphicsRuntimeMode::kDisabled: + return "disabled"; + case GraphicsRuntimeMode::kAnalysisOnly: + return "analysis_only"; + case GraphicsRuntimeMode::kHybridBackendFixes: + return "hybrid_backend_fixes"; + case GraphicsRuntimeMode::kLegacyReplayExperimental: + return "legacy_replay_experimental"; + default: + return "unknown"; + } +} - g_runtime_status.enabled = REXCVAR_GET(ac6_native_graphics_enabled); - if (!g_runtime_status.enabled) { - if (g_runtime_status.initialized) { - g_native_renderer.Shutdown(); - g_runtime_status.initialized = false; - } +void OnFrameBoundary(rex::memory::Memory* memory) { + SyncRuntimeFlags(); + + if (!g_runtime_status.enabled || g_runtime_status.mode == GraphicsRuntimeMode::kDisabled) { + ShutdownReplayRenderer(); + ac6::backend::ShutdownDiagnostics(); return; } - if (REXCVAR_GET(ac6_native_graphics_require_capture) && !REXCVAR_GET(ac6_render_capture)) { + if (REXCVAR_GET(ac6_native_graphics_require_capture)) { REXCVAR_SET(ac6_render_capture, true); + g_runtime_status.capture_enabled = true; } - if (!EnsureInitialized()) { - return; + if (!IsReplayMode(g_runtime_status.mode)) { + ShutdownReplayRenderer(); } + ac6::d3d::OnFrameBoundary(); + const ac6::d3d::FrameCaptureSnapshot frame_capture = ac6::d3d::GetFrameCapture(); - g_runtime_status.capture_summary = ac6::d3d::GetFrameCaptureSummary(); + const ac6::d3d::FrameCaptureSummary capture_summary = ac6::d3d::GetFrameCaptureSummary(); + const ac6::d3d::ShadowState shadow_state = ac6::d3d::GetShadowState(); + + ++g_runtime_status.analysis_frames_observed; + g_runtime_status.capture_summary = capture_summary; + g_runtime_status.latest_capture_frame_index = capture_summary.frame_index; + if (capture_summary.draw_count || capture_summary.clear_count || + capture_summary.resolve_count) { + g_runtime_status.last_meaningful_capture_frame_index = capture_summary.frame_index; + } + + 
rex::system::GraphicsSwapSubmission swap_submission{}; + uint64_t swap_sequence = 0; + uint64_t guest_vblank_interval_ticks = 0; + uint64_t last_guest_vblank_tick = 0; + rex::audio::AudioTelemetrySnapshot audio_telemetry{}; + rex::audio::AudioClientTimingSnapshot audio_timing{}; + const rex::audio::AudioTelemetrySnapshot* audio_telemetry_ptr = nullptr; + const rex::audio::AudioClientTimingSnapshot* audio_timing_ptr = nullptr; + + auto* ts = rex::runtime::ThreadState::Get(); + if (ts && ts->context() && ts->context()->kernel_state) { + auto* kernel_state = ts->context()->kernel_state; + if (auto* concrete_graphics = + dynamic_cast(kernel_state->graphics_system())) { + concrete_graphics->GetLastSwapSubmission(&swap_submission, &swap_sequence); + guest_vblank_interval_ticks = concrete_graphics->guest_vblank_interval_ticks(); + last_guest_vblank_tick = concrete_graphics->last_vblank_interrupt_guest_tick(); + } + if (auto* native_audio = kernel_state->native_audio_system()) { + audio_telemetry = native_audio->GetTelemetrySnapshot(); + audio_telemetry_ptr = &audio_telemetry; + if (audio_telemetry.active_clients != 0) { + audio_timing = native_audio->GetClientTimingSnapshot(0); + audio_timing_ptr = &audio_timing; + } + } + } + + ac6::backend::AnalyzeFrameBoundary( + frame_capture, capture_summary, shadow_state, + swap_sequence ? 
&swap_submission : nullptr, swap_sequence, + guest_vblank_interval_ticks, last_guest_vblank_tick, ac6::GetFrameStats(), + audio_telemetry_ptr, audio_timing_ptr); + g_runtime_status.backend_diagnostics = ac6::backend::GetDiagnosticsSnapshot(); + + if (!IsReplayMode(g_runtime_status.mode)) { + return; + } + if (!EnsureExperimentalReplayInitialized(memory)) { + return; + } g_native_renderer.BeginFrame(); g_native_renderer.BuildCapturedFrame(frame_capture); - ++g_runtime_status.frames_built; - UpdateStatusFromRendererUnlocked(); + g_runtime_status.replay_frames_built = g_native_renderer.GetStats().frame_count; + g_runtime_status.latest_renderer_frame_index = + g_native_renderer.GetStats().frame_count; + + const ac6::renderer::FrontendFrameSummary frontend_summary = + g_native_renderer.frontend_summary(); + const ac6::renderer::ReplayFrameSummary replay_summary = + g_native_renderer.replay_summary(); + const ac6::renderer::ExecutionFrameSummary execution_summary = + g_native_renderer.execution_summary(); + const ac6::renderer::ReplayExecutorFrameSummary executor_summary = + g_native_renderer.executor_summary(); + const ac6::renderer::BackendExecutorStatus backend_status = + g_native_renderer.backend_executor_status(); + + if (IsMeaningfulRendererSnapshot(capture_summary, frontend_summary, replay_summary, + execution_summary, executor_summary, backend_status) || + g_runtime_status.last_meaningful_renderer_frame_index == 0) { + RefreshRuntimeStatusFromRenderer(); + g_runtime_status.showing_latched_snapshot = false; + g_runtime_status.last_meaningful_renderer_frame_index = + g_runtime_status.latest_renderer_frame_index; + } else { + g_runtime_status.active_backend = g_native_renderer.GetStats().active_backend; + g_runtime_status.feature_level = g_native_renderer.feature_level(); + g_runtime_status.renderer_stats = g_native_renderer.GetStats(); + g_runtime_status.showing_latched_snapshot = true; + } } void Shutdown() { - std::scoped_lock lock(g_native_graphics_mutex); - if 
(!g_runtime_status.initialized) { - return; - } - g_native_renderer.Shutdown(); - g_runtime_status.initialized = false; + ShutdownReplayRenderer(); } NativeGraphicsRuntimeStatus GetRuntimeStatus() { - std::scoped_lock lock(g_native_graphics_mutex); + SyncRuntimeFlags(); + g_runtime_status.backend_diagnostics = ac6::backend::GetDiagnosticsSnapshot(); return g_runtime_status; } -} // namespace ac6::graphics +ID3D12Resource* GetNativeOutputTexture() { + SyncRuntimeFlags(); + if (!g_runtime_status.enabled || !g_runtime_status.initialized || + !IsReplayMode(g_runtime_status.mode) || + !REXCVAR_GET(ac6_experimental_replay_present)) { + return nullptr; + } + return g_d3d12_backend ? g_d3d12_backend->GetOutputTexture() : nullptr; +} +} // namespace ac6::graphics diff --git a/src/ac6_native_graphics.h b/src/ac6_native_graphics.h index 31a54be9..e0f117f6 100644 --- a/src/ac6_native_graphics.h +++ b/src/ac6_native_graphics.h @@ -1,24 +1,49 @@ #pragma once #include +#include +#include + +#include "ac6_backend_fixes/ac6_backend_hooks.h" #include "ac6_native_renderer/ac6_render_frontend.h" #include "ac6_native_renderer/execution_plan.h" #include "ac6_native_renderer/frame_plan.h" -#include "ac6_native_renderer/replay_ir.h" #include "ac6_native_renderer/replay_executor.h" +#include "ac6_native_renderer/replay_ir.h" #include "ac6_native_renderer/types.h" #include "d3d_state.h" +struct ID3D12Resource; + namespace ac6::graphics { +enum class GraphicsRuntimeMode : uint8_t { + kDisabled, + kAnalysisOnly, + kHybridBackendFixes, + kLegacyReplayExperimental, +}; + +std::string_view ToString(GraphicsRuntimeMode mode); + struct NativeGraphicsRuntimeStatus { bool enabled = false; + GraphicsRuntimeMode mode = GraphicsRuntimeMode::kHybridBackendFixes; + bool capture_enabled = false; + bool authoritative_renderer_active = false; + bool experimental_replay_present = false; bool initialized = false; bool had_init_failure = false; + bool showing_latched_snapshot = false; uint64_t init_attempts = 
0; uint64_t init_successes = 0; - uint64_t frames_built = 0; + uint64_t analysis_frames_observed = 0; + uint64_t replay_frames_built = 0; + uint64_t latest_capture_frame_index = 0; + uint64_t latest_renderer_frame_index = 0; + uint64_t last_meaningful_capture_frame_index = 0; + uint64_t last_meaningful_renderer_frame_index = 0; ac6::renderer::BackendType active_backend = ac6::renderer::BackendType::kUnknown; ac6::renderer::FeatureLevel feature_level = ac6::renderer::FeatureLevel::kBootstrap; @@ -29,13 +54,14 @@ struct NativeGraphicsRuntimeStatus { ac6::renderer::ReplayExecutorFrameSummary executor_summary{}; ac6::renderer::BackendExecutorStatus backend_executor_status{}; ac6::d3d::FrameCaptureSummary capture_summary{}; + ac6::backend::BackendDiagnosticsSnapshot backend_diagnostics{}; ac6::renderer::NativeFramePlan frame_plan{}; }; -void OnFrameBoundary(); +void OnFrameBoundary(rex::memory::Memory* memory); void Shutdown(); NativeGraphicsRuntimeStatus GetRuntimeStatus(); +ID3D12Resource* GetNativeOutputTexture(); } // namespace ac6::graphics - diff --git a/src/ac6_native_graphics_overlay.cpp b/src/ac6_native_graphics_overlay.cpp index d8481020..af9a6cdd 100644 --- a/src/ac6_native_graphics_overlay.cpp +++ b/src/ac6_native_graphics_overlay.cpp @@ -17,91 +17,112 @@ void NativeGraphicsStatusDialog::OnDraw(ImGuiIO& io) { return; } - if (!ImGui::Begin("AC6 Native Graphics##status", &visible_, ImGuiWindowFlags_NoCollapse)) { + if (!ImGui::Begin("AC6 Graphics Diagnostics##status", &visible_, + ImGuiWindowFlags_NoCollapse)) { ImGui::End(); return; } const NativeGraphicsRuntimeStatus status = GetRuntimeStatus(); - ImGui::Text("enabled: %s", status.enabled ? "true" : "false"); - ImGui::Text("initialized: %s", status.initialized ? "true" : "false"); - ImGui::Text("init failures seen: %s", status.had_init_failure ? 
"true" : "false"); - ImGui::Text("init attempts/successes: %llu / %llu", - static_cast(status.init_attempts), - static_cast(status.init_successes)); - ImGui::Text("frames built: %llu", static_cast(status.frames_built)); - ImGui::Separator(); - ImGui::Text("backend: %s", ac6::renderer::ToString(status.active_backend).data()); - ImGui::Text("feature level: %s", ac6::renderer::ToString(status.feature_level).data()); - ImGui::Text("renderer frames: %llu", - static_cast(status.renderer_stats.frame_count)); - ImGui::Text("render passes built: %llu", - static_cast(status.renderer_stats.built_pass_count)); - ImGui::Text("backend submits: %llu", - static_cast(status.renderer_stats.backend_submit_count)); - ImGui::Text("frontend passes/commands: %u / %u", status.frontend_summary.pass_count, - status.frontend_summary.total_command_count); - ImGui::Text("replay passes/commands: %u / %u", status.replay_summary.pass_count, - status.replay_summary.command_count); - ImGui::Text("execution passes/commands: %u / %u", - status.execution_summary.pass_count, status.execution_summary.command_count); - ImGui::Text("executor passes/commands: %u / %u", - status.executor_summary.pass_count, status.executor_summary.command_count); + const auto& diagnostics = status.backend_diagnostics; + + ImGui::Text("module: %s", status.enabled ? "enabled" : "disabled"); + ImGui::Text("mode: %.*s", static_cast(ToString(status.mode).size()), + ToString(status.mode).data()); + ImGui::Text("authoritative renderer: %s", + status.authoritative_renderer_active ? "RexGlue/Xenia D3D12 backend" + : "disabled"); + ImGui::Text("capture active: %s", status.capture_enabled ? "yes" : "no"); + ImGui::Text("experimental replay present override: %s", + status.experimental_replay_present ? 
"enabled" : "disabled"); + ImGui::Text("analysis frames / replay frames: %llu / %llu", + static_cast(status.analysis_frames_observed), + static_cast(status.replay_frames_built)); + ImGui::Separator(); ImGui::Text("capture frame: %llu", static_cast(status.capture_summary.frame_index)); - ImGui::Text("capture draws/clears/resolves: %u / %u / %u", + ImGui::Text("capture draws / clears / resolves: %u / %u / %u", status.capture_summary.draw_count, status.capture_summary.clear_count, status.capture_summary.resolve_count); + ImGui::Text("capture indexed / shared / primitive: %u / %u / %u", + status.capture_summary.indexed_draw_count, + status.capture_summary.indexed_shared_draw_count, + status.capture_summary.primitive_draw_count); + ImGui::Text("capture rt0 switches / unique rt0: %u / %u", + status.capture_summary.rt0_switch_count, + status.capture_summary.unique_rt0_count); + ImGui::Text("frame-end viewport: %ux%u", + status.capture_summary.frame_end_viewport_width, + status.capture_summary.frame_end_viewport_height); + ImGui::Separator(); - ImGui::TextUnformatted("guest draw counts (this frame, pre-reset):"); - ImGui::Text(" indexed / shared / primitive: %u / %u / %u", - status.capture_summary.frame_stats.draw_calls_indexed, - status.capture_summary.frame_stats.draw_calls_indexed_shared, - status.capture_summary.frame_stats.draw_calls_primitive); - ImGui::Text(" set_sampler / set_texture_fetch: %u / %u", - status.capture_summary.frame_stats.set_sampler_state_calls, - status.capture_summary.frame_stats.set_texture_fetch_calls); - ImGui::TextUnformatted("primitive topology (D3D9 type, all draws):"); - ImGui::Text(" point %u line %u strip %u tri %u triStrip %u fan %u other %u", - status.capture_summary.topology_pointlist, status.capture_summary.topology_linelist, - status.capture_summary.topology_linestrip, status.capture_summary.topology_trianglelist, - status.capture_summary.topology_trianglestrip, status.capture_summary.topology_trianglefan, - 
status.capture_summary.topology_other); - ImGui::Text("last draw: prim_type=%u count=%u flags=0x%X", - status.capture_summary.last_draw_primitive_type, status.capture_summary.last_draw_count, - status.capture_summary.last_draw_flags); + ImGui::Text("swap source: %s", ac6::backend::ToString(diagnostics.swap_source)); + ImGui::Text("frontbuffer / guest output: %ux%u / %ux%u", + diagnostics.frontbuffer_width, diagnostics.frontbuffer_height, + diagnostics.guest_output_width, diagnostics.guest_output_height); + ImGui::Text("swap source extent: %ux%u (%s)", + diagnostics.source_width, diagnostics.source_height, + diagnostics.swap_source_scaled ? "scaled" : "unscaled"); + ImGui::Text("present classification: %s", + ac6::backend::ToString(diagnostics.latest_signature.classification)); + ImGui::Text("signature: %016llX hits=%u", + static_cast(diagnostics.latest_signature.stable_id), + diagnostics.repeated_signature_count); + ImGui::TextWrapped("signature tags: %s", + diagnostics.latest_signature_tags.empty() + ? "none" + : diagnostics.latest_signature_tags.c_str()); + ImGui::Text("authoritative VS / PS: %016llX / %016llX", + static_cast(diagnostics.active_vertex_shader_hash), + static_cast(diagnostics.active_pixel_shader_hash)); + ImGui::Text("vblank interval / last tick: %llu / %llu", + static_cast(diagnostics.guest_vblank_interval_ticks), + static_cast(diagnostics.last_guest_vblank_tick)); + ImGui::Text("host frame time / fps: %.2f ms / %.2f", + diagnostics.host_frame_time_ms, diagnostics.host_fps); + ImGui::Separator(); - ImGui::Text("planned output: %ux%u", status.frame_plan.output_width, - status.frame_plan.output_height); - ImGui::Text("replay output/present: %ux%u / %s", status.replay_summary.output_width, - status.replay_summary.output_height, - status.replay_summary.has_present_pass ? 
"yes" : "no"); - ImGui::Text("execution output/present: %ux%u / %s", - status.execution_summary.output_width, status.execution_summary.output_height, - status.execution_summary.has_present_pass ? "yes" : "no"); - ImGui::Text("executor output/present: %ux%u / %s", - status.executor_summary.output_width, status.executor_summary.output_height, - status.executor_summary.has_present_pass ? "yes" : "no"); - ImGui::Text("executor graphics/present/resource: %u / %u / %u", - status.executor_summary.graphics_pass_count, - status.executor_summary.present_pass_count, - status.executor_summary.resource_translation_pass_count); - ImGui::Text("backend consumed frame/passes/cmds: %s / %u / %u", - status.backend_executor_status.frame_valid ? "yes" : "no", - status.backend_executor_status.submitted_pass_count, - status.backend_executor_status.submitted_command_count); - ImGui::Text("backend resource/pso/descriptors: %u / %u / %u", - status.backend_executor_status.resource_translation_pass_count, - status.backend_executor_status.pipeline_state_pass_count, - status.backend_executor_status.descriptor_setup_pass_count); - ImGui::Text("stages scene/post/ui: %s / %s / %s", - status.frame_plan.has_scene_stage ? "yes" : "no", - status.frame_plan.has_post_process_stage ? "yes" : "no", - status.frame_plan.has_ui_stage ? "yes" : "no"); + ImGui::Text("audio backend: %s", + diagnostics.audio_backend_name.empty() + ? 
"unavailable" + : diagnostics.audio_backend_name.c_str()); + ImGui::Text("audio clients / queued / peak: %u / %u / %u", + diagnostics.audio_active_clients, diagnostics.audio_queued_frames, + diagnostics.audio_peak_queued_frames); + ImGui::Text("audio underruns / dropped / silence inject: %u / %u / %u", + diagnostics.audio_underruns, diagnostics.audio_dropped_frames, + diagnostics.audio_silence_injections); + ImGui::Text("audio consumed frames / submitted tic / host tic: %llu / %llu / %llu", + static_cast(diagnostics.audio_consumed_frames), + static_cast(diagnostics.audio_submitted_tic), + static_cast(diagnostics.audio_host_elapsed_tic)); + ImGui::Text("audio startup inflight / callback dispatch / throttle: %u / %u / %u", + diagnostics.audio_startup_inflight_frames, + diagnostics.audio_callback_dispatch_count, + diagnostics.audio_callback_throttle_count); + + if (status.mode == GraphicsRuntimeMode::kLegacyReplayExperimental) { + ImGui::Separator(); + ImGui::TextUnformatted("legacy replay diagnostics (experimental):"); + ImGui::Text("initialized: %s", status.initialized ? "true" : "false"); + ImGui::Text("init failures seen: %s", status.had_init_failure ? 
"true" : "false"); + ImGui::Text("replay backend: %s", + ac6::renderer::ToString(status.active_backend).data()); + ImGui::Text("replay feature level: %s", + ac6::renderer::ToString(status.feature_level).data()); + ImGui::Text("frontend / replay / execution commands: %u / %u / %u", + status.frontend_summary.total_command_count, + status.replay_summary.command_count, + status.execution_summary.command_count); + ImGui::Text("backend draw attempts / success: %u / %u", + status.backend_executor_status.draw_attempt_count, + status.backend_executor_status.draw_success_count); + ImGui::Text("planned output: %ux%u", status.frame_plan.output_width, + status.frame_plan.output_height); + } ImGui::End(); } } // namespace ac6::graphics - diff --git a/src/ac6_native_renderer/ac6_render_frontend.cpp b/src/ac6_native_renderer/ac6_render_frontend.cpp index 4cae3f16..5bc4de8a 100644 --- a/src/ac6_native_renderer/ac6_render_frontend.cpp +++ b/src/ac6_native_renderer/ac6_render_frontend.cpp @@ -5,10 +5,10 @@ namespace ac6::renderer { namespace { -template -uint32_t CountNonZeroEntries(const std::array& values) { +template +uint32_t CountNonZeroEntries(const Container& values) { uint32_t count = 0; - for (const T& value : values) { + for (const auto& value : values) { if (value) { ++count; } @@ -73,6 +73,7 @@ ObservedCommandDesc MakeObservedCommand(const ac6::d3d::DrawCallRecord& draw) { .viewport_y = draw.shadow_state.viewport.y, .viewport_width = draw.shadow_state.viewport.width, .viewport_height = draw.shadow_state.viewport.height, + .shadow_state = draw.shadow_state, }; } @@ -96,6 +97,7 @@ ObservedCommandDesc MakeObservedCommand(const ac6::d3d::ClearRecord& clear) { .viewport_y = clear.shadow_state.viewport.y, .viewport_width = clear.shadow_state.viewport.width, .viewport_height = clear.shadow_state.viewport.height, + .shadow_state = clear.shadow_state, }; } @@ -113,6 +115,7 @@ ObservedCommandDesc MakeObservedCommand(const ac6::d3d::ResolveRecord& resolve) .viewport_y = 
resolve.shadow_state.viewport.y, .viewport_width = resolve.shadow_state.viewport.width, .viewport_height = resolve.shadow_state.viewport.height, + .shadow_state = resolve.shadow_state, }; } diff --git a/src/ac6_native_renderer/ac6_render_frontend.h b/src/ac6_native_renderer/ac6_render_frontend.h index afe9821a..d0243793 100644 --- a/src/ac6_native_renderer/ac6_render_frontend.h +++ b/src/ac6_native_renderer/ac6_render_frontend.h @@ -43,6 +43,7 @@ struct ObservedCommandDesc { uint32_t viewport_y = 0; uint32_t viewport_width = 0; uint32_t viewport_height = 0; + ac6::d3d::ShadowState shadow_state{}; }; struct ObservedPassDesc { diff --git a/src/ac6_native_renderer/backends/d3d12_backend.cpp b/src/ac6_native_renderer/backends/d3d12_backend.cpp index d74ddfa3..3806f5dd 100644 --- a/src/ac6_native_renderer/backends/d3d12_backend.cpp +++ b/src/ac6_native_renderer/backends/d3d12_backend.cpp @@ -1,14 +1,159 @@ +#ifndef NOMINMAX +#define NOMINMAX +#endif #include "d3d12_backend.h" +#include + #include +#include #if defined(_WIN32) #pragma comment(lib, "d3d12.lib") #pragma comment(lib, "dxgi.lib") +#pragma comment(lib, "d3dcompiler.lib") #endif namespace ac6::renderer { +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +#if defined(_WIN32) + +// Compute a safe index-buffer upload size from draw count + index type. +// Xenos always uses 16-bit indices unless count > 65535. +static uint32_t SafeIndexBufferSize(uint32_t index_count) { + // Xbox 360 indices are always big-endian uint16. Cap at 4 MB. + const uint64_t byte_size = uint64_t(index_count) * sizeof(uint16_t); + return byte_size <= 4u * 1024u * 1024u ? static_cast(byte_size) : 0u; +} + +// Compute a safe vertex-buffer upload size from vertex count + stride. 
+static uint32_t SafeVertexBufferSize(uint32_t vertex_count, uint32_t stride) { + if (stride == 0 || vertex_count == 0) return 0; + const uint64_t byte_size = uint64_t(vertex_count) * uint64_t(stride); + // Cap at 8 MB per stream. + return byte_size <= 8u * 1024u * 1024u ? static_cast(byte_size) : 0u; +} + +// Validate a guest address is non-zero before calling TranslateVirtual. +static bool ValidGuestAddress(uint32_t addr) { + return addr != 0 && addr < 0xFF000000u; +} + +// Hash a byte span with FNV-1a. +static uint64_t FnvHash64(const void* data, size_t size) { + constexpr uint64_t kBasis = 14695981039346656037ull; + constexpr uint64_t kPrime = 1099511628211ull; + uint64_t h = kBasis; + const auto* p = static_cast(data); + for (size_t i = 0; i < size; ++i) { h ^= p[i]; h *= kPrime; } + return h; +} + +static uint16_t ByteSwap16(uint16_t value) { + return static_cast((value << 8) | (value >> 8)); +} + +static D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTypeToTopologyType(uint32_t primitive_type) { + switch (primitive_type) { + case 1: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + case 2: + case 3: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + case 4: + case 8: + case 13: + case 5: + case 6: + default: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + } +} + +static D3D12_PRIMITIVE_TOPOLOGY PrimitiveTypeToTopology(uint32_t primitive_type) { + switch (primitive_type) { + case 1: + return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + case 2: + return D3D_PRIMITIVE_TOPOLOGY_LINELIST; + case 3: + return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + case 4: + case 8: + case 13: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case 5: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + case 6: + // Triangle fans need index expansion. Use strip as the least-bad fallback + // until guest primitive conversion is implemented. 
+ return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + default: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + } +} + +static bool IsQuadListPrimitive(uint32_t primitive_type) { + // Xenos kQuadList. + return primitive_type == 13; +} + +static bool IsRectangleListPrimitive(uint32_t primitive_type) { + // Xenos kRectangleList. + return primitive_type == 8; +} + +static bool NeedsSyntheticIndices(uint32_t primitive_type) { + return IsQuadListPrimitive(primitive_type) || IsRectangleListPrimitive(primitive_type); +} + +static uint32_t GuessColorOffset(uint32_t vertex_stride) { + if (vertex_stride == 20 || vertex_stride == 24) { + return 12; + } + if (vertex_stride >= 28) { + return 16; + } + return 0xFFFFFFFFu; +} + +struct DrawRootConstants { + uint32_t vertex_base_offset = 0; + uint32_t vertex_stride = 0; + uint32_t vertex_buffer_size = 0; + uint32_t viewport_x = 0; + uint32_t viewport_y = 0; + uint32_t viewport_width = 0; + uint32_t viewport_height = 0; + uint32_t color_offset = 0xFFFFFFFFu; + uint32_t flags = 0; +}; + +static void CreateRawBufferSRV(ID3D12Device* device, + ID3D12Resource* resource, + uint32_t size_bytes, + D3D12_CPU_DESCRIPTOR_HANDLE handle) { + D3D12_SHADER_RESOURCE_VIEW_DESC desc = {}; + desc.Format = DXGI_FORMAT_R32_TYPELESS; + desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + desc.Buffer.FirstElement = 0; + desc.Buffer.NumElements = std::max(1u, (size_bytes + 3u) / 4u); + desc.Buffer.StructureByteStride = 0; + desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + device->CreateShaderResourceView(resource, &desc, handle); +} + +#endif // _WIN32 + +// --------------------------------------------------------------------------- +// D3D12Backend public interface +// --------------------------------------------------------------------------- + bool D3D12Backend::IsSupported() const { #if defined(_WIN32) return true; @@ -17,281 +162,845 @@ bool D3D12Backend::IsSupported() const { #endif 
} -bool D3D12Backend::Initialize(const NativeRendererConfig& config) { - if (initialized_) { - return true; - } +// --------------------------------------------------------------------------- +// InitializeShared — reuse the emulator's already-created device/queue +// --------------------------------------------------------------------------- +bool D3D12Backend::InitializeShared(const NativeRendererConfig& config, + rex::memory::Memory* memory, + ID3D12Device* device, + ID3D12CommandQueue* queue) { + if (initialized_) return true; + memory_ = memory; + device_ = device; + graphics_queue_ = queue; + + REXLOG_INFO("D3D12Backend::InitializeShared device=0x{:016X}", (uint64_t)device_.Get()); + + if (!CreateCommandObjects(config.max_frames_in_flight)) return false; + if (!resource_manager_.Initialize(device_.Get(), config.max_frames_in_flight)) return false; + if (!shader_manager_.Initialize(device_.Get())) return false; + if (!CreateRootSignature()) return false; + + frame_scheduler_.Configure(config.max_frames_in_flight); + initialized_ = true; + REXLOG_INFO("D3D12Backend::InitializeShared succeeded max_frames={}", config.max_frames_in_flight); + return true; +} + +// --------------------------------------------------------------------------- +// Initialize — create our own device (standalone mode) +// --------------------------------------------------------------------------- +bool D3D12Backend::Initialize(const NativeRendererConfig& config, + rex::memory::Memory* memory) { + if (initialized_) return true; + memory_ = memory; + REXLOG_INFO("D3D12Backend::Initialize starting"); #if defined(_WIN32) - if (!CreateDevice()) { - REXLOG_ERROR("D3D12 CreateDevice failed."); - return false; - } - - if (!CreateCommandObjects(config.max_frames_in_flight)) { - REXLOG_ERROR("D3D12 CreateCommandObjects failed."); - return false; - } - + if (!CreateDevice()) return false; + if (!CreateCommandObjects(config.max_frames_in_flight)) return false; + if 
(!resource_manager_.Initialize(device_.Get(), config.max_frames_in_flight)) return false; + if (!shader_manager_.Initialize(device_.Get())) return false; + if (!CreateRootSignature()) return false; frame_scheduler_.Configure(config.max_frames_in_flight); #endif - executor_status_ = {}; - executor_status_.initialized = true; initialized_ = true; - REXLOG_INFO("AC6 native renderer D3D12 backend initialized successfully with max_frames_in_flight={}", config.max_frames_in_flight); + REXLOG_INFO("D3D12Backend::Initialize succeeded max_frames={}", config.max_frames_in_flight); return true; } +// --------------------------------------------------------------------------- +// Phase 4 helper — ensure output texture exists at correct size +// --------------------------------------------------------------------------- +#if defined(_WIN32) +bool D3D12Backend::EnsureOutputTexture(uint32_t width, uint32_t height) { + if (!device_) return false; + // Clamp / default + if (width == 0) width = 1280; + if (height == 0) height = 720; + + // Texture exists and is the correct size — reuse it. + if (output_texture_ && output_width_ == width && output_height_ == height) + return true; + + // If we already have a texture at a different size: keep the old one rather + // than destroying it mid-flight. Resize only when safe (i.e., on first creation). + if (output_texture_) { + // Already created at a different size — just return success and keep + // rendering into the old size to avoid a WaitForGpu on the shared queue. 
+ return true; + } + + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + desc.Width = width; + desc.Height = height; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = kOutputFormat; + desc.SampleDesc.Count = 1; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + D3D12_CLEAR_VALUE clear_val = {}; + clear_val.Format = kOutputFormat; + clear_val.Color[3] = 1.0f; + + D3D12_HEAP_PROPERTIES heap = {}; + heap.Type = D3D12_HEAP_TYPE_DEFAULT; + + HRESULT hr = device_->CreateCommittedResource( + &heap, D3D12_HEAP_FLAG_NONE, &desc, + D3D12_RESOURCE_STATE_RENDER_TARGET, &clear_val, + IID_PPV_ARGS(&output_texture_)); + if (FAILED(hr)) { + REXLOG_ERROR("D3D12Backend: CreateCommittedResource output texture failed 0x{:08X}", (uint32_t)hr); + return false; + } + output_texture_->SetName(L"ac6.native.output"); + + // Create/recreate the RTV heap + descriptor + if (!output_rtv_heap_) { + D3D12_DESCRIPTOR_HEAP_DESC rtv_desc = {}; + rtv_desc.NumDescriptors = 1; + rtv_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + hr = device_->CreateDescriptorHeap(&rtv_desc, IID_PPV_ARGS(&output_rtv_heap_)); + if (FAILED(hr)) return false; + } + output_rtv_ = output_rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + device_->CreateRenderTargetView(output_texture_.Get(), nullptr, output_rtv_); + + // Track the new resource + resource_tracker_.TrackResource(output_texture_.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET); + + output_width_ = width; + output_height_ = height; + REXLOG_INFO("D3D12Backend: output texture {}x{} created", width, height); + return true; +} +#endif + +// --------------------------------------------------------------------------- +// SubmitExecutorFrame — Phases 1, 3, 4 +// --------------------------------------------------------------------------- bool D3D12Backend::SubmitExecutorFrame(const ReplayExecutorFrame& frame) { - if (!initialized_) { + if (!initialized_ || !device_) return false; + + // Guard: device removed? 
+ if (FAILED(device_->GetDeviceRemovedReason())) { + REXLOG_ERROR("D3D12Backend: device removed, disabling backend"); + initialized_ = false; return false; } #if defined(_WIN32) - frame_scheduler_.BeginFrame(); - uint32_t slot = frame_scheduler_.frame_slot(); - FrameContext& frame_ctx = frame_contexts_[slot]; + submission_debug_stats_ = {}; - // Wait for the GPU to finish with this frame slot if needed. - if (fence_->GetCompletedValue() < frame_ctx.fence_value) { - fence_->SetEventOnCompletion(frame_ctx.fence_value, (HANDLE)fence_event_); + // Determine output dimensions from the frame summary + uint32_t out_w = frame.summary.output_width; + uint32_t out_h = frame.summary.output_height; + if (!EnsureOutputTexture(out_w, out_h)) return false; + + // Pick the frame slot + uint32_t slot = static_cast(frame.summary.frame_index % frame_contexts_.size()); + FrameContext& ctx = frame_contexts_[slot]; + + // Wait for this slot to finish on GPU + if (fence_->GetCompletedValue() < ctx.fence_value) { + fence_->SetEventOnCompletion(ctx.fence_value, (HANDLE)fence_event_); WaitForSingleObject((HANDLE)fence_event_, INFINITE); } - // Reset the command allocator for the current frame slot. - HRESULT hr = frame_ctx.command_allocator->Reset(); - if (FAILED(hr)) { - REXLOG_ERROR("Failed to reset command allocator."); - return false; - } + // Reset for this frame + HRESULT hr = ctx.command_allocator->Reset(); + if (FAILED(hr)) { REXLOG_ERROR("D3D12Backend: allocator Reset failed"); return false; } - // Reset the command list, using the reset allocator. 
- hr = command_list_->Reset(frame_ctx.command_allocator.Get(), nullptr); - if (FAILED(hr)) { - REXLOG_ERROR("Failed to reset command list."); - return false; - } + hr = ctx.command_list->Reset(ctx.command_allocator.Get(), nullptr); + if (FAILED(hr)) { REXLOG_ERROR("D3D12Backend: cmdlist Reset failed"); return false; } - // ----------------------------------------------------------------- - // Workstreams 2 & 3: Minimal Resource Translation and Pipeline Setup - // We mock the caching and PSO fetching by checking the requirement counts. - // ----------------------------------------------------------------- + resource_manager_.BeginFrame(slot); + + // ------------------------------------------------------------------- + // Phase 4: Transition output texture → RTV, clear it + // ------------------------------------------------------------------- + resource_tracker_.TransitionBarrier(ctx.command_list.Get(), + output_texture_.Get(), + D3D12_RESOURCE_STATE_RENDER_TARGET); + float clear_color[4] = {0.05f, 0.05f, 0.08f, 1.0f}; + ctx.command_list->ClearRenderTargetView(output_rtv_, clear_color, 0, nullptr); + ctx.command_list->OMSetRenderTargets(1, &output_rtv_, FALSE, nullptr); + + D3D12_VIEWPORT vp = {0.f, 0.f, (float)output_width_, (float)output_height_, 0.f, 1.f}; + D3D12_RECT scissor = {0, 0, (LONG)output_width_, (LONG)output_height_}; + ctx.command_list->RSSetViewports(1, &vp); + ctx.command_list->RSSetScissorRects(1, &scissor); + ctx.command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + // ------------------------------------------------------------------- + // Phase 3: Dispatch per-pass commands + // ------------------------------------------------------------------- for (const ReplayExecutorPassPacket& pass : frame.passes) { - if (pass.requires_resource_translation) { - // Mock resource translation lookup - for (const auto& cmd : pass.commands) { - if (cmd.touches_render_target) { - resource_cache_[cmd.execution_command_index] = 
dummy_output_resource_; - } - } - } - if (pass.requires_pipeline_state) { - // Mock PSO fetch - for (const auto& cmd : pass.commands) { - if (cmd.requires_pipeline_state) { - pso_cache_[cmd.execution_command_index] = nullptr; // mock PSO - } - } - } + DispatchPassCommands(ctx.command_list.Get(), pass, slot); } - hr = command_list_->Close(); - if (FAILED(hr)) { - REXLOG_ERROR("Failed to close command list."); - return false; - } + // ------------------------------------------------------------------- + // Phase 4: Transition output → PIXEL_SHADER_RESOURCE for blit consumer + // ------------------------------------------------------------------- + resource_tracker_.TransitionBarrier(ctx.command_list.Get(), + output_texture_.Get(), + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - ID3D12CommandList* ppCommandLists[] = { command_list_.Get() }; - graphics_queue_->ExecuteCommandLists(1, ppCommandLists); + hr = ctx.command_list->Close(); + if (FAILED(hr)) { REXLOG_ERROR("D3D12Backend: cmdlist Close failed"); return false; } - // Update the fence value for the current frame slot. 
- current_fence_value_++; + ID3D12CommandList* lists[] = { ctx.command_list.Get() }; + graphics_queue_->ExecuteCommandLists(1, lists); + + ++current_fence_value_; hr = graphics_queue_->Signal(fence_.Get(), current_fence_value_); - if (FAILED(hr)) { - REXLOG_ERROR("Failed to signal queue."); - return false; - } - frame_ctx.fence_value = current_fence_value_; - + if (FAILED(hr)) { REXLOG_ERROR("D3D12Backend: Signal failed"); return false; } + ctx.fence_value = current_fence_value_; #endif executor_status_ = { - .initialized = true, - .frame_valid = frame.summary.valid, - .frame_index = frame.summary.frame_index, - .submitted_pass_count = frame.summary.pass_count, - .submitted_command_count = frame.summary.command_count, - .graphics_pass_count = frame.summary.graphics_pass_count, - .async_compute_pass_count = frame.summary.async_compute_pass_count, - .copy_pass_count = frame.summary.copy_pass_count, - .present_pass_count = frame.summary.present_pass_count, - .resource_translation_pass_count = - frame.summary.resource_translation_pass_count, - .pipeline_state_pass_count = frame.summary.pipeline_state_pass_count, - .descriptor_setup_pass_count = frame.summary.descriptor_setup_pass_count, + .initialized = true, + .frame_valid = frame.summary.valid, + .frame_index = frame.summary.frame_index, + .submitted_pass_count = frame.summary.pass_count, + .submitted_command_count = frame.summary.command_count, + .graphics_pass_count = frame.summary.graphics_pass_count, + .async_compute_pass_count = frame.summary.async_compute_pass_count, + .copy_pass_count = frame.summary.copy_pass_count, + .present_pass_count = frame.summary.present_pass_count, + .resource_translation_pass_count = frame.summary.resource_translation_pass_count, + .pipeline_state_pass_count = frame.summary.pipeline_state_pass_count, + .descriptor_setup_pass_count = frame.summary.descriptor_setup_pass_count, + .draw_attempt_count = submission_debug_stats_.draw_attempt_count, + .draw_success_count = 
submission_debug_stats_.draw_success_count, + .draw_prepare_failure_count = submission_debug_stats_.draw_prepare_failure_count, + .draw_pso_failure_count = submission_debug_stats_.draw_pso_failure_count, + .indexed_draw_count = submission_debug_stats_.indexed_draw_count, + .non_indexed_draw_count = submission_debug_stats_.non_indexed_draw_count, + .clear_command_count = submission_debug_stats_.clear_command_count, + .resolve_command_count = submission_debug_stats_.resolve_command_count, + .invalid_stream_binding_count = submission_debug_stats_.invalid_stream_binding_count, + .invalid_index_buffer_count = submission_debug_stats_.invalid_index_buffer_count, + .index_count_overflow_count = submission_debug_stats_.index_count_overflow_count, + .index_data_unavailable_count = submission_debug_stats_.index_data_unavailable_count, + .index_buffer_create_failure_count = + submission_debug_stats_.index_buffer_create_failure_count, + .index_upload_failure_count = submission_debug_stats_.index_upload_failure_count, + .zero_vertex_count = submission_debug_stats_.zero_vertex_count, + .invalid_vertex_range_count = submission_debug_stats_.invalid_vertex_range_count, + .vertex_buffer_size_invalid_count = + submission_debug_stats_.vertex_buffer_size_invalid_count, + .vertex_buffer_create_failure_count = + submission_debug_stats_.vertex_buffer_create_failure_count, + .vertex_data_unavailable_count = + submission_debug_stats_.vertex_data_unavailable_count, + .vertex_upload_failure_count = submission_debug_stats_.vertex_upload_failure_count, }; - - REXLOG_TRACE( - "AC6 native renderer D3D12 submit frame={} passes={} commands={} graphics={} present={} resource={} pso={} descriptors={}", - executor_status_.frame_index, executor_status_.submitted_pass_count, - executor_status_.submitted_command_count, - executor_status_.graphics_pass_count, executor_status_.present_pass_count, - executor_status_.resource_translation_pass_count, - executor_status_.pipeline_state_pass_count, - 
executor_status_.descriptor_setup_pass_count); - return true; } -void D3D12Backend::Shutdown() { - if (!initialized_) { - return; +// --------------------------------------------------------------------------- +// Phase 3: per-pass command dispatch +// --------------------------------------------------------------------------- +#if defined(_WIN32) +void D3D12Backend::DispatchPassCommands(ID3D12GraphicsCommandList* cmd, + const ReplayExecutorPassPacket& pass, + uint32_t slot) { + ID3D12DescriptorHeap* descriptor_heaps[] = {resource_manager_.GetSrvHeap()}; + const DXGI_FORMAT rt_fmt = kOutputFormat; + const DXGI_FORMAT ds_fmt = DXGI_FORMAT_UNKNOWN; + const bool soft_part = (pass.role == ReplayPassRole::kPostProcess || + pass.role == ReplayPassRole::kPresent); + + for (const ReplayExecutorCommandPacket& command : pass.commands) { + switch (command.category) { + case ExecutionCommandCategory::kClear: { + ++submission_debug_stats_.clear_command_count; + // Use the captured clear color rather than deriving nonsense from the + // render-target handle stored in shadow state. + float cc[4] = { + static_cast((command.color >> 16) & 0xFF) / 255.f, + static_cast((command.color >> 8) & 0xFF) / 255.f, + static_cast((command.color >> 0) & 0xFF) / 255.f, + static_cast((command.color >> 24) & 0xFF) / 255.f, + }; + cmd->ClearRenderTargetView(output_rtv_, cc, 0, nullptr); + break; + } + + case ExecutionCommandCategory::kDraw: { + ++submission_debug_stats_.draw_attempt_count; + // Draw a full-screen triangle via SV_VertexID — the passthrough VS + // does not need any vertex buffer. Real geometry binding (Phase 1.4) + // requires a CPU-safe readback copy, not a live TranslateVirtual read. 
+ DrawResources draw_resources; + if (!PrepareDrawResources(cmd, command, slot, draw_resources)) { + ++submission_debug_stats_.draw_prepare_failure_count; + break; + } + + const uint64_t pso_hash = MakePSOHash(rt_fmt, ds_fmt, + draw_resources.topology_type, + soft_part); + ID3D12PipelineState* pso = shader_manager_.GetOrCreatePSO( + pso_hash, root_signature_.Get(), rt_fmt, ds_fmt, + draw_resources.topology_type, soft_part); + if (!pso) { + ++submission_debug_stats_.draw_pso_failure_count; + break; + } + + DrawRootConstants constants; + constants.vertex_base_offset = draw_resources.vertex_base_offset; + constants.vertex_stride = draw_resources.vertex_stride; + constants.vertex_buffer_size = draw_resources.vertex_buffer_size; + constants.viewport_x = command.shadow_state.viewport.x; + constants.viewport_y = command.shadow_state.viewport.y; + constants.viewport_width = + command.shadow_state.viewport.width ? command.shadow_state.viewport.width : pass.output_width; + constants.viewport_height = + command.shadow_state.viewport.height ? 
command.shadow_state.viewport.height : pass.output_height; + constants.color_offset = draw_resources.color_offset; + + D3D12_VIEWPORT vp = { + static_cast(constants.viewport_x), + static_cast(constants.viewport_y), + static_cast(std::max(constants.viewport_width, 1u)), + static_cast(std::max(constants.viewport_height, 1u)), + 0.0f, + 1.0f, + }; + D3D12_RECT scissor = { + static_cast(constants.viewport_x), + static_cast(constants.viewport_y), + static_cast(constants.viewport_x + std::max(constants.viewport_width, 1u)), + static_cast(constants.viewport_y + std::max(constants.viewport_height, 1u)), + }; + + cmd->SetPipelineState(pso); + cmd->SetGraphicsRootSignature(root_signature_.Get()); + if (descriptor_heaps[0]) { + cmd->SetDescriptorHeaps(1, descriptor_heaps); + cmd->SetGraphicsRootDescriptorTable(1, draw_resources.vertex_buffer_gpu); + } + cmd->SetGraphicsRoot32BitConstants( + 0, sizeof(constants) / sizeof(uint32_t), &constants, 0); + cmd->RSSetViewports(1, &vp); + cmd->RSSetScissorRects(1, &scissor); + cmd->IASetPrimitiveTopology(draw_resources.topology); + + if (draw_resources.indexed) { + ++submission_debug_stats_.indexed_draw_count; + cmd->DrawIndexedInstanced(draw_resources.draw_count, 1, + draw_resources.draw_start, 0, 0); + } else { + ++submission_debug_stats_.non_indexed_draw_count; + cmd->DrawInstanced(draw_resources.draw_count, 1, + draw_resources.draw_start, 0); + } + ++submission_debug_stats_.draw_success_count; + break; + } + + case ExecutionCommandCategory::kResolve: + ++submission_debug_stats_.resolve_command_count; + // Resolve is a no-op at this stage — handled by the output RT clear + break; + + default: + break; + } } +} + +// --------------------------------------------------------------------------- +// Phase 1: Safe buffer upload +// --------------------------------------------------------------------------- +bool D3D12Backend::PrepareDrawResources(ID3D12GraphicsCommandList* cmd, + const ReplayExecutorCommandPacket& command, + uint32_t 
slot, + DrawResources& out_resources) { + (void)slot; + if (!memory_ || !cmd || command.count == 0) return false; + out_resources.draw_count = command.count; + out_resources.draw_start = 0; + const auto& ss = command.shadow_state; + const auto& stream = ss.streams[0]; + if (!ValidGuestAddress(stream.buffer) || stream.stride == 0) { + ++submission_debug_stats_.invalid_stream_binding_count; + return false; + } + + uint32_t vertex_count = 0; + uint32_t vertex_first = command.start; + if (command.draw_kind != d3d::DrawCallKind::kPrimitive) { + if (!ValidGuestAddress(ss.index_buffer)) { + if (!NeedsSyntheticIndices(command.primitive_type)) { + ++submission_debug_stats_.invalid_index_buffer_count; + return false; + } + + if (IsRectangleListPrimitive(command.primitive_type)) { + // D3D9 resolve-style rectangle lists are frequently emitted as 3-vertex + // draws. Our replay path doesn't implement rectangle expansion yet, so + // prefer a non-indexed triangle fallback over dropping the draw. + out_resources.indexed = false; + out_resources.draw_count = command.count; + out_resources.draw_start = 0; + vertex_count = command.count; + vertex_first = command.start; + } else { + const uint32_t quad_count = command.count / 4; + if (quad_count == 0) { + ++submission_debug_stats_.invalid_index_buffer_count; + return false; + } + const uint32_t expanded_index_count = quad_count * 6; + const uint32_t ib_size = SafeIndexBufferSize(expanded_index_count); + if (ib_size == 0) { + ++submission_debug_stats_.index_count_overflow_count; + return false; + } + + std::vector host_indices; + host_indices.reserve(expanded_index_count); + for (uint32_t quad = 0; quad < quad_count; ++quad) { + const uint32_t base_vertex = quad * 4; + if (base_vertex + 3 > UINT16_MAX) { + ++submission_debug_stats_.index_count_overflow_count; + return false; + } + const uint16_t i0 = static_cast(base_vertex + 0); + const uint16_t i1 = static_cast(base_vertex + 1); + const uint16_t i2 = static_cast(base_vertex + 2); + 
const uint16_t i3 = static_cast(base_vertex + 3); + host_indices.push_back(i0); + host_indices.push_back(i1); + host_indices.push_back(i2); + host_indices.push_back(i0); + host_indices.push_back(i2); + host_indices.push_back(i3); + } + + ID3D12Resource* host_ib = resource_manager_.GetOrCreateBuffer( + stream.buffer ^ 0x80000000u ^ command.start, ib_size); + if (!host_ib) { + ++submission_debug_stats_.index_buffer_create_failure_count; + return false; + } + + resource_tracker_.TransitionBarrier(cmd, host_ib, D3D12_RESOURCE_STATE_COPY_DEST); + if (!resource_manager_.UploadData(cmd, host_ib, host_indices.data(), ib_size)) { + ++submission_debug_stats_.index_upload_failure_count; + return false; + } + resource_tracker_.TransitionBarrier(cmd, host_ib, D3D12_RESOURCE_STATE_INDEX_BUFFER); + + D3D12_INDEX_BUFFER_VIEW ibv = {}; + ibv.BufferLocation = host_ib->GetGPUVirtualAddress(); + ibv.SizeInBytes = ib_size; + ibv.Format = DXGI_FORMAT_R16_UINT; + cmd->IASetIndexBuffer(&ibv); + + out_resources.indexed = true; + out_resources.draw_count = expanded_index_count; + out_resources.draw_start = 0; + vertex_count = command.count; + vertex_first = command.start; + } + } else { + const uint64_t index_end_u64 = + uint64_t(command.start) + uint64_t(command.count); + if (index_end_u64 > UINT32_MAX) { + ++submission_debug_stats_.index_count_overflow_count; + return false; + } + const uint32_t upload_index_count = command.count; + const uint32_t ib_size = SafeIndexBufferSize(upload_index_count); + const auto* guest_indices = memory_->TranslateVirtual(ss.index_buffer); + if (!guest_indices || ib_size == 0) { + ++submission_debug_stats_.index_data_unavailable_count; + return false; + } + + std::vector host_indices(upload_index_count); + uint32_t min_index = UINT32_MAX; + uint32_t max_index = 0; + for (uint32_t i = 0; i < upload_index_count; ++i) { + const uint16_t value = ByteSwap16(guest_indices[command.start + i]); + host_indices[i] = value; + min_index = std::min(min_index, value); + 
max_index = std::max(max_index, value); + } + vertex_first = min_index == UINT32_MAX ? 0u : min_index; + vertex_count = max_index >= vertex_first ? (max_index - vertex_first + 1u) : 0u; + + if (IsQuadListPrimitive(command.primitive_type)) { + const uint32_t quad_count = command.count / 4; + if (quad_count == 0) { + ++submission_debug_stats_.invalid_index_buffer_count; + return false; + } + + std::vector quad_indices; + quad_indices.reserve(static_cast(quad_count) * 6); + for (uint32_t quad = 0; quad < quad_count; ++quad) { + const uint32_t base = command.start + quad * 4; + if (base + 3 >= host_indices.size()) { + break; + } + const uint16_t i0 = host_indices[base + 0]; + const uint16_t i1 = host_indices[base + 1]; + const uint16_t i2 = host_indices[base + 2]; + const uint16_t i3 = host_indices[base + 3]; + quad_indices.push_back(i0); + quad_indices.push_back(i1); + quad_indices.push_back(i2); + quad_indices.push_back(i0); + quad_indices.push_back(i2); + quad_indices.push_back(i3); + } + + if (quad_indices.empty()) { + ++submission_debug_stats_.invalid_index_buffer_count; + return false; + } + + host_indices = std::move(quad_indices); + out_resources.draw_count = static_cast(host_indices.size()); + out_resources.draw_start = 0; + + min_index = UINT32_MAX; + max_index = 0; + for (const uint16_t value : host_indices) { + min_index = std::min(min_index, value); + max_index = std::max(max_index, value); + } + vertex_first = min_index == UINT32_MAX ? 0u : min_index; + vertex_count = max_index >= vertex_first ? 
(max_index - vertex_first + 1u) : 0u; + } + + if (vertex_count == 0) { + ++submission_debug_stats_.zero_vertex_count; + return false; + } + + if (vertex_first != 0) { + for (uint16_t& value : host_indices) { + value = static_cast(value - vertex_first); + } + } + + const uint32_t upload_ib_size = + static_cast(host_indices.size() * sizeof(uint16_t)); + ID3D12Resource* host_ib = + resource_manager_.GetOrCreateBuffer(ss.index_buffer, upload_ib_size); + if (!host_ib) { + ++submission_debug_stats_.index_buffer_create_failure_count; + return false; + } + + resource_tracker_.TransitionBarrier(cmd, host_ib, D3D12_RESOURCE_STATE_COPY_DEST); + if (!resource_manager_.UploadData(cmd, host_ib, host_indices.data(), upload_ib_size)) { + ++submission_debug_stats_.index_upload_failure_count; + return false; + } + resource_tracker_.TransitionBarrier(cmd, host_ib, D3D12_RESOURCE_STATE_INDEX_BUFFER); + + D3D12_INDEX_BUFFER_VIEW ibv = {}; + ibv.BufferLocation = host_ib->GetGPUVirtualAddress(); + ibv.SizeInBytes = upload_ib_size; + ibv.Format = DXGI_FORMAT_R16_UINT; + cmd->IASetIndexBuffer(&ibv); + out_resources.indexed = true; + } + } else { + if (IsQuadListPrimitive(command.primitive_type)) { + const uint32_t quad_count = command.count / 4; + if (quad_count == 0) { + ++submission_debug_stats_.invalid_vertex_range_count; + return false; + } + const uint32_t expanded_index_count = quad_count * 6; + const uint32_t ib_size = SafeIndexBufferSize(expanded_index_count); + if (ib_size == 0) { + ++submission_debug_stats_.index_count_overflow_count; + return false; + } + + std::vector host_indices; + host_indices.reserve(expanded_index_count); + for (uint32_t quad = 0; quad < quad_count; ++quad) { + const uint32_t base_vertex = command.start + quad * 4; + if (base_vertex + 3 > UINT16_MAX) { + ++submission_debug_stats_.index_count_overflow_count; + return false; + } + const uint16_t i0 = static_cast(base_vertex + 0); + const uint16_t i1 = static_cast(base_vertex + 1); + const uint16_t i2 = 
static_cast(base_vertex + 2); + const uint16_t i3 = static_cast(base_vertex + 3); + host_indices.push_back(i0); + host_indices.push_back(i1); + host_indices.push_back(i2); + host_indices.push_back(i0); + host_indices.push_back(i2); + host_indices.push_back(i3); + } + + ID3D12Resource* host_ib = resource_manager_.GetOrCreateBuffer( + stream.buffer ^ 0x80000000u ^ command.start, ib_size); + if (!host_ib) { + ++submission_debug_stats_.index_buffer_create_failure_count; + return false; + } + + resource_tracker_.TransitionBarrier(cmd, host_ib, D3D12_RESOURCE_STATE_COPY_DEST); + if (!resource_manager_.UploadData(cmd, host_ib, host_indices.data(), ib_size)) { + ++submission_debug_stats_.index_upload_failure_count; + return false; + } + resource_tracker_.TransitionBarrier(cmd, host_ib, D3D12_RESOURCE_STATE_INDEX_BUFFER); + + D3D12_INDEX_BUFFER_VIEW ibv = {}; + ibv.BufferLocation = host_ib->GetGPUVirtualAddress(); + ibv.SizeInBytes = ib_size; + ibv.Format = DXGI_FORMAT_R16_UINT; + cmd->IASetIndexBuffer(&ibv); + out_resources.indexed = true; + out_resources.draw_count = expanded_index_count; + out_resources.draw_start = 0; + } + + const uint64_t vertex_count_u64 = uint64_t(command.count); + if (vertex_count_u64 > UINT32_MAX) { + return false; + } + vertex_count = static_cast(vertex_count_u64); + vertex_first = command.start; + } + + // --- Vertex streams --- + // We only bind stream 0 (the primary stream) for now — a complete implementation + // would iterate all kMaxStreams, but that is expensive and risks upload overflow. 
+ if (vertex_count == 0) { + ++submission_debug_stats_.zero_vertex_count; + return false; + } + + const uint64_t guest_start_u64 = + uint64_t(stream.buffer) + uint64_t(stream.offset) + + uint64_t(vertex_first) * uint64_t(stream.stride); + if (guest_start_u64 > UINT32_MAX) { + ++submission_debug_stats_.invalid_vertex_range_count; + return false; + } + const uint32_t guest_start = static_cast(guest_start_u64); + if (!ValidGuestAddress(guest_start)) { + ++submission_debug_stats_.invalid_vertex_range_count; + return false; + } + + const uint32_t vb_size = SafeVertexBufferSize(vertex_count, stream.stride); + if (vb_size == 0) { + ++submission_debug_stats_.vertex_buffer_size_invalid_count; + return false; + } + + ID3D12Resource* host_vb = resource_manager_.GetOrCreateBuffer(guest_start, vb_size); + if (!host_vb) { + ++submission_debug_stats_.vertex_buffer_create_failure_count; + return false; + } + + const void* guest_data = memory_->TranslateVirtual(guest_start); + if (!guest_data) { + ++submission_debug_stats_.vertex_data_unavailable_count; + return false; + } + + resource_tracker_.TransitionBarrier(cmd, host_vb, D3D12_RESOURCE_STATE_COPY_DEST); + if (!resource_manager_.UploadData(cmd, host_vb, guest_data, vb_size)) { + ++submission_debug_stats_.vertex_upload_failure_count; + return false; + } + resource_tracker_.TransitionBarrier(cmd, host_vb, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + const auto vb_srv = resource_manager_.AllocateSRV(); + CreateRawBufferSRV(device_.Get(), host_vb, vb_size, vb_srv.cpu_handle); + + out_resources.valid = true; + out_resources.vertex_stride = stream.stride; + out_resources.vertex_buffer_size = vb_size; + out_resources.color_offset = GuessColorOffset(stream.stride); + out_resources.vertex_buffer_gpu = vb_srv.gpu_handle; + out_resources.topology = PrimitiveTypeToTopology(command.primitive_type); + out_resources.topology_type = PrimitiveTypeToTopologyType(command.primitive_type); + return true; +} + +// 
--------------------------------------------------------------------------- +// PSO hash +// --------------------------------------------------------------------------- +uint64_t D3D12Backend::MakePSOHash(DXGI_FORMAT rt_fmt, DXGI_FORMAT ds_fmt, + D3D12_PRIMITIVE_TOPOLOGY_TYPE topo, + bool soft_particle) const { + struct Key { uint32_t rt, ds, topo; uint8_t sp; }; + Key k = { (uint32_t)rt_fmt, (uint32_t)ds_fmt, (uint32_t)topo, static_cast(soft_particle ? 1u : 0u) }; + return FnvHash64(&k, sizeof(k)); +} + +#endif // _WIN32 + +// --------------------------------------------------------------------------- +// Shutdown +// --------------------------------------------------------------------------- +void D3D12Backend::Shutdown() { + if (!initialized_) return; #if defined(_WIN32) WaitForGpu(); + resource_tracker_.Reset(); + resource_manager_.Shutdown(); + shader_manager_.Shutdown(); + + output_texture_.Reset(); + output_rtv_heap_.Reset(); + if (fence_event_) { CloseHandle((HANDLE)fence_event_); fence_event_ = nullptr; } - - command_list_.Reset(); + for (auto& c : frame_contexts_) { + c.command_list.Reset(); + c.command_allocator.Reset(); + } frame_contexts_.clear(); graphics_queue_.Reset(); fence_.Reset(); + root_signature_.Reset(); device_.Reset(); dxgi_factory_.Reset(); #endif - executor_status_ = {}; initialized_ = false; + executor_status_ = {}; } +// --------------------------------------------------------------------------- +// Device / command object creation +// --------------------------------------------------------------------------- #if defined(_WIN32) bool D3D12Backend::CreateDevice() { - UINT dxgiFactoryFlags = 0; + if (device_) return true; + UINT flags = 0; #if defined(_DEBUG) - // Enable the D3D12 debug layer. 
- Microsoft::WRL::ComPtr debugController; - if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) { - debugController->EnableDebugLayer(); - dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + Microsoft::WRL::ComPtr dbg; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&dbg)))) { + dbg->EnableDebugLayer(); + flags |= DXGI_CREATE_FACTORY_DEBUG; } #endif - - HRESULT hr = CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&dxgi_factory_)); + HRESULT hr = CreateDXGIFactory2(flags, IID_PPV_ARGS(&dxgi_factory_)); if (FAILED(hr)) return false; - // Try to create the device hr = D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device_)); if (FAILED(hr)) { - // Try WARP - Microsoft::WRL::ComPtr warpAdapter; - hr = dxgi_factory_->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter)); + Microsoft::WRL::ComPtr warp; + hr = dxgi_factory_->EnumWarpAdapter(IID_PPV_ARGS(&warp)); if (FAILED(hr)) return false; - - hr = D3D12CreateDevice(warpAdapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device_)); + hr = D3D12CreateDevice(warp.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device_)); if (FAILED(hr)) return false; } - + REXLOG_INFO("D3D12Backend: device created 0x{:016X}", (uint64_t)device_.Get()); return true; } bool D3D12Backend::CreateCommandObjects(uint32_t num_frames) { - D3D12_COMMAND_QUEUE_DESC queueDesc = {}; - queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; - queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - - HRESULT hr = device_->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&graphics_queue_)); - if (FAILED(hr)) return false; - - frame_contexts_.resize(num_frames); - for (uint32_t i = 0; i < num_frames; ++i) { - hr = device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frame_contexts_[i].command_allocator)); + HRESULT hr; + if (!graphics_queue_) { + D3D12_COMMAND_QUEUE_DESC qd = {}; + qd.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + qd.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + hr = device_->CreateCommandQueue(&qd, 
IID_PPV_ARGS(&graphics_queue_)); if (FAILED(hr)) return false; } - hr = device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, frame_contexts_[0].command_allocator.Get(), nullptr, IID_PPV_ARGS(&command_list_)); - if (FAILED(hr)) return false; - - // Close initially, since it will be reset on first submit - command_list_->Close(); + frame_contexts_.resize(num_frames); + for (uint32_t i = 0; i < num_frames; ++i) { + hr = device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(&frame_contexts_[i].command_allocator)); + if (FAILED(hr)) return false; + hr = device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + frame_contexts_[i].command_allocator.Get(), nullptr, + IID_PPV_ARGS(&frame_contexts_[i].command_list)); + if (FAILED(hr)) return false; + frame_contexts_[i].command_list->Close(); + } hr = device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence_)); if (FAILED(hr)) return false; current_fence_value_ = 0; fence_event_ = CreateEventA(nullptr, FALSE, FALSE, nullptr); - if (fence_event_ == nullptr) { + return fence_event_ != nullptr; +} + +bool D3D12Backend::CreateRootSignature() { + // Slot 0: 16 root constants (b0) + // Slot 1: SRV descriptor table (t0..t15) — visible to PS + // Slot 2: Sampler descriptor table (s0..s15) — visible to PS + D3D12_ROOT_PARAMETER params[2] = {}; + + params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + params[0].Constants.ShaderRegister = 0; + params[0].Constants.RegisterSpace = 0; + params[0].Constants.Num32BitValues = sizeof(DrawRootConstants) / sizeof(uint32_t); + params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + + D3D12_DESCRIPTOR_RANGE srv_range = {}; + srv_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + srv_range.NumDescriptors = 1; + srv_range.BaseShaderRegister = 0; + srv_range.RegisterSpace = 0; + srv_range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + params[1].ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + params[1].DescriptorTable.NumDescriptorRanges = 1; + params[1].DescriptorTable.pDescriptorRanges = &srv_range; + params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + + D3D12_ROOT_SIGNATURE_DESC desc = {}; + desc.NumParameters = 2; + desc.pParameters = params; + desc.NumStaticSamplers = 0; + desc.pStaticSamplers = nullptr; + desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + Microsoft::WRL::ComPtr blob, err; + if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, &err))) return false; - } - - // Create an RTV descriptor heap for the dummy output resource - D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; - rtvHeapDesc.NumDescriptors = 1; - rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; - rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - hr = device_->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&rtv_heap_)); - if (FAILED(hr)) return false; - - rtv_descriptor_size_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - - // Create a dummy output texture (1280x720, RGBA8) - D3D12_HEAP_PROPERTIES heapProps = {}; - heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; - heapProps.CreationNodeMask = 1; - heapProps.VisibleNodeMask = 1; - - D3D12_RESOURCE_DESC resourceDesc = {}; - resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - resourceDesc.Alignment = 0; - resourceDesc.Width = 1280; - resourceDesc.Height = 720; - resourceDesc.DepthOrArraySize = 1; - resourceDesc.MipLevels = 1; - resourceDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - resourceDesc.SampleDesc.Count = 1; - resourceDesc.SampleDesc.Quality = 0; - resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - - hr = device_->CreateCommittedResource( - &heapProps, - D3D12_HEAP_FLAG_NONE, - 
&resourceDesc, - D3D12_RESOURCE_STATE_RENDER_TARGET, - nullptr, - IID_PPV_ARGS(&dummy_output_resource_)); - if (FAILED(hr)) return false; - - // Create RTV - D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; - rtvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rtvDesc.Texture2D.MipSlice = 0; - rtvDesc.Texture2D.PlaneSlice = 0; - device_->CreateRenderTargetView(dummy_output_resource_.Get(), &rtvDesc, rtv_heap_->GetCPUDescriptorHandleForHeapStart()); - - return true; + return SUCCEEDED(device_->CreateRootSignature( + 0, blob->GetBufferPointer(), blob->GetBufferSize(), + IID_PPV_ARGS(&root_signature_))); } void D3D12Backend::WaitForGpu() { - if (graphics_queue_ && fence_ && fence_event_) { - current_fence_value_++; - HRESULT hr = graphics_queue_->Signal(fence_.Get(), current_fence_value_); - if (SUCCEEDED(hr)) { - if (fence_->GetCompletedValue() < current_fence_value_) { - fence_->SetEventOnCompletion(current_fence_value_, (HANDLE)fence_event_); - WaitForSingleObject((HANDLE)fence_event_, INFINITE); - } + if (!graphics_queue_ || !fence_ || !fence_event_) return; + ++current_fence_value_; + if (SUCCEEDED(graphics_queue_->Signal(fence_.Get(), current_fence_value_))) { + if (fence_->GetCompletedValue() < current_fence_value_) { + fence_->SetEventOnCompletion(current_fence_value_, (HANDLE)fence_event_); + WaitForSingleObject((HANDLE)fence_event_, INFINITE); } } } -#endif +#endif // _WIN32 -} // namespace ac6::renderer \ No newline at end of file +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/d3d12_backend.h b/src/ac6_native_renderer/backends/d3d12_backend.h index 7ab1eab7..09670526 100644 --- a/src/ac6_native_renderer/backends/d3d12_backend.h +++ b/src/ac6_native_renderer/backends/d3d12_backend.h @@ -3,6 +3,10 @@ #include "../render_device.h" #include "../frame_scheduler.h" +#include "d3d12_resource_manager.h" +#include "d3d12_resource_tracker.h" +#include "d3d12_shader_manager.h" + #include 
#include @@ -14,49 +18,120 @@ namespace ac6::renderer { +// Experimental replay backend retained for research and targeted override work. +// The authoritative default presentation path remains the RexGlue backend. class D3D12Backend final : public RenderDeviceBackend { public: BackendType GetType() const override { return BackendType::kD3D12; } std::string_view GetName() const override { return "d3d12"; } bool IsSupported() const override; - bool Initialize(const NativeRendererConfig& config) override; + bool Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) override; + bool InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, + ID3D12Device* device, ID3D12CommandQueue* queue); bool SubmitExecutorFrame(const ReplayExecutorFrame& frame) override; BackendExecutorStatus GetExecutorStatus() const override { return executor_status_; } void Shutdown() override; + // Phase 4: Returns the native output texture for swapchain blit. + // nullptr until a frame has been rendered. 
+ ID3D12Resource* GetOutputTexture() const { return output_texture_.Get(); } + private: BackendExecutorStatus executor_status_{}; bool initialized_ = false; + rex::memory::Memory* memory_ = nullptr; #if defined(_WIN32) struct FrameContext { - Microsoft::WRL::ComPtr command_allocator; + Microsoft::WRL::ComPtr command_allocator; + Microsoft::WRL::ComPtr command_list; uint64_t fence_value = 0; }; - Microsoft::WRL::ComPtr dxgi_factory_; - Microsoft::WRL::ComPtr device_; - Microsoft::WRL::ComPtr graphics_queue_; - Microsoft::WRL::ComPtr command_list_; + struct DrawResources { + bool valid = false; + bool indexed = false; + uint32_t draw_count = 0; + uint32_t draw_start = 0; + uint32_t vertex_base_offset = 0; + uint32_t vertex_stride = 0; + uint32_t vertex_buffer_size = 0; + uint32_t color_offset = 0xFFFFFFFFu; + D3D12_GPU_DESCRIPTOR_HANDLE vertex_buffer_gpu{}; + D3D12_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + }; - Microsoft::WRL::ComPtr fence_; - void* fence_event_ = nullptr; // HANDLE + struct SubmissionDebugStats { + uint32_t draw_attempt_count = 0; + uint32_t draw_success_count = 0; + uint32_t draw_prepare_failure_count = 0; + uint32_t draw_pso_failure_count = 0; + uint32_t indexed_draw_count = 0; + uint32_t non_indexed_draw_count = 0; + uint32_t clear_command_count = 0; + uint32_t resolve_command_count = 0; + uint32_t invalid_stream_binding_count = 0; + uint32_t invalid_index_buffer_count = 0; + uint32_t index_count_overflow_count = 0; + uint32_t index_data_unavailable_count = 0; + uint32_t index_buffer_create_failure_count = 0; + uint32_t index_upload_failure_count = 0; + uint32_t zero_vertex_count = 0; + uint32_t invalid_vertex_range_count = 0; + uint32_t vertex_buffer_size_invalid_count = 0; + uint32_t vertex_buffer_create_failure_count = 0; + uint32_t vertex_data_unavailable_count = 0; + uint32_t vertex_upload_failure_count = 0; + }; + + 
Microsoft::WRL::ComPtr dxgi_factory_; + Microsoft::WRL::ComPtr device_; + Microsoft::WRL::ComPtr graphics_queue_; + + Microsoft::WRL::ComPtr fence_; + void* fence_event_ = nullptr; uint64_t current_fence_value_ = 0; - Microsoft::WRL::ComPtr rtv_heap_; - Microsoft::WRL::ComPtr dummy_output_resource_; - uint32_t rtv_descriptor_size_ = 0; + Microsoft::WRL::ComPtr root_signature_; - FrameScheduler frame_scheduler_; + // Phase 4: output render target (native renderer draws into this) + Microsoft::WRL::ComPtr output_texture_; + Microsoft::WRL::ComPtr output_rtv_heap_; + D3D12_CPU_DESCRIPTOR_HANDLE output_rtv_{}; + uint32_t output_width_ = 0; + uint32_t output_height_ = 0; + static constexpr DXGI_FORMAT kOutputFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + + FrameScheduler frame_scheduler_; std::vector frame_contexts_; + SubmissionDebugStats submission_debug_stats_{}; - std::unordered_map> resource_cache_; - std::unordered_map> pso_cache_; + D3D12ResourceManager resource_manager_; + D3D12ResourceTracker resource_tracker_; + D3D12ShaderManager shader_manager_; + + // PSO state hash helper + uint64_t MakePSOHash(DXGI_FORMAT rt_fmt, DXGI_FORMAT ds_fmt, + D3D12_PRIMITIVE_TOPOLOGY_TYPE topo, bool soft_particle) const; + + // Ensure output texture is created at the right size + bool EnsureOutputTexture(uint32_t width, uint32_t height); bool CreateDevice(); bool CreateCommandObjects(uint32_t num_frames); + bool CreateRootSignature(); void WaitForGpu(); + + // Phase 3 helpers + void DispatchPassCommands(ID3D12GraphicsCommandList* cmd, + const ReplayExecutorPassPacket& pass, + uint32_t slot); + bool PrepareDrawResources(ID3D12GraphicsCommandList* cmd, + const ReplayExecutorCommandPacket& command, + uint32_t slot, + DrawResources& out_resources); #endif }; -} // namespace ac6::renderer \ No newline at end of file +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/d3d12_resource_manager.cpp b/src/ac6_native_renderer/backends/d3d12_resource_manager.cpp new file mode 
100644 index 00000000..5aed2a31 --- /dev/null +++ b/src/ac6_native_renderer/backends/d3d12_resource_manager.cpp @@ -0,0 +1,258 @@ +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include "d3d12_resource_manager.h" + +#include +#include + +namespace ac6::renderer { + +bool D3D12ResourceManager::Initialize(ID3D12Device* device, uint32_t max_frames) { + device_ = device; + max_frames_ = max_frames; + + D3D12_DESCRIPTOR_HEAP_DESC rtv_desc = {}; + rtv_desc.NumDescriptors = kMaxRtvDescriptors; + rtv_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + if (FAILED(device_->CreateDescriptorHeap(&rtv_desc, IID_PPV_ARGS(&rtv_heap_)))) return false; + rtv_size_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + + D3D12_DESCRIPTOR_HEAP_DESC dsv_desc = {}; + dsv_desc.NumDescriptors = kMaxDsvDescriptors; + dsv_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + if (FAILED(device_->CreateDescriptorHeap(&dsv_desc, IID_PPV_ARGS(&dsv_heap_)))) return false; + dsv_size_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + + D3D12_DESCRIPTOR_HEAP_DESC srv_desc = {}; + srv_desc.NumDescriptors = kMaxSrvDescriptors; + srv_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srv_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + if (FAILED(device_->CreateDescriptorHeap(&srv_desc, IID_PPV_ARGS(&srv_heap_)))) return false; + srv_size_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + frame_contexts_.resize(max_frames); + for (uint32_t i = 0; i < max_frames; ++i) { + D3D12_HEAP_PROPERTIES upload_props = {}; + upload_props.Type = D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC buffer_desc = {}; + buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + buffer_desc.Width = kUploadBufferSize; + buffer_desc.Height = 1; + buffer_desc.DepthOrArraySize = 1; + buffer_desc.MipLevels = 1; + buffer_desc.Format = DXGI_FORMAT_UNKNOWN; + buffer_desc.SampleDesc.Count = 1; + buffer_desc.Layout = 
D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + if (FAILED(device_->CreateCommittedResource(&upload_props, D3D12_HEAP_FLAG_NONE, &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&frame_contexts_[i].upload_buffer)))) { + return false; + } + + D3D12_RANGE read_range = {0, 0}; + if (FAILED(frame_contexts_[i].upload_buffer->Map(0, &read_range, reinterpret_cast(&frame_contexts_[i].upload_ptr)))) { + return false; + } + } + + return true; +} + +void D3D12ResourceManager::Shutdown() { + for (auto& ctx : frame_contexts_) { + if (ctx.upload_buffer) { + ctx.upload_buffer->Unmap(0, nullptr); + } + } + frame_contexts_.clear(); + resource_cache_.clear(); + rtv_heap_.Reset(); + dsv_heap_.Reset(); + srv_heap_.Reset(); +} + +void D3D12ResourceManager::BeginFrame(uint32_t frame_index) { + current_frame_index_ = frame_index; + // Reset transient descriptors + rtv_ptr_ = 0; + dsv_ptr_ = 0; + srv_ptr_ = 0; + + FrameContext& ctx = frame_contexts_[current_frame_index_ % max_frames_]; + ctx.upload_offset = 0; + + // Simple LRU cleanup could go here +} + +ID3D12Resource* D3D12ResourceManager::GetOrCreateBuffer(uint32_t guest_address, uint32_t size, D3D12_RESOURCE_FLAGS flags) { + if (guest_address == 0) return nullptr; + + auto it = resource_cache_.find(guest_address); + if (it != resource_cache_.end()) { + if (it->second.size_bytes >= size) { + it->second.last_used_frame = current_frame_index_; + return it->second.resource.Get(); + } + REXLOG_INFO("Growing cached D3D12 buffer for guest address 0x{:08X} from {} to {} bytes", + guest_address, it->second.size_bytes, size); + resource_cache_.erase(it); + } + + if (size == 0) { + return nullptr; + } + + D3D12_HEAP_PROPERTIES heap_props = {}; + heap_props.Type = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Width = size; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + 
desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = flags; + + Microsoft::WRL::ComPtr resource; + if (FAILED(device_->CreateCommittedResource(&heap_props, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&resource)))) { + REXLOG_ERROR("Failed to create D3D12 buffer for guest address 0x{:08X}", guest_address); + return nullptr; + } + + resource_cache_[guest_address] = {resource, size, current_frame_index_}; + return resource.Get(); +} + +ID3D12Resource* D3D12ResourceManager::GetOrCreateTexture(uint32_t guest_address, const d3d::ShadowState& state) { + // In a real implementation, we would extract width, height, format from the fetch constant at guest_address + // For this scaffold realization, we use guest_address as the key. + (void)state; + + if (guest_address == 0) return nullptr; + + auto it = resource_cache_.find(guest_address); + if (it != resource_cache_.end()) { + it->second.last_used_frame = current_frame_index_; + return it->second.resource.Get(); + } + + // Placeholder texture creation + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + desc.Width = 1024; // Mock + desc.Height = 1024; // Mock + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.SampleDesc.Count = 1; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + + D3D12_HEAP_PROPERTIES heap_props = {}; + heap_props.Type = D3D12_HEAP_TYPE_DEFAULT; + + Microsoft::WRL::ComPtr resource; + if (FAILED(device_->CreateCommittedResource(&heap_props, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&resource)))) { + return nullptr; + } + + resource_cache_[guest_address] = {resource, 0, current_frame_index_}; + return resource.Get(); +} + +D3D12ResourceManager::ResourceView D3D12ResourceManager::AllocateRTV() { + uint32_t index = rtv_ptr_++; + D3D12_CPU_DESCRIPTOR_HANDLE cpu = rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + cpu.ptr += index * rtv_size_; + return 
{cpu, {0}, index}; +} + +D3D12ResourceManager::ResourceView D3D12ResourceManager::AllocateDSV() { + uint32_t index = dsv_ptr_++; + D3D12_CPU_DESCRIPTOR_HANDLE cpu = dsv_heap_->GetCPUDescriptorHandleForHeapStart(); + cpu.ptr += index * dsv_size_; + return {cpu, {0}, index}; +} + +D3D12ResourceManager::ResourceView D3D12ResourceManager::AllocateSRV() { + uint32_t index = srv_ptr_++; + D3D12_CPU_DESCRIPTOR_HANDLE cpu = srv_heap_->GetCPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE gpu = srv_heap_->GetGPUDescriptorHandleForHeapStart(); + cpu.ptr += index * srv_size_; + gpu.ptr += index * srv_size_; + return {cpu, gpu, index}; +} + +bool D3D12ResourceManager::UploadData(ID3D12GraphicsCommandList* command_list, ID3D12Resource* destination, const void* data, uint64_t size, uint64_t destination_offset) { + if (!destination || !data || size == 0) return false; + + FrameContext& ctx = frame_contexts_[current_frame_index_ % max_frames_]; + + // Align offset to 256 bytes for good practice, though not strictly required for all buffer copies + uint32_t aligned_offset = (ctx.upload_offset + 255) & ~255; + if (aligned_offset + size > kUploadBufferSize) { + REXLOG_ERROR("Upload buffer overflow in frame {}", current_frame_index_); + return false; + } + + memcpy(ctx.upload_ptr + aligned_offset, data, size); + ctx.upload_offset = aligned_offset + static_cast(size); + + command_list->CopyBufferRegion(destination, destination_offset, ctx.upload_buffer.Get(), aligned_offset, size); + return true; +} + +DXGI_FORMAT D3D12ResourceManager::TranslateColorFormat(uint32_t guest_format) { + using namespace rex::graphics::xenos; + switch (static_cast(guest_format)) { + case ColorRenderTargetFormat::k_8_8_8_8: return DXGI_FORMAT_R8G8B8A8_UNORM; + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + case ColorRenderTargetFormat::k_2_10_10_10: return DXGI_FORMAT_R10G10B10A2_UNORM; + case ColorRenderTargetFormat::k_16_16_FLOAT: return 
DXGI_FORMAT_R16G16_FLOAT; + case ColorRenderTargetFormat::k_16_16_16_16_FLOAT: return DXGI_FORMAT_R16G16B16A16_FLOAT; + case ColorRenderTargetFormat::k_32_FLOAT: return DXGI_FORMAT_R32_FLOAT; + case ColorRenderTargetFormat::k_32_32_FLOAT: return DXGI_FORMAT_R32G32_FLOAT; + default: return DXGI_FORMAT_R8G8B8A8_UNORM; + } +} + +DXGI_FORMAT D3D12ResourceManager::TranslateDepthFormat(uint32_t guest_format) { + using namespace rex::graphics::xenos; + switch (static_cast(guest_format)) { + case DepthRenderTargetFormat::kD24S8: return DXGI_FORMAT_D24_UNORM_S8_UINT; + case DepthRenderTargetFormat::kD24FS8: return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; // Nearest + default: return DXGI_FORMAT_D24_UNORM_S8_UINT; + } +} + +DXGI_FORMAT D3D12ResourceManager::TranslateTextureFormat(uint32_t guest_format) { + using namespace rex::graphics::xenos; + switch (static_cast(guest_format)) { + case TextureFormat::k_8_8_8_8: return DXGI_FORMAT_R8G8B8A8_UNORM; + case TextureFormat::k_DXT1: return DXGI_FORMAT_BC1_UNORM; + case TextureFormat::k_DXT2_3: return DXGI_FORMAT_BC2_UNORM; + case TextureFormat::k_DXT4_5: return DXGI_FORMAT_BC3_UNORM; + case TextureFormat::k_16_16_16_16_FLOAT: return DXGI_FORMAT_R16G16B16A16_FLOAT; + case TextureFormat::k_32_FLOAT: return DXGI_FORMAT_R32_FLOAT; + default: return DXGI_FORMAT_R8G8B8A8_UNORM; + } +} + +DXGI_FORMAT D3D12ResourceManager::TranslateVertexFormat(uint32_t guest_format) { + using namespace rex::graphics::xenos; + switch (static_cast(guest_format)) { + case VertexFormat::k_32_32_32_32_FLOAT: return DXGI_FORMAT_R32G32B32A32_FLOAT; + case VertexFormat::k_32_32_32_FLOAT: return DXGI_FORMAT_R32G32B32_FLOAT; + case VertexFormat::k_32_32_FLOAT: return DXGI_FORMAT_R32G32_FLOAT; + case VertexFormat::k_32_FLOAT: return DXGI_FORMAT_R32_FLOAT; + case VertexFormat::k_16_16_16_16_FLOAT: return DXGI_FORMAT_R16G16B16A16_FLOAT; + case VertexFormat::k_16_16_FLOAT: return DXGI_FORMAT_R16G16_FLOAT; + case VertexFormat::k_8_8_8_8: return DXGI_FORMAT_R8G8B8A8_UNORM; + 
default: return DXGI_FORMAT_R32G32B32A32_FLOAT; + } +} + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/d3d12_resource_manager.h b/src/ac6_native_renderer/backends/d3d12_resource_manager.h new file mode 100644 index 00000000..96915dfc --- /dev/null +++ b/src/ac6_native_renderer/backends/d3d12_resource_manager.h @@ -0,0 +1,84 @@ +#pragma once + +#include +#include + +#include +#include + +#include "../types.h" +#include "../../d3d_state.h" + +namespace ac6::renderer { + +class D3D12ResourceManager { + public: + struct ResourceView { + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle; + uint32_t heap_index; + }; + + bool Initialize(ID3D12Device* device, uint32_t max_frames); + void Shutdown(); + + void BeginFrame(uint32_t frame_index); + + // Translation + ID3D12Resource* GetOrCreateBuffer(uint32_t guest_address, uint32_t size, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE); + ID3D12Resource* GetOrCreateTexture(uint32_t guest_address, const d3d::ShadowState& state); + + // Descriptor management + ResourceView AllocateRTV(); + ResourceView AllocateDSV(); + ResourceView AllocateSRV(); + ID3D12DescriptorHeap* GetSrvHeap() const { return srv_heap_.Get(); } + + // Data sync + bool UploadData(ID3D12GraphicsCommandList* command_list, ID3D12Resource* destination, const void* data, uint64_t size, uint64_t destination_offset = 0); + + // Format translation + DXGI_FORMAT TranslateColorFormat(uint32_t guest_format); + DXGI_FORMAT TranslateDepthFormat(uint32_t guest_format); + DXGI_FORMAT TranslateTextureFormat(uint32_t guest_format); + DXGI_FORMAT TranslateVertexFormat(uint32_t guest_format); + + private: + ID3D12Device* device_ = nullptr; + uint32_t max_frames_ = 0; + uint32_t current_frame_index_ = 0; + + struct CachedResource { + Microsoft::WRL::ComPtr resource; + uint64_t size_bytes = 0; + uint64_t last_used_frame = 0; + }; + + std::unordered_map resource_cache_; + + Microsoft::WRL::ComPtr rtv_heap_; + 
Microsoft::WRL::ComPtr dsv_heap_; + Microsoft::WRL::ComPtr srv_heap_; + + uint32_t rtv_ptr_ = 0; + uint32_t dsv_ptr_ = 0; + uint32_t srv_ptr_ = 0; + + uint32_t rtv_size_ = 0; + uint32_t dsv_size_ = 0; + uint32_t srv_size_ = 0; + + struct FrameContext { + Microsoft::WRL::ComPtr upload_buffer; + uint8_t* upload_ptr = nullptr; + uint32_t upload_offset = 0; + }; + std::vector frame_contexts_; + + static constexpr uint32_t kMaxRtvDescriptors = 1024; + static constexpr uint32_t kMaxDsvDescriptors = 256; + static constexpr uint32_t kMaxSrvDescriptors = 4096; + static constexpr uint32_t kUploadBufferSize = 16 * 1024 * 1024; // 16 MB per frame +}; + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/d3d12_resource_tracker.cpp b/src/ac6_native_renderer/backends/d3d12_resource_tracker.cpp new file mode 100644 index 00000000..96946db2 --- /dev/null +++ b/src/ac6_native_renderer/backends/d3d12_resource_tracker.cpp @@ -0,0 +1,52 @@ +#include "d3d12_resource_tracker.h" + +namespace ac6::renderer { + +void D3D12ResourceTracker::TrackResource(ID3D12Resource* resource, + D3D12_RESOURCE_STATES initial_state) { + if (!resource) return; + tracked_[resource] = {initial_state}; +} + +bool D3D12ResourceTracker::TransitionBarrier(ID3D12GraphicsCommandList* cmd_list, + ID3D12Resource* resource, + D3D12_RESOURCE_STATES target_state) { + if (!resource || !cmd_list) return false; + + auto it = tracked_.find(resource); + if (it == tracked_.end()) { + // First time seeing this resource — assume COMMON + tracked_[resource] = {D3D12_RESOURCE_STATE_COMMON}; + it = tracked_.find(resource); + } + + if (it->second.current_state == target_state) { + return false; // No transition needed + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = resource; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + 
barrier.Transition.StateBefore = it->second.current_state; + barrier.Transition.StateAfter = target_state; + + cmd_list->ResourceBarrier(1, &barrier); + it->second.current_state = target_state; + return true; +} + +void D3D12ResourceTracker::FlushBarriers(ID3D12GraphicsCommandList* cmd_list) { + if (pending_barriers_.empty() || !cmd_list) return; + cmd_list->ResourceBarrier(static_cast(pending_barriers_.size()), + pending_barriers_.data()); + pending_barriers_.clear(); +} + +void D3D12ResourceTracker::Reset() { + tracked_.clear(); + pending_barriers_.clear(); +} + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/d3d12_resource_tracker.h b/src/ac6_native_renderer/backends/d3d12_resource_tracker.h new file mode 100644 index 00000000..907e4118 --- /dev/null +++ b/src/ac6_native_renderer/backends/d3d12_resource_tracker.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include + +namespace ac6::renderer { + +// Tracks D3D12 resource states for automatic barrier generation. +class D3D12ResourceTracker { + public: + void TrackResource(ID3D12Resource* resource, D3D12_RESOURCE_STATES initial_state); + + // Returns true if a barrier was needed; it is recorded on cmd_list immediately (not queued). + bool TransitionBarrier(ID3D12GraphicsCommandList* cmd_list, + ID3D12Resource* resource, + D3D12_RESOURCE_STATES target_state); + + // Flush pending barriers in one call (batch mode; TransitionBarrier does not queue any). 
+ void FlushBarriers(ID3D12GraphicsCommandList* cmd_list); + + void Reset(); + + private: + struct TrackedResource { + D3D12_RESOURCE_STATES current_state = D3D12_RESOURCE_STATE_COMMON; + }; + + std::unordered_map tracked_; + std::vector pending_barriers_; +}; + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/d3d12_shader_manager.cpp b/src/ac6_native_renderer/backends/d3d12_shader_manager.cpp new file mode 100644 index 00000000..c2bdd26d --- /dev/null +++ b/src/ac6_native_renderer/backends/d3d12_shader_manager.cpp @@ -0,0 +1,283 @@ +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include "d3d12_shader_manager.h" + +#include + +#include + +#pragma comment(lib, "d3dcompiler.lib") + +namespace ac6::renderer { + +// --------------------------------------------------------------------------- +// Embedded HLSL shaders (compiled inline at runtime, no external files) +// --------------------------------------------------------------------------- + +// Generic vertex-pulling shader. It reads stream 0 directly as a raw byte +// buffer, byte-swaps guest big-endian dwords, and interprets the first float3 +// plus an optional fourth component as position data. Pre-transformed menu / UI vertices are projected from the +// captured viewport into clip space; already-transformed clip-space vertices +// are passed through. 
+static constexpr const char kPassthroughVS[] = R"HLSL( +cbuffer DrawConstants : register(b0) { + uint vertex_base_offset; + uint vertex_stride; + uint vertex_buffer_size; + uint viewport_x; + uint viewport_y; + uint viewport_width; + uint viewport_height; + uint color_offset; + uint flags; +}; + +ByteAddressBuffer g_vertex_buffer : register(t0); + +struct VSOut { + float4 pos : SV_Position; + float4 color : COLOR0; + float2 uv : TEXCOORD0; +}; + +uint ByteSwap32(uint v) { + return (v << 24) | ((v << 8) & 0x00FF0000u) | ((v >> 8) & 0x0000FF00u) | (v >> 24); +} + +bool CanLoadDword(uint byte_offset) { + return (byte_offset & 3u) == 0u && (byte_offset + 4u) <= vertex_buffer_size; +} + +float LoadGuestFloat(uint byte_offset, float fallback_value) { + if (!CanLoadDword(byte_offset)) { + return fallback_value; + } + return asfloat(ByteSwap32(g_vertex_buffer.Load(byte_offset))); +} + +float4 LoadGuestColor(uint byte_offset, float4 fallback_value) { + if (!CanLoadDword(byte_offset)) { + return fallback_value; + } + uint packed = ByteSwap32(g_vertex_buffer.Load(byte_offset)); + return float4( + float((packed >> 16) & 0xFFu) / 255.0f, + float((packed >> 8) & 0xFFu) / 255.0f, + float((packed >> 0) & 0xFFu) / 255.0f, + max(float((packed >> 24) & 0xFFu) / 255.0f, 1.0f / 255.0f)); +} + +VSOut main(uint vid : SV_VertexID) { + uint stride = vertex_stride; + uint byte_offset = vertex_base_offset + vid * stride; + float4 default_color = float4(1.0f, 1.0f, 1.0f, 1.0f); + + // Unsupported stream layouts should draw nothing rather than issue invalid + // raw-buffer reads that can remove the device. 
+ if (stride == 0u || byte_offset >= vertex_buffer_size) { + VSOut empty; + empty.pos = float4(0.0f, 0.0f, 0.0f, 0.0f); + empty.color = default_color; + empty.uv = float2(0.0f, 0.0f); + return empty; + } + + float4 raw_pos = float4( + LoadGuestFloat(byte_offset + 0, 0.0f), + LoadGuestFloat(byte_offset + 4, 0.0f), + LoadGuestFloat(byte_offset + 8, 0.0f), + 1.0f); + if (stride >= 16u && CanLoadDword(byte_offset + 12u)) { + float candidate_w = LoadGuestFloat(byte_offset + 12, 1.0f); + if (candidate_w == candidate_w && abs(candidate_w) < 10000.0f) { + raw_pos.w = candidate_w; + } + } + bool has_viewport = viewport_width != 0 && viewport_height != 0; + bool looks_screen_space = + has_viewport && + (abs(raw_pos.x) > 2.5f || abs(raw_pos.y) > 2.5f || raw_pos.w > 2.0f || raw_pos.w < 0.0f); + + VSOut o; + if (looks_screen_space) { + float2 viewport_size = float2(max(viewport_width, 1u), max(viewport_height, 1u)); + float2 viewport_origin = float2(viewport_x, viewport_y); + float2 pixel = raw_pos.xy - viewport_origin; + float2 ndc = float2( + pixel.x / viewport_size.x * 2.0f - 1.0f, + 1.0f - pixel.y / viewport_size.y * 2.0f); + o.pos = float4(ndc, saturate(raw_pos.z), 1.0f); + } else { + float w = abs(raw_pos.w) > 1.0e-6f ? raw_pos.w : 1.0f; + o.pos = float4(raw_pos.xyz, w); + } + + if (color_offset != 0xFFFFFFFFu && (color_offset + 4u) <= stride) { + o.color = LoadGuestColor(byte_offset + color_offset, default_color); + } else { + o.color = default_color; + } + o.uv = raw_pos.xy; + return o; +} +)HLSL"; + +// Simple pixel shader: output the pulled vertex color. +static constexpr const char kPassthroughPS[] = R"HLSL( +struct PSIn { + float4 pos : SV_Position; + float4 color : COLOR0; + float2 uv : TEXCOORD0; +}; + +float4 main(PSIn i) : SV_Target { + return i.color; +} +)HLSL"; + +// Diagnostic variant for post-process / present-tagged passes. This makes +// successful fullscreen-style replay obvious even before real texture sampling +// is implemented. 
+static constexpr const char kSoftParticlePS[] = R"HLSL( +struct PSIn { + float4 pos : SV_Position; + float4 color : COLOR0; + float2 uv : TEXCOORD0; +}; + +float4 main(PSIn i) : SV_Target { + float2 pos_band = frac(abs(i.pos.xy) * 0.015625f); + float2 uv_band = frac(abs(i.uv.xy) * 0.001953125f); + float3 debug_color = float3( + max(pos_band.x, 0.2f), + max(pos_band.y, 0.2f), + max(frac(uv_band.x + uv_band.y), 0.35f)); + return float4(debug_color, 1.0f); +} +)HLSL"; + +// --------------------------------------------------------------------------- + +/*static*/ Microsoft::WRL::ComPtr D3D12ShaderManager::CompileHLSL( + const char* source, const char* entry_point, const char* target) { + UINT flags = 0; +#if defined(_DEBUG) + flags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#else + flags = D3DCOMPILE_OPTIMIZATION_LEVEL3; +#endif + + Microsoft::WRL::ComPtr blob; + Microsoft::WRL::ComPtr error; + HRESULT hr = D3DCompile(source, strlen(source), nullptr, nullptr, nullptr, + entry_point, target, flags, 0, &blob, &error); + if (FAILED(hr)) { + if (error) { + REXLOG_ERROR("Shader compile error [{}]: {}", entry_point, + static_cast(error->GetBufferPointer())); + } else { + REXLOG_ERROR("Shader compile error [{}]: hr=0x{:08X}", entry_point, + static_cast(hr)); + } + return nullptr; + } + return blob; +} + +bool D3D12ShaderManager::Initialize(ID3D12Device* device) { + device_ = device; + + REXLOG_INFO("D3D12ShaderManager: Compiling passthrough VS..."); + passthrough_vs_ = CompileHLSL(kPassthroughVS, "main", "vs_5_0"); + if (!passthrough_vs_) { + REXLOG_ERROR("D3D12ShaderManager: Failed to compile passthrough VS"); + return false; + } + + REXLOG_INFO("D3D12ShaderManager: Compiling passthrough PS..."); + passthrough_ps_ = CompileHLSL(kPassthroughPS, "main", "ps_5_0"); + if (!passthrough_ps_) { + REXLOG_ERROR("D3D12ShaderManager: Failed to compile passthrough PS"); + return false; + } + + REXLOG_INFO("D3D12ShaderManager: Compiling soft-particle PS..."); + 
soft_particle_ps_ = CompileHLSL(kSoftParticlePS, "main", "ps_5_0"); + if (!soft_particle_ps_) { + REXLOG_WARN("D3D12ShaderManager: Soft-particle PS compile failed, using passthrough"); + soft_particle_ps_ = passthrough_ps_; + } + + REXLOG_INFO("D3D12ShaderManager: All shaders compiled successfully"); + return true; +} + +void D3D12ShaderManager::Shutdown() { + pso_cache_.clear(); + passthrough_vs_.Reset(); + passthrough_ps_.Reset(); + soft_particle_ps_.Reset(); + device_ = nullptr; +} + +ID3D12PipelineState* D3D12ShaderManager::GetOrCreatePSO( + uint64_t state_hash, + ID3D12RootSignature* root_sig, + DXGI_FORMAT rt_format, + DXGI_FORMAT ds_format, + D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type, + bool use_soft_particle_ps) { + auto it = pso_cache_.find(state_hash); + if (it != pso_cache_.end()) { + return it->second.pso.Get(); + } + + if (!device_ || !passthrough_vs_ || !passthrough_ps_) { + return nullptr; + } + + ID3DBlob* ps = use_soft_particle_ps ? soft_particle_ps_.Get() : passthrough_ps_.Get(); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {}; + desc.pRootSignature = root_sig; + desc.VS = {passthrough_vs_->GetBufferPointer(), passthrough_vs_->GetBufferSize()}; + desc.PS = {ps->GetBufferPointer(), ps->GetBufferSize()}; + desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + desc.RasterizerState.DepthClipEnable = TRUE; + desc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + desc.BlendState.RenderTarget[0].BlendEnable = use_soft_particle_ps ? 
TRUE : FALSE; + desc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; + desc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + desc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + desc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; + desc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO; + desc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + desc.DepthStencilState.DepthEnable = (ds_format != DXGI_FORMAT_UNKNOWN) ? TRUE : FALSE; + desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + desc.SampleMask = UINT_MAX; + desc.PrimitiveTopologyType = topology_type; + desc.NumRenderTargets = (rt_format != DXGI_FORMAT_UNKNOWN) ? 1 : 0; + if (rt_format != DXGI_FORMAT_UNKNOWN) { + desc.RTVFormats[0] = rt_format; + } + desc.DSVFormat = ds_format; + desc.SampleDesc.Count = 1; + + Microsoft::WRL::ComPtr pso; + HRESULT hr = device_->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso)); + if (FAILED(hr)) { + REXLOG_ERROR( + "D3D12ShaderManager: CreateGraphicsPipelineState failed 0x{:08X} hash=0x{:016X}", + static_cast(hr), state_hash); + return nullptr; + } + + auto& entry = pso_cache_[state_hash]; + entry.pso = pso; + return entry.pso.Get(); +} + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/d3d12_shader_manager.h b/src/ac6_native_renderer/backends/d3d12_shader_manager.h new file mode 100644 index 00000000..bd545dba --- /dev/null +++ b/src/ac6_native_renderer/backends/d3d12_shader_manager.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace ac6::renderer { + +// Manages native passthrough shaders and a PSO cache for the native renderer. +// Uses D3DCompile to compile embedded HLSL source inline at startup — no +// external shader files required. 
+class D3D12ShaderManager { + public: + bool Initialize(ID3D12Device* device); + void Shutdown(); + + // Returns the shared passthrough vertex shader blob (compiled once). + ID3DBlob* GetPassthroughVS() const { return passthrough_vs_.Get(); } + + // Returns the shared vertex-color passthrough pixel shader blob (compiled once). + ID3DBlob* GetPassthroughPS() const { return passthrough_ps_.Get(); } + + // Returns the soft-particle PS: a diagnostic debug-color shader, or the passthrough PS if it failed to compile. + ID3DBlob* GetSoftParticlePS() const { return soft_particle_ps_.Get(); } + + // Get or create a PSO; the cache is keyed by state_hash alone, so a cache hit ignores the other arguments. + // root_sig must already be created on the device. + // Returns nullptr on failure. + ID3D12PipelineState* GetOrCreatePSO( + uint64_t state_hash, + ID3D12RootSignature* root_sig, + DXGI_FORMAT rt_format, + DXGI_FORMAT ds_format, + D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type, + bool use_soft_particle_ps = false); + + // Backwards compat stubs (unused but keep old callers linking) + ID3DBlob* GetVertexShader(uint32_t /*guest_hash*/) { return passthrough_vs_.Get(); } + ID3DBlob* GetPixelShader(uint32_t /*guest_hash*/) { return passthrough_ps_.Get(); } + ID3DBlob* GetGenericSoftParticleShader() { return soft_particle_ps_.Get(); } + + private: + ID3D12Device* device_ = nullptr; + + Microsoft::WRL::ComPtr passthrough_vs_; + Microsoft::WRL::ComPtr passthrough_ps_; + Microsoft::WRL::ComPtr soft_particle_ps_; + + struct CachedPSO { + Microsoft::WRL::ComPtr pso; + }; + std::unordered_map pso_cache_; + + static Microsoft::WRL::ComPtr CompileHLSL( + const char* source, const char* entry_point, const char* target); +}; + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/backends/metal_backend.cpp b/src/ac6_native_renderer/backends/metal_backend.cpp index 78f5f6e5..a1f5ebe6 100644 --- a/src/ac6_native_renderer/backends/metal_backend.cpp +++ b/src/ac6_native_renderer/backends/metal_backend.cpp @@ -12,8 +12,9 @@ bool MetalBackend::IsSupported() const { #endif } -bool 
MetalBackend::Initialize(const NativeRendererConfig& config) { +bool MetalBackend::Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) { (void)config; + (void)memory; if (initialized_) { return true; } diff --git a/src/ac6_native_renderer/backends/metal_backend.h b/src/ac6_native_renderer/backends/metal_backend.h index 74f0f636..323544ae 100644 --- a/src/ac6_native_renderer/backends/metal_backend.h +++ b/src/ac6_native_renderer/backends/metal_backend.h @@ -9,7 +9,8 @@ class MetalBackend final : public RenderDeviceBackend { BackendType GetType() const override { return BackendType::kMetal; } std::string_view GetName() const override { return "metal"; } bool IsSupported() const override; - bool Initialize(const NativeRendererConfig& config) override; + bool Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) override; + bool InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, ID3D12Device* device, ID3D12CommandQueue* queue) override { return false; } bool SubmitExecutorFrame(const ReplayExecutorFrame& frame) override; BackendExecutorStatus GetExecutorStatus() const override { return executor_status_; } void Shutdown() override; diff --git a/src/ac6_native_renderer/backends/vulkan_backend.cpp b/src/ac6_native_renderer/backends/vulkan_backend.cpp index 9ad88ecf..b96fa09d 100644 --- a/src/ac6_native_renderer/backends/vulkan_backend.cpp +++ b/src/ac6_native_renderer/backends/vulkan_backend.cpp @@ -12,8 +12,9 @@ bool VulkanBackend::IsSupported() const { #endif } -bool VulkanBackend::Initialize(const NativeRendererConfig& config) { +bool VulkanBackend::Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) { (void)config; + (void)memory; if (initialized_) { return true; } diff --git a/src/ac6_native_renderer/backends/vulkan_backend.h b/src/ac6_native_renderer/backends/vulkan_backend.h index c7ffad1f..f0863fc8 100644 --- a/src/ac6_native_renderer/backends/vulkan_backend.h +++ 
b/src/ac6_native_renderer/backends/vulkan_backend.h @@ -9,7 +9,8 @@ class VulkanBackend final : public RenderDeviceBackend { BackendType GetType() const override { return BackendType::kVulkan; } std::string_view GetName() const override { return "vulkan"; } bool IsSupported() const override; - bool Initialize(const NativeRendererConfig& config) override; + bool Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) override; + bool InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, ID3D12Device* device, ID3D12CommandQueue* queue) override { return false; } bool SubmitExecutorFrame(const ReplayExecutorFrame& frame) override; BackendExecutorStatus GetExecutorStatus() const override { return executor_status_; } void Shutdown() override; diff --git a/src/ac6_native_renderer/execution_plan.cpp b/src/ac6_native_renderer/execution_plan.cpp index e1a1f08a..a9e32641 100644 --- a/src/ac6_native_renderer/execution_plan.cpp +++ b/src/ac6_native_renderer/execution_plan.cpp @@ -48,6 +48,7 @@ ExecutionCommandPacket BuildExecutionCommandPacket(const ReplayCommandDesc& comm .viewport_y = command.viewport_y, .viewport_width = command.viewport_width, .viewport_height = command.viewport_height, + .shadow_state = command.shadow_state, }; } diff --git a/src/ac6_native_renderer/execution_plan.h b/src/ac6_native_renderer/execution_plan.h index 3fc616a3..fcc5de8d 100644 --- a/src/ac6_native_renderer/execution_plan.h +++ b/src/ac6_native_renderer/execution_plan.h @@ -42,6 +42,7 @@ struct ExecutionCommandPacket { uint32_t viewport_y = 0; uint32_t viewport_width = 0; uint32_t viewport_height = 0; + ac6::d3d::ShadowState shadow_state{}; }; struct ExecutionResourceRequirements { diff --git a/src/ac6_native_renderer/native_renderer.cpp b/src/ac6_native_renderer/native_renderer.cpp index 7458ea6a..8b294905 100644 --- a/src/ac6_native_renderer/native_renderer.cpp +++ b/src/ac6_native_renderer/native_renderer.cpp @@ -1,5 +1,6 @@ #include 
"native_renderer.h" +#include "backends/d3d12_backend.h" #include #include @@ -48,13 +49,31 @@ NativeRenderer::~NativeRenderer() { Shutdown(); } -bool NativeRenderer::Initialize(const NativeRendererConfig& config) { +bool NativeRenderer::InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, ID3D12Device* device, ID3D12CommandQueue* queue) { Shutdown(); - config_ = config; scheduler_.Configure(config_.max_frames_in_flight); - if (!device_.Initialize(config_)) { + if (!device_.InitializeShared(config, memory, device, queue)) { + return false; + } + + stats_.initialized = true; + stats_.active_backend = BackendType::kD3D12; + return true; +} + +bool NativeRenderer::Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) { + REXLOG_INFO("NativeRenderer::Initialize starting"); + Shutdown(); + + config_ = config; + REXLOG_INFO("NativeRenderer: Configuring scheduler (max_frames={})", config_.max_frames_in_flight); + scheduler_.Configure(config_.max_frames_in_flight); + + REXLOG_INFO("NativeRenderer: Initializing render device..."); + if (!device_.Initialize(config_, memory)) { + REXLOG_ERROR("NativeRenderer: device_.Initialize failed"); return false; } @@ -136,7 +155,7 @@ void NativeRenderer::BuildCapturedFrame( } stats_.built_pass_count += graph_.pass_count(); - REXLOG_TRACE( + REXLOG_INFO( "AC6 native renderer observed frame={} frontend_passes={} replay_passes={} replay_commands={} execution_passes={} execution_commands={} executor_passes={} executor_commands={} backend_submits={} selected={} draws={} clears={} resolves={} plan_valid={} out={}x{}", summary.frame_index, summary.pass_count, replay_frame_.summary.pass_count, replay_frame_.summary.command_count, execution_plan_.summary.pass_count, @@ -149,3 +168,22 @@ void NativeRenderer::BuildCapturedFrame( } } // namespace ac6::renderer + +// --------------------------------------------------------------------------- +// Phase 4: backend accessors +// 
--------------------------------------------------------------------------- +namespace ac6::renderer { + +D3D12Backend* NativeRenderer::GetD3D12Backend() const { + if (!device_.backend() || device_.active_backend() != BackendType::kD3D12) { + return nullptr; + } + return static_cast(device_.backend()); +} + +ID3D12Resource* NativeRenderer::GetOutputTexture() const { + D3D12Backend* b = GetD3D12Backend(); + return b ? b->GetOutputTexture() : nullptr; +} + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/native_renderer.h b/src/ac6_native_renderer/native_renderer.h index d93c2f37..819d3f6b 100644 --- a/src/ac6_native_renderer/native_renderer.h +++ b/src/ac6_native_renderer/native_renderer.h @@ -2,6 +2,8 @@ #include +#include + #include "ac6_render_frontend.h" #include "execution_plan.h" #include "frame_scheduler.h" @@ -12,8 +14,14 @@ #include "render_graph.h" #include "types.h" +// Forward declare so callers can access the output texture without pulling in +// all of d3d12_backend.h transitively. +struct ID3D12Resource; + namespace ac6::renderer { +// Experimental capture-replay renderer retained for diagnostics and future +// targeted overrides. It is not the default presentation path. class NativeRenderer { public: NativeRenderer(); @@ -22,7 +30,8 @@ class NativeRenderer { NativeRenderer(const NativeRenderer&) = delete; NativeRenderer& operator=(const NativeRenderer&) = delete; - bool Initialize(const NativeRendererConfig& config); + bool Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory); + bool InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, ID3D12Device* device, ID3D12CommandQueue* queue); void Shutdown(); void BeginFrame(); @@ -48,6 +57,14 @@ class NativeRenderer { return device_.executor_status(); } + // Phase 4: returns the native output texture produced by the D3D12 backend, + // or nullptr if not yet available. 
The texture is in + // D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE after SubmitExecutorFrame. + ID3D12Resource* GetOutputTexture() const; + + // Phase 4: returns the raw D3D12Backend* (nullptr for non-D3D12 backends). + class D3D12Backend* GetD3D12Backend() const; + private: NativeRendererConfig config_{}; NativeRendererStats stats_{}; diff --git a/src/ac6_native_renderer/render_device.cpp b/src/ac6_native_renderer/render_device.cpp index 88ee484f..c4b365e2 100644 --- a/src/ac6_native_renderer/render_device.cpp +++ b/src/ac6_native_renderer/render_device.cpp @@ -4,6 +4,8 @@ #include +#include "backends/d3d12_backend.h" + namespace ac6::renderer { RenderDevice::RenderDevice() = default; @@ -12,7 +14,24 @@ RenderDevice::~RenderDevice() { Shutdown(); } -bool RenderDevice::Initialize(const NativeRendererConfig& config) { +bool RenderDevice::InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, ID3D12Device* device, ID3D12CommandQueue* queue) { + Shutdown(); + + active_backend_ = BackendType::kD3D12; + backend_ = CreateBackend(active_backend_); + if (!backend_) return false; + + auto* d3d_backend = static_cast(backend_.get()); + if (!d3d_backend->InitializeShared(config, memory, device, queue)) { + backend_.reset(); + return false; + } + + initialized_ = true; + return true; +} + +bool RenderDevice::Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) { Shutdown(); active_backend_ = ResolveBackend(config.preferred_backend); @@ -30,7 +49,7 @@ bool RenderDevice::Initialize(const NativeRendererConfig& config) { active_backend_ = BackendType::kUnknown; return false; } - if (!backend_->Initialize(config)) { + if (!backend_->Initialize(config, memory)) { REXLOG_ERROR("AC6 native renderer backend {} failed initialization", backend_->GetName()); backend_.reset(); diff --git a/src/ac6_native_renderer/render_device.h b/src/ac6_native_renderer/render_device.h index fff562a0..fa90b90c 100644 --- 
a/src/ac6_native_renderer/render_device.h +++ b/src/ac6_native_renderer/render_device.h @@ -3,6 +3,15 @@ #include #include +#if defined(_WIN32) +#include +#else +struct ID3D12Device; +struct ID3D12CommandQueue; +#endif + +#include + #include "replay_executor.h" #include "types.h" @@ -15,7 +24,8 @@ class RenderDeviceBackend { virtual BackendType GetType() const = 0; virtual std::string_view GetName() const = 0; virtual bool IsSupported() const = 0; - virtual bool Initialize(const NativeRendererConfig& config) = 0; + virtual bool Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory) = 0; + virtual bool InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, ID3D12Device* device, ID3D12CommandQueue* queue) = 0; virtual bool SubmitExecutorFrame(const ReplayExecutorFrame& frame) = 0; virtual BackendExecutorStatus GetExecutorStatus() const = 0; virtual void Shutdown() = 0; @@ -29,7 +39,8 @@ class RenderDevice { RenderDevice(const RenderDevice&) = delete; RenderDevice& operator=(const RenderDevice&) = delete; - bool Initialize(const NativeRendererConfig& config); + bool Initialize(const NativeRendererConfig& config, rex::memory::Memory* memory); + bool InitializeShared(const NativeRendererConfig& config, rex::memory::Memory* memory, ID3D12Device* device, ID3D12CommandQueue* queue); void Shutdown(); bool SubmitExecutorFrame(const ReplayExecutorFrame& frame); @@ -38,6 +49,9 @@ class RenderDevice { std::string_view backend_name() const; BackendExecutorStatus executor_status() const; + // Raw backend pointer — used by NativeRenderer to downcast to D3D12Backend*. 
+ RenderDeviceBackend* backend() const { return backend_.get(); } + private: std::unique_ptr backend_; BackendType active_backend_ = BackendType::kUnknown; diff --git a/src/ac6_native_renderer/replay_executor.cpp b/src/ac6_native_renderer/replay_executor.cpp index 13a9fe98..2848936d 100644 --- a/src/ac6_native_renderer/replay_executor.cpp +++ b/src/ac6_native_renderer/replay_executor.cpp @@ -32,6 +32,18 @@ ReplayExecutorCommandPacket BuildExecutorCommandPacket( command.fetch_constant_count != 0), .touches_render_target = command.render_target_0 != 0, .touches_depth_stencil = command.depth_stencil != 0, + // Forwarded dispatch fields + .draw_kind = command.draw_kind, + .primitive_type = command.primitive_type, + .start = command.start, + .count = command.count, + .flags = command.flags, + .rect_count = command.rect_count, + .captured_rect_count = command.captured_rect_count, + .color = command.color, + .stencil = command.stencil, + .depth = command.depth, + .shadow_state = command.shadow_state, }; } diff --git a/src/ac6_native_renderer/replay_executor.h b/src/ac6_native_renderer/replay_executor.h index c62dbb39..65abc55d 100644 --- a/src/ac6_native_renderer/replay_executor.h +++ b/src/ac6_native_renderer/replay_executor.h @@ -25,6 +25,18 @@ struct ReplayExecutorCommandPacket { bool requires_descriptor_setup = false; bool touches_render_target = false; bool touches_depth_stencil = false; + // Draw call dispatch fields (forwarded from ExecutionCommandPacket) + ac6::d3d::DrawCallKind draw_kind = ac6::d3d::DrawCallKind::kIndexed; + uint32_t primitive_type = 0; + uint32_t start = 0; + uint32_t count = 0; + uint32_t flags = 0; + uint32_t rect_count = 0; + uint32_t captured_rect_count = 0; + uint32_t color = 0; + uint32_t stencil = 0; + float depth = 1.0f; + ac6::d3d::ShadowState shadow_state{}; }; struct ReplayExecutorPassPacket { diff --git a/src/ac6_native_renderer/replay_ir.cpp b/src/ac6_native_renderer/replay_ir.cpp index a1f724c5..3f1f0516 100644 --- 
a/src/ac6_native_renderer/replay_ir.cpp +++ b/src/ac6_native_renderer/replay_ir.cpp @@ -49,6 +49,7 @@ ReplayCommandDesc BuildReplayCommand(const ObservedCommandDesc& command) { .viewport_y = command.viewport_y, .viewport_width = command.viewport_width, .viewport_height = command.viewport_height, + .shadow_state = command.shadow_state, }; } diff --git a/src/ac6_native_renderer/replay_ir.h b/src/ac6_native_renderer/replay_ir.h index a69fd218..1936113a 100644 --- a/src/ac6_native_renderer/replay_ir.h +++ b/src/ac6_native_renderer/replay_ir.h @@ -41,6 +41,7 @@ struct ReplayCommandDesc { uint32_t viewport_y = 0; uint32_t viewport_width = 0; uint32_t viewport_height = 0; + ac6::d3d::ShadowState shadow_state{}; }; struct ReplayPassDesc { diff --git a/src/ac6_native_renderer/types.h b/src/ac6_native_renderer/types.h index 766d7df9..58215888 100644 --- a/src/ac6_native_renderer/types.h +++ b/src/ac6_native_renderer/types.h @@ -49,6 +49,26 @@ struct BackendExecutorStatus { uint32_t resource_translation_pass_count = 0; uint32_t pipeline_state_pass_count = 0; uint32_t descriptor_setup_pass_count = 0; + uint32_t draw_attempt_count = 0; + uint32_t draw_success_count = 0; + uint32_t draw_prepare_failure_count = 0; + uint32_t draw_pso_failure_count = 0; + uint32_t indexed_draw_count = 0; + uint32_t non_indexed_draw_count = 0; + uint32_t clear_command_count = 0; + uint32_t resolve_command_count = 0; + uint32_t invalid_stream_binding_count = 0; + uint32_t invalid_index_buffer_count = 0; + uint32_t index_count_overflow_count = 0; + uint32_t index_data_unavailable_count = 0; + uint32_t index_buffer_create_failure_count = 0; + uint32_t index_upload_failure_count = 0; + uint32_t zero_vertex_count = 0; + uint32_t invalid_vertex_range_count = 0; + uint32_t vertex_buffer_size_invalid_count = 0; + uint32_t vertex_buffer_create_failure_count = 0; + uint32_t vertex_data_unavailable_count = 0; + uint32_t vertex_upload_failure_count = 0; }; constexpr std::string_view ToString(BackendType 
backend) { diff --git a/src/ac6recomp_app.h b/src/ac6recomp_app.h index 9933524d..55e873d6 100644 --- a/src/ac6recomp_app.h +++ b/src/ac6recomp_app.h @@ -4,6 +4,7 @@ #include +#include "ac6_native_graphics.h" #include "ac6_native_graphics_overlay.h" #include "generated/ac6recomp_config.h" @@ -11,14 +12,38 @@ class Ac6recompApp : public rex::ReXApp { public: using rex::ReXApp::ReXApp; + Ac6recompApp(rex::ui::WindowedAppContext& ctx, std::string_view name, rex::PPCImageInfo ppc_info) + : rex::ReXApp(ctx, name, ppc_info) { + REXLOG_INFO("Ac6recompApp constructor"); + } + static std::unique_ptr Create( rex::ui::WindowedAppContext& ctx) { + REXLOG_INFO("Ac6recompApp::Create"); return std::unique_ptr(new Ac6recompApp(ctx, "ac6recomp", PPCImageConfig)); } protected: + void OnPreSetup(rex::RuntimeConfig& config) override { + REXLOG_INFO("Ac6recompApp::OnPreSetup"); + rex::ReXApp::OnPreSetup(config); + } + + void OnPostSetup() override { + REXLOG_INFO("Ac6recompApp::OnPostSetup"); + rex::ReXApp::OnPostSetup(); + + auto* graphics_sys = runtime()->graphics_system(); + if (graphics_sys) { + graphics_sys->SetFrameBoundaryCallback([](rex::memory::Memory* memory) { + ::ac6::graphics::OnFrameBoundary(memory); + }); + REXLOG_INFO("Ac6recompApp: Native frame boundary callback registered"); + } + } + void OnCreateDialogs(rex::ui::ImGuiDrawer* drawer) override { - rex::ReXApp::OnCreateDialogs(drawer); + REXLOG_INFO("Ac6recompApp::OnCreateDialogs"); native_graphics_status_dialog_ = std::make_unique(drawer); native_graphics_status_dialog_->Show(); diff --git a/src/d3d_hooks.cpp b/src/d3d_hooks.cpp index 5c4a4b48..9d422436 100644 --- a/src/d3d_hooks.cpp +++ b/src/d3d_hooks.cpp @@ -35,10 +35,10 @@ std::vector g_live_draws; std::vector g_live_clears; std::vector g_live_resolves; -template -uint32_t CountNonZero(const std::array& values) { +template +uint32_t CountNonZero(const Container& values) { uint32_t count = 0; - for (const T& value : values) { + for (const auto& value : values) { if 
(value) { ++count; } @@ -74,6 +74,54 @@ void HashU32(uint64_t& hash, uint32_t value) { hash *= kFnvPrime; } +void HashU64(uint64_t& hash, uint64_t value) { + HashU32(hash, static_cast(value & 0xFFFFFFFFull)); + HashU32(hash, static_cast(value >> 32)); +} + +uint64_t ComputeVertexFetchLayoutSignature(const ac6::d3d::ShadowState& shadow) { + constexpr uint64_t kFnvOffsetBasis = 1469598103934665603ull; + uint64_t hash = kFnvOffsetBasis; + HashU32(hash, shadow.vertex_declaration); + for (const auto& stream : shadow.streams) { + HashU32(hash, stream.buffer); + HashU32(hash, stream.offset); + HashU32(hash, stream.stride); + } + return hash; +} + +uint64_t ComputeTextureFetchLayoutSignature(const ac6::d3d::ShadowState& shadow) { + constexpr uint64_t kFnvOffsetBasis = 1469598103934665603ull; + uint64_t hash = kFnvOffsetBasis; + for (uint32_t texture : shadow.texture_fetch_ptrs) { + HashU32(hash, texture); + } + return hash; +} + +uint64_t ComputeResourceBindingSignature(const ac6::d3d::ShadowState& shadow) { + constexpr uint64_t kFnvOffsetBasis = 1469598103934665603ull; + uint64_t hash = kFnvOffsetBasis; + for (uint32_t target : shadow.render_targets) { + HashU32(hash, target); + } + HashU32(hash, shadow.depth_stencil); + for (uint32_t texture : shadow.textures) { + HashU32(hash, texture); + } + for (const auto& sampler : shadow.samplers) { + HashU32(hash, sampler.mag_filter); + HashU32(hash, sampler.min_filter); + HashU32(hash, sampler.mip_filter); + HashU32(hash, sampler.mip_level); + HashU32(hash, sampler.border_color); + } + HashU64(hash, shadow.vertex_fetch_layout_signature); + HashU64(hash, shadow.texture_fetch_layout_signature); + return hash; +} + void HashDrawRecord(uint64_t& hash, const ac6::d3d::DrawCallRecord& draw) { HashU32(hash, static_cast(draw.kind)); HashU32(hash, draw.primitive_type); @@ -189,6 +237,9 @@ ac6::d3d::ShadowState SnapshotShadowState(uint32_t device) { if (device != 0) { shadow.device = device; } + shadow.vertex_fetch_layout_signature = 
ComputeVertexFetchLayoutSignature(shadow); + shadow.texture_fetch_layout_signature = ComputeTextureFetchLayoutSignature(shadow); + shadow.resource_binding_signature = ComputeResourceBindingSignature(shadow); return shadow; } @@ -726,6 +777,7 @@ PPC_FUNC_IMPL(rex_sub_821E10C8) { namespace ac6::d3d { void OnFrameBoundary() { + REXLOG_INFO("d3d::OnFrameBoundary: frame_index={}", g_capture_live_frame_index); ac6::d3d::DrawStatsSnapshot draw_stats = SnapshotDrawStats(); std::unique_lock lock(g_snapshot_mutex); diff --git a/src/d3d_state.h b/src/d3d_state.h index 80656448..0ccfe3e4 100644 --- a/src/d3d_state.h +++ b/src/d3d_state.h @@ -106,6 +106,9 @@ struct ShadowState { std::array samplers{}; std::array texture_fetch_ptrs{}; uint32_t shader_gpr_alloc{0}; + uint64_t vertex_fetch_layout_signature{0}; + uint64_t texture_fetch_layout_signature{0}; + uint64_t resource_binding_signature{0}; struct { uint32_t x{0}; diff --git a/src/main.cpp b/src/main.cpp index 6ba3e65b..96328d98 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,11 +3,68 @@ // // This file is yours to edit. 'rexglue migrate' will NOT overwrite it. 
+#include +#include + +REXCVAR_DECLARE(bool, ac6_render_capture); +REXCVAR_DECLARE(bool, ac6_timing_hooks_enabled); +REXCVAR_DECLARE(bool, ac6_unlock_fps); +REXCVAR_DECLARE(bool, ac6_native_graphics_enabled); +REXCVAR_DECLARE(bool, ac6_experimental_replay_present); +REXCVAR_DECLARE(std::string, ac6_graphics_mode); +REXCVAR_DECLARE(std::string, log_file); +REXCVAR_DECLARE(std::string, log_level); + #include "generated/ac6recomp_config.h" #include "generated/ac6recomp_init.h" -#include +#include +#include #include "ac6recomp_app.h" -REX_DEFINE_APP(ac6recomp, Ac6recompApp::Create) +// Early boot log to catch crashes before the SDK logger is ready +std::ofstream g_boot_log; + +void InitEarlyLog() { + g_boot_log.open("boot.log", std::ios::out | std::ios::trunc); + if (g_boot_log.is_open()) { + g_boot_log << "AC6 Recompiled Early Boot Log" << std::endl; + g_boot_log << "-----------------------------" << std::endl; + g_boot_log.flush(); + } + std::cout << "Early boot logging initialized." << std::endl; +} + +std::unique_ptr Ac6recompAppCreate(rex::ui::WindowedAppContext& ctx) { + if (g_boot_log.is_open()) { + g_boot_log << "Ac6recompApp::Create called" << std::endl; + g_boot_log.flush(); + } + + // Force SDK logging to a file as well + REXCVAR_SET(log_file, "ac6recomp.log"); + REXCVAR_SET(log_level, "info"); + REXCVAR_SET(ac6_native_graphics_enabled, true); + REXCVAR_SET(ac6_graphics_mode, "hybrid_backend_fixes"); + REXCVAR_SET(ac6_experimental_replay_present, false); + REXCVAR_SET(ac6_render_capture, true); + REXCVAR_SET(ac6_timing_hooks_enabled, true); + REXCVAR_SET(ac6_unlock_fps, true); + + REXLOG_INFO("Ac6recompAppCreate: graphics mode={} replay_present={} capture={}", + REXCVAR_GET(ac6_graphics_mode), + REXCVAR_GET(ac6_experimental_replay_present) ? "true" : "false", + REXCVAR_GET(ac6_render_capture) ? 
"true" : "false"); + + return Ac6recompApp::Create(ctx); +} + +REX_DEFINE_APP(ac6recomp, Ac6recompAppCreate) + +// Hook into static initialization to start log early +struct EarlyBoot { + EarlyBoot() { + InitEarlyLog(); + } +} g_early_boot; diff --git a/src/render_hooks.cpp b/src/render_hooks.cpp index 24015e51..83405b4f 100644 --- a/src/render_hooks.cpp +++ b/src/render_hooks.cpp @@ -6,6 +6,7 @@ #include #include +#include REXCVAR_DEFINE_BOOL(ac6_unlock_fps, false, "AC6", "Unlock frame rate to 60fps"); REXCVAR_DEFINE_BOOL(ac6_timing_hooks_enabled, true, "AC6", @@ -47,13 +48,14 @@ void ac6DeltaDivisorHook(PPCRegister& r29) { } void ac6PresentTimingHook(PPCRegister& /*r31*/) { - ac6::d3d::OnFrameBoundary(); - ac6::graphics::OnFrameBoundary(); + static uint64_t last_log = 0; + if (g_frame_count % 60 == 0 && g_frame_count != last_log) { + REXLOG_INFO("ac6PresentTimingHook firing: frame={}", g_frame_count); + last_log = g_frame_count; + } + // ac6::d3d::OnFrameBoundary(); // MOVED TO GPU THREAD const auto now = Clock::now(); - double frame_time_ms = 0.0; - double fps = 0.0; - uint64_t frame_count = 0; { std::lock_guard lock(g_frame_mutex); if (g_frame_start.time_since_epoch().count() != 0) { @@ -63,9 +65,6 @@ void ac6PresentTimingHook(PPCRegister& /*r31*/) { ++g_frame_count; } g_frame_start = now; - frame_time_ms = g_frame_time_ms; - fps = g_fps; - frame_count = g_frame_count; } } diff --git a/src/render_hooks.h b/src/render_hooks.h index 8515ba1e..51f8ac94 100644 --- a/src/render_hooks.h +++ b/src/render_hooks.h @@ -6,6 +6,7 @@ #include REXCVAR_DECLARE(bool, ac6_unlock_fps); +REXCVAR_DECLARE(bool, ac6_timing_hooks_enabled); namespace ac6 { diff --git a/thirdparty/rexglue-sdk/include/rex/graphics/graphics_system.h b/thirdparty/rexglue-sdk/include/rex/graphics/graphics_system.h index 2675fed7..fb4a84be 100644 --- a/thirdparty/rexglue-sdk/include/rex/graphics/graphics_system.h +++ b/thirdparty/rexglue-sdk/include/rex/graphics/graphics_system.h @@ -74,7 +74,10 @@ class 
GraphicsSystem : public system::IGraphicsSystem { virtual void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size_log2) override; virtual void SetInterruptCallback(uint32_t callback, uint32_t user_data) override; + virtual void SetFrameBoundaryCallback(std::function callback) override; virtual bool HandleVideoSwap(const system::GraphicsSwapSubmission& submission) override; + bool GetLastSwapSubmission(system::GraphicsSwapSubmission* out_submission, + uint64_t* out_sequence = nullptr) const; void DispatchInterruptCallback(uint32_t source, uint32_t cpu); virtual void ClearCaches(); @@ -91,6 +94,13 @@ class GraphicsSystem : public system::IGraphicsSystem { void Pause(); void Resume(); + uint64_t guest_vblank_interval_ticks() const { + return guest_vblank_interval_ticks_.load(std::memory_order_acquire); + } + uint64_t last_vblank_interrupt_guest_tick() const { + return last_vblank_interrupt_guest_tick_.load(std::memory_order_acquire); + } + bool Save(::rex::stream::ByteStream* stream); bool Restore(::rex::stream::ByteStream* stream); @@ -107,12 +117,6 @@ class GraphicsSystem : public system::IGraphicsSystem { void WriteRegister(uint32_t addr, uint32_t value); void MarkVblank(); - uint64_t guest_vblank_interval_ticks() const { - return guest_vblank_interval_ticks_.load(std::memory_order_acquire); - } - uint64_t last_vblank_interrupt_guest_tick() const { - return last_vblank_interrupt_guest_tick_.load(std::memory_order_acquire); - } memory::Memory* memory_ = nullptr; runtime::FunctionDispatcher* function_dispatcher_ = nullptr; @@ -127,6 +131,11 @@ class GraphicsSystem : public system::IGraphicsSystem { system::object_ref vsync_worker_thread_; std::atomic guest_vblank_interval_ticks_{0}; std::atomic last_vblank_interrupt_guest_tick_{0}; + mutable std::mutex last_swap_submission_mutex_; + system::GraphicsSwapSubmission last_swap_submission_{}; + uint64_t last_swap_submission_sequence_ = 0; + + std::function frame_boundary_callback_; RegisterFile register_file_; 
std::unique_ptr command_processor_; diff --git a/thirdparty/rexglue-sdk/include/rex/system/interfaces/graphics.h b/thirdparty/rexglue-sdk/include/rex/system/interfaces/graphics.h index 48d51480..43348194 100644 --- a/thirdparty/rexglue-sdk/include/rex/system/interfaces/graphics.h +++ b/thirdparty/rexglue-sdk/include/rex/system/interfaces/graphics.h @@ -12,10 +12,14 @@ #pragma once #include +#include #include // Forward declarations +namespace rex::memory { +class Memory; +} namespace rex::runtime { class FunctionDispatcher; } @@ -52,6 +56,7 @@ class IGraphicsSystem { virtual void InitializeRingBuffer(uint32_t ptr, uint32_t size_log2) = 0; virtual void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size_log2) = 0; virtual void SetInterruptCallback(uint32_t callback, uint32_t user_data) = 0; + virtual void SetFrameBoundaryCallback(std::function callback) = 0; virtual bool HandleVideoSwap(const GraphicsSwapSubmission& submission) = 0; }; diff --git a/thirdparty/rexglue-sdk/src/graphics/d3d12/command_processor.cpp b/thirdparty/rexglue-sdk/src/graphics/d3d12/command_processor.cpp index fe9482a3..45f22372 100644 --- a/thirdparty/rexglue-sdk/src/graphics/d3d12/command_processor.cpp +++ b/thirdparty/rexglue-sdk/src/graphics/d3d12/command_processor.cpp @@ -31,6 +31,9 @@ #include #include +#include "../../../../../src/ac6_backend_fixes/ac6_backend_hooks.h" +#include "../../../../../src/ac6_native_graphics.h" + REXCVAR_DEFINE_BOOL(d3d12_bindless, true, "GPU/D3D12", "Use bindless resources where available") .lifecycle(rex::cvar::Lifecycle::kRequiresRestart); @@ -1989,39 +1992,62 @@ bool D3D12CommandProcessor::IssueSwapInternal(uint32_t frontbuffer_ptr, return false; } + // Let AC6 consume the frame-boundary callback on the GPU thread before + // choosing the swap source. This is analysis-first by default; the legacy + // replay path only overrides presentation if explicitly enabled. 
+ { + system::GraphicsSwapSubmission frame_boundary_submission = {}; + frame_boundary_submission.frontbuffer_virtual_address = frontbuffer_ptr; + frame_boundary_submission.frontbuffer_width = frontbuffer_width; + frame_boundary_submission.frontbuffer_height = frontbuffer_height; + graphics_system_->HandleVideoSwap(frame_boundary_submission); + } + // Obtain the actual swap source texture size (resolution-scaled if it's a // resolve destination, or not otherwise). D3D12_SHADER_RESOURCE_VIEW_DESC swap_texture_srv_desc = {}; xenos::TextureFormat frontbuffer_format; uint32_t frontbuffer_width_unscaled = 0, frontbuffer_height_unscaled = 0; - - REXGPU_ERROR("IssueSwap: Calling RequestSwapTexture for fb={:08X}", frontbuffer_ptr); - ID3D12Resource* swap_texture_resource = - texture_cache_->RequestSwapTexture(swap_texture_srv_desc, frontbuffer_format, - &frontbuffer_width_unscaled, &frontbuffer_height_unscaled); - if (!swap_texture_resource) { - REXGPU_ERROR("IssueSwap: RequestSwapTexture returned null, trying UpdateDirectDisplayTexture"); - swap_texture_resource = UpdateDirectDisplayTexture( - frontbuffer_ptr, frontbuffer_width, frontbuffer_height, + bool using_native_swap_texture = false; + bool used_direct_display_fallback = false; + + ID3D12Resource* swap_texture_resource = ac6::graphics::GetNativeOutputTexture(); + if (swap_texture_resource) { + D3D12_RESOURCE_DESC native_desc = swap_texture_resource->GetDesc(); + swap_texture_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + swap_texture_srv_desc.Format = native_desc.Format; + swap_texture_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + swap_texture_srv_desc.Texture2D.MostDetailedMip = 0; + swap_texture_srv_desc.Texture2D.MipLevels = 1; + swap_texture_srv_desc.Texture2D.PlaneSlice = 0; + swap_texture_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f; + frontbuffer_format = xenos::TextureFormat::k_8_8_8_8; + frontbuffer_width_unscaled = uint32_t(native_desc.Width); + 
frontbuffer_height_unscaled = native_desc.Height; + frontbuffer_width = frontbuffer_width_unscaled; + frontbuffer_height = frontbuffer_height_unscaled; + using_native_swap_texture = true; + } else { + swap_texture_resource = texture_cache_->RequestSwapTexture( swap_texture_srv_desc, frontbuffer_format, &frontbuffer_width_unscaled, &frontbuffer_height_unscaled); - if (!swap_texture_resource) { - // Dump texture fetch constant 0 for debugging - const auto& regs = *register_file_; - auto fetch = regs.GetTextureFetch(0); - REXGPU_ERROR( - "IssueSwap: RequestSwapTexture failed - fetch0: {:08X} {:08X} {:08X} {:08X} {:08X} {:08X}", - fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3, fetch.dword_4, fetch.dword_5); - EndSubmission(true); - return false; - } - } else { - REXGPU_ERROR("IssueSwap: RequestSwapTexture SUCCESS {:016X}", (uint64_t)swap_texture_resource); - static bool logged_non_black = false; - if (!logged_non_black) { - REXGPU_ERROR("Non-black frame detected"); - logged_non_black = true; + swap_texture_resource = UpdateDirectDisplayTexture( + frontbuffer_ptr, frontbuffer_width, frontbuffer_height, + swap_texture_srv_desc, frontbuffer_format, + &frontbuffer_width_unscaled, &frontbuffer_height_unscaled); + used_direct_display_fallback = swap_texture_resource != nullptr; + + if (!swap_texture_resource) { + // Dump texture fetch constant 0 for debugging + const auto& regs = *register_file_; + auto fetch = regs.GetTextureFetch(0); + REXGPU_ERROR( + "IssueSwap: RequestSwapTexture failed - fetch0: {:08X} {:08X} {:08X} {:08X} {:08X} {:08X}", + fetch.dword_0, fetch.dword_1, fetch.dword_2, fetch.dword_3, fetch.dword_4, fetch.dword_5); + EndSubmission(true); + return false; + } } } D3D12_RESOURCE_DESC swap_texture_desc = swap_texture_resource->GetDesc(); @@ -2076,6 +2102,36 @@ bool D3D12CommandProcessor::IssueSwapInternal(uint32_t frontbuffer_ptr, } } + system::GraphicsSwapSubmission ac6_submission = {}; + uint64_t ac6_submission_sequence = 0; + 
graphics_system_->GetLastSwapSubmission(&ac6_submission, &ac6_submission_sequence); + if (!ac6_submission_sequence) { + ac6_submission.frontbuffer_virtual_address = frontbuffer_ptr; + ac6_submission.frontbuffer_width = frontbuffer_width; + ac6_submission.frontbuffer_height = frontbuffer_height; + } + + auto* ac6_vertex_shader = static_cast(active_vertex_shader()); + auto* ac6_pixel_shader = static_cast(active_pixel_shader()); + uint64_t ac6_vertex_shader_hash = + ac6_vertex_shader ? ac6_vertex_shader->ucode_data_hash() : 0; + uint64_t ac6_pixel_shader_hash = + ac6_pixel_shader ? ac6_pixel_shader->ucode_data_hash() : 0; + + ac6::backend::SwapSourceType ac6_swap_source = + ac6::backend::SwapSourceType::kGuestSwapTexture; + if (using_native_swap_texture) { + ac6_swap_source = ac6::backend::SwapSourceType::kExperimentalReplayOverride; + } else if (used_direct_display_fallback) { + ac6_swap_source = ac6::backend::SwapSourceType::kDirectDisplayFallback; + } + + ac6::backend::ReportSwapDecision( + ac6_submission, ac6_submission_sequence, ac6_swap_source, + swap_source_scaled, guest_output_width, guest_output_height, + source_width_scaled, source_height_scaled, ac6_vertex_shader_hash, + ac6_pixel_shader_hash); + system::X_VIDEO_MODE video_mode; kernel::xboxkrnl::VdQueryVideoMode(&video_mode); uint32_t display_width = std::max(uint32_t(1), uint32_t(video_mode.display_width)); @@ -2083,9 +2139,9 @@ bool D3D12CommandProcessor::IssueSwapInternal(uint32_t frontbuffer_ptr, bool refreshed = presenter->RefreshGuestOutput( guest_output_width, guest_output_height, display_width, display_height, - [this, &swap_texture_srv_desc, frontbuffer_format, swap_texture_resource, guest_output_width, + [this, &swap_texture_srv_desc, frontbuffer_format, swap_texture_resource, + using_native_swap_texture, guest_output_width, guest_output_height](ui::Presenter::GuestOutputRefreshContext& context) -> bool { - REXGPU_ERROR("Inside RefreshGuestOutput lambda for fb!"); const ui::d3d12::D3D12Provider& 
provider = GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); @@ -2258,6 +2314,11 @@ bool D3D12CommandProcessor::IssueSwapInternal(uint32_t frontbuffer_ptr, apply_gamma_descriptors[1].first); REXGPU_ERROR("RefreshGuestOutput: checkpoint 3.3 - PushTransitionBarrier"); + if (using_native_swap_texture) { + PushTransitionBarrier(swap_texture_resource, + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } PushTransitionBarrier(gamma_ramp_buffer_.Get(), gamma_ramp_buffer_state_, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); gamma_ramp_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; @@ -2389,6 +2450,11 @@ bool D3D12CommandProcessor::IssueSwapInternal(uint32_t frontbuffer_ptr, // Need to submit all the commands before giving the image back to the // presenter so it can submit its own commands for displaying it to the // queue. + if (using_native_swap_texture) { + PushTransitionBarrier(swap_texture_resource, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } REXGPU_ERROR("RefreshGuestOutput: checkpoint 6 (SubmitBarriers pre-EndSubmission)"); SubmitBarriers(); REXGPU_ERROR("RefreshGuestOutput: checkpoint 7 (EndSubmission)"); @@ -5205,7 +5271,10 @@ ID3D12Resource* rex::graphics::d3d12::D3D12CommandProcessor::UpdateDirectDisplay uint32_t height = frontbuffer_height ? 
frontbuffer_height : 720; uint32_t bpp = 4; - REXGPU_ERROR("UpdateDirectDisplayTexture: processing fb_ptr={:08X} w={} h={}", frontbuffer_ptr, width, height); + if (REXCVAR_GET(ac6_backend_debug_swap)) { + REXGPU_INFO("UpdateDirectDisplayTexture: fb_ptr={:08X} w={} h={}", frontbuffer_ptr, + width, height); + } bool is_direct_fb = false; uint32_t fb_addr = 0; @@ -5260,7 +5329,9 @@ ID3D12Resource* rex::graphics::d3d12::D3D12CommandProcessor::UpdateDirectDisplay ID3D12Device* device = GetD3D12Provider().GetDevice(); if (!direct_display_texture_) { - REXGPU_ERROR("UpdateDirectDisplayTexture: creating direct_display_texture_"); + if (REXCVAR_GET(ac6_backend_debug_swap)) { + REXGPU_INFO("UpdateDirectDisplayTexture: creating fallback frontbuffer texture"); + } D3D12_RESOURCE_DESC desc = {}; desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; desc.Width = width; @@ -5295,7 +5366,6 @@ ID3D12Resource* rex::graphics::d3d12::D3D12CommandProcessor::UpdateDirectDisplay uint32_t row_pitch = rex::align(width * bpp, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); - bool non_black_detected = false; uint32_t chunk_height = 256; for (uint32_t y = 0; y < height; y += chunk_height) { uint32_t rows_to_copy = std::min(height - y, chunk_height); @@ -5315,15 +5385,6 @@ ID3D12Resource* rex::graphics::d3d12::D3D12CommandProcessor::UpdateDirectDisplay std::memcpy(upload_mapping + cy * row_pitch, src_ptr, width * bpp); - if (!non_black_detected) { - uint32_t* p = reinterpret_cast(src_ptr); - for (uint32_t x = 0; x < width; ++x) { - if ((p[x] & 0xFFFFFF) != 0) { - non_black_detected = true; - break; - } - } - } } } @@ -5349,20 +5410,10 @@ ID3D12Resource* rex::graphics::d3d12::D3D12CommandProcessor::UpdateDirectDisplay } } - if (non_black_detected) { - static bool logged_non_black = false; - if (!logged_non_black) { - REXGPU_ERROR("Non-black frame detected"); - logged_non_black = true; - } - } - PushTransitionBarrier(direct_display_texture_.Get(), direct_display_texture_state_, 
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); direct_display_texture_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; SubmitBarriers(); - REXGPU_ERROR("UpdateDirectDisplayTexture: SUCCESS for fb={:08X}", fb_addr); - srv_desc_out.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srv_desc_out.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srv_desc_out.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; diff --git a/thirdparty/rexglue-sdk/src/graphics/graphics_system.cpp b/thirdparty/rexglue-sdk/src/graphics/graphics_system.cpp index 0f9cdcf5..f9742d5b 100644 --- a/thirdparty/rexglue-sdk/src/graphics/graphics_system.cpp +++ b/thirdparty/rexglue-sdk/src/graphics/graphics_system.cpp @@ -45,6 +45,43 @@ REXCVAR_DEFINE_STRING(swap_post_effect, "none", "GPU", "Swap post effect: none, namespace { constexpr bool kStoreShaders = true; +bool HasMeaningfulTextureFetch(const rex::system::GraphicsSwapSubmission& submission) { + for (uint32_t word : submission.texture_fetch) { + if (word != 0) { + return true; + } + } + return false; +} + +rex::system::GraphicsSwapSubmission MergeSwapSubmission( + const rex::system::GraphicsSwapSubmission& base, + const rex::system::GraphicsSwapSubmission& incoming) { + rex::system::GraphicsSwapSubmission merged = base; + if (incoming.frontbuffer_virtual_address) { + merged.frontbuffer_virtual_address = incoming.frontbuffer_virtual_address; + } + if (incoming.frontbuffer_physical_address) { + merged.frontbuffer_physical_address = incoming.frontbuffer_physical_address; + } + if (incoming.frontbuffer_width) { + merged.frontbuffer_width = incoming.frontbuffer_width; + } + if (incoming.frontbuffer_height) { + merged.frontbuffer_height = incoming.frontbuffer_height; + } + if (incoming.texture_format) { + merged.texture_format = incoming.texture_format; + } + if (incoming.color_space) { + merged.color_space = incoming.color_space; + } + if (HasMeaningfulTextureFetch(incoming)) { + merged.texture_fetch = incoming.texture_fetch; + } + return 
merged; +} + rex::graphics::CommandProcessor::SwapPostEffect ParseSwapPostEffect( const std::string& effect_name) { std::string lowered = effect_name; @@ -284,11 +321,34 @@ void GraphicsSystem::SetInterruptCallback(uint32_t callback, uint32_t user_data) REXGPU_INFO("SetInterruptCallback({:08X}, {:08X})", callback, user_data); } +void GraphicsSystem::SetFrameBoundaryCallback(std::function callback) { + frame_boundary_callback_ = std::move(callback); +} + bool GraphicsSystem::HandleVideoSwap(const system::GraphicsSwapSubmission& submission) { - (void)submission; + { + std::lock_guard lock(last_swap_submission_mutex_); + last_swap_submission_ = MergeSwapSubmission(last_swap_submission_, submission); + ++last_swap_submission_sequence_; + } + if (frame_boundary_callback_) { + frame_boundary_callback_(memory_); + } return false; } +bool GraphicsSystem::GetLastSwapSubmission(system::GraphicsSwapSubmission* out_submission, + uint64_t* out_sequence) const { + std::lock_guard lock(last_swap_submission_mutex_); + if (out_submission) { + *out_submission = last_swap_submission_; + } + if (out_sequence) { + *out_sequence = last_swap_submission_sequence_; + } + return last_swap_submission_sequence_ != 0; +} + void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) { if (!interrupt_callback_) { return; diff --git a/thirdparty/rexglue-sdk/src/native/ui/windowed_app_main_win.cpp b/thirdparty/rexglue-sdk/src/native/ui/windowed_app_main_win.cpp index fcd337f8..5786df44 100644 --- a/thirdparty/rexglue-sdk/src/native/ui/windowed_app_main_win.cpp +++ b/thirdparty/rexglue-sdk/src/native/ui/windowed_app_main_win.cpp @@ -60,6 +60,11 @@ int WINAPI wWinMain(HINSTANCE hinstance, HINSTANCE hinstance_prev, LPWSTR comman auto remaining = rex::cvar::Init(static_cast(argv_ptrs.size()), argv_ptrs.data()); rex::cvar::ApplyEnvironment(); + // Force logging to a file immediately + auto log_config = rex::BuildLogConfig("ac6_boot.log", "info", {}); + rex::InitLogging(log_config); + 
REXLOG_INFO("wWinMain started"); + // Allocate a console for debugging if enabled if (REXCVAR_GET(enable_console)) { AllocConsole(); @@ -73,14 +78,19 @@ int WINAPI wWinMain(HINSTANCE hinstance, HINSTANCE hinstance_prev, LPWSTR comman int result; { + REXLOG_INFO("wWinMain: Creating Win32WindowedAppContext..."); rex::ui::Win32WindowedAppContext app_context(hinstance, show_cmd); // TODO(Triang3l): Initialize creates a window. Set DPI awareness via the // manifest. + REXLOG_INFO("wWinMain: Initializing app context..."); if (!app_context.Initialize()) { + REXLOG_ERROR("wWinMain: app_context.Initialize failed"); return EXIT_FAILURE; } + REXLOG_INFO("wWinMain: Getting app creator..."); std::unique_ptr app = rex::ui::GetWindowedAppCreator()(app_context); + REXLOG_INFO("wWinMain: App instance created"); // Match remaining positional args to app's expected options const auto& option_names = app->GetPositionalOptions(); @@ -93,14 +103,24 @@ int WINAPI wWinMain(HINSTANCE hinstance, HINSTANCE hinstance_prev, LPWSTR comman // Initialize COM on the UI thread with the apartment-threaded concurrency // model, so dialogs can be used. + REXLOG_INFO("wWinMain: Initializing COM..."); if (FAILED(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED))) { + REXLOG_ERROR("wWinMain: CoInitializeEx failed"); return EXIT_FAILURE; } // TODO: Port InitializeWin32App from Xenia // rex::InitializeWin32App(app->GetName()); - result = app->OnInitialize() ? app_context.RunMainMessageLoop() : EXIT_FAILURE; + REXLOG_INFO("wWinMain: Calling app->OnInitialize()..."); + if (!app->OnInitialize()) { + REXLOG_ERROR("wWinMain: app->OnInitialize failed"); + return EXIT_FAILURE; + } + + REXLOG_INFO("wWinMain: Entering main message loop..."); + result = app_context.RunMainMessageLoop(); + REXLOG_INFO("wWinMain: Main message loop exited with result {}", result); app->InvokeOnDestroy(); }