From e0cf1eea0346918d6da64cecb095f8b91b6b8e6c Mon Sep 17 00:00:00 2001 From: salh Date: Sat, 18 Apr 2026 10:34:53 +0300 Subject: [PATCH] Add execution plan and replay executor scaffolding --- CMakeLists.txt | 2 + src/Milestone.md | 57 ++++++ src/ac6_native_graphics.cpp | 2 + src/ac6_native_graphics.h | 4 + src/ac6_native_graphics_overlay.cpp | 14 ++ src/ac6_native_renderer/execution_plan.cpp | 192 ++++++++++++++++++++ src/ac6_native_renderer/execution_plan.h | 110 +++++++++++ src/ac6_native_renderer/native_renderer.cpp | 74 +++----- src/ac6_native_renderer/native_renderer.h | 10 + src/ac6_native_renderer/replay_executor.cpp | 175 ++++++++++++++++++ src/ac6_native_renderer/replay_executor.h | 82 +++++++++ 11 files changed, 678 insertions(+), 44 deletions(-) create mode 100644 src/Milestone.md create mode 100644 src/ac6_native_renderer/execution_plan.cpp create mode 100644 src/ac6_native_renderer/execution_plan.h create mode 100644 src/ac6_native_renderer/replay_executor.cpp create mode 100644 src/ac6_native_renderer/replay_executor.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 92b8a82c..0b72d389 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,8 +23,10 @@ set(AC6RECOMP_SOURCES src/ac6_native_graphics.cpp src/ac6_native_graphics_overlay.cpp src/ac6_native_renderer/ac6_render_frontend.cpp + src/ac6_native_renderer/execution_plan.cpp src/ac6_native_renderer/frame_plan.cpp src/ac6_native_renderer/replay_ir.cpp + src/ac6_native_renderer/replay_executor.cpp src/ac6_native_renderer/backends/backend_factory.cpp src/ac6_native_renderer/backends/d3d12_backend.cpp src/ac6_native_renderer/backends/metal_backend.cpp diff --git a/src/Milestone.md b/src/Milestone.md new file mode 100644 index 00000000..abae93e1 --- /dev/null +++ b/src/Milestone.md @@ -0,0 +1,57 @@ +Roadmap + +- Milestone 1: Lock down capture analysis by preserving replay-shaped commands inside each observed pass, then expose counts in debug UI. +- Milestone 2: Introduce a backend-agnostic replay IR that converts pass commands into explicit draw/clear/resolve execution packets. +- Milestone 3: Implement the real D3D12 backend path first: device, queue, allocators, fences, frame slots, and present. +- Milestone 4: Add guest-to-host resource translation for RTs, depth, textures, vertex/index buffers, and fetch constants. +- Milestone 5: Add pipeline/shader translation and PSO caching, then target first visible native output from one selected pass. +- Milestone 6: Add parity validation mode, capture-based comparisons, and rollout gates for bootstrap -> scene_submission -> parity_validation -> shipping. + +Completed + +- Milestone 1 is complete. +- Milestone 2 is now in place at the data-model level. + +Work Completed + +- Added a backend-agnostic observed command model with `ObservedCommandType` and `ObservedCommandDesc` in `ac6_render_frontend.h`. +- Extended each observed pass to retain its ordered command list in `ac6_render_frontend.h`. +- Updated frontend capture processing to materialize per-command draw, clear, and resolve records while preserving pass grouping in `ac6_render_frontend.cpp`. +- Added `total_command_count` to the frontend summary so the runtime can report more than just pass counts. +- Wired the frontend summary into runtime status in `ac6_native_graphics.h` and `ac6_native_graphics.cpp`. +- Surfaced frontend pass/command counts in `ac6_native_graphics_overlay.cpp`. +- Added a new replay IR layer in `replay_ir.h` and `replay_ir.cpp`. +- Introduced `ReplayPassRole`, `ReplayCommandDesc`, `ReplayPassDesc`, `ReplayFrameSummary`, and `ReplayFrame`. +- Added `ReplayIrBuilder` so the renderer can build a replay frame from frontend passes plus the frame plan. +- Added a new execution-plan layer in `execution_plan.h` and `execution_plan.cpp`. +- Introduced `ExecutionCommandCategory`, `ExecutionCommandPacket`, `ExecutionResourceRequirements`, `ExecutionPassPacket`, `ExecutionFrameSummary`, and `ExecutionFramePlan`. +- Added `ExecutionPlanBuilder` so the renderer can derive backend-ready pass packets from `ReplayFrame` plus frame-plan hints. +- Added a new replay-executor layer in `replay_executor.h` and `replay_executor.cpp`. +- Introduced `SubmissionQueueType`, `ReplayExecutorCommandPacket`, `ReplayExecutorPassPacket`, `ReplayExecutorFrameSummary`, and `ReplayExecutorFrame`. +- Added `ReplayExecutorPlanBuilder` so the renderer can derive submission-oriented pass packets from `ExecutionFramePlan`. +- Updated `NativeRenderer` to build replay IR first, then execution plan, then replay-executor packets, then derive the current `RenderGraph` from executor passes. +- Exposed replay summary data through `ac6_native_graphics.h` and `ac6_native_graphics.cpp`. +- Exposed execution-plan summary data through `ac6_native_graphics.h` and `ac6_native_graphics.cpp`. +- Exposed replay-executor summary data through `ac6_native_graphics.h` and `ac6_native_graphics.cpp`. +- Surfaced replay, execution, and executor pass/command counts plus output-present state in `ac6_native_graphics_overlay.cpp`. +- Updated `CMakeLists.txt` to compile `replay_ir.cpp`, `execution_plan.cpp`, and `replay_executor.cpp`. + +Why This Matters + +- The renderer no longer stops at pass heuristics alone; it now carries replay IR, execution-plan, and executor artifacts forward. +- This creates the bridge between capture analysis and future backend execution without forcing full D3D12 command-list submission too early. +- The execution plan tracks stable per-pass resource requirements and command categories, while the replay executor now shapes queue-ready submission packets. +- The overlay now shows whether frontend analysis, replay IR, execution planning, and executor shaping stay aligned frame to frame. + +Verification + +- VS Code diagnostics are clean for the edited files. +- Full preset build verification is still blocked by an existing Ninja build-tree issue: `Re-checking globbed directories... ninja: fatal: GetOverlappedResult: The operation completed successfully.` +- A fresh scratch configure also cannot complete in the current terminal environment because no C++ compiler is available in `PATH`. +- I did not see source-level diagnostics from the replay-executor changes themselves. + +Next Step + +- Start consuming `ReplayExecutorFrame` in a real D3D12 submission path instead of only translating it back into `RenderGraph`. +- Add guest-to-host resource translation for executor packets: render targets, depth, textures, vertex/index buffers, and fetch constants. +- Add D3D12-side placeholders for PSO binding, descriptor setup, and barrier/state transitions while the executor contract stabilizes. diff --git a/src/ac6_native_graphics.cpp b/src/ac6_native_graphics.cpp index 474d762a..be558549 100644 --- a/src/ac6_native_graphics.cpp +++ b/src/ac6_native_graphics.cpp @@ -92,6 +92,8 @@ void UpdateStatusFromRendererUnlocked() { g_runtime_status.active_backend = g_runtime_status.renderer_stats.active_backend; g_runtime_status.frontend_summary = g_native_renderer.frontend_summary(); g_runtime_status.replay_summary = g_native_renderer.replay_summary(); + g_runtime_status.execution_summary = g_native_renderer.execution_summary(); + g_runtime_status.executor_summary = g_native_renderer.executor_summary(); g_runtime_status.frame_plan = g_native_renderer.frame_plan(); } diff --git a/src/ac6_native_graphics.h b/src/ac6_native_graphics.h index 2c447da7..89d24c38 100644 --- a/src/ac6_native_graphics.h +++ b/src/ac6_native_graphics.h @@ -3,8 +3,10 @@ #include #include "ac6_native_renderer/ac6_render_frontend.h" +#include "ac6_native_renderer/execution_plan.h" #include "ac6_native_renderer/frame_plan.h" #include "ac6_native_renderer/replay_ir.h" +#include "ac6_native_renderer/replay_executor.h" #include "ac6_native_renderer/types.h" #include "d3d_state.h" @@ -23,6 +25,8 @@ struct NativeGraphicsRuntimeStatus { ac6::renderer::NativeRendererStats renderer_stats{}; ac6::renderer::FrontendFrameSummary frontend_summary{}; ac6::renderer::ReplayFrameSummary replay_summary{}; + ac6::renderer::ExecutionFrameSummary execution_summary{}; + ac6::renderer::ReplayExecutorFrameSummary executor_summary{}; ac6::d3d::FrameCaptureSummary capture_summary{}; ac6::renderer::NativeFramePlan frame_plan{}; }; diff --git a/src/ac6_native_graphics_overlay.cpp b/src/ac6_native_graphics_overlay.cpp index 5a394e7a..e6071e4c 100644 --- a/src/ac6_native_graphics_overlay.cpp +++ b/src/ac6_native_graphics_overlay.cpp @@ -41,6 +41,10 @@ void NativeGraphicsStatusDialog::OnDraw(ImGuiIO& io) { status.frontend_summary.total_command_count); ImGui::Text("replay passes/commands: %u / %u", status.replay_summary.pass_count, status.replay_summary.command_count); + ImGui::Text("execution passes/commands: %u / %u", + status.execution_summary.pass_count, status.execution_summary.command_count); + ImGui::Text("executor passes/commands: %u / %u", + status.executor_summary.pass_count, status.executor_summary.command_count); ImGui::Separator(); ImGui::Text("capture frame: %llu", static_cast(status.capture_summary.frame_index)); @@ -71,6 +75,16 @@ void NativeGraphicsStatusDialog::OnDraw(ImGuiIO& io) { ImGui::Text("replay output/present: %ux%u / %s", status.replay_summary.output_width, status.replay_summary.output_height, status.replay_summary.has_present_pass ? "yes" : "no"); + ImGui::Text("execution output/present: %ux%u / %s", + status.execution_summary.output_width, status.execution_summary.output_height, + status.execution_summary.has_present_pass ? "yes" : "no"); + ImGui::Text("executor output/present: %ux%u / %s", + status.executor_summary.output_width, status.executor_summary.output_height, + status.executor_summary.has_present_pass ? "yes" : "no"); + ImGui::Text("executor graphics/present/resource: %u / %u / %u", + status.executor_summary.graphics_pass_count, + status.executor_summary.present_pass_count, + status.executor_summary.resource_translation_pass_count); ImGui::Text("stages scene/post/ui: %s / %s / %s", status.frame_plan.has_scene_stage ? "yes" : "no", status.frame_plan.has_post_process_stage ? "yes" : "no", diff --git a/src/ac6_native_renderer/execution_plan.cpp b/src/ac6_native_renderer/execution_plan.cpp new file mode 100644 index 00000000..e1a1f08a --- /dev/null +++ b/src/ac6_native_renderer/execution_plan.cpp @@ -0,0 +1,192 @@ +#include "execution_plan.h" + +#include +#include + +namespace ac6::renderer { +namespace { + +ExecutionCommandCategory ToExecutionCommandCategory(ObservedCommandType type) { + switch (type) { + case ObservedCommandType::kDraw: + return ExecutionCommandCategory::kDraw; + case ObservedCommandType::kClear: + return ExecutionCommandCategory::kClear; + case ObservedCommandType::kResolve: + return ExecutionCommandCategory::kResolve; + default: + return ExecutionCommandCategory::kNone; + } +} + +ExecutionCommandPacket BuildExecutionCommandPacket(const ReplayCommandDesc& command, + uint32_t replay_pass_index, + uint32_t replay_command_index) { + return ExecutionCommandPacket{ + .category = ToExecutionCommandCategory(command.type), + .source_type = command.type, + .replay_pass_index = replay_pass_index, + .replay_command_index = replay_command_index, + .sequence = command.sequence, + .draw_kind = command.draw_kind, + .primitive_type = command.primitive_type, + .start = command.start, + .count = command.count, + .flags = command.flags, + .rect_count = command.rect_count, + .captured_rect_count = command.captured_rect_count, + .color = command.color, + .stencil = command.stencil, + .depth = command.depth, + .texture_count = command.texture_count, + .stream_count = command.stream_count, + .sampler_count = command.sampler_count, + .fetch_constant_count = command.fetch_constant_count, + .render_target_0 = command.render_target_0, + .depth_stencil = command.depth_stencil, + .viewport_x = command.viewport_x, + .viewport_y = command.viewport_y, + .viewport_width = command.viewport_width, + .viewport_height = command.viewport_height, + }; +} + +void AccumulateResourceRequirements(ExecutionResourceRequirements& resources, + const ExecutionCommandPacket& command) { + resources.needs_render_target |= command.render_target_0 != 0; + resources.needs_depth_stencil |= command.depth_stencil != 0; + resources.max_texture_count = + std::max(resources.max_texture_count, command.texture_count); + resources.max_stream_count = + std::max(resources.max_stream_count, command.stream_count); + resources.max_sampler_count = + std::max(resources.max_sampler_count, command.sampler_count); + resources.max_fetch_constant_count = + std::max(resources.max_fetch_constant_count, command.fetch_constant_count); + resources.max_viewport_width = + std::max(resources.max_viewport_width, command.viewport_width); + resources.max_viewport_height = + std::max(resources.max_viewport_height, command.viewport_height); + + if (command.category != ExecutionCommandCategory::kDraw) { + return; + } + + resources.needs_vertex_streams |= command.stream_count != 0; + resources.needs_index_buffer |= command.draw_kind != ac6::d3d::DrawCallKind::kPrimitive; + resources.needs_textures |= command.texture_count != 0; + resources.needs_samplers |= command.sampler_count != 0; + resources.needs_fetch_constants |= command.fetch_constant_count != 0; +} + +ExecutionPassPacket BuildExecutionPassPacket(const ReplayPassDesc& replay_pass, + uint32_t replay_pass_index, + const NativeFramePlan& frame_plan) { + ExecutionPassPacket pass_packet; + pass_packet.name = replay_pass.name; + pass_packet.role = replay_pass.role; + pass_packet.replay_pass_valid = true; + pass_packet.replay_pass_index = replay_pass_index; + pass_packet.source_pass_valid = replay_pass.source_pass_valid; + pass_packet.source_pass_index = replay_pass.source_pass_index; + pass_packet.draw_count = replay_pass.draw_count; + pass_packet.clear_count = replay_pass.clear_count; + pass_packet.resolve_count = replay_pass.resolve_count; + pass_packet.render_target_0 = replay_pass.render_target_0; + pass_packet.depth_stencil = replay_pass.depth_stencil; + pass_packet.output_width = replay_pass.viewport_width; + pass_packet.output_height = replay_pass.viewport_height; + pass_packet.selected_for_present = replay_pass.selected_for_present; + pass_packet.commands.reserve(replay_pass.commands.size()); + + if (pass_packet.role == ReplayPassRole::kPresent) { + pass_packet.output_width = frame_plan.output_width; + pass_packet.output_height = frame_plan.output_height; + } + + for (uint32_t i = 0; i < replay_pass.commands.size(); ++i) { + ExecutionCommandPacket command_packet = + BuildExecutionCommandPacket(replay_pass.commands[i], replay_pass_index, i); + AccumulateResourceRequirements(pass_packet.resources, command_packet); + pass_packet.commands.push_back(std::move(command_packet)); + } + + pass_packet.resources.needs_render_target |= pass_packet.render_target_0 != 0; + pass_packet.resources.needs_depth_stencil |= pass_packet.depth_stencil != 0; + pass_packet.resources.max_viewport_width = + std::max(pass_packet.resources.max_viewport_width, pass_packet.output_width); + pass_packet.resources.max_viewport_height = + std::max(pass_packet.resources.max_viewport_height, pass_packet.output_height); + return pass_packet; +} + +void AccumulateSummary(ExecutionFrameSummary& summary, + const ExecutionPassPacket& pass_packet) { + ++summary.pass_count; + summary.command_count += static_cast(pass_packet.commands.size()); + summary.draw_packet_count += pass_packet.draw_count; + summary.clear_packet_count += pass_packet.clear_count; + summary.resolve_packet_count += pass_packet.resolve_count; + if (pass_packet.role == ReplayPassRole::kPresent) { + ++summary.present_pass_count; + summary.has_present_pass = true; + } + summary.valid = summary.pass_count != 0; +} + +} // namespace + +const char* ToString(ExecutionCommandCategory category) { + switch (category) { + case ExecutionCommandCategory::kDraw: + return "draw"; + case ExecutionCommandCategory::kClear: + return "clear"; + case ExecutionCommandCategory::kResolve: + return "resolve"; + case ExecutionCommandCategory::kNone: + default: + return "none"; + } +} + +ExecutionFramePlan ExecutionPlanBuilder::BuildBootstrapPlan(uint64_t frame_index) const { + ExecutionFramePlan plan; + plan.summary.frame_index = frame_index; + + ExecutionPassPacket bootstrap_pass; + bootstrap_pass.name = "ac6.execution.bootstrap"; + bootstrap_pass.role = ReplayPassRole::kBootstrap; + + AccumulateSummary(plan.summary, bootstrap_pass); + plan.passes.push_back(std::move(bootstrap_pass)); + return plan; +} + +ExecutionFramePlan ExecutionPlanBuilder::Build( + const ReplayFrame& replay_frame, const NativeFramePlan& frame_plan) const { + ExecutionFramePlan plan; + plan.summary.frame_index = replay_frame.summary.frame_index; + plan.summary.output_width = replay_frame.summary.output_width; + plan.summary.output_height = replay_frame.summary.output_height; + + if (!replay_frame.summary.valid || replay_frame.passes.empty()) { + return plan; + } + + plan.passes.reserve(replay_frame.passes.size()); + for (uint32_t i = 0; i < replay_frame.passes.size(); ++i) { + ExecutionPassPacket pass_packet = + BuildExecutionPassPacket(replay_frame.passes[i], i, frame_plan); + AccumulateSummary(plan.summary, pass_packet); + plan.passes.push_back(std::move(pass_packet)); + } + + plan.summary.valid = + plan.summary.pass_count != 0 && + (!frame_plan.valid || + (plan.summary.output_width != 0 && plan.summary.output_height != 0)); + return plan; +} + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/execution_plan.h b/src/ac6_native_renderer/execution_plan.h new file mode 100644 index 00000000..3fc616a3 --- /dev/null +++ b/src/ac6_native_renderer/execution_plan.h @@ -0,0 +1,110 @@ +#pragma once + +#include +#include +#include + +#include "frame_plan.h" +#include "replay_ir.h" + +namespace ac6::renderer { + +enum class ExecutionCommandCategory : uint8_t { + kNone = 0, + kDraw = 1, + kClear = 2, + kResolve = 3, +}; + +struct ExecutionCommandPacket { + ExecutionCommandCategory category = ExecutionCommandCategory::kNone; + ObservedCommandType source_type = ObservedCommandType::kDraw; + uint32_t replay_pass_index = 0; + uint32_t replay_command_index = 0; + uint32_t sequence = 0; + ac6::d3d::DrawCallKind draw_kind = ac6::d3d::DrawCallKind::kIndexed; + uint32_t primitive_type = 0; + uint32_t start = 0; + uint32_t count = 0; + uint32_t flags = 0; + uint32_t rect_count = 0; + uint32_t captured_rect_count = 0; + uint32_t color = 0; + uint32_t stencil = 0; + float depth = 1.0f; + uint32_t texture_count = 0; + uint32_t stream_count = 0; + uint32_t sampler_count = 0; + uint32_t fetch_constant_count = 0; + uint32_t render_target_0 = 0; + uint32_t depth_stencil = 0; + uint32_t viewport_x = 0; + uint32_t viewport_y = 0; + uint32_t viewport_width = 0; + uint32_t viewport_height = 0; +}; + +struct ExecutionResourceRequirements { + bool needs_render_target = false; + bool needs_depth_stencil = false; + bool needs_vertex_streams = false; + bool needs_index_buffer = false; + bool needs_textures = false; + bool needs_samplers = false; + bool needs_fetch_constants = false; + uint32_t max_texture_count = 0; + uint32_t max_stream_count = 0; + uint32_t max_sampler_count = 0; + uint32_t max_fetch_constant_count = 0; + uint32_t max_viewport_width = 0; + uint32_t max_viewport_height = 0; +}; + +struct ExecutionPassPacket { + std::string name; + ReplayPassRole role = ReplayPassRole::kUnknown; + bool replay_pass_valid = false; + uint32_t replay_pass_index = 0; + bool source_pass_valid = false; + uint32_t source_pass_index = 0; + uint32_t draw_count = 0; + uint32_t clear_count = 0; + uint32_t resolve_count = 0; + uint32_t render_target_0 = 0; + uint32_t depth_stencil = 0; + uint32_t output_width = 0; + uint32_t output_height = 0; + bool selected_for_present = false; + ExecutionResourceRequirements resources{}; + std::vector commands; +}; + +struct ExecutionFrameSummary { + bool valid = false; + uint64_t frame_index = 0; + uint32_t pass_count = 0; + uint32_t command_count = 0; + uint32_t draw_packet_count = 0; + uint32_t clear_packet_count = 0; + uint32_t resolve_packet_count = 0; + uint32_t present_pass_count = 0; + uint32_t output_width = 0; + uint32_t output_height = 0; + bool has_present_pass = false; +}; + +struct ExecutionFramePlan { + ExecutionFrameSummary summary{}; + std::vector passes; +}; + +class ExecutionPlanBuilder { + public: + ExecutionFramePlan BuildBootstrapPlan(uint64_t frame_index) const; + ExecutionFramePlan Build(const ReplayFrame& replay_frame, + const NativeFramePlan& frame_plan) const; +}; + +const char* ToString(ExecutionCommandCategory category); + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/native_renderer.cpp b/src/ac6_native_renderer/native_renderer.cpp index 2a8e18e1..8a79e40b 100644 --- a/src/ac6_native_renderer/native_renderer.cpp +++ b/src/ac6_native_renderer/native_renderer.cpp @@ -7,20 +7,6 @@ namespace ac6::renderer { namespace { -RenderPassKind ToRenderPassKind(ObservedPassKind kind) { - switch (kind) { - case ObservedPassKind::kScene: - return RenderPassKind::kScene; - case ObservedPassKind::kPostProcess: - return RenderPassKind::kPostProcess; - case ObservedPassKind::kUiComposite: - return RenderPassKind::kUiComposite; - case ObservedPassKind::kUnknown: - default: - return RenderPassKind::kUnknown; - } -} - RenderPassKind ToRenderPassKind(ReplayPassRole role) { switch (role) { case ReplayPassRole::kScene: @@ -38,6 +24,22 @@ RenderPassKind ToRenderPassKind(ReplayPassRole role) { } } +RenderPassDesc BuildRenderPassDesc(const ReplayExecutorPassPacket& pass) { + return RenderPassDesc{ + .name = pass.name, + .kind = ToRenderPassKind(pass.role), + .async_compute = false, + .draw_count = pass.draw_count, + .clear_count = pass.clear_count, + .resolve_count = pass.resolve_count, + .render_target_0 = pass.render_target_0, + .depth_stencil = pass.depth_stencil, + .viewport_width = pass.output_width, + .viewport_height = pass.output_height, + .selected_for_present = pass.selected_for_present, + }; +} + } // namespace NativeRenderer::NativeRenderer() = default; @@ -70,6 +72,8 @@ void NativeRenderer::Shutdown() { frontend_.Reset(); frame_plan_ = {}; replay_frame_ = {}; + execution_plan_ = {}; + executor_frame_ = {}; stats_ = {}; } @@ -89,23 +93,13 @@ void NativeRenderer::BuildBootstrapFrame() { frame_plan_ = {}; replay_frame_ = replay_builder_.BuildBootstrapFrame(scheduler_.frame_index()); + execution_plan_ = execution_builder_.BuildBootstrapPlan(scheduler_.frame_index()); + executor_frame_ = executor_builder_.BuildBootstrapFrame(scheduler_.frame_index()); // Phase-1: do not present. Build a minimal graph to prove deterministic // ownership without touching Rexglue emulation paths. - for (const ReplayPassDesc& pass : replay_frame_.passes) { - graph_.AddPass(RenderPassDesc{ - .name = pass.name, - .kind = ToRenderPassKind(pass.role), - .async_compute = false, - .draw_count = pass.draw_count, - .clear_count = pass.clear_count, - .resolve_count = pass.resolve_count, - .render_target_0 = pass.render_target_0, - .depth_stencil = pass.depth_stencil, - .viewport_width = pass.viewport_width, - .viewport_height = pass.viewport_height, - .selected_for_present = pass.selected_for_present, - }); + for (const ReplayExecutorPassPacket& pass : executor_frame_.passes) { + graph_.AddPass(BuildRenderPassDesc(pass)); } stats_.built_pass_count += graph_.pass_count(); @@ -127,28 +121,20 @@ void NativeRenderer::BuildCapturedFrame( frame_plan_ = planner_.Build(summary, frontend_.passes()); replay_frame_ = replay_builder_.Build(summary, frontend_.passes(), frame_plan_); + execution_plan_ = execution_builder_.Build(replay_frame_, frame_plan_); + executor_frame_ = executor_builder_.Build(execution_plan_); - for (const ReplayPassDesc& pass : replay_frame_.passes) { - graph_.AddPass(RenderPassDesc{ - .name = pass.name, - .kind = ToRenderPassKind(pass.role), - .async_compute = false, - .draw_count = pass.draw_count, - .clear_count = pass.clear_count, - .resolve_count = pass.resolve_count, - .render_target_0 = pass.render_target_0, - .depth_stencil = pass.depth_stencil, - .viewport_width = pass.viewport_width, - .viewport_height = pass.viewport_height, - .selected_for_present = pass.selected_for_present, - }); + for (const ReplayExecutorPassPacket& pass : executor_frame_.passes) { + graph_.AddPass(BuildRenderPassDesc(pass)); } stats_.built_pass_count += graph_.pass_count(); REXLOG_TRACE( - "AC6 native renderer observed frame={} frontend_passes={} replay_passes={} replay_commands={} selected={} draws={} clears={} resolves={} plan_valid={} out={}x{}", + "AC6 native renderer observed frame={} frontend_passes={} replay_passes={} replay_commands={} execution_passes={} execution_commands={} executor_passes={} executor_commands={} selected={} draws={} clears={} resolves={} plan_valid={} out={}x{}", summary.frame_index, summary.pass_count, replay_frame_.summary.pass_count, - replay_frame_.summary.command_count, summary.selected_pass_index, + replay_frame_.summary.command_count, execution_plan_.summary.pass_count, + execution_plan_.summary.command_count, executor_frame_.summary.pass_count, + executor_frame_.summary.command_count, summary.selected_pass_index, summary.total_draw_count, summary.total_clear_count, summary.total_resolve_count, frame_plan_.valid, frame_plan_.output_width, frame_plan_.output_height); diff --git a/src/ac6_native_renderer/native_renderer.h b/src/ac6_native_renderer/native_renderer.h index 2ad14705..56456888 100644 --- a/src/ac6_native_renderer/native_renderer.h +++ b/src/ac6_native_renderer/native_renderer.h @@ -3,9 +3,11 @@ #include #include "ac6_render_frontend.h" +#include "execution_plan.h" #include "frame_scheduler.h" #include "frame_plan.h" #include "replay_ir.h" +#include "replay_executor.h" #include "render_device.h" #include "render_graph.h" #include "types.h" @@ -38,6 +40,10 @@ class NativeRenderer { NativeFramePlan frame_plan() const { return frame_plan_; } ReplayFrameSummary replay_summary() const { return replay_frame_.summary; } const ReplayFrame& replay_frame() const { return replay_frame_; } + ExecutionFrameSummary execution_summary() const { return execution_plan_.summary; } + const ExecutionFramePlan& execution_plan() const { return execution_plan_; } + ReplayExecutorFrameSummary executor_summary() const { return executor_frame_.summary; } + const ReplayExecutorFrame& executor_frame() const { return executor_frame_; } private: NativeRendererConfig config_{}; @@ -48,8 +54,12 @@ class NativeRenderer { Ac6RenderFrontend frontend_{}; FramePlanner planner_{}; ReplayIrBuilder replay_builder_{}; + ExecutionPlanBuilder execution_builder_{}; + ReplayExecutorPlanBuilder executor_builder_{}; NativeFramePlan frame_plan_{}; ReplayFrame replay_frame_{}; + ExecutionFramePlan execution_plan_{}; + ReplayExecutorFrame executor_frame_{}; }; } // namespace ac6::renderer diff --git a/src/ac6_native_renderer/replay_executor.cpp b/src/ac6_native_renderer/replay_executor.cpp new file mode 100644 index 00000000..13a9fe98 --- /dev/null +++ b/src/ac6_native_renderer/replay_executor.cpp @@ -0,0 +1,175 @@ +#include "replay_executor.h" + +#include + +namespace ac6::renderer { +namespace { + +SubmissionQueueType SelectQueue(const ExecutionPassPacket& pass) { + (void)pass; + // Current scaffold keeps all work on the graphics queue until backend + // implementations can prove safe async-compute or copy splits. + return SubmissionQueueType::kGraphics; +} + +ReplayExecutorCommandPacket BuildExecutorCommandPacket( + const ExecutionCommandPacket& command, uint32_t execution_pass_index, + uint32_t execution_command_index) { + const bool is_draw = command.category == ExecutionCommandCategory::kDraw; + return ReplayExecutorCommandPacket{ + .category = command.category, + .execution_pass_index = execution_pass_index, + .execution_command_index = execution_command_index, + .sequence = command.sequence, + .requires_resource_translation = + command.render_target_0 != 0 || command.depth_stencil != 0 || + command.texture_count != 0 || command.stream_count != 0 || + command.fetch_constant_count != 0, + .requires_pipeline_state = is_draw, + .requires_descriptor_setup = + is_draw && + (command.texture_count != 0 || command.sampler_count != 0 || + command.fetch_constant_count != 0), + .touches_render_target = command.render_target_0 != 0, + .touches_depth_stencil = command.depth_stencil != 0, + }; +} + +ReplayExecutorPassPacket BuildExecutorPassPacket(const ExecutionPassPacket& pass, + uint32_t execution_pass_index) { + ReplayExecutorPassPacket executor_pass; + executor_pass.name = pass.name; + executor_pass.role = pass.role; + executor_pass.queue = SelectQueue(pass); + executor_pass.execution_pass_valid = true; + executor_pass.execution_pass_index = execution_pass_index; + executor_pass.draw_count = pass.draw_count; + executor_pass.clear_count = pass.clear_count; + executor_pass.resolve_count = pass.resolve_count; + executor_pass.render_target_0 = pass.render_target_0; + executor_pass.depth_stencil = pass.depth_stencil; + executor_pass.output_width = pass.output_width; + executor_pass.output_height = pass.output_height; + executor_pass.selected_for_present = pass.selected_for_present; + executor_pass.requires_present = pass.selected_for_present; + executor_pass.resources = pass.resources; + executor_pass.commands.reserve(pass.commands.size()); + + for (uint32_t i = 0; i < pass.commands.size(); ++i) { + ReplayExecutorCommandPacket command_packet = + BuildExecutorCommandPacket(pass.commands[i], execution_pass_index, i); + executor_pass.requires_resource_translation |= + command_packet.requires_resource_translation; + executor_pass.requires_pipeline_state |= command_packet.requires_pipeline_state; + executor_pass.requires_descriptor_setup |= + command_packet.requires_descriptor_setup; + executor_pass.commands.push_back(std::move(command_packet)); + } + + executor_pass.requires_resource_translation |= + pass.resources.needs_render_target || pass.resources.needs_depth_stencil || + pass.resources.needs_vertex_streams || pass.resources.needs_index_buffer || + pass.resources.needs_textures || pass.resources.needs_fetch_constants; + executor_pass.requires_pipeline_state |= pass.draw_count != 0; + executor_pass.requires_descriptor_setup |= + pass.resources.needs_textures || pass.resources.needs_samplers || + pass.resources.needs_fetch_constants; + executor_pass.requires_present |= pass.role == ReplayPassRole::kPresent; + return executor_pass; +} + +void AccumulateSummary(ReplayExecutorFrameSummary& summary, + const ReplayExecutorPassPacket& pass) { + ++summary.pass_count; + summary.command_count += static_cast(pass.commands.size()); + + switch (pass.queue) { + case SubmissionQueueType::kGraphics: + ++summary.graphics_pass_count; + break; + case SubmissionQueueType::kAsyncCompute: + ++summary.async_compute_pass_count; + break; + case SubmissionQueueType::kCopy: + ++summary.copy_pass_count; + break; + case SubmissionQueueType::kUnknown: + default: + break; + } + + if (pass.requires_present) { + ++summary.present_pass_count; + summary.has_present_pass = true; + } + if (pass.requires_resource_translation) { + ++summary.resource_translation_pass_count; + } + if (pass.requires_pipeline_state) { + ++summary.pipeline_state_pass_count; + } + if (pass.requires_descriptor_setup) { + ++summary.descriptor_setup_pass_count; + } + summary.valid = summary.pass_count != 0; +} + +} // namespace + +const char* ToString(SubmissionQueueType queue) { + switch (queue) { + case SubmissionQueueType::kGraphics: + return "graphics"; + case SubmissionQueueType::kAsyncCompute: + return "async_compute"; + case SubmissionQueueType::kCopy: + return "copy"; + case SubmissionQueueType::kUnknown: + default: + return "unknown"; + } +} + +ReplayExecutorFrame ReplayExecutorPlanBuilder::BuildBootstrapFrame( + uint64_t frame_index) const { + ReplayExecutorFrame frame; + frame.summary.frame_index = frame_index; + + ReplayExecutorPassPacket bootstrap_pass; + bootstrap_pass.name = "ac6.executor.bootstrap"; + bootstrap_pass.role = ReplayPassRole::kBootstrap; + bootstrap_pass.queue = SubmissionQueueType::kGraphics; + + AccumulateSummary(frame.summary, bootstrap_pass); + frame.passes.push_back(std::move(bootstrap_pass)); + return frame; +} + +ReplayExecutorFrame ReplayExecutorPlanBuilder::Build( + const ExecutionFramePlan& execution_plan) const { + ReplayExecutorFrame frame; + frame.summary.frame_index = execution_plan.summary.frame_index; + frame.summary.output_width = execution_plan.summary.output_width; + frame.summary.output_height = execution_plan.summary.output_height; + + if (!execution_plan.summary.valid || execution_plan.passes.empty()) { + return frame; + } + + frame.passes.reserve(execution_plan.passes.size()); + for (uint32_t i = 0; i < execution_plan.passes.size(); ++i) { + ReplayExecutorPassPacket pass = + BuildExecutorPassPacket(execution_plan.passes[i], i); + AccumulateSummary(frame.summary, pass); + frame.passes.push_back(std::move(pass)); + } + + frame.summary.valid = + frame.summary.pass_count != 0 && + (frame.summary.output_width != 0 || frame.summary.output_height != 0 || + execution_plan.summary.frame_index != 0 || + !frame.passes.empty()); + return frame; +} + +} // namespace ac6::renderer diff --git a/src/ac6_native_renderer/replay_executor.h b/src/ac6_native_renderer/replay_executor.h new file mode 100644 index 00000000..c62dbb39 --- /dev/null +++ b/src/ac6_native_renderer/replay_executor.h @@ -0,0 +1,82 @@ +#pragma once + +#include +#include +#include + +#include "execution_plan.h" + +namespace ac6::renderer { + +enum class SubmissionQueueType : uint8_t { + kUnknown = 0, + kGraphics = 1, + kAsyncCompute = 2, + kCopy = 3, +}; + +struct ReplayExecutorCommandPacket { + ExecutionCommandCategory category = ExecutionCommandCategory::kNone; + uint32_t execution_pass_index = 0; + uint32_t execution_command_index = 0; + uint32_t sequence = 0; + bool requires_resource_translation = false; + bool requires_pipeline_state = false; + bool requires_descriptor_setup = false; + bool touches_render_target = false; + bool touches_depth_stencil = false; +}; + +struct ReplayExecutorPassPacket { + std::string name; + ReplayPassRole role = ReplayPassRole::kUnknown; + SubmissionQueueType queue = SubmissionQueueType::kUnknown; + bool execution_pass_valid = false; + uint32_t execution_pass_index = 0; + uint32_t draw_count = 0; + uint32_t clear_count = 0; + uint32_t resolve_count = 0; + uint32_t render_target_0 = 0; + uint32_t depth_stencil = 0; + uint32_t output_width = 0; + uint32_t output_height = 0; + bool selected_for_present = false; + bool requires_present = false; + bool requires_resource_translation = false; + bool requires_pipeline_state = false; + bool requires_descriptor_setup = false; + ExecutionResourceRequirements resources{}; + std::vector commands; +}; + +struct ReplayExecutorFrameSummary { + bool valid = false; + uint64_t frame_index = 0; + uint32_t pass_count = 0; + uint32_t command_count = 0; + uint32_t graphics_pass_count = 0; + uint32_t async_compute_pass_count = 0; + uint32_t copy_pass_count = 0; + uint32_t present_pass_count = 0; + uint32_t resource_translation_pass_count = 0; + uint32_t pipeline_state_pass_count = 0; + uint32_t descriptor_setup_pass_count = 0; + uint32_t output_width = 0; + uint32_t output_height = 0; + bool has_present_pass = false; +}; + +struct ReplayExecutorFrame { + ReplayExecutorFrameSummary summary{}; + std::vector passes; +}; + +class ReplayExecutorPlanBuilder { + public: + ReplayExecutorFrame BuildBootstrapFrame(uint64_t frame_index) const; + ReplayExecutorFrame Build(const ExecutionFramePlan& execution_plan) const; +}; + +const char* ToString(SubmissionQueueType queue); + +} // namespace ac6::renderer