Add 60fps cutscene clamp for in-engine cinematics

Suspend the FPS unlock while a demo-manager Exec (DD sub_82184460 / EM sub_821856F8) ticks, so the frame-locked IngameCinematics Sequencer plays at native ~30fps instead of double speed. Adds ac6_cutscene_clamp CVar (default on).
This commit is contained in:
salh
2026-06-15 16:03:43 +03:00
parent 0d7a528395
commit c2e2fbfbbc
27 changed files with 620 additions and 291 deletions
@@ -33,7 +33,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
// TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
// prototyping stage (easier to do small granular updates with an
// incremental counter).
static constexpr uint32_t kVersion = 12;
static constexpr uint32_t kVersion = 13;
enum class DepthStencilMode : uint32_t {
kNoModifiers,
@@ -577,6 +577,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
void StartFragmentShaderBeforeMain();
void StartFragmentShaderInMain();
void CompleteFragmentShaderInMain();
void CompleteFragmentShader_DSV_DepthTo24Bit();
// Updates the current flow control condition (to be called in the beginning
// of exec and in jumps), closing the previous conditionals if needed.
@@ -946,6 +947,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
// With fragment shader interlock, variables in the main function.
// Otherwise, framebuffer color attachment outputs.
std::array<spv::Id, xenos::kMaxColorRenderTargets> output_or_var_fragment_data_;
// Function-scoped staging variable for guest oDepth writes. FSI consumes this
// through var_main_fragment_depth_; FBO copies it to gl_FragDepth after guest
// control flow is complete.
spv::Id output_or_var_fragment_depth_;
// For host render targets and only when needed - float.
spv::Id output_fragment_depth_;
// For host render targets and only when needed - int[1].
@@ -758,11 +758,9 @@ class VulkanCommandProcessor : public CommandProcessor {
uint32_t host_index = UINT32_MAX;
bool valid = false;
} active_occlusion_query_;
struct VertexBufferState {
uint32_t address = UINT32_MAX;
uint32_t size = UINT32_MAX;
};
std::array<VertexBufferState, 96> vertex_buffer_states_{};
static constexpr uint32_t kVertexFetchConstantCount = 96;
// Bit is set when the vertex buffer at that index has been requested in the
// current frame. Cleared between frames and on fetch constant writes.
uint64_t vertex_buffers_in_sync_[2] = {};
std::unordered_map<uint64_t, ReadbackBuffer> readback_buffers_;
std::unordered_map<uint64_t, ReadbackBuffer> memexport_readback_buffers_;
+2
View File
@@ -31,6 +31,8 @@ REXCVAR_DEFINE_BOOL(use_fuzzy_alpha_epsilon, true, "GPU",
REXCVAR_DEFINE_BOOL(vfetch_index_rounding_bias, false, "GPU/Shader",
"Apply small epsilon bias to vertex fetch indices before "
"flooring to fix black triangles caused by RCP precision");
REXCVAR_DEFINE_BOOL(draw_resolution_scaled_texture_offsets, true, "GPU/Shader",
"Scale texture offsets with draw resolution");
REXCVAR_DEFINE_BOOL(gpu_debug_markers, false, "GPU",
"Insert debug markers into GPU command streams for tools "
"like PIX and RenderDoc. Automatically enabled when "
@@ -24,9 +24,6 @@
#include <rex/math.h>
#include <rex/string.h>
REXCVAR_DEFINE_BOOL(draw_resolution_scaled_texture_offsets, true, "GPU/Shader",
"Scale texture offsets with draw resolution");
namespace rex::graphics {
using namespace ucode;
@@ -129,6 +129,7 @@ void SpirvShaderTranslator::Reset() {
output_per_vertex_member_cull_distance_ = UINT32_MAX;
type_output_per_vertex_ = spv::NoResult;
output_per_vertex_ = spv::NoResult;
output_or_var_fragment_depth_ = spv::NoResult;
output_fragment_depth_ = spv::NoResult;
output_fragment_sample_mask_ = spv::NoResult;
@@ -2923,6 +2924,7 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
type_float_, "gl_FragDepth");
builder_->addDecoration(output_fragment_depth_, spv::DecorationBuiltIn,
spv::BuiltInFragDepth);
builder_->addDecoration(output_fragment_depth_, spv::DecorationInvariant);
main_interface_.push_back(output_fragment_depth_);
}
if (alpha_to_coverage_possible && features_.sample_rate_shading) {
@@ -2957,17 +2959,18 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
// to the execution mask GPUs naturally have.
}
if (current_shader().writes_depth()) {
output_or_var_fragment_depth_ =
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, type_float_,
"xe_var_fragment_depth", const_float_0_);
}
if (edram_fragment_shader_interlock_) {
// Initialize color output variables with fragment shader interlock.
std::fill(output_or_var_fragment_data_.begin(), output_or_var_fragment_data_.end(),
spv::NoResult);
var_main_fragment_depth_ = spv::NoResult;
var_main_fragment_depth_ = output_or_var_fragment_depth_;
var_main_fsi_color_written_ = spv::NoResult;
if (current_shader().writes_depth()) {
var_main_fragment_depth_ =
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, type_float_,
"xe_var_fragment_depth", const_float_0_);
}
uint32_t color_targets_written = current_shader().writes_color_targets();
if (color_targets_written) {
static const char* const kFragmentDataVariableNames[] = {
@@ -3616,8 +3619,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, spv::Id
// Writes X to scalar gl_FragDepth.
assert_true(used_write_mask == 0b0001);
assert_true(current_shader().writes_depth());
target_pointer =
edram_fragment_shader_interlock_ ? var_main_fragment_depth_ : output_fragment_depth_;
target_pointer = output_or_var_fragment_depth_;
// Guest depth output is expected to be [0, 1].
is_clamped = true;
} break;
@@ -1365,45 +1365,7 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
}
}
if (!edram_fragment_shader_interlock_ && output_fragment_depth_ != spv::NoResult) {
Modification::DepthStencilMode depth_stencil_mode =
GetSpirvShaderModification().pixel.depth_stencil_mode;
if (depth_stencil_mode == Modification::DepthStencilMode::kFloat24Truncating ||
depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding) {
// For oDepth, depth is already in guest [0, 1].
// Without oDepth, reconstruct guest [0, 1] from host [0, 0.5] by
// doubling gl_FragCoord.z and saturating.
spv::Id depth;
if (current_shader().writes_depth()) {
depth = builder_->createLoad(output_fragment_depth_, spv::NoPrecision);
} else {
assert_true(input_fragment_coordinates_ != spv::NoResult);
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(2));
depth = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput, input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision);
if (IsSampleRate()) {
// Statically use gl_SampleID to keep this path at sample frequency.
assert_true(input_sample_id_ != spv::NoResult);
builder_->createLoad(input_sample_id_, spv::NoPrecision);
}
depth = builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createNoContractionBinOp(spv::OpFMul, type_float_, depth,
builder_->makeFloatConstant(2.0f)),
const_float_0_, const_float_1_);
}
// Convert guest [0, 1] float32 to float24 and back to host [0, 0.5].
spv::Id depth_float24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
*builder_, depth, depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding,
false, ext_inst_glsl_std_450_);
depth = SpirvShaderTranslator::Depth20e4To32(*builder_, depth_float24, 0, true, false,
ext_inst_glsl_std_450_);
builder_->createStore(depth, output_fragment_depth_);
}
}
CompleteFragmentShader_DSV_DepthTo24Bit();
if (edram_fragment_shader_interlock_) {
if (block_fsi_if_after_depth_stencil_merge) {
@@ -1425,6 +1387,72 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
}
}
void SpirvShaderTranslator::CompleteFragmentShader_DSV_DepthTo24Bit() {
if (edram_fragment_shader_interlock_ || output_fragment_depth_ == spv::NoResult) {
return;
}
Modification::DepthStencilMode depth_stencil_mode =
GetSpirvShaderModification().pixel.depth_stencil_mode;
bool convert_float24_depth =
depth_stencil_mode == Modification::DepthStencilMode::kFloat24Truncating ||
depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding;
bool shader_writes_depth = current_shader().writes_depth();
if (!shader_writes_depth && !convert_float24_depth) {
return;
}
// For oDepth, depth is already in guest [0, 1]. Without oDepth, reconstruct
// guest [0, 1] from host [0, 0.5] by doubling gl_FragCoord.z and saturating.
spv::Id depth;
if (shader_writes_depth) {
assert_true(output_or_var_fragment_depth_ != spv::NoResult);
depth = builder_->createLoad(output_or_var_fragment_depth_, spv::NoPrecision);
} else {
assert_true(input_fragment_coordinates_ != spv::NoResult);
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(2));
depth = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput, input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision);
if (IsSampleRate()) {
// Statically use gl_SampleID to keep this path at sample frequency.
assert_true(input_sample_id_ != spv::NoResult);
builder_->createLoad(input_sample_id_, spv::NoPrecision);
}
depth = builder_->createTriBuiltinCall(
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
builder_->createNoContractionBinOp(spv::OpFMul, type_float_, depth,
builder_->makeFloatConstant(2.0f)),
const_float_0_, const_float_1_);
}
if (convert_float24_depth) {
// Convert guest [0, 1] float32 to float24 and back to host [0, 0.5].
spv::Id depth_float24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
*builder_, depth, depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding,
false, ext_inst_glsl_std_450_);
depth = SpirvShaderTranslator::Depth20e4To32(*builder_, depth_float24, 0, true, false,
ext_inst_glsl_std_450_);
} else if (shader_writes_depth) {
// oDepth bypasses viewport depth scaling, so dynamically remap guest
// 0...1 to host 0...0.5 whenever the bound depth buffer is D24FS8.
spv::Id depth_float24_flag = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
builder_->makeUintConstant(kSysFlag_DepthFloat24)),
const_uint_0_);
depth = builder_->createTriOp(
spv::OpSelect, type_float_, depth_float24_flag,
builder_->createNoContractionBinOp(spv::OpFMul, type_float_, depth,
builder_->makeFloatConstant(0.5f)),
depth);
}
builder_->createStore(depth, output_fragment_depth_);
}
spv::Id SpirvShaderTranslator::LoadMsaaSamplesFromFlags() {
return builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, main_system_constant_flags_,
builder_->makeUintConstant(kSysFlag_MsaaSamples_Shift),
@@ -46,6 +46,8 @@
#include <rex/ui/vulkan/presenter.h>
#include <rex/ui/vulkan/util.h>
#include "../../../../../src/ac6_backend_fixes/ac6_backend_hooks.h"
// Legacy backend compatibility aliases for shared readback controls.
REXCVAR_DEFINE_BOOL(vulkan_readback_resolve, false, "GPU/Vulkan",
"Read render-to-texture results on the CPU")
@@ -613,14 +615,10 @@ void VulkanCommandProcessor::InvalidateGpuMemory() {
void VulkanCommandProcessor::InvalidateAllVertexBufferResidency() {
vertex_buffers_in_sync_[0] = 0;
vertex_buffers_in_sync_[1] = 0;
for (VertexBufferState& state : vertex_buffer_states_) {
state.address = UINT32_MAX;
state.size = UINT32_MAX;
}
}
void VulkanCommandProcessor::InvalidateVertexBufferResidency(uint32_t vfetch_index) {
if (vfetch_index >= vertex_buffer_states_.size()) {
if (vfetch_index >= kVertexFetchConstantCount) {
return;
}
vertex_buffers_in_sync_[vfetch_index >> 6] &= ~(uint64_t(1) << (vfetch_index & 63));
@@ -631,10 +629,10 @@ void VulkanCommandProcessor::InvalidateVertexBufferResidencyRange(uint32_t first
if (first_vfetch > last_vfetch) {
std::swap(first_vfetch, last_vfetch);
}
if (first_vfetch >= vertex_buffer_states_.size()) {
if (first_vfetch >= kVertexFetchConstantCount) {
return;
}
last_vfetch = std::min(last_vfetch, uint32_t(vertex_buffer_states_.size() - 1));
last_vfetch = std::min(last_vfetch, kVertexFetchConstantCount - 1);
for (uint32_t vfetch_index = first_vfetch; vfetch_index <= last_vfetch; ++vfetch_index) {
InvalidateVertexBufferResidency(vfetch_index);
}
@@ -2300,6 +2298,16 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontb
return;
}
// Keep the AC6 frame-boundary diagnostics in sync with the D3D12 path before
// the swap source is selected and presented.
{
system::GraphicsSwapSubmission frame_boundary_submission = {};
frame_boundary_submission.frontbuffer_virtual_address = frontbuffer_ptr;
frame_boundary_submission.frontbuffer_width = frontbuffer_width;
frame_boundary_submission.frontbuffer_height = frontbuffer_height;
graphics_system_->HandleVideoSwap(frame_boundary_submission);
}
bool skip_present_due_async_placeholder = REXCVAR_GET(async_shader_compilation) &&
REXCVAR_GET(vulkan_async_skip_incomplete_frames) &&
frame_used_async_placeholder_pipeline_;
@@ -2414,6 +2422,29 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontb
frontbuffer_height_unscaled, guest_output_width, guest_output_height,
static_cast<uint32_t>(frontbuffer_format));
system::GraphicsSwapSubmission ac6_submission = {};
uint64_t ac6_submission_sequence = 0;
graphics_system_->GetLastSwapSubmission(&ac6_submission, &ac6_submission_sequence);
if (!ac6_submission_sequence) {
ac6_submission.frontbuffer_virtual_address = frontbuffer_ptr;
ac6_submission.frontbuffer_width = frontbuffer_width;
ac6_submission.frontbuffer_height = frontbuffer_height;
}
auto* ac6_vertex_shader = active_vertex_shader();
auto* ac6_pixel_shader = active_pixel_shader();
uint64_t ac6_vertex_shader_hash =
ac6_vertex_shader ? ac6_vertex_shader->ucode_data_hash() : 0;
uint64_t ac6_pixel_shader_hash =
ac6_pixel_shader ? ac6_pixel_shader->ucode_data_hash() : 0;
ac6::backend::ReportSwapDecision(
ac6_submission, ac6_submission_sequence,
ac6::backend::SwapSourceType::kGuestSwapTexture,
swap_source_scaled, guest_output_width, guest_output_height,
frontbuffer_width_scaled, frontbuffer_height_scaled,
ac6_vertex_shader_hash, ac6_pixel_shader_hash);
system::X_VIDEO_MODE video_mode;
kernel::xboxkrnl::VdQueryVideoMode(&video_mode);
uint32_t display_width = std::max(uint32_t(1), uint32_t(video_mode.display_width));
@@ -4020,11 +4051,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
return false;
}
VertexBufferState& state = vertex_buffer_states_[vfetch_index];
if (state.address == vfetch_constant.address && state.size == vfetch_constant.size) {
vertex_buffers_in_sync_[vfetch_index >> 6] |= vfetch_bit;
continue;
}
if (!shared_memory_->RequestRange(vfetch_constant.address << 2, vfetch_constant.size << 2)) {
REXGPU_ERROR(
"Failed to request vertex buffer at 0x{:08X} (size {}) in the shared "
@@ -4032,8 +4058,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t
vfetch_constant.address << 2, vfetch_constant.size << 2);
return false;
}
state.address = vfetch_constant.address;
state.size = vfetch_constant.size;
vertex_buffers_in_sync_[vfetch_index >> 6] |= vfetch_bit;
}
}
+1
View File
@@ -127,6 +127,7 @@ endif()
if(WIN32)
target_link_libraries(rexui PUBLIC
dwmapi
dxgi
Shcore
)
else()