mirror of
https://github.com/sal063/AC6_recomp
synced 2026-06-25 10:12:19 -04:00
Add 60fps cutscene clamp for in-engine cinematics
Suspend the FPS unlock while a demo-manager Exec (DD sub_82184460 / EM sub_821856F8) ticks, so the frame-locked IngameCinematics Sequencer plays at native ~30fps instead of double speed. Adds ac6_cutscene_clamp CVar (default on).
This commit is contained in:
+6
-1
@@ -33,7 +33,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
// TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
|
||||
// prototyping stage (easier to do small granular updates with an
|
||||
// incremental counter).
|
||||
static constexpr uint32_t kVersion = 12;
|
||||
static constexpr uint32_t kVersion = 13;
|
||||
|
||||
enum class DepthStencilMode : uint32_t {
|
||||
kNoModifiers,
|
||||
@@ -577,6 +577,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
void StartFragmentShaderBeforeMain();
|
||||
void StartFragmentShaderInMain();
|
||||
void CompleteFragmentShaderInMain();
|
||||
void CompleteFragmentShader_DSV_DepthTo24Bit();
|
||||
|
||||
// Updates the current flow control condition (to be called in the beginning
|
||||
// of exec and in jumps), closing the previous conditionals if needed.
|
||||
@@ -946,6 +947,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
// With fragment shader interlock, variables in the main function.
|
||||
// Otherwise, framebuffer color attachment outputs.
|
||||
std::array<spv::Id, xenos::kMaxColorRenderTargets> output_or_var_fragment_data_;
|
||||
// Function-scoped staging variable for guest oDepth writes. FSI consumes this
|
||||
// through var_main_fragment_depth_; FBO copies it to gl_FragDepth after guest
|
||||
// control flow is complete.
|
||||
spv::Id output_or_var_fragment_depth_;
|
||||
// For host render targets and only when needed - float.
|
||||
spv::Id output_fragment_depth_;
|
||||
// For host render targets and only when needed - int[1].
|
||||
|
||||
@@ -758,11 +758,9 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||
uint32_t host_index = UINT32_MAX;
|
||||
bool valid = false;
|
||||
} active_occlusion_query_;
|
||||
struct VertexBufferState {
|
||||
uint32_t address = UINT32_MAX;
|
||||
uint32_t size = UINT32_MAX;
|
||||
};
|
||||
std::array<VertexBufferState, 96> vertex_buffer_states_{};
|
||||
static constexpr uint32_t kVertexFetchConstantCount = 96;
|
||||
// Bit is set when the vertex buffer at that index has been requested in the
|
||||
// current frame. Cleared between frames and on fetch constant writes.
|
||||
uint64_t vertex_buffers_in_sync_[2] = {};
|
||||
std::unordered_map<uint64_t, ReadbackBuffer> readback_buffers_;
|
||||
std::unordered_map<uint64_t, ReadbackBuffer> memexport_readback_buffers_;
|
||||
|
||||
@@ -31,6 +31,8 @@ REXCVAR_DEFINE_BOOL(use_fuzzy_alpha_epsilon, true, "GPU",
|
||||
REXCVAR_DEFINE_BOOL(vfetch_index_rounding_bias, false, "GPU/Shader",
|
||||
"Apply small epsilon bias to vertex fetch indices before "
|
||||
"flooring to fix black triangles caused by RCP precision");
|
||||
REXCVAR_DEFINE_BOOL(draw_resolution_scaled_texture_offsets, true, "GPU/Shader",
|
||||
"Scale texture offsets with draw resolution");
|
||||
REXCVAR_DEFINE_BOOL(gpu_debug_markers, false, "GPU",
|
||||
"Insert debug markers into GPU command streams for tools "
|
||||
"like PIX and RenderDoc. Automatically enabled when "
|
||||
|
||||
@@ -24,9 +24,6 @@
|
||||
#include <rex/math.h>
|
||||
#include <rex/string.h>
|
||||
|
||||
REXCVAR_DEFINE_BOOL(draw_resolution_scaled_texture_offsets, true, "GPU/Shader",
|
||||
"Scale texture offsets with draw resolution");
|
||||
|
||||
namespace rex::graphics {
|
||||
using namespace ucode;
|
||||
|
||||
|
||||
@@ -129,6 +129,7 @@ void SpirvShaderTranslator::Reset() {
|
||||
output_per_vertex_member_cull_distance_ = UINT32_MAX;
|
||||
type_output_per_vertex_ = spv::NoResult;
|
||||
output_per_vertex_ = spv::NoResult;
|
||||
output_or_var_fragment_depth_ = spv::NoResult;
|
||||
output_fragment_depth_ = spv::NoResult;
|
||||
output_fragment_sample_mask_ = spv::NoResult;
|
||||
|
||||
@@ -2923,6 +2924,7 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||
type_float_, "gl_FragDepth");
|
||||
builder_->addDecoration(output_fragment_depth_, spv::DecorationBuiltIn,
|
||||
spv::BuiltInFragDepth);
|
||||
builder_->addDecoration(output_fragment_depth_, spv::DecorationInvariant);
|
||||
main_interface_.push_back(output_fragment_depth_);
|
||||
}
|
||||
if (alpha_to_coverage_possible && features_.sample_rate_shading) {
|
||||
@@ -2957,17 +2959,18 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||
// to the execution mask GPUs naturally have.
|
||||
}
|
||||
|
||||
if (current_shader().writes_depth()) {
|
||||
output_or_var_fragment_depth_ =
|
||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, type_float_,
|
||||
"xe_var_fragment_depth", const_float_0_);
|
||||
}
|
||||
|
||||
if (edram_fragment_shader_interlock_) {
|
||||
// Initialize color output variables with fragment shader interlock.
|
||||
std::fill(output_or_var_fragment_data_.begin(), output_or_var_fragment_data_.end(),
|
||||
spv::NoResult);
|
||||
var_main_fragment_depth_ = spv::NoResult;
|
||||
var_main_fragment_depth_ = output_or_var_fragment_depth_;
|
||||
var_main_fsi_color_written_ = spv::NoResult;
|
||||
if (current_shader().writes_depth()) {
|
||||
var_main_fragment_depth_ =
|
||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction, type_float_,
|
||||
"xe_var_fragment_depth", const_float_0_);
|
||||
}
|
||||
uint32_t color_targets_written = current_shader().writes_color_targets();
|
||||
if (color_targets_written) {
|
||||
static const char* const kFragmentDataVariableNames[] = {
|
||||
@@ -3616,8 +3619,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, spv::Id
|
||||
// Writes X to scalar gl_FragDepth.
|
||||
assert_true(used_write_mask == 0b0001);
|
||||
assert_true(current_shader().writes_depth());
|
||||
target_pointer =
|
||||
edram_fragment_shader_interlock_ ? var_main_fragment_depth_ : output_fragment_depth_;
|
||||
target_pointer = output_or_var_fragment_depth_;
|
||||
// Guest depth output is expected to be [0, 1].
|
||||
is_clamped = true;
|
||||
} break;
|
||||
|
||||
+67
-39
@@ -1365,45 +1365,7 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
|
||||
}
|
||||
}
|
||||
|
||||
if (!edram_fragment_shader_interlock_ && output_fragment_depth_ != spv::NoResult) {
|
||||
Modification::DepthStencilMode depth_stencil_mode =
|
||||
GetSpirvShaderModification().pixel.depth_stencil_mode;
|
||||
if (depth_stencil_mode == Modification::DepthStencilMode::kFloat24Truncating ||
|
||||
depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding) {
|
||||
// For oDepth, depth is already in guest [0, 1].
|
||||
// Without oDepth, reconstruct guest [0, 1] from host [0, 0.5] by
|
||||
// doubling gl_FragCoord.z and saturating.
|
||||
spv::Id depth;
|
||||
if (current_shader().writes_depth()) {
|
||||
depth = builder_->createLoad(output_fragment_depth_, spv::NoPrecision);
|
||||
} else {
|
||||
assert_true(input_fragment_coordinates_ != spv::NoResult);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(2));
|
||||
depth = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassInput, input_fragment_coordinates_,
|
||||
id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
if (IsSampleRate()) {
|
||||
// Statically use gl_SampleID to keep this path at sample frequency.
|
||||
assert_true(input_sample_id_ != spv::NoResult);
|
||||
builder_->createLoad(input_sample_id_, spv::NoPrecision);
|
||||
}
|
||||
depth = builder_->createTriBuiltinCall(
|
||||
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
|
||||
builder_->createNoContractionBinOp(spv::OpFMul, type_float_, depth,
|
||||
builder_->makeFloatConstant(2.0f)),
|
||||
const_float_0_, const_float_1_);
|
||||
}
|
||||
// Convert guest [0, 1] float32 to float24 and back to host [0, 0.5].
|
||||
spv::Id depth_float24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
|
||||
*builder_, depth, depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding,
|
||||
false, ext_inst_glsl_std_450_);
|
||||
depth = SpirvShaderTranslator::Depth20e4To32(*builder_, depth_float24, 0, true, false,
|
||||
ext_inst_glsl_std_450_);
|
||||
builder_->createStore(depth, output_fragment_depth_);
|
||||
}
|
||||
}
|
||||
CompleteFragmentShader_DSV_DepthTo24Bit();
|
||||
|
||||
if (edram_fragment_shader_interlock_) {
|
||||
if (block_fsi_if_after_depth_stencil_merge) {
|
||||
@@ -1425,6 +1387,72 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
|
||||
}
|
||||
}
|
||||
|
||||
void SpirvShaderTranslator::CompleteFragmentShader_DSV_DepthTo24Bit() {
|
||||
if (edram_fragment_shader_interlock_ || output_fragment_depth_ == spv::NoResult) {
|
||||
return;
|
||||
}
|
||||
|
||||
Modification::DepthStencilMode depth_stencil_mode =
|
||||
GetSpirvShaderModification().pixel.depth_stencil_mode;
|
||||
bool convert_float24_depth =
|
||||
depth_stencil_mode == Modification::DepthStencilMode::kFloat24Truncating ||
|
||||
depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding;
|
||||
bool shader_writes_depth = current_shader().writes_depth();
|
||||
if (!shader_writes_depth && !convert_float24_depth) {
|
||||
return;
|
||||
}
|
||||
|
||||
// For oDepth, depth is already in guest [0, 1]. Without oDepth, reconstruct
|
||||
// guest [0, 1] from host [0, 0.5] by doubling gl_FragCoord.z and saturating.
|
||||
spv::Id depth;
|
||||
if (shader_writes_depth) {
|
||||
assert_true(output_or_var_fragment_depth_ != spv::NoResult);
|
||||
depth = builder_->createLoad(output_or_var_fragment_depth_, spv::NoPrecision);
|
||||
} else {
|
||||
assert_true(input_fragment_coordinates_ != spv::NoResult);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(2));
|
||||
depth = builder_->createLoad(
|
||||
builder_->createAccessChain(spv::StorageClassInput, input_fragment_coordinates_,
|
||||
id_vector_temp_),
|
||||
spv::NoPrecision);
|
||||
if (IsSampleRate()) {
|
||||
// Statically use gl_SampleID to keep this path at sample frequency.
|
||||
assert_true(input_sample_id_ != spv::NoResult);
|
||||
builder_->createLoad(input_sample_id_, spv::NoPrecision);
|
||||
}
|
||||
depth = builder_->createTriBuiltinCall(
|
||||
type_float_, ext_inst_glsl_std_450_, GLSLstd450NClamp,
|
||||
builder_->createNoContractionBinOp(spv::OpFMul, type_float_, depth,
|
||||
builder_->makeFloatConstant(2.0f)),
|
||||
const_float_0_, const_float_1_);
|
||||
}
|
||||
|
||||
if (convert_float24_depth) {
|
||||
// Convert guest [0, 1] float32 to float24 and back to host [0, 0.5].
|
||||
spv::Id depth_float24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
|
||||
*builder_, depth, depth_stencil_mode == Modification::DepthStencilMode::kFloat24Rounding,
|
||||
false, ext_inst_glsl_std_450_);
|
||||
depth = SpirvShaderTranslator::Depth20e4To32(*builder_, depth_float24, 0, true, false,
|
||||
ext_inst_glsl_std_450_);
|
||||
} else if (shader_writes_depth) {
|
||||
// oDepth bypasses viewport depth scaling, so dynamically remap guest
|
||||
// 0...1 to host 0...0.5 whenever the bound depth buffer is D24FS8.
|
||||
spv::Id depth_float24_flag = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(kSysFlag_DepthFloat24)),
|
||||
const_uint_0_);
|
||||
depth = builder_->createTriOp(
|
||||
spv::OpSelect, type_float_, depth_float24_flag,
|
||||
builder_->createNoContractionBinOp(spv::OpFMul, type_float_, depth,
|
||||
builder_->makeFloatConstant(0.5f)),
|
||||
depth);
|
||||
}
|
||||
|
||||
builder_->createStore(depth, output_fragment_depth_);
|
||||
}
|
||||
|
||||
spv::Id SpirvShaderTranslator::LoadMsaaSamplesFromFlags() {
|
||||
return builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(kSysFlag_MsaaSamples_Shift),
|
||||
|
||||
@@ -46,6 +46,8 @@
|
||||
#include <rex/ui/vulkan/presenter.h>
|
||||
#include <rex/ui/vulkan/util.h>
|
||||
|
||||
#include "../../../../../src/ac6_backend_fixes/ac6_backend_hooks.h"
|
||||
|
||||
// Legacy backend compatibility aliases for shared readback controls.
|
||||
REXCVAR_DEFINE_BOOL(vulkan_readback_resolve, false, "GPU/Vulkan",
|
||||
"Read render-to-texture results on the CPU")
|
||||
@@ -613,14 +615,10 @@ void VulkanCommandProcessor::InvalidateGpuMemory() {
|
||||
void VulkanCommandProcessor::InvalidateAllVertexBufferResidency() {
|
||||
vertex_buffers_in_sync_[0] = 0;
|
||||
vertex_buffers_in_sync_[1] = 0;
|
||||
for (VertexBufferState& state : vertex_buffer_states_) {
|
||||
state.address = UINT32_MAX;
|
||||
state.size = UINT32_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
void VulkanCommandProcessor::InvalidateVertexBufferResidency(uint32_t vfetch_index) {
|
||||
if (vfetch_index >= vertex_buffer_states_.size()) {
|
||||
if (vfetch_index >= kVertexFetchConstantCount) {
|
||||
return;
|
||||
}
|
||||
vertex_buffers_in_sync_[vfetch_index >> 6] &= ~(uint64_t(1) << (vfetch_index & 63));
|
||||
@@ -631,10 +629,10 @@ void VulkanCommandProcessor::InvalidateVertexBufferResidencyRange(uint32_t first
|
||||
if (first_vfetch > last_vfetch) {
|
||||
std::swap(first_vfetch, last_vfetch);
|
||||
}
|
||||
if (first_vfetch >= vertex_buffer_states_.size()) {
|
||||
if (first_vfetch >= kVertexFetchConstantCount) {
|
||||
return;
|
||||
}
|
||||
last_vfetch = std::min(last_vfetch, uint32_t(vertex_buffer_states_.size() - 1));
|
||||
last_vfetch = std::min(last_vfetch, kVertexFetchConstantCount - 1);
|
||||
for (uint32_t vfetch_index = first_vfetch; vfetch_index <= last_vfetch; ++vfetch_index) {
|
||||
InvalidateVertexBufferResidency(vfetch_index);
|
||||
}
|
||||
@@ -2300,6 +2298,16 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontb
|
||||
return;
|
||||
}
|
||||
|
||||
// Keep the AC6 frame-boundary diagnostics in sync with the D3D12 path before
|
||||
// the swap source is selected and presented.
|
||||
{
|
||||
system::GraphicsSwapSubmission frame_boundary_submission = {};
|
||||
frame_boundary_submission.frontbuffer_virtual_address = frontbuffer_ptr;
|
||||
frame_boundary_submission.frontbuffer_width = frontbuffer_width;
|
||||
frame_boundary_submission.frontbuffer_height = frontbuffer_height;
|
||||
graphics_system_->HandleVideoSwap(frame_boundary_submission);
|
||||
}
|
||||
|
||||
bool skip_present_due_async_placeholder = REXCVAR_GET(async_shader_compilation) &&
|
||||
REXCVAR_GET(vulkan_async_skip_incomplete_frames) &&
|
||||
frame_used_async_placeholder_pipeline_;
|
||||
@@ -2414,6 +2422,29 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontb
|
||||
frontbuffer_height_unscaled, guest_output_width, guest_output_height,
|
||||
static_cast<uint32_t>(frontbuffer_format));
|
||||
|
||||
system::GraphicsSwapSubmission ac6_submission = {};
|
||||
uint64_t ac6_submission_sequence = 0;
|
||||
graphics_system_->GetLastSwapSubmission(&ac6_submission, &ac6_submission_sequence);
|
||||
if (!ac6_submission_sequence) {
|
||||
ac6_submission.frontbuffer_virtual_address = frontbuffer_ptr;
|
||||
ac6_submission.frontbuffer_width = frontbuffer_width;
|
||||
ac6_submission.frontbuffer_height = frontbuffer_height;
|
||||
}
|
||||
|
||||
auto* ac6_vertex_shader = active_vertex_shader();
|
||||
auto* ac6_pixel_shader = active_pixel_shader();
|
||||
uint64_t ac6_vertex_shader_hash =
|
||||
ac6_vertex_shader ? ac6_vertex_shader->ucode_data_hash() : 0;
|
||||
uint64_t ac6_pixel_shader_hash =
|
||||
ac6_pixel_shader ? ac6_pixel_shader->ucode_data_hash() : 0;
|
||||
|
||||
ac6::backend::ReportSwapDecision(
|
||||
ac6_submission, ac6_submission_sequence,
|
||||
ac6::backend::SwapSourceType::kGuestSwapTexture,
|
||||
swap_source_scaled, guest_output_width, guest_output_height,
|
||||
frontbuffer_width_scaled, frontbuffer_height_scaled,
|
||||
ac6_vertex_shader_hash, ac6_pixel_shader_hash);
|
||||
|
||||
system::X_VIDEO_MODE video_mode;
|
||||
kernel::xboxkrnl::VdQueryVideoMode(&video_mode);
|
||||
uint32_t display_width = std::max(uint32_t(1), uint32_t(video_mode.display_width));
|
||||
@@ -4020,11 +4051,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t
|
||||
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
|
||||
return false;
|
||||
}
|
||||
VertexBufferState& state = vertex_buffer_states_[vfetch_index];
|
||||
if (state.address == vfetch_constant.address && state.size == vfetch_constant.size) {
|
||||
vertex_buffers_in_sync_[vfetch_index >> 6] |= vfetch_bit;
|
||||
continue;
|
||||
}
|
||||
if (!shared_memory_->RequestRange(vfetch_constant.address << 2, vfetch_constant.size << 2)) {
|
||||
REXGPU_ERROR(
|
||||
"Failed to request vertex buffer at 0x{:08X} (size {}) in the shared "
|
||||
@@ -4032,8 +4058,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t
|
||||
vfetch_constant.address << 2, vfetch_constant.size << 2);
|
||||
return false;
|
||||
}
|
||||
state.address = vfetch_constant.address;
|
||||
state.size = vfetch_constant.size;
|
||||
vertex_buffers_in_sync_[vfetch_index >> 6] |= vfetch_bit;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,6 +127,7 @@ endif()
|
||||
if(WIN32)
|
||||
target_link_libraries(rexui PUBLIC
|
||||
dwmapi
|
||||
dxgi
|
||||
Shcore
|
||||
)
|
||||
else()
|
||||
|
||||
Reference in New Issue
Block a user