mirror of https://github.com/RPCS3/rpcs3
rsx/cfg/fp: Add delay-slot detection to remove unnecessary barriers
- Reduces emitted barriers by like 99%
This commit is contained in:
parent
93f89b8a74
commit
1e6fe1f4ab
|
|
@ -1,6 +1,7 @@
|
|||
#include "stdafx.h"
|
||||
#include "RegisterAnnotationPass.h"
|
||||
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
||||
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
|
|
@ -13,6 +14,38 @@ namespace rsx::assembler::FP
|
|||
static constexpr char content_float16 = 'H';
|
||||
static constexpr char content_dual = 'D';
|
||||
|
||||
bool is_delay_slot(const Instruction& instruction)
|
||||
{
|
||||
OPDEST dst{ .HEX = instruction.bytecode[0] };
|
||||
SRC0 src0{ .HEX = instruction.bytecode[1] };
|
||||
SRC1 src1{ .HEX = instruction.bytecode[2] };
|
||||
|
||||
if (dst.opcode != RSX_FP_OPCODE_MOV || // These slots are always populated with MOV
|
||||
dst.no_dest || // Must have a sink
|
||||
src0.reg_type != RSX_FP_REGISTER_TYPE_TEMP || // Must read from reg
|
||||
dst.dest_reg != src0.tmp_reg_index || // Must be a write-to-self
|
||||
dst.fp16 || // Always full lane. We need to collect more data on this but it won't matter
|
||||
dst.saturate || // Precision modifier
|
||||
(dst.prec != RSX_FP_PRECISION_REAL &&
|
||||
dst.prec != RSX_FP_PRECISION_UNKNOWN)) // Cannot have precision modifiers
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if we have precision modifiers on the source
|
||||
if (src0.abs || src0.neg || src1.scale)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dst.mask_x && src0.swizzle_x != 0) return false;
|
||||
if (dst.mask_y && src0.swizzle_y != 1) return false;
|
||||
if (dst.mask_z && src0.swizzle_z != 2) return false;
|
||||
if (dst.mask_w && src0.swizzle_w != 3) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<RegisterRef> compile_register_file(const std::array<char, 48 * 8>& file)
|
||||
{
|
||||
std::vector<RegisterRef> results;
|
||||
|
|
@ -90,10 +123,15 @@ namespace rsx::assembler::FP
|
|||
}
|
||||
|
||||
// Decay instructions into register references
|
||||
void annotate_instructions(BasicBlock* block, const RSXFragmentProgram& prog)
|
||||
void annotate_instructions(BasicBlock* block, const RSXFragmentProgram& prog, bool skip_delay_slots)
|
||||
{
|
||||
for (auto& instruction : block->instructions)
|
||||
{
|
||||
if (skip_delay_slots && is_delay_slot(instruction))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 operand_count = get_operand_count(static_cast<FP_opcode>(instruction.opcode));
|
||||
for (u32 i = 0; i < operand_count; i++)
|
||||
{
|
||||
|
|
@ -178,7 +216,7 @@ namespace rsx::assembler::FP
|
|||
{
|
||||
for (auto& block : graph.blocks)
|
||||
{
|
||||
annotate_instructions(&block, m_prog);
|
||||
annotate_instructions(&block, m_prog, m_config.skip_delay_slots);
|
||||
annotate_block_io(&block);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,11 @@ struct RSXFragmentProgram;
|
|||
|
||||
namespace rsx::assembler::FP
|
||||
{
|
||||
// Tunables consumed by the register annotation pass.
struct RegisterAnnotationPassOptions
{
	// Off by default. When switched on, instructions detected as delay slots are left un-annotated.
	bool skip_delay_slots{ false };
};
|
||||
|
||||
// The annotation pass annotates each basic block with 2 pieces of information:
|
||||
// 1. The "input" register list for a block.
|
||||
// 2. The "output" register list for a block (clobber list).
|
||||
|
|
@ -14,13 +19,16 @@ namespace rsx::assembler::FP
|
|||
class RegisterAnnotationPass : public CFGPass
|
||||
{
|
||||
public:
|
||||
RegisterAnnotationPass(const RSXFragmentProgram& prog)
|
||||
: m_prog(prog)
|
||||
RegisterAnnotationPass(
|
||||
const RSXFragmentProgram& prog,
|
||||
const RegisterAnnotationPassOptions& options = {})
|
||||
: m_prog(prog), m_config(options)
|
||||
{}
|
||||
|
||||
void run(FlowGraph& graph) override;
|
||||
|
||||
private:
|
||||
const RSXFragmentProgram& m_prog;
|
||||
RegisterAnnotationPassOptions m_config;
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1294,7 +1294,7 @@ std::string FragmentProgramDecompiler::Decompile()
|
|||
const auto rop_inputs = get_fragment_program_output_set(m_prog.ctrl, m_prog.mrt_buffers_count);
|
||||
rop_block->input_list.insert(rop_block->input_list.end(), rop_inputs.begin(), rop_inputs.end());
|
||||
|
||||
FP::RegisterAnnotationPass annotation_pass{ m_prog };
|
||||
FP::RegisterAnnotationPass annotation_pass{ m_prog, { .skip_delay_slots = true } };
|
||||
FP::RegisterDependencyPass dependency_pass{};
|
||||
|
||||
annotation_pass.run(graph);
|
||||
|
|
|
|||
|
|
@ -568,4 +568,30 @@ namespace rsx::assembler
|
|||
EXPECT_EQ(src1.fp16, 1);
|
||||
EXPECT_EQ(src1.swizzle_x, 1);
|
||||
}
|
||||
|
||||
// Builds a single-block graph from three instructions, runs the annotation pass with
// skip_delay_slots enabled followed by the dependency pass, and checks that the block
// still holds exactly three instructions afterwards.
TEST(TestFPIR, RegisterDependencyPass_SkipDelaySlots)
|
||||
{
|
||||
// Instruction 2 clobbers H1 which in turn clobbers R0.
|
||||
// Instruction 3 reads from R0 but is a delay slot that does nothing and can be NOPed.
|
||||
// NOTE(review): the second source line below has no trailing ';' unlike its neighbours.
// The size assertion that follows expects 3 instructions - confirm the parser accepts this form.
auto graph = CFG_from_source(R"(
|
||||
ADD R1, R0, R1;
|
||||
MOV H1, R1
|
||||
MOV R0, R0;
|
||||
)");
|
||||
|
||||
ASSERT_EQ(graph.blocks.size(), 1);
|
||||
ASSERT_EQ(graph.blocks.front().instructions.size(), 3);
|
||||
|
||||
auto& block = graph.blocks.front();
|
||||
// Default-constructed program; the passes below are run against it as-is.
RSXFragmentProgram prog{};
|
||||
|
||||
FP::RegisterAnnotationPass annotation_pass{ prog, { .skip_delay_slots = true } };
|
||||
FP::RegisterDependencyPass deps_pass{};
|
||||
|
||||
annotation_pass.run(graph);
|
||||
deps_pass.run(graph);
|
||||
|
||||
// Delay slot detection will cause no dependency injection
|
||||
// (the instruction count is unchanged from the 3 asserted before the passes ran)
ASSERT_EQ(block.instructions.size(), 3);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue