rsx/cfg: Increase valid register file size to 768 bytes per pixel pipe

This commit is contained in:
kd-11 2025-12-16 01:18:31 +03:00
parent a7e0e4d76e
commit b701e5e3ff
4 changed files with 25 additions and 22 deletions

View File

@ -402,12 +402,10 @@ namespace rsx::assembler::FP
return {};
}
constexpr u32 register_file_max_len = 48 * 8; // H0 - H47, R0 - R23
const u32 lane_width = reg.reg.f16 ? 2 : 4;
const u32 file_offset = reg.reg.id * lane_width * 4;
ensure(file_offset < register_file_max_len, "Invalid register index");
ensure(file_offset < constants::register_file_max_len, "Invalid register index");
rsx::simple_array<u32> result{};
auto insert_lane = [&](u32 word_offset)

View File

@ -85,6 +85,21 @@ namespace rsx::assembler
namespace FP
{
namespace constants
{
// Shared constants for the fragment program (FP) register file model.
//
// The ISA can encode 48 registers of either width.
// Sized for the widest case (48 registers * 16 bytes = 768 bytes), the file spans R0-R47 or H0-H95,
// though there aren't enough addressing bits for the latter.
constexpr u32 register_file_max_len = 48 * 16;
// Per-byte content tags written into the register file by the analysis passes.
// NOTE(review): going by the names, 'R' marks 32-bit float data, 'H' marks 16-bit float data
// and 'D' marks bytes touched as both - confirm against the passes that write them.
constexpr char content_unknown = 0;
constexpr char content_float32 = 'R';
constexpr char content_float16 = 'H';
constexpr char content_dual = 'D';
}
// Map of the whole register file, one char per entry, register_file_max_len entries long.
// Entries hold the content_* tags; the passes reset them to content_unknown before analysis.
using register_file_t = std::array<char, constants::register_file_max_len>;
// Returns number of operands consumed by an instruction
u8 get_operand_count(FP_opcode opcode);

View File

@ -9,11 +9,7 @@
namespace rsx::assembler::FP
{
static constexpr u32 register_file_length = 48 * 8; // 24 F32 or 48 F16 registers
static constexpr char content_unknown = 0;
static constexpr char content_float32 = 'R';
static constexpr char content_float16 = 'H';
static constexpr char content_dual = 'D';
using namespace constants;
bool is_delay_slot(const Instruction& instruction)
{
@ -60,7 +56,7 @@ namespace rsx::assembler::FP
return true;
}
std::vector<RegisterRef> compile_register_file(const std::array<char, 48 * 8>& file)
std::vector<RegisterRef> compile_register_file(const register_file_t& file)
{
std::vector<RegisterRef> results;
@ -163,11 +159,11 @@ namespace rsx::assembler::FP
// Annotate each block with input and output lanes (read and clobber list)
void annotate_block_io(BasicBlock* block)
{
alignas(16) std::array<char, register_file_length> output_register_file;
alignas(16) std::array<char, register_file_length> input_register_file; // We'll eventually replace with a bitfield mask, but for ease of debugging, we use char for now
alignas(16) register_file_t output_register_file;
alignas(16) register_file_t input_register_file; // We'll eventually replace with a bitfield mask, but for ease of debugging, we use char for now
std::memset(output_register_file.data(), content_unknown, register_file_length);
std::memset(input_register_file.data(), content_unknown, register_file_length);
std::memset(output_register_file.data(), content_unknown, register_file_max_len);
std::memset(input_register_file.data(), content_unknown, register_file_max_len);
for (const auto& instruction : block->instructions)
{

View File

@ -9,13 +9,7 @@
namespace rsx::assembler::FP
{
static constexpr u32 register_file_length = 48 * 8; // 24 F32 or 48 F16 registers
static constexpr char content_unknown = 0;
static constexpr char content_float32 = 'R';
static constexpr char content_float16 = 'H';
static constexpr char content_dual = 'D';
using register_file_t = std::array<char, register_file_length>;
using namespace constants;
struct DependencyPassContext
{
@ -293,7 +287,7 @@ namespace rsx::assembler::FP
void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block)
{
register_file_t& register_file = ctx.exec_register_map[block];
std::memset(register_file.data(), content_unknown, register_file_length);
std::memset(register_file.data(), content_unknown, register_file_max_len);
std::unordered_set<u32> barrier16;
std::unordered_set<u32> barrier32;
@ -403,7 +397,7 @@ namespace rsx::assembler::FP
if (ctx.sync_register_map.find(target) == ctx.sync_register_map.end())
{
auto& blob = ctx.sync_register_map[target];
std::memset(blob.data(), content_unknown, register_file_length);
std::memset(blob.data(), content_unknown, register_file_max_len);
}
auto& sync_register_file = ctx.sync_register_map[target];