/** ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** * * @modified Tom Clay, 2026 - Adapted for ReXGlue runtime */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../../../../../src/ac6_backend_fixes/ac6_backend_hooks.h" #include "../../../../../src/ac6_texture_overrides.h" namespace rex::graphics::d3d12 { // Generated with `xb buildshaders`. namespace shaders { #include "../shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_128bpb_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_16bpb_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_32bpb_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_64bpb_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_8bpb_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_bgrg8_rgb8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_bgrg8_rgbg8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_depth_float_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.h" #include 
"../shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_bgra4_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_gbgr8_grgb8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_gbgr8_rgb8_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r16_snorm_float_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r16_snorm_float_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r16_unorm_float_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r16_unorm_float_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_scaled_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.h" #include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_scaled_cs.h" 
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_snorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_snorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_unorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_unorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_snorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_snorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_unorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_unorm_float_scaled_cs.h"
}  // namespace shaders

namespace {

// Support1 bits that the device must report for a format before this file
// treats it as filterable when sampled.
constexpr D3D12_FORMAT_SUPPORT1 kLinearFilterSupport = D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE;

// Asks the device whether `format` reports every bit of kLinearFilterSupport.
//
// Returns false for DXGI_FORMAT_UNKNOWN (without querying the device), false
// when the CheckFeatureSupport query itself fails, and otherwise whether all
// required Support1 bits are set. `device` is dereferenced unconditionally for
// any format other than DXGI_FORMAT_UNKNOWN.
bool IsFormatSampleFilterable(ID3D12Device* device, DXGI_FORMAT format) {
  if (format == DXGI_FORMAT_UNKNOWN) {
    return false;
  }
  D3D12_FEATURE_DATA_FORMAT_SUPPORT format_query = {format, D3D12_FORMAT_SUPPORT1_NONE,
                                                    D3D12_FORMAT_SUPPORT2_NONE};
  const bool query_ok = !FAILED(device->CheckFeatureSupport(
      D3D12_FEATURE_FORMAT_SUPPORT, &format_query, sizeof(format_query)));
  // A failed query is reported as "not filterable" rather than as an error.
  return query_ok && (format_query.Support1 & kLinearFilterSupport) == kLinearFilterSupport;
}

}  // namespace

// Preferred host (DXGI) representation of each guest texture format.
// Initialize() copies this table into host_formats_ and indexes the copy with
// uint32_t(xenos::TextureFormat), so the entry order must follow the numeric
// order of that enum; each entry is preceded by the name of the format it
// describes.
// NOTE(review): the initializers are positional. Judging by the values they
// appear to be {resource format, unsigned view format, unsigned load shader,
// signed view format, signed load shader, <bool flag>, uncompressed fallback
// format, decompression load shader, swizzle} - confirm against the HostFormat
// declaration in the header.
const D3D12TextureCache::HostFormat D3D12TextureCache::kBestHostFormats[64] = {
    // k_1_REVERSE
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_1
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_8
    {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb,
     DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_1_5_5_5
    // Red and blue swapped in the load shader for simplicity.
    {DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM,
     kLoadShaderIndexR5G5B5A1ToB5G5R5A1, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_5_6_5
    // Red and blue swapped in the load shader for simplicity.
    {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM,
     kLoadShaderIndexR5G6B5ToB5G6R5, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_6_5_5
    // On the host, green bits in blue, blue bits in green.
    {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM,
     kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)},
    // k_8_8_8_8
    {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_2_10_10_10
    // NOTE(review): the first format here is TYPELESS, while the
    // k_2_10_10_10_AS_16_16_16_16 entry further down uses
    // DXGI_FORMAT_R10G10B10A2_UNORM in the same position - confirm that the
    // difference between the two entries is intentional.
    {DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_8_A
    {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb,
     DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_8_B
    {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb,
     DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_8_8
    {DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R8G8_SNORM, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_Cr_Y1_Cb_Y0_REP
    // Red and blue swapped in the load shader for simplicity.
    // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for
    // the signed version, separate unsigned and signed load shaders completely
    // (as one doesn't need decompression for this format, while another does).
    {DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM,
     kLoadShaderIndexGBGR8ToGRGB8, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM,
     kLoadShaderIndexGBGR8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_Y1_Cr_Y0_Cb_REP
    // Red and blue swapped in the load shader for simplicity.
    // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for
    // the signed version, separate unsigned and signed load shaders completely
    // (as one doesn't need decompression for this format, while another does).
    {DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_R8G8_B8G8_UNORM,
     kLoadShaderIndexBGRG8ToRGBG8, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM,
     kLoadShaderIndexBGRG8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_16_16_EDRAM
    // Not usable as a texture, also has -32...32 range.
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_8_8_8_8_A
    {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_4_4_4_4
    // Red and blue swapped in the load shader for simplicity.
    {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM,
     kLoadShaderIndexRGBA4ToBGRA4, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_10_11_11
    // NOTE(review): the load shader identifiers list the component widths in
    // the opposite order from the format name (k_10_11_11 uses R11G11B10...,
    // k_11_11_10 uses R10G11B11...). Presumably the two follow different
    // bit-order naming conventions - confirm before "fixing" either name.
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_11_11_10
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_DXT1
    {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT2_3
    {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT4_5
    {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_16_16_16_16_EDRAM
    // Not usable as a texture, also has -32...32 range.
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // R32_FLOAT for depth because shaders would require an additional SRV to
    // sample stencil, which we don't provide.
    // k_24_8
    {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthUnorm,
     DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_24_8_FLOAT
    {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthFloat,
     DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16
    {DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R16_SNORM, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16
    {DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_R16G16_SNORM, kLoadShaderIndexUnknown,
     false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_16_16_16
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_16_EXPAND
    {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_EXPAND
    {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb,
     DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_16_16_16_EXPAND
    {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_16_FLOAT
    {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_FLOAT
    {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb,
     DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_16_16_16_FLOAT
    {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_32
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_32
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_32_32_32_32
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_32_FLOAT
    {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndex32bpb,
     DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_32_FLOAT
    {DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndex64bpb,
     DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_32_32_32_32_FLOAT
    {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT,
     kLoadShaderIndex128bpb, DXGI_FORMAT_R32G32B32A32_FLOAT,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_32_AS_8
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_AS_8_8
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_MPEG
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_MPEG
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_8_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_AS_8_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_AS_8_8_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_MPEG_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_MPEG_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_DXN
    {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexDXNToRG8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_8_8_8_8_AS_16_16_16_16
    {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT1_AS_16_16_16_16
    {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT2_3_AS_16_16_16_16
    {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT4_5_AS_16_16_16_16
    {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_2_10_10_10_AS_16_16_16_16
    {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_10_11_11_AS_16_16_16_16
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_11_11_10_AS_16_16_16_16
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_32_32_32_FLOAT
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_DXT3A
    // R8_UNORM has the same size as BC2, but doesn't have the 4x4 size
    // alignment requirement.
    {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT3A,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_DXT5A
    {DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, kLoadShaderIndex64bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8_UNORM,
     kLoadShaderIndexDXT5AToR8, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_CTX1
    {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexCTX1,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_DXT3A_AS_1_1_1_1
    {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM,
     kLoadShaderIndexDXT3AAs1111ToBGRA4, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_8_8_8_8_GAMMA_EDRAM
    // Not usable as a texture.
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_2_10_10_10_FLOAT_EDRAM
    // Not usable as a texture.
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, }; D3D12TextureCache::D3D12TextureCache(const RegisterFile& register_file, D3D12SharedMemory& shared_memory, uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, D3D12CommandProcessor& command_processor, bool bindless_resources_used) : TextureCache(register_file, shared_memory, draw_resolution_scale_x, draw_resolution_scale_y), command_processor_(command_processor), bindless_resources_used_(bindless_resources_used) {} D3D12TextureCache::~D3D12TextureCache() { // While the texture descriptor cache still exists (referenced by // ~D3D12Texture), destroy all textures. DestroyAllTextures(true); // First release the buffers to detach them from the heaps. for (std::unique_ptr& scaled_resolve_buffer_ptr : scaled_resolve_2gb_buffers_) { scaled_resolve_buffer_ptr.reset(); } scaled_resolve_heaps_.clear(); COUNT_profile_set("gpu/texture_cache/scaled_resolve_buffer_used_mb", 0); } bool D3D12TextureCache::Initialize() { const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); std::memcpy(host_formats_, kBestHostFormats, sizeof(host_formats_)); bool rgba16_norm_filterable = IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_UNORM) && IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_SNORM); bool rgba16_float_filterable = IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_FLOAT); if (!rgba16_norm_filterable && rgba16_float_filterable) { constexpr std::array kPackedRGBA16FallbackFormats = { xenos::TextureFormat::k_10_11_11, xenos::TextureFormat::k_11_11_10, xenos::TextureFormat::k_10_11_11_AS_16_16_16_16, xenos::TextureFormat::k_11_11_10_AS_16_16_16_16, }; for (xenos::TextureFormat format : kPackedRGBA16FallbackFormats) { HostFormat& host_format = 
host_formats_[uint32_t(format)]; host_format.dxgi_format_unsigned = DXGI_FORMAT_R16G16B16A16_FLOAT; host_format.dxgi_format_signed = DXGI_FORMAT_R16G16B16A16_FLOAT; } REXGPU_WARN( "D3D12TextureCache: Using RGBA16 float fallback for 10_11_11 / 11_11_10 textures"); } if (IsFormatSampleFilterable(device, DXGI_FORMAT_R16_FLOAT)) { HostFormat& host_format_16 = host_formats_[uint32_t(xenos::TextureFormat::k_16)]; if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16_UNORM)) { host_format_16.dxgi_format_unsigned = DXGI_FORMAT_R16_FLOAT; host_format_16.load_shader = kLoadShaderIndexR16UNormToFloat; } if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16_SNORM)) { host_format_16.dxgi_format_signed = DXGI_FORMAT_R16_FLOAT; host_format_16.load_shader_signed = kLoadShaderIndexR16SNormToFloat; } if (host_format_16.load_shader != kLoadShaderIndex16bpb && host_format_16.load_shader_signed == kLoadShaderIndexUnknown) { host_format_16.load_shader_signed = kLoadShaderIndex16bpb; } } if (IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16_FLOAT)) { HostFormat& host_format_16_16 = host_formats_[uint32_t(xenos::TextureFormat::k_16_16)]; if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16_UNORM)) { host_format_16_16.dxgi_format_unsigned = DXGI_FORMAT_R16G16_FLOAT; host_format_16_16.load_shader = kLoadShaderIndexRG16UNormToFloat; } if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16_SNORM)) { host_format_16_16.dxgi_format_signed = DXGI_FORMAT_R16G16_FLOAT; host_format_16_16.load_shader_signed = kLoadShaderIndexRG16SNormToFloat; } if (host_format_16_16.load_shader != kLoadShaderIndex32bpb && host_format_16_16.load_shader_signed == kLoadShaderIndexUnknown) { host_format_16_16.load_shader_signed = kLoadShaderIndex32bpb; } } if (rgba16_float_filterable) { HostFormat& host_format_16_16_16_16 = host_formats_[uint32_t(xenos::TextureFormat::k_16_16_16_16)]; if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_UNORM)) { host_format_16_16_16_16.dxgi_format_unsigned = 
DXGI_FORMAT_R16G16B16A16_FLOAT; host_format_16_16_16_16.load_shader = kLoadShaderIndexRGBA16UNormToFloat; } if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_SNORM)) { host_format_16_16_16_16.dxgi_format_signed = DXGI_FORMAT_R16G16B16A16_FLOAT; host_format_16_16_16_16.load_shader_signed = kLoadShaderIndexRGBA16SNormToFloat; } if (host_format_16_16_16_16.load_shader != kLoadShaderIndex64bpb && host_format_16_16_16_16.load_shader_signed == kLoadShaderIndexUnknown) { host_format_16_16_16_16.load_shader_signed = kLoadShaderIndex64bpb; } } if (IsDrawResolutionScaled()) { // Buffers not used yet - no need aliasing barriers to change ownership of // gigabytes between even and odd buffers. std::memset(scaled_resolve_1gb_buffer_indices_, UINT8_MAX, sizeof(scaled_resolve_1gb_buffer_indices_)); assert_true(scaled_resolve_heaps_.empty()); uint64_t scaled_resolve_address_space_size = uint64_t(SharedMemory::kBufferSize) * (draw_resolution_scale_x() * draw_resolution_scale_y()); scaled_resolve_heaps_.resize( size_t(scaled_resolve_address_space_size >> kScaledResolveHeapSizeLog2)); } scaled_resolve_heap_count_ = 0; // Create the loading root signature. D3D12_ROOT_PARAMETER root_parameters[3]; // Parameter 0 is constants (changed multiple times when untiling). root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; root_parameters[0].Constants.ShaderRegister = 0; root_parameters[0].Constants.RegisterSpace = 0; root_parameters[0].Constants.Num32BitValues = sizeof(LoadConstants) / sizeof(uint32_t); root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // Parameter 1 is the source (may be changed multiple times for the same // destination). 
D3D12_DESCRIPTOR_RANGE root_dest_range; root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; root_dest_range.NumDescriptors = 1; root_dest_range.BaseShaderRegister = 0; root_dest_range.RegisterSpace = 0; root_dest_range.OffsetInDescriptorsFromTableStart = 0; root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; root_parameters[1].DescriptorTable.NumDescriptorRanges = 1; root_parameters[1].DescriptorTable.pDescriptorRanges = &root_dest_range; root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // Parameter 2 is the destination. D3D12_DESCRIPTOR_RANGE root_source_range; root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; root_source_range.NumDescriptors = 1; root_source_range.BaseShaderRegister = 0; root_source_range.RegisterSpace = 0; root_source_range.OffsetInDescriptorsFromTableStart = 0; root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; root_parameters[2].DescriptorTable.NumDescriptorRanges = 1; root_parameters[2].DescriptorTable.pDescriptorRanges = &root_source_range; root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; D3D12_ROOT_SIGNATURE_DESC root_signature_desc; root_signature_desc.NumParameters = UINT(rex::countof(root_parameters)); root_signature_desc.pParameters = root_parameters; root_signature_desc.NumStaticSamplers = 0; root_signature_desc.pStaticSamplers = nullptr; root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; *(load_root_signature_.ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateRootSignature(provider, root_signature_desc); if (!load_root_signature_) { REXGPU_ERROR( "D3D12TextureCache: Failed to create the texture loading root " "signature"); return false; } // Specify the load shader code. 
D3D12_SHADER_BYTECODE load_shader_code[kLoadShaderCount] = {}; load_shader_code[kLoadShaderIndex8bpb] = D3D12_SHADER_BYTECODE{shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs)}; load_shader_code[kLoadShaderIndex16bpb] = D3D12_SHADER_BYTECODE{shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs)}; load_shader_code[kLoadShaderIndex32bpb] = D3D12_SHADER_BYTECODE{shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs)}; load_shader_code[kLoadShaderIndex64bpb] = D3D12_SHADER_BYTECODE{shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs)}; load_shader_code[kLoadShaderIndex128bpb] = D3D12_SHADER_BYTECODE{ shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs)}; load_shader_code[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b5a1_b5g5r5a1_cs, sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs)}; load_shader_code[kLoadShaderIndexR5G6B5ToB5G6R5] = D3D12_SHADER_BYTECODE{ shaders::texture_load_r5g6b5_b5g6r5_cs, sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs)}; load_shader_code[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs, sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs)}; load_shader_code[kLoadShaderIndexRGBA4ToBGRA4] = D3D12_SHADER_BYTECODE{shaders::texture_load_r4g4b4a4_b4g4r4a4_cs, sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_cs)}; load_shader_code[kLoadShaderIndexGBGR8ToGRGB8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_gbgr8_grgb8_cs, sizeof(shaders::texture_load_gbgr8_grgb8_cs)}; load_shader_code[kLoadShaderIndexGBGR8ToRGB8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_gbgr8_rgb8_cs, sizeof(shaders::texture_load_gbgr8_rgb8_cs)}; load_shader_code[kLoadShaderIndexBGRG8ToRGBG8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_bgrg8_rgbg8_cs, sizeof(shaders::texture_load_bgrg8_rgbg8_cs)}; load_shader_code[kLoadShaderIndexBGRG8ToRGB8] = D3D12_SHADER_BYTECODE{ 
shaders::texture_load_bgrg8_rgb8_cs, sizeof(shaders::texture_load_bgrg8_rgb8_cs)}; load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16] = D3D12_SHADER_BYTECODE{ shaders::texture_load_r10g11b11_rgba16_cs, sizeof(shaders::texture_load_r10g11b11_rgba16_cs)}; load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = D3D12_SHADER_BYTECODE{shaders::texture_load_r10g11b11_rgba16_snorm_cs, sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs)}; load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16] = D3D12_SHADER_BYTECODE{ shaders::texture_load_r11g11b10_rgba16_cs, sizeof(shaders::texture_load_r11g11b10_rgba16_cs)}; load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = D3D12_SHADER_BYTECODE{shaders::texture_load_r11g11b10_rgba16_snorm_cs, sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs)}; load_shader_code[kLoadShaderIndexR16UNormToFloat] = D3D12_SHADER_BYTECODE{ shaders::texture_load_r16_unorm_float_cs, sizeof(shaders::texture_load_r16_unorm_float_cs)}; load_shader_code[kLoadShaderIndexR16SNormToFloat] = D3D12_SHADER_BYTECODE{ shaders::texture_load_r16_snorm_float_cs, sizeof(shaders::texture_load_r16_snorm_float_cs)}; load_shader_code[kLoadShaderIndexRG16UNormToFloat] = D3D12_SHADER_BYTECODE{ shaders::texture_load_rg16_unorm_float_cs, sizeof(shaders::texture_load_rg16_unorm_float_cs)}; load_shader_code[kLoadShaderIndexRG16SNormToFloat] = D3D12_SHADER_BYTECODE{ shaders::texture_load_rg16_snorm_float_cs, sizeof(shaders::texture_load_rg16_snorm_float_cs)}; load_shader_code[kLoadShaderIndexRGBA16UNormToFloat] = D3D12_SHADER_BYTECODE{ shaders::texture_load_rgba16_unorm_float_cs, sizeof(shaders::texture_load_rgba16_unorm_float_cs)}; load_shader_code[kLoadShaderIndexRGBA16SNormToFloat] = D3D12_SHADER_BYTECODE{ shaders::texture_load_rgba16_snorm_float_cs, sizeof(shaders::texture_load_rgba16_snorm_float_cs)}; load_shader_code[kLoadShaderIndexDXT1ToRGBA8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_dxt1_rgba8_cs, sizeof(shaders::texture_load_dxt1_rgba8_cs)}; 
load_shader_code[kLoadShaderIndexDXT3ToRGBA8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_dxt3_rgba8_cs, sizeof(shaders::texture_load_dxt3_rgba8_cs)}; load_shader_code[kLoadShaderIndexDXT5ToRGBA8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_dxt5_rgba8_cs, sizeof(shaders::texture_load_dxt5_rgba8_cs)}; load_shader_code[kLoadShaderIndexDXNToRG8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_dxn_rg8_cs, sizeof(shaders::texture_load_dxn_rg8_cs)}; load_shader_code[kLoadShaderIndexDXT3A] = D3D12_SHADER_BYTECODE{shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs)}; load_shader_code[kLoadShaderIndexDXT3AAs1111ToBGRA4] = D3D12_SHADER_BYTECODE{shaders::texture_load_dxt3aas1111_bgra4_cs, sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs)}; load_shader_code[kLoadShaderIndexDXT5AToR8] = D3D12_SHADER_BYTECODE{ shaders::texture_load_dxt5a_r8_cs, sizeof(shaders::texture_load_dxt5a_r8_cs)}; load_shader_code[kLoadShaderIndexCTX1] = D3D12_SHADER_BYTECODE{shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs)}; load_shader_code[kLoadShaderIndexDepthUnorm] = D3D12_SHADER_BYTECODE{ shaders::texture_load_depth_unorm_cs, sizeof(shaders::texture_load_depth_unorm_cs)}; load_shader_code[kLoadShaderIndexDepthFloat] = D3D12_SHADER_BYTECODE{ shaders::texture_load_depth_float_cs, sizeof(shaders::texture_load_depth_float_cs)}; D3D12_SHADER_BYTECODE load_shader_code_scaled[kLoadShaderCount] = {}; if (IsDrawResolutionScaled()) { load_shader_code_scaled[kLoadShaderIndex8bpb] = D3D12_SHADER_BYTECODE{ shaders::texture_load_8bpb_scaled_cs, sizeof(shaders::texture_load_8bpb_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndex16bpb] = D3D12_SHADER_BYTECODE{ shaders::texture_load_16bpb_scaled_cs, sizeof(shaders::texture_load_16bpb_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndex32bpb] = D3D12_SHADER_BYTECODE{ shaders::texture_load_32bpb_scaled_cs, sizeof(shaders::texture_load_32bpb_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndex64bpb] = 
D3D12_SHADER_BYTECODE{ shaders::texture_load_64bpb_scaled_cs, sizeof(shaders::texture_load_64bpb_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndex128bpb] = D3D12_SHADER_BYTECODE{ shaders::texture_load_128bpb_scaled_cs, sizeof(shaders::texture_load_128bpb_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs, sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR5G6B5ToB5G6R5] = D3D12_SHADER_BYTECODE{shaders::texture_load_r5g6b5_b5g6r5_scaled_cs, sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs, sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexRGBA4ToBGRA4] = D3D12_SHADER_BYTECODE{shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs, sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16] = D3D12_SHADER_BYTECODE{shaders::texture_load_r10g11b11_rgba16_scaled_cs, sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = D3D12_SHADER_BYTECODE{shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs, sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16] = D3D12_SHADER_BYTECODE{shaders::texture_load_r11g11b10_rgba16_scaled_cs, sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = D3D12_SHADER_BYTECODE{shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs, sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR16UNormToFloat] = D3D12_SHADER_BYTECODE{shaders::texture_load_r16_unorm_float_scaled_cs, 
sizeof(shaders::texture_load_r16_unorm_float_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexR16SNormToFloat] = D3D12_SHADER_BYTECODE{shaders::texture_load_r16_snorm_float_scaled_cs, sizeof(shaders::texture_load_r16_snorm_float_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexRG16UNormToFloat] = D3D12_SHADER_BYTECODE{shaders::texture_load_rg16_unorm_float_scaled_cs, sizeof(shaders::texture_load_rg16_unorm_float_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexRG16SNormToFloat] = D3D12_SHADER_BYTECODE{shaders::texture_load_rg16_snorm_float_scaled_cs, sizeof(shaders::texture_load_rg16_snorm_float_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexRGBA16UNormToFloat] = D3D12_SHADER_BYTECODE{shaders::texture_load_rgba16_unorm_float_scaled_cs, sizeof(shaders::texture_load_rgba16_unorm_float_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexRGBA16SNormToFloat] = D3D12_SHADER_BYTECODE{shaders::texture_load_rgba16_snorm_float_scaled_cs, sizeof(shaders::texture_load_rgba16_snorm_float_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexDepthUnorm] = D3D12_SHADER_BYTECODE{shaders::texture_load_depth_unorm_scaled_cs, sizeof(shaders::texture_load_depth_unorm_scaled_cs)}; load_shader_code_scaled[kLoadShaderIndexDepthFloat] = D3D12_SHADER_BYTECODE{shaders::texture_load_depth_float_scaled_cs, sizeof(shaders::texture_load_depth_float_scaled_cs)}; } // Create the loading pipelines. 
for (size_t i = 0; i < kLoadShaderCount; ++i) { const D3D12_SHADER_BYTECODE& current_load_shader_code = load_shader_code[i]; if (!current_load_shader_code.pShaderBytecode) { continue; } *(load_pipelines_[i].ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateComputePipeline( device, current_load_shader_code.pShaderBytecode, current_load_shader_code.BytecodeLength, load_root_signature_.Get()); if (!load_pipelines_[i]) { REXGPU_ERROR( "D3D12TextureCache: Failed to create the texture loading pipeline " "for shader {}", i); return false; } if (IsDrawResolutionScaled()) { const D3D12_SHADER_BYTECODE& current_load_shader_code_scaled = load_shader_code_scaled[i]; if (current_load_shader_code_scaled.pShaderBytecode) { *(load_pipelines_scaled_[i].ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateComputePipeline( device, current_load_shader_code_scaled.pShaderBytecode, current_load_shader_code_scaled.BytecodeLength, load_root_signature_.Get()); if (!load_pipelines_scaled_[i]) { REXGPU_ERROR( "D3D12TextureCache: Failed to create the resolution-scaled " "texture loading pipeline for shader {}", i); return false; } } } } srv_descriptor_cache_allocated_ = 0; // Create a heap with null SRV descriptors, since it's faster to copy a // descriptor than to create an SRV, and null descriptors are used a lot (for // the signed version when only unsigned is used, for instance). 
D3D12_DESCRIPTOR_HEAP_DESC null_srv_descriptor_heap_desc; null_srv_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; null_srv_descriptor_heap_desc.NumDescriptors = uint32_t(NullSRVDescriptorIndex::kCount); null_srv_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; null_srv_descriptor_heap_desc.NodeMask = 0; if (FAILED(device->CreateDescriptorHeap(&null_srv_descriptor_heap_desc, IID_PPV_ARGS(&null_srv_descriptor_heap_)))) { REXGPU_ERROR( "D3D12TextureCache: Failed to create the descriptor heap for null " "SRVs"); return false; } null_srv_descriptor_heap_start_ = null_srv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart(); D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc; null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; null_srv_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; null_srv_desc.Texture2DArray.MostDetailedMip = 0; null_srv_desc.Texture2DArray.MipLevels = 1; null_srv_desc.Texture2DArray.FirstArraySlice = 0; null_srv_desc.Texture2DArray.ArraySize = 1; null_srv_desc.Texture2DArray.PlaneSlice = 0; null_srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f; device->CreateShaderResourceView( nullptr, &null_srv_desc, provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_, uint32_t(NullSRVDescriptorIndex::k2DArray))); null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; null_srv_desc.Texture3D.MostDetailedMip = 0; null_srv_desc.Texture3D.MipLevels = 1; null_srv_desc.Texture3D.ResourceMinLODClamp = 0.0f; device->CreateShaderResourceView( nullptr, &null_srv_desc, provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_, uint32_t(NullSRVDescriptorIndex::k3D))); null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; 
null_srv_desc.TextureCube.MostDetailedMip = 0; null_srv_desc.TextureCube.MipLevels = 1; null_srv_desc.TextureCube.ResourceMinLODClamp = 0.0f; device->CreateShaderResourceView( nullptr, &null_srv_desc, provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_, uint32_t(NullSRVDescriptorIndex::kCube))); return true; } void D3D12TextureCache::ClearCache() { TextureCache::ClearCache(); // Clear texture descriptor cache. srv_descriptor_cache_free_.clear(); srv_descriptor_cache_allocated_ = 0; srv_descriptor_cache_.clear(); } void D3D12TextureCache::BeginSubmission(uint64_t new_submission_index) { TextureCache::BeginSubmission(new_submission_index); // ExecuteCommandLists is a full UAV and aliasing barrier. if (IsDrawResolutionScaled()) { size_t scaled_resolve_buffer_count = GetScaledResolveBufferCount(); for (size_t i = 0; i < scaled_resolve_buffer_count; ++i) { ScaledResolveVirtualBuffer* scaled_resolve_buffer = scaled_resolve_2gb_buffers_[i].get(); if (scaled_resolve_buffer) { scaled_resolve_buffer->ClearUAVBarrierPending(); } } std::memset(scaled_resolve_1gb_buffer_indices_, UINT8_MAX, sizeof(scaled_resolve_1gb_buffer_indices_)); } } void D3D12TextureCache::BeginFrame() { TextureCache::BeginFrame(); ProcessCompletedTextureTransfers(); std::memset(unsupported_format_features_used_, 0, sizeof(unsupported_format_features_used_)); } void D3D12TextureCache::EndFrame() { // Report used unsupported texture formats. bool unsupported_header_written = false; for (uint32_t i = 0; i < 64; ++i) { uint32_t unsupported_features = unsupported_format_features_used_[i]; if (unsupported_features == 0) { continue; } if (!unsupported_header_written) { REXGPU_ERROR("Unsupported texture formats used in the frame:"); unsupported_header_written = true; } REXGPU_ERROR("* {}{}{}{}", FormatInfo::Get(xenos::TextureFormat(i))->name, unsupported_features & kUnsupportedResourceBit ? " resource" : "", unsupported_features & kUnsupportedUnormBit ? 
" unsigned" : "", unsupported_features & kUnsupportedSnormBit ? " signed" : ""); unsupported_format_features_used_[i] = 0; } } void D3D12TextureCache::RequestTextures(uint32_t used_texture_mask) { #if XE_GPU_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES TextureCache::RequestTextures(used_texture_mask); // Pre-create 3D-as-2D wrappers before draw setup. Wrapper loading may bind // compute pipelines and must happen in the texture request phase. if (REXCVAR_GET(gpu_3d_to_2d_texture)) { uint32_t textures_3d = used_texture_mask; uint32_t index_3d; while (rex::bit_scan_forward(textures_3d, &index_3d)) { textures_3d &= ~(uint32_t(1) << index_3d); const TextureBinding* binding = GetValidTextureBinding(index_3d); if (!binding || binding->key.dimension != xenos::DataDimension::k3D) { continue; } D3D12Texture* texture = static_cast(binding->texture); if (texture) { texture->GetOrCreate3DAs2DResource(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } D3D12Texture* texture_signed = static_cast(binding->texture_signed); if (texture_signed) { texture_signed->GetOrCreate3DAs2DResource(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } } } // Transition the textures to the needed usage - always in // NON_PIXEL_SHADER_RESOURCE | PIXEL_SHADER_RESOURCE states because barriers // between read-only stages, if needed, are discouraged (also if these were // tracked separately, checks would be needed to make sure, if the same // texture is bound through different fetch constants to both VS and PS, it // would be in both states). 
uint32_t textures_remaining = used_texture_mask; uint32_t index; while (rex::bit_scan_forward(textures_remaining, &index)) { textures_remaining &= ~(uint32_t(1) << index); const TextureBinding* binding = GetValidTextureBinding(index); if (!binding) { continue; } D3D12Texture* binding_texture = static_cast(binding->texture); if (binding_texture != nullptr) { // Will be referenced by the command list, so mark as used. binding_texture->MarkAsUsed(); command_processor_.PushTransitionBarrier( binding_texture->resource(), binding_texture->SetResourceState(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } D3D12Texture* binding_texture_signed = static_cast(binding->texture_signed); if (binding_texture_signed != nullptr) { binding_texture_signed->MarkAsUsed(); command_processor_.PushTransitionBarrier( binding_texture_signed->resource(), binding_texture_signed->SetResourceState(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } } } bool D3D12TextureCache::AreActiveTextureSRVKeysUpToDate( const TextureSRVKey* keys, const D3D12Shader::TextureBinding* host_shader_bindings, size_t host_shader_binding_count) const { for (size_t i = 0; i < host_shader_binding_count; ++i) { const TextureSRVKey& key = keys[i]; const TextureBinding* binding = GetValidTextureBinding(host_shader_bindings[i].fetch_constant); if (!binding) { if (key.key.is_valid) { return false; } continue; } if (key.key != binding->key || key.host_swizzle != binding->host_swizzle || key.swizzled_signs != binding->swizzled_signs) { return false; } } return true; } void D3D12TextureCache::WriteActiveTextureSRVKeys( TextureSRVKey* keys, const D3D12Shader::TextureBinding* host_shader_bindings, size_t host_shader_binding_count) const { for (size_t i = 
0; i < host_shader_binding_count; ++i) { TextureSRVKey& key = keys[i]; const TextureBinding* binding = GetValidTextureBinding(host_shader_bindings[i].fetch_constant); if (!binding) { key.key.MakeInvalid(); key.host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_0000; key.swizzled_signs = kSwizzledSignsUnsigned; continue; } key.key = binding->key; key.host_swizzle = binding->host_swizzle; key.swizzled_signs = binding->swizzled_signs; } } void D3D12TextureCache::WriteActiveTextureBindfulSRV( const D3D12Shader::TextureBinding& host_shader_binding, D3D12_CPU_DESCRIPTOR_HANDLE handle) { assert_false(bindless_resources_used_); uint32_t descriptor_index = UINT32_MAX; Texture* texture = nullptr; uint32_t fetch_constant_index = host_shader_binding.fetch_constant; const TextureBinding* binding = GetValidTextureBinding(fetch_constant_index); if (binding && AreDimensionsCompatible(host_shader_binding.dimension, binding->key.dimension)) { bool force_special_view = binding->key.dimension == xenos::DataDimension::k3D && (host_shader_binding.dimension == xenos::FetchOpDimension::k1D || host_shader_binding.dimension == xenos::FetchOpDimension::k2D); const D3D12TextureBinding& d3d12_binding = d3d12_texture_bindings_[fetch_constant_index]; if (host_shader_binding.is_signed) { // Not supporting signed compressed textures - hopefully DXN and DXT5A are // not used as signed. if (texture_util::IsAnySignSigned(binding->swizzled_signs)) { texture = IsSignedVersionSeparateForFormat(binding->key) ? 
binding->texture_signed : binding->texture; if (force_special_view && texture) { descriptor_index = FindOrCreateTextureDescriptor(*static_cast(texture), xenos::DataDimension::k2DOrStacked, true, binding->host_swizzle); } else { descriptor_index = d3d12_binding.descriptor_index_signed; } } } else { if (texture_util::IsAnySignNotSigned(binding->swizzled_signs)) { texture = binding->texture; if (force_special_view && texture) { descriptor_index = FindOrCreateTextureDescriptor(*static_cast(texture), xenos::DataDimension::k2DOrStacked, false, binding->host_swizzle); } else { descriptor_index = d3d12_binding.descriptor_index; } } } } const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider(); D3D12_CPU_DESCRIPTOR_HANDLE source_handle; if (descriptor_index != UINT32_MAX) { assert_not_null(texture); texture->MarkAsUsed(); source_handle = GetTextureDescriptorCPUHandle(descriptor_index); } else { NullSRVDescriptorIndex null_descriptor_index; switch (host_shader_binding.dimension) { case xenos::FetchOpDimension::k3DOrStacked: null_descriptor_index = NullSRVDescriptorIndex::k3D; break; case xenos::FetchOpDimension::kCube: null_descriptor_index = NullSRVDescriptorIndex::kCube; break; default: assert_true(host_shader_binding.dimension == xenos::FetchOpDimension::k1D || host_shader_binding.dimension == xenos::FetchOpDimension::k2D); null_descriptor_index = NullSRVDescriptorIndex::k2DArray; } source_handle = provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_, uint32_t(null_descriptor_index)); } auto device = provider.GetDevice(); { #if XE_GPU_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_i("gpu", "rex::graphics::d3d12::D3D12TextureCache::WriteActiveTextureBindfulSRV->" "CopyDescriptorsSimple"); #endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES device->CopyDescriptorsSimple(1, handle, source_handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } } uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex( const D3D12Shader::TextureBinding& host_shader_binding) 
{ assert_true(bindless_resources_used_); uint32_t descriptor_index = UINT32_MAX; uint32_t fetch_constant_index = host_shader_binding.fetch_constant; const TextureBinding* binding = GetValidTextureBinding(fetch_constant_index); if (binding && AreDimensionsCompatible(host_shader_binding.dimension, binding->key.dimension)) { bool force_special_view = binding->key.dimension == xenos::DataDimension::k3D && (host_shader_binding.dimension == xenos::FetchOpDimension::k1D || host_shader_binding.dimension == xenos::FetchOpDimension::k2D); const D3D12TextureBinding& d3d12_binding = d3d12_texture_bindings_[fetch_constant_index]; if (force_special_view) { Texture* texture = nullptr; bool use_signed = host_shader_binding.is_signed && texture_util::IsAnySignSigned(binding->swizzled_signs); if (use_signed) { texture = IsSignedVersionSeparateForFormat(binding->key) ? binding->texture_signed : binding->texture; } else { texture = binding->texture; } if (texture) { descriptor_index = FindOrCreateTextureDescriptor(*static_cast(texture), xenos::DataDimension::k2DOrStacked, use_signed, binding->host_swizzle); } } else { descriptor_index = host_shader_binding.is_signed ? 
d3d12_binding.descriptor_index_signed : d3d12_binding.descriptor_index; } } if (descriptor_index == UINT32_MAX) { switch (host_shader_binding.dimension) { case xenos::FetchOpDimension::k3DOrStacked: descriptor_index = uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTexture3D); break; case xenos::FetchOpDimension::kCube: descriptor_index = uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTextureCube); break; default: assert_true(host_shader_binding.dimension == xenos::FetchOpDimension::k1D || host_shader_binding.dimension == xenos::FetchOpDimension::k2D); descriptor_index = uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTexture2DArray); } } return descriptor_index; } D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters( const D3D12Shader::SamplerBinding& binding) const { const auto& regs = register_file(); xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(binding.fetch_constant); SamplerParameters parameters; xenos::ClampMode fetch_clamp_x, fetch_clamp_y, fetch_clamp_z; texture_util::GetClampModesForDimension(fetch, fetch_clamp_x, fetch_clamp_y, fetch_clamp_z); parameters.clamp_x = NormalizeClampMode(fetch_clamp_x); parameters.clamp_y = NormalizeClampMode(fetch_clamp_y); parameters.clamp_z = NormalizeClampMode(fetch_clamp_z); if (xenos::ClampModeUsesBorder(parameters.clamp_x) || xenos::ClampModeUsesBorder(parameters.clamp_y) || xenos::ClampModeUsesBorder(parameters.clamp_z)) { parameters.border_color = fetch.border_color; } else { parameters.border_color = xenos::BorderColor::k_ABGR_Black; } uint32_t mip_min_level, mip_max_level; texture_util::GetSubresourcesFromFetchConstant(fetch, nullptr, nullptr, nullptr, nullptr, nullptr, &mip_min_level, &mip_max_level); parameters.mip_min_level = mip_min_level; bool has_mips = mip_max_level > mip_min_level; xenos::TextureFilter mag_filter = binding.mag_filter == xenos::TextureFilter::kUseFetchConst ? 
fetch.mag_filter : binding.mag_filter; xenos::TextureFilter min_filter = binding.min_filter == xenos::TextureFilter::kUseFetchConst ? fetch.min_filter : binding.min_filter; xenos::TextureFilter mip_filter = binding.mip_filter == xenos::TextureFilter::kUseFetchConst ? fetch.mip_filter : binding.mip_filter; bool min_mag_linear = (mag_filter == xenos::TextureFilter::kLinear) && (min_filter == xenos::TextureFilter::kLinear); bool mip_filter_bilinear_or_trilinear = mip_filter == xenos::TextureFilter::kPoint || mip_filter == xenos::TextureFilter::kLinear; bool mip_base_map = mip_filter == xenos::TextureFilter::kBaseMap; // TODO(Triang3l): Disable filtering for texture formats not supporting it. xenos::AnisoFilter aniso_filter = binding.aniso_filter == xenos::AnisoFilter::kUseFetchConst ? fetch.aniso_filter : binding.aniso_filter; int32_t anisotropic_override = REXCVAR_GET(anisotropic_override); if (anisotropic_override > -1 && anisotropic_override < 6 && has_mips && !mip_base_map && min_mag_linear && mip_filter_bilinear_or_trilinear) { aniso_filter = xenos::AnisoFilter(anisotropic_override); } aniso_filter = std::min(aniso_filter, xenos::AnisoFilter::kMax_16_1); parameters.aniso_filter = aniso_filter; if (aniso_filter != xenos::AnisoFilter::kDisabled) { parameters.mag_linear = 1; parameters.min_linear = 1; parameters.mip_linear = 1; } else { parameters.mag_linear = mag_filter == xenos::TextureFilter::kLinear; parameters.min_linear = min_filter == xenos::TextureFilter::kLinear; parameters.mip_linear = mip_filter == xenos::TextureFilter::kLinear; } parameters.mip_base_map = mip_base_map; return parameters; } void D3D12TextureCache::WriteSampler(SamplerParameters parameters, D3D12_CPU_DESCRIPTOR_HANDLE handle) const { D3D12_SAMPLER_DESC desc; if (parameters.aniso_filter != xenos::AnisoFilter::kDisabled) { desc.Filter = D3D12_FILTER_ANISOTROPIC; desc.MaxAnisotropy = 1u << (uint32_t(parameters.aniso_filter) - 1); } else { D3D12_FILTER_TYPE d3d_filter_min = 
parameters.min_linear ? D3D12_FILTER_TYPE_LINEAR : D3D12_FILTER_TYPE_POINT; D3D12_FILTER_TYPE d3d_filter_mag = parameters.mag_linear ? D3D12_FILTER_TYPE_LINEAR : D3D12_FILTER_TYPE_POINT; D3D12_FILTER_TYPE d3d_filter_mip = parameters.mip_linear ? D3D12_FILTER_TYPE_LINEAR : D3D12_FILTER_TYPE_POINT; desc.Filter = D3D12_ENCODE_BASIC_FILTER(d3d_filter_min, d3d_filter_mag, d3d_filter_mip, D3D12_FILTER_REDUCTION_TYPE_STANDARD); desc.MaxAnisotropy = 1; } static const D3D12_TEXTURE_ADDRESS_MODE kAddressModeMap[] = { /* kRepeat */ D3D12_TEXTURE_ADDRESS_MODE_WRAP, /* kMirroredRepeat */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR, /* kClampToEdge */ D3D12_TEXTURE_ADDRESS_MODE_CLAMP, /* kMirrorClampToEdge */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, // No GL_CLAMP (clamp to half edge, half border) equivalent in Direct3D // 12, but there's no Direct3D 9 equivalent anyway, and too weird to be // suitable for intentional real usage. /* kClampToHalfway */ D3D12_TEXTURE_ADDRESS_MODE_CLAMP, // No mirror and clamp to border equivalents in Direct3D 12, but they // aren't there in Direct3D 9 either. /* kMirrorClampToHalfway */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, /* kClampToBorder */ D3D12_TEXTURE_ADDRESS_MODE_BORDER, /* kMirrorClampToBorder */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, }; desc.AddressU = kAddressModeMap[uint32_t(parameters.clamp_x)]; desc.AddressV = kAddressModeMap[uint32_t(parameters.clamp_y)]; desc.AddressW = kAddressModeMap[uint32_t(parameters.clamp_z)]; // LOD biasing is performed in shaders. 
desc.MipLODBias = 0.0f; desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; switch (parameters.border_color) { case xenos::BorderColor::k_ABGR_White: desc.BorderColor[0] = 1.0f; desc.BorderColor[1] = 1.0f; desc.BorderColor[2] = 1.0f; desc.BorderColor[3] = 1.0f; break; case xenos::BorderColor::k_ACBYCR_Black: desc.BorderColor[0] = 0.5f; desc.BorderColor[1] = 0.0f; desc.BorderColor[2] = 0.5f; desc.BorderColor[3] = 0.0f; break; case xenos::BorderColor::k_ACBCRY_Black: desc.BorderColor[0] = 0.0f; desc.BorderColor[1] = 0.5f; desc.BorderColor[2] = 0.5f; desc.BorderColor[3] = 0.0f; break; default: assert_true(parameters.border_color == xenos::BorderColor::k_ABGR_Black); desc.BorderColor[0] = 0.0f; desc.BorderColor[1] = 0.0f; desc.BorderColor[2] = 0.0f; desc.BorderColor[3] = 0.0f; break; } desc.MinLOD = float(parameters.mip_min_level); if (parameters.mip_base_map) { // "It is undefined whether LOD clamping based on MinLOD and MaxLOD Sampler // states should happen before or after deciding if magnification is // occuring" - Direct3D 11.3 Functional Specification. // Using the GL_NEAREST / GL_LINEAR minification filter emulation logic // described in the Vulkan VkSamplerCreateInfo specification, preserving // magnification vs. minification - point mip sampling (usable only without // anisotropic filtering on Direct3D 12) and MaxLOD 0.25. With anisotropic // filtering, magnification vs. minification doesn't matter as the filter is // always linear for both on Direct3D 12 - but linear filtering specifically // is what must not be done for kBaseMap, so setting MaxLOD to MinLOD. desc.MaxLOD = desc.MinLOD; if (parameters.aniso_filter == xenos::AnisoFilter::kDisabled) { assert_false(parameters.mip_linear); desc.MaxLOD += 0.25f; } } else { // Maximum mip level is in the texture resource itself. 
desc.MaxLOD = FLT_MAX; } ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice(); device->CreateSampler(&desc, handle); } bool D3D12TextureCache::ClampDrawResolutionScaleToMaxSupported( uint32_t& scale_x, uint32_t& scale_y, const ui::d3d12::D3D12Provider& provider) { bool was_clamped; if (provider.GetTiledResourcesTier() < D3D12_TILED_RESOURCES_TIER_1) { was_clamped = scale_x > 1 || scale_y > 1; scale_x = 1; scale_y = 1; return !was_clamped; } // Limit to the virtual address space available for a resource. was_clamped = false; uint32_t virtual_address_bits_per_resource = provider.GetVirtualAddressBitsPerResource(); while (scale_x > 1 || scale_y > 1) { uint64_t highest_scaled_address = uint64_t(SharedMemory::kBufferSize) * (scale_x * scale_y) - 1; if (uint32_t(64) - rex::lzcnt(highest_scaled_address) <= virtual_address_bits_per_resource) { break; } // When reducing from a square size, prefer decreasing the horizontal // resolution as vertical resolution difference is visible more clearly in // perspective. was_clamped = true; if (scale_x >= scale_y) { --scale_x; } else { --scale_y; } } return !was_clamped; } bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled, uint32_t length_unscaled, uint32_t length_scaled_alignment_log2) { assert_true(IsDrawResolutionScaled()); if (length_unscaled == 0) { return true; } if (start_unscaled > SharedMemory::kBufferSize || (SharedMemory::kBufferSize - start_unscaled) < length_unscaled) { // Exceeds the physical address space. 
return false; } uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y(); uint64_t first_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; uint64_t length_scaled_alignment_bits = (UINT64_C(1) << length_scaled_alignment_log2) - 1; uint64_t last_scaled = (uint64_t(start_unscaled + (length_unscaled - 1)) * draw_resolution_scale_area + length_scaled_alignment_bits) & ~length_scaled_alignment_bits; const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); // Ensure GPU virtual memory for buffers that may be used to access the range // is allocated - buffers are created. Always creating both buffers for all // addresses before creating the heaps so when creating a new buffer, it can // be safely assumed that no existing heaps should be mapped to it. std::array possible_buffers_first = GetPossibleScaledResolveBufferIndices(first_scaled); std::array possible_buffers_last = GetPossibleScaledResolveBufferIndices(last_scaled); size_t possible_buffer_first = std::min(possible_buffers_first[0], possible_buffers_first[1]); size_t possible_buffer_last = std::max(possible_buffers_last[0], possible_buffers_last[1]); for (size_t i = possible_buffer_first; i <= possible_buffer_last; ++i) { if (scaled_resolve_2gb_buffers_[i]) { continue; } D3D12_RESOURCE_DESC scaled_resolve_buffer_desc; // Buffer indices are gigabytes. ui::d3d12::util::FillBufferResourceDesc( scaled_resolve_buffer_desc, std::min( uint64_t(1) << 31, uint64_t(SharedMemory::kBufferSize) * draw_resolution_scale_area - (uint64_t(i) << 30)), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); // The first access will be a resolve. 
constexpr D3D12_RESOURCE_STATES kScaledResolveVirtualBufferInitialState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; ID3D12Resource* scaled_resolve_buffer_resource; if (FAILED(device->CreateReservedResource(&scaled_resolve_buffer_desc, kScaledResolveVirtualBufferInitialState, nullptr, IID_PPV_ARGS(&scaled_resolve_buffer_resource)))) { REXGPU_ERROR( "D3D12TextureCache: Failed to create a 2 GB tiled buffer for draw " "resolution scaling"); return false; } scaled_resolve_2gb_buffers_[i] = std::unique_ptr(new ScaledResolveVirtualBuffer( scaled_resolve_buffer_resource, kScaledResolveVirtualBufferInitialState)); scaled_resolve_buffer_resource->Release(); } uint32_t heap_first = uint32_t(first_scaled >> kScaledResolveHeapSizeLog2); uint32_t heap_last = uint32_t(last_scaled >> kScaledResolveHeapSizeLog2); for (uint32_t i = heap_first; i <= heap_last; ++i) { if (scaled_resolve_heaps_[i]) { continue; } auto direct_queue = provider.GetDirectQueue(); D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kScaledResolveHeapSize; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | provider.GetHeapFlagCreateNotZeroed(); Microsoft::WRL::ComPtr scaled_resolve_heap; if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&scaled_resolve_heap)))) { REXGPU_ERROR("D3D12TextureCache: Failed to create a scaled resolve tile heap"); return false; } scaled_resolve_heaps_[i] = scaled_resolve_heap; ++scaled_resolve_heap_count_; COUNT_profile_set("gpu/texture_cache/scaled_resolve_buffer_used_mb", scaled_resolve_heap_count_ << (kScaledResolveHeapSizeLog2 - 20)); D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates; region_start_coordinates.Y = 0; region_start_coordinates.Z = 0; region_start_coordinates.Subresource = 0; D3D12_TILE_REGION_SIZE region_size; region_size.NumTiles = kScaledResolveHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; region_size.UseBox = FALSE; D3D12_TILE_RANGE_FLAGS range_flags = D3D12_TILE_RANGE_FLAG_NONE; UINT 
heap_range_start_offset = 0; UINT range_tile_count = kScaledResolveHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; std::array buffer_indices = GetPossibleScaledResolveBufferIndices(uint64_t(i) << kScaledResolveHeapSizeLog2); for (size_t j = 0; j < 2; ++j) { size_t buffer_index = buffer_indices[j]; if (j && buffer_index == buffer_indices[0]) { break; } region_start_coordinates.X = UINT(((uint64_t(i) << kScaledResolveHeapSizeLog2) - (uint64_t(buffer_index) << 30)) / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES); direct_queue->UpdateTileMappings( scaled_resolve_2gb_buffers_[buffer_index]->resource(), 1, ®ion_start_coordinates, ®ion_size, scaled_resolve_heap.Get(), 1, &range_flags, &heap_range_start_offset, &range_tile_count, D3D12_TILE_MAPPING_FLAG_NONE); } command_processor_.NotifyQueueOperationsDoneDirectly(); } return true; } bool D3D12TextureCache::MakeScaledResolveRangeCurrent(uint32_t start_unscaled, uint32_t length_unscaled, uint32_t length_scaled_alignment_log2) { assert_true(IsDrawResolutionScaled()); if (!length_unscaled || start_unscaled >= SharedMemory::kBufferSize || (SharedMemory::kBufferSize - start_unscaled) < length_unscaled) { // If length is 0, the needed buffer can't be chosen because no buffer is // needed. return false; } uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y(); uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; uint64_t length_scaled_alignment_bits = (UINT64_C(1) << length_scaled_alignment_log2) - 1; uint64_t length_scaled = (uint64_t(length_unscaled) * draw_resolution_scale_area + length_scaled_alignment_bits) & ~length_scaled_alignment_bits; uint64_t last_scaled = start_scaled + (length_scaled - 1); // Get one or two buffers that can hold the whole range. 
std::array possible_buffer_indices_first = GetPossibleScaledResolveBufferIndices(start_scaled); std::array possible_buffer_indices_last = GetPossibleScaledResolveBufferIndices(last_scaled); size_t possible_buffer_indices_common[2]; size_t possible_buffer_indices_common_count = 0; for (size_t i = 0; i <= size_t(possible_buffer_indices_first[0] != possible_buffer_indices_first[1]); ++i) { size_t possible_buffer_index_first = possible_buffer_indices_first[i]; for (size_t j = 0; j <= size_t(possible_buffer_indices_last[0] != possible_buffer_indices_last[1]); ++j) { if (possible_buffer_indices_last[j] == possible_buffer_index_first) { bool possible_buffer_index_already_added = false; for (size_t k = 0; k < possible_buffer_indices_common_count; ++k) { if (possible_buffer_indices_common[k] == possible_buffer_index_first) { possible_buffer_index_already_added = true; break; } } if (!possible_buffer_index_already_added) { assert_true(possible_buffer_indices_common_count < 2); possible_buffer_indices_common[possible_buffer_indices_common_count++] = possible_buffer_index_first; } } } } if (!possible_buffer_indices_common_count) { // Too wide range requested - no buffer that contains both the start and the // end. return false; } size_t gigabyte_first = size_t(start_scaled >> 30); size_t gigabyte_last = size_t(last_scaled >> 30); // Choose the buffer that the range will be accessed through. size_t new_buffer_index; if (possible_buffer_indices_common_count >= 2) { // Prefer the buffer that is already used to make less aliasing barriers. 
assert_true(gigabyte_first + 1 >= gigabyte_last); size_t possible_buffer_indices_already_used[2] = {}; for (size_t i = gigabyte_first; i <= gigabyte_last; ++i) { size_t gigabyte_current_buffer_index = scaled_resolve_1gb_buffer_indices_[i]; for (size_t j = 0; j < possible_buffer_indices_common_count; ++j) { if (possible_buffer_indices_common[j] == gigabyte_current_buffer_index) { ++possible_buffer_indices_already_used[j]; } } } new_buffer_index = possible_buffer_indices_common[size_t( possible_buffer_indices_already_used[1] > possible_buffer_indices_already_used[0])]; } else { // The range can be accessed only by one buffer. new_buffer_index = possible_buffer_indices_common[0]; } // Switch the current buffer for the range. const ScaledResolveVirtualBuffer* new_buffer = scaled_resolve_2gb_buffers_[new_buffer_index].get(); assert_not_null(new_buffer); ID3D12Resource* new_buffer_resource = new_buffer->resource(); for (size_t i = gigabyte_first; i <= gigabyte_last; ++i) { size_t gigabyte_current_buffer_index = scaled_resolve_1gb_buffer_indices_[i]; if (gigabyte_current_buffer_index == new_buffer_index) { continue; } if (gigabyte_current_buffer_index != SIZE_MAX) { ScaledResolveVirtualBuffer* gigabyte_current_buffer = scaled_resolve_2gb_buffers_[gigabyte_current_buffer_index].get(); assert_not_null(gigabyte_current_buffer); command_processor_.PushAliasingBarrier(gigabyte_current_buffer->resource(), new_buffer_resource); // An aliasing barrier synchronizes and flushes everything. 
gigabyte_current_buffer->ClearUAVBarrierPending(); } scaled_resolve_1gb_buffer_indices_[i] = new_buffer_index; } scaled_resolve_current_range_start_scaled_ = start_scaled; scaled_resolve_current_range_length_scaled_ = length_scaled; return true; } void D3D12TextureCache::TransitionCurrentScaledResolveRange(D3D12_RESOURCE_STATES new_state) { assert_true(IsDrawResolutionScaled()); ScaledResolveVirtualBuffer& buffer = GetCurrentScaledResolveBuffer(); command_processor_.PushTransitionBarrier(buffer.resource(), buffer.SetResourceState(new_state), new_state); } void D3D12TextureCache::CreateCurrentScaledResolveRangeUintPow2SRV( D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2) { assert_true(IsDrawResolutionScaled()); size_t buffer_index = GetCurrentScaledResolveBufferIndex(); const ScaledResolveVirtualBuffer* buffer = scaled_resolve_2gb_buffers_[buffer_index].get(); assert_not_null(buffer); ui::d3d12::util::CreateBufferTypedSRV( command_processor_.GetD3D12Provider().GetDevice(), handle, buffer->resource(), ui::d3d12::util::GetUintPow2DXGIFormat(element_size_bytes_pow2), uint32_t(scaled_resolve_current_range_length_scaled_ >> element_size_bytes_pow2), (scaled_resolve_current_range_start_scaled_ - (uint64_t(buffer_index) << 30)) >> element_size_bytes_pow2); } void D3D12TextureCache::CreateCurrentScaledResolveRangeUintPow2UAV( D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2) { assert_true(IsDrawResolutionScaled()); size_t buffer_index = GetCurrentScaledResolveBufferIndex(); const ScaledResolveVirtualBuffer* buffer = scaled_resolve_2gb_buffers_[buffer_index].get(); assert_not_null(buffer); ui::d3d12::util::CreateBufferTypedUAV( command_processor_.GetD3D12Provider().GetDevice(), handle, buffer->resource(), ui::d3d12::util::GetUintPow2DXGIFormat(element_size_bytes_pow2), uint32_t(scaled_resolve_current_range_length_scaled_ >> element_size_bytes_pow2), (scaled_resolve_current_range_start_scaled_ - (uint64_t(buffer_index) << 30)) >> 
element_size_bytes_pow2); } ID3D12Resource* D3D12TextureCache::RequestSwapTexture(D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out, xenos::TextureFormat& format_out, uint32_t* width_unscaled_out, uint32_t* height_unscaled_out) { const auto& regs = register_file(); xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0); TextureKey key; BindingInfoFromFetchConstant(fetch, key, nullptr); if (!key.is_valid || key.base_page == 0 || key.dimension != xenos::DataDimension::k2DOrStacked) { return nullptr; } D3D12Texture* texture = static_cast(FindOrCreateTexture(key)); if (texture == nullptr || !LoadTextureData(*texture)) { return nullptr; } texture->MarkAsUsed(); // The swap texture is likely to be used only for the presentation compute // shader, and not during emulation, where it'd be NON_PIXEL_SHADER_RESOURCE | // PIXEL_SHADER_RESOURCE. ID3D12Resource* texture_resource = texture->resource(); command_processor_.PushTransitionBarrier( texture_resource, texture->SetResourceState(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); srv_desc_out.Format = GetDXGIUnormFormat(key); srv_desc_out.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srv_desc_out.Shader4ComponentMapping = GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(key)) | D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES; srv_desc_out.Texture2D.MostDetailedMip = 0; srv_desc_out.Texture2D.MipLevels = 1; srv_desc_out.Texture2D.PlaneSlice = 0; srv_desc_out.Texture2D.ResourceMinLODClamp = 0.0f; // Only texture->key, not the result of BindingInfoFromFetchConstant, contains // whether the texture is scaled. 
key = texture->key(); if (width_unscaled_out) { *width_unscaled_out = key.GetWidth(); } if (height_unscaled_out) { *height_unscaled_out = key.GetHeight(); } format_out = key.format; return texture_resource; } D3D12TextureCache::D3D12Texture::D3D12Texture(D3D12TextureCache& texture_cache, const TextureKey& key, ID3D12Resource* resource, D3D12_RESOURCE_STATES resource_state, bool track_usage) : Texture(texture_cache, key, track_usage), resource_(resource), resource_state_(resource_state) { ID3D12Device* device = texture_cache.command_processor_.GetD3D12Provider().GetDevice(); D3D12_RESOURCE_DESC resource_desc = resource_->GetDesc(); SetHostMemoryUsage(device->GetResourceAllocationInfo(0, 1, &resource_desc).SizeInBytes); } D3D12TextureCache::D3D12Texture::~D3D12Texture() { auto& d3d12_texture_cache = static_cast(texture_cache()); for (const auto& descriptor_pair : srv_descriptors_) { d3d12_texture_cache.ReleaseTextureDescriptor(descriptor_pair.second); } } bool D3D12TextureCache::IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width, uint32_t height) const { DXGI_FORMAT dxgi_format_uncompressed = host_formats_[uint32_t(format)].dxgi_format_uncompressed; if (dxgi_format_uncompressed == DXGI_FORMAT_UNKNOWN) { return false; } const FormatInfo* format_info = FormatInfo::Get(format); if (!(width & (format_info->block_width - 1)) && !(height & (format_info->block_height - 1))) { return false; } // UnalignedBlockTexturesSupported is for block-compressed textures with the // block size of 4x4, but not for 2x1 (4:2:2) subsampled formats. 
if (format_info->block_width == 4 && format_info->block_height == 4 && command_processor_.GetD3D12Provider().AreUnalignedBlockTexturesSupported()) { return false; } return true; } TextureCache::LoadShaderIndex D3D12TextureCache::GetLoadShaderIndex(TextureKey key) const { const HostFormat& host_format = host_formats_[uint32_t(key.format)]; if (key.signed_separate) { return host_format.load_shader_signed; } if (IsDecompressionNeeded(key.format, key.GetWidth(), key.GetHeight())) { return host_format.load_shader_decompress; } return host_format.load_shader; } bool D3D12TextureCache::IsSignedVersionSeparateForFormat(TextureKey key) const { const HostFormat& host_format = host_formats_[uint32_t(key.format)]; return host_format.load_shader_signed != kLoadShaderIndexUnknown && host_format.load_shader_signed != host_format.load_shader; } bool D3D12TextureCache::IsScaledResolveSupportedForFormat(TextureKey key) const { LoadShaderIndex load_shader = GetLoadShaderIndex(key); return load_shader != kLoadShaderIndexUnknown && load_pipelines_scaled_[load_shader] != nullptr; } uint32_t D3D12TextureCache::GetHostFormatSwizzle(TextureKey key) const { return host_formats_[uint32_t(key.format)].swizzle; } uint32_t D3D12TextureCache::GetMaxHostTextureWidthHeight(xenos::DataDimension dimension) const { switch (dimension) { case xenos::DataDimension::k1D: case xenos::DataDimension::k2DOrStacked: // 1D and 2D are emulated as 2D arrays. return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; case xenos::DataDimension::k3D: return D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; case xenos::DataDimension::kCube: return D3D12_REQ_TEXTURECUBE_DIMENSION; default: assert_unhandled_case(dimension); return 0; } } uint32_t D3D12TextureCache::GetMaxHostTextureDepthOrArraySize( xenos::DataDimension dimension) const { switch (dimension) { case xenos::DataDimension::k1D: case xenos::DataDimension::k2DOrStacked: // 1D and 2D are emulated as 2D arrays. 
return D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION; case xenos::DataDimension::k3D: return D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; case xenos::DataDimension::kCube: return D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION / 6 * 6; default: assert_unhandled_case(dimension); return 0; } } std::unique_ptr D3D12TextureCache::CreateTexture(TextureKey key) { D3D12_RESOURCE_DESC desc; desc.Format = GetDXGIResourceFormat(key); if (desc.Format == DXGI_FORMAT_UNKNOWN) { unsupported_format_features_used_[uint32_t(key.format)] |= kUnsupportedResourceBit; return nullptr; } if (key.dimension == xenos::DataDimension::k3D) { desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; } else { // 1D textures are treated as 2D for simplicity. desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; } desc.Alignment = 0; desc.Width = key.GetWidth(); desc.Height = key.GetHeight(); if (key.scaled_resolve) { desc.Width *= draw_resolution_scale_x(); desc.Height *= draw_resolution_scale_y(); } desc.DepthOrArraySize = key.GetDepthOrArraySize(); desc.MipLevels = key.mip_max_level + 1; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; // Untiling through a buffer instead of using unordered access because copying // is not done that often. desc.Flags = D3D12_RESOURCE_FLAG_NONE; const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); // Assuming untiling will be the next operation. 
D3D12_RESOURCE_STATES resource_state = D3D12_RESOURCE_STATE_COPY_DEST; Microsoft::WRL::ComPtr resource; if (FAILED(device->CreateCommittedResource(&ui::d3d12::util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), &desc, resource_state, nullptr, IID_PPV_ARGS(&resource)))) { return nullptr; } return std::unique_ptr(new D3D12Texture(*this, key, resource.Get(), resource_state)); } bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base, bool load_mips) { D3D12Texture& d3d12_texture = static_cast(texture); TextureKey texture_key = d3d12_texture.key(); DeferredCommandList& command_list = command_processor_.GetDeferredCommandList(); ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice(); // Get the pipeline. LoadShaderIndex load_shader = GetLoadShaderIndex(texture_key); if (load_shader == kLoadShaderIndexUnknown) { return false; } bool host_format_is_signed = texture_key.signed_separate && IsSignedVersionSeparateForFormat(texture_key); DXGI_FORMAT host_sample_format = host_format_is_signed ? host_formats_[uint32_t(texture_key.format)].dxgi_format_signed : GetDXGIUnormFormat(texture_key); LoadShaderIndex load_shader_float_convert = kLoadShaderIndexUnknown; if (host_sample_format == DXGI_FORMAT_R16G16B16A16_FLOAT) { switch (load_shader) { case kLoadShaderIndexR10G11B11ToRGBA16: case kLoadShaderIndexR11G11B10ToRGBA16: load_shader_float_convert = kLoadShaderIndexRGBA16UNormToFloat; break; case kLoadShaderIndexR10G11B11ToRGBA16SNorm: case kLoadShaderIndexR11G11B10ToRGBA16SNorm: load_shader_float_convert = kLoadShaderIndexRGBA16SNormToFloat; break; default: break; } } bool texture_resolution_scaled = texture_key.scaled_resolve; ID3D12PipelineState* pipeline = texture_resolution_scaled ? 
load_pipelines_scaled_[load_shader].Get() : load_pipelines_[load_shader].Get(); if (pipeline == nullptr) { return false; } const LoadShaderInfo& load_shader_info = GetLoadShaderInfo(load_shader); const LoadShaderInfo* load_shader_info_float_convert = nullptr; ID3D12PipelineState* pipeline_float_convert = nullptr; if (load_shader_float_convert != kLoadShaderIndexUnknown) { pipeline_float_convert = texture_resolution_scaled ? load_pipelines_scaled_[load_shader_float_convert].Get() : load_pipelines_[load_shader_float_convert].Get(); if (pipeline_float_convert == nullptr) { return false; } load_shader_info_float_convert = &GetLoadShaderInfo(load_shader_float_convert); } // Get the guest layout. const texture_util::TextureGuestLayout& guest_layout = d3d12_texture.guest_layout(); xenos::DataDimension dimension = texture_key.dimension; bool is_3d = dimension == xenos::DataDimension::k3D; bool is_3d_tiling = is_3d || d3d12_texture.force_load_3d_tiling(); uint32_t width = texture_key.GetWidth(); uint32_t height = texture_key.GetHeight(); uint32_t depth_or_array_size = texture_key.GetDepthOrArraySize(); uint32_t depth = is_3d ? depth_or_array_size : 1; uint32_t array_size = is_3d ? 1 : depth_or_array_size; xenos::TextureFormat guest_format = texture_key.format; const FormatInfo* guest_format_info = FormatInfo::Get(guest_format); uint32_t block_width = guest_format_info->block_width; uint32_t block_height = guest_format_info->block_height; uint32_t bytes_per_block = guest_format_info->bytes_per_block(); uint32_t level_first = load_base ? 0 : 1; uint32_t level_last = load_mips ? texture_key.mip_max_level : 0; assert_true(level_first <= level_last); uint32_t level_packed = guest_layout.packed_level; uint32_t level_stored_first = std::min(level_first, level_packed); uint32_t level_stored_last = std::min(level_last, level_packed); uint32_t texture_resolution_scale_x = texture_resolution_scaled ? 
draw_resolution_scale_x() : 1; uint32_t texture_resolution_scale_y = texture_resolution_scaled ? draw_resolution_scale_y() : 1; // The loop counter can mean two things depending on whether the packed mip // tail is stored as mip 0, because in this case, it would be ambiguous since // both the base and the mips would be on "level 0", but stored in separate // places. uint32_t loop_level_first, loop_level_last; if (level_packed == 0) { // Packed mip tail is the level 0 - may need to load mip tails for the base, // the mips, or both. // Loop iteration 0 - base packed mip tail. // Loop iteration 1 - mips packed mip tail. loop_level_first = uint32_t(level_first != 0); loop_level_last = uint32_t(level_last != 0); } else { // Packed mip tail is not the level 0. // Loop iteration is the actual level being loaded. loop_level_first = level_stored_first; loop_level_last = level_stored_last; } // Get the host layout and the buffer. bool host_block_compressed = host_formats_[uint32_t(guest_format)].is_block_compressed && !IsDecompressionNeeded(guest_format, width, height); uint32_t host_block_width = host_block_compressed ? block_width : 1; uint32_t host_block_height = host_block_compressed ? block_height : 1; uint32_t host_x_blocks_per_thread = UINT32_C(1) << load_shader_info.guest_x_blocks_per_thread_log2; if (!host_block_compressed) { // Decompressing guest blocks. host_x_blocks_per_thread *= block_width; } UINT64 copy_buffer_size = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_slice_layout_base; UINT64 host_slice_size_base; // Indexing is the same as for guest stored mips: // 1...min(level_last, level_packed) if level_packed is not 0, or only 0 if // level_packed == 0. 
D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_slice_layouts_mips[xenos::kTextureMaxMips]; UINT64 host_slice_sizes_mips[xenos::kTextureMaxMips]; // Using custom calculations instead of GetCopyableFootprints because // shaders may unconditionally copy multiple blocks along X per thread for // simplicity, to make sure all rows (also including the last one - // GetCopyableFootprints aligns row offsets, but not the total size) are // properly padded to the number of blocks copied in an invocation without // implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT. DXGI_FORMAT host_copy_format = GetDXGIResourceFormat(guest_format, width, height); for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; ++loop_level) { bool is_base = loop_level == 0; uint32_t level = (level_packed == 0) ? 0 : loop_level; D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout = is_base ? host_slice_layout_base : host_slice_layouts_mips[level]; level_host_slice_layout.Offset = copy_buffer_size; level_host_slice_layout.Footprint.Format = host_copy_format; if (level == level_packed) { // Loading the packed tail for the base or the mips - load the whole tail // to copy regions out of it. const texture_util::TextureGuestLayout::Level& guest_layout_packed = is_base ? 
guest_layout.base : guest_layout.mips[level]; level_host_slice_layout.Footprint.Width = guest_layout_packed.x_extent_blocks * block_width; level_host_slice_layout.Footprint.Height = guest_layout_packed.y_extent_blocks * block_height; level_host_slice_layout.Footprint.Depth = guest_layout_packed.z_extent; } else { level_host_slice_layout.Footprint.Width = std::max(width >> level, uint32_t(1)); level_host_slice_layout.Footprint.Height = std::max(height >> level, uint32_t(1)); level_host_slice_layout.Footprint.Depth = std::max(depth >> level, uint32_t(1)); } level_host_slice_layout.Footprint.Width = rex::round_up(level_host_slice_layout.Footprint.Width * texture_resolution_scale_x, UINT(host_block_width)); level_host_slice_layout.Footprint.Height = rex::round_up(level_host_slice_layout.Footprint.Height * texture_resolution_scale_y, UINT(host_block_height)); level_host_slice_layout.Footprint.RowPitch = rex::align(rex::round_up(level_host_slice_layout.Footprint.Width / host_block_width, host_x_blocks_per_thread) * load_shader_info.bytes_per_host_block, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); UINT64 level_host_slice_size = rex::align(UINT64(level_host_slice_layout.Footprint.RowPitch) * (level_host_slice_layout.Footprint.Height / host_block_height) * level_host_slice_layout.Footprint.Depth, UINT64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)); (is_base ? host_slice_size_base : host_slice_sizes_mips[level]) = level_host_slice_size; copy_buffer_size += level_host_slice_size * array_size; } D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; ID3D12Resource* copy_buffer = command_processor_.RequestScratchGPUBuffer(uint32_t(copy_buffer_size), copy_buffer_state); if (copy_buffer == nullptr) { return false; } // Begin loading. 
// May use different buffers for scaled base and mips, and also addressability // of more than 128 * 2^20 (2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP) // texels is not mandatory - need two separate UAV descriptors for base and // mips. // Destination. uint32_t descriptor_count = 1; if (texture_resolution_scaled) { // Source - base and mips, one or both. descriptor_count += (level_first == 0 && level_last != 0) ? 2 : 1; } else { // Source - shared memory. if (!bindless_resources_used_) { ++descriptor_count; } } if (pipeline_float_convert != nullptr) { ++descriptor_count; } std::array descriptors_allocated; if (!command_processor_.RequestOneUseSingleViewDescriptors(descriptor_count, descriptors_allocated.data())) { command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); return false; } uint32_t descriptor_write_index = 0; command_processor_.SetExternalPipeline(pipeline); command_list.D3DSetComputeRootSignature(load_root_signature_.Get()); // Set up the destination descriptor. assert_true(descriptor_write_index < descriptor_count); ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_dest = descriptors_allocated[descriptor_write_index++]; ui::d3d12::util::CreateBufferTypedUAV( device, descriptor_dest.first, copy_buffer, ui::d3d12::util::GetUintPow2DXGIFormat(load_shader_info.dest_bpe_log2), uint32_t(copy_buffer_size) >> load_shader_info.dest_bpe_log2); command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second); // Set up the unscaled source descriptor (scaled needs two descriptors that // depend on the buffer being current, so they will be set later - for mips, // after loading the base is done). 
if (!texture_resolution_scaled) { D3D12SharedMemory& d3d12_shared_memory = static_cast(shared_memory()); d3d12_shared_memory.UseForReading(); ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_unscaled_source; if (bindless_resources_used_) { descriptor_unscaled_source = command_processor_.GetSharedMemoryUintPow2BindlessSRVHandlePair( load_shader_info.source_bpe_log2); } else { assert_true(descriptor_write_index < descriptor_count); descriptor_unscaled_source = descriptors_allocated[descriptor_write_index++]; d3d12_shared_memory.WriteUintPow2SRVDescriptor(descriptor_unscaled_source.first, load_shader_info.source_bpe_log2); } command_list.D3DSetComputeRootDescriptorTable(1, descriptor_unscaled_source.second); } ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_float_convert_source = {}; if (pipeline_float_convert != nullptr) { assert_true(descriptor_write_index < descriptor_count); descriptor_float_convert_source = descriptors_allocated[descriptor_write_index++]; } // Submit the copy buffer population commands. auto& cbuffer_pool = command_processor_.GetConstantBufferPool(); LoadConstants load_constants; // 3 bits for each. assert_true(texture_resolution_scale_x <= 7); assert_true(texture_resolution_scale_y <= 7); load_constants.is_tiled_3d_endian_scale = uint32_t(texture_key.tiled) | (uint32_t(is_3d_tiling) << 1) | (uint32_t(texture_key.endianness) << 2) | (texture_resolution_scale_x << 4) | (texture_resolution_scale_y << 7); // The loop is slices within levels because the base and the levels may need // different portions of the scaled resolve virtual address space to be // available through buffers, and to create a descriptor, the buffer start // address is required - which may be different for base and mips. 
bool scaled_mips_source_set_up = false; uint32_t guest_x_blocks_per_group_log2 = load_shader_info.GetGuestXBlocksPerGroupLog2(); for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; ++loop_level) { bool is_base = loop_level == 0; uint32_t level = (level_packed == 0) ? 0 : loop_level; uint32_t guest_address = (is_base ? texture_key.base_page : texture_key.mip_page) << 12; // Set up the base or mips source, also making it accessible if loading from // scaled resolve memory. if (texture_resolution_scaled && (is_base || !scaled_mips_source_set_up)) { uint32_t guest_size_unscaled = is_base ? d3d12_texture.GetGuestBaseSize() : d3d12_texture.GetGuestMipsSize(); if (!MakeScaledResolveRangeCurrent(guest_address, guest_size_unscaled, load_shader_info.source_bpe_log2)) { command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); return false; } TransitionCurrentScaledResolveRange(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); assert_true(descriptor_write_index < descriptor_count); ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_scaled_source = descriptors_allocated[descriptor_write_index++]; CreateCurrentScaledResolveRangeUintPow2SRV(descriptor_scaled_source.first, load_shader_info.source_bpe_log2); command_list.D3DSetComputeRootDescriptorTable(1, descriptor_scaled_source.second); if (!is_base) { scaled_mips_source_set_up = true; } } if (texture_resolution_scaled) { // Offset already applied in the buffer because more than 512 MB can't be // directly addresses as R32 on some hardware (above // 2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP). load_constants.guest_offset = 0; } else { load_constants.guest_offset = guest_address; } if (!is_base) { load_constants.guest_offset += guest_layout.mip_offsets_bytes[level] * (texture_resolution_scale_x * texture_resolution_scale_y); } const texture_util::TextureGuestLayout::Level& level_guest_layout = is_base ? 
guest_layout.base : guest_layout.mips[level]; uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes; if (texture_key.tiled) { // Shaders expect pitch in blocks for tiled textures. level_guest_pitch /= bytes_per_block; assert_zero(level_guest_pitch & (xenos::kTextureTileWidthHeight - 1)); } load_constants.guest_pitch_aligned = level_guest_pitch; load_constants.guest_z_stride_block_rows_aligned = level_guest_layout.z_slice_stride_block_rows; assert_true(!is_3d_tiling || !(load_constants.guest_z_stride_block_rows_aligned & (xenos::kTextureTileWidthHeight - 1))); uint32_t level_width, level_height, level_depth; if (level == level_packed) { // This is the packed mip tail, containing not only the specified level, // but also other levels at different offsets - load the entire needed // extents. level_width = level_guest_layout.x_extent_blocks * block_width; level_height = level_guest_layout.y_extent_blocks * block_height; level_depth = level_guest_layout.z_extent; } else { level_width = std::max(width >> level, uint32_t(1)); level_height = std::max(height >> level, uint32_t(1)); level_depth = std::max(depth >> level, uint32_t(1)); } load_constants.size_blocks[0] = (level_width + (block_width - 1)) / block_width * texture_resolution_scale_x; load_constants.size_blocks[1] = (level_height + (block_height - 1)) / block_height * texture_resolution_scale_y; load_constants.size_blocks[2] = level_depth; load_constants.height_texels = level_height; uint32_t group_count_x = (load_constants.size_blocks[0] + ((UINT32_C(1) << guest_x_blocks_per_group_log2) - 1)) >> guest_x_blocks_per_group_log2; uint32_t group_count_y = (load_constants.size_blocks[1] + ((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >> kLoadGuestYBlocksPerGroupLog2; const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout = is_base ? host_slice_layout_base : host_slice_layouts_mips[level]; uint32_t host_slice_size = uint32_t(is_base ? 
host_slice_size_base : host_slice_sizes_mips[level]); load_constants.host_offset = uint32_t(level_host_slice_layout.Offset); load_constants.host_pitch = level_host_slice_layout.Footprint.RowPitch; command_list.D3DSetComputeRoot32BitConstants(0, sizeof(load_constants) / sizeof(uint32_t), &load_constants, 0); uint32_t level_array_slice_stride_bytes_scaled = level_guest_layout.array_slice_stride_bytes * (texture_resolution_scale_x * texture_resolution_scale_y); for (uint32_t slice = 0; slice < array_size; ++slice) { if (slice != 0) { command_list.D3DSetComputeRoot32BitConstants( 0, sizeof(load_constants.guest_offset) / sizeof(uint32_t), &load_constants.guest_offset, offsetof(LoadConstants, guest_offset) / sizeof(uint32_t)); command_list.D3DSetComputeRoot32BitConstants( 0, sizeof(load_constants.host_offset) / sizeof(uint32_t), &load_constants.host_offset, offsetof(LoadConstants, host_offset) / sizeof(uint32_t)); } assert_true(copy_buffer_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS); command_processor_.SubmitBarriers(); command_list.D3DDispatch(group_count_x, group_count_y, load_constants.size_blocks[2]); load_constants.guest_offset += level_array_slice_stride_bytes_scaled; load_constants.host_offset += host_slice_size; } } ID3D12Resource* copy_buffer_copy_source = copy_buffer; D3D12_RESOURCE_STATES copy_buffer_copy_source_state = copy_buffer_state; if (pipeline_float_convert != nullptr) { static std::array float_fallback_log_mask = {}; uint8_t float_fallback_log_bit = host_format_is_signed ? uint8_t(1 << 1) : uint8_t(1 << 0); uint8_t& float_fallback_log_state = float_fallback_log_mask[uint32_t(texture_key.format)]; if ((float_fallback_log_state & float_fallback_log_bit) == 0) { float_fallback_log_state |= float_fallback_log_bit; REXGPU_INFO("D3D12TextureCache: Using two-pass float fallback for format {} ({})", FormatInfo::Get(texture_key.format)->name, host_format_is_signed ? 
"signed" : "unsigned"); } D3D12_RESOURCE_STATES copy_buffer_float_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; ID3D12Resource* copy_buffer_float = command_processor_.RequestScratchGPUBuffer( uint32_t(copy_buffer_size), copy_buffer_float_state); if (copy_buffer_float == nullptr) { command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); return false; } command_processor_.PushTransitionBarrier(copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; command_processor_.SubmitBarriers(); ui::d3d12::util::CreateBufferTypedSRV( device, descriptor_float_convert_source.first, copy_buffer, ui::d3d12::util::GetUintPow2DXGIFormat(load_shader_info_float_convert->source_bpe_log2), uint32_t(copy_buffer_size) >> load_shader_info_float_convert->source_bpe_log2); ui::d3d12::util::CreateBufferTypedUAV( device, descriptor_dest.first, copy_buffer_float, ui::d3d12::util::GetUintPow2DXGIFormat(load_shader_info_float_convert->dest_bpe_log2), uint32_t(copy_buffer_size) >> load_shader_info_float_convert->dest_bpe_log2); command_processor_.SetExternalPipeline(pipeline_float_convert); command_list.D3DSetComputeRootSignature(load_root_signature_.Get()); command_list.D3DSetComputeRootDescriptorTable(1, descriptor_float_convert_source.second); command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second); LoadConstants load_constants_float_convert; load_constants_float_convert.is_tiled_3d_endian_scale = (uint32_t(is_3d) << 1) | (UINT32_C(1) << 4) | (UINT32_C(1) << 7); uint32_t guest_x_blocks_per_group_log2_float_convert = load_shader_info_float_convert->GetGuestXBlocksPerGroupLog2(); for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; ++loop_level) { bool is_base = loop_level == 0; uint32_t level = (level_packed == 0) ? 0 : loop_level; const texture_util::TextureGuestLayout::Level& level_guest_layout = is_base ? 
guest_layout.base : guest_layout.mips[level]; uint32_t level_width, level_height, level_depth; if (level == level_packed) { level_width = level_guest_layout.x_extent_blocks * block_width; level_height = level_guest_layout.y_extent_blocks * block_height; level_depth = level_guest_layout.z_extent; } else { level_width = std::max(width >> level, uint32_t(1)); level_height = std::max(height >> level, uint32_t(1)); level_depth = std::max(depth >> level, uint32_t(1)); } load_constants_float_convert.size_blocks[0] = (level_width + (block_width - 1)) / block_width * texture_resolution_scale_x; load_constants_float_convert.size_blocks[1] = (level_height + (block_height - 1)) / block_height * texture_resolution_scale_y; load_constants_float_convert.size_blocks[2] = level_depth; load_constants_float_convert.height_texels = level_height; uint32_t group_count_x = (load_constants_float_convert.size_blocks[0] + ((UINT32_C(1) << guest_x_blocks_per_group_log2_float_convert) - 1)) >> guest_x_blocks_per_group_log2_float_convert; uint32_t group_count_y = (load_constants_float_convert.size_blocks[1] + ((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >> kLoadGuestYBlocksPerGroupLog2; const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout = is_base ? host_slice_layout_base : host_slice_layouts_mips[level]; uint32_t host_slice_size = uint32_t(is_base ? 
host_slice_size_base : host_slice_sizes_mips[level]); load_constants_float_convert.guest_offset = uint32_t(level_host_slice_layout.Offset); load_constants_float_convert.guest_pitch_aligned = level_host_slice_layout.Footprint.RowPitch; load_constants_float_convert.guest_z_stride_block_rows_aligned = level_host_slice_layout.Footprint.Height; load_constants_float_convert.host_offset = uint32_t(level_host_slice_layout.Offset); load_constants_float_convert.host_pitch = level_host_slice_layout.Footprint.RowPitch; command_list.D3DSetComputeRoot32BitConstants( 0, sizeof(load_constants_float_convert) / sizeof(uint32_t), &load_constants_float_convert, 0); for (uint32_t slice = 0; slice < array_size; ++slice) { if (slice != 0) { command_list.D3DSetComputeRoot32BitConstants( 0, sizeof(load_constants_float_convert.guest_offset) / sizeof(uint32_t), &load_constants_float_convert.guest_offset, offsetof(LoadConstants, guest_offset) / sizeof(uint32_t)); command_list.D3DSetComputeRoot32BitConstants( 0, sizeof(load_constants_float_convert.host_offset) / sizeof(uint32_t), &load_constants_float_convert.host_offset, offsetof(LoadConstants, host_offset) / sizeof(uint32_t)); } command_list.D3DDispatch(group_count_x, group_count_y, load_constants_float_convert.size_blocks[2]); load_constants_float_convert.guest_offset += host_slice_size; load_constants_float_convert.host_offset += host_slice_size; } } command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); copy_buffer_copy_source = copy_buffer_float; copy_buffer_copy_source_state = copy_buffer_float_state; } // Update LRU caching because the texture will be used by the command list. d3d12_texture.MarkAsUsed(); // Submit copying from the copy buffer to the host texture. 
ID3D12Resource* texture_resource = d3d12_texture.resource(); command_processor_.PushTransitionBarrier( texture_resource, d3d12_texture.SetResourceState(D3D12_RESOURCE_STATE_COPY_DEST), D3D12_RESOURCE_STATE_COPY_DEST); command_processor_.PushTransitionBarrier(copy_buffer_copy_source, copy_buffer_copy_source_state, D3D12_RESOURCE_STATE_COPY_SOURCE); copy_buffer_copy_source_state = D3D12_RESOURCE_STATE_COPY_SOURCE; command_processor_.SubmitBarriers(); uint32_t texture_level_count = texture_key.mip_max_level + 1; D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; location_source.pResource = copy_buffer_copy_source; location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; location_dest.pResource = texture_resource; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; for (uint32_t level = level_first; level <= level_last; ++level) { uint32_t guest_level = std::min(level, level_packed); location_source.PlacedFootprint = level ? host_slice_layouts_mips[guest_level] : host_slice_layout_base; location_dest.SubresourceIndex = level; UINT64 host_slice_size = level ? 
host_slice_sizes_mips[guest_level] : host_slice_size_base; D3D12_BOX source_box; const D3D12_BOX* source_box_ptr; if (level >= level_packed) { uint32_t level_offset_blocks_x, level_offset_blocks_y, level_offset_z; texture_util::GetPackedMipOffset(width, height, depth, guest_format, level, level_offset_blocks_x, level_offset_blocks_y, level_offset_z); source_box.left = level_offset_blocks_x * block_width * texture_resolution_scale_x; source_box.top = level_offset_blocks_y * block_height * texture_resolution_scale_y; source_box.front = level_offset_z; source_box.right = source_box.left + rex::align(std::max((width * texture_resolution_scale_x) >> level, uint32_t(1)), host_block_width); source_box.bottom = source_box.top + rex::align(std::max((height * texture_resolution_scale_y) >> level, uint32_t(1)), host_block_height); source_box.back = source_box.front + std::max(depth >> level, uint32_t(1)); source_box_ptr = &source_box; } else { source_box_ptr = nullptr; } for (uint32_t slice = 0; slice < array_size; ++slice) { command_list.D3DCopyTextureRegion(&location_dest, 0, 0, 0, &location_source, source_box_ptr); location_dest.SubresourceIndex += texture_level_count; location_source.PlacedFootprint.Offset += host_slice_size; } } command_processor_.ReleaseScratchGPUBuffer(copy_buffer_copy_source, copy_buffer_copy_source_state); DXGI_FORMAT swap_format = host_format_is_signed ? 
host_formats_[uint32_t(texture_key.format)].dxgi_format_signed : GetDXGIUnormFormat(texture_key); if (swap_format != DXGI_FORMAT_UNKNOWN && texture_key.dimension != xenos::DataDimension::kCube) { ScheduleTextureDump(d3d12_texture, swap_format); ApplyTextureReplacement(d3d12_texture, swap_format); } return true; } void D3D12TextureCache::ProcessCompletedTextureTransfers() { const uint64_t completed_submission = command_processor_.GetCompletedSubmission(); for (auto it = pending_upload_resources_.begin(); it != pending_upload_resources_.end();) { if (it->submission_index > completed_submission) { ++it; continue; } it = pending_upload_resources_.erase(it); } for (auto it = pending_texture_dumps_.begin(); it != pending_texture_dumps_.end();) { if (it->submission_index > completed_submission) { ++it; continue; } D3D12_RANGE read_range; read_range.Begin = 0; read_range.End = SIZE_T(it->total_size); void* mapped = nullptr; if (FAILED(it->readback_buffer->Map(0, &read_range, &mapped))) { REXGPU_WARN("Texture swap dump {}: failed to map readback buffer", it->stable_key); it = pending_texture_dumps_.erase(it); continue; } ac6::textures::DdsImageData dds_image; dds_image.format = it->dxgi_format; dds_image.dimension = it->resource_dimension; dds_image.width = it->width; dds_image.height = it->height; dds_image.depth_or_array_size = it->depth_or_array_size; dds_image.mip_count = it->mip_count; dds_image.is_cube = false; dds_image.subresources.reserve(it->footprints.size()); bool build_failed = false; for (size_t subresource_index = 0; subresource_index < it->footprints.size(); ++subresource_index) { const uint32_t mip_index = it->resource_dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
uint32_t(subresource_index) : (uint32_t(subresource_index) % it->mip_count); ac6::textures::DdsSubresource subresource; subresource.width = std::max(it->width >> mip_index, 1u); subresource.height = std::max(it->height >> mip_index, 1u); subresource.depth = it->resource_dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? std::max(it->depth_or_array_size >> mip_index, 1u) : 1u; ac6::textures::TextureSubresourceLayout tight_layout = {}; if (!ac6::textures::GetTightTextureSubresourceLayout( it->dxgi_format, subresource.width, subresource.height, tight_layout)) { REXGPU_WARN("Texture swap dump {}: unsupported dump format {}", it->stable_key, uint32_t(it->dxgi_format)); build_failed = true; break; } subresource.row_pitch = tight_layout.row_pitch; subresource.slice_pitch = tight_layout.slice_pitch; subresource.data.resize(size_t(subresource.slice_pitch) * subresource.depth); const uint8_t* source_base = reinterpret_cast(mapped) + it->footprints[subresource_index].Offset; const uint32_t source_row_pitch = it->footprints[subresource_index].Footprint.RowPitch; const uint32_t source_row_count = it->row_counts[subresource_index]; for (uint32_t z = 0; z < subresource.depth; ++z) { const uint8_t* source_slice = source_base + size_t(z) * source_row_pitch * source_row_count; uint8_t* dest_slice = subresource.data.data() + size_t(z) * subresource.slice_pitch; for (uint32_t row = 0; row < tight_layout.row_count; ++row) { std::memcpy(dest_slice + size_t(row) * subresource.row_pitch, source_slice + size_t(row) * source_row_pitch, subresource.row_pitch); } } dds_image.subresources.push_back(std::move(subresource)); } it->readback_buffer->Unmap(0, nullptr); if (build_failed) { it = pending_texture_dumps_.erase(it); continue; } ac6::textures::TextureDumpMetadata metadata; metadata.stable_key = it->stable_key; metadata.texture_key_hash = it->texture_key_hash; metadata.base_page = it->base_page; metadata.mip_page = it->mip_page; metadata.dimension = it->guest_dimension; metadata.width = 
it->width; metadata.height = it->height; metadata.depth_or_array_size = it->depth_or_array_size; metadata.mip_count = it->mip_count; metadata.guest_format = it->guest_format; metadata.endianness = it->endianness; metadata.dxgi_format = uint32_t(it->dxgi_format); metadata.tiled = it->tiled; metadata.packed_mips = it->packed_mips; metadata.signed_separate = it->signed_separate; metadata.scaled_resolve = it->scaled_resolve; metadata.frame_index = it->frame_index; metadata.signature_stable_id = it->signature_stable_id; metadata.active_vertex_shader_hash = it->active_vertex_shader_hash; metadata.active_pixel_shader_hash = it->active_pixel_shader_hash; metadata.signature_tags = it->signature_tags; std::string error; if (!ac6::textures::WriteDdsToFile(ac6::textures::GetTextureDumpDdsPath(it->stable_key), dds_image, &error)) { REXGPU_WARN("Texture swap dump {}: failed to write DDS ({})", it->stable_key, error); } else if (!ac6::textures::WriteDumpMetadata( ac6::textures::GetTextureDumpMetadataPath(it->stable_key), metadata, &error)) { REXGPU_WARN("Texture swap dump {}: failed to write metadata ({})", it->stable_key, error); } it = pending_texture_dumps_.erase(it); } } bool D3D12TextureCache::ScheduleTextureDump(D3D12Texture& texture, DXGI_FORMAT dump_format) { if (!ac6::textures::TextureDumpEnabled() || !ac6::textures::IsSupportedTextureSwapFormat(dump_format)) { return false; } const TextureKey& key = texture.key(); const uint64_t texture_key_hash = XXH3_64bits(&key, sizeof(key)); const std::string stable_key = ac6::textures::BuildTextureStableKey( texture_key_hash, key.base_page, key.mip_page, uint32_t(key.dimension), key.GetWidth(), key.GetHeight(), key.GetDepthOrArraySize(), key.mip_max_level + 1, uint32_t(key.format), uint32_t(key.endianness), key.tiled != 0, key.packed_mips != 0, key.signed_separate != 0, key.scaled_resolve != 0); if (dumped_texture_keys_.contains(stable_key) || ac6::textures::DumpExists(stable_key)) { dumped_texture_keys_.insert(stable_key); return 
false; } ID3D12Resource* texture_resource = texture.resource(); D3D12_RESOURCE_DESC resource_desc = texture_resource->GetDesc(); if (resource_desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D && resource_desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D) { return false; } const uint32_t subresource_count = resource_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? resource_desc.MipLevels : resource_desc.MipLevels * resource_desc.DepthOrArraySize; std::vector footprints(subresource_count); std::vector row_counts(subresource_count); std::vector row_sizes(subresource_count); UINT64 total_size = 0; ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice(); device->GetCopyableFootprints(&resource_desc, 0, subresource_count, 0, footprints.data(), row_counts.data(), row_sizes.data(), &total_size); ID3D12Resource* readback_resource = command_processor_.RequestReadbackBuffer(uint32_t(total_size)); if (!readback_resource) { return false; } const D3D12_RESOURCE_STATES previous_state = texture.SetResourceState(D3D12_RESOURCE_STATE_COPY_SOURCE); if (previous_state != D3D12_RESOURCE_STATE_COPY_SOURCE) { command_processor_.PushTransitionBarrier(texture_resource, previous_state, D3D12_RESOURCE_STATE_COPY_SOURCE); command_processor_.SubmitBarriers(); } DeferredCommandList& command_list = command_processor_.GetDeferredCommandList(); for (uint32_t subresource_index = 0; subresource_index < subresource_count; ++subresource_index) { D3D12_TEXTURE_COPY_LOCATION source = {}; source.pResource = texture_resource; source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; source.SubresourceIndex = subresource_index; D3D12_TEXTURE_COPY_LOCATION dest = {}; dest.pResource = readback_resource; dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; dest.PlacedFootprint = footprints[subresource_index]; command_list.D3DCopyTextureRegion(&dest, 0, 0, 0, &source, nullptr); } const ac6::backend::BackendDiagnosticsSnapshot diagnostics = ac6::backend::GetDiagnosticsSnapshot(); 
PendingTextureDump pending_dump; pending_dump.submission_index = command_processor_.GetCurrentSubmission(); pending_dump.total_size = total_size; pending_dump.texture_key_hash = texture_key_hash; pending_dump.base_page = key.base_page; pending_dump.mip_page = key.mip_page; pending_dump.guest_dimension = uint32_t(key.dimension); pending_dump.width = key.GetWidth(); pending_dump.height = key.GetHeight(); pending_dump.depth_or_array_size = key.GetDepthOrArraySize(); pending_dump.mip_count = key.mip_max_level + 1; pending_dump.guest_format = uint32_t(key.format); pending_dump.endianness = uint32_t(key.endianness); pending_dump.dxgi_format = dump_format; pending_dump.resource_dimension = resource_desc.Dimension; pending_dump.tiled = key.tiled != 0; pending_dump.packed_mips = key.packed_mips != 0; pending_dump.signed_separate = key.signed_separate != 0; pending_dump.scaled_resolve = key.scaled_resolve != 0; pending_dump.frame_index = diagnostics.frame_index; pending_dump.signature_stable_id = diagnostics.latest_signature.stable_id; pending_dump.active_vertex_shader_hash = diagnostics.active_vertex_shader_hash; pending_dump.active_pixel_shader_hash = diagnostics.active_pixel_shader_hash; pending_dump.stable_key = stable_key; pending_dump.signature_tags = diagnostics.latest_signature_tags; pending_dump.readback_buffer = readback_resource; pending_dump.footprints = std::move(footprints); pending_dump.row_counts.reserve(row_counts.size()); for (UINT row_count : row_counts) { pending_dump.row_counts.push_back(uint32_t(row_count)); } pending_texture_dumps_.push_back(std::move(pending_dump)); dumped_texture_keys_.insert(stable_key); return true; } bool D3D12TextureCache::ApplyTextureReplacement(D3D12Texture& texture, DXGI_FORMAT replacement_format) { if (!ac6::textures::TextureReplacementEnabled() || !ac6::textures::IsSupportedTextureSwapFormat(replacement_format)) { return false; } const TextureKey& key = texture.key(); const uint64_t texture_key_hash = XXH3_64bits(&key, 
sizeof(key)); const std::string stable_key = ac6::textures::BuildTextureStableKey( texture_key_hash, key.base_page, key.mip_page, uint32_t(key.dimension), key.GetWidth(), key.GetHeight(), key.GetDepthOrArraySize(), key.mip_max_level + 1, uint32_t(key.format), uint32_t(key.endianness), key.tiled != 0, key.packed_mips != 0, key.signed_separate != 0, key.scaled_resolve != 0); const std::optional replacement_path = ac6::textures::ResolveReplacementDdsPath(stable_key); if (!replacement_path) { return false; } ac6::textures::DdsImageData replacement; std::string error; if (!ac6::textures::LoadDdsFromFile(*replacement_path, replacement, &error)) { if (replacement_warning_keys_.insert(stable_key).second) { REXGPU_WARN("Texture swap {}: failed to load replacement {} ({})", stable_key, replacement_path->string(), error); } return false; } ID3D12Resource* texture_resource = texture.resource(); const D3D12_RESOURCE_DESC resource_desc = texture_resource->GetDesc(); if (replacement.is_cube || replacement.format != replacement_format || replacement.dimension != resource_desc.Dimension || replacement.width != resource_desc.Width || replacement.height != resource_desc.Height || replacement.depth_or_array_size != resource_desc.DepthOrArraySize || replacement.mip_count != resource_desc.MipLevels) { if (replacement_warning_keys_.insert(stable_key).second) { REXGPU_WARN( "Texture swap {}: replacement {} does not match expected format/layout (expected {} {}x{}x{} mips={})", stable_key, replacement_path->string(), ac6::textures::DescribeDxgiFormat(replacement_format), uint32_t(resource_desc.Width), resource_desc.Height, resource_desc.DepthOrArraySize, resource_desc.MipLevels); } return false; } const uint32_t subresource_count = resource_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
resource_desc.MipLevels : resource_desc.MipLevels * resource_desc.DepthOrArraySize; if (replacement.subresources.size() != subresource_count) { if (replacement_warning_keys_.insert(stable_key).second) { REXGPU_WARN("Texture swap {}: replacement {} has {} subresources, expected {}", stable_key, replacement_path->string(), replacement.subresources.size(), subresource_count); } return false; } ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice(); std::vector footprints(subresource_count); std::vector row_counts(subresource_count); std::vector row_sizes(subresource_count); UINT64 upload_size = 0; device->GetCopyableFootprints(&resource_desc, 0, subresource_count, 0, footprints.data(), row_counts.data(), row_sizes.data(), &upload_size); D3D12_RESOURCE_DESC upload_desc = {}; upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; upload_desc.Alignment = 0; upload_desc.Width = upload_size; upload_desc.Height = 1; upload_desc.DepthOrArraySize = 1; upload_desc.MipLevels = 1; upload_desc.Format = DXGI_FORMAT_UNKNOWN; upload_desc.SampleDesc.Count = 1; upload_desc.SampleDesc.Quality = 0; upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; upload_desc.Flags = D3D12_RESOURCE_FLAG_NONE; Microsoft::WRL::ComPtr upload_buffer; if (FAILED(device->CreateCommittedResource(&ui::d3d12::util::kHeapPropertiesUpload, command_processor_.GetD3D12Provider().GetHeapFlagCreateNotZeroed(), &upload_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&upload_buffer)))) { if (replacement_warning_keys_.insert(stable_key).second) { REXGPU_WARN("Texture swap {}: failed to create upload buffer for {}", stable_key, replacement_path->string()); } return false; } D3D12_RANGE no_read_range = {}; void* mapped_upload = nullptr; if (FAILED(upload_buffer->Map(0, &no_read_range, &mapped_upload))) { if (replacement_warning_keys_.insert(stable_key).second) { REXGPU_WARN("Texture swap {}: failed to map upload buffer for {}", stable_key, replacement_path->string()); } return false; } 
for (uint32_t subresource_index = 0; subresource_index < subresource_count; ++subresource_index) { const ac6::textures::DdsSubresource& subresource = replacement.subresources[subresource_index]; const uint8_t* source_base = subresource.data.data(); uint8_t* dest_base = reinterpret_cast(mapped_upload) + footprints[subresource_index].Offset; const uint32_t dest_row_pitch = footprints[subresource_index].Footprint.RowPitch; for (uint32_t z = 0; z < subresource.depth; ++z) { const uint8_t* source_slice = source_base + size_t(z) * subresource.slice_pitch; uint8_t* dest_slice = dest_base + size_t(z) * dest_row_pitch * row_counts[subresource_index]; for (uint32_t row = 0; row < row_counts[subresource_index]; ++row) { std::memcpy(dest_slice + size_t(row) * dest_row_pitch, source_slice + size_t(row) * subresource.row_pitch, subresource.row_pitch); } } } upload_buffer->Unmap(0, nullptr); const D3D12_RESOURCE_STATES previous_state = texture.SetResourceState(D3D12_RESOURCE_STATE_COPY_DEST); if (previous_state != D3D12_RESOURCE_STATE_COPY_DEST) { command_processor_.PushTransitionBarrier(texture_resource, previous_state, D3D12_RESOURCE_STATE_COPY_DEST); command_processor_.SubmitBarriers(); } DeferredCommandList& command_list = command_processor_.GetDeferredCommandList(); for (uint32_t subresource_index = 0; subresource_index < subresource_count; ++subresource_index) { D3D12_TEXTURE_COPY_LOCATION source = {}; source.pResource = upload_buffer.Get(); source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; source.PlacedFootprint = footprints[subresource_index]; D3D12_TEXTURE_COPY_LOCATION dest = {}; dest.pResource = texture_resource; dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dest.SubresourceIndex = subresource_index; command_list.D3DCopyTextureRegion(&dest, 0, 0, 0, &source, nullptr); } pending_upload_resources_.push_back( PendingUploadResource{command_processor_.GetCurrentSubmission(), upload_buffer}); replacement_warning_keys_.erase(stable_key); return true; } void 
D3D12TextureCache::UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) { uint32_t bindings_remaining = fetch_constant_mask; uint32_t binding_index; while (rex::bit_scan_forward(bindings_remaining, &binding_index)) { bindings_remaining &= ~(UINT32_C(1) << binding_index); D3D12TextureBinding& d3d12_binding = d3d12_texture_bindings_[binding_index]; d3d12_binding.Reset(); const TextureBinding* binding = GetValidTextureBinding(binding_index); if (!binding) { continue; } if (IsSignedVersionSeparateForFormat(binding->key)) { if (binding->texture && texture_util::IsAnySignNotSigned(binding->swizzled_signs)) { d3d12_binding.descriptor_index = FindOrCreateTextureDescriptor(*static_cast(binding->texture), binding->key.dimension, false, binding->host_swizzle); } if (binding->texture_signed && texture_util::IsAnySignSigned(binding->swizzled_signs)) { d3d12_binding.descriptor_index_signed = FindOrCreateTextureDescriptor(*static_cast(binding->texture_signed), binding->key.dimension, true, binding->host_swizzle); } } else { D3D12Texture* texture = static_cast(binding->texture); if (texture) { if (texture_util::IsAnySignNotSigned(binding->swizzled_signs)) { d3d12_binding.descriptor_index = FindOrCreateTextureDescriptor( *texture, binding->key.dimension, false, binding->host_swizzle); } if (texture_util::IsAnySignSigned(binding->swizzled_signs)) { d3d12_binding.descriptor_index_signed = FindOrCreateTextureDescriptor( *texture, binding->key.dimension, true, binding->host_swizzle); } } } } } ID3D12Resource* D3D12TextureCache::D3D12Texture::GetOrCreate3DAs2DResource( D3D12_RESOURCE_STATES end_state) { if (!REXCVAR_GET(gpu_3d_to_2d_texture)) { return nullptr; } auto& d3d12_cache = static_cast(texture_cache()); if (texture_3d_as_2d_) { d3d12_cache.command_processor_.PushTransitionBarrier( texture_3d_as_2d_->resource(), texture_3d_as_2d_->SetResourceState(end_state), end_state); return texture_3d_as_2d_->resource(); } const ui::d3d12::D3D12Provider& provider = 
d3d12_cache.command_processor_.GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); D3D12_RESOURCE_DESC desc = {}; desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; desc.Alignment = 0; desc.Width = key().GetWidth(); desc.Height = key().GetHeight(); desc.DepthOrArraySize = 1; desc.MipLevels = 1; desc.Format = d3d12_cache.GetDXGIResourceFormat(key()); if (desc.Format == DXGI_FORMAT_UNKNOWN) { return nullptr; } desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; desc.Flags = D3D12_RESOURCE_FLAG_NONE; D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; Microsoft::WRL::ComPtr resource_2d; if (FAILED(device->CreateCommittedResource(&ui::d3d12::util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), &desc, initial_state, nullptr, IID_PPV_ARGS(&resource_2d)))) { REXGPU_ERROR("D3D12TextureCache: Failed to create 3D-as-2D wrapper resource"); return nullptr; } TextureKey key_2d = key(); key_2d.depth_or_array_size_minus_1 = 0; key_2d.mip_max_level = 0; texture_3d_as_2d_.reset( new D3D12Texture(d3d12_cache, key_2d, resource_2d.Get(), initial_state, false)); texture_3d_as_2d_->SetForceLoad3DTiling(true); if (!d3d12_cache.LoadTextureData(*texture_3d_as_2d_)) { REXGPU_ERROR("D3D12TextureCache: Failed to load 3D-as-2D wrapper data"); texture_3d_as_2d_.reset(); return nullptr; } d3d12_cache.command_processor_.PushTransitionBarrier( texture_3d_as_2d_->resource(), texture_3d_as_2d_->SetResourceState(end_state), end_state); return texture_3d_as_2d_->resource(); } uint32_t D3D12TextureCache::FindOrCreateTextureDescriptor(D3D12Texture& texture, xenos::DataDimension dimension, bool is_signed, uint32_t host_swizzle) { D3D12Texture::SRVDescriptorKey descriptor_key; descriptor_key.key = 0; descriptor_key.is_signed = uint32_t(is_signed); descriptor_key.host_swizzle = host_swizzle; descriptor_key.dimension = uint32_t(dimension); // Try to find an existing descriptor. 
uint32_t existing_descriptor_index = texture.GetSRVDescriptorIndex(descriptor_key); if (existing_descriptor_index != UINT32_MAX) { return existing_descriptor_index; } TextureKey texture_key = texture.key(); // Create a new bindless or cached descriptor if supported. D3D12_SHADER_RESOURCE_VIEW_DESC desc = {}; if (IsSignedVersionSeparateForFormat(texture_key) && texture_key.signed_separate != uint32_t(is_signed)) { // Not the version with the needed signedness. return UINT32_MAX; } xenos::TextureFormat format = texture_key.format; if (is_signed) { // Not supporting signed compressed textures - hopefully DXN and DXT5A are // not used as signed. desc.Format = host_formats_[uint32_t(format)].dxgi_format_signed; } else { desc.Format = GetDXGIUnormFormat(texture_key); } if (desc.Format == DXGI_FORMAT_UNKNOWN) { unsupported_format_features_used_[uint32_t(format)] |= is_signed ? kUnsupportedSnormBit : kUnsupportedUnormBit; return UINT32_MAX; } uint32_t mip_levels = texture_key.mip_max_level + 1; ID3D12Resource* resource_for_view = texture.resource(); switch (dimension) { case xenos::DataDimension::k3D: desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; desc.Texture3D.MostDetailedMip = 0; desc.Texture3D.MipLevels = mip_levels; desc.Texture3D.ResourceMinLODClamp = 0.0f; break; case xenos::DataDimension::kCube: desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; desc.TextureCube.MostDetailedMip = 0; desc.TextureCube.MipLevels = mip_levels; desc.TextureCube.ResourceMinLODClamp = 0.0f; break; case xenos::DataDimension::k1D: case xenos::DataDimension::k2DOrStacked: desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; if (texture_key.dimension == xenos::DataDimension::k3D) { resource_for_view = texture.GetOrCreate3DAs2DResource(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); if (!resource_for_view) { return UINT32_MAX; } desc.Texture2DArray.MostDetailedMip = 0; desc.Texture2DArray.MipLevels = 1; 
desc.Texture2DArray.FirstArraySlice = 0; desc.Texture2DArray.ArraySize = 1; desc.Texture2DArray.PlaneSlice = 0; desc.Texture2DArray.ResourceMinLODClamp = 0.0f; } else { desc.Texture2DArray.MostDetailedMip = 0; desc.Texture2DArray.MipLevels = mip_levels; desc.Texture2DArray.FirstArraySlice = 0; desc.Texture2DArray.ArraySize = texture_key.GetDepthOrArraySize(); desc.Texture2DArray.PlaneSlice = 0; desc.Texture2DArray.ResourceMinLODClamp = 0.0f; } break; default: assert_unhandled_case(dimension); return UINT32_MAX; } desc.Shader4ComponentMapping = host_swizzle | D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES; ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice(); uint32_t descriptor_index; if (bindless_resources_used_) { descriptor_index = command_processor_.RequestPersistentViewBindlessDescriptor(); if (descriptor_index == UINT32_MAX) { REXGPU_ERROR( "Failed to create a texture descriptor - no free bindless view " "descriptors"); return UINT32_MAX; } } else { if (!srv_descriptor_cache_free_.empty()) { descriptor_index = srv_descriptor_cache_free_.back(); srv_descriptor_cache_free_.pop_back(); } else { // Allocated + 1 (including the descriptor that is being added), rounded // up to kSRVDescriptorCachePageSize, (allocated + 1 + size - 1). 
uint32_t cache_pages_needed = (srv_descriptor_cache_allocated_ + kSRVDescriptorCachePageSize) / kSRVDescriptorCachePageSize; if (srv_descriptor_cache_.size() < cache_pages_needed) { D3D12_DESCRIPTOR_HEAP_DESC cache_heap_desc; cache_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; cache_heap_desc.NumDescriptors = kSRVDescriptorCachePageSize; cache_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; cache_heap_desc.NodeMask = 0; while (srv_descriptor_cache_.size() < cache_pages_needed) { Microsoft::WRL::ComPtr cache_heap; if (FAILED(device->CreateDescriptorHeap(&cache_heap_desc, IID_PPV_ARGS(&cache_heap)))) { REXGPU_ERROR( "D3D12TextureCache: Failed to create a texture descriptor - " "couldn't create a descriptor cache heap"); return UINT32_MAX; } srv_descriptor_cache_.emplace_back(cache_heap.Get()); } } descriptor_index = srv_descriptor_cache_allocated_++; } } device->CreateShaderResourceView(resource_for_view, &desc, GetTextureDescriptorCPUHandle(descriptor_index)); texture.AddSRVDescriptorIndex(descriptor_key, descriptor_index); return descriptor_index; } void D3D12TextureCache::ReleaseTextureDescriptor(uint32_t descriptor_index) { if (bindless_resources_used_) { command_processor_.ReleaseViewBindlessDescriptorImmediately(descriptor_index); } else { srv_descriptor_cache_free_.push_back(descriptor_index); } } D3D12_CPU_DESCRIPTOR_HANDLE D3D12TextureCache::GetTextureDescriptorCPUHandle( uint32_t descriptor_index) const { const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider(); if (bindless_resources_used_) { return provider.OffsetViewDescriptor(command_processor_.GetViewBindlessHeapCPUStart(), descriptor_index); } D3D12_CPU_DESCRIPTOR_HANDLE heap_start = srv_descriptor_cache_[descriptor_index / kSRVDescriptorCachePageSize].heap_start(); uint32_t heap_offset = descriptor_index % kSRVDescriptorCachePageSize; return provider.OffsetViewDescriptor(heap_start, heap_offset); } xenos::ClampMode 
D3D12TextureCache::NormalizeClampMode(xenos::ClampMode clamp_mode) const { if (clamp_mode == xenos::ClampMode::kClampToHalfway) { // No GL_CLAMP (clamp to half edge, half border) equivalent in Direct3D 12, // but there's no Direct3D 9 equivalent anyway, and too weird to be suitable // for intentional real usage. return xenos::ClampMode::kClampToEdge; } if (clamp_mode == xenos::ClampMode::kMirrorClampToHalfway || clamp_mode == xenos::ClampMode::kMirrorClampToBorder) { // No Direct3D 12 equivalents. return xenos::ClampMode::kMirrorClampToEdge; } return clamp_mode; } } // namespace rex::graphics::d3d12