// File: AC6_recomp/thirdparty/rexglue-sdk/src/graphics/d3d12/texture_cache.cpp
// (2868 lines, 142 KiB, C++)

/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*
* @modified Tom Clay, 2026 - Adapted for ReXGlue runtime
*/
#include <algorithm>
#include <array>
#include <cfloat>
#include <cstddef>
#include <cstring>
#include <memory>
#include <utility>
#include <rex/assert.h>
#include <rex/dbg.h>
#include <rex/graphics/d3d12/command_processor.h>
#include <rex/graphics/d3d12/shared_memory.h>
#include <rex/graphics/d3d12/texture_cache.h>
#include <rex/graphics/flags.h>
#include <rex/graphics/pipeline/texture/info.h>
#include <rex/graphics/pipeline/texture/util.h>
#include <rex/graphics/xenos.h>
#include <rex/logging.h>
#include <rex/math.h>
#include <rex/ui/d3d12/d3d12_upload_buffer_pool.h>
#include <rex/ui/d3d12/d3d12_util.h>
#include <rex/hash.h>
#include "../../../../../src/ac6_backend_fixes/ac6_backend_hooks.h"
#include "../../../../../src/ac6_texture_overrides.h"
namespace rex::graphics::d3d12 {
// Generated with `xb buildshaders`.
namespace shaders {
#include "../shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_128bpb_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_16bpb_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_32bpb_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_64bpb_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_8bpb_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_bgrg8_rgb8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_bgrg8_rgbg8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_depth_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_bgra4_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_gbgr8_grgb8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_gbgr8_rgb8_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r16_snorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r16_snorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r16_unorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r16_unorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_snorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_snorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_unorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rg16_unorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_snorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_snorm_float_scaled_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_unorm_float_cs.h"
#include "../shaders/bytecode/d3d12_5_1/texture_load_rgba16_unorm_float_scaled_cs.h"
} // namespace shaders
namespace {

// D3D12_FORMAT_SUPPORT1 bits that a format must report in full for
// IsFormatSampleFilterable to accept it.
constexpr D3D12_FORMAT_SUPPORT1 kLinearFilterSupport = D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE;

// Asks `device` for the format support flags of `format` and reports whether
// every bit of kLinearFilterSupport is present in Support1.
//
// Returns false for DXGI_FORMAT_UNKNOWN (without querying the device) and when
// the CheckFeatureSupport call itself fails.
bool IsFormatSampleFilterable(ID3D12Device* device, DXGI_FORMAT format) {
  if (format == DXGI_FORMAT_UNKNOWN) {
    return false;
  }

  // Format is the input of the query; both support masks are outputs and
  // start out cleared.
  D3D12_FEATURE_DATA_FORMAT_SUPPORT query;
  query.Format = format;
  query.Support1 = D3D12_FORMAT_SUPPORT1_NONE;
  query.Support2 = D3D12_FORMAT_SUPPORT2_NONE;

  const auto query_result =
      device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &query, sizeof(query));
  if (FAILED(query_result)) {
    return false;
  }

  const bool has_all_required_bits =
      (query.Support1 & kLinearFilterSupport) == kLinearFilterSupport;
  return has_all_required_bits;
}

}  // namespace
// Default guest-to-host texture format table: one HostFormat entry per
// xenos::TextureFormat value (64 entries), in enum order. The comment directly
// above each entry names the guest format the entry belongs to.
//
// Initialize() copies this table into host_formats_ and then patches individual
// entries there depending on which DXGI formats the device reports as
// filterable, so this table itself stays constant.
//
// DXGI_FORMAT_UNKNOWN / kLoadShaderIndexUnknown mark slots for which this table
// provides no host format / no load shader.
//
// NOTE(review): the entries are positional and HostFormat is declared in
// texture_cache.h, not here. The initializer order appears to be: resource
// format, unsigned view format, unsigned load shader, signed view format,
// signed load shader, block-compressed flag, uncompressed fallback format,
// decompressing load shader, host swizzle - confirm against the declaration
// before editing any entry.
const D3D12TextureCache::HostFormat D3D12TextureCache::kBestHostFormats[64] = {
// k_1_REVERSE
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_1
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_8
{DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb, DXGI_FORMAT_R8_SNORM,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_1_5_5_5
// Red and blue swapped in the load shader for simplicity.
{DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, kLoadShaderIndexR5G5B5A1ToB5G5R5A1,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_5_6_5
// Red and blue swapped in the load shader for simplicity.
{DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, kLoadShaderIndexR5G6B5ToB5G6R5,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_6_5_5
// On the host, green bits in blue, blue bits in green.
{DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM,
kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)},
// k_8_8_8_8
{DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndex32bpb,
DXGI_FORMAT_R8G8B8A8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_2_10_10_10
{DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, kLoadShaderIndex32bpb,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_8_A
{DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb, DXGI_FORMAT_R8_SNORM,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_8_B
{DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb, DXGI_FORMAT_R8_SNORM,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_8_8
{DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndex16bpb,
DXGI_FORMAT_R8G8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_Cr_Y1_Cb_Y0_REP
// Red and blue swapped in the load shader for simplicity.
// TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for
// the signed version, separate unsigned and signed load shaders completely
// (as one doesn't need decompression for this format, while another does).
{DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM, kLoadShaderIndexGBGR8ToGRGB8,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM,
kLoadShaderIndexGBGR8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_Y1_Cr_Y0_Cb_REP
// Red and blue swapped in the load shader for simplicity.
// TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for
// the signed version, separate unsigned and signed load shaders completely
// (as one doesn't need decompression for this format, while another does).
{DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_R8G8_B8G8_UNORM, kLoadShaderIndexBGRG8ToRGBG8,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM,
kLoadShaderIndexBGRG8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_16_16_EDRAM
// Not usable as a texture, also has -32...32 range.
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_8_8_8_8_A
{DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndex32bpb,
DXGI_FORMAT_R8G8B8A8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_4_4_4_4
// Red and blue swapped in the load shader for simplicity.
{DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, kLoadShaderIndexRGBA4ToBGRA4,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_10_11_11
{DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_11_11_10
{DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_DXT1
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_DXT2_3
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_DXT4_5
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_16_16_16_16_EDRAM
// Not usable as a texture, also has -32...32 range.
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// R32_FLOAT for depth because shaders would require an additional SRV to
// sample stencil, which we don't provide.
// k_24_8
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthUnorm,
DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_24_8_FLOAT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthFloat,
DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_16
{DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, kLoadShaderIndex16bpb, DXGI_FORMAT_R16_SNORM,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_16_16
{DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM, kLoadShaderIndex32bpb,
DXGI_FORMAT_R16G16_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_16_16_16_16
{DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, kLoadShaderIndex64bpb,
DXGI_FORMAT_R16G16B16A16_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_16_EXPAND
{DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb, DXGI_FORMAT_R16_FLOAT,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_16_16_EXPAND
{DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb,
DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_16_16_16_16_EXPAND
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, kLoadShaderIndex64bpb,
DXGI_FORMAT_R16G16B16A16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_16_FLOAT
{DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb, DXGI_FORMAT_R16_FLOAT,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_16_16_FLOAT
{DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb,
DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_16_16_16_16_FLOAT
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, kLoadShaderIndex64bpb,
DXGI_FORMAT_R16G16B16A16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_32
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_32_32
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_32_32_32_32
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_32_FLOAT
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndex32bpb, DXGI_FORMAT_R32_FLOAT,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_32_32_FLOAT
{DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndex64bpb,
DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_32_32_32_32_FLOAT
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, kLoadShaderIndex128bpb,
DXGI_FORMAT_R32G32B32A32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_32_AS_8
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_32_AS_8_8
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_16_MPEG
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_16_16_MPEG
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_32_AS_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_32_AS_8_8_INTERLACED
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_16_INTERLACED
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_16_MPEG_INTERLACED
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_16_16_MPEG_INTERLACED
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_DXN
{DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, kLoadShaderIndex128bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexDXNToRG8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_8_8_8_8_AS_16_16_16_16
{DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndex32bpb,
DXGI_FORMAT_R8G8B8A8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_DXT1_AS_16_16_16_16
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_DXT2_3_AS_16_16_16_16
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_DXT4_5_AS_16_16_16_16
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_2_10_10_10_AS_16_16_16_16
{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, kLoadShaderIndex32bpb,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_10_11_11_AS_16_16_16_16
{DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_11_11_10_AS_16_16_16_16
{DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_32_32_32_FLOAT
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
// k_DXT3A
// R8_UNORM has the same size as BC2, but doesn't have the 4x4 size
// alignment requirement.
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT3A, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_DXT5A
{DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, kLoadShaderIndex64bpb, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT5AToR8,
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
// k_CTX1
{DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexCTX1, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
// k_DXT3A_AS_1_1_1_1
{DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, kLoadShaderIndexDXT3AAs1111ToBGRA4,
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_8_8_8_8_GAMMA_EDRAM
// Not usable as a texture.
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
// k_2_10_10_10_FLOAT_EDRAM
// Not usable as a texture.
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN,
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
};
// Constructs the D3D12 texture cache.
//
// The register file, shared memory and both draw resolution scale factors are
// forwarded unchanged to the TextureCache base class. This class itself only
// stores the owning command processor and whether bindless resources are in
// use; no D3D12 objects are created here (see Initialize()).
D3D12TextureCache::D3D12TextureCache(
    const RegisterFile& register_file,
    D3D12SharedMemory& shared_memory,
    uint32_t draw_resolution_scale_x,
    uint32_t draw_resolution_scale_y,
    D3D12CommandProcessor& command_processor,
    bool bindless_resources_used)
    : TextureCache(register_file,
                   shared_memory,
                   draw_resolution_scale_x,
                   draw_resolution_scale_y),
      command_processor_(command_processor),
      bindless_resources_used_(bindless_resources_used) {
}
// Tears the cache down in dependency order: textures, then the scaled resolve
// buffers, then the heaps those buffers were placed in.
D3D12TextureCache::~D3D12TextureCache() {
  // ~D3D12Texture references the texture descriptor cache, so every texture
  // has to be destroyed while that cache is still alive.
  DestroyAllTextures(true);

  // The buffers must let go of the heaps before the heaps themselves are
  // dropped, hence the explicit reset of each owner ahead of the clear().
  for (auto& buffer_owner : scaled_resolve_2gb_buffers_) {
    buffer_owner.reset();
  }
  scaled_resolve_heaps_.clear();

  // Nothing is allocated for scaled resolves any more; zero the counter.
  COUNT_profile_set("gpu/texture_cache/scaled_resolve_buffer_used_mb", 0);
}
bool D3D12TextureCache::Initialize() {
const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider();
ID3D12Device* device = provider.GetDevice();
std::memcpy(host_formats_, kBestHostFormats, sizeof(host_formats_));
bool rgba16_norm_filterable =
IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_UNORM) &&
IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_SNORM);
bool rgba16_float_filterable = IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_FLOAT);
if (!rgba16_norm_filterable && rgba16_float_filterable) {
constexpr std::array<xenos::TextureFormat, 4> kPackedRGBA16FallbackFormats = {
xenos::TextureFormat::k_10_11_11,
xenos::TextureFormat::k_11_11_10,
xenos::TextureFormat::k_10_11_11_AS_16_16_16_16,
xenos::TextureFormat::k_11_11_10_AS_16_16_16_16,
};
for (xenos::TextureFormat format : kPackedRGBA16FallbackFormats) {
HostFormat& host_format = host_formats_[uint32_t(format)];
host_format.dxgi_format_unsigned = DXGI_FORMAT_R16G16B16A16_FLOAT;
host_format.dxgi_format_signed = DXGI_FORMAT_R16G16B16A16_FLOAT;
}
REXGPU_WARN(
"D3D12TextureCache: Using RGBA16 float fallback for 10_11_11 / 11_11_10 textures");
}
if (IsFormatSampleFilterable(device, DXGI_FORMAT_R16_FLOAT)) {
HostFormat& host_format_16 = host_formats_[uint32_t(xenos::TextureFormat::k_16)];
if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16_UNORM)) {
host_format_16.dxgi_format_unsigned = DXGI_FORMAT_R16_FLOAT;
host_format_16.load_shader = kLoadShaderIndexR16UNormToFloat;
}
if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16_SNORM)) {
host_format_16.dxgi_format_signed = DXGI_FORMAT_R16_FLOAT;
host_format_16.load_shader_signed = kLoadShaderIndexR16SNormToFloat;
}
if (host_format_16.load_shader != kLoadShaderIndex16bpb &&
host_format_16.load_shader_signed == kLoadShaderIndexUnknown) {
host_format_16.load_shader_signed = kLoadShaderIndex16bpb;
}
}
if (IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16_FLOAT)) {
HostFormat& host_format_16_16 = host_formats_[uint32_t(xenos::TextureFormat::k_16_16)];
if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16_UNORM)) {
host_format_16_16.dxgi_format_unsigned = DXGI_FORMAT_R16G16_FLOAT;
host_format_16_16.load_shader = kLoadShaderIndexRG16UNormToFloat;
}
if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16_SNORM)) {
host_format_16_16.dxgi_format_signed = DXGI_FORMAT_R16G16_FLOAT;
host_format_16_16.load_shader_signed = kLoadShaderIndexRG16SNormToFloat;
}
if (host_format_16_16.load_shader != kLoadShaderIndex32bpb &&
host_format_16_16.load_shader_signed == kLoadShaderIndexUnknown) {
host_format_16_16.load_shader_signed = kLoadShaderIndex32bpb;
}
}
if (rgba16_float_filterable) {
HostFormat& host_format_16_16_16_16 =
host_formats_[uint32_t(xenos::TextureFormat::k_16_16_16_16)];
if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_UNORM)) {
host_format_16_16_16_16.dxgi_format_unsigned = DXGI_FORMAT_R16G16B16A16_FLOAT;
host_format_16_16_16_16.load_shader = kLoadShaderIndexRGBA16UNormToFloat;
}
if (!IsFormatSampleFilterable(device, DXGI_FORMAT_R16G16B16A16_SNORM)) {
host_format_16_16_16_16.dxgi_format_signed = DXGI_FORMAT_R16G16B16A16_FLOAT;
host_format_16_16_16_16.load_shader_signed = kLoadShaderIndexRGBA16SNormToFloat;
}
if (host_format_16_16_16_16.load_shader != kLoadShaderIndex64bpb &&
host_format_16_16_16_16.load_shader_signed == kLoadShaderIndexUnknown) {
host_format_16_16_16_16.load_shader_signed = kLoadShaderIndex64bpb;
}
}
if (IsDrawResolutionScaled()) {
// Buffers not used yet - no need aliasing barriers to change ownership of
// gigabytes between even and odd buffers.
std::memset(scaled_resolve_1gb_buffer_indices_, UINT8_MAX,
sizeof(scaled_resolve_1gb_buffer_indices_));
assert_true(scaled_resolve_heaps_.empty());
uint64_t scaled_resolve_address_space_size =
uint64_t(SharedMemory::kBufferSize) *
(draw_resolution_scale_x() * draw_resolution_scale_y());
scaled_resolve_heaps_.resize(
size_t(scaled_resolve_address_space_size >> kScaledResolveHeapSizeLog2));
}
scaled_resolve_heap_count_ = 0;
// Create the loading root signature.
D3D12_ROOT_PARAMETER root_parameters[3];
// Parameter 0 is constants (changed multiple times when untiling).
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
root_parameters[0].Constants.ShaderRegister = 0;
root_parameters[0].Constants.RegisterSpace = 0;
root_parameters[0].Constants.Num32BitValues = sizeof(LoadConstants) / sizeof(uint32_t);
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 1 is the source (may be changed multiple times for the same
// destination).
D3D12_DESCRIPTOR_RANGE root_dest_range;
root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
root_dest_range.NumDescriptors = 1;
root_dest_range.BaseShaderRegister = 0;
root_dest_range.RegisterSpace = 0;
root_dest_range.OffsetInDescriptorsFromTableStart = 0;
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
root_parameters[1].DescriptorTable.pDescriptorRanges = &root_dest_range;
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 2 is the destination.
D3D12_DESCRIPTOR_RANGE root_source_range;
root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_source_range.NumDescriptors = 1;
root_source_range.BaseShaderRegister = 0;
root_source_range.RegisterSpace = 0;
root_source_range.OffsetInDescriptorsFromTableStart = 0;
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[2].DescriptorTable.NumDescriptorRanges = 1;
root_parameters[2].DescriptorTable.pDescriptorRanges = &root_source_range;
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
root_signature_desc.NumParameters = UINT(rex::countof(root_parameters));
root_signature_desc.pParameters = root_parameters;
root_signature_desc.NumStaticSamplers = 0;
root_signature_desc.pStaticSamplers = nullptr;
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
*(load_root_signature_.ReleaseAndGetAddressOf()) =
ui::d3d12::util::CreateRootSignature(provider, root_signature_desc);
if (!load_root_signature_) {
REXGPU_ERROR(
"D3D12TextureCache: Failed to create the texture loading root "
"signature");
return false;
}
// Specify the load shader code.
D3D12_SHADER_BYTECODE load_shader_code[kLoadShaderCount] = {};
load_shader_code[kLoadShaderIndex8bpb] =
D3D12_SHADER_BYTECODE{shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs)};
load_shader_code[kLoadShaderIndex16bpb] =
D3D12_SHADER_BYTECODE{shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs)};
load_shader_code[kLoadShaderIndex32bpb] =
D3D12_SHADER_BYTECODE{shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs)};
load_shader_code[kLoadShaderIndex64bpb] =
D3D12_SHADER_BYTECODE{shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs)};
load_shader_code[kLoadShaderIndex128bpb] = D3D12_SHADER_BYTECODE{
shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs)};
load_shader_code[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b5a1_b5g5r5a1_cs,
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs)};
load_shader_code[kLoadShaderIndexR5G6B5ToB5G6R5] = D3D12_SHADER_BYTECODE{
shaders::texture_load_r5g6b5_b5g6r5_cs, sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs)};
load_shader_code[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs,
sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs)};
load_shader_code[kLoadShaderIndexRGBA4ToBGRA4] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r4g4b4a4_b4g4r4a4_cs,
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_cs)};
load_shader_code[kLoadShaderIndexGBGR8ToGRGB8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_gbgr8_grgb8_cs, sizeof(shaders::texture_load_gbgr8_grgb8_cs)};
load_shader_code[kLoadShaderIndexGBGR8ToRGB8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_gbgr8_rgb8_cs, sizeof(shaders::texture_load_gbgr8_rgb8_cs)};
load_shader_code[kLoadShaderIndexBGRG8ToRGBG8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_bgrg8_rgbg8_cs, sizeof(shaders::texture_load_bgrg8_rgbg8_cs)};
load_shader_code[kLoadShaderIndexBGRG8ToRGB8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_bgrg8_rgb8_cs, sizeof(shaders::texture_load_bgrg8_rgb8_cs)};
load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16] = D3D12_SHADER_BYTECODE{
shaders::texture_load_r10g11b11_rgba16_cs, sizeof(shaders::texture_load_r10g11b11_rgba16_cs)};
load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16SNorm] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r10g11b11_rgba16_snorm_cs,
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs)};
load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16] = D3D12_SHADER_BYTECODE{
shaders::texture_load_r11g11b10_rgba16_cs, sizeof(shaders::texture_load_r11g11b10_rgba16_cs)};
load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16SNorm] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r11g11b10_rgba16_snorm_cs,
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs)};
load_shader_code[kLoadShaderIndexR16UNormToFloat] = D3D12_SHADER_BYTECODE{
shaders::texture_load_r16_unorm_float_cs, sizeof(shaders::texture_load_r16_unorm_float_cs)};
load_shader_code[kLoadShaderIndexR16SNormToFloat] = D3D12_SHADER_BYTECODE{
shaders::texture_load_r16_snorm_float_cs, sizeof(shaders::texture_load_r16_snorm_float_cs)};
load_shader_code[kLoadShaderIndexRG16UNormToFloat] = D3D12_SHADER_BYTECODE{
shaders::texture_load_rg16_unorm_float_cs, sizeof(shaders::texture_load_rg16_unorm_float_cs)};
load_shader_code[kLoadShaderIndexRG16SNormToFloat] = D3D12_SHADER_BYTECODE{
shaders::texture_load_rg16_snorm_float_cs, sizeof(shaders::texture_load_rg16_snorm_float_cs)};
load_shader_code[kLoadShaderIndexRGBA16UNormToFloat] = D3D12_SHADER_BYTECODE{
shaders::texture_load_rgba16_unorm_float_cs,
sizeof(shaders::texture_load_rgba16_unorm_float_cs)};
load_shader_code[kLoadShaderIndexRGBA16SNormToFloat] = D3D12_SHADER_BYTECODE{
shaders::texture_load_rgba16_snorm_float_cs,
sizeof(shaders::texture_load_rgba16_snorm_float_cs)};
load_shader_code[kLoadShaderIndexDXT1ToRGBA8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_dxt1_rgba8_cs, sizeof(shaders::texture_load_dxt1_rgba8_cs)};
load_shader_code[kLoadShaderIndexDXT3ToRGBA8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_dxt3_rgba8_cs, sizeof(shaders::texture_load_dxt3_rgba8_cs)};
load_shader_code[kLoadShaderIndexDXT5ToRGBA8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_dxt5_rgba8_cs, sizeof(shaders::texture_load_dxt5_rgba8_cs)};
load_shader_code[kLoadShaderIndexDXNToRG8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_dxn_rg8_cs, sizeof(shaders::texture_load_dxn_rg8_cs)};
load_shader_code[kLoadShaderIndexDXT3A] =
D3D12_SHADER_BYTECODE{shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs)};
load_shader_code[kLoadShaderIndexDXT3AAs1111ToBGRA4] =
D3D12_SHADER_BYTECODE{shaders::texture_load_dxt3aas1111_bgra4_cs,
sizeof(shaders::texture_load_dxt3aas1111_bgra4_cs)};
load_shader_code[kLoadShaderIndexDXT5AToR8] = D3D12_SHADER_BYTECODE{
shaders::texture_load_dxt5a_r8_cs, sizeof(shaders::texture_load_dxt5a_r8_cs)};
load_shader_code[kLoadShaderIndexCTX1] =
D3D12_SHADER_BYTECODE{shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs)};
load_shader_code[kLoadShaderIndexDepthUnorm] = D3D12_SHADER_BYTECODE{
shaders::texture_load_depth_unorm_cs, sizeof(shaders::texture_load_depth_unorm_cs)};
load_shader_code[kLoadShaderIndexDepthFloat] = D3D12_SHADER_BYTECODE{
shaders::texture_load_depth_float_cs, sizeof(shaders::texture_load_depth_float_cs)};
D3D12_SHADER_BYTECODE load_shader_code_scaled[kLoadShaderCount] = {};
if (IsDrawResolutionScaled()) {
load_shader_code_scaled[kLoadShaderIndex8bpb] = D3D12_SHADER_BYTECODE{
shaders::texture_load_8bpb_scaled_cs, sizeof(shaders::texture_load_8bpb_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndex16bpb] = D3D12_SHADER_BYTECODE{
shaders::texture_load_16bpb_scaled_cs, sizeof(shaders::texture_load_16bpb_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndex32bpb] = D3D12_SHADER_BYTECODE{
shaders::texture_load_32bpb_scaled_cs, sizeof(shaders::texture_load_32bpb_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndex64bpb] = D3D12_SHADER_BYTECODE{
shaders::texture_load_64bpb_scaled_cs, sizeof(shaders::texture_load_64bpb_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndex128bpb] = D3D12_SHADER_BYTECODE{
shaders::texture_load_128bpb_scaled_cs, sizeof(shaders::texture_load_128bpb_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs,
sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR5G6B5ToB5G6R5] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r5g6b5_b5g6r5_scaled_cs,
sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs,
sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexRGBA4ToBGRA4] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs,
sizeof(shaders::texture_load_r4g4b4a4_b4g4r4a4_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r10g11b11_rgba16_scaled_cs,
sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16SNorm] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs,
sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r11g11b10_rgba16_scaled_cs,
sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16SNorm] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs,
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR16UNormToFloat] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r16_unorm_float_scaled_cs,
sizeof(shaders::texture_load_r16_unorm_float_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexR16SNormToFloat] =
D3D12_SHADER_BYTECODE{shaders::texture_load_r16_snorm_float_scaled_cs,
sizeof(shaders::texture_load_r16_snorm_float_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexRG16UNormToFloat] =
D3D12_SHADER_BYTECODE{shaders::texture_load_rg16_unorm_float_scaled_cs,
sizeof(shaders::texture_load_rg16_unorm_float_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexRG16SNormToFloat] =
D3D12_SHADER_BYTECODE{shaders::texture_load_rg16_snorm_float_scaled_cs,
sizeof(shaders::texture_load_rg16_snorm_float_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexRGBA16UNormToFloat] =
D3D12_SHADER_BYTECODE{shaders::texture_load_rgba16_unorm_float_scaled_cs,
sizeof(shaders::texture_load_rgba16_unorm_float_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexRGBA16SNormToFloat] =
D3D12_SHADER_BYTECODE{shaders::texture_load_rgba16_snorm_float_scaled_cs,
sizeof(shaders::texture_load_rgba16_snorm_float_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexDepthUnorm] =
D3D12_SHADER_BYTECODE{shaders::texture_load_depth_unorm_scaled_cs,
sizeof(shaders::texture_load_depth_unorm_scaled_cs)};
load_shader_code_scaled[kLoadShaderIndexDepthFloat] =
D3D12_SHADER_BYTECODE{shaders::texture_load_depth_float_scaled_cs,
sizeof(shaders::texture_load_depth_float_scaled_cs)};
}
// Create the loading pipelines.
for (size_t i = 0; i < kLoadShaderCount; ++i) {
const D3D12_SHADER_BYTECODE& current_load_shader_code = load_shader_code[i];
if (!current_load_shader_code.pShaderBytecode) {
continue;
}
*(load_pipelines_[i].ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateComputePipeline(
device, current_load_shader_code.pShaderBytecode, current_load_shader_code.BytecodeLength,
load_root_signature_.Get());
if (!load_pipelines_[i]) {
REXGPU_ERROR(
"D3D12TextureCache: Failed to create the texture loading pipeline "
"for shader {}",
i);
return false;
}
if (IsDrawResolutionScaled()) {
const D3D12_SHADER_BYTECODE& current_load_shader_code_scaled = load_shader_code_scaled[i];
if (current_load_shader_code_scaled.pShaderBytecode) {
*(load_pipelines_scaled_[i].ReleaseAndGetAddressOf()) =
ui::d3d12::util::CreateComputePipeline(
device, current_load_shader_code_scaled.pShaderBytecode,
current_load_shader_code_scaled.BytecodeLength, load_root_signature_.Get());
if (!load_pipelines_scaled_[i]) {
REXGPU_ERROR(
"D3D12TextureCache: Failed to create the resolution-scaled "
"texture loading pipeline for shader {}",
i);
return false;
}
}
}
}
srv_descriptor_cache_allocated_ = 0;
// Create a heap with null SRV descriptors, since it's faster to copy a
// descriptor than to create an SRV, and null descriptors are used a lot (for
// the signed version when only unsigned is used, for instance).
D3D12_DESCRIPTOR_HEAP_DESC null_srv_descriptor_heap_desc;
null_srv_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
null_srv_descriptor_heap_desc.NumDescriptors = uint32_t(NullSRVDescriptorIndex::kCount);
null_srv_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
null_srv_descriptor_heap_desc.NodeMask = 0;
if (FAILED(device->CreateDescriptorHeap(&null_srv_descriptor_heap_desc,
IID_PPV_ARGS(&null_srv_descriptor_heap_)))) {
REXGPU_ERROR(
"D3D12TextureCache: Failed to create the descriptor heap for null "
"SRVs");
return false;
}
null_srv_descriptor_heap_start_ = null_srv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart();
D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc;
null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
null_srv_desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
null_srv_desc.Texture2DArray.MostDetailedMip = 0;
null_srv_desc.Texture2DArray.MipLevels = 1;
null_srv_desc.Texture2DArray.FirstArraySlice = 0;
null_srv_desc.Texture2DArray.ArraySize = 1;
null_srv_desc.Texture2DArray.PlaneSlice = 0;
null_srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
device->CreateShaderResourceView(
nullptr, &null_srv_desc,
provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_,
uint32_t(NullSRVDescriptorIndex::k2DArray)));
null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
null_srv_desc.Texture3D.MostDetailedMip = 0;
null_srv_desc.Texture3D.MipLevels = 1;
null_srv_desc.Texture3D.ResourceMinLODClamp = 0.0f;
device->CreateShaderResourceView(
nullptr, &null_srv_desc,
provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_,
uint32_t(NullSRVDescriptorIndex::k3D)));
null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
null_srv_desc.TextureCube.MostDetailedMip = 0;
null_srv_desc.TextureCube.MipLevels = 1;
null_srv_desc.TextureCube.ResourceMinLODClamp = 0.0f;
device->CreateShaderResourceView(
nullptr, &null_srv_desc,
provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_,
uint32_t(NullSRVDescriptorIndex::kCube)));
return true;
}
// Drops everything held by the texture cache: first the state owned by the
// base TextureCache, then the D3D12-specific SRV descriptor cache bookkeeping
// (the free list, the allocated-descriptor counter and the cache storage
// itself).
void D3D12TextureCache::ClearCache() {
// Let the base class release its state before the descriptor cache is reset.
TextureCache::ClearCache();
// Clear texture descriptor cache.
srv_descriptor_cache_free_.clear();
srv_descriptor_cache_allocated_ = 0;
srv_descriptor_cache_.clear();
}
void D3D12TextureCache::BeginSubmission(uint64_t new_submission_index) {
TextureCache::BeginSubmission(new_submission_index);
// ExecuteCommandLists is a full UAV and aliasing barrier.
if (IsDrawResolutionScaled()) {
size_t scaled_resolve_buffer_count = GetScaledResolveBufferCount();
for (size_t i = 0; i < scaled_resolve_buffer_count; ++i) {
ScaledResolveVirtualBuffer* scaled_resolve_buffer = scaled_resolve_2gb_buffers_[i].get();
if (scaled_resolve_buffer) {
scaled_resolve_buffer->ClearUAVBarrierPending();
}
}
std::memset(scaled_resolve_1gb_buffer_indices_, UINT8_MAX,
sizeof(scaled_resolve_1gb_buffer_indices_));
}
}
// Per-frame setup: runs the base class frame start, processes completed
// texture transfers, and zeroes the per-format "unsupported feature used"
// flags that EndFrame reads when it reports unsupported formats.
void D3D12TextureCache::BeginFrame() {
TextureCache::BeginFrame();
ProcessCompletedTextureTransfers();
// Start the frame with no unsupported format usage recorded.
std::memset(unsupported_format_features_used_, 0, sizeof(unsupported_format_features_used_));
}
// Logs every texture format for which an unsupported feature was recorded
// since the flags were last cleared, then clears the flag of each reported
// format. A header line is emitted once, before the first reported format.
void D3D12TextureCache::EndFrame() {
  // Number of entries of unsupported_format_features_used_ that are inspected.
  constexpr uint32_t kTrackedFormatCount = 64;

  bool header_logged = false;
  for (uint32_t format_index = 0; format_index < kTrackedFormatCount; ++format_index) {
    const uint32_t features = unsupported_format_features_used_[format_index];
    if (features != 0) {
      if (!header_logged) {
        REXGPU_ERROR("Unsupported texture formats used in the frame:");
        header_logged = true;
      }
      // One suffix per unsupported usage kind that was requested.
      const char* resource_suffix = (features & kUnsupportedResourceBit) ? " resource" : "";
      const char* unorm_suffix = (features & kUnsupportedUnormBit) ? " unsigned" : "";
      const char* snorm_suffix = (features & kUnsupportedSnormBit) ? " signed" : "";
      REXGPU_ERROR("* {}{}{}{}", FormatInfo::Get(xenos::TextureFormat(format_index))->name,
                   resource_suffix, unorm_suffix, snorm_suffix);
      unsupported_format_features_used_[format_index] = 0;
    }
  }
}
void D3D12TextureCache::RequestTextures(uint32_t used_texture_mask) {
#if XE_GPU_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES
TextureCache::RequestTextures(used_texture_mask);
// Pre-create 3D-as-2D wrappers before draw setup. Wrapper loading may bind
// compute pipelines and must happen in the texture request phase.
if (REXCVAR_GET(gpu_3d_to_2d_texture)) {
uint32_t textures_3d = used_texture_mask;
uint32_t index_3d;
while (rex::bit_scan_forward(textures_3d, &index_3d)) {
textures_3d &= ~(uint32_t(1) << index_3d);
const TextureBinding* binding = GetValidTextureBinding(index_3d);
if (!binding || binding->key.dimension != xenos::DataDimension::k3D) {
continue;
}
D3D12Texture* texture = static_cast<D3D12Texture*>(binding->texture);
if (texture) {
texture->GetOrCreate3DAs2DResource(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
D3D12Texture* texture_signed = static_cast<D3D12Texture*>(binding->texture_signed);
if (texture_signed) {
texture_signed->GetOrCreate3DAs2DResource(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
}
}
// Transition the textures to the needed usage - always in
// NON_PIXEL_SHADER_RESOURCE | PIXEL_SHADER_RESOURCE states because barriers
// between read-only stages, if needed, are discouraged (also if these were
// tracked separately, checks would be needed to make sure, if the same
// texture is bound through different fetch constants to both VS and PS, it
// would be in both states).
uint32_t textures_remaining = used_texture_mask;
uint32_t index;
while (rex::bit_scan_forward(textures_remaining, &index)) {
textures_remaining &= ~(uint32_t(1) << index);
const TextureBinding* binding = GetValidTextureBinding(index);
if (!binding) {
continue;
}
D3D12Texture* binding_texture = static_cast<D3D12Texture*>(binding->texture);
if (binding_texture != nullptr) {
// Will be referenced by the command list, so mark as used.
binding_texture->MarkAsUsed();
command_processor_.PushTransitionBarrier(
binding_texture->resource(),
binding_texture->SetResourceState(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE),
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
D3D12Texture* binding_texture_signed = static_cast<D3D12Texture*>(binding->texture_signed);
if (binding_texture_signed != nullptr) {
binding_texture_signed->MarkAsUsed();
command_processor_.PushTransitionBarrier(
binding_texture_signed->resource(),
binding_texture_signed->SetResourceState(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE),
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
}
}
// Checks whether previously written SRV keys still describe the textures
// currently bound to the fetch constants of the given host shader bindings.
//
// `keys` and `host_shader_bindings` are parallel arrays of
// `host_shader_binding_count` entries. Returns false as soon as one key no
// longer matches: either the binding has become invalid while the key is
// still valid, or the texture key, host swizzle or swizzled signs differ.
bool D3D12TextureCache::AreActiveTextureSRVKeysUpToDate(
    const TextureSRVKey* keys, const D3D12Shader::TextureBinding* host_shader_bindings,
    size_t host_shader_binding_count) const {
  for (size_t binding_index = 0; binding_index < host_shader_binding_count; ++binding_index) {
    const TextureSRVKey& cached = keys[binding_index];
    const TextureBinding* active =
        GetValidTextureBinding(host_shader_bindings[binding_index].fetch_constant);
    if (active == nullptr) {
      // Nothing valid is bound now - the key is current only if it is invalid
      // as well.
      if (cached.key.is_valid) {
        return false;
      }
    } else if (cached.key != active->key || cached.host_swizzle != active->host_swizzle ||
               cached.swizzled_signs != active->swizzled_signs) {
      return false;
    }
  }
  return true;
}
// Fills `keys` (one entry per host shader binding) with the key, host swizzle
// and swizzled signs of the texture currently bound to each binding's fetch
// constant. Entries without a valid binding get an invalid key, the 0000
// swizzle and unsigned signs.
void D3D12TextureCache::WriteActiveTextureSRVKeys(
    TextureSRVKey* keys, const D3D12Shader::TextureBinding* host_shader_bindings,
    size_t host_shader_binding_count) const {
  for (size_t binding_index = 0; binding_index < host_shader_binding_count; ++binding_index) {
    TextureSRVKey& out_key = keys[binding_index];
    const TextureBinding* active =
        GetValidTextureBinding(host_shader_bindings[binding_index].fetch_constant);
    if (active != nullptr) {
      out_key.key = active->key;
      out_key.host_swizzle = active->host_swizzle;
      out_key.swizzled_signs = active->swizzled_signs;
    } else {
      out_key.key.MakeInvalid();
      out_key.host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_0000;
      out_key.swizzled_signs = kSwizzledSignsUnsigned;
    }
  }
}
// Copies the SRV descriptor for the texture bound to the host shader
// binding's fetch constant into `handle` (bindful path only).
//
// When there is no valid, dimension-compatible binding, or the binding has no
// components of the signedness the shader binding asks for, a null SRV of the
// matching dimension is copied instead. A 3D texture fetched as 1D/2D gets a
// dedicated 2D-or-stacked view created (or looked up) on demand.
void D3D12TextureCache::WriteActiveTextureBindfulSRV(
    const D3D12Shader::TextureBinding& host_shader_binding, D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  assert_false(bindless_resources_used_);

  const uint32_t fetch_constant_index = host_shader_binding.fetch_constant;
  const bool wants_signed = host_shader_binding.is_signed;

  Texture* texture = nullptr;
  uint32_t descriptor_index = UINT32_MAX;

  const TextureBinding* binding = GetValidTextureBinding(fetch_constant_index);
  if (binding && AreDimensionsCompatible(host_shader_binding.dimension, binding->key.dimension)) {
    // Only bind a real texture if at least one component has the requested
    // signedness.
    const bool requested_sign_present =
        wants_signed ? texture_util::IsAnySignSigned(binding->swizzled_signs)
                     : texture_util::IsAnySignNotSigned(binding->swizzled_signs);
    if (requested_sign_present) {
      const bool needs_2d_view_of_3d =
          binding->key.dimension == xenos::DataDimension::k3D &&
          (host_shader_binding.dimension == xenos::FetchOpDimension::k1D ||
           host_shader_binding.dimension == xenos::FetchOpDimension::k2D);
      // Not supporting signed compressed textures - hopefully DXN and DXT5A are
      // not used as signed.
      texture = (wants_signed && IsSignedVersionSeparateForFormat(binding->key))
                    ? binding->texture_signed
                    : binding->texture;
      if (needs_2d_view_of_3d && texture != nullptr) {
        descriptor_index = FindOrCreateTextureDescriptor(
            *static_cast<D3D12Texture*>(texture), xenos::DataDimension::k2DOrStacked, wants_signed,
            binding->host_swizzle);
      } else {
        const D3D12TextureBinding& d3d12_binding = d3d12_texture_bindings_[fetch_constant_index];
        descriptor_index =
            wants_signed ? d3d12_binding.descriptor_index_signed : d3d12_binding.descriptor_index;
      }
    }
  }

  const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider();
  D3D12_CPU_DESCRIPTOR_HANDLE source_handle;
  if (descriptor_index == UINT32_MAX) {
    // Nothing to bind - pick the null descriptor matching the fetch dimension.
    NullSRVDescriptorIndex null_descriptor_index;
    switch (host_shader_binding.dimension) {
      case xenos::FetchOpDimension::kCube:
        null_descriptor_index = NullSRVDescriptorIndex::kCube;
        break;
      case xenos::FetchOpDimension::k3DOrStacked:
        null_descriptor_index = NullSRVDescriptorIndex::k3D;
        break;
      default:
        assert_true(host_shader_binding.dimension == xenos::FetchOpDimension::k1D ||
                    host_shader_binding.dimension == xenos::FetchOpDimension::k2D);
        null_descriptor_index = NullSRVDescriptorIndex::k2DArray;
        break;
    }
    source_handle = provider.OffsetViewDescriptor(null_srv_descriptor_heap_start_,
                                                  uint32_t(null_descriptor_index));
  } else {
    assert_not_null(texture);
    texture->MarkAsUsed();
    source_handle = GetTextureDescriptorCPUHandle(descriptor_index);
  }

  ID3D12Device* device = provider.GetDevice();
  {
#if XE_GPU_FINE_GRAINED_DRAW_SCOPES
    SCOPE_profile_cpu_i("gpu",
                        "rex::graphics::d3d12::D3D12TextureCache::WriteActiveTextureBindfulSRV->"
                        "CopyDescriptorsSimple");
#endif  // XE_GPU_FINE_GRAINED_DRAW_SCOPES
    device->CopyDescriptorsSimple(1, handle, source_handle,
                                  D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  }
}
// Returns the bindless descriptor index to use for the host shader binding
// (bindless path only).
//
// For a valid, dimension-compatible binding this is either the descriptor
// index stored for the binding, or - when a 3D texture is fetched as 1D/2D -
// the index of a 2D-or-stacked view created (or looked up) on demand. If no
// descriptor is available, the system null texture view of the matching
// dimension is returned.
uint32_t D3D12TextureCache::GetActiveTextureBindlessSRVIndex(
    const D3D12Shader::TextureBinding& host_shader_binding) {
  assert_true(bindless_resources_used_);

  const uint32_t fetch_constant_index = host_shader_binding.fetch_constant;
  uint32_t descriptor_index = UINT32_MAX;

  const TextureBinding* binding = GetValidTextureBinding(fetch_constant_index);
  if (binding && AreDimensionsCompatible(host_shader_binding.dimension, binding->key.dimension)) {
    const bool needs_2d_view_of_3d =
        binding->key.dimension == xenos::DataDimension::k3D &&
        (host_shader_binding.dimension == xenos::FetchOpDimension::k1D ||
         host_shader_binding.dimension == xenos::FetchOpDimension::k2D);
    if (!needs_2d_view_of_3d) {
      const D3D12TextureBinding& d3d12_binding = d3d12_texture_bindings_[fetch_constant_index];
      descriptor_index = host_shader_binding.is_signed ? d3d12_binding.descriptor_index_signed
                                                       : d3d12_binding.descriptor_index;
    } else {
      // NOTE(review): unlike WriteActiveTextureBindfulSRV, a signed shader
      // binding whose texture has no signed components falls back to the
      // unsigned view of binding->texture here instead of a null descriptor -
      // confirm this is intended.
      const bool use_signed =
          host_shader_binding.is_signed && texture_util::IsAnySignSigned(binding->swizzled_signs);
      Texture* texture = (use_signed && IsSignedVersionSeparateForFormat(binding->key))
                             ? binding->texture_signed
                             : binding->texture;
      if (texture != nullptr) {
        descriptor_index = FindOrCreateTextureDescriptor(
            *static_cast<D3D12Texture*>(texture), xenos::DataDimension::k2DOrStacked, use_signed,
            binding->host_swizzle);
      }
    }
  }

  if (descriptor_index != UINT32_MAX) {
    return descriptor_index;
  }

  // No usable descriptor - return the null view matching the fetch dimension.
  switch (host_shader_binding.dimension) {
    case xenos::FetchOpDimension::kCube:
      return uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTextureCube);
    case xenos::FetchOpDimension::k3DOrStacked:
      return uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTexture3D);
    default:
      assert_true(host_shader_binding.dimension == xenos::FetchOpDimension::k1D ||
                  host_shader_binding.dimension == xenos::FetchOpDimension::k2D);
      return uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTexture2DArray);
  }
}
// Builds the host sampler parameters for a shader sampler binding from the
// binding itself and the texture fetch constant it refers to.
//
// Filter and anisotropy settings of the binding take precedence unless they
// are set to "use fetch constant". The `anisotropic_override` configuration
// variable, when in the 0..5 range, replaces the anisotropy level for
// mipmapped, linearly min/mag-filtered samplers that use point or linear mip
// filtering.
D3D12TextureCache::SamplerParameters D3D12TextureCache::GetSamplerParameters(
    const D3D12Shader::SamplerBinding& binding) const {
  xenos::xe_gpu_texture_fetch_t fetch = register_file().GetTextureFetch(binding.fetch_constant);

  SamplerParameters parameters;

  // Addressing modes.
  xenos::ClampMode raw_clamp_x, raw_clamp_y, raw_clamp_z;
  texture_util::GetClampModesForDimension(fetch, raw_clamp_x, raw_clamp_y, raw_clamp_z);
  parameters.clamp_x = NormalizeClampMode(raw_clamp_x);
  parameters.clamp_y = NormalizeClampMode(raw_clamp_y);
  parameters.clamp_z = NormalizeClampMode(raw_clamp_z);

  // The border color only matters if some axis actually clamps to the border;
  // otherwise a fixed black value is stored.
  const bool border_used = xenos::ClampModeUsesBorder(parameters.clamp_x) ||
                           xenos::ClampModeUsesBorder(parameters.clamp_y) ||
                           xenos::ClampModeUsesBorder(parameters.clamp_z);
  if (border_used) {
    parameters.border_color = fetch.border_color;
  } else {
    parameters.border_color = xenos::BorderColor::k_ABGR_Black;
  }

  // Mip range.
  uint32_t first_mip, last_mip;
  texture_util::GetSubresourcesFromFetchConstant(fetch, nullptr, nullptr, nullptr, nullptr,
                                                 nullptr, &first_mip, &last_mip);
  parameters.mip_min_level = first_mip;
  const bool has_mips = last_mip > first_mip;

  // Resolve each filter: the binding's value, unless it defers to the fetch
  // constant.
  xenos::TextureFilter mag_filter = binding.mag_filter;
  if (mag_filter == xenos::TextureFilter::kUseFetchConst) {
    mag_filter = fetch.mag_filter;
  }
  xenos::TextureFilter min_filter = binding.min_filter;
  if (min_filter == xenos::TextureFilter::kUseFetchConst) {
    min_filter = fetch.min_filter;
  }
  xenos::TextureFilter mip_filter = binding.mip_filter;
  if (mip_filter == xenos::TextureFilter::kUseFetchConst) {
    mip_filter = fetch.mip_filter;
  }
  const bool min_mag_linear = (mag_filter == xenos::TextureFilter::kLinear) &&
                              (min_filter == xenos::TextureFilter::kLinear);
  const bool mip_point_or_linear = mip_filter == xenos::TextureFilter::kPoint ||
                                   mip_filter == xenos::TextureFilter::kLinear;
  const bool mip_base_map = mip_filter == xenos::TextureFilter::kBaseMap;

  // TODO(Triang3l): Disable filtering for texture formats not supporting it.
  xenos::AnisoFilter aniso_filter = binding.aniso_filter;
  if (aniso_filter == xenos::AnisoFilter::kUseFetchConst) {
    aniso_filter = fetch.aniso_filter;
  }
  const int32_t aniso_override = REXCVAR_GET(anisotropic_override);
  const bool override_in_range = aniso_override >= 0 && aniso_override <= 5;
  if (override_in_range && has_mips && !mip_base_map && min_mag_linear && mip_point_or_linear) {
    aniso_filter = xenos::AnisoFilter(aniso_override);
  }
  // Never exceed 16:1.
  if (aniso_filter > xenos::AnisoFilter::kMax_16_1) {
    aniso_filter = xenos::AnisoFilter::kMax_16_1;
  }
  parameters.aniso_filter = aniso_filter;

  // With anisotropic filtering enabled, all three filters are stored as
  // linear.
  const bool anisotropic = aniso_filter != xenos::AnisoFilter::kDisabled;
  parameters.mag_linear = anisotropic || mag_filter == xenos::TextureFilter::kLinear;
  parameters.min_linear = anisotropic || min_filter == xenos::TextureFilter::kLinear;
  parameters.mip_linear = anisotropic || mip_filter == xenos::TextureFilter::kLinear;
  parameters.mip_base_map = mip_base_map;

  return parameters;
}
// Translates `parameters` into a D3D12_SAMPLER_DESC and creates the sampler
// in the descriptor at `handle`.
void D3D12TextureCache::WriteSampler(SamplerParameters parameters,
                                     D3D12_CPU_DESCRIPTOR_HANDLE handle) const {
  const bool anisotropic = parameters.aniso_filter != xenos::AnisoFilter::kDisabled;

  D3D12_SAMPLER_DESC desc;

  // Filtering.
  if (anisotropic) {
    desc.Filter = D3D12_FILTER_ANISOTROPIC;
    // The enum value after kDisabled is the log2 of the ratio, plus one.
    desc.MaxAnisotropy = 1u << (uint32_t(parameters.aniso_filter) - 1);
  } else {
    const auto to_filter_type = [](bool linear) {
      return linear ? D3D12_FILTER_TYPE_LINEAR : D3D12_FILTER_TYPE_POINT;
    };
    const D3D12_FILTER_TYPE min_type = to_filter_type(parameters.min_linear);
    const D3D12_FILTER_TYPE mag_type = to_filter_type(parameters.mag_linear);
    const D3D12_FILTER_TYPE mip_type = to_filter_type(parameters.mip_linear);
    desc.Filter = D3D12_ENCODE_BASIC_FILTER(min_type, mag_type, mip_type,
                                            D3D12_FILTER_REDUCTION_TYPE_STANDARD);
    desc.MaxAnisotropy = 1;
  }

  // Addressing, indexed by the guest clamp mode value.
  static const D3D12_TEXTURE_ADDRESS_MODE kAddressModeMap[] = {
      /* kRepeat */ D3D12_TEXTURE_ADDRESS_MODE_WRAP,
      /* kMirroredRepeat */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
      /* kClampToEdge */ D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
      /* kMirrorClampToEdge */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE,
      // No GL_CLAMP (clamp to half edge, half border) equivalent in Direct3D
      // 12, but there's no Direct3D 9 equivalent anyway, and too weird to be
      // suitable for intentional real usage.
      /* kClampToHalfway */ D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
      // No mirror and clamp to border equivalents in Direct3D 12, but they
      // aren't there in Direct3D 9 either.
      /* kMirrorClampToHalfway */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE,
      /* kClampToBorder */ D3D12_TEXTURE_ADDRESS_MODE_BORDER,
      /* kMirrorClampToBorder */ D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE,
  };
  desc.AddressU = kAddressModeMap[uint32_t(parameters.clamp_x)];
  desc.AddressV = kAddressModeMap[uint32_t(parameters.clamp_y)];
  desc.AddressW = kAddressModeMap[uint32_t(parameters.clamp_z)];

  // LOD biasing is performed in shaders.
  desc.MipLODBias = 0.0f;
  desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;

  // Border color.
  const auto set_border_color = [&desc](float c0, float c1, float c2, float c3) {
    desc.BorderColor[0] = c0;
    desc.BorderColor[1] = c1;
    desc.BorderColor[2] = c2;
    desc.BorderColor[3] = c3;
  };
  switch (parameters.border_color) {
    case xenos::BorderColor::k_ABGR_White:
      set_border_color(1.0f, 1.0f, 1.0f, 1.0f);
      break;
    case xenos::BorderColor::k_ACBYCR_Black:
      set_border_color(0.5f, 0.0f, 0.5f, 0.0f);
      break;
    case xenos::BorderColor::k_ACBCRY_Black:
      set_border_color(0.0f, 0.5f, 0.5f, 0.0f);
      break;
    default:
      assert_true(parameters.border_color == xenos::BorderColor::k_ABGR_Black);
      set_border_color(0.0f, 0.0f, 0.0f, 0.0f);
      break;
  }

  // LOD range.
  desc.MinLOD = float(parameters.mip_min_level);
  if (!parameters.mip_base_map) {
    // Maximum mip level is in the texture resource itself.
    desc.MaxLOD = FLT_MAX;
  } else {
    // "It is undefined whether LOD clamping based on MinLOD and MaxLOD Sampler
    // states should happen before or after deciding if magnification is
    // occuring" - Direct3D 11.3 Functional Specification.
    // Using the GL_NEAREST / GL_LINEAR minification filter emulation logic
    // described in the Vulkan VkSamplerCreateInfo specification, preserving
    // magnification vs. minification - point mip sampling (usable only without
    // anisotropic filtering on Direct3D 12) and MaxLOD 0.25. With anisotropic
    // filtering, magnification vs. minification doesn't matter as the filter is
    // always linear for both on Direct3D 12 - but linear filtering specifically
    // is what must not be done for kBaseMap, so setting MaxLOD to MinLOD.
    desc.MaxLOD = desc.MinLOD;
    if (!anisotropic) {
      assert_false(parameters.mip_linear);
      desc.MaxLOD += 0.25f;
    }
  }

  ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice();
  device->CreateSampler(&desc, handle);
}
// Reduces the requested draw resolution scale, in place, to what the device
// can support.
//
// Without tiled resources (tier 1 or above) scaling is unavailable and both
// factors are forced to 1. Otherwise the factors are decremented one step at
// a time until the scaled shared memory range fits in the virtual address
// bits available to a single resource.
//
// Returns true if the requested scale was already supported, false if it had
// to be reduced.
bool D3D12TextureCache::ClampDrawResolutionScaleToMaxSupported(
    uint32_t& scale_x, uint32_t& scale_y, const ui::d3d12::D3D12Provider& provider) {
  if (provider.GetTiledResourcesTier() < D3D12_TILED_RESOURCES_TIER_1) {
    const bool requested_unscaled = scale_x <= 1 && scale_y <= 1;
    scale_x = 1;
    scale_y = 1;
    return requested_unscaled;
  }

  // Limit to the virtual address space available for a resource.
  const uint32_t address_bits_per_resource = provider.GetVirtualAddressBitsPerResource();
  bool request_supported = true;
  for (;;) {
    if (scale_x <= 1 && scale_y <= 1) {
      // Nothing left to reduce.
      break;
    }
    const uint64_t highest_scaled_address =
        uint64_t(SharedMemory::kBufferSize) * (scale_x * scale_y) - 1;
    if (uint32_t(64) - rex::lzcnt(highest_scaled_address) <= address_bits_per_resource) {
      // The whole scaled range is addressable.
      break;
    }
    request_supported = false;
    // When reducing from a square size, prefer decreasing the horizontal
    // resolution as vertical resolution difference is visible more clearly in
    // perspective.
    if (scale_x < scale_y) {
      --scale_y;
    } else {
      --scale_x;
    }
  }
  return request_supported;
}
bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled,
uint32_t length_unscaled,
uint32_t length_scaled_alignment_log2) {
assert_true(IsDrawResolutionScaled());
if (length_unscaled == 0) {
return true;
}
if (start_unscaled > SharedMemory::kBufferSize ||
(SharedMemory::kBufferSize - start_unscaled) < length_unscaled) {
// Exceeds the physical address space.
return false;
}
uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y();
uint64_t first_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area;
uint64_t length_scaled_alignment_bits = (UINT64_C(1) << length_scaled_alignment_log2) - 1;
uint64_t last_scaled =
(uint64_t(start_unscaled + (length_unscaled - 1)) * draw_resolution_scale_area +
length_scaled_alignment_bits) &
~length_scaled_alignment_bits;
const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider();
ID3D12Device* device = provider.GetDevice();
// Ensure GPU virtual memory for buffers that may be used to access the range
// is allocated - buffers are created. Always creating both buffers for all
// addresses before creating the heaps so when creating a new buffer, it can
// be safely assumed that no existing heaps should be mapped to it.
std::array<size_t, 2> possible_buffers_first =
GetPossibleScaledResolveBufferIndices(first_scaled);
std::array<size_t, 2> possible_buffers_last = GetPossibleScaledResolveBufferIndices(last_scaled);
size_t possible_buffer_first = std::min(possible_buffers_first[0], possible_buffers_first[1]);
size_t possible_buffer_last = std::max(possible_buffers_last[0], possible_buffers_last[1]);
for (size_t i = possible_buffer_first; i <= possible_buffer_last; ++i) {
if (scaled_resolve_2gb_buffers_[i]) {
continue;
}
D3D12_RESOURCE_DESC scaled_resolve_buffer_desc;
// Buffer indices are gigabytes.
ui::d3d12::util::FillBufferResourceDesc(
scaled_resolve_buffer_desc,
std::min(
uint64_t(1) << 31,
uint64_t(SharedMemory::kBufferSize) * draw_resolution_scale_area - (uint64_t(i) << 30)),
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
// The first access will be a resolve.
constexpr D3D12_RESOURCE_STATES kScaledResolveVirtualBufferInitialState =
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
ID3D12Resource* scaled_resolve_buffer_resource;
if (FAILED(device->CreateReservedResource(&scaled_resolve_buffer_desc,
kScaledResolveVirtualBufferInitialState, nullptr,
IID_PPV_ARGS(&scaled_resolve_buffer_resource)))) {
REXGPU_ERROR(
"D3D12TextureCache: Failed to create a 2 GB tiled buffer for draw "
"resolution scaling");
return false;
}
scaled_resolve_2gb_buffers_[i] =
std::unique_ptr<ScaledResolveVirtualBuffer>(new ScaledResolveVirtualBuffer(
scaled_resolve_buffer_resource, kScaledResolveVirtualBufferInitialState));
scaled_resolve_buffer_resource->Release();
}
uint32_t heap_first = uint32_t(first_scaled >> kScaledResolveHeapSizeLog2);
uint32_t heap_last = uint32_t(last_scaled >> kScaledResolveHeapSizeLog2);
for (uint32_t i = heap_first; i <= heap_last; ++i) {
if (scaled_resolve_heaps_[i]) {
continue;
}
auto direct_queue = provider.GetDirectQueue();
D3D12_HEAP_DESC heap_desc = {};
heap_desc.SizeInBytes = kScaledResolveHeapSize;
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | provider.GetHeapFlagCreateNotZeroed();
Microsoft::WRL::ComPtr<ID3D12Heap> scaled_resolve_heap;
if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&scaled_resolve_heap)))) {
REXGPU_ERROR("D3D12TextureCache: Failed to create a scaled resolve tile heap");
return false;
}
scaled_resolve_heaps_[i] = scaled_resolve_heap;
++scaled_resolve_heap_count_;
COUNT_profile_set("gpu/texture_cache/scaled_resolve_buffer_used_mb",
scaled_resolve_heap_count_ << (kScaledResolveHeapSizeLog2 - 20));
D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates;
region_start_coordinates.Y = 0;
region_start_coordinates.Z = 0;
region_start_coordinates.Subresource = 0;
D3D12_TILE_REGION_SIZE region_size;
region_size.NumTiles = kScaledResolveHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES;
region_size.UseBox = FALSE;
D3D12_TILE_RANGE_FLAGS range_flags = D3D12_TILE_RANGE_FLAG_NONE;
UINT heap_range_start_offset = 0;
UINT range_tile_count = kScaledResolveHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES;
std::array<size_t, 2> buffer_indices =
GetPossibleScaledResolveBufferIndices(uint64_t(i) << kScaledResolveHeapSizeLog2);
for (size_t j = 0; j < 2; ++j) {
size_t buffer_index = buffer_indices[j];
if (j && buffer_index == buffer_indices[0]) {
break;
}
region_start_coordinates.X =
UINT(((uint64_t(i) << kScaledResolveHeapSizeLog2) - (uint64_t(buffer_index) << 30)) /
D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES);
direct_queue->UpdateTileMappings(
scaled_resolve_2gb_buffers_[buffer_index]->resource(), 1, &region_start_coordinates,
&region_size, scaled_resolve_heap.Get(), 1, &range_flags, &heap_range_start_offset,
&range_tile_count, D3D12_TILE_MAPPING_FLAG_NONE);
}
command_processor_.NotifyQueueOperationsDoneDirectly();
}
return true;
}
bool D3D12TextureCache::MakeScaledResolveRangeCurrent(uint32_t start_unscaled,
uint32_t length_unscaled,
uint32_t length_scaled_alignment_log2) {
assert_true(IsDrawResolutionScaled());
if (!length_unscaled || start_unscaled >= SharedMemory::kBufferSize ||
(SharedMemory::kBufferSize - start_unscaled) < length_unscaled) {
// If length is 0, the needed buffer can't be chosen because no buffer is
// needed.
return false;
}
uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y();
uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area;
uint64_t length_scaled_alignment_bits = (UINT64_C(1) << length_scaled_alignment_log2) - 1;
uint64_t length_scaled =
(uint64_t(length_unscaled) * draw_resolution_scale_area + length_scaled_alignment_bits) &
~length_scaled_alignment_bits;
uint64_t last_scaled = start_scaled + (length_scaled - 1);
// Get one or two buffers that can hold the whole range.
std::array<size_t, 2> possible_buffer_indices_first =
GetPossibleScaledResolveBufferIndices(start_scaled);
std::array<size_t, 2> possible_buffer_indices_last =
GetPossibleScaledResolveBufferIndices(last_scaled);
size_t possible_buffer_indices_common[2];
size_t possible_buffer_indices_common_count = 0;
for (size_t i = 0;
i <= size_t(possible_buffer_indices_first[0] != possible_buffer_indices_first[1]); ++i) {
size_t possible_buffer_index_first = possible_buffer_indices_first[i];
for (size_t j = 0;
j <= size_t(possible_buffer_indices_last[0] != possible_buffer_indices_last[1]); ++j) {
if (possible_buffer_indices_last[j] == possible_buffer_index_first) {
bool possible_buffer_index_already_added = false;
for (size_t k = 0; k < possible_buffer_indices_common_count; ++k) {
if (possible_buffer_indices_common[k] == possible_buffer_index_first) {
possible_buffer_index_already_added = true;
break;
}
}
if (!possible_buffer_index_already_added) {
assert_true(possible_buffer_indices_common_count < 2);
possible_buffer_indices_common[possible_buffer_indices_common_count++] =
possible_buffer_index_first;
}
}
}
}
if (!possible_buffer_indices_common_count) {
// Too wide range requested - no buffer that contains both the start and the
// end.
return false;
}
size_t gigabyte_first = size_t(start_scaled >> 30);
size_t gigabyte_last = size_t(last_scaled >> 30);
// Choose the buffer that the range will be accessed through.
size_t new_buffer_index;
if (possible_buffer_indices_common_count >= 2) {
// Prefer the buffer that is already used to make less aliasing barriers.
assert_true(gigabyte_first + 1 >= gigabyte_last);
size_t possible_buffer_indices_already_used[2] = {};
for (size_t i = gigabyte_first; i <= gigabyte_last; ++i) {
size_t gigabyte_current_buffer_index = scaled_resolve_1gb_buffer_indices_[i];
for (size_t j = 0; j < possible_buffer_indices_common_count; ++j) {
if (possible_buffer_indices_common[j] == gigabyte_current_buffer_index) {
++possible_buffer_indices_already_used[j];
}
}
}
new_buffer_index = possible_buffer_indices_common[size_t(
possible_buffer_indices_already_used[1] > possible_buffer_indices_already_used[0])];
} else {
// The range can be accessed only by one buffer.
new_buffer_index = possible_buffer_indices_common[0];
}
// Switch the current buffer for the range.
const ScaledResolveVirtualBuffer* new_buffer =
scaled_resolve_2gb_buffers_[new_buffer_index].get();
assert_not_null(new_buffer);
ID3D12Resource* new_buffer_resource = new_buffer->resource();
for (size_t i = gigabyte_first; i <= gigabyte_last; ++i) {
size_t gigabyte_current_buffer_index = scaled_resolve_1gb_buffer_indices_[i];
if (gigabyte_current_buffer_index == new_buffer_index) {
continue;
}
if (gigabyte_current_buffer_index != SIZE_MAX) {
ScaledResolveVirtualBuffer* gigabyte_current_buffer =
scaled_resolve_2gb_buffers_[gigabyte_current_buffer_index].get();
assert_not_null(gigabyte_current_buffer);
command_processor_.PushAliasingBarrier(gigabyte_current_buffer->resource(),
new_buffer_resource);
// An aliasing barrier synchronizes and flushes everything.
gigabyte_current_buffer->ClearUAVBarrierPending();
}
scaled_resolve_1gb_buffer_indices_[i] = new_buffer_index;
}
scaled_resolve_current_range_start_scaled_ = start_scaled;
scaled_resolve_current_range_length_scaled_ = length_scaled;
return true;
}
// Records new_state as the resource state of the current scaled resolve buffer
// and pushes a transition barrier to it from the value returned by
// SetResourceState.
void D3D12TextureCache::TransitionCurrentScaledResolveRange(D3D12_RESOURCE_STATES new_state) {
  assert_true(IsDrawResolutionScaled());
  ScaledResolveVirtualBuffer& current_buffer = GetCurrentScaledResolveBuffer();
  ID3D12Resource* const current_resource = current_buffer.resource();
  const auto state_before = current_buffer.SetResourceState(new_state);
  command_processor_.PushTransitionBarrier(current_resource, state_before, new_state);
}
// Writes a typed buffer SRV covering the current scaled resolve range to the
// descriptor at `handle`.
// element_size_bytes_pow2 - log2 of the element size in bytes; selects the
//                           uint format and converts the byte length and the
//                           byte offset within the buffer to element units.
void D3D12TextureCache::CreateCurrentScaledResolveRangeUintPow2SRV(
    D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2) {
  assert_true(IsDrawResolutionScaled());
  const size_t current_buffer_index = GetCurrentScaledResolveBufferIndex();
  const ScaledResolveVirtualBuffer* current_buffer =
      scaled_resolve_2gb_buffers_[current_buffer_index].get();
  assert_not_null(current_buffer);
  // The buffer with index N begins at N gigabytes of the scaled address space.
  const auto first_element =
      (scaled_resolve_current_range_start_scaled_ - (uint64_t(current_buffer_index) << 30)) >>
      element_size_bytes_pow2;
  const uint32_t element_count =
      uint32_t(scaled_resolve_current_range_length_scaled_ >> element_size_bytes_pow2);
  ui::d3d12::util::CreateBufferTypedSRV(
      command_processor_.GetD3D12Provider().GetDevice(), handle, current_buffer->resource(),
      ui::d3d12::util::GetUintPow2DXGIFormat(element_size_bytes_pow2), element_count,
      first_element);
}
// Writes a typed buffer UAV covering the current scaled resolve range to the
// descriptor at `handle`.
// element_size_bytes_pow2 - log2 of the element size in bytes; selects the
//                           uint format and converts the byte length and the
//                           byte offset within the buffer to element units.
void D3D12TextureCache::CreateCurrentScaledResolveRangeUintPow2UAV(
    D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2) {
  assert_true(IsDrawResolutionScaled());
  const size_t current_buffer_index = GetCurrentScaledResolveBufferIndex();
  const ScaledResolveVirtualBuffer* current_buffer =
      scaled_resolve_2gb_buffers_[current_buffer_index].get();
  assert_not_null(current_buffer);
  // The buffer with index N begins at N gigabytes of the scaled address space.
  const auto first_element =
      (scaled_resolve_current_range_start_scaled_ - (uint64_t(current_buffer_index) << 30)) >>
      element_size_bytes_pow2;
  const uint32_t element_count =
      uint32_t(scaled_resolve_current_range_length_scaled_ >> element_size_bytes_pow2);
  ui::d3d12::util::CreateBufferTypedUAV(
      command_processor_.GetD3D12Provider().GetDevice(), handle, current_buffer->resource(),
      ui::d3d12::util::GetUintPow2DXGIFormat(element_size_bytes_pow2), element_count,
      first_element);
}
// Finds or creates, and loads, the texture described by texture fetch constant
// 0, transitions it to the non-pixel shader resource state, and fills a 2D SRV
// description for its first mip level.
// srv_desc_out        - receives the format, view dimension, component mapping
//                       and Texture2D fields of the view.
// format_out          - receives the format from the texture's key.
// width_unscaled_out,
// height_unscaled_out - optional (may be null); receive the width and the
//                       height from the texture's key.
// Returns the texture resource, or nullptr if the fetch constant doesn't
// describe a valid 2D texture with a non-zero base page, or if the texture
// couldn't be obtained or loaded (the outputs are not written in these cases).
ID3D12Resource* D3D12TextureCache::RequestSwapTexture(D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
                                                      xenos::TextureFormat& format_out,
                                                      uint32_t* width_unscaled_out,
                                                      uint32_t* height_unscaled_out) {
  const auto& regs = register_file();
  xenos::xe_gpu_texture_fetch_t fetch = regs.GetTextureFetch(0);
  TextureKey key;
  BindingInfoFromFetchConstant(fetch, key, nullptr);
  if (!key.is_valid) {
    return nullptr;
  }
  if (key.base_page == 0) {
    return nullptr;
  }
  if (key.dimension != xenos::DataDimension::k2DOrStacked) {
    return nullptr;
  }

  D3D12Texture* texture = static_cast<D3D12Texture*>(FindOrCreateTexture(key));
  if (texture == nullptr) {
    return nullptr;
  }
  if (!LoadTextureData(*texture)) {
    return nullptr;
  }
  texture->MarkAsUsed();

  // The swap texture is likely to be used only for the presentation compute
  // shader, and not during emulation, where it'd be NON_PIXEL_SHADER_RESOURCE |
  // PIXEL_SHADER_RESOURCE.
  constexpr D3D12_RESOURCE_STATES kSwapTextureState =
      D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
  ID3D12Resource* texture_resource = texture->resource();
  command_processor_.PushTransitionBarrier(texture_resource,
                                           texture->SetResourceState(kSwapTextureState),
                                           kSwapTextureState);

  // The view is described using the key built from the fetch constant.
  srv_desc_out.Format = GetDXGIUnormFormat(key);
  srv_desc_out.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
  srv_desc_out.Shader4ComponentMapping =
      GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(key)) |
      D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES;
  srv_desc_out.Texture2D.MostDetailedMip = 0;
  srv_desc_out.Texture2D.MipLevels = 1;
  srv_desc_out.Texture2D.PlaneSlice = 0;
  srv_desc_out.Texture2D.ResourceMinLODClamp = 0.0f;

  // Only the key stored in the texture, not the result of
  // BindingInfoFromFetchConstant, contains whether the texture is scaled, so
  // the remaining outputs are taken from it.
  TextureKey texture_key = texture->key();
  if (width_unscaled_out != nullptr) {
    *width_unscaled_out = texture_key.GetWidth();
  }
  if (height_unscaled_out != nullptr) {
    *height_unscaled_out = texture_key.GetHeight();
  }
  format_out = texture_key.format;
  return texture_resource;
}
// Wraps a texture resource together with its currently tracked resource state,
// and reports the size from the device's allocation info for the resource's
// description as the host memory usage of the texture.
D3D12TextureCache::D3D12Texture::D3D12Texture(D3D12TextureCache& texture_cache,
                                              const TextureKey& key, ID3D12Resource* resource,
                                              D3D12_RESOURCE_STATES resource_state,
                                              bool track_usage)
    : Texture(texture_cache, key, track_usage),
      resource_(resource),
      resource_state_(resource_state) {
  const D3D12_RESOURCE_DESC desc = resource_->GetDesc();
  ID3D12Device* const d3d12_device =
      texture_cache.command_processor_.GetD3D12Provider().GetDevice();
  const auto allocation_info = d3d12_device->GetResourceAllocationInfo(0, 1, &desc);
  SetHostMemoryUsage(allocation_info.SizeInBytes);
}
// Hands every descriptor stored in srv_descriptors_ back to the owning texture
// cache.
D3D12TextureCache::D3D12Texture::~D3D12Texture() {
  auto& owner_cache = static_cast<D3D12TextureCache&>(texture_cache());
  for (const auto& [srv_key, descriptor] : srv_descriptors_) {
    owner_cache.ReleaseTextureDescriptor(descriptor);
  }
}
// Tells whether a texture of the given guest format and size has to be loaded
// in its uncompressed host format.
// Returns false if the format has no uncompressed host format, if both
// dimensions are multiples of the block size, or if the blocks are 4x4 and the
// provider supports unaligned block textures; true otherwise.
bool D3D12TextureCache::IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width,
                                              uint32_t height) const {
  if (host_formats_[uint32_t(format)].dxgi_format_uncompressed == DXGI_FORMAT_UNKNOWN) {
    return false;
  }
  const FormatInfo* info = FormatInfo::Get(format);
  // The masks rely on the block dimensions being powers of two.
  const bool width_block_aligned = (width & (info->block_width - 1)) == 0;
  const bool height_block_aligned = (height & (info->block_height - 1)) == 0;
  if (width_block_aligned && height_block_aligned) {
    return false;
  }
  // UnalignedBlockTexturesSupported is for block-compressed textures with the
  // block size of 4x4, but not for 2x1 (4:2:2) subsampled formats.
  const bool is_4x4_block = info->block_width == 4 && info->block_height == 4;
  return !(is_4x4_block &&
           command_processor_.GetD3D12Provider().AreUnalignedBlockTexturesSupported());
}
// Picks the load shader of the key's host format: the signed one for keys with
// signed_separate set, otherwise the decompressing one if decompression is
// needed for the key's size, otherwise the regular one.
TextureCache::LoadShaderIndex D3D12TextureCache::GetLoadShaderIndex(TextureKey key) const {
  const HostFormat& format_entry = host_formats_[uint32_t(key.format)];
  if (key.signed_separate) {
    return format_entry.load_shader_signed;
  }
  return IsDecompressionNeeded(key.format, key.GetWidth(), key.GetHeight())
             ? format_entry.load_shader_decompress
             : format_entry.load_shader;
}
// Returns true if the key's host format has a signed load shader and that
// shader is not the same as the regular load shader.
bool D3D12TextureCache::IsSignedVersionSeparateForFormat(TextureKey key) const {
  const HostFormat& format_entry = host_formats_[uint32_t(key.format)];
  const auto signed_shader = format_entry.load_shader_signed;
  if (signed_shader == kLoadShaderIndexUnknown) {
    return false;
  }
  return signed_shader != format_entry.load_shader;
}
// Returns true if the key has a known load shader and a scaled load pipeline
// exists for that shader.
bool D3D12TextureCache::IsScaledResolveSupportedForFormat(TextureKey key) const {
  const LoadShaderIndex shader_index = GetLoadShaderIndex(key);
  if (shader_index == kLoadShaderIndexUnknown) {
    return false;
  }
  return load_pipelines_scaled_[shader_index] != nullptr;
}
// Returns the swizzle stored in the host format table entry for the key's
// guest format.
uint32_t D3D12TextureCache::GetHostFormatSwizzle(TextureKey key) const {
  const HostFormat& format_entry = host_formats_[uint32_t(key.format)];
  return format_entry.swizzle;
}
// Returns the Direct3D 12 limit on the width and the height of a host texture
// for the given guest dimension, or 0 (after asserting) for an unhandled
// dimension.
uint32_t D3D12TextureCache::GetMaxHostTextureWidthHeight(xenos::DataDimension dimension) const {
  uint32_t max_width_height = 0;
  switch (dimension) {
    case xenos::DataDimension::k1D:
    case xenos::DataDimension::k2DOrStacked:
      // 1D and 2D are emulated as 2D arrays.
      max_width_height = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
      break;
    case xenos::DataDimension::k3D:
      max_width_height = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
      break;
    case xenos::DataDimension::kCube:
      max_width_height = D3D12_REQ_TEXTURECUBE_DIMENSION;
      break;
    default:
      assert_unhandled_case(dimension);
      break;
  }
  return max_width_height;
}
// Returns the Direct3D 12 limit on the depth (3D) or the array size (other
// dimensions) of a host texture for the given guest dimension, or 0 (after
// asserting) for an unhandled dimension.
uint32_t D3D12TextureCache::GetMaxHostTextureDepthOrArraySize(
    xenos::DataDimension dimension) const {
  uint32_t max_depth_or_array_size = 0;
  switch (dimension) {
    case xenos::DataDimension::k1D:
    case xenos::DataDimension::k2DOrStacked:
      // 1D and 2D are emulated as 2D arrays.
      max_depth_or_array_size = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION;
      break;
    case xenos::DataDimension::k3D:
      max_depth_or_array_size = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
      break;
    case xenos::DataDimension::kCube:
      // The array axis limit rounded down to a multiple of 6.
      max_depth_or_array_size = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION / 6 * 6;
      break;
    default:
      assert_unhandled_case(dimension);
      break;
  }
  return max_depth_or_array_size;
}
// Creates the committed host texture resource for the key, in the copy
// destination state, and wraps it in a D3D12Texture.
// Returns nullptr if there is no host resource format for the key (in which
// case the unsupported resource bit is also set for the guest format) or if
// the resource couldn't be created.
std::unique_ptr<TextureCache::Texture> D3D12TextureCache::CreateTexture(TextureKey key) {
  const DXGI_FORMAT resource_format = GetDXGIResourceFormat(key);
  if (resource_format == DXGI_FORMAT_UNKNOWN) {
    unsupported_format_features_used_[uint32_t(key.format)] |= kUnsupportedResourceBit;
    return nullptr;
  }

  // Scaled resolve textures are created at the draw resolution scale.
  UINT64 host_width = key.GetWidth();
  UINT host_height = key.GetHeight();
  if (key.scaled_resolve) {
    host_width *= draw_resolution_scale_x();
    host_height *= draw_resolution_scale_y();
  }

  D3D12_RESOURCE_DESC desc;
  // 1D textures are treated as 2D for simplicity.
  desc.Dimension = key.dimension == xenos::DataDimension::k3D
                       ? D3D12_RESOURCE_DIMENSION_TEXTURE3D
                       : D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  desc.Alignment = 0;
  desc.Width = host_width;
  desc.Height = host_height;
  desc.DepthOrArraySize = key.GetDepthOrArraySize();
  desc.MipLevels = key.mip_max_level + 1;
  desc.Format = resource_format;
  desc.SampleDesc.Count = 1;
  desc.SampleDesc.Quality = 0;
  desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  // Untiling through a buffer instead of using unordered access because copying
  // is not done that often.
  desc.Flags = D3D12_RESOURCE_FLAG_NONE;

  const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider();
  ID3D12Device* device = provider.GetDevice();
  // Assuming untiling will be the next operation.
  const D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
  Microsoft::WRL::ComPtr<ID3D12Resource> texture_resource;
  const HRESULT create_result = device->CreateCommittedResource(
      &ui::d3d12::util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), &desc,
      initial_state, nullptr, IID_PPV_ARGS(&texture_resource));
  if (FAILED(create_result)) {
    return nullptr;
  }
  return std::unique_ptr<Texture>(
      new D3D12Texture(*this, key, texture_resource.Get(), initial_state));
}
bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
bool load_mips) {
D3D12Texture& d3d12_texture = static_cast<D3D12Texture&>(texture);
TextureKey texture_key = d3d12_texture.key();
DeferredCommandList& command_list = command_processor_.GetDeferredCommandList();
ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice();
// Get the pipeline.
LoadShaderIndex load_shader = GetLoadShaderIndex(texture_key);
if (load_shader == kLoadShaderIndexUnknown) {
return false;
}
bool host_format_is_signed =
texture_key.signed_separate && IsSignedVersionSeparateForFormat(texture_key);
DXGI_FORMAT host_sample_format = host_format_is_signed
? host_formats_[uint32_t(texture_key.format)].dxgi_format_signed
: GetDXGIUnormFormat(texture_key);
LoadShaderIndex load_shader_float_convert = kLoadShaderIndexUnknown;
if (host_sample_format == DXGI_FORMAT_R16G16B16A16_FLOAT) {
switch (load_shader) {
case kLoadShaderIndexR10G11B11ToRGBA16:
case kLoadShaderIndexR11G11B10ToRGBA16:
load_shader_float_convert = kLoadShaderIndexRGBA16UNormToFloat;
break;
case kLoadShaderIndexR10G11B11ToRGBA16SNorm:
case kLoadShaderIndexR11G11B10ToRGBA16SNorm:
load_shader_float_convert = kLoadShaderIndexRGBA16SNormToFloat;
break;
default:
break;
}
}
bool texture_resolution_scaled = texture_key.scaled_resolve;
ID3D12PipelineState* pipeline = texture_resolution_scaled
? load_pipelines_scaled_[load_shader].Get()
: load_pipelines_[load_shader].Get();
if (pipeline == nullptr) {
return false;
}
const LoadShaderInfo& load_shader_info = GetLoadShaderInfo(load_shader);
const LoadShaderInfo* load_shader_info_float_convert = nullptr;
ID3D12PipelineState* pipeline_float_convert = nullptr;
if (load_shader_float_convert != kLoadShaderIndexUnknown) {
pipeline_float_convert = texture_resolution_scaled
? load_pipelines_scaled_[load_shader_float_convert].Get()
: load_pipelines_[load_shader_float_convert].Get();
if (pipeline_float_convert == nullptr) {
return false;
}
load_shader_info_float_convert = &GetLoadShaderInfo(load_shader_float_convert);
}
// Get the guest layout.
const texture_util::TextureGuestLayout& guest_layout = d3d12_texture.guest_layout();
xenos::DataDimension dimension = texture_key.dimension;
bool is_3d = dimension == xenos::DataDimension::k3D;
bool is_3d_tiling = is_3d || d3d12_texture.force_load_3d_tiling();
uint32_t width = texture_key.GetWidth();
uint32_t height = texture_key.GetHeight();
uint32_t depth_or_array_size = texture_key.GetDepthOrArraySize();
uint32_t depth = is_3d ? depth_or_array_size : 1;
uint32_t array_size = is_3d ? 1 : depth_or_array_size;
xenos::TextureFormat guest_format = texture_key.format;
const FormatInfo* guest_format_info = FormatInfo::Get(guest_format);
uint32_t block_width = guest_format_info->block_width;
uint32_t block_height = guest_format_info->block_height;
uint32_t bytes_per_block = guest_format_info->bytes_per_block();
uint32_t level_first = load_base ? 0 : 1;
uint32_t level_last = load_mips ? texture_key.mip_max_level : 0;
assert_true(level_first <= level_last);
uint32_t level_packed = guest_layout.packed_level;
uint32_t level_stored_first = std::min(level_first, level_packed);
uint32_t level_stored_last = std::min(level_last, level_packed);
uint32_t texture_resolution_scale_x = texture_resolution_scaled ? draw_resolution_scale_x() : 1;
uint32_t texture_resolution_scale_y = texture_resolution_scaled ? draw_resolution_scale_y() : 1;
// The loop counter can mean two things depending on whether the packed mip
// tail is stored as mip 0, because in this case, it would be ambiguous since
// both the base and the mips would be on "level 0", but stored in separate
// places.
uint32_t loop_level_first, loop_level_last;
if (level_packed == 0) {
// Packed mip tail is the level 0 - may need to load mip tails for the base,
// the mips, or both.
// Loop iteration 0 - base packed mip tail.
// Loop iteration 1 - mips packed mip tail.
loop_level_first = uint32_t(level_first != 0);
loop_level_last = uint32_t(level_last != 0);
} else {
// Packed mip tail is not the level 0.
// Loop iteration is the actual level being loaded.
loop_level_first = level_stored_first;
loop_level_last = level_stored_last;
}
// Get the host layout and the buffer.
bool host_block_compressed = host_formats_[uint32_t(guest_format)].is_block_compressed &&
!IsDecompressionNeeded(guest_format, width, height);
uint32_t host_block_width = host_block_compressed ? block_width : 1;
uint32_t host_block_height = host_block_compressed ? block_height : 1;
uint32_t host_x_blocks_per_thread = UINT32_C(1)
<< load_shader_info.guest_x_blocks_per_thread_log2;
if (!host_block_compressed) {
// Decompressing guest blocks.
host_x_blocks_per_thread *= block_width;
}
UINT64 copy_buffer_size = 0;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT host_slice_layout_base;
UINT64 host_slice_size_base;
// Indexing is the same as for guest stored mips:
// 1...min(level_last, level_packed) if level_packed is not 0, or only 0 if
// level_packed == 0.
D3D12_PLACED_SUBRESOURCE_FOOTPRINT
host_slice_layouts_mips[xenos::kTextureMaxMips];
UINT64 host_slice_sizes_mips[xenos::kTextureMaxMips];
// Using custom calculations instead of GetCopyableFootprints because
// shaders may unconditionally copy multiple blocks along X per thread for
// simplicity, to make sure all rows (also including the last one -
// GetCopyableFootprints aligns row offsets, but not the total size) are
// properly padded to the number of blocks copied in an invocation without
// implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT.
DXGI_FORMAT host_copy_format = GetDXGIResourceFormat(guest_format, width, height);
for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; ++loop_level) {
bool is_base = loop_level == 0;
uint32_t level = (level_packed == 0) ? 0 : loop_level;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout =
is_base ? host_slice_layout_base : host_slice_layouts_mips[level];
level_host_slice_layout.Offset = copy_buffer_size;
level_host_slice_layout.Footprint.Format = host_copy_format;
if (level == level_packed) {
// Loading the packed tail for the base or the mips - load the whole tail
// to copy regions out of it.
const texture_util::TextureGuestLayout::Level& guest_layout_packed =
is_base ? guest_layout.base : guest_layout.mips[level];
level_host_slice_layout.Footprint.Width = guest_layout_packed.x_extent_blocks * block_width;
level_host_slice_layout.Footprint.Height = guest_layout_packed.y_extent_blocks * block_height;
level_host_slice_layout.Footprint.Depth = guest_layout_packed.z_extent;
} else {
level_host_slice_layout.Footprint.Width = std::max(width >> level, uint32_t(1));
level_host_slice_layout.Footprint.Height = std::max(height >> level, uint32_t(1));
level_host_slice_layout.Footprint.Depth = std::max(depth >> level, uint32_t(1));
}
level_host_slice_layout.Footprint.Width =
rex::round_up(level_host_slice_layout.Footprint.Width * texture_resolution_scale_x,
UINT(host_block_width));
level_host_slice_layout.Footprint.Height =
rex::round_up(level_host_slice_layout.Footprint.Height * texture_resolution_scale_y,
UINT(host_block_height));
level_host_slice_layout.Footprint.RowPitch =
rex::align(rex::round_up(level_host_slice_layout.Footprint.Width / host_block_width,
host_x_blocks_per_thread) *
load_shader_info.bytes_per_host_block,
uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
UINT64 level_host_slice_size =
rex::align(UINT64(level_host_slice_layout.Footprint.RowPitch) *
(level_host_slice_layout.Footprint.Height / host_block_height) *
level_host_slice_layout.Footprint.Depth,
UINT64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT));
(is_base ? host_slice_size_base : host_slice_sizes_mips[level]) = level_host_slice_size;
copy_buffer_size += level_host_slice_size * array_size;
}
D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
ID3D12Resource* copy_buffer =
command_processor_.RequestScratchGPUBuffer(uint32_t(copy_buffer_size), copy_buffer_state);
if (copy_buffer == nullptr) {
return false;
}
// Begin loading.
// May use different buffers for scaled base and mips, and also addressability
// of more than 128 * 2^20 (2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP)
// texels is not mandatory - need two separate UAV descriptors for base and
// mips.
// Destination.
uint32_t descriptor_count = 1;
if (texture_resolution_scaled) {
// Source - base and mips, one or both.
descriptor_count += (level_first == 0 && level_last != 0) ? 2 : 1;
} else {
// Source - shared memory.
if (!bindless_resources_used_) {
++descriptor_count;
}
}
if (pipeline_float_convert != nullptr) {
++descriptor_count;
}
std::array<ui::d3d12::util::DescriptorCpuGpuHandlePair, 4> descriptors_allocated;
if (!command_processor_.RequestOneUseSingleViewDescriptors(descriptor_count,
descriptors_allocated.data())) {
command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
return false;
}
uint32_t descriptor_write_index = 0;
command_processor_.SetExternalPipeline(pipeline);
command_list.D3DSetComputeRootSignature(load_root_signature_.Get());
// Set up the destination descriptor.
assert_true(descriptor_write_index < descriptor_count);
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_dest =
descriptors_allocated[descriptor_write_index++];
ui::d3d12::util::CreateBufferTypedUAV(
device, descriptor_dest.first, copy_buffer,
ui::d3d12::util::GetUintPow2DXGIFormat(load_shader_info.dest_bpe_log2),
uint32_t(copy_buffer_size) >> load_shader_info.dest_bpe_log2);
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
// Set up the unscaled source descriptor (scaled needs two descriptors that
// depend on the buffer being current, so they will be set later - for mips,
// after loading the base is done).
if (!texture_resolution_scaled) {
D3D12SharedMemory& d3d12_shared_memory = static_cast<D3D12SharedMemory&>(shared_memory());
d3d12_shared_memory.UseForReading();
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_unscaled_source;
if (bindless_resources_used_) {
descriptor_unscaled_source = command_processor_.GetSharedMemoryUintPow2BindlessSRVHandlePair(
load_shader_info.source_bpe_log2);
} else {
assert_true(descriptor_write_index < descriptor_count);
descriptor_unscaled_source = descriptors_allocated[descriptor_write_index++];
d3d12_shared_memory.WriteUintPow2SRVDescriptor(descriptor_unscaled_source.first,
load_shader_info.source_bpe_log2);
}
command_list.D3DSetComputeRootDescriptorTable(1, descriptor_unscaled_source.second);
}
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_float_convert_source = {};
if (pipeline_float_convert != nullptr) {
assert_true(descriptor_write_index < descriptor_count);
descriptor_float_convert_source = descriptors_allocated[descriptor_write_index++];
}
// Submit the copy buffer population commands.
auto& cbuffer_pool = command_processor_.GetConstantBufferPool();
LoadConstants load_constants;
// 3 bits for each.
assert_true(texture_resolution_scale_x <= 7);
assert_true(texture_resolution_scale_y <= 7);
load_constants.is_tiled_3d_endian_scale =
uint32_t(texture_key.tiled) | (uint32_t(is_3d_tiling) << 1) |
(uint32_t(texture_key.endianness) << 2) | (texture_resolution_scale_x << 4) |
(texture_resolution_scale_y << 7);
// The loop is slices within levels because the base and the levels may need
// different portions of the scaled resolve virtual address space to be
// available through buffers, and to create a descriptor, the buffer start
// address is required - which may be different for base and mips.
bool scaled_mips_source_set_up = false;
uint32_t guest_x_blocks_per_group_log2 = load_shader_info.GetGuestXBlocksPerGroupLog2();
for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; ++loop_level) {
bool is_base = loop_level == 0;
uint32_t level = (level_packed == 0) ? 0 : loop_level;
uint32_t guest_address = (is_base ? texture_key.base_page : texture_key.mip_page) << 12;
// Set up the base or mips source, also making it accessible if loading from
// scaled resolve memory.
if (texture_resolution_scaled && (is_base || !scaled_mips_source_set_up)) {
uint32_t guest_size_unscaled =
is_base ? d3d12_texture.GetGuestBaseSize() : d3d12_texture.GetGuestMipsSize();
if (!MakeScaledResolveRangeCurrent(guest_address, guest_size_unscaled,
load_shader_info.source_bpe_log2)) {
command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
return false;
}
TransitionCurrentScaledResolveRange(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
assert_true(descriptor_write_index < descriptor_count);
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_scaled_source =
descriptors_allocated[descriptor_write_index++];
CreateCurrentScaledResolveRangeUintPow2SRV(descriptor_scaled_source.first,
load_shader_info.source_bpe_log2);
command_list.D3DSetComputeRootDescriptorTable(1, descriptor_scaled_source.second);
if (!is_base) {
scaled_mips_source_set_up = true;
}
}
if (texture_resolution_scaled) {
// Offset already applied in the buffer because more than 512 MB can't be
// directly addressed as R32 on some hardware (above
// 2^D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP).
load_constants.guest_offset = 0;
} else {
load_constants.guest_offset = guest_address;
}
if (!is_base) {
load_constants.guest_offset += guest_layout.mip_offsets_bytes[level] *
(texture_resolution_scale_x * texture_resolution_scale_y);
}
const texture_util::TextureGuestLayout::Level& level_guest_layout =
is_base ? guest_layout.base : guest_layout.mips[level];
uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes;
if (texture_key.tiled) {
// Shaders expect pitch in blocks for tiled textures.
level_guest_pitch /= bytes_per_block;
assert_zero(level_guest_pitch & (xenos::kTextureTileWidthHeight - 1));
}
load_constants.guest_pitch_aligned = level_guest_pitch;
load_constants.guest_z_stride_block_rows_aligned = level_guest_layout.z_slice_stride_block_rows;
assert_true(!is_3d_tiling || !(load_constants.guest_z_stride_block_rows_aligned &
(xenos::kTextureTileWidthHeight - 1)));
uint32_t level_width, level_height, level_depth;
if (level == level_packed) {
// This is the packed mip tail, containing not only the specified level,
// but also other levels at different offsets - load the entire needed
// extents.
level_width = level_guest_layout.x_extent_blocks * block_width;
level_height = level_guest_layout.y_extent_blocks * block_height;
level_depth = level_guest_layout.z_extent;
} else {
level_width = std::max(width >> level, uint32_t(1));
level_height = std::max(height >> level, uint32_t(1));
level_depth = std::max(depth >> level, uint32_t(1));
}
load_constants.size_blocks[0] =
(level_width + (block_width - 1)) / block_width * texture_resolution_scale_x;
load_constants.size_blocks[1] =
(level_height + (block_height - 1)) / block_height * texture_resolution_scale_y;
load_constants.size_blocks[2] = level_depth;
load_constants.height_texels = level_height;
uint32_t group_count_x =
(load_constants.size_blocks[0] + ((UINT32_C(1) << guest_x_blocks_per_group_log2) - 1)) >>
guest_x_blocks_per_group_log2;
uint32_t group_count_y =
(load_constants.size_blocks[1] + ((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >>
kLoadGuestYBlocksPerGroupLog2;
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout =
is_base ? host_slice_layout_base : host_slice_layouts_mips[level];
uint32_t host_slice_size =
uint32_t(is_base ? host_slice_size_base : host_slice_sizes_mips[level]);
load_constants.host_offset = uint32_t(level_host_slice_layout.Offset);
load_constants.host_pitch = level_host_slice_layout.Footprint.RowPitch;
command_list.D3DSetComputeRoot32BitConstants(0, sizeof(load_constants) / sizeof(uint32_t),
&load_constants, 0);
uint32_t level_array_slice_stride_bytes_scaled =
level_guest_layout.array_slice_stride_bytes *
(texture_resolution_scale_x * texture_resolution_scale_y);
for (uint32_t slice = 0; slice < array_size; ++slice) {
if (slice != 0) {
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(load_constants.guest_offset) / sizeof(uint32_t), &load_constants.guest_offset,
offsetof(LoadConstants, guest_offset) / sizeof(uint32_t));
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(load_constants.host_offset) / sizeof(uint32_t), &load_constants.host_offset,
offsetof(LoadConstants, host_offset) / sizeof(uint32_t));
}
assert_true(copy_buffer_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(group_count_x, group_count_y, load_constants.size_blocks[2]);
load_constants.guest_offset += level_array_slice_stride_bytes_scaled;
load_constants.host_offset += host_slice_size;
}
}
ID3D12Resource* copy_buffer_copy_source = copy_buffer;
D3D12_RESOURCE_STATES copy_buffer_copy_source_state = copy_buffer_state;
if (pipeline_float_convert != nullptr) {
static std::array<uint8_t, 64> float_fallback_log_mask = {};
uint8_t float_fallback_log_bit = host_format_is_signed ? uint8_t(1 << 1) : uint8_t(1 << 0);
uint8_t& float_fallback_log_state = float_fallback_log_mask[uint32_t(texture_key.format)];
if ((float_fallback_log_state & float_fallback_log_bit) == 0) {
float_fallback_log_state |= float_fallback_log_bit;
REXGPU_INFO("D3D12TextureCache: Using two-pass float fallback for format {} ({})",
FormatInfo::Get(texture_key.format)->name,
host_format_is_signed ? "signed" : "unsigned");
}
D3D12_RESOURCE_STATES copy_buffer_float_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
ID3D12Resource* copy_buffer_float = command_processor_.RequestScratchGPUBuffer(
uint32_t(copy_buffer_size), copy_buffer_float_state);
if (copy_buffer_float == nullptr) {
command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
return false;
}
command_processor_.PushTransitionBarrier(copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
command_processor_.SubmitBarriers();
ui::d3d12::util::CreateBufferTypedSRV(
device, descriptor_float_convert_source.first, copy_buffer,
ui::d3d12::util::GetUintPow2DXGIFormat(load_shader_info_float_convert->source_bpe_log2),
uint32_t(copy_buffer_size) >> load_shader_info_float_convert->source_bpe_log2);
ui::d3d12::util::CreateBufferTypedUAV(
device, descriptor_dest.first, copy_buffer_float,
ui::d3d12::util::GetUintPow2DXGIFormat(load_shader_info_float_convert->dest_bpe_log2),
uint32_t(copy_buffer_size) >> load_shader_info_float_convert->dest_bpe_log2);
command_processor_.SetExternalPipeline(pipeline_float_convert);
command_list.D3DSetComputeRootSignature(load_root_signature_.Get());
command_list.D3DSetComputeRootDescriptorTable(1, descriptor_float_convert_source.second);
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
LoadConstants load_constants_float_convert;
load_constants_float_convert.is_tiled_3d_endian_scale =
(uint32_t(is_3d) << 1) | (UINT32_C(1) << 4) | (UINT32_C(1) << 7);
uint32_t guest_x_blocks_per_group_log2_float_convert =
load_shader_info_float_convert->GetGuestXBlocksPerGroupLog2();
for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; ++loop_level) {
bool is_base = loop_level == 0;
uint32_t level = (level_packed == 0) ? 0 : loop_level;
const texture_util::TextureGuestLayout::Level& level_guest_layout =
is_base ? guest_layout.base : guest_layout.mips[level];
uint32_t level_width, level_height, level_depth;
if (level == level_packed) {
level_width = level_guest_layout.x_extent_blocks * block_width;
level_height = level_guest_layout.y_extent_blocks * block_height;
level_depth = level_guest_layout.z_extent;
} else {
level_width = std::max(width >> level, uint32_t(1));
level_height = std::max(height >> level, uint32_t(1));
level_depth = std::max(depth >> level, uint32_t(1));
}
load_constants_float_convert.size_blocks[0] =
(level_width + (block_width - 1)) / block_width * texture_resolution_scale_x;
load_constants_float_convert.size_blocks[1] =
(level_height + (block_height - 1)) / block_height * texture_resolution_scale_y;
load_constants_float_convert.size_blocks[2] = level_depth;
load_constants_float_convert.height_texels = level_height;
uint32_t group_count_x =
(load_constants_float_convert.size_blocks[0] +
((UINT32_C(1) << guest_x_blocks_per_group_log2_float_convert) - 1)) >>
guest_x_blocks_per_group_log2_float_convert;
uint32_t group_count_y =
(load_constants_float_convert.size_blocks[1] +
((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >>
kLoadGuestYBlocksPerGroupLog2;
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout =
is_base ? host_slice_layout_base : host_slice_layouts_mips[level];
uint32_t host_slice_size =
uint32_t(is_base ? host_slice_size_base : host_slice_sizes_mips[level]);
load_constants_float_convert.guest_offset = uint32_t(level_host_slice_layout.Offset);
load_constants_float_convert.guest_pitch_aligned = level_host_slice_layout.Footprint.RowPitch;
load_constants_float_convert.guest_z_stride_block_rows_aligned =
level_host_slice_layout.Footprint.Height;
load_constants_float_convert.host_offset = uint32_t(level_host_slice_layout.Offset);
load_constants_float_convert.host_pitch = level_host_slice_layout.Footprint.RowPitch;
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(load_constants_float_convert) / sizeof(uint32_t),
&load_constants_float_convert, 0);
for (uint32_t slice = 0; slice < array_size; ++slice) {
if (slice != 0) {
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(load_constants_float_convert.guest_offset) / sizeof(uint32_t),
&load_constants_float_convert.guest_offset,
offsetof(LoadConstants, guest_offset) / sizeof(uint32_t));
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(load_constants_float_convert.host_offset) / sizeof(uint32_t),
&load_constants_float_convert.host_offset,
offsetof(LoadConstants, host_offset) / sizeof(uint32_t));
}
command_list.D3DDispatch(group_count_x, group_count_y,
load_constants_float_convert.size_blocks[2]);
load_constants_float_convert.guest_offset += host_slice_size;
load_constants_float_convert.host_offset += host_slice_size;
}
}
command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
copy_buffer_copy_source = copy_buffer_float;
copy_buffer_copy_source_state = copy_buffer_float_state;
}
// Update LRU caching because the texture will be used by the command list.
d3d12_texture.MarkAsUsed();
// Submit copying from the copy buffer to the host texture.
ID3D12Resource* texture_resource = d3d12_texture.resource();
command_processor_.PushTransitionBarrier(
texture_resource, d3d12_texture.SetResourceState(D3D12_RESOURCE_STATE_COPY_DEST),
D3D12_RESOURCE_STATE_COPY_DEST);
command_processor_.PushTransitionBarrier(copy_buffer_copy_source, copy_buffer_copy_source_state,
D3D12_RESOURCE_STATE_COPY_SOURCE);
copy_buffer_copy_source_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
command_processor_.SubmitBarriers();
uint32_t texture_level_count = texture_key.mip_max_level + 1;
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
location_source.pResource = copy_buffer_copy_source;
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
location_dest.pResource = texture_resource;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
for (uint32_t level = level_first; level <= level_last; ++level) {
uint32_t guest_level = std::min(level, level_packed);
location_source.PlacedFootprint =
level ? host_slice_layouts_mips[guest_level] : host_slice_layout_base;
location_dest.SubresourceIndex = level;
UINT64 host_slice_size = level ? host_slice_sizes_mips[guest_level] : host_slice_size_base;
D3D12_BOX source_box;
const D3D12_BOX* source_box_ptr;
if (level >= level_packed) {
uint32_t level_offset_blocks_x, level_offset_blocks_y, level_offset_z;
texture_util::GetPackedMipOffset(width, height, depth, guest_format, level,
level_offset_blocks_x, level_offset_blocks_y,
level_offset_z);
source_box.left = level_offset_blocks_x * block_width * texture_resolution_scale_x;
source_box.top = level_offset_blocks_y * block_height * texture_resolution_scale_y;
source_box.front = level_offset_z;
source_box.right =
source_box.left +
rex::align(std::max((width * texture_resolution_scale_x) >> level, uint32_t(1)),
host_block_width);
source_box.bottom =
source_box.top +
rex::align(std::max((height * texture_resolution_scale_y) >> level, uint32_t(1)),
host_block_height);
source_box.back = source_box.front + std::max(depth >> level, uint32_t(1));
source_box_ptr = &source_box;
} else {
source_box_ptr = nullptr;
}
for (uint32_t slice = 0; slice < array_size; ++slice) {
command_list.D3DCopyTextureRegion(&location_dest, 0, 0, 0, &location_source, source_box_ptr);
location_dest.SubresourceIndex += texture_level_count;
location_source.PlacedFootprint.Offset += host_slice_size;
}
}
command_processor_.ReleaseScratchGPUBuffer(copy_buffer_copy_source, copy_buffer_copy_source_state);
DXGI_FORMAT swap_format = host_format_is_signed
? host_formats_[uint32_t(texture_key.format)].dxgi_format_signed
: GetDXGIUnormFormat(texture_key);
if (swap_format != DXGI_FORMAT_UNKNOWN && texture_key.dimension != xenos::DataDimension::kCube) {
ScheduleTextureDump(d3d12_texture, swap_format);
ApplyTextureReplacement(d3d12_texture, swap_format);
}
return true;
}
void D3D12TextureCache::ProcessCompletedTextureTransfers() {
const uint64_t completed_submission = command_processor_.GetCompletedSubmission();
for (auto it = pending_upload_resources_.begin(); it != pending_upload_resources_.end();) {
if (it->submission_index > completed_submission) {
++it;
continue;
}
it = pending_upload_resources_.erase(it);
}
for (auto it = pending_texture_dumps_.begin(); it != pending_texture_dumps_.end();) {
if (it->submission_index > completed_submission) {
++it;
continue;
}
D3D12_RANGE read_range;
read_range.Begin = 0;
read_range.End = SIZE_T(it->total_size);
void* mapped = nullptr;
if (FAILED(it->readback_buffer->Map(0, &read_range, &mapped))) {
REXGPU_WARN("Texture swap dump {}: failed to map readback buffer", it->stable_key);
it = pending_texture_dumps_.erase(it);
continue;
}
ac6::textures::DdsImageData dds_image;
dds_image.format = it->dxgi_format;
dds_image.dimension = it->resource_dimension;
dds_image.width = it->width;
dds_image.height = it->height;
dds_image.depth_or_array_size = it->depth_or_array_size;
dds_image.mip_count = it->mip_count;
dds_image.is_cube = false;
dds_image.subresources.reserve(it->footprints.size());
bool build_failed = false;
for (size_t subresource_index = 0; subresource_index < it->footprints.size(); ++subresource_index) {
const uint32_t mip_index =
it->resource_dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D
? uint32_t(subresource_index)
: (uint32_t(subresource_index) % it->mip_count);
ac6::textures::DdsSubresource subresource;
subresource.width = std::max(it->width >> mip_index, 1u);
subresource.height = std::max(it->height >> mip_index, 1u);
subresource.depth = it->resource_dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D
? std::max(it->depth_or_array_size >> mip_index, 1u)
: 1u;
ac6::textures::TextureSubresourceLayout tight_layout = {};
if (!ac6::textures::GetTightTextureSubresourceLayout(
it->dxgi_format, subresource.width, subresource.height, tight_layout)) {
REXGPU_WARN("Texture swap dump {}: unsupported dump format {}",
it->stable_key, uint32_t(it->dxgi_format));
build_failed = true;
break;
}
subresource.row_pitch = tight_layout.row_pitch;
subresource.slice_pitch = tight_layout.slice_pitch;
subresource.data.resize(size_t(subresource.slice_pitch) * subresource.depth);
const uint8_t* source_base =
reinterpret_cast<const uint8_t*>(mapped) + it->footprints[subresource_index].Offset;
const uint32_t source_row_pitch = it->footprints[subresource_index].Footprint.RowPitch;
const uint32_t source_row_count = it->row_counts[subresource_index];
for (uint32_t z = 0; z < subresource.depth; ++z) {
const uint8_t* source_slice = source_base + size_t(z) * source_row_pitch * source_row_count;
uint8_t* dest_slice = subresource.data.data() + size_t(z) * subresource.slice_pitch;
for (uint32_t row = 0; row < tight_layout.row_count; ++row) {
std::memcpy(dest_slice + size_t(row) * subresource.row_pitch,
source_slice + size_t(row) * source_row_pitch, subresource.row_pitch);
}
}
dds_image.subresources.push_back(std::move(subresource));
}
it->readback_buffer->Unmap(0, nullptr);
if (build_failed) {
it = pending_texture_dumps_.erase(it);
continue;
}
ac6::textures::TextureDumpMetadata metadata;
metadata.stable_key = it->stable_key;
metadata.texture_key_hash = it->texture_key_hash;
metadata.base_page = it->base_page;
metadata.mip_page = it->mip_page;
metadata.dimension = it->guest_dimension;
metadata.width = it->width;
metadata.height = it->height;
metadata.depth_or_array_size = it->depth_or_array_size;
metadata.mip_count = it->mip_count;
metadata.guest_format = it->guest_format;
metadata.endianness = it->endianness;
metadata.dxgi_format = uint32_t(it->dxgi_format);
metadata.tiled = it->tiled;
metadata.packed_mips = it->packed_mips;
metadata.signed_separate = it->signed_separate;
metadata.scaled_resolve = it->scaled_resolve;
metadata.frame_index = it->frame_index;
metadata.signature_stable_id = it->signature_stable_id;
metadata.active_vertex_shader_hash = it->active_vertex_shader_hash;
metadata.active_pixel_shader_hash = it->active_pixel_shader_hash;
metadata.signature_tags = it->signature_tags;
std::string error;
if (!ac6::textures::WriteDdsToFile(ac6::textures::GetTextureDumpDdsPath(it->stable_key),
dds_image, &error)) {
REXGPU_WARN("Texture swap dump {}: failed to write DDS ({})", it->stable_key, error);
} else if (!ac6::textures::WriteDumpMetadata(
ac6::textures::GetTextureDumpMetadataPath(it->stable_key), metadata, &error)) {
REXGPU_WARN("Texture swap dump {}: failed to write metadata ({})", it->stable_key, error);
}
it = pending_texture_dumps_.erase(it);
}
}
// Records GPU copies of every subresource of `texture` into a readback buffer
// and queues a pending dump that ProcessCompletedTextureTransfers() writes to
// disk once the current submission has completed.
//
// `dump_format` is the DXGI format the dumped DDS is tagged with.
//
// Returns true when a dump was scheduled. Returns false when dumping is
// disabled, the format is not supported for swapping, the texture was already
// dumped (in this session or on disk), the resource is neither 2D nor 3D, the
// copyable size cannot be represented by the readback buffer request, or no
// readback buffer could be obtained.
bool D3D12TextureCache::ScheduleTextureDump(D3D12Texture& texture, DXGI_FORMAT dump_format) {
  if (!ac6::textures::TextureDumpEnabled() ||
      !ac6::textures::IsSupportedTextureSwapFormat(dump_format)) {
    return false;
  }
  const TextureKey& key = texture.key();
  const uint64_t texture_key_hash = XXH3_64bits(&key, sizeof(key));
  const std::string stable_key = ac6::textures::BuildTextureStableKey(
      texture_key_hash, key.base_page, key.mip_page, uint32_t(key.dimension), key.GetWidth(),
      key.GetHeight(), key.GetDepthOrArraySize(), key.mip_max_level + 1, uint32_t(key.format),
      uint32_t(key.endianness), key.tiled != 0, key.packed_mips != 0, key.signed_separate != 0,
      key.scaled_resolve != 0);
  if (dumped_texture_keys_.contains(stable_key) || ac6::textures::DumpExists(stable_key)) {
    // Remember dumps found on disk so the existence check is not repeated.
    dumped_texture_keys_.insert(stable_key);
    return false;
  }

  ID3D12Resource* texture_resource = texture.resource();
  D3D12_RESOURCE_DESC resource_desc = texture_resource->GetDesc();
  if (resource_desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D &&
      resource_desc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D) {
    return false;
  }
  // A 3D texture has one subresource per mip; a 2D array has one per mip per
  // array slice.
  const uint32_t subresource_count =
      resource_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D
          ? resource_desc.MipLevels
          : resource_desc.MipLevels * resource_desc.DepthOrArraySize;
  std::vector<D3D12_PLACED_SUBRESOURCE_FOOTPRINT> footprints(subresource_count);
  std::vector<UINT> row_counts(subresource_count);
  UINT64 total_size = 0;
  ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice();
  // Row sizes are not needed for the dump, so that optional output is omitted.
  device->GetCopyableFootprints(&resource_desc, 0, subresource_count, 0, footprints.data(),
                                row_counts.data(), nullptr, &total_size);
  // The readback request takes a 32-bit size. Reject sizes that would be
  // truncated (including the UINT64_MAX reported for an invalid description),
  // as the copies below would otherwise target a buffer that is too small.
  if (total_size == 0 || total_size > UINT64(UINT32_MAX)) {
    return false;
  }
  ID3D12Resource* readback_resource =
      command_processor_.RequestReadbackBuffer(uint32_t(total_size));
  if (!readback_resource) {
    return false;
  }

  const D3D12_RESOURCE_STATES previous_state =
      texture.SetResourceState(D3D12_RESOURCE_STATE_COPY_SOURCE);
  if (previous_state != D3D12_RESOURCE_STATE_COPY_SOURCE) {
    command_processor_.PushTransitionBarrier(texture_resource, previous_state,
                                             D3D12_RESOURCE_STATE_COPY_SOURCE);
    command_processor_.SubmitBarriers();
  }

  DeferredCommandList& command_list = command_processor_.GetDeferredCommandList();
  for (uint32_t subresource_index = 0; subresource_index < subresource_count;
       ++subresource_index) {
    D3D12_TEXTURE_COPY_LOCATION source = {};
    source.pResource = texture_resource;
    source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
    source.SubresourceIndex = subresource_index;
    D3D12_TEXTURE_COPY_LOCATION dest = {};
    dest.pResource = readback_resource;
    dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
    dest.PlacedFootprint = footprints[subresource_index];
    command_list.D3DCopyTextureRegion(&dest, 0, 0, 0, &source, nullptr);
  }

  // Capture the diagnostics state at scheduling time so the metadata written
  // later describes the draw context in which the texture was loaded.
  const ac6::backend::BackendDiagnosticsSnapshot diagnostics =
      ac6::backend::GetDiagnosticsSnapshot();
  PendingTextureDump pending_dump;
  pending_dump.submission_index = command_processor_.GetCurrentSubmission();
  pending_dump.total_size = total_size;
  pending_dump.texture_key_hash = texture_key_hash;
  pending_dump.base_page = key.base_page;
  pending_dump.mip_page = key.mip_page;
  pending_dump.guest_dimension = uint32_t(key.dimension);
  pending_dump.width = key.GetWidth();
  pending_dump.height = key.GetHeight();
  pending_dump.depth_or_array_size = key.GetDepthOrArraySize();
  pending_dump.mip_count = key.mip_max_level + 1;
  pending_dump.guest_format = uint32_t(key.format);
  pending_dump.endianness = uint32_t(key.endianness);
  pending_dump.dxgi_format = dump_format;
  pending_dump.resource_dimension = resource_desc.Dimension;
  pending_dump.tiled = key.tiled != 0;
  pending_dump.packed_mips = key.packed_mips != 0;
  pending_dump.signed_separate = key.signed_separate != 0;
  pending_dump.scaled_resolve = key.scaled_resolve != 0;
  pending_dump.frame_index = diagnostics.frame_index;
  pending_dump.signature_stable_id = diagnostics.latest_signature.stable_id;
  pending_dump.active_vertex_shader_hash = diagnostics.active_vertex_shader_hash;
  pending_dump.active_pixel_shader_hash = diagnostics.active_pixel_shader_hash;
  pending_dump.stable_key = stable_key;
  pending_dump.signature_tags = diagnostics.latest_signature_tags;
  pending_dump.readback_buffer = readback_resource;
  pending_dump.footprints = std::move(footprints);
  pending_dump.row_counts.reserve(row_counts.size());
  for (UINT row_count : row_counts) {
    pending_dump.row_counts.push_back(uint32_t(row_count));
  }
  pending_texture_dumps_.push_back(std::move(pending_dump));
  dumped_texture_keys_.insert(stable_key);
  return true;
}
// Replaces the contents of `texture` with a user-supplied DDS file, if one is
// resolved for the texture's stable key.
//
// `replacement_format` is the DXGI format the replacement must be stored in.
//
// The replacement has to match the host resource exactly (dimension, size,
// depth/array size, mip count, subresource count) and every subresource payload
// must be large enough for the rows that are copied. Each distinct failure is
// logged once per stable key until a replacement is applied successfully.
//
// Returns true when the upload copies were recorded, false otherwise.
bool D3D12TextureCache::ApplyTextureReplacement(D3D12Texture& texture,
                                                DXGI_FORMAT replacement_format) {
  if (!ac6::textures::TextureReplacementEnabled() ||
      !ac6::textures::IsSupportedTextureSwapFormat(replacement_format)) {
    return false;
  }
  const TextureKey& key = texture.key();
  const uint64_t texture_key_hash = XXH3_64bits(&key, sizeof(key));
  const std::string stable_key = ac6::textures::BuildTextureStableKey(
      texture_key_hash, key.base_page, key.mip_page, uint32_t(key.dimension), key.GetWidth(),
      key.GetHeight(), key.GetDepthOrArraySize(), key.mip_max_level + 1, uint32_t(key.format),
      uint32_t(key.endianness), key.tiled != 0, key.packed_mips != 0, key.signed_separate != 0,
      key.scaled_resolve != 0);
  const std::optional<std::filesystem::path> replacement_path =
      ac6::textures::ResolveReplacementDdsPath(stable_key);
  if (!replacement_path) {
    return false;
  }

  ac6::textures::DdsImageData replacement;
  std::string error;
  if (!ac6::textures::LoadDdsFromFile(*replacement_path, replacement, &error)) {
    if (replacement_warning_keys_.insert(stable_key).second) {
      REXGPU_WARN("Texture swap {}: failed to load replacement {} ({})", stable_key,
                  replacement_path->string(), error);
    }
    return false;
  }

  ID3D12Resource* texture_resource = texture.resource();
  const D3D12_RESOURCE_DESC resource_desc = texture_resource->GetDesc();
  if (replacement.is_cube || replacement.format != replacement_format ||
      replacement.dimension != resource_desc.Dimension ||
      replacement.width != resource_desc.Width || replacement.height != resource_desc.Height ||
      replacement.depth_or_array_size != resource_desc.DepthOrArraySize ||
      replacement.mip_count != resource_desc.MipLevels) {
    if (replacement_warning_keys_.insert(stable_key).second) {
      REXGPU_WARN(
          "Texture swap {}: replacement {} does not match expected format/layout (expected {} {}x{}x{} mips={})",
          stable_key, replacement_path->string(),
          ac6::textures::DescribeDxgiFormat(replacement_format), uint32_t(resource_desc.Width),
          resource_desc.Height, resource_desc.DepthOrArraySize, resource_desc.MipLevels);
    }
    return false;
  }
  // A 3D texture has one subresource per mip; a 2D array has one per mip per
  // array slice.
  const uint32_t subresource_count =
      resource_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D
          ? resource_desc.MipLevels
          : resource_desc.MipLevels * resource_desc.DepthOrArraySize;
  if (replacement.subresources.size() != subresource_count) {
    if (replacement_warning_keys_.insert(stable_key).second) {
      REXGPU_WARN("Texture swap {}: replacement {} has {} subresources, expected {}", stable_key,
                  replacement_path->string(), replacement.subresources.size(),
                  subresource_count);
    }
    return false;
  }

  ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice();
  std::vector<D3D12_PLACED_SUBRESOURCE_FOOTPRINT> footprints(subresource_count);
  std::vector<UINT> row_counts(subresource_count);
  std::vector<UINT64> row_sizes(subresource_count);
  UINT64 upload_size = 0;
  device->GetCopyableFootprints(&resource_desc, 0, subresource_count, 0, footprints.data(),
                                row_counts.data(), row_sizes.data(), &upload_size);

  // The pitches, depth and payload size come from a file on disk. Check them
  // against the destination footprints before creating or touching the upload
  // buffer so that a malformed replacement cannot cause out-of-bounds reads of
  // the payload or writes past the mapped upload memory.
  for (uint32_t subresource_index = 0; subresource_index < subresource_count;
       ++subresource_index) {
    const ac6::textures::DdsSubresource& subresource =
        replacement.subresources[subresource_index];
    const uint64_t row_count = row_counts[subresource_index];
    const uint64_t slice_count = subresource.depth;
    bool layout_fits = slice_count <= footprints[subresource_index].Footprint.Depth;
    if (layout_fits && row_count != 0 && slice_count != 0) {
      const uint64_t row_copy_bytes =
          std::min<uint64_t>(subresource.row_pitch, row_sizes[subresource_index]);
      // Offset one past the last source byte read by the copy loop below.
      const uint64_t source_bytes_needed = (slice_count - 1) * uint64_t(subresource.slice_pitch) +
                                           (row_count - 1) * uint64_t(subresource.row_pitch) +
                                           row_copy_bytes;
      layout_fits = uint64_t(subresource.data.size()) >= source_bytes_needed;
    }
    if (!layout_fits) {
      if (replacement_warning_keys_.insert(stable_key).second) {
        REXGPU_WARN("Texture swap {}: replacement {} subresource {} data does not fit the expected layout",
                    stable_key, replacement_path->string(), subresource_index);
      }
      return false;
    }
  }

  D3D12_RESOURCE_DESC upload_desc = {};
  upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
  upload_desc.Alignment = 0;
  upload_desc.Width = upload_size;
  upload_desc.Height = 1;
  upload_desc.DepthOrArraySize = 1;
  upload_desc.MipLevels = 1;
  upload_desc.Format = DXGI_FORMAT_UNKNOWN;
  upload_desc.SampleDesc.Count = 1;
  upload_desc.SampleDesc.Quality = 0;
  upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
  upload_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
  Microsoft::WRL::ComPtr<ID3D12Resource> upload_buffer;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesUpload,
          command_processor_.GetD3D12Provider().GetHeapFlagCreateNotZeroed(), &upload_desc,
          D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&upload_buffer)))) {
    if (replacement_warning_keys_.insert(stable_key).second) {
      REXGPU_WARN("Texture swap {}: failed to create upload buffer for {}", stable_key,
                  replacement_path->string());
    }
    return false;
  }

  // An empty read range tells D3D12 that the CPU will not read the buffer.
  D3D12_RANGE no_read_range = {};
  void* mapped_upload = nullptr;
  if (FAILED(upload_buffer->Map(0, &no_read_range, &mapped_upload))) {
    if (replacement_warning_keys_.insert(stable_key).second) {
      REXGPU_WARN("Texture swap {}: failed to map upload buffer for {}", stable_key,
                  replacement_path->string());
    }
    return false;
  }
  for (uint32_t subresource_index = 0; subresource_index < subresource_count;
       ++subresource_index) {
    const ac6::textures::DdsSubresource& subresource =
        replacement.subresources[subresource_index];
    const uint8_t* source_base = subresource.data.data();
    uint8_t* dest_base =
        reinterpret_cast<uint8_t*>(mapped_upload) + footprints[subresource_index].Offset;
    const uint32_t dest_row_pitch = footprints[subresource_index].Footprint.RowPitch;
    // Copy at most the unpadded row size: the last row of the upload buffer is
    // not padded up to the row pitch.
    const size_t row_copy_bytes =
        size_t(std::min<uint64_t>(subresource.row_pitch, row_sizes[subresource_index]));
    for (uint32_t z = 0; z < subresource.depth; ++z) {
      const uint8_t* source_slice = source_base + size_t(z) * subresource.slice_pitch;
      uint8_t* dest_slice =
          dest_base + size_t(z) * dest_row_pitch * row_counts[subresource_index];
      for (uint32_t row = 0; row < row_counts[subresource_index]; ++row) {
        std::memcpy(dest_slice + size_t(row) * dest_row_pitch,
                    source_slice + size_t(row) * subresource.row_pitch, row_copy_bytes);
      }
    }
  }
  upload_buffer->Unmap(0, nullptr);

  const D3D12_RESOURCE_STATES previous_state =
      texture.SetResourceState(D3D12_RESOURCE_STATE_COPY_DEST);
  if (previous_state != D3D12_RESOURCE_STATE_COPY_DEST) {
    command_processor_.PushTransitionBarrier(texture_resource, previous_state,
                                             D3D12_RESOURCE_STATE_COPY_DEST);
    command_processor_.SubmitBarriers();
  }
  DeferredCommandList& command_list = command_processor_.GetDeferredCommandList();
  for (uint32_t subresource_index = 0; subresource_index < subresource_count;
       ++subresource_index) {
    D3D12_TEXTURE_COPY_LOCATION source = {};
    source.pResource = upload_buffer.Get();
    source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
    source.PlacedFootprint = footprints[subresource_index];
    D3D12_TEXTURE_COPY_LOCATION dest = {};
    dest.pResource = texture_resource;
    dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
    dest.SubresourceIndex = subresource_index;
    command_list.D3DCopyTextureRegion(&dest, 0, 0, 0, &source, nullptr);
  }
  // Keep the upload buffer alive until the submission that reads it completes.
  pending_upload_resources_.push_back(
      PendingUploadResource{command_processor_.GetCurrentSubmission(), upload_buffer});
  // A successful replacement re-arms the one-time warnings for this key.
  replacement_warning_keys_.erase(stable_key);
  return true;
}
void D3D12TextureCache::UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) {
uint32_t bindings_remaining = fetch_constant_mask;
uint32_t binding_index;
while (rex::bit_scan_forward(bindings_remaining, &binding_index)) {
bindings_remaining &= ~(UINT32_C(1) << binding_index);
D3D12TextureBinding& d3d12_binding = d3d12_texture_bindings_[binding_index];
d3d12_binding.Reset();
const TextureBinding* binding = GetValidTextureBinding(binding_index);
if (!binding) {
continue;
}
if (IsSignedVersionSeparateForFormat(binding->key)) {
if (binding->texture && texture_util::IsAnySignNotSigned(binding->swizzled_signs)) {
d3d12_binding.descriptor_index =
FindOrCreateTextureDescriptor(*static_cast<D3D12Texture*>(binding->texture),
binding->key.dimension, false, binding->host_swizzle);
}
if (binding->texture_signed && texture_util::IsAnySignSigned(binding->swizzled_signs)) {
d3d12_binding.descriptor_index_signed =
FindOrCreateTextureDescriptor(*static_cast<D3D12Texture*>(binding->texture_signed),
binding->key.dimension, true, binding->host_swizzle);
}
} else {
D3D12Texture* texture = static_cast<D3D12Texture*>(binding->texture);
if (texture) {
if (texture_util::IsAnySignNotSigned(binding->swizzled_signs)) {
d3d12_binding.descriptor_index = FindOrCreateTextureDescriptor(
*texture, binding->key.dimension, false, binding->host_swizzle);
}
if (texture_util::IsAnySignSigned(binding->swizzled_signs)) {
d3d12_binding.descriptor_index_signed = FindOrCreateTextureDescriptor(
*texture, binding->key.dimension, true, binding->host_swizzle);
}
}
}
}
}
// Returns the single-mip, single-slice 2D companion resource of this texture,
// creating it and loading its data on first use, and queues a transition
// barrier moving it to `end_state`.
//
// Returns nullptr when the gpu_3d_to_2d_texture cvar is off, when the key has
// no known DXGI resource format, or when creating or loading the companion
// resource fails.
ID3D12Resource* D3D12TextureCache::D3D12Texture::GetOrCreate3DAs2DResource(
    D3D12_RESOURCE_STATES end_state) {
  if (!REXCVAR_GET(gpu_3d_to_2d_texture)) {
    return nullptr;
  }
  auto& owner_cache = static_cast<D3D12TextureCache&>(texture_cache());

  if (!texture_3d_as_2d_) {
    const ui::d3d12::D3D12Provider& provider =
        owner_cache.command_processor_.GetD3D12Provider();
    ID3D12Device* device = provider.GetDevice();

    D3D12_RESOURCE_DESC wrapper_desc = {};
    wrapper_desc.Format = owner_cache.GetDXGIResourceFormat(key());
    if (wrapper_desc.Format == DXGI_FORMAT_UNKNOWN) {
      return nullptr;
    }
    wrapper_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    wrapper_desc.Alignment = 0;
    wrapper_desc.Width = key().GetWidth();
    wrapper_desc.Height = key().GetHeight();
    wrapper_desc.DepthOrArraySize = 1;
    wrapper_desc.MipLevels = 1;
    wrapper_desc.SampleDesc.Count = 1;
    wrapper_desc.SampleDesc.Quality = 0;
    wrapper_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    wrapper_desc.Flags = D3D12_RESOURCE_FLAG_NONE;

    // The wrapper starts in the copy destination state because the load below
    // fills it.
    const D3D12_RESOURCE_STATES creation_state = D3D12_RESOURCE_STATE_COPY_DEST;
    Microsoft::WRL::ComPtr<ID3D12Resource> wrapper_resource;
    const HRESULT create_result = device->CreateCommittedResource(
        &ui::d3d12::util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(),
        &wrapper_desc, creation_state, nullptr, IID_PPV_ARGS(&wrapper_resource));
    if (FAILED(create_result)) {
      REXGPU_ERROR("D3D12TextureCache: Failed to create 3D-as-2D wrapper resource");
      return nullptr;
    }

    // Same key as this texture, restricted to the first slice and the base mip.
    TextureKey wrapper_key = key();
    wrapper_key.depth_or_array_size_minus_1 = 0;
    wrapper_key.mip_max_level = 0;
    texture_3d_as_2d_.reset(new D3D12Texture(owner_cache, wrapper_key, wrapper_resource.Get(),
                                             creation_state, false));
    texture_3d_as_2d_->SetForceLoad3DTiling(true);
    if (!owner_cache.LoadTextureData(*texture_3d_as_2d_)) {
      REXGPU_ERROR("D3D12TextureCache: Failed to load 3D-as-2D wrapper data");
      texture_3d_as_2d_.reset();
      return nullptr;
    }
  }

  // Shared tail for both the cached and the freshly created wrapper.
  ID3D12Resource* const companion_resource = texture_3d_as_2d_->resource();
  const D3D12_RESOURCE_STATES state_before = texture_3d_as_2d_->SetResourceState(end_state);
  owner_cache.command_processor_.PushTransitionBarrier(companion_resource, state_before,
                                                       end_state);
  return companion_resource;
}
// Looks up, or creates and registers on `texture`, a shader resource view
// descriptor for the requested view dimension, signedness and host swizzle.
//
// Returns the descriptor index (bindless heap index, or index into the paged
// SRV descriptor cache), or UINT32_MAX if the view can't be provided:
// - the texture is the wrong signedness variant for a format that keeps the
//   signed and unsigned versions separately;
// - there's no host format for the requested signedness;
// - the 2D wrapper of a 3D texture couldn't be obtained;
// - the dimension is not handled;
// - no descriptor could be allocated.
uint32_t D3D12TextureCache::FindOrCreateTextureDescriptor(D3D12Texture& texture,
                                                          xenos::DataDimension dimension,
                                                          bool is_signed, uint32_t host_swizzle) {
  // The whole key is zeroed first so that bits not covered by the fields below
  // have a known value.
  D3D12Texture::SRVDescriptorKey srv_key;
  srv_key.key = 0;
  srv_key.is_signed = uint32_t(is_signed);
  srv_key.host_swizzle = host_swizzle;
  srv_key.dimension = uint32_t(dimension);

  // Reuse the descriptor if the texture already has one for this key.
  if (uint32_t cached_index = texture.GetSRVDescriptorIndex(srv_key);
      cached_index != UINT32_MAX) {
    return cached_index;
  }

  TextureKey tex_key = texture.key();
  if (IsSignedVersionSeparateForFormat(tex_key) &&
      tex_key.signed_separate != uint32_t(is_signed)) {
    // This texture object holds the other signedness variant.
    return UINT32_MAX;
  }

  // Pick the host format for the view. Signed views of compressed textures are
  // not supported - the hope is that DXN and DXT5A are never used as signed.
  xenos::TextureFormat format = tex_key.format;
  D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
  srv_desc.Format = is_signed ? host_formats_[uint32_t(format)].dxgi_format_signed
                              : GetDXGIUnormFormat(tex_key);
  if (srv_desc.Format == DXGI_FORMAT_UNKNOWN) {
    // Remember which kind of view of this format was requested but missing.
    unsupported_format_features_used_[uint32_t(format)] |=
        is_signed ? kUnsupportedSnormBit : kUnsupportedUnormBit;
    return UINT32_MAX;
  }

  uint32_t mip_count = tex_key.mip_max_level + 1;
  ID3D12Resource* view_resource = texture.resource();
  switch (dimension) {
    case xenos::DataDimension::k3D: {
      srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
      auto& view_3d = srv_desc.Texture3D;
      view_3d.MostDetailedMip = 0;
      view_3d.MipLevels = mip_count;
      view_3d.ResourceMinLODClamp = 0.0f;
      break;
    }
    case xenos::DataDimension::kCube: {
      srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
      auto& view_cube = srv_desc.TextureCube;
      view_cube.MostDetailedMip = 0;
      view_cube.MipLevels = mip_count;
      view_cube.ResourceMinLODClamp = 0.0f;
      break;
    }
    case xenos::DataDimension::k1D:
    case xenos::DataDimension::k2DOrStacked: {
      srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
      auto& view_2d_array = srv_desc.Texture2DArray;
      // Fields that don't depend on which resource ends up being viewed.
      view_2d_array.MostDetailedMip = 0;
      view_2d_array.FirstArraySlice = 0;
      view_2d_array.PlaneSlice = 0;
      view_2d_array.ResourceMinLODClamp = 0.0f;
      if (tex_key.dimension != xenos::DataDimension::k3D) {
        // Viewing the texture's own resource - all mips and layers.
        view_2d_array.MipLevels = mip_count;
        view_2d_array.ArraySize = tex_key.GetDepthOrArraySize();
        break;
      }
      // A 3D texture fetched as 2D - view the separate 2D wrapper resource
      // instead, as a single mip and a single array slice.
      view_resource =
          texture.GetOrCreate3DAs2DResource(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
                                            D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      if (!view_resource) {
        return UINT32_MAX;
      }
      view_2d_array.MipLevels = 1;
      view_2d_array.ArraySize = 1;
      break;
    }
    default:
      assert_unhandled_case(dimension);
      return UINT32_MAX;
  }
  srv_desc.Shader4ComponentMapping =
      host_swizzle | D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES;

  // Allocate the descriptor - from the bindless heap, from the free list of
  // the descriptor cache, or from a new slot (growing the cache page by page).
  ID3D12Device* device = command_processor_.GetD3D12Provider().GetDevice();
  uint32_t new_index;
  if (bindless_resources_used_) {
    new_index = command_processor_.RequestPersistentViewBindlessDescriptor();
    if (new_index == UINT32_MAX) {
      REXGPU_ERROR(
          "Failed to create a texture descriptor - no free bindless view "
          "descriptors");
      return UINT32_MAX;
    }
  } else if (!srv_descriptor_cache_free_.empty()) {
    new_index = srv_descriptor_cache_free_.back();
    srv_descriptor_cache_free_.pop_back();
  } else {
    // Number of pages needed to hold the already allocated descriptors plus
    // the one being added: (allocated + 1) rounded up to whole pages, which is
    // (allocated + 1 + page size - 1) / page size.
    uint32_t pages_required =
        (srv_descriptor_cache_allocated_ + kSRVDescriptorCachePageSize) /
        kSRVDescriptorCachePageSize;
    D3D12_DESCRIPTOR_HEAP_DESC page_heap_desc = {};
    page_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    page_heap_desc.NumDescriptors = kSRVDescriptorCachePageSize;
    page_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
    page_heap_desc.NodeMask = 0;
    while (srv_descriptor_cache_.size() < pages_required) {
      Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> page_heap;
      if (FAILED(device->CreateDescriptorHeap(&page_heap_desc, IID_PPV_ARGS(&page_heap)))) {
        REXGPU_ERROR(
            "D3D12TextureCache: Failed to create a texture descriptor - "
            "couldn't create a descriptor cache heap");
        return UINT32_MAX;
      }
      srv_descriptor_cache_.emplace_back(page_heap.Get());
    }
    new_index = srv_descriptor_cache_allocated_++;
  }

  // Write the view and remember it on the texture for later lookups.
  device->CreateShaderResourceView(view_resource, &srv_desc,
                                   GetTextureDescriptorCPUHandle(new_index));
  texture.AddSRVDescriptorIndex(srv_key, new_index);
  return new_index;
}
// Gives up a texture descriptor slot. Without bindless resources, the index
// goes onto the free list of the SRV descriptor cache; with bindless
// resources, it's handed to the command processor for immediate release.
void D3D12TextureCache::ReleaseTextureDescriptor(uint32_t descriptor_index) {
  if (!bindless_resources_used_) {
    srv_descriptor_cache_free_.push_back(descriptor_index);
    return;
  }
  command_processor_.ReleaseViewBindlessDescriptorImmediately(descriptor_index);
}
// Translates a texture descriptor index into a CPU descriptor handle - an
// offset from the start of the bindless view heap if bindless resources are
// used, or otherwise an offset within the SRV descriptor cache page that
// contains the index.
D3D12_CPU_DESCRIPTOR_HANDLE D3D12TextureCache::GetTextureDescriptorCPUHandle(
    uint32_t descriptor_index) const {
  const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider();
  if (!bindless_resources_used_) {
    // The cache consists of pages of kSRVDescriptorCachePageSize descriptors.
    const auto page_index = descriptor_index / kSRVDescriptorCachePageSize;
    D3D12_CPU_DESCRIPTOR_HANDLE page_start = srv_descriptor_cache_[page_index].heap_start();
    uint32_t index_in_page = descriptor_index % kSRVDescriptorCachePageSize;
    return provider.OffsetViewDescriptor(page_start, index_in_page);
  }
  return provider.OffsetViewDescriptor(command_processor_.GetViewBindlessHeapCPUStart(),
                                       descriptor_index);
}
// Maps guest clamp modes that have no Direct3D 12 counterpart to the closest
// mode used instead; every other mode is returned unchanged.
xenos::ClampMode D3D12TextureCache::NormalizeClampMode(xenos::ClampMode clamp_mode) const {
  switch (clamp_mode) {
    case xenos::ClampMode::kClampToHalfway:
      // Direct3D 12 has no equivalent of GL_CLAMP (clamp to half edge, half
      // border), but Direct3D 9 has none either, and it's too weird to be
      // suitable for intentional real usage.
      return xenos::ClampMode::kClampToEdge;
    case xenos::ClampMode::kMirrorClampToHalfway:
    case xenos::ClampMode::kMirrorClampToBorder:
      // No Direct3D 12 equivalents.
      return xenos::ClampMode::kMirrorClampToEdge;
    default:
      return clamp_mode;
  }
}
} // namespace rex::graphics::d3d12