mirror of
https://github.com/TwilitRealm/dusklight
synced 2026-06-10 12:22:12 -04:00
added computeSurfaceInfo from decaf for NextPow2 padded mips in GTX
This commit is contained in:
@@ -40,6 +40,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <bit>
|
||||
|
||||
namespace dusk::tphd::addrlib {
|
||||
|
||||
@@ -70,6 +71,16 @@ static u32 Log2(u32 v) {
|
||||
return r;
|
||||
}
|
||||
|
||||
// True when exactly one bit of `v` is set, i.e. `v` is a non-zero power of two.
static constexpr bool IsPow2(u32 v) { return std::has_single_bit(v); }
|
||||
|
||||
|
||||
// Rounds `v` up to the nearest power of two. Both 0 and 1 map to 1.
static constexpr u32 NextPow2(u32 v) {
    if (v <= 1) {
        return 1u;
    }
    return std::bit_ceil(v);
}
|
||||
// Rounds `v` up to the next multiple of `align` by masking off the low bits.
// The mask arithmetic only yields a true multiple when `align` is a power of
// two.
static constexpr u32 PowTwoAlign(u32 v, u32 align) {
    const u32 lowBits = align - 1;
    return (v + lowBits) & ~lowBits;
}
|
||||
|
||||
// ---- Tile-mode classification ---------------------------------------------
|
||||
|
||||
static u32 ComputeSurfaceThickness(TileMode tm) {
|
||||
@@ -432,4 +443,249 @@ std::vector<u8> deswizzle(const SurfaceDesc& desc, std::span<const u8> tiledByte
|
||||
return linear;
|
||||
}
|
||||
|
||||
// R600AddrLib::ConvertToNonBankSwappedMode (r600addrlib.cpp:355)
|
||||
static TileMode ConvertToNonBankSwappedMode(TileMode tm) {
|
||||
switch (tm) {
|
||||
case TileMode::Tiled2BThin1: return TileMode::Tiled2DThin1;
|
||||
case TileMode::Tiled2BThin2: return TileMode::Tiled2DThin2;
|
||||
case TileMode::Tiled2BThin4: return TileMode::Tiled2DThin4;
|
||||
case TileMode::Tiled2BThick: return TileMode::Tiled2DThick;
|
||||
case TileMode::Tiled3BThin1: return TileMode::Tiled3DThin1;
|
||||
case TileMode::Tiled3BThick: return TileMode::Tiled3DThick;
|
||||
default: return tm;
|
||||
}
|
||||
}
|
||||
|
||||
// R600AddrLib::ComputeSurfaceMipLevelTileMode (r600addrlib.cpp:544).
|
||||
static TileMode ComputeSurfaceMipLevelTileMode(TileMode baseTileMode,
|
||||
u32 bpp,
|
||||
u32 level,
|
||||
u32 width,
|
||||
u32 height,
|
||||
u32 numSamples,
|
||||
bool noRecursive) {
|
||||
// ComputeSurfaceTileSlices == 1 for our case (numSamples=1, thin).
|
||||
// HwlDegradeThickTileMode is identity for thin tiles.
|
||||
TileMode tileMode = baseTileMode;
|
||||
|
||||
const u32 rotation = ComputeSurfaceRotationFromTileMode(tileMode);
|
||||
if ((rotation % kPipes) == 0) {
|
||||
switch (tileMode) {
|
||||
case TileMode::Tiled3DThin1: tileMode = TileMode::Tiled2DThin1; break;
|
||||
case TileMode::Tiled3DThick: tileMode = TileMode::Tiled2DThick; break;
|
||||
case TileMode::Tiled3BThin1: tileMode = TileMode::Tiled2BThin1; break;
|
||||
case TileMode::Tiled3BThick: tileMode = TileMode::Tiled2BThick; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
if (noRecursive || level == 0) {
|
||||
return tileMode;
|
||||
}
|
||||
|
||||
if (bpp == 96 || bpp == 48 || bpp == 24) bpp /= 3;
|
||||
|
||||
width = NextPow2(width);
|
||||
height = NextPow2(height);
|
||||
|
||||
tileMode = ConvertToNonBankSwappedMode(tileMode);
|
||||
|
||||
const u32 thickness = ComputeSurfaceThickness(tileMode);
|
||||
const u32 microTileBytes = BITS_TO_BYTES(thickness * bpp * 64);
|
||||
const u32 widthAlignFactor = (microTileBytes <= kPipeInterleaveBytes)
|
||||
? (kPipeInterleaveBytes / microTileBytes) : 1u;
|
||||
|
||||
u32 macroTileWidth = 8 * kBanks;
|
||||
u32 macroTileHeight = 8 * kPipes;
|
||||
|
||||
switch (tileMode) {
|
||||
case TileMode::Tiled2DThin1:
|
||||
case TileMode::Tiled3DThin1:
|
||||
if (width < widthAlignFactor * macroTileWidth || height < macroTileHeight) {
|
||||
tileMode = TileMode::Tiled1DThin1;
|
||||
}
|
||||
break;
|
||||
case TileMode::Tiled2DThin2:
|
||||
macroTileWidth >>= 1; macroTileHeight *= 2;
|
||||
if (width < widthAlignFactor * macroTileWidth || height < macroTileHeight) {
|
||||
tileMode = TileMode::Tiled1DThin1;
|
||||
}
|
||||
break;
|
||||
case TileMode::Tiled2DThin4:
|
||||
macroTileWidth >>= 2; macroTileHeight *= 4;
|
||||
if (width < widthAlignFactor * macroTileWidth || height < macroTileHeight) {
|
||||
tileMode = TileMode::Tiled1DThin1;
|
||||
}
|
||||
break;
|
||||
case TileMode::Tiled2DThick:
|
||||
case TileMode::Tiled3DThick:
|
||||
if (width < widthAlignFactor * macroTileWidth || height < macroTileHeight) {
|
||||
tileMode = TileMode::Tiled1DThick;
|
||||
}
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
// numSlices < 4 collapse — our textures are all 2D (slices=1), so the
|
||||
// Thick→Thin demote always fires when we hit a thick mode.
|
||||
if (tileMode == TileMode::Tiled1DThick) tileMode = TileMode::Tiled1DThin1;
|
||||
else if (tileMode == TileMode::Tiled2DThick) tileMode = TileMode::Tiled2DThin1;
|
||||
else if (tileMode == TileMode::Tiled3DThick) tileMode = TileMode::Tiled3DThin1;
|
||||
|
||||
return ComputeSurfaceMipLevelTileMode(tileMode, bpp, level, width, height,
|
||||
numSamples, /*noRecursive*/ true);
|
||||
}
|
||||
|
||||
// R600AddrLib::ComputeSurfaceAlignmentsMicrotiled (r600addrlib.cpp:714).
|
||||
static void ComputeAlignmentsMicroTiled(TileMode tileMode, u32 bpp, u32 numSamples,
|
||||
u32& pitchAlign, u32& heightAlign) {
|
||||
if (bpp == 96 || bpp == 48 || bpp == 24) bpp /= 3;
|
||||
const u32 thickness = ComputeSurfaceThickness(tileMode);
|
||||
const u32 pitchAlignment = kPipeInterleaveBytes / bpp / numSamples / thickness;
|
||||
pitchAlign = std::max<u32>(8u, pitchAlignment);
|
||||
heightAlign = 8;
|
||||
// AdjustPitchAlignment is no-op without flags.display.
|
||||
}
|
||||
|
||||
// R600AddrLib::ComputeSurfaceAlignmentsMacrotiled (r600addrlib.cpp:805).
|
||||
static void ComputeAlignmentsMacroTiled(TileMode tileMode, u32 bpp, u32 numSamples,
|
||||
u32& pitchAlign, u32& heightAlign,
|
||||
u32& macroTileWidth, u32& macroTileHeight) {
|
||||
const u32 aspectRatio = (tileMode == TileMode::Tiled2DThin2 ||
|
||||
tileMode == TileMode::Tiled2BThin2) ? 2u
|
||||
: (tileMode == TileMode::Tiled2DThin4 ||
|
||||
tileMode == TileMode::Tiled2BThin4) ? 4u : 1u;
|
||||
const u32 thickness = ComputeSurfaceThickness(tileMode);
|
||||
if (bpp == 96 || bpp == 48 || bpp == 24) bpp /= 3;
|
||||
if (bpp == 3) bpp = 1;
|
||||
|
||||
macroTileWidth = 8 * kBanks / aspectRatio;
|
||||
macroTileHeight = aspectRatio * 8 * kPipes;
|
||||
pitchAlign = std::max<u32>(macroTileWidth,
|
||||
macroTileWidth * (kPipeInterleaveBytes / bpp / (8 * thickness) / numSamples));
|
||||
heightAlign = macroTileHeight;
|
||||
// IsDualBaseAlignNeeded is R6XX-only -> false here; baseAlign branch skipped.
|
||||
}
|
||||
|
||||
// AddrLib::PadDimensions (addrlib.cpp:433), simplified: no cube, no slice padding.
|
||||
static void PadDimensions(TileMode /*tm*/, u32& pitch, u32 pitchAlign,
|
||||
u32& height, u32 heightAlign, u32 padDims) {
|
||||
if (padDims == 0) padDims = 3;
|
||||
if (IsPow2(pitchAlign)) {
|
||||
pitch = PowTwoAlign(pitch, pitchAlign);
|
||||
} else {
|
||||
pitch = ((pitch + pitchAlign - 1) / pitchAlign) * pitchAlign;
|
||||
}
|
||||
if (padDims > 1) {
|
||||
height = PowTwoAlign(height, heightAlign);
|
||||
}
|
||||
}
|
||||
|
||||
// R600AddrLib::ComputeSurfaceInfoMicroTiled (r600addrlib.cpp:969).
|
||||
static void ComputeSurfaceInfoMicroTiled(u32 width, u32 height, u32 numSamples, u32 bpp,
|
||||
TileMode tileMode, u32 mipLevel, SurfaceInfoOut& out) {
|
||||
u32 pitch = width;
|
||||
u32 h = height;
|
||||
if (mipLevel) {
|
||||
pitch = NextPow2(pitch);
|
||||
h = NextPow2(h);
|
||||
// numSlices < 4 / thick collapse: no thick at this point for our flow.
|
||||
}
|
||||
u32 pitchAlign = 0, heightAlign = 0;
|
||||
ComputeAlignmentsMicroTiled(tileMode, bpp, numSamples, pitchAlign, heightAlign);
|
||||
PadDimensions(tileMode, pitch, pitchAlign, h, heightAlign, /*padDims*/ 0);
|
||||
out.pitch = pitch;
|
||||
out.height = height;
|
||||
out.heightAligned = h;
|
||||
out.tileMode = tileMode;
|
||||
}
|
||||
|
||||
// R600AddrLib::ComputeSurfaceInfoMacroTiled (r600addrlib.cpp:1198).
|
||||
static void ComputeSurfaceInfoMacroTiled(u32 width, u32 height, u32 numSamples, u32 bpp,
|
||||
TileMode tileMode, TileMode baseTileMode,
|
||||
u32 mipLevel, SurfaceInfoOut& out) {
|
||||
u32 pitch = width;
|
||||
u32 h = height;
|
||||
if (mipLevel) {
|
||||
pitch = NextPow2(pitch);
|
||||
h = NextPow2(h);
|
||||
}
|
||||
u32 pitchAlignBase = 0, heightAlignBase = 0, mwBase = 0, mhBase = 0;
|
||||
if (tileMode != baseTileMode && mipLevel != 0 &&
|
||||
IsThickMacroTiled(baseTileMode) && !IsThickMacroTiled(tileMode)) {
|
||||
ComputeAlignmentsMacroTiled(baseTileMode, bpp, numSamples,
|
||||
pitchAlignBase, heightAlignBase, mwBase, mhBase);
|
||||
const u32 pitchAlignFactor = std::max<u32>(1u, (kPipeInterleaveBytes >> 3) / bpp);
|
||||
if (pitch < (pitchAlignBase * pitchAlignFactor) || h < heightAlignBase) {
|
||||
ComputeSurfaceInfoMicroTiled(width, height, numSamples, bpp,
|
||||
TileMode::Tiled1DThin1, mipLevel, out);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
u32 pitchAlign = 0, heightAlign = 0, macroWidth = 0, macroHeight = 0;
|
||||
ComputeAlignmentsMacroTiled(tileMode, bpp, numSamples,
|
||||
pitchAlign, heightAlign, macroWidth, macroHeight);
|
||||
const u32 bankSwappedWidth = ComputeSurfaceBankSwappedWidth(tileMode, bpp, numSamples, pitch);
|
||||
pitchAlign = std::max(pitchAlign, bankSwappedWidth);
|
||||
// IsDualPitchAlignNeeded is R6XX-only -> false here.
|
||||
PadDimensions(tileMode, pitch, pitchAlign, h, heightAlign, /*padDims*/ 0);
|
||||
out.pitch = pitch;
|
||||
out.height = height;
|
||||
out.heightAligned = h;
|
||||
out.tileMode = tileMode;
|
||||
}
|
||||
|
||||
void computeSurfaceInfo(const SurfaceInfoIn& in, SurfaceInfoOut& out) {
|
||||
// AddrLib::ComputeMipLevel + R600AddrLib::HwlComputeMipLevel: align BCN
|
||||
// base dims to 4 pixels; for mipLevel>0, reduce dims and NextPow2 them.
|
||||
u32 width = in.width;
|
||||
u32 height = in.height;
|
||||
if (in.isBcn && in.mipLevel == 0) {
|
||||
width = PowTwoAlign(width, 4u);
|
||||
height = PowTwoAlign(height, 4u);
|
||||
}
|
||||
if (in.mipLevel > 0) {
|
||||
width = std::max(1u, width >> in.mipLevel);
|
||||
height = std::max(1u, height >> in.mipLevel);
|
||||
width = NextPow2(width);
|
||||
height = NextPow2(height);
|
||||
}
|
||||
|
||||
if (in.isBcn) {
|
||||
width = (width + 3) / 4;
|
||||
height = (height + 3) / 4;
|
||||
}
|
||||
|
||||
const u32 numSamples = 1;
|
||||
const TileMode demoted = ComputeSurfaceMipLevelTileMode(in.tileMode, in.bpp,
|
||||
in.mipLevel, width, height,
|
||||
numSamples, /*noRecursive*/ false);
|
||||
out.width = width;
|
||||
out.height = height;
|
||||
out.tileMode = demoted;
|
||||
|
||||
switch (demoted) {
|
||||
case TileMode::LinearGeneral:
|
||||
case TileMode::LinearAligned: {
|
||||
// ComputeSurfaceInfoLinear
|
||||
const u32 pa = std::max<u32>(64u, kPipeInterleaveBytes / in.bpp / numSamples);
|
||||
u32 pitch = width, h = height;
|
||||
if (in.mipLevel) { pitch = NextPow2(pitch); h = NextPow2(h); }
|
||||
PadDimensions(demoted, pitch, pa, h, 1u, /*padDims*/ 0);
|
||||
out.pitch = pitch; out.heightAligned = h;
|
||||
break;
|
||||
}
|
||||
case TileMode::Tiled1DThin1:
|
||||
case TileMode::Tiled1DThick:
|
||||
ComputeSurfaceInfoMicroTiled(width, height, numSamples, in.bpp,
|
||||
demoted, in.mipLevel, out);
|
||||
break;
|
||||
default:
|
||||
ComputeSurfaceInfoMacroTiled(width, height, numSamples, in.bpp,
|
||||
demoted, in.tileMode, in.mipLevel, out);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace dusk::tphd::addrlib
|
||||
|
||||
@@ -52,6 +52,27 @@ struct SurfaceDesc {
|
||||
// Deswizzle a single surface mip level into a row-major linear buffer.
|
||||
std::vector<u8> deswizzle(const SurfaceDesc& desc, std::span<const u8> tiledBytes);
|
||||
|
||||
struct SurfaceInfoIn {
|
||||
u32 width; // pixels at mip 0 (caller supplies surface base dims)
|
||||
u32 height; // pixels at mip 0
|
||||
u32 bpp; // bits per pixel (e.g. 32 for RGBA8). For BCN, bits per
|
||||
// 4x4 block (64 for BC1, 128 for BC3/5).
|
||||
u32 mipLevel; // 0 = base, 1..N = mip
|
||||
TileMode tileMode;
|
||||
bool isBcn;
|
||||
};
|
||||
|
||||
struct SurfaceInfoOut {
|
||||
u32 width; // possibly NextPow2'd / block-converted dim used for
|
||||
// the address computation (BCN blocks if isBcn)
|
||||
u32 height; // similar
|
||||
u32 pitch; // aligned pitch used by deswizzle (block units if BCN)
|
||||
u32 heightAligned;
|
||||
TileMode tileMode; // possibly demoted (2D→1D for small mips)
|
||||
};
|
||||
|
||||
// Computes the dimensions, padded pitch/height and effective tile mode of mip
// level `in.mipLevel` of the surface described by `in`, writing them to `out`.
void computeSurfaceInfo(const SurfaceInfoIn& in, SurfaceInfoOut& out);
|
||||
|
||||
} // namespace dusk::tphd::addrlib
|
||||
|
||||
#endif
|
||||
|
||||
@@ -324,36 +324,25 @@ MipLevelDesc mipLevelDesc(const GtxSurface& s, u32 level, bool isBcn, u32 bpp) {
|
||||
MipLevelDesc d{};
|
||||
d.width = std::max(1u, s.width >> level);
|
||||
d.height = std::max(1u, s.height >> level);
|
||||
d.tileMode = static_cast<addrlib::TileMode>(s.tileMode);
|
||||
|
||||
if (level == 0) {
|
||||
d.pitch = s.pitch;
|
||||
d.pitch = s.pitch;
|
||||
d.tileMode = static_cast<addrlib::TileMode>(s.tileMode);
|
||||
return d;
|
||||
}
|
||||
|
||||
// Mirror decaf's widthAlignFactor: when one microtile is smaller than
|
||||
// the pipe interleave (256 B), the demote threshold scales up.
|
||||
const u32 microTileBytes = (bpp * 64u) / 8u;
|
||||
const u32 widthAlignFactor = (microTileBytes <= 256u) ? (256u / microTileBytes) : 1u;
|
||||
|
||||
if (d.tileMode == addrlib::TileMode::Tiled2DThin1 ||
|
||||
d.tileMode == addrlib::TileMode::Tiled2BThin1) {
|
||||
const u32 demoteWidth = widthAlignFactor * 32u;
|
||||
const u32 wElem = isBcn ? (d.width + 3u) / 4u : d.width;
|
||||
const u32 hElem = isBcn ? (d.height + 3u) / 4u : d.height;
|
||||
if (wElem < demoteWidth || hElem < 16u) {
|
||||
d.tileMode = addrlib::TileMode::Tiled1DThin1;
|
||||
}
|
||||
}
|
||||
|
||||
const bool is1D = (d.tileMode == addrlib::TileMode::Tiled1DThin1 ||
|
||||
d.tileMode == addrlib::TileMode::Tiled1DThick);
|
||||
const u32 alignment = is1D ? (8u * widthAlignFactor) : 32u;
|
||||
|
||||
const u32 pixelsPerBlock = isBcn ? 4u : 1u;
|
||||
const u32 widthInBlock = (d.width + pixelsPerBlock - 1u) / pixelsPerBlock;
|
||||
u32 levelPitch = ((widthInBlock + alignment - 1u) / alignment) * alignment;
|
||||
d.pitch = std::max(1u, levelPitch);
|
||||
const addrlib::SurfaceInfoIn si{
|
||||
.width = s.width,
|
||||
.height = s.height,
|
||||
.bpp = bpp,
|
||||
.mipLevel = level,
|
||||
.tileMode = static_cast<addrlib::TileMode>(s.tileMode),
|
||||
.isBcn = isBcn,
|
||||
};
|
||||
addrlib::SurfaceInfoOut so{};
|
||||
addrlib::computeSurfaceInfo(si, so);
|
||||
d.pitch = so.pitch; // block units for BCN, pixel units for plain.
|
||||
d.tileMode = so.tileMode;
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user