Files
2026-04-17 20:09:41 +03:00

1031 lines
48 KiB
C++

#pragma once
// Native UI runtime - presenter abstraction
// Part of the AC6 Recompilation native presenter/window layer
#include <algorithm>
#include <array>
#include <atomic>
#include <bit>
#include <climits>
#include <cmath>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
#include <rex/assert.h>
#include <rex/math.h>
#include <rex/platform.h>
#include <rex/types.h>
#include <rex/ui/flags.h>
#include <native/ui/surface.h>
#include <rex/ui/ui_drawer.h>
#if REX_PLATFORM_WIN32
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <dxgi.h>
#include <windows.h>
#include <wrl/client.h>
#endif // REX_PLATFORM_WIN32
namespace rex {
namespace ui {
class Presenter;
class Window;
class Win32Window;
class UIDrawContext {
public:
UIDrawContext(const UIDrawContext& context) = delete;
UIDrawContext& operator=(const UIDrawContext& context) = delete;
virtual ~UIDrawContext() = default;
Presenter& presenter() const { return presenter_; }
// It's assumed that the render target size will be either equal to the size
// of the surface, or the render target will be stretched to cover the entire
// surface (not in the corner of the surface).
uint32_t render_target_width() const { return render_target_width_; }
uint32_t render_target_height() const { return render_target_height_; }
protected:
explicit UIDrawContext(Presenter& presenter, uint32_t render_target_width,
uint32_t render_target_height)
: presenter_(presenter),
render_target_width_(render_target_width),
render_target_height_(render_target_height) {}
private:
Presenter& presenter_;
uint32_t render_target_width_;
uint32_t render_target_height_;
};
// CPU-side copy of an image, as filled by Presenter::CaptureGuestOutput.
struct RawImage {
  uint32_t width{0};
  uint32_t height{0};
  // Distance between the starts of consecutive rows in `data`
  // (presumably in bytes, as `data` is a byte vector - TODO confirm).
  size_t stride{0};
  // Pixels stored as R8 G8 B8 X8. The final row does not have to be padded up
  // to the stride.
  std::vector<uint8_t> data;
};
// The presenter displays up to two layers of content on a host surface:
// - Guest output image, focusing on lowering latency and maintaining stable
// frame pacing, with various scaling and sharpening methods and letterboxing;
// - Xenia's internal UI (such as the profiler and Dear ImGui).
//
// The guest output image may be refreshed from any thread generating it
// (usually the GPU emulation thread), as long as there are no multiple threads
// doing that simultaneously (since that would functionally be a race condition
// even if refreshing is performed in a critical section).
//
// The UI overlays are managed entirely by the UI thread.
//
// Painting on the host surface may occur in two places:
// - If there are no UI overlays, painting of the guest output may be performed
// immediately from the thread refreshing it, to bypass the OS scheduling and
// event handling. This is especially important on platforms where the native
// surface paint event has a frame rate limit (such as the display refresh
// rate), and the limit may differ greatly from the guest frame rate (such as
// presenting a 30 or 60 FPS guest to a 144 Hz host surface).
// - If the UI overlays (owned by the UI thread) are present, painting of both
// the guest output (if available) and the UI is done exclusively from the
// platform paint event handler. The guest output without UI overlays may also
// be painted from the platform paint callback in certain cases, such as when
// an additional paint beyond the guest's frame rate may be needed (like when
// resizing the window), or when painting from the thread refreshing the guest
// output is undesirable (for instance, if it will result in waiting for host
// vertical sync in that thread too early if host vertical sync can't be
// disabled on the platform, blocking the next frame of GPU emulation).
//
// The composition of the guest and the UI is done by Xenia manually, as opposed
// to using platform functionality such as DirectComposition, in order to have
// more predictability of GPU queue scheduling, but primarily to be able to take
// advantage of independent host presentation where it's available, so variable
// refresh rate may be used where possible, and latency may be significantly
// reduced. Also, at least on some configurations (checked on Windows 11 21H2 on
// Nvidia GeForce GTX 1070 with driver version 472.12), when in borderless
// fullscreen, any composition causes the DXGI Present to wait for vertical sync
// on the GPU even if the sync interval 0 is specified.
//
// An intermediate image with the size requested by the guest is used for guest
// output in all cases. Even though it adds some GPU overhead, especially in the
// 1:1 size case, using it solves multiple issues:
// - Presentation may be done more often than by the guest.
// - There is clear separation between pre-scaling and mid- / post-scaling
// operations. The gamma ramp, for instance, may be applied before scaling,
// with one lookup per pixel rather than four with fetch4.
// - A simpler compute shader may be used instead of setting up the whole
// graphics pipeline for copying in the GPU command processor in all cases,
// while Direct3D 12 does not allow UAVs for swap chain buffers.
//
// The presenter limits the frame rate of the UI overlay (when possible) to a
// value that's ideally the refresh rate of the monitor containing the window if
// the platform's paint event doesn't have an internal limiter. However, where
// possible, the arrival of a new guest output image will interrupt the UI tick
// wait.
//
// Because the UI overlays preclude the possibility of presenting directly from
// the thread refreshing the guest output, and on some platforms, result in the
// frame rate limiting of paint events manifesting itself, there must be no
// persistent UI overlays that haven't been explicitly requested by the user.
// However, for temporary (primarily non-modal) UI elements such as various
// timed notifications, using the Presenter should be preferred to implementing
// them via overlaying native windows on top of the presentation surface on
// platforms where the concept of independent presentation exists, as multiple
// windows will result in native composition disabling it.
//
// The painting connection between the Presenter and the Surface can be managed
// only by the UI thread. However, the thread refreshing the guest output may
// still mark the current connection as outdated and ask the UI thread (by
// requesting painting) to try to recover - but the guest output refresh thread
// must not try to reconnect by itself, as methods of the Surface are available
// only to the UI thread.
class Presenter {
public:
// May be actually called on the UI thread even if statically_from_ui_thread
// is false, such as when the guest output is refreshed by the UI thread.
using HostGpuLossCallback =
std::function<void(bool is_responsible, bool statically_from_ui_thread)>;
static void FatalErrorHostGpuLossCallback(bool is_responsible, bool statically_from_ui_thread);
class GuestOutputRefreshContext {
 public:
  GuestOutputRefreshContext(const GuestOutputRefreshContext&) = delete;
  GuestOutputRefreshContext& operator=(const GuestOutputRefreshContext&) = delete;
  virtual ~GuestOutputRefreshContext() = default;

  // Reports whether the source really carries at most 8 bits of precision
  // (the image handed over by the refresher may nonetheless be stored with a
  // higher precision). When this is never called, false is assumed.
  void SetIs8bpc(bool is_8bpc) { is_8bpc_out_ref_ = is_8bpc; }

 protected:
  // Binds the context to the caller-owned flag and resets that flag to false,
  // which is the value that remains if SetIs8bpc is never called.
  GuestOutputRefreshContext(bool& is_8bpc_out_ref) : is_8bpc_out_ref_(is_8bpc_out_ref) {
    is_8bpc_out_ref_ = false;
  }

 private:
  bool& is_8bpc_out_ref_;
};
// User-controllable settings describing how the guest output is scaled,
// sharpened and dithered when it is painted.
class GuestOutputPaintConfig {
 public:
  enum class Effect {
    kBilinear,
#if defined(REX_HAS_FIDELITYFX_SDK)
    kCas,
    // AMD FidelityFX Super Resolution when upsampling, Contrast Adaptive
    // Sharpening otherwise.
    kFsr,
    // FidelityFX FSR2 selection. Goes through the runtime temporal upscaler
    // path where available; still experimental at the moment because the
    // presenter path provides only limited temporal inputs.
    kFsr2,
    // FidelityFX FSR3 selection. Goes through the runtime temporal upscaler
    // path where available; still experimental at the moment because the
    // presenter path provides only limited temporal inputs.
    kFsr3,
#endif
  };

#if defined(REX_HAS_FIDELITYFX_SDK)
  enum class FsrQualityMode {
    // Keep the current behavior and use the guest output size as-is.
    kAuto,
    kNativeAa,
    kQuality,
    kBalanced,
    kPerformance,
    kUltraPerformance,
  };

  // The CAS additional sharpness is used as a lerp factor.
  static constexpr float kCasAdditionalSharpnessMin = 0.0f;
  static constexpr float kCasAdditionalSharpnessMax = 1.0f;
  static constexpr float kCasAdditionalSharpnessDefault = 0.0f;
  static_assert(kCasAdditionalSharpnessDefault >= kCasAdditionalSharpnessMin);
  static_assert(kCasAdditionalSharpnessDefault <= kCasAdditionalSharpnessMax);

  // EASU (like CAS) is designed for scaling factors of up to 2x2, so several
  // passes are chained for bigger factors. This is a sensible upper limit for
  // unusual cases where the game presents a very small back buffer for some
  // reason - enough for 480p > 960p > 1920p > 3840p > 7680p (bigger than 8K,
  // or 4320p).
  static constexpr uint32_t kFsrMaxUpscalingPassesMax = 4;

  static constexpr float kFsrSharpnessReductionMin = 0.0f;
  // "Values above 2.0 won't make a visible difference."
  // https://raw.githubusercontent.com/GPUOpen-Effects/FidelityFX-FSR/master/docs/FidelityFX-FSR-Overview-Integration.pdf
  static constexpr float kFsrSharpnessReductionMax = 2.0f;
  static constexpr float kFsrSharpnessReductionDefault = 0.2f;
  static_assert(kFsrSharpnessReductionDefault >= kFsrSharpnessReductionMin);
  static_assert(kFsrSharpnessReductionDefault <= kFsrSharpnessReductionMax);
#endif  // defined(REX_HAS_FIDELITYFX_SDK)

  bool GetAllowOverscanCutoff() const { return allow_overscan_cutoff_; }
  void SetAllowOverscanCutoff(bool new_allow_overscan_cutoff) {
    allow_overscan_cutoff_ = new_allow_overscan_cutoff;
  }

  Effect GetEffect() const { return effect_; }
  void SetEffect(Effect new_effect) { effect_ = new_effect; }

#if defined(REX_HAS_FIDELITYFX_SDK)
  float GetCasAdditionalSharpness() const { return cas_additional_sharpness_; }
  void SetCasAdditionalSharpness(float new_cas_additional_sharpness) {
    // The constant is deliberately the first argument of both max and min:
    // this way a NaN input is dropped as well (the lower bound is stored).
    const float bounded_below =
        std::max(kCasAdditionalSharpnessMin, new_cas_additional_sharpness);
    cas_additional_sharpness_ = std::min(kCasAdditionalSharpnessMax, bounded_below);
  }

  uint32_t GetFsrMaxUpsamplingPasses() const { return fsr_max_upsampling_passes_; }
  void SetFsrMaxUpsamplingPasses(uint32_t new_fsr_max_upsampling_passes) {
    // At least one pass, at most kFsrMaxUpscalingPassesMax.
    fsr_max_upsampling_passes_ =
        std::clamp(new_fsr_max_upsampling_passes, uint32_t(1), kFsrMaxUpscalingPassesMax);
  }

  // The FSR sharpness reduction is expressed in stops.
  float GetFsrSharpnessReduction() const { return fsr_sharpness_reduction_; }
  void SetFsrSharpnessReduction(float new_fsr_sharpness_reduction) {
    // The constant is deliberately the first argument of both max and min:
    // this way a NaN input is dropped as well (the lower bound is stored).
    const float bounded_below =
        std::max(kFsrSharpnessReductionMin, new_fsr_sharpness_reduction);
    fsr_sharpness_reduction_ = std::min(kFsrSharpnessReductionMax, bounded_below);
  }

  FsrQualityMode GetFsrQualityMode() const { return fsr_quality_mode_; }
  void SetFsrQualityMode(FsrQualityMode new_fsr_quality_mode) {
    fsr_quality_mode_ = new_fsr_quality_mode;
  }
#endif  // defined(REX_HAS_FIDELITYFX_SDK)

  // Dithering has a very tiny effect, but a highly noticeable one, for
  // instance, on the sky in the 4D5307E6 main menu (prominently in Custom
  // Games, especially with FSR - without dithering, banding around the clouds
  // is clearly visible with an 8bpc final host output).
  bool GetDither() const { return dither_; }
  void SetDither(bool new_dither) { dither_ = new_dither; }

 private:
  // Tools, as opposed to the emulator itself, must not allow overscan cutoff
  // and must use the kBilinear effect, because the image has to stay as close
  // to the original front buffer as possible - hence these defaults.
  bool allow_overscan_cutoff_ = false;
  Effect effect_ = Effect::kBilinear;
#if defined(REX_HAS_FIDELITYFX_SDK)
  float cas_additional_sharpness_ = kCasAdditionalSharpnessDefault;
  uint32_t fsr_max_upsampling_passes_ = kFsrMaxUpscalingPassesMax;
  float fsr_sharpness_reduction_ = kFsrSharpnessReductionDefault;
  FsrQualityMode fsr_quality_mode_ = FsrQualityMode::kAuto;
#endif
  bool dither_ = false;
};
Presenter(const Presenter& presenter) = delete;
Presenter& operator=(const Presenter& presenter) = delete;
virtual ~Presenter();
virtual Surface::TypeFlags GetSupportedSurfaceTypes() const = 0;
// For calling from the Window for the Presenter attached to it.
// May be called from the destructor of the presenter through the window.
void SetWindowSurfaceFromUIThread(Window* new_window, Surface* new_surface);
void OnSurfaceMonitorUpdateFromUIThread(bool old_monitor_potentially_disconnected);
void OnSurfaceResizeFromUIThread();
// For calling from the platform paint event handler. Refreshes the surface
// connection if needed, and also paints if possible and if needed (if there
// are no UI overlays, and the guest output is presented directly from the
// thread refreshing it, the paint may be skipped unless there has been an
// explicit request previously or force_paint is true). If painting happens,
// both the guest output and the UI overlays (if any are active) are drawn.
// The background / letterbox of the painted context will be black - windows
// should preferably have a black background before a Presenter is attached to
// them too.
void PaintFromUIThread(bool force_paint = false);
// Pass 0 as width or height to disable guest output until the next refresh
// with an actual size. The display aspect ratio may be specified like 16:9 or
// like 1280:720, both are accepted, for simplicity, the guest display size
// may just be passed. The callback will receive a backend-specific context,
// and will not be called in case of an error such as the wrong size, or if
// guest output is disabled. Returns whether the callback was called and it
// returned true. The callback must submit all updating work to the host GPU
// before successfully returning, and also signal all the GPU synchronization
// primitives required by the GuestOutputRefreshContext implementation.
bool RefreshGuestOutput(uint32_t frontbuffer_width, uint32_t frontbuffer_height,
uint32_t display_aspect_ratio_x, uint32_t display_aspect_ratio_y,
std::function<bool(GuestOutputRefreshContext& context)> refresher);
// The implementation must be callable from any thread, including from
// multiple at the same time, and it should acquire the latest guest output
// image via ConsumeGuestOutput.
virtual bool CaptureGuestOutput(RawImage& image_out) = 0;
// Returns a reference to the guest output paint configuration currently stored
// in the presenter. NOTE(review): per the FromUIThread suffix, presumably must
// only be called from the UI thread - confirm against the implementation.
const GuestOutputPaintConfig& GetGuestOutputPaintConfigFromUIThread() const {
return guest_output_paint_config_;
}
// For simplicity, may be called repeatedly even if no changes have been made.
void SetGuestOutputPaintConfigFromUIThread(const GuestOutputPaintConfig& new_config);
void AddUIDrawerFromUIThread(UIDrawer* drawer, size_t z_order);
void RemoveUIDrawerFromUIThread(UIDrawer* drawer);
// Requests (re)painting with the UI if there's UI to draw.
void RequestUIPaintFromUIThread();
protected:
// Outcome of a paint attempt, as returned by PaintAndPresentImpl.
enum class PaintResult {
kPresented,
// NOTE(review): presumably presented, but the surface connection should be
// refreshed when convenient - confirm against the implementations.
kPresentedSuboptimal,
// Refused for internal reasons or a host API side failure, but still may
// try to present without resetting the graphics provider in the future.
kNotPresented,
// NOTE(review): presumably nothing was presented because the surface paint
// connection needs to be re-established - confirm.
kNotPresentedConnectionOutdated,
// NOTE(review): the two values below presumably map to the is_responsible
// argument of HostGpuLossCallback (false / true respectively) - confirm.
kGpuLostExternally,
kGpuLostResponsible,
};
// Outcome of ConnectOrReconnectPaintingToSurfaceFromUIThread.
enum class SurfacePaintConnectResult {
// Redrawing not necessary, nothing changed. Must not be returned for a new
// connection (when was previously disconnected from the surface).
kSuccessUnchanged,
kSuccess,
kFailure,
// NOTE(review): presumably the surface itself can't be used until it's
// replaced, as opposed to the plain kFailure - confirm against the
// implementations.
kFailureSurfaceUnusable,
};
static constexpr uint32_t kGuestOutputMailboxSize = 3;
// Describes one guest output image. Default-constructed properties are
// inactive.
struct GuestOutputProperties {
  // If any of the four size / aspect ratio values is 0, the guest output is
  // disabled for this frame.
  uint32_t frontbuffer_width;
  uint32_t frontbuffer_height;
  // Numerator and denominator of the guest display aspect ratio (values of
  // both the 16:9 and the 1280:720 kinds are accepted).
  uint32_t display_aspect_ratio_x;
  uint32_t display_aspect_ratio_y;
  bool is_8bpc;

  GuestOutputProperties() { SetToInactive(); }

  // True only when both the front buffer size and the aspect ratio are
  // entirely non-zero.
  bool IsActive() const {
    const bool has_frontbuffer_size = frontbuffer_width != 0 && frontbuffer_height != 0;
    const bool has_aspect_ratio = display_aspect_ratio_x != 0 && display_aspect_ratio_y != 0;
    return has_frontbuffer_size && has_aspect_ratio;
  }

  // Zeroes every field, which marks the guest output as disabled.
  void SetToInactive() {
    frontbuffer_width = frontbuffer_height = 0;
    display_aspect_ratio_x = display_aspect_ratio_y = 0;
    is_8bpc = false;
  }
};
// A single pass of guest output painting. A GuestOutputPaintFlow stores a
// sequence of these in its `effects` array. Which effects may be used as
// intermediate or as final passes is defined by
// CanGuestOutputPaintEffectBeIntermediate and CanGuestOutputPaintEffectBeFinal.
enum class GuestOutputPaintEffect {
kBilinear,
kBilinearDither,
#if defined(REX_HAS_FIDELITYFX_SDK)
kCasSharpen,
kCasSharpenDither,
kCasResample,
kCasResampleDither,
kFsrEasu,
kFsrRcas,
kFsrRcasDither,
#endif
// Not an effect - being the last enumerator, its value is the number of the
// effects declared above.
kCount,
};
// Returns whether the effect may be followed by another pass, as opposed to
// being usable only as the last pass of the flow.
static constexpr bool CanGuestOutputPaintEffectBeIntermediate(GuestOutputPaintEffect effect) {
  // Plain bilinear and every dithering effect are final-only. Dithering in
  // particular is never done in intermediate passes because the subsequent
  // passes may interpret it as features of the image.
  bool is_final_only = effect == GuestOutputPaintEffect::kBilinear ||
                       effect == GuestOutputPaintEffect::kBilinearDither;
#if defined(REX_HAS_FIDELITYFX_SDK)
  is_final_only = is_final_only || effect == GuestOutputPaintEffect::kCasSharpenDither ||
                  effect == GuestOutputPaintEffect::kCasResampleDither ||
                  effect == GuestOutputPaintEffect::kFsrRcasDither;
#endif
  // The result of any other effect can be stretched to the final resolution
  // with bilinear filtering.
  return !is_final_only;
}
// Returns whether the effect may be the last pass of the flow, drawing to the
// host render target.
static constexpr bool CanGuestOutputPaintEffectBeFinal(GuestOutputPaintEffect effect) {
#if defined(REX_HAS_FIDELITYFX_SDK)
  // EASU is the only effect that must always be followed by another pass.
  return effect != GuestOutputPaintEffect::kFsrEasu;
#else
  // Without FidelityFX, every available effect can be final.
  static_cast<void>(effect);
  return true;
#endif
}
// Upper bound of the number of passes in a single guest output paint flow.
// Used as the capacity of the `effects` and `effect_output_sizes` arrays of
// GuestOutputPaintFlow.
#if defined(REX_HAS_FIDELITYFX_SDK)
// The longest path is kFsrMaxUpscalingPassesMax + optionally RCAS +
// optionally bilinear, when upscaling by more than
// 2^kFsrMaxUpscalingPassesMax along any direction.
// Non-FSR paths are either only bilinear, only CAS, or (when upscaling by
// more than 2 along any direction) CAS followed by bilinear.
static constexpr size_t kMaxGuestOutputPaintEffects =
GuestOutputPaintConfig::kFsrMaxUpscalingPassesMax + 2;
#else
// Bilinear-only path: at most 1 effect.
static constexpr size_t kMaxGuestOutputPaintEffects = 1;
#endif
// The sequence of passes, with their output sizes, the final placement and
// the letterbox, needed to paint a guest output image to a host render target.
struct GuestOutputPaintFlow {
  // The letterbox may be present on up to 4 sides.
  static constexpr size_t kMaxClearRectangles = 4;

  struct ClearRectangle {
    uint32_t x;
    uint32_t y;
    uint32_t width;
    uint32_t height;
  };

  GuestOutputProperties properties;

  // Number of used entries in `effects` and `effect_output_sizes`. With 0, the
  // guest output is not displayed.
  size_t effect_count;
  std::array<GuestOutputPaintEffect, kMaxGuestOutputPaintEffects> effects;
  std::array<std::pair<uint32_t, uint32_t>, kMaxGuestOutputPaintEffects> effect_output_sizes;

  // Offset of the rectangle that the final effect draws to in the host window
  // with letterboxing.
  int32_t output_x;
  int32_t output_y;

  // When there is guest output (effect_count is not 0), the letterbox
  // rectangles surrounding it.
  size_t letterbox_clear_rectangle_count;
  std::array<ClearRectangle, kMaxClearRectangles> letterbox_clear_rectangles;

  // Writes the size of the image that the effect reads: the front buffer for
  // the first effect, the output of the preceding effect for the others.
  void GetEffectInputSize(size_t effect_index, uint32_t& width_out, uint32_t& height_out) const {
    assert_true(effect_index < effect_count);
    if (effect_index > 0) {
      const std::pair<uint32_t, uint32_t>& previous_output_size =
          effect_output_sizes[effect_index - 1];
      width_out = previous_output_size.first;
      height_out = previous_output_size.second;
    } else {
      width_out = properties.frontbuffer_width;
      height_out = properties.frontbuffer_height;
    }
  }

  // Writes the offset that the effect draws at: the letterboxed output offset
  // for the last effect, the origin for all the effects before it.
  void GetEffectOutputOffset(size_t effect_index, int32_t& x_out, int32_t& y_out) const {
    assert_true(effect_index < effect_count);
    const bool is_last_effect = effect_index + 1 >= effect_count;
    x_out = is_last_effect ? output_x : 0;
    y_out = is_last_effect ? output_y : 0;
  }
};
// Constants of a bilinear stretching pass.
struct BilinearConstants {
  int32_t output_offset[2];
  // 1 / output size, per axis.
  float output_size_inv[2];

  // Fills the constants for the effect with the given index in the flow.
  void Initialize(const GuestOutputPaintFlow& flow, size_t effect_index) {
    flow.GetEffectOutputOffset(effect_index, output_offset[0], output_offset[1]);
    const auto& [output_width, output_height] = flow.effect_output_sizes[effect_index];
    output_size_inv[0] = 1.0f / float(output_width);
    output_size_inv[1] = 1.0f / float(output_height);
  }
};
#if defined(REX_HAS_FIDELITYFX_SDK)
// Converts the CAS sharpness setting to the value consumed by the shader
// (the same as const1.x computed by CasSetup).
static constexpr float CalculateCasPostSetupSharpness(float sharpness) {
  const float denominator = 8.0f - 3.0f * sharpness;
  return -1.0f / denominator;
}
// Constants of a CAS pass that sharpens without resampling.
struct CasSharpenConstants {
  int32_t output_offset[2];
  float sharpness_post_setup;

  // Fills the constants for the effect with the given index in the flow,
  // taking the sharpness from the paint configuration.
  void Initialize(const GuestOutputPaintFlow& flow, size_t effect_index,
                  const GuestOutputPaintConfig& config) {
    flow.GetEffectOutputOffset(effect_index, output_offset[0], output_offset[1]);
    const float additional_sharpness = config.GetCasAdditionalSharpness();
    sharpness_post_setup = CalculateCasPostSetupSharpness(additional_sharpness);
  }
};
// Constants of a CAS pass that resamples and sharpens at once.
struct CasResampleConstants {
  int32_t output_offset[2];
  // Input size / output size, per axis.
  float input_output_size_ratio[2];
  float sharpness_post_setup;

  // Fills the constants for the effect with the given index in the flow,
  // taking the sharpness from the paint configuration.
  void Initialize(const GuestOutputPaintFlow& flow, size_t effect_index,
                  const GuestOutputPaintConfig& config) {
    flow.GetEffectOutputOffset(effect_index, output_offset[0], output_offset[1]);

    uint32_t source_width = 0;
    uint32_t source_height = 0;
    flow.GetEffectInputSize(effect_index, source_width, source_height);
    const auto& [target_width, target_height] = flow.effect_output_sizes[effect_index];
    input_output_size_ratio[0] = float(source_width) / float(target_width);
    input_output_size_ratio[1] = float(source_height) / float(target_height);

    const float additional_sharpness = config.GetCasAdditionalSharpness();
    sharpness_post_setup = CalculateCasPostSetupSharpness(additional_sharpness);
  }
};
// Constants of an FSR EASU (upscaling) pass.
struct FsrEasuConstants {
  // There is no output offset here since EASU always draws to an intermediate
  // framebuffer.
  // Input size / output size, per axis.
  float input_output_size_ratio[2];
  // 1 / input size, per axis.
  float input_size_inv[2];

  // Fills the constants for the effect with the given index in the flow.
  void Initialize(const GuestOutputPaintFlow& flow, size_t effect_index) {
    uint32_t source_width = 0;
    uint32_t source_height = 0;
    flow.GetEffectInputSize(effect_index, source_width, source_height);
    const auto& [target_width, target_height] = flow.effect_output_sizes[effect_index];

    const float source_width_float = float(source_width);
    const float source_height_float = float(source_height);
    input_output_size_ratio[0] = source_width_float / float(target_width);
    input_output_size_ratio[1] = source_height_float / float(target_height);
    input_size_inv[0] = 1.0f / source_width_float;
    input_size_inv[1] = 1.0f / source_height_float;
  }
};
// Constants of an FSR RCAS (sharpening) pass.
struct FsrRcasConstants {
  int32_t output_offset[2];
  float sharpness_post_setup;

  // Converts the sharpness reduction in stops to the value consumed by the
  // shader (the same as const0.x computed by FsrRcasCon): 2^(-stops).
  static float CalculatePostSetupSharpness(float sharpness_reduction_stops) {
    const float negated_stops = -sharpness_reduction_stops;
    return std::exp2(negated_stops);
  }

  // Fills the constants for the effect with the given index in the flow,
  // taking the sharpness reduction from the paint configuration.
  void Initialize(const GuestOutputPaintFlow& flow, size_t effect_index,
                  const GuestOutputPaintConfig& config) {
    flow.GetEffectOutputOffset(effect_index, output_offset[0], output_offset[1]);
    const float sharpness_reduction = config.GetFsrSharpnessReduction();
    sharpness_post_setup = CalculatePostSetupSharpness(sharpness_reduction);
  }
};
#endif // defined(REX_HAS_FIDELITYFX_SDK)
// Stores the callback to invoke when the host GPU is lost. The callback is
// taken by value as a sink argument and moved into the member, so passing a
// temporary or an std::move'd callback doesn't copy the std::function's
// target.
explicit Presenter(HostGpuLossCallback host_gpu_loss_callback)
    : host_gpu_loss_callback_(std::move(host_gpu_loss_callback)) {}
// Must be called by the implementation's initialization, before the presenter
// is used for anything.
bool InitializeCommonSurfaceIndependent();
// ConnectOrReconnect and Disconnect are callable only by the UI thread and
// only when it has access to painting (PaintMode is not
// kGuestOutputThreadImmediately).
// Called only for a non-zero-area surface potentially supporting painting via
// the presenter. In case of a failure, internally no resources referencing
// the surface must be held by the implementation anymore - the implementation
// must be left in the same state as after
// DisconnectPaintingFromSurfaceFromUIThreadImpl. If the call is successful,
// the implementation must write to is_vsync_implicit_out whether the
// connection will now have vertical sync forced by the host window system,
// which may cause undesirable waits on the CPU when beginning or ending
// frames.
virtual SurfacePaintConnectResult ConnectOrReconnectPaintingToSurfaceFromUIThread(
Surface& new_surface, uint32_t new_surface_width, uint32_t new_surface_height,
bool was_paintable, bool& is_vsync_implicit_out) = 0;
// Releases resources referencing the surface in the implementation if they
// are held by it. Call through DisconnectPaintingFromSurfaceFromUIThread to
// ensure the implementation is only called while the connection is active.
virtual void DisconnectPaintingFromSurfaceFromUIThreadImpl() = 0;
// The returned lock interlocks multiple consumers (but not the producer and
// the consumer) and must be held while accessing implementation-specific
// objects that depend on the image or its index in the mailbox (unless there
// are other locking mechanisms involved for the resources, such as reference
// counting for the guest output images, which doesn't have to be atomic
// though for the reason described later in this paragraph, or assumptions
// like of main target painting being possible only in at most one thread at
// once). While this lock is held, the currently acquired image index can't be
// changed (by other consumers advancing the acquired image index to the new
// ready image index), so the image with the index given by this function
// can't be released and be made writable or given to a different consumer
// (thus it's owned exclusively by the consumer who has called this function).
// The properties are returned by copy rather than returning a pointer to them
// or asking the consumer to pull them for the current mailbox index, so there
// are less things to take into consideration while leaving the guest output
// consumer critical section earlier (as if a pointer was returned, the data
// behind it could be overwritten at any time after leaving the consumer
// critical section) if the implementation has its own synchronization
// mechanisms that allow for doing so as described earlier. Returns UINT32_MAX
// as the mailbox index if the image is inactive (if it's active, it has
// proper properties though).
[[nodiscard]] std::unique_lock<std::mutex> ConsumeGuestOutput(
uint32_t& mailbox_index_or_max_if_inactive_out, GuestOutputProperties* properties_out,
GuestOutputPaintConfig* paint_config_out);
// The properties are passed explicitly, not taken from the current acquired
// image, so it can be called for a copy of the acquired image's properties
// outside the consumer lock if the implementation has its own synchronization
// (like reference counting for the guest output images) that makes it
// possible to leave the consumer critical section earlier. Also, the guest
// output paint configuration is passed explicitly too so calling this
// function multiple times is safer.
GuestOutputPaintFlow GetGuestOutputPaintFlow(const GuestOutputProperties& properties,
uint32_t host_rt_width, uint32_t host_rt_height,
uint32_t max_rt_width, uint32_t max_rt_height,
const GuestOutputPaintConfig& config) const;
// is_8bpc_out_ref is where to write whether the source actually has no more
// than 8 bits of precision per channel (though the image provided by the
// refresher may still have a higher storage precision) - if not written, it
// will be assumed to be false.
virtual bool RefreshGuestOutputImpl(
uint32_t mailbox_index, uint32_t frontbuffer_width, uint32_t frontbuffer_height,
std::function<bool(GuestOutputRefreshContext& context)> refresher, bool& is_8bpc_out_ref) = 0;
// Converts a packed 10bpc RGB pixel (red in bits 0-9, green in bits 10-19,
// blue in bits 20-29, the top 2 bits are ignored) to an 8bpc pixel that can be
// stored in common image formats. Intended for guest output capturing, which
// is for debugging use, so no noise that's not present in the original image
// (such as dithering) is added.
//
// The returned value, when written to memory as a uint32_t, is the byte
// sequence R, G, B, 0xFF regardless of the host endianness.
static uint32_t Packed10bpcRGBTo8bpcBytes(uint32_t rgb10) {
  // Conversion almost according to the Direct3D 10+ rules (unorm > float >
  // unorm), but with one multiplication rather than separate division and
  // multiplication - the results are the same for unorm10 to unorm8.
  const auto unorm10_to_unorm8 = [](uint32_t channel) -> uint32_t {
    return uint32_t(float(channel & 0x3FF) * (255.0f / 1023.0f) + 0.5f);
  };
  const uint32_t red = unorm10_to_unorm8(rgb10);
  const uint32_t green = unorm10_to_unorm8(rgb10 >> 10);
  const uint32_t blue = unorm10_to_unorm8(rgb10 >> 20);
  constexpr uint32_t kPadding = 0xFF;
  // std::endian requires <bit>.
  if constexpr (std::endian::native == std::endian::big) {
    return (red << 24) | (green << 16) | (blue << 8) | kPadding;
  } else {
    return red | (green << 8) | (blue << 16) | (kPadding << 24);
  }
}
// Paints and presents the guest output if available (or just solid black
// color), and if requested, the UI on top of it.
//
// May be called from the non-UI thread, but only to paint the guest output
// (no UI drawing, with execute_ui_drawers disabled).
//
// Call via PaintAndPresent.
virtual PaintResult PaintAndPresentImpl(bool execute_ui_drawers) = 0;
// For calling from the painting implementations if requested.
void ExecuteUIDrawersFromUIThread(UIDrawContext& ui_draw_context);
private:
// Which thread, if any, currently paints to the surface and owns the painting
// lifecycle.
enum class PaintMode {
// Don't paint at all.
// Painting lifecycle is accessible only by the UI thread.
// window_->RequestPaint() must not be called in this mode at all, regardless
// of whether the Window object exists, because the Window object in this
// case may correspond to a window without a paintable Surface (in a closed
// state, or in the middle of a surface change), and calls from non-UI threads
// (such as the guest output thread) may result in a race condition inside
// Window::RequestPaint during the access to the Window's state, such as the
// availability of the Surface that can handle the paint (therefore, if
// there's no Surface, this is the only valid mode).
kNone,
// Guest output refreshing notifies the `window_`, which must be valid and
// safe to call RequestPaint for, that painting should be done in the UI
// thread (including the UI if needed). Painting is possible, and the painting
// lifecycle is accessible, only by the UI thread.
kUIThreadOnRequest,
// Paint immediately in the guest output thread for lower latency. The
// `window_`, however, may be notified that the surface painting connection
// has become outdated (via RequestPaint, as in this case the UI thread will
// need to repaint as soon as possible after reconnecting anyway), and the
// guest output thread may change the surface connection state accordingly
// (only to kConnectedOutdated).
// Painting is possible only by the guest output thread; lifecycle
// management cannot be done from the UI thread until it takes over.
kGuestOutputThreadImmediately,
};
// State of the painting connection between the presenter and the surface.
// kConnectedPaintable and kConnectedOutdated are the "connected" states (see
// IsConnectedSurfacePaintConnectionState).
enum class SurfacePaintConnectionState {
// No surface at all, or couldn't connect with the current state of the
// surface (for example, because the surface was zero-sized as the window
// was minimized). Or, the connection has become outdated, and the attempt to
// reconnect from kConnectedOutdated has failed. Try to reconnect if anything
// changes in the state of the surface, such as its size.
kUnconnectedRetryAtStateChange,
// Can't connect to the current existing surface (the surface has been lost
// or it's completely incompatible). No point in retrying connecting until
// the surface is replaced.
kUnconnectedSurfaceReportedUnusable,
// Everything is fine, can paint. The connection might have become
// suboptimal though, with no refresh attempted yet, but it's still usable
// for painting nonetheless.
kConnectedPaintable,
// The implementation still holds resources associated with the connection,
// but presentation has reported that it has become outdated, try
// reconnecting as soon as possible (at the next paint attempt, requesting
// it if needed). This is the only state that the guest output thread may
// transition the connection to (from kConnectedPaintable only) if it has
// access to painting (the paint mode is kGuestOutputThreadImmediately).
kConnectedOutdated,
};
// Returns whether the state is one of the two connected ones (paintable or
// outdated), as opposed to the unconnected ones.
static constexpr bool IsConnectedSurfacePaintConnectionState(
    SurfacePaintConnectionState connection_state) {
  switch (connection_state) {
    case SurfacePaintConnectionState::kConnectedPaintable:
    case SurfacePaintConnectionState::kConnectedOutdated:
      return true;
    default:
      return false;
  }
}
struct UIDrawerReference {
UIDrawer* drawer;
uint64_t last_draw;
explicit UIDrawerReference(UIDrawer* drawer, uint64_t last_draw = UINT64_MAX)
: drawer(drawer), last_draw(last_draw) {}
};
void SetPaintModeFromUIThread(PaintMode new_mode);
// Based on conditions like whether UI needs to be drawn and whether vertical
// sync is implicit - see the implementation for the requirements.
// is_paintable is an explicit parameter because this function may be called
// in two scenarios:
// - After connection updates - painting connection is owned by the UI thread,
// so the actual state can be obtained and passed here so kNone can be
// returned.
// - When merely toggling something local to the UI thread - only to toggle
// between the two threads, but not to switch from or to kNone (make sure
// it's not kNone before calling), pass `true` in this case.
PaintMode GetDesiredPaintModeFromUIThread(bool is_paintable) const;
// Callable only by the UI thread and only when it has access to painting
// (PaintMode is not kGuestOutputThreadImmediately).
// This can be called to connect to a surface after having not been connected
// to any (in this case, surface_paint_connection_state_ must be
// kUnconnectedRetryAtStateChange, not kUnconnectedSurfaceReportedUnusable,
// otherwise the call will be dropped), or to handle surface state changes such
// as resizing. However, this must not be called to change directly from one
// surface to another - need to disconnect prior to that, because the
// implementation may assume that the surface is still the same, and may try
// to, for instance, resize the buffers for the existing surface.
void UpdateSurfacePaintConnectionFromUIThread(bool* repaint_needed_out,
bool update_paint_mode_to_desired);
// Callable only by the UI thread and only when it has access to painting
// (PaintMode is not kGuestOutputThreadImmediately).
// See DisconnectPaintingFromSurfaceFromUIThreadImpl for more information.
void DisconnectPaintingFromSurfaceFromUIThread(SurfacePaintConnectionState new_state);
// Can be called from any thread if an existing window_ safe to RequestPaint
// (not closed) is available in it, so doesn't check the surface painting
// connection state. Returns whether the window_->RequestPaint() call has been
// made.
bool RequestPaintOrConnectionRecoveryViaWindow(bool force_ui_thread_paint_tick);
// Platform-specific function refreshing the monitor the current window
// surface is on, through the Surface or its Window. A reference to the
// monitor is held only when a Surface is available, so it's automatically
// dropped when the Window loses its Surface when it's being closed (but the
// Window object keeps being attached to the Presenter), for instance.
void UpdateSurfaceMonitorFromUIThread(bool old_monitor_potentially_disconnected);
// Platform-specific function returning whether the surface the presenter is
// currently attached to is actually visible on any monitor. UI thread
// painting may be dropped if this returns false - need to request painting if
// the surface appears on a monitor again. May be using the state cached at
// window / surface state changes, not the actual state from the platform.
bool InSurfaceOnMonitorFromUIThread() const;
// Calls PaintAndPresentImpl and does post-paint checks that are safe to do on
// both the UI thread and the guest output thread. See the information about
// PaintAndPresentImpl for details.
// A kPresentedSuboptimal result is returned as is, but the connection may or
// may not be made outdated if that happens - though if it's
// kPresentedSuboptimal rather than kNotPresentedConnectionOutdated, the image
// has been successfully sent to the OS presentation at least.
PaintResult PaintAndPresent(bool execute_ui_drawers);
void HandleUIDrawersChangeFromUIThread(bool drawers_were_empty);
// Whether the UI thread needs manually paced UI ticks: there must be UI
// drawers, painting must be possible, and the presentation must not already
// provide vertical sync.
bool AreUITicksNeededFromUIThread() const {
  // Nothing to draw - no ticks are needed.
  if (ui_drawers_.empty()) {
    return false;
  }
  // Painting needs to be possible. This is a coarse check because the actual
  // connection state, including outdated, may be currently unavailable from
  // the UI thread.
  if (paint_mode_ == PaintMode::kNone) {
    return false;
  }
  // With vertical sync already present in the presentation, limiting the
  // frame rate manually as well might result in inconsistent frame pacing and
  // potentially skipped vertical sync intervals.
  return !surface_paint_connection_has_implicit_vsync_;
}
void UpdateUITicksNeededFromUIThread();
void WaitForUITickFromUIThread();
// May be called from any thread.
void ForceUIThreadPaintTick();
// Must be called only in the end of entry points - reinitialization of the
// presenter may be done by the handler if it was called from the UI thread
// (even if the UI thread argument is false - such as when the guest output is
// refreshed on the UI thread).
HostGpuLossCallback host_gpu_loss_callback_;
// May be accessed by the guest output thread if the paint mode is not kNone,
// to request painting (for kUIThreadOnRequest) or reconnection (for
// kGuestOutputThreadImmediately) in the UI thread. Set the paint mode to
// kNone before modifying (that naturally has to be done anyway by
// disconnecting painting).
Window* window_ = nullptr;
// The surface of the `window_` the presenter is currently attached to.
Surface* surface_ = nullptr;
// Mutex protecting paint_mode_ (and, in the guest output thread, objects
// related to painting themselves).
//
// The UI thread (as the mode is modifiable only by it) can use it as
// "barriers", like:
// 1) If needed, lock and disable guest output thread access to painting.
// 2) Interact with the painting connection.
// 3) If needed, lock and re-enable guest output thread access to the
// painting.
//
// On the other hand, the guest output thread _must_ hold it all the time it's
// painting, to ensure the mode stays the same while it's painting.
std::mutex paint_mode_mutex_;
// UI thread: writable, guest output thread: read-only.
PaintMode paint_mode_ = PaintMode::kNone;
// These fields can be accessed _exclusively_ by either the UI thread or the
// guest output thread, depending on paint_mode_.
// If it's kGuestOutputThreadImmediately, they can be accessed _only_ by the
// guest output thread (though the UI thread can still read, but not modify,
// fields that are writable by the UI thread and readable by both).
// Otherwise, they can be accessed _only_ by the UI thread.
// The connection state may be changed from the guest output thread, but only
// from kConnectedPaintable to kConnectedOutdated.
SurfacePaintConnectionState surface_paint_connection_state_ =
SurfacePaintConnectionState::kUnconnectedRetryAtStateChange;
// If the surface connection was optimal at the last paint attempt, but now
// has become suboptimal, need to try to reconnect. But only in this case - if
// the connection has been suboptimal from the very beginning don't try to
// reconnect every frame.
bool surface_paint_connection_was_optimal_at_successful_paint_ = false;
// Modifiable only by the UI thread (therefore can be accessed by the UI
// thread regardless of the paint mode) while (re)connecting painting to the
// surface.
bool surface_paint_connection_has_implicit_vsync_ = false;
// Modifiable only by the UI thread, can be read by the thread that's
// painting.
uint32_t surface_width_in_paint_connection_ = 0;
uint32_t surface_height_in_paint_connection_ = 0;
// Can be set by both the UI thread and the guest output thread before doing
// window_->RequestPaint() - whether an extra painting (preceded by
// reconnection if needed, and painting) was requested, primarily after some
// state change that may affect the surface painting connection, resulting in
// the need to refresh it as soon as possible.
//
// Relaxed memory order is enough, everything that may influence painting is
// either local to the UI thread or protected with barriers elsewhere.
//
// There's no need to bother about resetting this variable when losing
// connection as the next successful reconnection should be followed by a
// repaint request anyway.
std::atomic<bool> ui_thread_paint_requested_{false};
std::mutex guest_output_paint_config_mutex_;
// UI thread: writable, guest output thread: read-only.
GuestOutputPaintConfig guest_output_paint_config_;
// Single-producer-multiple-consumers (lock-free SPSC + consumer lock) mailbox
// for presenting of the most up-to-date guest output image without long
// interlocking between guest output refreshing and painting.
static_assert(kGuestOutputMailboxSize == 3);
// The "acquired" image (in bits 0:1) is the one that is currently being read,
// or was last read, by a consumer of the guest output. The index of it can be
// modified only by the consumer and stays the same while it's processing the
// image.
// The "ready" image (in bits 2:3) is the most up-to-date image that the
// refresher has completely written, and a consumer may acquire it. It may be
// == acquired if there has been no refresh since the last acquisition.
// These two images can be accessed by painting in parallel, in an unordered
// way, with guest output refreshing.
std::atomic<uint32_t> guest_output_mailbox_acquired_and_ready_{0};
// The "writable" image is different than both "acquired" and "ready" and is
// accessible only by the guest output refreshing - it's the image that the
// refresher may write to.
uint32_t guest_output_mailbox_writable_ = 1;
// The guest output images may be consumed by two operations - painting, and
// capturing to a CPU-side buffer. These two usually never happen in parallel
// in reality though, as they're usually not even needed both at once in the
// same app within Xenia, so there's no need to create any
// complex lock-free synchronization between the two, but still, the situation
// when multiple consumers want the guest output image at the same time is
// perfectly valid (unlike for producers, because even with a producer lock
// that would still be a race condition since the two refreshes themselves
// will be done in an undefined order) - so, a sufficient synchronization
// mechanism is used to make sure multiple consumers can acquire images
// without interfering with each other.
// While this is held, paint_mode_mutex_ must not be locked (the lock order is
// the reverse when painting in the guest output thread - painting is done
// with paint_mode_mutex_ held in this case, and guest output consumption
// happens as part of painting).
std::mutex guest_output_mailbox_consumer_mutex_;
std::array<GuestOutputProperties, kGuestOutputMailboxSize> guest_output_properties_;
// Accessible only by refreshing, whether the last refresh contained an image
// rather than being blank.
bool guest_output_active_last_refresh_ = false;
// UI drawers, ordered by the Z order, and then by the time of addition.
// Note: All the iteration logic involving this Z ordering must be the same as
// in input handling (in the input listeners in the Window), but in reverse.
std::multimap<size_t, UIDrawerReference> ui_drawers_;
// Bookkeeping for executing the UI drawers.
// NOTE(review): presumably only meaningful while is_executing_ui_drawers_ is
// true - confirm against the implementation.
size_t ui_draw_current_ = 0;
// Explicitly zero-initialized, like the neighboring fields, so that it never
// holds an indeterminate value before its first assignment.
size_t ui_draw_current_z_order_ = 0;
// Default-constructed here; must be assigned a valid position in ui_drawers_
// before being dereferenced or compared.
std::multimap<size_t, UIDrawerReference>::iterator ui_draw_next_iterator_;
bool is_executing_ui_drawers_ = false;
// Whether currently running the logic of PaintFromUIThread, so certain
// actions (such as changing the paint mode, requesting a redraw) must be
// deferred and be handled by the tail of PaintFromUIThread for consistency
// with what PaintFromUIThread does internally.
bool is_in_ui_thread_paint_ = false;
// Deferred requests recorded during the current UI thread paint.
// NOTE(review): presumably consumed by the tail of PaintFromUIThread - confirm
// against the implementation. Explicitly initialized to false so that they
// never hold an indeterminate value before the first paint.
bool request_guest_output_paint_after_current_ui_thread_paint_ = false;
bool request_ui_paint_after_current_ui_thread_paint_ = false;
// Platform-specific, but implementation-agnostic parts, primarily for
// limiting of the frame rate of the UI to avoid drawing the UI at extreme
// frame rates wasting the CPU and the GPU resources and starving everything
// else. The waits performed here must be interruptible by guest output
// presentation requests to prevent adding arbitrary amounts of latency to it.
// On Android and GTK, this is not needed, the frame rate of draw events is
// limited to the display refresh rate internally.
#if REX_PLATFORM_WIN32
static Microsoft::WRL::ComPtr<IDXGIOutput> GetDXGIOutputForMonitor(IDXGIFactory1* factory,
HMONITOR monitor);
// Whether UI ticks can currently be waited for: ticks must be needed, the
// tick thread must not be shutting down, and a DXGI output must be available.
// The lock parameter is not used by the body.
// NOTE(review): presumably it exists to make callers demonstrate that they
// hold dxgi_ui_tick_mutex_ - confirm against the callers.
bool AreDXGIUITicksWaitable(
    [[maybe_unused]] const std::unique_lock<std::mutex>& dxgi_ui_tick_lock) {
  if (!dxgi_ui_ticks_needed_) {
    return false;
  }
  if (dxgi_ui_tick_thread_shutdown_) {
    return false;
  }
  if (!dxgi_ui_tick_output_) {
    return false;
  }
  return true;
}
void DXGIUITickThread();
// Accessible only from the UI thread, to avoid updating monitor-dependent
// information such as the DXGI output if the monitor hasn't actually been
// changed in the current state change (such as window positioning changes).
HMONITOR surface_win32_monitor_ = nullptr;
// Requiring the lowest version of DXGI for IDXGIOutput::WaitForVBlank, which
// is available even on Windows Vista, but for IDXGIFactory1::IsCurrent,
// DXGI 1.1 is needed (available starting from Windows 7; also mixing DXGI 1.0
// and 1.1+ in the Direct3D 12 code is not supported, see CreateDXGIFactory on
// MSDN). The factory is created when it's needed, and may be released and
// recreated when it's not current anymore and that becomes relevant.
Microsoft::WRL::ComPtr<IDXGIFactory1> dxgi_ui_tick_factory_;
// Accessible only from the UI thread, though the value is taken from the
// tick-mutex-protected variable.
uint64_t dxgi_ui_tick_last_draw_ = 0;
std::mutex dxgi_ui_tick_mutex_;
uint64_t dxgi_ui_tick_last_vblank_ = 1;
// If output is null or shutdown is true, the signal may not be sent, either
// don't limit the frame rate in this case (an exceptional situation, such as
// a failure to find the output in DXGI), or don't draw at all if the window
// was removed from a connected monitor.
Microsoft::WRL::ComPtr<IDXGIOutput> dxgi_ui_tick_output_;
// To avoid allocating processing resources to the thread when nothing needs
// the ticks (not drawing the UI), the thread waits for vertical blanking
// intervals only when the UI drawing ticks are needed, and sleeping waiting
// for the control condition variable signals otherwise. Modifiable only from
// the UI thread, so readable by it without locking the mutex.
bool dxgi_ui_ticks_needed_ = false;
// The shutdown flag is modifiable only from the UI thread.
bool dxgi_ui_tick_thread_shutdown_ = false;
bool dxgi_ui_tick_force_requested_ = false;
std::condition_variable dxgi_ui_tick_control_condition_;
// May be signaled by guest output refreshing.
std::condition_variable dxgi_ui_tick_signal_condition_;
std::thread dxgi_ui_tick_thread_;
#endif // REX_PLATFORM_WIN32
};
} // namespace ui
} // namespace rex