#ifndef DUSK_TIME_H #define DUSK_TIME_H #include #include #include "SDL3/SDL_timer.h" #ifdef _WIN32 #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include #include #include #endif #ifdef __APPLE__ #include #if defined(__x86_64__) || defined(__i386__) #include #endif #endif class Limiter { public: using duration_t = Uint64; void Reset() { m_oldTime = SDL_GetTicksNS(); } duration_t Sleep(duration_t targetFrameTime) { if (targetFrameTime == 0) { return 0; } const Uint64 start = SDL_GetTicksNS(); duration_t adjustedSleepTime = SleepTime(targetFrameTime); if (adjustedSleepTime > 0) { NanoSleep(adjustedSleepTime); const duration_t elapsed = TimeSince(start); const duration_t overslept = elapsed > adjustedSleepTime ? elapsed - adjustedSleepTime : 0; if (overslept < targetFrameTime) { m_overheadTimes[m_overheadTimeIdx] = overslept; m_overheadTimeIdx = (m_overheadTimeIdx + 1) % m_overheadTimes.size(); } } Reset(); return adjustedSleepTime; } duration_t SleepTime(duration_t targetFrameTime) { const duration_t elapsed = TimeSince(m_oldTime); const duration_t sleepTime = elapsed < targetFrameTime ? targetFrameTime - elapsed : 0; m_overhead = std::accumulate(m_overheadTimes.begin(), m_overheadTimes.end(), duration_t{0}) / m_overheadTimes.size(); if (sleepTime > m_overhead) { return sleepTime - m_overhead; } return 0; } private: Uint64 m_oldTime = 0; std::array m_overheadTimes{}; size_t m_overheadTimeIdx = 0; duration_t m_overhead = 0; duration_t TimeSince(Uint64 start) const { return SDL_GetTicksNS() - start; } #if _WIN32 void NanoSleep(const duration_t duration) { static bool initialized = false; static double countPerNs; static size_t numSleeps = 0; // QueryPerformanceFrequency's result is constant, but calling it occasionally // appears to stabilize QueryPerformanceCounter. Without it, the game drifts // from 60hz to 144hz. (Cursed, but I suspect it's NVIDIA/G-SYNC related) if (!initialized || numSleeps++ % 1000 == 0) { LARGE_INTEGER freq; if (QueryPerformanceFrequency(&freq) == 0) { return; } countPerNs = static_cast(freq.QuadPart) / 1e9; initialized = true; numSleeps = 0; } LARGE_INTEGER start, current; QueryPerformanceCounter(&start); const LONGLONG ticksToWait = static_cast(duration * countPerNs); const Uint64 ms = duration / 1'000'000ULL; if (ms > 1) { ::Sleep(static_cast(ms - 1)); } do { QueryPerformanceCounter(¤t); #if defined(_M_ARM64) || defined(_M_ARM) __yield(); #else _mm_pause(); #endif } while (current.QuadPart - start.QuadPart < ticksToWait); } #elif defined (__APPLE__) void NanoSleep(const duration_t duration) { // Hybrid approach using Apple Mach uint64_t start_mach = mach_absolute_time(); mach_timebase_info_data_t timebase_info; mach_timebase_info(&timebase_info); uint64_t total_mach_ticks = (duration * timebase_info.denom) / timebase_info.numer; uint64_t target_mach = start_mach + total_mach_ticks; uint64_t buffer_ns = 2'000'000ULL; uint64_t buffer_mach_ticks = (buffer_ns * timebase_info.denom) / timebase_info.numer; if (total_mach_ticks > buffer_mach_ticks) { uint64_t sleep_until_mach = target_mach - buffer_mach_ticks; mach_wait_until(sleep_until_mach); } while (mach_absolute_time() < target_mach) { #if defined(__aarch64__) || defined(__arm__) asm volatile("yield" ::: "memory"); // Hardware hint, not a scheduler hint. #else _mm_pause(); #endif } } #else void NanoSleep(const duration_t duration) { SDL_DelayPrecise(duration); } #endif }; #endif