mirror of
https://github.com/sal063/AC6_recomp
synced 2026-05-24 07:11:16 -04:00
269 lines
7.7 KiB
C++
269 lines
7.7 KiB
C++
#pragma once
|
|
|
|
#include <algorithm>
|
|
#include <bit>
|
|
#include <cmath>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <limits>
|
|
#include <numeric>
|
|
#include <type_traits>
|
|
#include <utility>
|
|
|
|
#include <native/platform.h>
|
|
|
|
#if REX_ARCH_AMD64
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
|
|
namespace rex {
|
|
|
|
template <typename T, size_t N>
|
|
constexpr size_t countof(T (&)[N]) {
|
|
return N;
|
|
}
|
|
|
|
template <typename T>
|
|
constexpr bool is_pow2(T value) {
|
|
return (value & (value - 1)) == 0;
|
|
}
|
|
|
|
template <typename T>
|
|
constexpr T align(T value, T alignment) {
|
|
return (value + alignment - 1) & ~(alignment - 1);
|
|
}
|
|
|
|
template <typename T, typename V>
|
|
constexpr T round_up(T value, V multiple, bool force_non_zero = true) {
|
|
if (force_non_zero && !value) {
|
|
return static_cast<T>(multiple);
|
|
}
|
|
return static_cast<T>((value + multiple - 1) / multiple * multiple);
|
|
}
|
|
|
|
template <typename T>
|
|
T clamp_float(T value, T min_value, T max_value) {
|
|
const T clamped_to_min = std::isgreater(value, min_value) ? value : min_value;
|
|
return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value;
|
|
}
|
|
|
|
template <typename T>
|
|
T saturate(T value) {
|
|
return clamp_float(value, static_cast<T>(0.0f), static_cast<T>(1.0f));
|
|
}
|
|
|
|
template <typename T>
|
|
T next_pow2(T value) {
|
|
value--;
|
|
value |= value >> 1;
|
|
value |= value >> 2;
|
|
value |= value >> 4;
|
|
value |= value >> 8;
|
|
value |= value >> 16;
|
|
value++;
|
|
return value;
|
|
}
|
|
|
|
template <typename T>
|
|
constexpr T greatest_common_divisor(T a, T b) {
|
|
if constexpr (__cpp_lib_gcd_lcm) {
|
|
return std::gcd(a, b);
|
|
} else {
|
|
while (b) {
|
|
a = std::exchange(b, a % b);
|
|
}
|
|
return a;
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
constexpr void reduce_fraction(T& numerator, T& denominator) {
|
|
const auto gcd = greatest_common_divisor(numerator, denominator);
|
|
numerator /= gcd;
|
|
denominator /= gcd;
|
|
}
|
|
|
|
template <typename T>
|
|
constexpr void reduce_fraction(std::pair<T, T>& fraction) {
|
|
reduce_fraction(fraction.first, fraction.second);
|
|
}
|
|
|
|
constexpr uint32_t make_bitmask(uint32_t first, uint32_t last) {
|
|
return (static_cast<uint32_t>(-1) >> (31 - last)) & ~((1u << first) - 1);
|
|
}
|
|
|
|
constexpr uint32_t select_bits(uint32_t value, uint32_t first, uint32_t last) {
|
|
return (value & make_bitmask(first, last)) >> first;
|
|
}
|
|
|
|
template <class T>
|
|
constexpr uint32_t bit_count(T value) {
|
|
return static_cast<uint32_t>(std::popcount(value));
|
|
}
|
|
|
|
uint8_t lzcnt(uint8_t value);
|
|
uint8_t lzcnt(uint16_t value);
|
|
uint8_t lzcnt(uint32_t value);
|
|
uint8_t lzcnt(uint64_t value);
|
|
|
|
uint8_t tzcnt(uint8_t value);
|
|
uint8_t tzcnt(uint16_t value);
|
|
uint8_t tzcnt(uint32_t value);
|
|
uint8_t tzcnt(uint64_t value);
|
|
|
|
inline uint8_t lzcnt(int8_t value) { return lzcnt(static_cast<uint8_t>(value)); }
|
|
inline uint8_t lzcnt(int16_t value) { return lzcnt(static_cast<uint16_t>(value)); }
|
|
inline uint8_t lzcnt(int32_t value) { return lzcnt(static_cast<uint32_t>(value)); }
|
|
inline uint8_t lzcnt(int64_t value) { return lzcnt(static_cast<uint64_t>(value)); }
|
|
|
|
inline uint8_t tzcnt(int8_t value) { return tzcnt(static_cast<uint8_t>(value)); }
|
|
inline uint8_t tzcnt(int16_t value) { return tzcnt(static_cast<uint16_t>(value)); }
|
|
inline uint8_t tzcnt(int32_t value) { return tzcnt(static_cast<uint32_t>(value)); }
|
|
inline uint8_t tzcnt(int64_t value) { return tzcnt(static_cast<uint64_t>(value)); }
|
|
|
|
bool bit_scan_forward(uint32_t value, uint32_t* out_first_set_index);
|
|
bool bit_scan_forward(uint64_t value, uint32_t* out_first_set_index);
|
|
|
|
inline bool bit_scan_forward(int32_t value, uint32_t* out_first_set_index) {
|
|
return bit_scan_forward(static_cast<uint32_t>(value), out_first_set_index);
|
|
}
|
|
|
|
inline bool bit_scan_forward(int64_t value, uint32_t* out_first_set_index) {
|
|
return bit_scan_forward(static_cast<uint64_t>(value), out_first_set_index);
|
|
}
|
|
|
|
template <typename T>
|
|
inline T log2_floor(T value) {
|
|
return static_cast<T>(sizeof(T) * 8 - 1 - lzcnt(value));
|
|
}
|
|
|
|
template <typename T>
|
|
inline T log2_ceil(T value) {
|
|
return static_cast<T>(sizeof(T) * 8 - lzcnt(static_cast<T>(value - 1)));
|
|
}
|
|
|
|
template <typename T>
|
|
inline T rotate_left(T value, uint8_t shift) {
|
|
return static_cast<T>((T(value) << shift) | (T(value) >> ((sizeof(T) * 8) - shift)));
|
|
}
|
|
|
|
#if REX_ARCH_AMD64
|
|
template <int N>
|
|
float m128_f32(const __m128& value) {
|
|
float result;
|
|
_mm_store_ss(&result, _mm_shuffle_ps(value, value, _MM_SHUFFLE(N, N, N, N)));
|
|
return result;
|
|
}
|
|
|
|
template <int N>
|
|
int32_t m128_i32(const __m128& value) {
|
|
float result;
|
|
_mm_store_ss(&result, _mm_shuffle_ps(value, value, _MM_SHUFFLE(N, N, N, N)));
|
|
return std::bit_cast<int32_t>(result);
|
|
}
|
|
|
|
template <int N>
|
|
double m128_f64(const __m128d& value) {
|
|
double result;
|
|
_mm_store_sd(&result, _mm_shuffle_pd(value, value, _MM_SHUFFLE2(N, N)));
|
|
return result;
|
|
}
|
|
|
|
template <int N>
|
|
double m128_f64(const __m128& value) {
|
|
return m128_f64<N>(_mm_castps_pd(value));
|
|
}
|
|
|
|
template <int N>
|
|
int64_t m128_i64(const __m128d& value) {
|
|
double result;
|
|
_mm_store_sd(&result, _mm_shuffle_pd(value, value, _MM_SHUFFLE2(N, N)));
|
|
return std::bit_cast<int64_t>(result);
|
|
}
|
|
|
|
template <int N>
|
|
int64_t m128_i64(const __m128& value) {
|
|
return m128_i64<N>(_mm_castps_pd(value));
|
|
}
|
|
#endif
|
|
|
|
inline uint16_t float_to_xenos_half(
|
|
float value, bool preserve_denormal = false, bool round_to_nearest_even = false) {
|
|
const uint32_t integer_value = std::bit_cast<uint32_t>(value);
|
|
const uint32_t abs_value = integer_value & 0x7FFFFFFFu;
|
|
uint32_t result;
|
|
if (abs_value >= 0x47FFE000u) {
|
|
result = 0x7FFFu;
|
|
} else {
|
|
if (abs_value < 0x38800000u) {
|
|
if (preserve_denormal) {
|
|
const uint32_t shift = std::min(uint32_t(113u - (abs_value >> 23u)), uint32_t(24u));
|
|
result = (0x800000u | (abs_value & 0x7FFFFFu)) >> shift;
|
|
} else {
|
|
result = 0u;
|
|
}
|
|
} else {
|
|
result = abs_value + 0xC8000000u;
|
|
}
|
|
if (round_to_nearest_even) {
|
|
result += 0xFFFu + ((result >> 13u) & 1u);
|
|
}
|
|
result = (result >> 13u) & 0x7FFFu;
|
|
}
|
|
return static_cast<uint16_t>(result | ((integer_value & 0x80000000u) >> 16u));
|
|
}
|
|
|
|
inline float xenos_half_to_float(uint16_t value, bool preserve_denormal = false) {
|
|
uint32_t mantissa = value & 0x3FFu;
|
|
uint32_t exponent = (value >> 10u) & 0x1Fu;
|
|
if (!exponent) {
|
|
if (!preserve_denormal) {
|
|
mantissa = 0;
|
|
} else if (mantissa) {
|
|
const uint32_t mantissa_lzcnt = rex::lzcnt(mantissa) - (32u - 11u);
|
|
exponent = uint32_t(1 - int32_t(mantissa_lzcnt));
|
|
mantissa = (mantissa << mantissa_lzcnt) & 0x3FFu;
|
|
}
|
|
if (!mantissa) {
|
|
exponent = uint32_t(-112);
|
|
}
|
|
}
|
|
const uint32_t result =
|
|
(uint32_t(value & 0x8000u) << 16u) | ((exponent + 112u) << 23u) | (mantissa << 13u);
|
|
return std::bit_cast<float>(result);
|
|
}
|
|
|
|
template <typename T>
|
|
inline T sat_add(T a, T b) {
|
|
using unsigned_type = typename std::make_unsigned<T>::type;
|
|
unsigned_type result = unsigned_type(a) + unsigned_type(b);
|
|
if (std::is_unsigned<T>::value) {
|
|
result |= unsigned_type(-static_cast<typename std::make_signed<T>::type>(result < unsigned_type(a)));
|
|
} else {
|
|
const unsigned_type overflowed =
|
|
(unsigned_type(a) >> (sizeof(T) * 8 - 1)) + std::numeric_limits<T>::max();
|
|
if (T((overflowed ^ unsigned_type(b)) | ~(unsigned_type(b) ^ result)) >= 0) {
|
|
result = overflowed;
|
|
}
|
|
}
|
|
return T(result);
|
|
}
|
|
|
|
template <typename T>
|
|
inline T sat_sub(T a, T b) {
|
|
using unsigned_type = typename std::make_unsigned<T>::type;
|
|
unsigned_type result = unsigned_type(a) - unsigned_type(b);
|
|
if (std::is_unsigned<T>::value) {
|
|
result &= unsigned_type(-static_cast<typename std::make_signed<T>::type>(result <= unsigned_type(a)));
|
|
} else {
|
|
const unsigned_type overflowed =
|
|
(unsigned_type(a) >> (sizeof(T) * 8 - 1)) + std::numeric_limits<T>::max();
|
|
if (T((overflowed ^ unsigned_type(b)) & (overflowed ^ result)) < 0) {
|
|
result = overflowed;
|
|
}
|
|
}
|
|
return T(result);
|
|
}
|
|
|
|
} // namespace rex
|