#pragma once #include #include #include #include #include #include #include #include #include #include #if REX_ARCH_AMD64 #include #endif namespace rex { template constexpr size_t countof(T (&)[N]) { return N; } template constexpr bool is_pow2(T value) { return (value & (value - 1)) == 0; } template constexpr T align(T value, T alignment) { return (value + alignment - 1) & ~(alignment - 1); } template constexpr T round_up(T value, V multiple, bool force_non_zero = true) { if (force_non_zero && !value) { return static_cast(multiple); } return static_cast((value + multiple - 1) / multiple * multiple); } template T clamp_float(T value, T min_value, T max_value) { const T clamped_to_min = std::isgreater(value, min_value) ? value : min_value; return std::isless(clamped_to_min, max_value) ? clamped_to_min : max_value; } template T saturate(T value) { return clamp_float(value, static_cast(0.0f), static_cast(1.0f)); } template T next_pow2(T value) { value--; value |= value >> 1; value |= value >> 2; value |= value >> 4; value |= value >> 8; value |= value >> 16; value++; return value; } template constexpr T greatest_common_divisor(T a, T b) { if constexpr (__cpp_lib_gcd_lcm) { return std::gcd(a, b); } else { while (b) { a = std::exchange(b, a % b); } return a; } } template constexpr void reduce_fraction(T& numerator, T& denominator) { const auto gcd = greatest_common_divisor(numerator, denominator); numerator /= gcd; denominator /= gcd; } template constexpr void reduce_fraction(std::pair& fraction) { reduce_fraction(fraction.first, fraction.second); } constexpr uint32_t make_bitmask(uint32_t first, uint32_t last) { return (static_cast(-1) >> (31 - last)) & ~((1u << first) - 1); } constexpr uint32_t select_bits(uint32_t value, uint32_t first, uint32_t last) { return (value & make_bitmask(first, last)) >> first; } template constexpr uint32_t bit_count(T value) { return static_cast(std::popcount(value)); } uint8_t lzcnt(uint8_t value); uint8_t lzcnt(uint16_t value); uint8_t lzcnt(uint32_t value); uint8_t lzcnt(uint64_t value); uint8_t tzcnt(uint8_t value); uint8_t tzcnt(uint16_t value); uint8_t tzcnt(uint32_t value); uint8_t tzcnt(uint64_t value); inline uint8_t lzcnt(int8_t value) { return lzcnt(static_cast(value)); } inline uint8_t lzcnt(int16_t value) { return lzcnt(static_cast(value)); } inline uint8_t lzcnt(int32_t value) { return lzcnt(static_cast(value)); } inline uint8_t lzcnt(int64_t value) { return lzcnt(static_cast(value)); } inline uint8_t tzcnt(int8_t value) { return tzcnt(static_cast(value)); } inline uint8_t tzcnt(int16_t value) { return tzcnt(static_cast(value)); } inline uint8_t tzcnt(int32_t value) { return tzcnt(static_cast(value)); } inline uint8_t tzcnt(int64_t value) { return tzcnt(static_cast(value)); } bool bit_scan_forward(uint32_t value, uint32_t* out_first_set_index); bool bit_scan_forward(uint64_t value, uint32_t* out_first_set_index); inline bool bit_scan_forward(int32_t value, uint32_t* out_first_set_index) { return bit_scan_forward(static_cast(value), out_first_set_index); } inline bool bit_scan_forward(int64_t value, uint32_t* out_first_set_index) { return bit_scan_forward(static_cast(value), out_first_set_index); } template inline T log2_floor(T value) { return static_cast(sizeof(T) * 8 - 1 - lzcnt(value)); } template inline T log2_ceil(T value) { return static_cast(sizeof(T) * 8 - lzcnt(static_cast(value - 1))); } template inline T rotate_left(T value, uint8_t shift) { return static_cast((T(value) << shift) | (T(value) >> ((sizeof(T) * 8) - shift))); } #if REX_ARCH_AMD64 template float m128_f32(const __m128& value) { float result; _mm_store_ss(&result, _mm_shuffle_ps(value, value, _MM_SHUFFLE(N, N, N, N))); return result; } template int32_t m128_i32(const __m128& value) { float result; _mm_store_ss(&result, _mm_shuffle_ps(value, value, _MM_SHUFFLE(N, N, N, N))); return std::bit_cast(result); } template double m128_f64(const __m128d& value) { double result; _mm_store_sd(&result, _mm_shuffle_pd(value, value, _MM_SHUFFLE2(N, N))); return result; } template double m128_f64(const __m128& value) { return m128_f64(_mm_castps_pd(value)); } template int64_t m128_i64(const __m128d& value) { double result; _mm_store_sd(&result, _mm_shuffle_pd(value, value, _MM_SHUFFLE2(N, N))); return std::bit_cast(result); } template int64_t m128_i64(const __m128& value) { return m128_i64(_mm_castps_pd(value)); } #endif inline uint16_t float_to_xenos_half( float value, bool preserve_denormal = false, bool round_to_nearest_even = false) { const uint32_t integer_value = std::bit_cast(value); const uint32_t abs_value = integer_value & 0x7FFFFFFFu; uint32_t result; if (abs_value >= 0x47FFE000u) { result = 0x7FFFu; } else { if (abs_value < 0x38800000u) { if (preserve_denormal) { const uint32_t shift = std::min(uint32_t(113u - (abs_value >> 23u)), uint32_t(24u)); result = (0x800000u | (abs_value & 0x7FFFFFu)) >> shift; } else { result = 0u; } } else { result = abs_value + 0xC8000000u; } if (round_to_nearest_even) { result += 0xFFFu + ((result >> 13u) & 1u); } result = (result >> 13u) & 0x7FFFu; } return static_cast(result | ((integer_value & 0x80000000u) >> 16u)); } inline float xenos_half_to_float(uint16_t value, bool preserve_denormal = false) { uint32_t mantissa = value & 0x3FFu; uint32_t exponent = (value >> 10u) & 0x1Fu; if (!exponent) { if (!preserve_denormal) { mantissa = 0; } else if (mantissa) { const uint32_t mantissa_lzcnt = rex::lzcnt(mantissa) - (32u - 11u); exponent = uint32_t(1 - int32_t(mantissa_lzcnt)); mantissa = (mantissa << mantissa_lzcnt) & 0x3FFu; } if (!mantissa) { exponent = uint32_t(-112); } } const uint32_t result = (uint32_t(value & 0x8000u) << 16u) | ((exponent + 112u) << 23u) | (mantissa << 13u); return std::bit_cast(result); } template inline T sat_add(T a, T b) { using unsigned_type = typename std::make_unsigned::type; unsigned_type result = unsigned_type(a) + unsigned_type(b); if (std::is_unsigned::value) { result |= unsigned_type(-static_cast::type>(result < unsigned_type(a))); } else { const unsigned_type overflowed = (unsigned_type(a) >> (sizeof(T) * 8 - 1)) + std::numeric_limits::max(); if (T((overflowed ^ unsigned_type(b)) | ~(unsigned_type(b) ^ result)) >= 0) { result = overflowed; } } return T(result); } template inline T sat_sub(T a, T b) { using unsigned_type = typename std::make_unsigned::type; unsigned_type result = unsigned_type(a) - unsigned_type(b); if (std::is_unsigned::value) { result &= unsigned_type(-static_cast::type>(result <= unsigned_type(a))); } else { const unsigned_type overflowed = (unsigned_type(a) >> (sizeof(T) * 8 - 1)) + std::numeric_limits::max(); if (T((overflowed ^ unsigned_type(b)) & (overflowed ^ result)) < 0) { result = overflowed; } } return T(result); } } // namespace rex