mirror of https://github.com/ollama/ollama
Merge 8466b205b8 into a013693f80
This commit is contained in:
commit
c0fb854ee4
|
|
@ -24,6 +24,9 @@ curl -fsSL https://ollama.com/install.sh | sh
|
|||
|
||||
[Manual install instructions](https://docs.ollama.com/linux#manual-install)
|
||||
|
||||
> [!TIP]
|
||||
> Running ROCm on UMA-only AMD APUs (e.g., Radeon 760M) requires staging additional ROCm runtime files and enabling experimental discovery flags. See [docs/rocm-apu.md](docs/rocm-apu.md) for the exact build and runtime steps used on this branch.
|
||||
|
||||
### Docker
|
||||
|
||||
The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub.
|
||||
|
|
@ -32,6 +35,7 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
|
|||
|
||||
- [ollama-python](https://github.com/ollama/ollama-python)
|
||||
- [ollama-js](https://github.com/ollama/ollama-js)
|
||||
- [Experimental ROCm iGPU Guide](docs/rocm-apu.md)
|
||||
|
||||
### Community
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,329 @@
|
|||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
|
||||
include(${CMAKE_ROOT}/Modules/CMakeParseImplicitLinkInfo.cmake)
|
||||
include(${CMAKE_ROOT}/Modules/CMakeParseLibraryArchitecture.cmake)
|
||||
|
||||
if(NOT ((CMAKE_GENERATOR MATCHES "Make") OR
|
||||
(CMAKE_GENERATOR MATCHES "Ninja")))
|
||||
message(FATAL_ERROR "HIP language not currently supported by \"${CMAKE_GENERATOR}\" generator")
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_HIP_PLATFORM)
|
||||
execute_process(COMMAND hipconfig --platform
|
||||
OUTPUT_VARIABLE _CMAKE_HIPCONFIG_PLATFORM OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
RESULT_VARIABLE _CMAKE_HIPCONFIG_RESULT
|
||||
)
|
||||
if(_CMAKE_HIPCONFIG_RESULT EQUAL 0 AND _CMAKE_HIPCONFIG_PLATFORM MATCHES "^(nvidia|nvcc)$")
|
||||
set(CMAKE_HIP_PLATFORM "nvidia" CACHE STRING "HIP platform" FORCE)
|
||||
else()
|
||||
set(CMAKE_HIP_PLATFORM "amd" CACHE STRING "HIP platform" FORCE)
|
||||
endif()
|
||||
endif()
|
||||
if(NOT CMAKE_HIP_PLATFORM MATCHES "^(amd|nvidia)$")
|
||||
message(FATAL_ERROR
|
||||
"The CMAKE_HIP_PLATFORM has unsupported value:\n"
|
||||
" '${CMAKE_HIP_PLATFORM}'\n"
|
||||
"It must be 'amd' or 'nvidia'."
|
||||
)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER)
|
||||
set(CMAKE_HIP_COMPILER_INIT NOTFOUND)
|
||||
|
||||
# prefer the environment variable HIPCXX
|
||||
if(NOT $ENV{HIPCXX} STREQUAL "")
|
||||
get_filename_component(CMAKE_HIP_COMPILER_INIT $ENV{HIPCXX} PROGRAM PROGRAM_ARGS CMAKE_HIP_FLAGS_ENV_INIT)
|
||||
if(CMAKE_HIP_FLAGS_ENV_INIT)
|
||||
set(CMAKE_HIP_COMPILER_ARG1 "${CMAKE_HIP_FLAGS_ENV_INIT}" CACHE STRING "Arguments to CXX compiler")
|
||||
endif()
|
||||
if(NOT EXISTS ${CMAKE_HIP_COMPILER_INIT})
|
||||
message(FATAL_ERROR "Could not find compiler set in environment variable HIPCXX:\n$ENV{HIPCXX}.\n${CMAKE_HIP_COMPILER_INIT}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# finally list compilers to try
|
||||
if(NOT CMAKE_HIP_COMPILER_INIT)
|
||||
if(CMAKE_HIP_PLATFORM STREQUAL "nvidia")
|
||||
set(CMAKE_HIP_COMPILER_LIST nvcc)
|
||||
elseif(CMAKE_HIP_PLATFORM STREQUAL "amd")
|
||||
set(CMAKE_HIP_COMPILER_LIST clang++)
|
||||
|
||||
# Look for the Clang coming with ROCm to support HIP.
|
||||
execute_process(COMMAND hipconfig --hipclangpath
|
||||
OUTPUT_VARIABLE _CMAKE_HIPCONFIG_CLANGPATH
|
||||
RESULT_VARIABLE _CMAKE_HIPCONFIG_RESULT
|
||||
)
|
||||
if(_CMAKE_HIPCONFIG_RESULT EQUAL 0 AND EXISTS "${_CMAKE_HIPCONFIG_CLANGPATH}")
|
||||
set(CMAKE_HIP_COMPILER_HINTS "${_CMAKE_HIPCONFIG_CLANGPATH}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
_cmake_find_compiler(HIP)
|
||||
else()
|
||||
_cmake_find_compiler_path(HIP)
|
||||
endif()
|
||||
|
||||
mark_as_advanced(CMAKE_HIP_COMPILER)
|
||||
|
||||
# Build a small source file to identify the compiler.
|
||||
if(NOT CMAKE_HIP_COMPILER_ID_RUN)
|
||||
set(CMAKE_HIP_COMPILER_ID_RUN 1)
|
||||
|
||||
include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
|
||||
|
||||
# We determine the vendor to use the right flags for detection right away.
|
||||
# The main compiler identification is still needed below to extract other information.
|
||||
list(APPEND CMAKE_HIP_COMPILER_ID_VENDORS NVIDIA Clang)
|
||||
set(CMAKE_HIP_COMPILER_ID_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \\(R\\) Cuda compiler driver")
|
||||
set(CMAKE_HIP_COMPILER_ID_VENDOR_REGEX_Clang "(clang version)")
|
||||
CMAKE_DETERMINE_COMPILER_ID_VENDOR(HIP "--version")
|
||||
|
||||
if(CMAKE_HIP_COMPILER_ID STREQUAL "NVIDIA")
|
||||
# Find the CUDA toolkit to get:
|
||||
# - CMAKE_HIP_COMPILER_CUDA_TOOLKIT_VERSION
|
||||
# - CMAKE_HIP_COMPILER_CUDA_TOOLKIT_ROOT
|
||||
# - CMAKE_HIP_COMPILER_CUDA_LIBRARY_ROOT
|
||||
# We save them in CMakeHIPCompiler.cmake.
|
||||
# Match arguments with cmake_cuda_architectures_all call.
|
||||
include(Internal/CMakeCUDAFindToolkit)
|
||||
cmake_cuda_find_toolkit(HIP CMAKE_HIP_COMPILER_CUDA_)
|
||||
|
||||
# If the user set CMAKE_HIP_ARCHITECTURES, validate its value.
|
||||
include(Internal/CMakeCUDAArchitecturesValidate)
|
||||
cmake_cuda_architectures_validate(HIP)
|
||||
|
||||
if(NOT CMAKE_HIP_HOST_COMPILER AND NOT $ENV{HIPHOSTCXX} STREQUAL "")
|
||||
get_filename_component(CMAKE_HIP_HOST_COMPILER $ENV{HIPHOSTCXX} PROGRAM)
|
||||
if(NOT EXISTS "${CMAKE_HIP_HOST_COMPILER}")
|
||||
message(FATAL_ERROR "Could not find compiler set in environment variable HIPHOSTCXX:\n$ENV{HIPHOSTCXX}.\n${CMAKE_HIP_HOST_COMPILER}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_HIP_COMPILER_ID STREQUAL "Clang")
|
||||
list(APPEND CMAKE_HIP_COMPILER_ID_TEST_FLAGS_FIRST "-v")
|
||||
elseif(CMAKE_HIP_COMPILER_ID STREQUAL "NVIDIA")
|
||||
# Tell nvcc to treat .hip files as CUDA sources.
|
||||
list(APPEND CMAKE_HIP_COMPILER_ID_TEST_FLAGS_FIRST "-x cu -v")
|
||||
if(CMAKE_HIP_HOST_COMPILER)
|
||||
string(APPEND CMAKE_HIP_COMPILER_ID_TEST_FLAGS_FIRST " -ccbin=\"${CMAKE_HIP_HOST_COMPILER}\"")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# We perform compiler identification for a second time to extract implicit linking info.
|
||||
# We need to unset the compiler ID otherwise CMAKE_DETERMINE_COMPILER_ID() doesn't work.
|
||||
set(CMAKE_HIP_COMPILER_ID)
|
||||
set(CMAKE_HIP_PLATFORM_ID)
|
||||
file(READ ${CMAKE_ROOT}/Modules/CMakePlatformId.h.in
|
||||
CMAKE_HIP_COMPILER_ID_PLATFORM_CONTENT)
|
||||
|
||||
CMAKE_DETERMINE_COMPILER_ID(HIP HIPFLAGS CMakeHIPCompilerId.hip)
|
||||
|
||||
if(CMAKE_HIP_COMPILER_ID STREQUAL "NVIDIA")
|
||||
include(Internal/CMakeCUDAArchitecturesAll)
|
||||
# From CMAKE_HIP_COMPILER_CUDA_TOOLKIT_VERSION and CMAKE_HIP_COMPILER_{ID,VERSION}, get:
|
||||
# - CMAKE_HIP_ARCHITECTURES_ALL
|
||||
# - CMAKE_HIP_ARCHITECTURES_ALL_MAJOR
|
||||
# Match arguments with cmake_cuda_find_toolkit call.
|
||||
cmake_cuda_architectures_all(HIP CMAKE_HIP_COMPILER_CUDA_)
|
||||
endif()
|
||||
|
||||
_cmake_find_compiler_sysroot(HIP)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER_ROCM_ROOT AND CMAKE_HIP_COMPILER_ID STREQUAL "Clang")
|
||||
execute_process(COMMAND "${CMAKE_HIP_COMPILER}" -v -print-targets
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
RESULT_VARIABLE _CMAKE_HIP_COMPILER_RESULT
|
||||
OUTPUT_VARIABLE _CMAKE_HIP_COMPILER_STDOUT
|
||||
ERROR_VARIABLE _CMAKE_HIP_COMPILER_STDERR
|
||||
)
|
||||
|
||||
if(_CMAKE_HIP_COMPILER_RESULT EQUAL 0 AND _CMAKE_HIP_COMPILER_STDERR MATCHES "Found HIP installation: *([^,]*)[,\n]")
|
||||
set(CMAKE_HIP_COMPILER_ROCM_ROOT "${CMAKE_MATCH_1}")
|
||||
file(TO_CMAKE_PATH "${CMAKE_HIP_COMPILER_ROCM_ROOT}" CMAKE_HIP_COMPILER_ROCM_ROOT)
|
||||
endif()
|
||||
endif()
|
||||
if(NOT CMAKE_HIP_COMPILER_ROCM_ROOT)
|
||||
execute_process(
|
||||
COMMAND hipconfig --rocmpath
|
||||
OUTPUT_VARIABLE _CMAKE_HIPCONFIG_ROCMPATH
|
||||
RESULT_VARIABLE _CMAKE_HIPCONFIG_RESULT
|
||||
)
|
||||
if(_CMAKE_HIPCONFIG_RESULT EQUAL 0 AND EXISTS "${_CMAKE_HIPCONFIG_ROCMPATH}")
|
||||
set(CMAKE_HIP_COMPILER_ROCM_ROOT "${_CMAKE_HIPCONFIG_ROCMPATH}")
|
||||
endif()
|
||||
endif()
|
||||
if(NOT CMAKE_HIP_COMPILER_ROCM_ROOT)
|
||||
message(FATAL_ERROR "Failed to find ROCm root directory.")
|
||||
endif()
|
||||
|
||||
if(CMAKE_HIP_PLATFORM STREQUAL "amd")
|
||||
# For this platform we need the hip-lang cmake package.
|
||||
|
||||
# Normally implicit link information is not detected until ABI detection,
|
||||
# but we need to populate CMAKE_HIP_LIBRARY_ARCHITECTURE to find hip-lang.
|
||||
cmake_parse_implicit_link_info("${CMAKE_HIP_COMPILER_PRODUCED_OUTPUT}"
|
||||
_CMAKE_HIP_COMPILER_ID_IMPLICIT_LIBS
|
||||
_CMAKE_HIP_COMPILER_ID_IMPLICIT_DIRS
|
||||
_CMAKE_HIP_COMPILER_ID_IMPLICIT_FWKS
|
||||
_CMAKE_HIP_COMPILER_ID_IMPLICIT_LOG
|
||||
"" LANGUAGE HIP)
|
||||
message(CONFIGURE_LOG
|
||||
"Parsed HIP implicit link information from compiler id output:\n${_CMAKE_HIP_COMPILER_ID_IMPLICIT_LOG}\n\n")
|
||||
cmake_parse_library_architecture(HIP "${_CMAKE_HIP_COMPILER_ID_IMPLICIT_DIRS}" "" CMAKE_HIP_LIBRARY_ARCHITECTURE)
|
||||
if(CMAKE_HIP_LIBRARY_ARCHITECTURE)
|
||||
message(CONFIGURE_LOG
|
||||
"Parsed HIP library architecture from compiler id output: ${CMAKE_HIP_LIBRARY_ARCHITECTURE}\n")
|
||||
endif()
|
||||
unset(_CMAKE_HIP_COMPILER_ID_IMPLICIT_LIBS)
|
||||
unset(_CMAKE_HIP_COMPILER_ID_IMPLICIT_DIRS)
|
||||
unset(_CMAKE_HIP_COMPILER_ID_IMPLICIT_FWKS)
|
||||
unset(_CMAKE_HIP_COMPILER_ID_IMPLICIT_LOG)
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER_ROCM_LIB)
|
||||
set(_CMAKE_HIP_COMPILER_ROCM_LIB_DIRS
|
||||
"${CMAKE_HIP_COMPILER_ROCM_ROOT}/lib"
|
||||
"${CMAKE_HIP_COMPILER_ROCM_ROOT}/lib64"
|
||||
)
|
||||
if(CMAKE_HIP_LIBRARY_ARCHITECTURE)
|
||||
list(APPEND _CMAKE_HIP_COMPILER_ROCM_LIB_DIRS "${CMAKE_HIP_COMPILER_ROCM_ROOT}/lib/${CMAKE_HIP_LIBRARY_ARCHITECTURE}")
|
||||
endif()
|
||||
foreach(dir IN LISTS _CMAKE_HIP_COMPILER_ROCM_LIB_DIRS)
|
||||
if(EXISTS "${dir}/cmake/hip-lang/hip-lang-config.cmake")
|
||||
set(CMAKE_HIP_COMPILER_ROCM_LIB "${dir}")
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
if(NOT CMAKE_HIP_COMPILER_ROCM_LIB)
|
||||
list(TRANSFORM _CMAKE_HIP_COMPILER_ROCM_LIB_DIRS APPEND "/cmake/hip-lang/hip-lang-config.cmake")
|
||||
string(REPLACE ";" "\n " _CMAKE_HIP_COMPILER_ROCM_LIB_DIRS "${_CMAKE_HIP_COMPILER_ROCM_LIB_DIRS}")
|
||||
message(FATAL_ERROR
|
||||
"The ROCm root directory:\n"
|
||||
" ${CMAKE_HIP_COMPILER_ROCM_ROOT}\n"
|
||||
"does not contain the HIP runtime CMake package, expected at one of:\n"
|
||||
" ${_CMAKE_HIP_COMPILER_ROCM_LIB_DIRS}\n"
|
||||
)
|
||||
endif()
|
||||
unset(_CMAKE_HIP_COMPILER_ROCM_LIB_DIRS)
|
||||
endif()
|
||||
if(NOT DEFINED CMAKE_SIZEOF_VOID_P)
|
||||
# We have not yet determined the target ABI but we need 'find_package' to
|
||||
# search lib64 directories to find hip-lang CMake package dependencies.
|
||||
# This will be replaced by ABI detection later.
|
||||
set(CMAKE_HIP_SIZEOF_DATA_PTR 8)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT _CMAKE_TOOLCHAIN_LOCATION)
|
||||
get_filename_component(_CMAKE_TOOLCHAIN_LOCATION "${CMAKE_HIP_COMPILER}" PATH)
|
||||
endif ()
|
||||
|
||||
set(_CMAKE_PROCESSING_LANGUAGE "HIP")
|
||||
include(CMakeFindBinUtils)
|
||||
include(Compiler/${CMAKE_HIP_COMPILER_ID}-FindBinUtils OPTIONAL)
|
||||
unset(_CMAKE_PROCESSING_LANGUAGE)
|
||||
|
||||
if(CMAKE_HIP_COMPILER_ID STREQUAL "Clang")
|
||||
set(CMAKE_HIP_RUNTIME_LIBRARY_DEFAULT "SHARED")
|
||||
elseif(CMAKE_HIP_COMPILER_ID STREQUAL "NVIDIA")
|
||||
include(Internal/CMakeNVCCParseImplicitInfo)
|
||||
# Parse CMAKE_HIP_COMPILER_PRODUCED_OUTPUT to get:
|
||||
# - CMAKE_HIP_ARCHITECTURES_DEFAULT
|
||||
# - CMAKE_HIP_HOST_IMPLICIT_LINK_DIRECTORIES
|
||||
# - CMAKE_HIP_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES
|
||||
# - CMAKE_HIP_HOST_IMPLICIT_LINK_LIBRARIES
|
||||
# - CMAKE_HIP_HOST_LINK_LAUNCHER
|
||||
# - CMAKE_HIP_RUNTIME_LIBRARY_DEFAULT
|
||||
# - CMAKE_HIP_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
|
||||
# Match arguments with cmake_nvcc_filter_implicit_info call in CMakeTestHIPCompiler.
|
||||
cmake_nvcc_parse_implicit_info(HIP CMAKE_HIP_CUDA_)
|
||||
|
||||
include(Internal/CMakeCUDAFilterImplicitLibs)
|
||||
# Filter out implicit link libraries that should not be passed unconditionally.
|
||||
cmake_cuda_filter_implicit_libs(CMAKE_HIP_HOST_IMPLICIT_LINK_LIBRARIES)
|
||||
endif()
|
||||
|
||||
if(CMAKE_HIP_COMPILER_SYSROOT)
|
||||
string(CONCAT _SET_CMAKE_HIP_COMPILER_SYSROOT
|
||||
"set(CMAKE_HIP_COMPILER_SYSROOT \"${CMAKE_HIP_COMPILER_SYSROOT}\")\n"
|
||||
"set(CMAKE_COMPILER_SYSROOT \"${CMAKE_HIP_COMPILER_SYSROOT}\")")
|
||||
else()
|
||||
set(_SET_CMAKE_HIP_COMPILER_SYSROOT "")
|
||||
endif()
|
||||
|
||||
if(CMAKE_HIP_COMPILER_ARCHITECTURE_ID)
|
||||
set(_SET_CMAKE_HIP_COMPILER_ARCHITECTURE_ID
|
||||
"set(CMAKE_HIP_COMPILER_ARCHITECTURE_ID ${CMAKE_HIP_COMPILER_ARCHITECTURE_ID})")
|
||||
else()
|
||||
set(_SET_CMAKE_HIP_COMPILER_ARCHITECTURE_ID "")
|
||||
endif()
|
||||
|
||||
if(MSVC_HIP_ARCHITECTURE_ID)
|
||||
set(SET_MSVC_HIP_ARCHITECTURE_ID
|
||||
"set(MSVC_HIP_ARCHITECTURE_ID ${MSVC_HIP_ARCHITECTURE_ID})")
|
||||
endif()
|
||||
|
||||
if(CMAKE_HIP_COMPILER_ID STREQUAL "NVIDIA")
|
||||
if(NOT "$ENV{CUDAARCHS}" STREQUAL "")
|
||||
set(CMAKE_HIP_ARCHITECTURES "$ENV{CUDAARCHS}" CACHE STRING "CUDA architectures")
|
||||
endif()
|
||||
|
||||
# If the user did not set CMAKE_HIP_ARCHITECTURES, use the compiler's default.
|
||||
if("${CMAKE_HIP_ARCHITECTURES}" STREQUAL "")
|
||||
set(CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES_DEFAULT}" CACHE STRING "HIP architectures" FORCE)
|
||||
if(NOT CMAKE_HIP_ARCHITECTURES)
|
||||
message(FATAL_ERROR "Failed to detect a default HIP architecture.\n\nCompiler output:\n${CMAKE_HIP_COMPILER_PRODUCED_OUTPUT}")
|
||||
endif()
|
||||
endif()
|
||||
unset(CMAKE_HIP_ARCHITECTURES_DEFAULT)
|
||||
elseif(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
|
||||
# Use 'rocm_agent_enumerator' to get the current GPU architecture.
|
||||
set(_CMAKE_HIP_ARCHITECTURES)
|
||||
find_program(_CMAKE_HIP_ROCM_AGENT_ENUMERATOR
|
||||
NAMES rocm_agent_enumerator
|
||||
HINTS "${CMAKE_HIP_COMPILER_ROCM_ROOT}/bin"
|
||||
NO_CACHE)
|
||||
if(_CMAKE_HIP_ROCM_AGENT_ENUMERATOR)
|
||||
execute_process(COMMAND "${_CMAKE_HIP_ROCM_AGENT_ENUMERATOR}" -t GPU
|
||||
RESULT_VARIABLE _CMAKE_ROCM_AGENT_ENUMERATOR_RESULT
|
||||
OUTPUT_VARIABLE _CMAKE_ROCM_AGENT_ENUMERATOR_STDOUT
|
||||
ERROR_VARIABLE _CMAKE_ROCM_AGENT_ENUMERATOR_STDERR
|
||||
)
|
||||
if(_CMAKE_ROCM_AGENT_ENUMERATOR_RESULT EQUAL 0)
|
||||
separate_arguments(_hip_archs NATIVE_COMMAND "${_CMAKE_ROCM_AGENT_ENUMERATOR_STDOUT}")
|
||||
foreach(_hip_arch ${_hip_archs})
|
||||
if(_hip_arch STREQUAL "gfx000")
|
||||
continue()
|
||||
endif()
|
||||
string(FIND ${_hip_arch} ":" pos)
|
||||
if(NOT pos STREQUAL "-1")
|
||||
string(SUBSTRING ${_hip_arch} 0 ${pos} _hip_arch)
|
||||
endif()
|
||||
list(APPEND _CMAKE_HIP_ARCHITECTURES "${_hip_arch}")
|
||||
endforeach()
|
||||
endif()
|
||||
unset(_CMAKE_ROCM_AGENT_ENUMERATOR_RESULT)
|
||||
unset(_CMAKE_ROCM_AGENT_ENUMERATOR_STDOUT)
|
||||
unset(_CMAKE_ROCM_AGENT_ENUMERATOR_STDERR)
|
||||
endif()
|
||||
unset(_CMAKE_HIP_ROCM_AGENT_ENUMERATOR)
|
||||
if(_CMAKE_HIP_ARCHITECTURES)
|
||||
set(CMAKE_HIP_ARCHITECTURES "${_CMAKE_HIP_ARCHITECTURES}" CACHE STRING "HIP architectures")
|
||||
elseif(CMAKE_HIP_COMPILER_PRODUCED_OUTPUT MATCHES " -target-cpu ([a-z0-9]+) ")
|
||||
set(CMAKE_HIP_ARCHITECTURES "${CMAKE_MATCH_1}" CACHE STRING "HIP architectures")
|
||||
else()
|
||||
message(FATAL_ERROR "Failed to find a default HIP architecture.")
|
||||
endif()
|
||||
unset(_CMAKE_HIP_ARCHITECTURES)
|
||||
endif()
|
||||
|
||||
# configure variables set in this file for fast reload later on
|
||||
configure_file(${CMAKE_ROOT}/Modules/CMakeHIPCompiler.cmake.in
|
||||
${CMAKE_PLATFORM_INFO_DIR}/CMakeHIPCompiler.cmake
|
||||
@ONLY
|
||||
)
|
||||
set(CMAKE_HIP_COMPILER_ENV_VAR "HIPCXX")
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1 @@
|
|||
Error: listen tcp 127.0.0.1:11434: bind: address already in use
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
# Experimental ROCm iGPU Support
|
||||
|
||||
This branch adds a ROCm backend path geared toward AMD APUs that only expose a small VRAM aperture but share a large UMA pool with the CPU. The steps below outline how to reproduce the build and how to run Ollama with the staged ROCm runtime.
|
||||
|
||||
> **Warning**
|
||||
> Upstream ROCm does not officially support these APUs yet. Expect driver updates, kernel parameters, or environment variables such as `HSA_OVERRIDE_GFX_VERSION` to change between releases.
|
||||
|
||||
## 1. Stage the ROCm runtime
|
||||
|
||||
We avoid touching the system installation by unpacking the required RPMs into `build/rocm-stage`.
|
||||
|
||||
```bash
|
||||
mkdir -p build/rocm-stage build/rpm-tmp
|
||||
cd build/rpm-tmp
|
||||
dnf download \
|
||||
hipblas hipblas-devel hipblas-common-devel \
|
||||
rocblas rocblas-devel \
|
||||
rocsolver rocsolver-devel \
|
||||
rocm-hip-devel rocm-device-libs rocm-comgr rocm-comgr-devel
|
||||
|
||||
cd ../rocm-stage
|
||||
for rpm in ../rpm-tmp/*.rpm; do
|
||||
echo "extracting ${rpm}"
|
||||
rpm2cpio "${rpm}" | bsdtar -xf -
|
||||
done
|
||||
```
|
||||
|
||||
Important staged paths after extraction:
|
||||
|
||||
| Purpose | Location |
|
||||
| ------------------------ | ----------------------------------------------- |
|
||||
| HIP/rocBLAS libraries | `build/rocm-stage/lib64` |
|
||||
| Tensile kernels (rocBLAS)| `build/rocm-stage/lib64/rocblas/library` |
|
||||
| Headers (`hip`, `rocblas`)| `build/rocm-stage/include` |
|
||||
|
||||
## 2. Build the ROCm backend
|
||||
|
||||
Configure CMake with the preset that targets ROCm 6.x and point it at the staged HIP compiler:
|
||||
|
||||
```bash
|
||||
cmake --preset "ROCm 6" -B build/rocm \
|
||||
-DGGML_VULKAN=OFF \
|
||||
-DCMAKE_INSTALL_PREFIX=/usr/local \
|
||||
-DCMAKE_HIP_COMPILER=/usr/bin/hipcc \
|
||||
-DCMAKE_PREFIX_PATH="$PWD/build/rocm-stage"
|
||||
|
||||
cmake --build build/rocm --target ggml-hip -j$(nproc)
|
||||
```
|
||||
|
||||
Artifacts land in `build/lib/ollama/rocm` (and mirrored in `dist/lib/ollama/rocm` when packaging). These include `libggml-hip.so`, CPU fallback variants, Vulkan, and `librocsolver.so`.
|
||||
|
||||
## 3. Run Ollama on ROCm
|
||||
|
||||
The runner needs to see both the GGML plugins and the staged ROCm runtime. The following environment block works for an AMD Radeon 760M with a UMA carve-out:
|
||||
|
||||
```bash
|
||||
export BASE=$HOME/ollama-gpu
|
||||
export OLLAMA_LIBRARY_PATH=$BASE/build/lib/ollama/rocm:$BASE/build/lib/ollama
|
||||
export LD_LIBRARY_PATH=$OLLAMA_LIBRARY_PATH:$BASE/build/rocm-stage/lib64:${LD_LIBRARY_PATH:-}
|
||||
export ROCBLAS_TENSILE_LIBPATH=$BASE/build/rocm-stage/lib64/rocblas/library
|
||||
export ROCBLAS_TENSILE_PATH=$ROCBLAS_TENSILE_LIBPATH
|
||||
|
||||
export HSA_OVERRIDE_GFX_VERSION=11.0.0 # spoof gfx1100 for Phoenix
|
||||
export GGML_HIP_FORCE_GTT=1 # force GTT allocations for UMA memory
|
||||
export OLLAMA_GPU_DRIVER=rocm
|
||||
export OLLAMA_GPU=100 # opt into GPU-only scheduling
|
||||
export OLLAMA_LLM_LIBRARY=rocm # skip CUDA/Vulkan discovery noise
|
||||
export OLLAMA_VULKAN=0 # optional: suppress Vulkan backend
|
||||
|
||||
$BASE/build/ollama serve
|
||||
```
|
||||
|
||||
On launch you should see log lines similar to:
|
||||
|
||||
```
|
||||
library=ROCm compute=gfx1100 name=ROCm0 description="AMD Radeon 760M Graphics"
|
||||
ggml_hip_get_device_memory using GTT memory for 0000:0e:00.0 (total=16352354304 free=15034097664)
|
||||
```
|
||||
|
||||
If the runner crashes before enumerating devices:
|
||||
|
||||
- Double-check that `ROCBLAS_TENSILE_LIBPATH` points to the staged `rocblas/library`.
|
||||
- Ensure no other `LD_LIBRARY_PATH` entries override `libamdhip64.so`.
|
||||
- Try unsetting `HSA_OVERRIDE_GFX_VERSION` to confirm whether the kernel patch is still needed on your system.
|
||||
|
||||
> Example discovery + run log: [`docs/logs/rocm-760m-run.log`](logs/rocm-760m-run.log). The matching `curl` response is saved as [`docs/logs/rocm-760m-run-response.json`](logs/rocm-760m-run-response.json).
|
||||
|
||||
## 4. Sharing this build
|
||||
|
||||
- Keep the staged RPMs alongside the branch so others can reproduce the exact runtime.
|
||||
- Include `/tmp/ollama_rocm_run.log` or similar discovery logs in issues/PRs to help maintainers understand the UMA setup.
|
||||
- Mention any kernel parameters (e.g., large UMA buffer in firmware) when opening upstream tickets.
|
||||
|
|
@ -360,6 +360,36 @@ func StartRunner(ollamaEngine bool, modelPath string, gpuLibs []string, out io.W
|
|||
|
||||
// Note: we always put our dependency paths first
|
||||
// since these are the exact version we compiled/linked against
|
||||
userLibs := []string{}
|
||||
if existing, ok := os.LookupEnv("OLLAMA_LIBRARY_PATH"); ok && existing != "" {
|
||||
userLibs = filepath.SplitList(existing)
|
||||
}
|
||||
if len(userLibs) != 0 {
|
||||
seen := make(map[string]struct{}, len(userLibs)+len(gpuLibs))
|
||||
merged := make([]string, 0, len(userLibs)+len(gpuLibs))
|
||||
for _, dir := range userLibs {
|
||||
if dir == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[dir]; ok {
|
||||
continue
|
||||
}
|
||||
seen[dir] = struct{}{}
|
||||
merged = append(merged, dir)
|
||||
}
|
||||
for _, dir := range gpuLibs {
|
||||
if dir == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[dir]; ok {
|
||||
continue
|
||||
}
|
||||
seen[dir] = struct{}{}
|
||||
merged = append(merged, dir)
|
||||
}
|
||||
gpuLibs = merged
|
||||
}
|
||||
|
||||
libraryPaths := append([]string{}, gpuLibs...)
|
||||
if libraryPath, ok := os.LookupEnv(pathEnv); ok {
|
||||
libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,107 @@
|
|||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
#include <hip/amd_detail/device_library_decls.h>
|
||||
#include <hip/amd_detail/math_fwd.h>
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_max_f32(float a, float b) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_fmax_f32(a, b);
|
||||
#else
|
||||
return a > b ? a : b;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __host__ __device__ inline double ggml_hip_max_f64(double a, double b) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_fmax_f64(a, b);
|
||||
#else
|
||||
return a > b ? a : b;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_min_f32(float a, float b) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_fmin_f32(a, b);
|
||||
#else
|
||||
return a < b ? a : b;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __host__ __device__ inline double ggml_hip_min_f64(double a, double b) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_fmin_f64(a, b);
|
||||
#else
|
||||
return a < b ? a : b;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_pow_f32(float base, float exp) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_pow_f32(base, exp);
|
||||
#else
|
||||
return std::pow(base, exp);
|
||||
#endif
|
||||
}
|
||||
|
||||
static __host__ __device__ inline double ggml_hip_pow_f64(double base, double exp) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_pow_f64(base, exp);
|
||||
#else
|
||||
return std::pow(base, exp);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename A, typename B,
|
||||
typename std::enable_if<std::is_integral<A>::value && std::is_integral<B>::value, int>::type = 0>
|
||||
__host__ __device__ inline typename std::common_type<A, B>::type ggml_hip_int_max(A a, B b) {
|
||||
using R = typename std::common_type<A, B>::type;
|
||||
const R aa = static_cast<R>(a);
|
||||
const R bb = static_cast<R>(b);
|
||||
return aa > bb ? aa : bb;
|
||||
}
|
||||
|
||||
template <typename A, typename B,
|
||||
typename std::enable_if<std::is_integral<A>::value && std::is_integral<B>::value, int>::type = 0>
|
||||
__host__ __device__ inline typename std::common_type<A, B>::type ggml_hip_int_min(A a, B b) {
|
||||
using R = typename std::common_type<A, B>::type;
|
||||
const R aa = static_cast<R>(a);
|
||||
const R bb = static_cast<R>(b);
|
||||
return aa < bb ? aa : bb;
|
||||
}
|
||||
|
||||
__host__ __device__ inline float max(float a, float b) {
|
||||
return ggml_hip_max_f32(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ inline double max(double a, double b) {
|
||||
return ggml_hip_max_f64(a, b);
|
||||
}
|
||||
|
||||
template <typename A, typename B,
|
||||
typename std::enable_if<std::is_integral<A>::value && std::is_integral<B>::value, int>::type = 0>
|
||||
__host__ __device__ inline typename std::common_type<A, B>::type max(A a, B b) {
|
||||
return ggml_hip_int_max(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ inline float min(float a, float b) {
|
||||
return ggml_hip_min_f32(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ inline double min(double a, double b) {
|
||||
return ggml_hip_min_f64(a, b);
|
||||
}
|
||||
|
||||
template <typename A, typename B,
|
||||
typename std::enable_if<std::is_integral<A>::value && std::is_integral<B>::value, int>::type = 0>
|
||||
__host__ __device__ inline typename std::common_type<A, B>::type min(A a, B b) {
|
||||
return ggml_hip_int_min(a, b);
|
||||
}
|
||||
#endif // defined(__HIP_PLATFORM_AMD__)
|
||||
|
||||
#define HIP_DISABLE_WARP_SYNC_BUILTINS 1
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hipblas/hipblas.h>
|
||||
|
|
@ -8,6 +110,232 @@
|
|||
// for rocblas_initialize()
|
||||
#include "rocblas/rocblas.h"
|
||||
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
#undef fmaxf
|
||||
#define fmaxf(a, b) ggml_hip_max_f32((a), (b))
|
||||
#undef fminf
|
||||
#define fminf(a, b) ggml_hip_min_f32((a), (b))
|
||||
#undef powf
|
||||
#define powf(a, b) ggml_hip_pow_f32((a), (b))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_expf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_exp_f32(x);
|
||||
#else
|
||||
using std::exp;
|
||||
return static_cast<float>(exp(x));
|
||||
#endif
|
||||
}
|
||||
#undef expf
|
||||
#define expf(x) ggml_hip_expf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_expm1f(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_expm1_f32(x);
|
||||
#else
|
||||
using std::expm1;
|
||||
return static_cast<float>(expm1(x));
|
||||
#endif
|
||||
}
|
||||
#undef expm1f
|
||||
#define expm1f(x) ggml_hip_expm1f((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_logf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_log_f32(x);
|
||||
#else
|
||||
using std::log;
|
||||
return static_cast<float>(log(x));
|
||||
#endif
|
||||
}
|
||||
#undef logf
|
||||
#define logf(x) ggml_hip_logf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_log1pf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_log1p_f32(x);
|
||||
#else
|
||||
using std::log1p;
|
||||
return static_cast<float>(log1p(x));
|
||||
#endif
|
||||
}
|
||||
#undef log1pf
|
||||
#define log1pf(x) ggml_hip_log1pf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_log2f(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_log2_f32(x);
|
||||
#else
|
||||
using std::log2;
|
||||
return static_cast<float>(log2(x));
|
||||
#endif
|
||||
}
|
||||
#undef log2f
|
||||
#define log2f(x) ggml_hip_log2f((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_tanhf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_tanh_f32(x);
|
||||
#else
|
||||
using std::tanh;
|
||||
return static_cast<float>(tanh(x));
|
||||
#endif
|
||||
}
|
||||
#undef tanhf
|
||||
#define tanhf(x) ggml_hip_tanhf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_sinf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_sin_f32(x);
|
||||
#else
|
||||
using std::sin;
|
||||
return static_cast<float>(sin(x));
|
||||
#endif
|
||||
}
|
||||
#undef sinf
|
||||
#define sinf(x) ggml_hip_sinf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_cosf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_cos_f32(x);
|
||||
#else
|
||||
using std::cos;
|
||||
return static_cast<float>(cos(x));
|
||||
#endif
|
||||
}
|
||||
#undef cosf
|
||||
#define cosf(x) ggml_hip_cosf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_erff(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_erf_f32(x);
|
||||
#else
|
||||
using std::erf;
|
||||
return static_cast<float>(erf(x));
|
||||
#endif
|
||||
}
|
||||
#undef erff
|
||||
#define erff(x) ggml_hip_erff((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_fabsf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_fabs_f32(x);
|
||||
#else
|
||||
using std::fabs;
|
||||
return static_cast<float>(fabs(x));
|
||||
#endif
|
||||
}
|
||||
#undef fabsf
|
||||
#define fabsf(x) ggml_hip_fabsf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_floorf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_floor_f32(x);
|
||||
#else
|
||||
using std::floor;
|
||||
return static_cast<float>(floor(x));
|
||||
#endif
|
||||
}
|
||||
#undef floorf
|
||||
#define floorf(x) ggml_hip_floorf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_ceilf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_ceil_f32(x);
|
||||
#else
|
||||
using std::ceil;
|
||||
return static_cast<float>(ceil(x));
|
||||
#endif
|
||||
}
|
||||
#undef ceilf
|
||||
#define ceilf(x) ggml_hip_ceilf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_roundf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_round_f32(x);
|
||||
#else
|
||||
using std::round;
|
||||
return static_cast<float>(round(x));
|
||||
#endif
|
||||
}
|
||||
#undef roundf
|
||||
#define roundf(x) ggml_hip_roundf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_round_scalar(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_round_f32(x);
|
||||
#else
|
||||
using std::round;
|
||||
return static_cast<float>(round(x));
|
||||
#endif
|
||||
}
|
||||
|
||||
static __host__ __device__ inline double ggml_hip_round_scalar(double x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_round_f64(x);
|
||||
#else
|
||||
using std::round;
|
||||
return round(x);
|
||||
#endif
|
||||
}
|
||||
#undef round
|
||||
#define round(x) ggml_hip_round_scalar((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_sqrtf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_sqrt_f32(x);
|
||||
#else
|
||||
using std::sqrt;
|
||||
return static_cast<float>(sqrt(x));
|
||||
#endif
|
||||
}
|
||||
#undef sqrtf
|
||||
#define sqrtf(x) ggml_hip_sqrtf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_rsqrtf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_rsqrt_f32(x);
|
||||
#else
|
||||
using std::sqrt;
|
||||
return 1.0f / static_cast<float>(sqrt(x));
|
||||
#endif
|
||||
}
|
||||
#undef rsqrtf
|
||||
#define rsqrtf(x) ggml_hip_rsqrtf((x))
|
||||
|
||||
static __host__ __device__ inline float ggml_hip_trunc_scalar(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_trunc_f32(x);
|
||||
#else
|
||||
using std::trunc;
|
||||
return static_cast<float>(trunc(x));
|
||||
#endif
|
||||
}
|
||||
|
||||
static __host__ __device__ inline double ggml_hip_trunc_scalar(double x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_trunc_f64(x);
|
||||
#else
|
||||
using std::trunc;
|
||||
return trunc(x);
|
||||
#endif
|
||||
}
|
||||
#undef trunc
|
||||
#define trunc(x) ggml_hip_trunc_scalar((x))
|
||||
|
||||
static __host__ __device__ inline int ggml_hip_isinf(float x) {
|
||||
#if defined(__HIP_DEVICE_COMPILE__)
|
||||
return __ocml_isinf_f32(x);
|
||||
#else
|
||||
using std::isinf;
|
||||
return static_cast<int>(isinf(x));
|
||||
#endif
|
||||
}
|
||||
#undef isinf
|
||||
#define isinf(x) ggml_hip_isinf((x))
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(GGML_HIP_ROCWMMA_FATTN)
|
||||
#include <rocwmma/rocwmma-version.hpp>
|
||||
#endif // defined(GGML_HIP_ROCWMMA_FATTN)
|
||||
|
|
|
|||
|
|
@ -442,6 +442,8 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <filesystem>
|
||||
#include <cstdlib>
|
||||
#include <strings.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
|
|
@ -449,6 +451,60 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
|||
#include <glob.h>
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
static bool ggml_env_flag_enabled(const char *name) {
|
||||
const char *value = getenv(name);
|
||||
if (value == nullptr) {
|
||||
return false;
|
||||
}
|
||||
if (*value == '\0') {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((value[0] == '0' && value[1] == '\0') ||
|
||||
strcasecmp(value, "false") == 0 ||
|
||||
strcasecmp(value, "off") == 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ggml_read_sysfs_value(const std::string &path, uint64_t &value) {
|
||||
std::ifstream stream(path.c_str());
|
||||
if (!stream.is_open()) {
|
||||
GGML_LOG_DEBUG("%s unable to open sysfs node %s\n", __func__, path.c_str());
|
||||
return false;
|
||||
}
|
||||
stream >> value;
|
||||
if (stream.fail()) {
|
||||
GGML_LOG_DEBUG("%s unable to parse sysfs node %s\n", __func__, path.c_str());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ggml_should_use_gtt(uint64_t vram_total, uint64_t gtt_total) {
|
||||
if (ggml_env_flag_enabled("GGML_HIP_DISABLE_GTT")) {
|
||||
return false;
|
||||
}
|
||||
if (ggml_env_flag_enabled("GGML_HIP_FORCE_GTT")) {
|
||||
return gtt_total > 0;
|
||||
}
|
||||
if (gtt_total == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint64_t umaThreshold = 1024ull * 1024ull * 1024ull; // 1 GiB
|
||||
if (vram_total == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return vram_total <= umaThreshold && gtt_total > vram_total;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
|
||||
int ggml_hip_mgmt_init() {
|
||||
|
|
@ -461,6 +517,8 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
|||
const std::string drmTotalMemoryFile = "mem_info_vram_total";
|
||||
const std::string drmUsedMemoryFile = "mem_info_vram_used";
|
||||
const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
|
||||
const std::string drmGttTotalFile = "mem_info_gtt_total";
|
||||
const std::string drmGttUsedFile = "mem_info_gtt_used";
|
||||
|
||||
glob_t glob_result;
|
||||
glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
|
||||
|
|
@ -495,7 +553,6 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
|||
|
||||
uint64_t memory;
|
||||
totalFileStream >> memory;
|
||||
*total = memory;
|
||||
|
||||
std::string usedFile = dir + "/" + drmUsedMemoryFile;
|
||||
std::ifstream usedFileStream(usedFile.c_str());
|
||||
|
|
@ -508,7 +565,30 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
|
|||
|
||||
uint64_t memoryUsed;
|
||||
usedFileStream >> memoryUsed;
|
||||
*free = memory - memoryUsed;
|
||||
|
||||
uint64_t gttTotal = 0;
|
||||
uint64_t gttUsed = 0;
|
||||
bool hasGttTotal = ggml_read_sysfs_value(dir + "/" + drmGttTotalFile, gttTotal);
|
||||
bool hasGttUsed = ggml_read_sysfs_value(dir + "/" + drmGttUsedFile, gttUsed);
|
||||
bool useGtt = ggml_should_use_gtt(memory, hasGttTotal ? gttTotal : 0);
|
||||
|
||||
if (useGtt && hasGttTotal) {
|
||||
uint64_t freeGtt = gttTotal;
|
||||
if (hasGttUsed && gttTotal > gttUsed) {
|
||||
freeGtt = gttTotal - gttUsed;
|
||||
}
|
||||
*total = gttTotal;
|
||||
*free = freeGtt;
|
||||
GGML_LOG_INFO("%s using GTT memory for %s (total=%zu free=%zu)\n",
|
||||
__func__, id, *total, *free);
|
||||
} else {
|
||||
*total = memory;
|
||||
if (memory > memoryUsed) {
|
||||
*free = memory - memoryUsed;
|
||||
} else {
|
||||
*free = 0;
|
||||
}
|
||||
}
|
||||
|
||||
file.close();
|
||||
globfree(&glob_result);
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
)
|
||||
|
|
@ -538,6 +539,9 @@ func GetVisibleDevicesEnv(l []DeviceInfo, mustFilter bool) map[string]string {
|
|||
func (d DeviceInfo) NeedsInitValidation() bool {
|
||||
// ROCm: rocblas will crash on unsupported devices.
|
||||
// CUDA: verify CC is supported by the version of the library
|
||||
if d.Library == "ROCm" && envconfig.HsaOverrideGfxVersion() != "" {
|
||||
return false
|
||||
}
|
||||
return d.Library == "ROCm" || d.Library == "CUDA"
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue