diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index ec4d24502..cbf6d2935 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -104,6 +104,13 @@ jobs:
             install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
             rocm-version: '6.2'
             flags: '-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
+            runner_dir: 'rocm'
+          - os: windows
+            arch: amd64
+            preset: Vulkan
+            install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
+            flags: ''
+            runner_dir: 'vulkan'
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     env:
@@ -113,13 +120,14 @@ jobs:
         run: |
           choco install -y --no-progress ccache ninja
           ccache -o cache_dir=${{ github.workspace }}\.ccache
-      - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ')
+      - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ') || startsWith(matrix.preset, 'Vulkan')
        id: cache-install
        uses: actions/cache/restore@v4
        with:
          path: |
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
+           C:\VulkanSDK
          key: ${{ matrix.install }}
      - if: startsWith(matrix.preset, 'CUDA ')
        name: Install CUDA ${{ matrix.cuda-version }}
@@ -149,6 +157,18 @@ jobs:
          echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
+      - if: matrix.preset == 'Vulkan'
+        name: Install Vulkan
+        run: |
+          $ErrorActionPreference = "Stop"
+          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
+            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
+            Start-Process -FilePath .\install.exe -ArgumentList "-c","--am","--al","in" -NoNewWindow -Wait
+          }
+
+          $vulkanPath = (Resolve-Path "C:\VulkanSDK\*").path
+          echo "$vulkanPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "VULKAN_SDK=$vulkanPath" >> $env:GITHUB_ENV
      - if: matrix.preset == 'CPU'
        run: |
          echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
@@ -159,6 +179,7 @@ jobs:
          path: |
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
+           C:\VulkanSDK
          key: ${{ matrix.install }}
      - uses: actions/checkout@v4
      - uses: actions/cache@v4
@@ -312,13 +333,13 @@ jobs:
        include:
          - os: linux
            arch: amd64
-           target: archive_novulkan
+           target: archive
          - os: linux
            arch: amd64
            target: rocm
          - os: linux
            arch: arm64
-           target: archive_novulkan
+           target: archive
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     needs: setup-environment
@@ -374,14 +395,12 @@ jobs:
        include:
          - os: linux
            arch: arm64
-           target: novulkan
            build-args: |
              CGO_CFLAGS
              CGO_CXXFLAGS
              GOFLAGS
          - os: linux
            arch: amd64
-           target: novulkan
            build-args: |
              CGO_CFLAGS
              CGO_CXXFLAGS
@@ -394,14 +413,6 @@
              CGO_CXXFLAGS
              GOFLAGS
              FLAVOR=rocm
-         - os: linux
-           arch: amd64
-           suffix: '-vulkan'
-           target: default
-           build-args: |
-             CGO_CFLAGS
-             CGO_CXXFLAGS
-             GOFLAGS
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     needs: setup-environment
@@ -419,7 +430,6 @@
        with:
          context: .
          platforms: ${{ matrix.os }}/${{ matrix.arch }}
-         target: ${{ matrix.preset }}
          build-args: ${{ matrix.build-args }}
          outputs: type=image,name=${{ vars.DOCKER_REPO }},push-by-digest=true,name-canonical=true,push=true
          cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index d74da923c..82f2b0403 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -172,6 +172,7 @@ jobs:
          path: |
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
+           C:\VulkanSDK
          key: ${{ matrix.install }}
      - uses: actions/checkout@v4
      - uses: actions/cache@v4
diff --git a/Dockerfile b/Dockerfile
index c56c229aa..3a9365061 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -159,32 +159,7 @@ ARG VULKANVERSION
 COPY --from=cpu dist/lib/ollama /lib/ollama
 COPY --from=build /bin/ollama /bin/ollama
 
-# Temporary opt-out stages for Vulkan
-FROM --platform=linux/amd64 scratch AS amd64_novulkan
-# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
-COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
-COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
-FROM arm64 AS arm64_novulkan
-FROM ${FLAVOR}_novulkan AS archive_novulkan
-COPY --from=cpu dist/lib/ollama /lib/ollama
-COPY --from=build /bin/ollama /bin/ollama
-FROM ubuntu:24.04 AS novulkan
-RUN apt-get update \
-    && apt-get install -y ca-certificates \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-COPY --from=archive_novulkan /bin /usr/bin
-ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-COPY --from=archive_novulkan /lib/ollama /usr/lib/ollama
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
-ENV NVIDIA_VISIBLE_DEVICES=all
-ENV OLLAMA_HOST=0.0.0.0:11434
-EXPOSE 11434
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
-
-FROM ubuntu:24.04 AS default
+FROM ubuntu:24.04
 RUN apt-get update \
     && apt-get install -y ca-certificates libvulkan1 \
     && apt-get clean \
diff --git a/discover/runner.go b/discover/runner.go
index 4d44dae29..bf2110bc7 100644
--- a/discover/runner.go
+++ b/discover/runner.go
@@ -94,6 +94,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 				continue
 			} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
 				continue
+			} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
+				slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
+				continue
 			}
 			dirs = []string{ml.LibOllamaPath, dir}
 		} else {
diff --git a/docs/docker.mdx b/docs/docker.mdx
index 22d2bc339..ba770a706 100644
--- a/docs/docker.mdx
+++ b/docs/docker.mdx
@@ -68,6 +68,15 @@ To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following c
 docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
 ```
 
+## Vulkan Support
+
+Vulkan support is bundled into the `ollama/ollama` image. To enable it, set `OLLAMA_VULKAN=1` when starting the container:
+
+```shell
+docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 -e OLLAMA_VULKAN=1 --name ollama ollama/ollama
+```
+
+
 ## Run model locally
 
 Now you can run a model:
@@ -79,3 +88,4 @@ docker exec -it ollama ollama run llama3.2
 ```
 ## Try different models
 More models can be found on the [Ollama library](https://ollama.com/library).
+
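The `discover/runner.go` change above logs a fixed message whenever the bundled Vulkan libraries are present but `OLLAMA_VULKAN` is unset, which gives a quick way to confirm whether a container actually picked up the flag. A minimal sketch, assuming the container is named `ollama` as in the docs above:

```shell
# Inspect the server log for the Vulkan gate added in discover/runner.go.
# Without -e OLLAMA_VULKAN=1 the log should contain:
#   experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1
docker logs ollama 2>&1 | grep -i "vulkan"
```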
diff --git a/docs/gpu.mdx b/docs/gpu.mdx
index bf1142218..36bfd3da7 100644
--- a/docs/gpu.mdx
+++ b/docs/gpu.mdx
@@ -52,7 +52,11 @@ sudo modprobe nvidia_uvm`
 
 ## AMD Radeon
 
-Ollama supports the following AMD GPUs:
+Ollama supports the following AMD GPUs via the ROCm library:
+
+> [!NOTE]
+> Additional AMD GPU support is provided by the Vulkan library; see below.
+
 
 ### Linux Support
 
@@ -124,3 +128,39 @@ accessing the AMD GPU devices. On the host system you can run
 ## Metal (Apple GPUs)
 
 Ollama supports GPU acceleration on Apple devices via the Metal API.
+
+
+## Vulkan GPU Support
+
+> [!NOTE]
+> Vulkan is currently an experimental feature. To enable it, set OLLAMA_VULKAN=1 for the Ollama server as
+> described in the [FAQ](faq.md#how-do-i-configure-ollama-server).
+
+Additional GPU support on Windows and Linux is provided via
+[Vulkan](https://www.vulkan.org/). On Windows, most GPU vendors' drivers come
+bundled with Vulkan support and require no additional setup. Most Linux
+distributions require installing additional components, and you may have a
+choice of Vulkan drivers between Mesa and GPU vendor-specific packages.
+
+- Linux Intel GPU instructions - https://dgpu-docs.intel.com/driver/client/overview.html
+- Linux AMD GPU instructions - https://amdgpu-install.readthedocs.io/en/latest/install-script.html#specifying-a-vulkan-implementation
+
+For AMD GPUs on some Linux distributions, you may need to add the `ollama` user to the `render` group.
+
+The Ollama scheduler uses the available VRAM reported by the GPU libraries to
+make optimal scheduling decisions. Vulkan requires additional capabilities, or
+running as root, to expose this available VRAM data. If neither root access nor
+this capability is granted, Ollama falls back to approximate model sizes to
+make best-effort scheduling decisions. To grant the capability:
+
+```bash
+sudo setcap cap_perfmon+ep /usr/local/bin/ollama
+```
+
+### GPU Selection
+
+To select specific Vulkan GPU(s), set the environment variable
+`GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
+described in the [FAQ](faq.md#how-do-i-configure-ollama-server). If you
+encounter any problems with Vulkan-based GPUs, you can disable all Vulkan GPUs
+by setting `GGML_VK_VISIBLE_DEVICES=-1`.
\ No newline at end of file
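The GPU Selection guidance above pairs with the `OLLAMA_VULKAN` flag added in `envconfig/config.go` below. A minimal launch sketch; the comma-separated ID list is an assumption following the usual `*_VISIBLE_DEVICES` convention, since the docs only promise "one or more numeric IDs":

```shell
# Enable the experimental Vulkan backend and expose only devices 0 and 1.
# Comma separation is an assumption; the docs specify numeric IDs.
OLLAMA_VULKAN=1 GGML_VK_VISIBLE_DEVICES=0,1 ollama serve

# If Vulkan devices misbehave, hide them all:
GGML_VK_VISIBLE_DEVICES=-1 ollama serve
```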
diff --git a/envconfig/config.go b/envconfig/config.go
index d155bd8f9..238e5e6e1 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -196,8 +196,6 @@ var (
 	NoPrune = Bool("OLLAMA_NOPRUNE")
 	// SchedSpread allows scheduling models across all GPUs.
 	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
-	// IntelGPU enables experimental Intel GPU detection.
-	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
@@ -206,6 +204,8 @@ var (
 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
 	UseAuth = Bool("OLLAMA_AUTH")
+	// EnableVulkan enables the experimental Vulkan backend
+	EnableVulkan = Bool("OLLAMA_VULKAN")
 )
 
 func String(s string) func() string {
@@ -314,7 +314,7 @@ func AsMap() map[string]EnvVar {
 		ret["GGML_VK_VISIBLE_DEVICES"] = EnvVar{"GGML_VK_VISIBLE_DEVICES", VkVisibleDevices(), "Set which Vulkan devices are visible by numeric ID"}
 		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
 		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
+		ret["OLLAMA_VULKAN"] = EnvVar{"OLLAMA_VULKAN", EnableVulkan(), "Enable experimental Vulkan support"}
 	}
 
 	return ret
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 8287c11c2..618722d11 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -13,13 +13,12 @@ set -eu
 . $(dirname $0)/env.sh
 mkdir -p dist
 
-NOVULKAN=${NOVULKAN:-""}
 docker buildx build \
     --output type=local,dest=./dist/ \
     --platform=${PLATFORM} \
     ${OLLAMA_COMMON_BUILD_ARGS} \
-    --target archive${NOVULKAN} \
+    --target archive \
    -f Dockerfile \
    .
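With the `NOVULKAN` opt-out removed, `scripts/build_linux.sh` always builds the `archive` target, which now includes the Vulkan libraries. A usage sketch, assuming `scripts/env.sh` still supplies defaults for `PLATFORM` and `OLLAMA_COMMON_BUILD_ARGS`:

```shell
# Build the Vulkan-enabled archive for a single platform; output lands in ./dist.
# PLATFORM is consumed by the buildx invocation above (assumed default: both architectures).
PLATFORM=linux/amd64 ./scripts/build_linux.sh
ls dist/
```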