mirror of https://github.com/ollama/ollama
Enable Vulkan with a temporary opt-in setting (#12931)
* docs: vulkan information
* Revert "CI: Set up temporary opt-out Vulkan support (#12614)"
This reverts commit 8b6e5baee7.
* vulkan: temporary opt-in for Vulkan support
Revert this once we're ready to enable by default.
* win: add vulkan CI build
parent: 3a9e8e9fd4
commit: 6286d9a3a5
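Since the new setting is opt-in, Vulkan stays disabled unless the server is started with the environment variable introduced in the diff below. A minimal sketch of opting in on a local install (assumes an `ollama` binary on PATH):

```shell
# Opt in to the experimental Vulkan backend for this server instance.
# Without this, Vulkan runner libraries are skipped at GPU discovery time.
OLLAMA_VULKAN=1 ollama serve
```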
@@ -104,6 +104,13 @@ jobs:
           install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
           rocm-version: '6.2'
           flags: '-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
+          runner_dir: 'rocm'
+        - os: windows
+          arch: amd64
+          preset: Vulkan
+          install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
+          flags: ''
+          runner_dir: 'vulkan'
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     env:
@@ -113,13 +120,14 @@ jobs:
       run: |
         choco install -y --no-progress ccache ninja
         ccache -o cache_dir=${{ github.workspace }}\.ccache
-    - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ')
+    - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ') || startsWith(matrix.preset, 'Vulkan')
       id: cache-install
       uses: actions/cache/restore@v4
       with:
         path: |
           C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
           C:\Program Files\AMD\ROCm
+          C:\VulkanSDK
         key: ${{ matrix.install }}
     - if: startsWith(matrix.preset, 'CUDA ')
       name: Install CUDA ${{ matrix.cuda-version }}
@@ -149,6 +157,18 @@ jobs:
         echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
         echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
         echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
+    - if: matrix.preset == 'Vulkan'
+      name: Install Vulkan ${{ matrix.rocm-version }}
+      run: |
+        $ErrorActionPreference = "Stop"
+        if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
+          Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
+          Start-Process -FilePath .\install.exe -ArgumentList "-c","--am","--al","in" -NoNewWindow -Wait
+        }
+
+        $vulkanPath = (Resolve-Path "C:\VulkanSDK\*").path
+        echo "$vulkanPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+        echo "VULKAN_SDK=$vulkanPath" >> $env:GITHUB_ENV
     - if: matrix.preset == 'CPU'
       run: |
         echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
@@ -159,6 +179,7 @@ jobs:
         path: |
           C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
           C:\Program Files\AMD\ROCm
+          C:\VulkanSDK
         key: ${{ matrix.install }}
     - uses: actions/checkout@v4
     - uses: actions/cache@v4
@@ -312,13 +333,13 @@ jobs:
       include:
         - os: linux
           arch: amd64
-          target: archive_novulkan
+          target: archive
         - os: linux
           arch: amd64
           target: rocm
         - os: linux
           arch: arm64
-          target: archive_novulkan
+          target: archive
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     needs: setup-environment
@@ -374,14 +395,12 @@ jobs:
       include:
         - os: linux
           arch: arm64
-          target: novulkan
           build-args: |
             CGO_CFLAGS
             CGO_CXXFLAGS
             GOFLAGS
         - os: linux
           arch: amd64
-          target: novulkan
           build-args: |
             CGO_CFLAGS
             CGO_CXXFLAGS
@@ -394,14 +413,6 @@ jobs:
             CGO_CXXFLAGS
             GOFLAGS
             FLAVOR=rocm
-        - os: linux
-          arch: amd64
-          suffix: '-vulkan'
-          target: default
-          build-args: |
-            CGO_CFLAGS
-            CGO_CXXFLAGS
-            GOFLAGS
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     needs: setup-environment
@@ -419,7 +430,6 @@ jobs:
         with:
           context: .
           platforms: ${{ matrix.os }}/${{ matrix.arch }}
-          target: ${{ matrix.preset }}
           build-args: ${{ matrix.build-args }}
           outputs: type=image,name=${{ vars.DOCKER_REPO }},push-by-digest=true,name-canonical=true,push=true
           cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
@@ -172,6 +172,7 @@ jobs:
         path: |
           C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
           C:\Program Files\AMD\ROCm
+          C:\VulkanSDK
         key: ${{ matrix.install }}
     - uses: actions/checkout@v4
     - uses: actions/cache@v4
Dockerfile (27 lines changed)
@@ -159,32 +159,7 @@ ARG VULKANVERSION
 COPY --from=cpu dist/lib/ollama /lib/ollama
 COPY --from=build /bin/ollama /bin/ollama
 
-# Temporary opt-out stages for Vulkan
-FROM --platform=linux/amd64 scratch AS amd64_novulkan
-# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
-COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
-COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
-FROM arm64 AS arm64_novulkan
-FROM ${FLAVOR}_novulkan AS archive_novulkan
-COPY --from=cpu dist/lib/ollama /lib/ollama
-COPY --from=build /bin/ollama /bin/ollama
-FROM ubuntu:24.04 AS novulkan
-RUN apt-get update \
-    && apt-get install -y ca-certificates \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-COPY --from=archive_novulkan /bin /usr/bin
-ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-COPY --from=archive_novulkan /lib/ollama /usr/lib/ollama
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
-ENV NVIDIA_VISIBLE_DEVICES=all
-ENV OLLAMA_HOST=0.0.0.0:11434
-EXPOSE 11434
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
-
-FROM ubuntu:24.04 AS default
+FROM ubuntu:24.04
 RUN apt-get update \
     && apt-get install -y ca-certificates libvulkan1 \
     && apt-get clean \
@@ -94,6 +94,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 				continue
 			} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
 				continue
+			} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
+				slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
+				continue
 			}
 			dirs = []string{ml.LibOllamaPath, dir}
 		} else {
@@ -68,6 +68,15 @@ To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
 docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
 ```
+
+## Vulkan Support
+
+Vulkan is bundled into the `ollama/ollama` image.
+
+```shell
+docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 -e OLLAMA_VULKAN=1 --name ollama ollama/ollama
+```
+
 
 ## Run model locally
 
 Now you can run a model:
@@ -79,3 +88,4 @@ docker exec -it ollama ollama run llama3.2
 ## Try different models
 
 More models can be found on the [Ollama library](https://ollama.com/library).
+
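Putting the two docs hunks above together, a minimal end-to-end Docker session with the new opt-in could look like this (a sketch that simply sequences the commands from the hunks; the device flags assume the AMD-style `/dev/kfd`/`/dev/dri` passthrough used in the docs):

```shell
# Start the container with the experimental Vulkan backend enabled
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama \
  -p 11434:11434 -e OLLAMA_VULKAN=1 --name ollama ollama/ollama

# Run a model inside the container
docker exec -it ollama ollama run llama3.2
```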
docs/gpu.mdx (42 lines changed)
@@ -52,7 +52,11 @@ sudo modprobe nvidia_uvm`
 
 ## AMD Radeon
 
-Ollama supports the following AMD GPUs:
+Ollama supports the following AMD GPUs via the ROCm library:
+
+> [!NOTE]
+> Additional AMD GPU support is provided by the Vulkan library; see below.
+
 
 ### Linux Support
 
@@ -124,3 +128,39 @@ accessing the AMD GPU devices. On the host system you can run
 ## Metal (Apple GPUs)
 
 Ollama supports GPU acceleration on Apple devices via the Metal API.
+
+
+## Vulkan GPU Support
+
+> [!NOTE]
+> Vulkan is currently an experimental feature. To enable it, you must set OLLAMA_VULKAN=1 for the Ollama server as
+> described in the [FAQ](faq.md#how-do-i-configure-ollama-server).
+
+Additional GPU support on Windows and Linux is provided via
+[Vulkan](https://www.vulkan.org/). On Windows, most GPU vendors' drivers come
+bundled with Vulkan support and require no additional setup steps. Most Linux
+distributions require installing additional components, and you may have
+multiple options for Vulkan drivers between Mesa and GPU vendor-specific packages.
+
+- Linux Intel GPU Instructions - https://dgpu-docs.intel.com/driver/client/overview.html
+- Linux AMD GPU Instructions - https://amdgpu-install.readthedocs.io/en/latest/install-script.html#specifying-a-vulkan-implementation
+
+For AMD GPUs on some Linux distributions, you may need to add the `ollama` user to the `render` group.
+
+The Ollama scheduler leverages available VRAM data reported by the GPU libraries to
+make optimal scheduling decisions. Vulkan requires additional capabilities or
+running as root to expose this available VRAM data. If neither root access nor this
+capability is granted, Ollama will use approximate sizes of the models
+to make best-effort scheduling decisions.
+
+```bash
+sudo setcap cap_perfmon+ep /usr/local/bin/ollama
+```
+
+### GPU Selection
+
+To select specific Vulkan GPU(s), you can set the environment variable
+`GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
+described in the [FAQ](faq.md#how-do-i-configure-ollama-server). If you
+encounter any problems with Vulkan-based GPUs, you can disable all Vulkan GPUs
+by setting `GGML_VK_VISIBLE_DEVICES=-1`.
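As a usage sketch of the selection knobs documented above (the device ID is illustrative; both variables are read by the Ollama server process):

```shell
# Expose only Vulkan device 0 to the server
OLLAMA_VULKAN=1 GGML_VK_VISIBLE_DEVICES=0 ollama serve

# Disable all Vulkan GPUs if they cause problems
GGML_VK_VISIBLE_DEVICES=-1 ollama serve
```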
@@ -196,8 +196,6 @@ var (
 	NoPrune = Bool("OLLAMA_NOPRUNE")
 	// SchedSpread allows scheduling models across all GPUs.
 	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
-	// IntelGPU enables experimental Intel GPU detection.
-	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
@@ -206,6 +204,8 @@ var (
 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
 	UseAuth = Bool("OLLAMA_AUTH")
+	// Enable Vulkan backend
+	EnableVulkan = Bool("OLLAMA_VULKAN")
 )
 
 func String(s string) func() string {
@@ -314,7 +314,7 @@ func AsMap() map[string]EnvVar {
 		ret["GGML_VK_VISIBLE_DEVICES"] = EnvVar{"GGML_VK_VISIBLE_DEVICES", VkVisibleDevices(), "Set which Vulkan devices are visible by numeric ID"}
 		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
 		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
+		ret["OLLAMA_VULKAN"] = EnvVar{"OLLAMA_VULKAN", EnableVulkan(), "Enable experimental Vulkan support"}
 	}
 
 	return ret
@@ -13,13 +13,12 @@ set -eu
 . $(dirname $0)/env.sh
 
 mkdir -p dist
-NOVULKAN=${NOVULKAN:-""}
 
 docker buildx build \
     --output type=local,dest=./dist/ \
     --platform=${PLATFORM} \
     ${OLLAMA_COMMON_BUILD_ARGS} \
-    --target archive${NOVULKAN} \
+    --target archive \
     -f Dockerfile \
     .
 
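With the `NOVULKAN` knob removed, `archive` is the script's only build target. A hand-run equivalent of the simplified invocation might look like this (a sketch: the platform value is illustrative, and the `${OLLAMA_COMMON_BUILD_ARGS}` normally supplied by `env.sh` are omitted):

```shell
# Build the archive target for one platform and export it to ./dist/
docker buildx build \
    --output type=local,dest=./dist/ \
    --platform=linux/amd64 \
    --target archive \
    -f Dockerfile \
    .
```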