mirror of https://github.com/ollama/ollama
Enable Vulkan with a temporary opt-in setting (#12931)
* docs: vulkan information
* Revert "CI: Set up temporary opt-out Vulkan support (#12614)"
This reverts commit 8b6e5baee7.
* vulkan: temporary opt-in for Vulkan support
Revert this once we're ready to enable by default.
* win: add vulkan CI build
parent: 3a9e8e9fd4
commit: 6286d9a3a5
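Since the new setting is opt-in, Vulkan stays disabled unless the server is started with the environment variable introduced in the diff below. A minimal sketch of opting in on a local install (assumes an `ollama` binary on PATH):

```shell
# Opt in to the experimental Vulkan backend for this server instance.
# Without this, Vulkan runner libraries are skipped at GPU discovery time.
OLLAMA_VULKAN=1 ollama serve
```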
@@ -104,6 +104,13 @@ jobs:
           install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
           rocm-version: '6.2'
           flags: '-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
+          runner_dir: 'rocm'
+        - os: windows
+          arch: amd64
+          preset: Vulkan
+          install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
+          flags: ''
+          runner_dir: 'vulkan'
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     env:
@@ -113,13 +120,14 @@ jobs:
       run: |
         choco install -y --no-progress ccache ninja
         ccache -o cache_dir=${{ github.workspace }}\.ccache
-    - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ')
+    - if: startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ') || startsWith(matrix.preset, 'Vulkan')
       id: cache-install
       uses: actions/cache/restore@v4
       with:
         path: |
           C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
           C:\Program Files\AMD\ROCm
+          C:\VulkanSDK
         key: ${{ matrix.install }}
     - if: startsWith(matrix.preset, 'CUDA ')
       name: Install CUDA ${{ matrix.cuda-version }}
@@ -149,6 +157,18 @@ jobs:
         echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
         echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
         echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
+    - if: matrix.preset == 'Vulkan'
+      name: Install Vulkan ${{ matrix.rocm-version }}
+      run: |
+        $ErrorActionPreference = "Stop"
+        if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
+          Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
+          Start-Process -FilePath .\install.exe -ArgumentList "-c","--am","--al","in" -NoNewWindow -Wait
+        }
+
+        $vulkanPath = (Resolve-Path "C:\VulkanSDK\*").path
+        echo "$vulkanPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+        echo "VULKAN_SDK=$vulkanPath" >> $env:GITHUB_ENV
     - if: matrix.preset == 'CPU'
       run: |
         echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
@@ -159,6 +179,7 @@ jobs:
         path: |
           C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
           C:\Program Files\AMD\ROCm
+          C:\VulkanSDK
         key: ${{ matrix.install }}
     - uses: actions/checkout@v4
     - uses: actions/cache@v4
@@ -312,13 +333,13 @@ jobs:
       include:
         - os: linux
           arch: amd64
-          target: archive_novulkan
+          target: archive
         - os: linux
           arch: amd64
           target: rocm
         - os: linux
           arch: arm64
-          target: archive_novulkan
+          target: archive
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     needs: setup-environment
@@ -374,14 +395,12 @@ jobs:
       include:
         - os: linux
           arch: arm64
-          target: novulkan
           build-args: |
             CGO_CFLAGS
             CGO_CXXFLAGS
             GOFLAGS
         - os: linux
           arch: amd64
-          target: novulkan
           build-args: |
             CGO_CFLAGS
             CGO_CXXFLAGS
@@ -394,14 +413,6 @@ jobs:
             CGO_CXXFLAGS
             GOFLAGS
             FLAVOR=rocm
-        - os: linux
-          arch: amd64
-          suffix: '-vulkan'
-          target: default
-          build-args: |
-            CGO_CFLAGS
-            CGO_CXXFLAGS
-            GOFLAGS
     runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
     environment: release
     needs: setup-environment
@@ -419,7 +430,6 @@ jobs:
         with:
           context: .
           platforms: ${{ matrix.os }}/${{ matrix.arch }}
-          target: ${{ matrix.preset }}
           build-args: ${{ matrix.build-args }}
           outputs: type=image,name=${{ vars.DOCKER_REPO }},push-by-digest=true,name-canonical=true,push=true
           cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
@@ -172,6 +172,7 @@ jobs:
         path: |
           C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
           C:\Program Files\AMD\ROCm
+          C:\VulkanSDK
         key: ${{ matrix.install }}
     - uses: actions/checkout@v4
     - uses: actions/cache@v4
Dockerfile (27 lines changed)
@@ -159,32 +159,7 @@ ARG VULKANVERSION
 COPY --from=cpu dist/lib/ollama /lib/ollama
 COPY --from=build /bin/ollama /bin/ollama
 
-# Temporary opt-out stages for Vulkan
-FROM --platform=linux/amd64 scratch AS amd64_novulkan
-# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
-COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
-COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
-FROM arm64 AS arm64_novulkan
-FROM ${FLAVOR}_novulkan AS archive_novulkan
-COPY --from=cpu dist/lib/ollama /lib/ollama
-COPY --from=build /bin/ollama /bin/ollama
-FROM ubuntu:24.04 AS novulkan
-RUN apt-get update \
-    && apt-get install -y ca-certificates \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-COPY --from=archive_novulkan /bin /usr/bin
-ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-COPY --from=archive_novulkan /lib/ollama /usr/lib/ollama
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
-ENV NVIDIA_VISIBLE_DEVICES=all
-ENV OLLAMA_HOST=0.0.0.0:11434
-EXPOSE 11434
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
-
-FROM ubuntu:24.04 AS default
+FROM ubuntu:24.04
 RUN apt-get update \
     && apt-get install -y ca-certificates libvulkan1 \
     && apt-get clean \
@@ -94,6 +94,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 				continue
 			} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
 				continue
+			} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
+				slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
+				continue
 			}
 			dirs = []string{ml.LibOllamaPath, dir}
 		} else {
@@ -68,6 +68,15 @@ To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
 docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
 ```
+
+## Vulkan Support
+
+Vulkan is bundled into the `ollama/ollama` image.
+
+```shell
+docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 -e OLLAMA_VULKAN=1 --name ollama ollama/ollama
+```
+
 
 ## Run model locally
 
 Now you can run a model:
@@ -79,3 +88,4 @@ docker exec -it ollama ollama run llama3.2
 ## Try different models
 
 More models can be found on the [Ollama library](https://ollama.com/library).
+
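Putting the two docs hunks above together, a minimal end-to-end Docker session with the new opt-in could look like this (a sketch that simply sequences the commands from the hunks; the device flags assume the AMD-style `/dev/kfd`/`/dev/dri` passthrough used in the docs):

```shell
# Start the container with the experimental Vulkan backend enabled
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama \
  -p 11434:11434 -e OLLAMA_VULKAN=1 --name ollama ollama/ollama

# Run a model inside the container
docker exec -it ollama ollama run llama3.2
```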
docs/gpu.mdx (42 lines changed)
@@ -52,7 +52,11 @@ sudo modprobe nvidia_uvm`
 
 ## AMD Radeon
 
-Ollama supports the following AMD GPUs:
+Ollama supports the following AMD GPUs via the ROCm library:
+
+> [!NOTE]
+> Additional AMD GPU support is provided by the Vulkan library; see below.
+
 
 ### Linux Support
 
@@ -124,3 +128,39 @@ accessing the AMD GPU devices. On the host system you can run
 ## Metal (Apple GPUs)
 
 Ollama supports GPU acceleration on Apple devices via the Metal API.
+
+
+## Vulkan GPU Support
+
+> [!NOTE]
+> Vulkan is currently an experimental feature. To enable it, you must set OLLAMA_VULKAN=1 for the Ollama server as
+> described in the [FAQ](faq.md#how-do-i-configure-ollama-server).
+
+Additional GPU support on Windows and Linux is provided via
+[Vulkan](https://www.vulkan.org/). On Windows, most GPU vendors' drivers come
+bundled with Vulkan support and require no additional setup steps. Most Linux
+distributions require installing additional components, and you may have
+multiple options for Vulkan drivers between Mesa and GPU vendor-specific packages.
+
+- Linux Intel GPU Instructions - https://dgpu-docs.intel.com/driver/client/overview.html
+- Linux AMD GPU Instructions - https://amdgpu-install.readthedocs.io/en/latest/install-script.html#specifying-a-vulkan-implementation
+
+For AMD GPUs on some Linux distributions, you may need to add the `ollama` user to the `render` group.
+
+The Ollama scheduler leverages available VRAM data reported by the GPU libraries to
+make optimal scheduling decisions. Vulkan requires additional capabilities or
+running as root to expose this available VRAM data. If neither root access nor this
+capability is granted, Ollama will use approximate sizes of the models
+to make best-effort scheduling decisions.
+
+```bash
+sudo setcap cap_perfmon+ep /usr/local/bin/ollama
+```
+
+### GPU Selection
+
+To select specific Vulkan GPU(s), you can set the environment variable
+`GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
+described in the [FAQ](faq.md#how-do-i-configure-ollama-server). If you
+encounter any problems with Vulkan-based GPUs, you can disable all Vulkan GPUs
+by setting `GGML_VK_VISIBLE_DEVICES=-1`.
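As a usage sketch of the selection knobs documented above (the device ID is illustrative; both variables are read by the Ollama server process):

```shell
# Expose only Vulkan device 0 to the server
OLLAMA_VULKAN=1 GGML_VK_VISIBLE_DEVICES=0 ollama serve

# Disable all Vulkan GPUs if they cause problems
GGML_VK_VISIBLE_DEVICES=-1 ollama serve
```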
@@ -196,8 +196,6 @@ var (
 	NoPrune = Bool("OLLAMA_NOPRUNE")
 	// SchedSpread allows scheduling models across all GPUs.
 	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
-	// IntelGPU enables experimental Intel GPU detection.
-	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
@@ -206,6 +204,8 @@ var (
 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
 	UseAuth = Bool("OLLAMA_AUTH")
+	// Enable Vulkan backend
+	EnableVulkan = Bool("OLLAMA_VULKAN")
 )
 
 func String(s string) func() string {
@@ -314,7 +314,7 @@ func AsMap() map[string]EnvVar {
 		ret["GGML_VK_VISIBLE_DEVICES"] = EnvVar{"GGML_VK_VISIBLE_DEVICES", VkVisibleDevices(), "Set which Vulkan devices are visible by numeric ID"}
 		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
 		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
+		ret["OLLAMA_VULKAN"] = EnvVar{"OLLAMA_VULKAN", EnableVulkan(), "Enable experimental Vulkan support"}
 	}
 
 	return ret
@@ -13,13 +13,12 @@ set -eu
 . $(dirname $0)/env.sh
 
 mkdir -p dist
-NOVULKAN=${NOVULKAN:-""}
 
 docker buildx build \
     --output type=local,dest=./dist/ \
     --platform=${PLATFORM} \
     ${OLLAMA_COMMON_BUILD_ARGS} \
-    --target archive${NOVULKAN} \
+    --target archive \
     -f Dockerfile \
     .
 
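With the `NOVULKAN` knob removed, `archive` is the script's only build target. A hand-run equivalent of the simplified invocation might look like this (a sketch: the platform value is illustrative, and the `${OLLAMA_COMMON_BUILD_ARGS}` normally supplied by `env.sh` are omitted):

```shell
# Build the archive target for one platform and export it to ./dist/
docker buildx build \
    --output type=local,dest=./dist/ \
    --platform=linux/amd64 \
    --target archive \
    -f Dockerfile \
    .
```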