mirror of https://github.com/ollama/ollama
Merge branch 'main' into drifkin/array-head-count-simple
commit a3f7dd3e98
@@ -103,11 +103,6 @@ jobs:
         arch: [amd64]
         preset: ['CPU']
         include:
-          - os: windows
-            arch: amd64
-            preset: 'CUDA 11'
-            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-            cuda-version: '11.3'
           - os: windows
             arch: amd64
             preset: 'CUDA 12'
@@ -160,6 +155,9 @@ jobs:
           echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
           echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
           echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
       - if: matrix.preset == 'CPU'
         run: |
           echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
@@ -178,8 +176,8 @@ jobs:
           key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}
       - name: Build target "${{ matrix.preset }}"
         run: |
-          Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
+          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
           cmake --preset "${{ matrix.preset }}"
           cmake --build --parallel --preset "${{ matrix.preset }}"
           cmake --install build --component "${{ startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || 'CPU' }}" --strip --parallel 8
@@ -246,7 +244,7 @@ jobs:
           dist\${{ matrix.os }}-${{ matrix.arch }}-app.exe

   windows-sign:
-    runs-on: windows-2022
+    runs-on: windows
     environment: release
     needs: [windows-depends, windows-build]
     steps:
@@ -324,8 +322,6 @@ jobs:
           case "$COMPONENT" in
             bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-            lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-            lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
             lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
             lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;

@@ -36,7 +36,7 @@ jobs:
              | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
          }

-         echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT
+         echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT

   linux:
     needs: [changes]
@@ -46,7 +46,7 @@ jobs:
       include:
         - preset: CPU
         - preset: CUDA
-          container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+          container: nvidia/cuda:12.8.1-devel-ubuntu22.04
          flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
         - preset: ROCm
           container: rocm/dev-ubuntu-22.04:6.1.2
@@ -78,7 +78,7 @@ jobs:
       include:
         - preset: CPU
         - preset: CUDA
-          install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
+          install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
           flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
         - preset: ROCm
           install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
@@ -102,7 +102,7 @@ jobs:
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
-           Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
+           Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
          }

          $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
@@ -120,6 +120,9 @@ jobs:
          echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+         echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+         echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
+         echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
      - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
        uses: actions/cache/save@v4
        with:
@@ -133,8 +136,8 @@ jobs:
          path: ${{ github.workspace }}\.ccache
          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
      - run: |
-         Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-         Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
+         Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+         Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
          cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
          cmake --build --parallel --preset "${{ matrix.preset }}"
        env:

@@ -78,14 +78,13 @@ if(CMAKE_CUDA_COMPILER)

    find_package(CUDAToolkit)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
-   set(OLLAMA_CUDA_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/cuda_v${CUDAToolkit_VERSION_MAJOR})
    install(TARGETS ggml-cuda
        RUNTIME_DEPENDENCIES
            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_LIBRARY_DIR}
            PRE_INCLUDE_REGEXES cublas cublasLt cudart
            PRE_EXCLUDE_REGEXES ".*"
-       RUNTIME DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
-       LIBRARY DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
+       RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
+       LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
    )
 endif()

@@ -116,7 +115,11 @@ if(CMAKE_HIP_COMPILER)

    set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
    install(TARGETS ggml-hip
-       RUNTIME_DEPENDENCIES
+       RUNTIME_DEPENDENCY_SET rocm
+       RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
+       LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
+   )
+   install(RUNTIME_DEPENDENCY_SET rocm
            DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
            PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
            PRE_EXCLUDE_REGEXES ".*"

@@ -17,14 +17,6 @@
            "name": "CUDA",
            "inherits": [ "Default" ]
        },
-       {
-           "name": "CUDA 11",
-           "inherits": [ "CUDA" ],
-           "cacheVariables": {
-               "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86",
-               "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2"
-           }
-       },
        {
            "name": "CUDA 12",
            "inherits": [ "CUDA" ],
@@ -79,11 +71,6 @@
            "configurePreset": "CUDA",
            "targets": [ "ggml-cuda" ]
        },
-       {
-           "name": "CUDA 11",
-           "inherits": [ "CUDA" ],
-           "configurePreset": "CUDA 11"
-       },
        {
            "name": "CUDA 12",
            "inherits": [ "CUDA" ],

Dockerfile
@@ -7,12 +7,13 @@ ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2

-# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
+# We require gcc v10 minimum. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
 RUN yum install -y yum-utils \
     && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
     && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
     && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
+    && dnf install -y ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
 ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
@@ -38,15 +39,6 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --build --parallel --preset 'CPU' \
     && cmake --install build --component CPU --strip --parallel 8

-FROM base AS cuda-11
-ARG CUDA11VERSION=11.3
-RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
-ENV PATH=/usr/local/cuda-11/bin:$PATH
-RUN --mount=type=cache,target=/root/.ccache \
-    cmake --preset 'CUDA 11' \
-    && cmake --build --parallel --preset 'CUDA 11' \
-    && cmake --install build --component CUDA --strip --parallel 8
-
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
@@ -98,17 +90,15 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
     go build -trimpath -buildmode=pie -o /bin/ollama .

 FROM --platform=linux/amd64 scratch AS amd64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
-COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
+COPY --from=cuda-12 dist/lib/ollama /lib/ollama

 FROM --platform=linux/arm64 scratch AS arm64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
-COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
-COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
-COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6
+COPY --from=cuda-12 dist/lib/ollama /lib/ollama/cuda_sbsa
+COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/cuda_jetpack5
+COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/cuda_jetpack6

 FROM scratch AS rocm
-COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm
+COPY --from=rocm-6 dist/lib/ollama /lib/ollama

 FROM ${FLAVOR} AS archive
 COPY --from=cpu dist/lib/ollama /lib/ollama

@@ -3,6 +3,7 @@
 package discover

 import (
+    "fmt"
     "log/slog"
     "os"
     "regexp"
@@ -55,10 +56,13 @@ func cudaVariant(gpuInfo CudaGPUInfo) string {
                }
            }
        }
+       return "sbsa"
    }

    // driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers
    if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
+       // The detected driver is older than Feb 2023
+       slog.Warn("old CUDA driver detected - please upgrade to a newer driver", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor))
        return "v11"
    }
    return "v12"

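To make the variant selection above easier to follow, here is a minimal, self-contained Go sketch of the same decision. The `cudaGPUInfo` struct and `variantFor` function are local stand-ins for this example, not the actual `discover` package API:

```go
package main

import "fmt"

// cudaGPUInfo is a stand-in for discover.CudaGPUInfo (illustrative only).
type cudaGPUInfo struct {
	DriverMajor, DriverMinor int
}

// variantFor mirrors the gate in the hunk above: driver 12.0 has problems
// with the CUDA v12 library, so anything older than 12.1 falls back to the
// cuda_v11 runtime; newer drivers use cuda_v12.
func variantFor(gpu cudaGPUInfo) string {
	if gpu.DriverMajor < 12 || (gpu.DriverMajor == 12 && gpu.DriverMinor == 0) {
		return "v11" // driver predates Feb 2023; the real code logs a warning here
	}
	return "v12"
}

func main() {
	fmt.Println(variantFor(cudaGPUInfo{DriverMajor: 12, DriverMinor: 0})) // v11
	fmt.Println(variantFor(cudaGPUInfo{DriverMajor: 12, DriverMinor: 4})) // v12
}
```

On ARM Linux systems the function now returns "sbsa" before reaching this check, which is why the Dockerfile above copies the cuda-12 payload into /lib/ollama/cuda_sbsa for arm64.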
@@ -12,7 +12,7 @@ import (
 // '../lib/ollama' on Linux and the executable's directory on macOS
 // note: distribution builds, additional GPU-specific libraries are
 // found in subdirectories of the returned path, such as
-// 'cuda_v11', 'cuda_v12', 'rocm', etc.
+// 'cuda_v12', 'rocm', etc.
 var LibOllamaPath string = func() string {
     exe, err := os.Executable()
     if err != nil {

@@ -1,6 +1,6 @@
 # GPU
 ## Nvidia
-Ollama supports Nvidia GPUs with compute capability 5.0+.
+Ollama supports Nvidia GPUs with compute capability 5.0+ and driver version 531 and newer.

 Check your compute compatibility to see if your card is supported:
 [https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus)

@@ -43,7 +43,7 @@ Ollama includes multiple LLM libraries compiled for different GPUs and CPU vecto
 In the server log, you will see a message that looks something like this (varies from release to release):

 ```
-Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
+Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
 ```

 **Experimental LLM Library Override**

@@ -0,0 +1,32 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniel Hiltgen <daniel@ollama.com>
+Date: Sun, 22 Jun 2025 09:22:05 -0700
+Subject: [PATCH] temporary prevent rocm+cuda mixed loading
+
+---
+ ggml/src/ggml-backend-reg.cpp | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
+index 4e67d243..8f49f084 100644
+--- a/ggml/src/ggml-backend-reg.cpp
++++ b/ggml/src/ggml-backend-reg.cpp
+@@ -573,8 +573,16 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
+
+     ggml_backend_load_best("blas", silent, dir_path);
+     ggml_backend_load_best("cann", silent, dir_path);
+-    ggml_backend_load_best("cuda", silent, dir_path);
+-    ggml_backend_load_best("hip", silent, dir_path);
++
++    // Avoid mixed hip+cuda configurations
++    const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES");
++    const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES");
++    if (!hip_devices && !rocr_devices) {
++        ggml_backend_load_best("cuda", silent, dir_path);
++    } else {
++        ggml_backend_load_best("hip", silent, dir_path);
++    }
++
+     ggml_backend_load_best("kompute", silent, dir_path);
+     ggml_backend_load_best("metal", silent, dir_path);
+     ggml_backend_load_best("rpc", silent, dir_path);

@@ -139,6 +139,13 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
        gpus = discover.GetCPUInfo()
    }

+   // Verify the requested context size is <= the model training size
+   trainCtx := f.KV().ContextLength()
+   if opts.NumCtx/numParallel > int(trainCtx) && trainCtx > 0 {
+       slog.Warn("requested context size too large for model", "num_ctx", opts.NumCtx, "num_parallel", numParallel, "n_ctx_train", trainCtx)
+       opts.NumCtx = int(trainCtx) * numParallel
+   }
+
    estimate := EstimateGPULayers(gpus, f, projectors, opts, numParallel)
    if len(gpus) > 1 || gpus[0].Library != "cpu" {
        switch {
@@ -311,7 +318,7 @@
        params = append(params, "--mmproj", projectors[0])
    }

-   // iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc.
+   // iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc.
    // adding each library's respective path to the LD_LIBRARY_PATH, until finally running
    // without any LD_LIBRARY_PATH flags
    for {

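The new clamp in NewLlamaServer keeps the per-sequence context within the model's training context. A worked sketch with hypothetical numbers (a standalone function for illustration, not the real llm package signature):

```go
package main

import "fmt"

// clampNumCtx applies the rule from the hunk above: the requested context
// is split across numParallel sequences, and if each sequence would get
// more context than the model was trained with (trainCtx), the request is
// clamped to trainCtx * numParallel.
func clampNumCtx(numCtx, numParallel int, trainCtx uint64) int {
	if numCtx/numParallel > int(trainCtx) && trainCtx > 0 {
		return int(trainCtx) * numParallel
	}
	return numCtx
}

func main() {
	// A model trained on 8192 tokens, 2 parallel slots, num_ctx=32768:
	// 32768/2 = 16384 > 8192, so the request is clamped to 8192*2 = 16384.
	fmt.Println(clampNumCtx(32768, 2, 8192)) // 16384
	fmt.Println(clampNumCtx(8192, 2, 8192))  // 8192 (within limits, unchanged)
}
```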
@@ -573,8 +573,16 @@ void ggml_backend_load_all_from_path(const char * dir_path) {

     ggml_backend_load_best("blas", silent, dir_path);
     ggml_backend_load_best("cann", silent, dir_path);
-    ggml_backend_load_best("cuda", silent, dir_path);
-    ggml_backend_load_best("hip", silent, dir_path);
+
+    // Avoid mixed hip+cuda configurations
+    const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES");
+    const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES");
+    if (!hip_devices && !rocr_devices) {
+        ggml_backend_load_best("cuda", silent, dir_path);
+    } else {
+        ggml_backend_load_best("hip", silent, dir_path);
+    }
+
     ggml_backend_load_best("kompute", silent, dir_path);
     ggml_backend_load_best("metal", silent, dir_path);
     ggml_backend_load_best("rpc", silent, dir_path);

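Both copies of this change (the patch file and the vendored ggml-backend-reg.cpp) implement the same rule: never load the cuda and hip backends into one process, and only prefer hip when the user has explicitly scoped AMD devices. A hedged Go rendering of that decision, using os.LookupEnv to mirror std::getenv's set-vs-unset distinction (the function name is local to this sketch):

```go
package main

import (
	"fmt"
	"os"
)

// pickGPUBackend mirrors the guard above: if neither HIP_VISIBLE_DEVICES
// nor ROCR_VISIBLE_DEVICES is set, load only the cuda backend; if either
// is set (even to an empty value), load only the hip backend.
func pickGPUBackend() string {
	_, hipSet := os.LookupEnv("HIP_VISIBLE_DEVICES")
	_, rocrSet := os.LookupEnv("ROCR_VISIBLE_DEVICES")
	if !hipSet && !rocrSet {
		return "cuda"
	}
	return "hip"
}

func main() {
	fmt.Println("loading backend:", pickGPUBackend())
}
```

In practice this means a user on a mixed NVIDIA+AMD machine can opt into ROCm by exporting HIP_VISIBLE_DEVICES, and gets the CUDA backend otherwise.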
@@ -27,7 +27,6 @@ function checkEnv() {
        $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
    }
    # Locate CUDA versions
-   # Note: this assumes every version found will be built
    $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
    if ($cudaList.length -eq 0) {
        $d=(get-command -ea 'silentlycontinue' nvcc).path
@@ -94,19 +93,6 @@ function buildOllama() {

    $hashEnv = @{}
    Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
-   if ("$script:CUDA_DIRS".Contains("v11")) {
-       $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
-       $env:CUDAToolkit_ROOT=$hashEnv[$v11]
-       write-host "Building CUDA v11 backend libraries"
-       # Note: cuda v11 requires msvc 2019 so force the older generator
-       # to avoid 2022 (or newer) from being used as the default
-       & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR
-       if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-       & cmake --build --preset "CUDA 11" --config Release --parallel $script:JOBS
-       if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-       & cmake --install build --component "CUDA" --strip
-       if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-   }
    if ("$script:CUDA_DIRS".Contains("v12")) {
        $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
        $env:CUDAToolkit_ROOT=$hashEnv[$v12]

@@ -10,9 +10,7 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \
    --build-arg=GOFLAGS \
    --build-arg=OLLAMA_CUSTOM_CPU_DEFS \
    --build-arg=OLLAMA_SKIP_CUDA_GENERATE \
-   --build-arg=OLLAMA_SKIP_CUDA_11_GENERATE \
    --build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \
-   --build-arg=CUDA_V11_ARCHITECTURES \
    --build-arg=CUDA_V12_ARCHITECTURES \
    --build-arg=OLLAMA_SKIP_ROCM_GENERATE \
    --build-arg=OLLAMA_FAST_BUILD \