mirror of https://github.com/ollama/ollama
81 lines
3.2 KiB
Diff
81 lines
3.2 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Masato Nakasaka <masato.nakasaka@intel.com>
|
|
Date: Fri, 31 Oct 2025 16:18:59 +0900
|
|
Subject: [PATCH] vulkan: Fix crash when FP16 mul_mat accumulation is not
|
|
supported (#16796)
|
|
|
|
* Experimenting crash fix
|
|
|
|
* added assert for aborting and fixed comment
|
|
|
|
* changed to check if a pipeline is empty or not
|
|
|
|
* Moved function in class definition
|
|
|
|
* replaced with is_empty
|
|
|
|
* Modified is_empty to check only unaligned pipelines
|
|
---
|
|
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 20 +++++++++++++-------
|
|
1 file changed, 13 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
index 3604ceb04..80185d9f0 100644
|
|
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
|
@@ -146,8 +146,13 @@ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline);
|
|
struct vk_matmul_pipeline_struct {
|
|
vk_pipeline l, m, s;
|
|
vk_pipeline a_l, a_m, a_s;
|
|
+ // Returns true when all unaligned pipelines are null.
|
|
+ // Only the unaligned variants are checked, since at least one unaligned pipeline must exist
|
|
+ // while aligned pipelines are optional.
|
|
+ bool is_empty() const {
|
|
+ return l == nullptr && m == nullptr && s == nullptr;
|
|
+ }
|
|
};
|
|
-
|
|
typedef std::shared_ptr<vk_matmul_pipeline_struct> vk_matmul_pipeline;
|
|
|
|
struct vk_matmul_pipeline2 {
|
|
@@ -5080,7 +5085,7 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_pipeline(ggml_backend_vk_conte
|
|
if (src1_type == GGML_TYPE_Q8_1) {
|
|
vk_matmul_pipeline pipelines = ctx->device->pipeline_dequant_mul_mat_mat_q8_1[src0_type].f32acc;
|
|
|
|
- if (pipelines->s == nullptr && pipelines->m == nullptr && pipelines->l == nullptr) {
|
|
+ if (pipelines->is_empty()) {
|
|
return nullptr;
|
|
}
|
|
|
|
@@ -5229,7 +5234,7 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_id_pipeline(ggml_backend_vk_co
|
|
if (src1_type == GGML_TYPE_Q8_1) {
|
|
vk_matmul_pipeline pipelines = ctx->device->pipeline_dequant_mul_mat_mat_id_q8_1[src0_type].f32acc;
|
|
|
|
- if (pipelines->s == nullptr && pipelines->m == nullptr && pipelines->l == nullptr) {
|
|
+ if (pipelines->is_empty()) {
|
|
return nullptr;
|
|
}
|
|
|
|
@@ -5264,16 +5269,17 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_id_pipeline(ggml_backend_vk_co
|
|
return nullptr;
|
|
}
|
|
|
|
+ vk_matmul_pipeline2& mmp = ctx->device->pipeline_dequant_mul_mat_mat_id[src0_type];
|
|
// XXX TODO 'prec' is not actually allowed in mul_mat_id.
|
|
bool prefer_fp16acc = ctx->device->fp16 /*&& prec == GGML_PREC_DEFAULT*/;
|
|
- bool support_fp16acc = ctx->device->pipeline_dequant_mul_mat_mat_id[src0_type].f16acc != nullptr;
|
|
- bool support_fp32acc = ctx->device->pipeline_dequant_mul_mat_mat_id[src0_type].f32acc != nullptr;
|
|
+ bool support_fp16acc = !mmp.f16acc->is_empty();
|
|
+ bool support_fp32acc = !mmp.f32acc->is_empty();
|
|
|
|
if (support_fp16acc && (prefer_fp16acc || !support_fp32acc)) {
|
|
- return ctx->device->pipeline_dequant_mul_mat_mat_id[src0_type].f16acc;
|
|
+ return mmp.f16acc;
|
|
} else {
|
|
GGML_ASSERT(support_fp32acc);
|
|
- return ctx->device->pipeline_dequant_mul_mat_mat_id[src0_type].f32acc;
|
|
+ return mmp.f32acc;
|
|
}
|
|
}
|
|
|