vulkan: enable mmvq for q2_k on NVIDIA (llama/17675)
This commit is contained in:
parent
32ba1ec8e0
commit
7e97d3b069
|
|
@ -6948,6 +6948,10 @@ static bool ggml_vk_should_use_mmvq(const vk_device& device, uint32_t m, uint32_
|
|||
// Quantization overhead is not worth it for small k
|
||||
switch (device->vendor_id) {
|
||||
case VK_VENDOR_ID_NVIDIA:
|
||||
if (src0_type == GGML_TYPE_Q2_K) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (k <= 4096) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue