vulkan: enable mmvq for q2_k on NVIDIA (llama/17675)
This commit is contained in:
parent
32ba1ec8e0
commit
7e97d3b069
|
|
@@ -6948,6 +6948,10 @@ static bool ggml_vk_should_use_mmvq(const vk_device& device, uint32_t m, uint32_
|
||||||
// Quantization overhead is not worth it for small k
|
// Quantization overhead is not worth it for small k
|
||||||
switch (device->vendor_id) {
|
switch (device->vendor_id) {
|
||||||
case VK_VENDOR_ID_NVIDIA:
|
case VK_VENDOR_ID_NVIDIA:
|
||||||
|
if (src0_type == GGML_TYPE_Q2_K) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if (k <= 4096) {
|
if (k <= 4096) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue