ggml : remove GGML_KQ_MASK_PAD constant (llama/17910)

* ggml : remove GGML_KQ_MASK_PAD constant

* cont : remove comment
This commit is contained in:
Georgi Gerganov 2025-12-10 20:53:16 +02:00
parent ca8ea18d06
commit cd9b8c6d18
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
2 changed files with 5 additions and 9 deletions

View File

@@ -2305,13 +2305,11 @@ extern "C" {
             float                 stop,
             float                 step);
 
-#define GGML_KQ_MASK_PAD 1
-
-    // q:    [n_embd_k, n_batch,     n_head,    ne3 ]
-    // k:    [n_embd_k, n_kv,        n_head_kv, ne3 ]
-    // v:    [n_embd_v, n_kv,        n_head_kv, ne3 ] !! not transposed !!
-    // mask: [n_kv,     n_batch_pad, ne32,      ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
-    // res:  [n_embd_v, n_head,      n_batch,   ne3 ] !! permuted !!
+    // q:    [n_embd_k, n_batch, n_head,    ne3 ]
+    // k:    [n_embd_k, n_kv,    n_head_kv, ne3 ]
+    // v:    [n_embd_v, n_kv,    n_head_kv, ne3 ] !! not transposed !!
+    // mask: [n_kv,     n_batch, ne32,      ne33]
+    // res:  [n_embd_v, n_head,  n_batch,   ne3 ] !! permuted !!
     //
     // broadcast:
     //   n_head % n_head_kv == 0

View File

@@ -5260,8 +5260,6 @@ struct ggml_tensor * ggml_flash_attn_ext(
     if (mask) {
         GGML_ASSERT(ggml_is_contiguous(mask));
-        GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
-                "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
         //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
         GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);