ggml : remove GGML_KQ_MASK_PAD constant (llama/17910)
* ggml : remove GGML_KQ_MASK_PAD constant

* cont : remove comment
This commit is contained in:
parent ca8ea18d06
commit cd9b8c6d18
@@ -2305,12 +2305,10 @@ extern "C" {
             float                 stop,
             float                 step);
 
-#define GGML_KQ_MASK_PAD 1
-
     // q:    [n_embd_k, n_batch,     n_head,    ne3 ]
     // k:    [n_embd_k, n_kv,        n_head_kv, ne3 ]
     // v:    [n_embd_v, n_kv,        n_head_kv, ne3 ] !! not transposed !!
-    // mask: [n_kv,     n_batch_pad, ne32,      ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
+    // mask: [n_kv,     n_batch,     ne32,      ne33]
     // res:  [n_embd_v, n_head,      n_batch,   ne3 ] !! permuted !!
     //
     // broadcast:
@@ -5260,8 +5260,6 @@ struct ggml_tensor * ggml_flash_attn_ext(
 
     if (mask) {
         GGML_ASSERT(ggml_is_contiguous(mask));
-        GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
-                "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
         //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
 
         GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
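
A minimal caller-side sketch (not part of this commit) of what the removal of GGML_KQ_MASK_PAD means: the KQ mask's second dimension no longer needs to be padded with GGML_PAD(n_batch, GGML_KQ_MASK_PAD); a mask of exactly n_batch rows is accepted. The helper name build_flash_attn, the tensor sizes, and the F16 mask type are illustrative assumptions; the scale/max_bias/logit_softcap parameters of ggml_flash_attn_ext are assumed to match the declaration in ggml.h.

    #include <math.h>
    #include "ggml.h"

    // Sketch: build a flash-attention node whose mask has ne[1] == n_batch,
    // without the former GGML_PAD(n_batch, GGML_KQ_MASK_PAD) padding.
    static struct ggml_tensor * build_flash_attn(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,    // [n_embd_k, n_batch, n_head,    1]
            struct ggml_tensor  * k,    // [n_embd_k, n_kv,    n_head_kv, 1]
            struct ggml_tensor  * v,    // [n_embd_v, n_kv,    n_head_kv, 1]
            int64_t               n_kv,
            int64_t               n_batch) {
        // before this change: ne[1] had to be GGML_PAD(n_batch, GGML_KQ_MASK_PAD)
        // after this change:  ne[1] can be exactly n_batch
        struct ggml_tensor * mask = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_kv, n_batch);

        const float scale = 1.0f/sqrtf((float) q->ne[0]);

        // max_bias = 0.0f (no ALiBi), logit_softcap = 0.0f (disabled)
        return ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0.0f, 0.0f);
    }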