ggml : allow fill node alloc inplace (llama/17870)

2025-12-09 12:23:47 +01:00 · 2025-12-09 12:23:47 +01:00 · ba463fb577
parent 79d86a5c2c
commit ba463fb577
2 changed files with 2 additions and 1 deletion
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -25,6 +25,7 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
 // ops that return true for this function must not use restrict pointers for their backend implementations
 bool ggml_op_can_inplace(enum ggml_op op) {
    switch (op) {
+        case GGML_OP_FILL:
        case GGML_OP_SCALE:
        case GGML_OP_DIAG_MASK_ZERO:
        case GGML_OP_DIAG_MASK_INF:
--- a/ggml/src/ggml-cuda/fill.cu
+++ b/ggml/src/ggml-cuda/fill.cu
@@ -4,7 +4,7 @@
 #define CUDA_FILL_BLOCK_SIZE 256
 template <typename T>
-static __global__ void fill_kernel(T * __restrict__ dst, const int64_t k, const T value) {
+static __global__ void fill_kernel(T * dst, const int64_t k, const T value) {
    const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= k) {
        return;