ggml : allow fill node alloc inplace (llama/17870)
This commit is contained in:
parent
79d86a5c2c
commit
ba463fb577
|
|
@@ -25,6 +25,7 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
|
||||||
// ops that return true for this function must not use restrict pointers for their backend implementations
|
// ops that return true for this function must not use restrict pointers for their backend implementations
|
||||||
bool ggml_op_can_inplace(enum ggml_op op) {
|
bool ggml_op_can_inplace(enum ggml_op op) {
|
||||||
switch (op) {
|
switch (op) {
|
||||||
|
case GGML_OP_FILL:
|
||||||
case GGML_OP_SCALE:
|
case GGML_OP_SCALE:
|
||||||
case GGML_OP_DIAG_MASK_ZERO:
|
case GGML_OP_DIAG_MASK_ZERO:
|
||||||
case GGML_OP_DIAG_MASK_INF:
|
case GGML_OP_DIAG_MASK_INF:
|
||||||
|
|
|
||||||
|
|
@@ -4,7 +4,7 @@
|
||||||
#define CUDA_FILL_BLOCK_SIZE 256
|
#define CUDA_FILL_BLOCK_SIZE 256
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __global__ void fill_kernel(T * __restrict__ dst, const int64_t k, const T value) {
|
static __global__ void fill_kernel(T * dst, const int64_t k, const T value) {
|
||||||
const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x;
|
const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
if (i >= k) {
|
if (i >= k) {
|
||||||
return;
|
return;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue