Merge tag 'bpf-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:
 - Add BPF uprobe session support (Jiri Olsa)
 - Optimize uprobe performance (Andrii Nakryiko)
 - Add bpf_fastcall support to helpers and kfuncs (Eduard Zingerman)
 - Avoid calling free_htab_elem() under hash map bucket lock (Hou Tao)
 - Prevent tailcall infinite loop caused by freplace (Leon Hwang)
 - Mark raw_tracepoint arguments as nullable (Kumar Kartikeya Dwivedi)
 - Introduce uptr support in the task local storage map (Martin KaFai Lau)
 - Stringify errno log messages in libbpf (Mykyta Yatsenko)
 - Add kmem_cache BPF iterator for perf's lock profiling (Namhyung Kim)
 - Support BPF objects of either endianness in libbpf (Tony Ambardar)
 - Add ksym to struct_ops trampoline to fix stack trace (Xu Kuohai)
 - Introduce private stack for eligible BPF programs (Yonghong Song)
 - Migrate samples/bpf tests to selftests/bpf test_progs (Daniel T. Lee)
 - Migrate test_sock to selftests/bpf test_progs (Jordan Rife)

* tag 'bpf-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (152 commits)
  libbpf: Change hash_combine parameters from long to unsigned long
  selftests/bpf: Fix build error with llvm 19
  libbpf: Fix memory leak in bpf_program__attach_uprobe_multi
  bpf: use common instruction history across all states
  bpf: Add necessary migrate_disable to range_tree.
  bpf: Do not alloc arena on unsupported arches
  selftests/bpf: Set test path for token/obj_priv_implicit_token_envvar
  selftests/bpf: Add a test for arena range tree algorithm
  bpf: Introduce range_tree data structure and use it in bpf arena
  samples/bpf: Remove unused variable in xdp2skb_meta_kern.c
  samples/bpf: Remove unused variables in tc_l2_redirect_kern.c
  bpftool: Cast variable `var` to long long
  bpf, x86: Propagate tailcall info only for subprogs
  bpf: Add kernel symbol for struct_ops trampoline
  bpf: Use function pointers count as struct_ops links count
  bpf: Remove unused member rcu from bpf_struct_ops_map
  selftests/bpf: Add struct_ops prog private stack tests
  bpf: Support private stack for struct_ops progs
  selftests/bpf: Add tracing prog private stack tests
  bpf, x86: Support private stack in jit
  ...
This commit is contained in:
@@ -16,7 +16,7 @@ obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
|
||||
obj-$(CONFIG_BPF_JIT) += trampoline.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o
|
||||
ifeq ($(CONFIG_MMU)$(CONFIG_64BIT),yy)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += arena.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += arena.o range_tree.o
|
||||
endif
|
||||
obj-$(CONFIG_BPF_JIT) += dispatcher.o
|
||||
ifeq ($(CONFIG_NET),y)
|
||||
@@ -52,3 +52,4 @@ obj-$(CONFIG_BPF_PRELOAD) += preload/
|
||||
obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
|
||||
|
||||
@@ -3,9 +3,11 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/err.h>
|
||||
#include "linux/filter.h"
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include "range_tree.h"
|
||||
|
||||
/*
|
||||
* bpf_arena is a sparsely populated shared memory region between bpf program and
|
||||
@@ -45,7 +47,7 @@ struct bpf_arena {
|
||||
u64 user_vm_start;
|
||||
u64 user_vm_end;
|
||||
struct vm_struct *kern_vm;
|
||||
struct maple_tree mt;
|
||||
struct range_tree rt;
|
||||
struct list_head vma_list;
|
||||
struct mutex lock;
|
||||
};
|
||||
@@ -98,6 +100,9 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
|
||||
u64 vm_range;
|
||||
int err = -ENOMEM;
|
||||
|
||||
if (!bpf_jit_supports_arena())
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
if (attr->key_size || attr->value_size || attr->max_entries == 0 ||
|
||||
/* BPF_F_MMAPABLE must be set */
|
||||
!(attr->map_flags & BPF_F_MMAPABLE) ||
|
||||
@@ -132,7 +137,8 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
|
||||
|
||||
INIT_LIST_HEAD(&arena->vma_list);
|
||||
bpf_map_init_from_attr(&arena->map, attr);
|
||||
mt_init_flags(&arena->mt, MT_FLAGS_ALLOC_RANGE);
|
||||
range_tree_init(&arena->rt);
|
||||
range_tree_set(&arena->rt, 0, attr->max_entries);
|
||||
mutex_init(&arena->lock);
|
||||
|
||||
return &arena->map;
|
||||
@@ -183,7 +189,7 @@ static void arena_map_free(struct bpf_map *map)
|
||||
apply_to_existing_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
|
||||
KERN_VM_SZ - GUARD_SZ, existing_page_cb, NULL);
|
||||
free_vm_area(arena->kern_vm);
|
||||
mtree_destroy(&arena->mt);
|
||||
range_tree_destroy(&arena->rt);
|
||||
bpf_map_area_free(arena);
|
||||
}
|
||||
|
||||
@@ -274,20 +280,20 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
|
||||
/* User space requested to segfault when page is not allocated by bpf prog */
|
||||
return VM_FAULT_SIGSEGV;
|
||||
|
||||
ret = mtree_insert(&arena->mt, vmf->pgoff, MT_ENTRY, GFP_KERNEL);
|
||||
ret = range_tree_clear(&arena->rt, vmf->pgoff, 1);
|
||||
if (ret)
|
||||
return VM_FAULT_SIGSEGV;
|
||||
|
||||
/* Account into memcg of the process that created bpf_arena */
|
||||
ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page);
|
||||
if (ret) {
|
||||
mtree_erase(&arena->mt, vmf->pgoff);
|
||||
range_tree_set(&arena->rt, vmf->pgoff, 1);
|
||||
return VM_FAULT_SIGSEGV;
|
||||
}
|
||||
|
||||
ret = vm_area_map_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE, &page);
|
||||
if (ret) {
|
||||
mtree_erase(&arena->mt, vmf->pgoff);
|
||||
range_tree_set(&arena->rt, vmf->pgoff, 1);
|
||||
__free_page(page);
|
||||
return VM_FAULT_SIGSEGV;
|
||||
}
|
||||
@@ -444,12 +450,16 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
||||
|
||||
guard(mutex)(&arena->lock);
|
||||
|
||||
if (uaddr)
|
||||
ret = mtree_insert_range(&arena->mt, pgoff, pgoff + page_cnt - 1,
|
||||
MT_ENTRY, GFP_KERNEL);
|
||||
else
|
||||
ret = mtree_alloc_range(&arena->mt, &pgoff, MT_ENTRY,
|
||||
page_cnt, 0, page_cnt_max - 1, GFP_KERNEL);
|
||||
if (uaddr) {
|
||||
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
|
||||
if (ret)
|
||||
goto out_free_pages;
|
||||
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
|
||||
} else {
|
||||
ret = pgoff = range_tree_find(&arena->rt, page_cnt);
|
||||
if (pgoff >= 0)
|
||||
ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
|
||||
}
|
||||
if (ret)
|
||||
goto out_free_pages;
|
||||
|
||||
@@ -476,7 +486,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
|
||||
kvfree(pages);
|
||||
return clear_lo32(arena->user_vm_start) + uaddr32;
|
||||
out:
|
||||
mtree_erase(&arena->mt, pgoff);
|
||||
range_tree_set(&arena->rt, pgoff, page_cnt);
|
||||
out_free_pages:
|
||||
kvfree(pages);
|
||||
return 0;
|
||||
@@ -516,7 +526,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
|
||||
|
||||
pgoff = compute_pgoff(arena, uaddr);
|
||||
/* clear range */
|
||||
mtree_store_range(&arena->mt, pgoff, pgoff + page_cnt - 1, NULL, GFP_KERNEL);
|
||||
range_tree_set(&arena->rt, pgoff, page_cnt);
|
||||
|
||||
if (page_cnt > 1)
|
||||
/* bulk zap if multiple pages being freed */
|
||||
|
||||
@@ -947,22 +947,44 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
|
||||
struct file *map_file, int fd)
|
||||
{
|
||||
struct bpf_prog *prog = bpf_prog_get(fd);
|
||||
bool is_extended;
|
||||
|
||||
if (IS_ERR(prog))
|
||||
return prog;
|
||||
|
||||
if (!bpf_prog_map_compatible(map, prog)) {
|
||||
if (prog->type == BPF_PROG_TYPE_EXT ||
|
||||
!bpf_prog_map_compatible(map, prog)) {
|
||||
bpf_prog_put(prog);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
mutex_lock(&prog->aux->ext_mutex);
|
||||
is_extended = prog->aux->is_extended;
|
||||
if (!is_extended)
|
||||
prog->aux->prog_array_member_cnt++;
|
||||
mutex_unlock(&prog->aux->ext_mutex);
|
||||
if (is_extended) {
|
||||
/* Extended prog can not be tail callee. It's to prevent a
|
||||
* potential infinite loop like:
|
||||
* tail callee prog entry -> tail callee prog subprog ->
|
||||
* freplace prog entry --tailcall-> tail callee prog entry.
|
||||
*/
|
||||
bpf_prog_put(prog);
|
||||
return ERR_PTR(-EBUSY);
|
||||
}
|
||||
|
||||
return prog;
|
||||
}
|
||||
|
||||
static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
|
||||
{
|
||||
struct bpf_prog *prog = ptr;
|
||||
|
||||
mutex_lock(&prog->aux->ext_mutex);
|
||||
prog->aux->prog_array_member_cnt--;
|
||||
mutex_unlock(&prog->aux->ext_mutex);
|
||||
/* bpf_prog is freed after one RCU or tasks trace grace period */
|
||||
bpf_prog_put(ptr);
|
||||
bpf_prog_put(prog);
|
||||
}
|
||||
|
||||
static u32 prog_fd_array_sys_lookup_elem(void *ptr)
|
||||
|
||||
@@ -107,7 +107,7 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
|
||||
|
||||
bpf_cgrp_storage_lock();
|
||||
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
|
||||
value, map_flags, GFP_ATOMIC);
|
||||
value, map_flags, false, GFP_ATOMIC);
|
||||
bpf_cgrp_storage_unlock();
|
||||
cgroup_put(cgroup);
|
||||
return PTR_ERR_OR_ZERO(sdata);
|
||||
@@ -181,7 +181,7 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
|
||||
if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
|
||||
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
|
||||
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
|
||||
value, BPF_NOEXIST, gfp_flags);
|
||||
value, BPF_NOEXIST, false, gfp_flags);
|
||||
|
||||
unlock:
|
||||
bpf_cgrp_storage_unlock();
|
||||
|
||||
@@ -99,7 +99,7 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
|
||||
|
||||
sdata = bpf_local_storage_update(file_inode(fd_file(f)),
|
||||
(struct bpf_local_storage_map *)map,
|
||||
value, map_flags, GFP_ATOMIC);
|
||||
value, map_flags, false, GFP_ATOMIC);
|
||||
return PTR_ERR_OR_ZERO(sdata);
|
||||
}
|
||||
|
||||
@@ -153,7 +153,7 @@ BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
|
||||
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
|
||||
sdata = bpf_local_storage_update(
|
||||
inode, (struct bpf_local_storage_map *)map, value,
|
||||
BPF_NOEXIST, gfp_flags);
|
||||
BPF_NOEXIST, false, gfp_flags);
|
||||
return IS_ERR(sdata) ? (unsigned long)NULL :
|
||||
(unsigned long)sdata->data;
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
|
||||
|
||||
struct bpf_local_storage_elem *
|
||||
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
|
||||
void *value, bool charge_mem, gfp_t gfp_flags)
|
||||
void *value, bool charge_mem, bool swap_uptrs, gfp_t gfp_flags)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
|
||||
@@ -99,9 +99,12 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
|
||||
}
|
||||
|
||||
if (selem) {
|
||||
if (value)
|
||||
if (value) {
|
||||
/* No need to call check_and_init_map_value as memory is zero init */
|
||||
copy_map_value(&smap->map, SDATA(selem)->data, value);
|
||||
/* No need to call check_and_init_map_value as memory is zero init */
|
||||
if (swap_uptrs)
|
||||
bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value);
|
||||
}
|
||||
return selem;
|
||||
}
|
||||
|
||||
@@ -209,8 +212,12 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
/* RCU callback that releases one local-storage element.
 *
 * Recovers the selem from its embedded rcu_head, reads the owning map from
 * SDATA(selem)->smap, frees the special fields in the element's data as
 * described by smap->map.record, and finally returns the element memory
 * with bpf_mem_cache_raw_free().
 */
static void bpf_selem_free_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
|
||||
/* The bpf_local_storage_map_free will wait for rcu_barrier */
|
||||
smap = rcu_dereference_check(SDATA(selem)->smap, 1);
|
||||
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
|
||||
bpf_mem_cache_raw_free(selem);
|
||||
}
|
||||
|
||||
@@ -226,16 +233,25 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
struct bpf_local_storage_map *smap,
|
||||
bool reuse_now)
|
||||
{
|
||||
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
|
||||
|
||||
if (!smap->bpf_ma) {
|
||||
/* Only task storage has uptrs and task storage
|
||||
* has moved to bpf_mem_alloc. Meaning smap->bpf_ma == true
|
||||
* for task storage, so this bpf_obj_free_fields() won't unpin
|
||||
* any uptr.
|
||||
*/
|
||||
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
|
||||
__bpf_selem_free(selem, reuse_now);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!reuse_now) {
|
||||
call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
|
||||
} else {
|
||||
if (reuse_now) {
|
||||
/* reuse_now == true only happens when the storage owner
|
||||
* (e.g. task_struct) is being destructed or the map itself
|
||||
* is being destructed (ie map_free). In both cases,
|
||||
* no bpf prog can have a hold on the selem. It is
|
||||
* safe to unpin the uptrs and free the selem now.
|
||||
*/
|
||||
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
|
||||
/* Instead of using the vanilla call_rcu(),
|
||||
* bpf_mem_cache_free will be able to reuse selem
|
||||
* immediately.
|
||||
@@ -243,6 +259,26 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
migrate_disable();
|
||||
bpf_mem_cache_free(&smap->selem_ma, selem);
|
||||
migrate_enable();
|
||||
return;
|
||||
}
|
||||
|
||||
call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
|
||||
}
|
||||
|
||||
/* Free every selem queued on @list (linked through selem->free_node).
 *
 * For each entry the owning smap is read from SDATA(selem)->smap and the
 * element is handed to bpf_selem_free() together with @reuse_now.
 * The callers visible in this file invoke this only after releasing
 * local_storage->lock.
 */
static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage_map *smap;
|
||||
struct hlist_node *n;
|
||||
|
||||
/* The "_safe" iteration is needed.
|
||||
* The loop is not removing the selem from the list
|
||||
* but bpf_selem_free will use the selem->rcu_head
|
||||
* which is union-ized with the selem->free_node.
|
||||
*/
|
||||
hlist_for_each_entry_safe(selem, n, list, free_node) {
|
||||
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
|
||||
bpf_selem_free(selem, smap, reuse_now);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -252,7 +288,7 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
*/
|
||||
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_elem *selem,
|
||||
bool uncharge_mem, bool reuse_now)
|
||||
bool uncharge_mem, struct hlist_head *free_selem_list)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
bool free_local_storage;
|
||||
@@ -296,7 +332,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
|
||||
SDATA(selem))
|
||||
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
|
||||
|
||||
bpf_selem_free(selem, smap, reuse_now);
|
||||
hlist_add_head(&selem->free_node, free_selem_list);
|
||||
|
||||
if (rcu_access_pointer(local_storage->smap) == smap)
|
||||
RCU_INIT_POINTER(local_storage->smap, NULL);
|
||||
@@ -345,6 +381,7 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
|
||||
struct bpf_local_storage_map *storage_smap;
|
||||
struct bpf_local_storage *local_storage;
|
||||
bool bpf_ma, free_local_storage = false;
|
||||
HLIST_HEAD(selem_free_list);
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(!selem_linked_to_storage_lockless(selem)))
|
||||
@@ -360,9 +397,11 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
if (likely(selem_linked_to_storage(selem)))
|
||||
free_local_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, true, reuse_now);
|
||||
local_storage, selem, true, &selem_free_list);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
|
||||
bpf_selem_free_list(&selem_free_list, reuse_now);
|
||||
|
||||
if (free_local_storage)
|
||||
bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now);
|
||||
}
|
||||
@@ -524,11 +563,12 @@ uncharge:
|
||||
*/
|
||||
struct bpf_local_storage_data *
|
||||
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
void *value, u64 map_flags, gfp_t gfp_flags)
|
||||
void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags)
|
||||
{
|
||||
struct bpf_local_storage_data *old_sdata = NULL;
|
||||
struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
|
||||
struct bpf_local_storage *local_storage;
|
||||
HLIST_HEAD(old_selem_free_list);
|
||||
unsigned long flags;
|
||||
int err;
|
||||
|
||||
@@ -550,7 +590,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
|
||||
selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags);
|
||||
if (!selem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@@ -584,7 +624,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
/* A lookup has just been done before and concluded a new selem is
|
||||
* needed. The chance of an unnecessary alloc is unlikely.
|
||||
*/
|
||||
alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
|
||||
alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags);
|
||||
if (!alloc_selem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@@ -624,11 +664,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
if (old_sdata) {
|
||||
bpf_selem_unlink_map(SELEM(old_sdata));
|
||||
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
|
||||
true, false);
|
||||
true, &old_selem_free_list);
|
||||
}
|
||||
|
||||
unlock:
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
bpf_selem_free_list(&old_selem_free_list, false);
|
||||
if (alloc_selem) {
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
bpf_selem_free(alloc_selem, smap, true);
|
||||
@@ -706,6 +747,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
|
||||
struct bpf_local_storage_map *storage_smap;
|
||||
struct bpf_local_storage_elem *selem;
|
||||
bool bpf_ma, free_storage = false;
|
||||
HLIST_HEAD(free_selem_list);
|
||||
struct hlist_node *n;
|
||||
unsigned long flags;
|
||||
|
||||
@@ -734,10 +776,12 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
|
||||
* of the loop will set the free_cgroup_storage to true.
|
||||
*/
|
||||
free_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, true, true);
|
||||
local_storage, selem, true, &free_selem_list);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
|
||||
bpf_selem_free_list(&free_selem_list, true);
|
||||
|
||||
if (free_storage)
|
||||
bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
|
||||
}
|
||||
@@ -883,6 +927,9 @@ void bpf_local_storage_map_free(struct bpf_map *map,
|
||||
synchronize_rcu();
|
||||
|
||||
if (smap->bpf_ma) {
|
||||
rcu_barrier_tasks_trace();
|
||||
if (!rcu_trace_implies_rcu_gp())
|
||||
rcu_barrier();
|
||||
bpf_mem_alloc_destroy(&smap->selem_ma);
|
||||
bpf_mem_alloc_destroy(&smap->storage_ma);
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
|
||||
|
||||
struct bpf_struct_ops_map {
|
||||
struct bpf_map map;
|
||||
struct rcu_head rcu;
|
||||
const struct bpf_struct_ops_desc *st_ops_desc;
|
||||
/* protect map_update */
|
||||
struct mutex lock;
|
||||
@@ -32,7 +31,9 @@ struct bpf_struct_ops_map {
|
||||
* (in kvalue.data).
|
||||
*/
|
||||
struct bpf_link **links;
|
||||
u32 links_cnt;
|
||||
/* ksyms for bpf trampolines */
|
||||
struct bpf_ksym **ksyms;
|
||||
u32 funcs_cnt;
|
||||
u32 image_pages_cnt;
|
||||
/* image_pages is an array of pages that has all the trampolines
|
||||
* that stores the func args before calling the bpf_prog.
|
||||
@@ -481,11 +482,11 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < st_map->links_cnt; i++) {
|
||||
if (st_map->links[i]) {
|
||||
bpf_link_put(st_map->links[i]);
|
||||
st_map->links[i] = NULL;
|
||||
}
|
||||
for (i = 0; i < st_map->funcs_cnt; i++) {
|
||||
if (!st_map->links[i])
|
||||
break;
|
||||
bpf_link_put(st_map->links[i]);
|
||||
st_map->links[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initialize @ksym for one struct_ops trampoline.
 *
 * The symbol name is formatted as "bpf__<tname>_<mname>", bounded by
 * KSYM_NAME_LEN. The ksym's list node is initialized and @image/@size are
 * passed on to bpf_image_ksym_init(). This function does not register the
 * ksym; that is done separately via bpf_image_ksym_add().
 */
static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
|
||||
void *image, unsigned int size,
|
||||
struct bpf_ksym *ksym)
|
||||
{
|
||||
snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
|
||||
INIT_LIST_HEAD_RCU(&ksym->lnode);
|
||||
bpf_image_ksym_init(image, size, ksym);
|
||||
}
|
||||
|
||||
/* Call bpf_image_ksym_add() on each populated entry of st_map->ksyms.
 *
 * Walks at most st_map->funcs_cnt slots and stops at the first NULL slot,
 * i.e. the array is treated as densely filled from index 0.
 */
static void bpf_struct_ops_map_add_ksyms(struct bpf_struct_ops_map *st_map)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < st_map->funcs_cnt; i++) {
|
||||
if (!st_map->ksyms[i])
|
||||
break;
|
||||
bpf_image_ksym_add(st_map->ksyms[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Call bpf_image_ksym_del() on each populated entry of st_map->ksyms.
 *
 * Walks at most st_map->funcs_cnt slots and stops at the first NULL slot.
 * The ksym objects themselves are not freed here.
 */
static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < st_map->funcs_cnt; i++) {
|
||||
if (!st_map->ksyms[i])
|
||||
break;
|
||||
bpf_image_ksym_del(st_map->ksyms[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* kfree() each populated entry of st_map->ksyms and reset its slot to NULL.
 *
 * Walks at most st_map->funcs_cnt slots and stops at the first NULL slot.
 * The st_map->ksyms array itself is not freed here.
 */
static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < st_map->funcs_cnt; i++) {
|
||||
if (!st_map->ksyms[i])
|
||||
break;
|
||||
kfree(st_map->ksyms[i]);
|
||||
st_map->ksyms[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
void *value, u64 flags)
|
||||
{
|
||||
@@ -601,6 +645,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
int prog_fd, err;
|
||||
u32 i, trampoline_start, image_off = 0;
|
||||
void *cur_image = NULL, *image = NULL;
|
||||
struct bpf_link **plink;
|
||||
struct bpf_ksym **pksym;
|
||||
const char *tname, *mname;
|
||||
|
||||
if (flags)
|
||||
return -EINVAL;
|
||||
@@ -639,14 +686,19 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
udata = &uvalue->data;
|
||||
kdata = &kvalue->data;
|
||||
|
||||
plink = st_map->links;
|
||||
pksym = st_map->ksyms;
|
||||
tname = btf_name_by_offset(st_map->btf, t->name_off);
|
||||
module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
|
||||
for_each_member(i, t, member) {
|
||||
const struct btf_type *mtype, *ptype;
|
||||
struct bpf_prog *prog;
|
||||
struct bpf_tramp_link *link;
|
||||
struct bpf_ksym *ksym;
|
||||
u32 moff;
|
||||
|
||||
moff = __btf_member_bit_offset(t, member) / 8;
|
||||
mname = btf_name_by_offset(st_map->btf, member->name_off);
|
||||
ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
|
||||
if (ptype == module_type) {
|
||||
if (*(void **)(udata + moff))
|
||||
@@ -714,7 +766,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
}
|
||||
bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
|
||||
&bpf_struct_ops_link_lops, prog);
|
||||
st_map->links[i] = &link->link;
|
||||
*plink++ = &link->link;
|
||||
|
||||
ksym = kzalloc(sizeof(*ksym), GFP_USER);
|
||||
if (!ksym) {
|
||||
err = -ENOMEM;
|
||||
goto reset_unlock;
|
||||
}
|
||||
*pksym++ = ksym;
|
||||
|
||||
trampoline_start = image_off;
|
||||
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
|
||||
@@ -735,6 +794,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
|
||||
/* put prog_id to udata */
|
||||
*(unsigned long *)(udata + moff) = prog->aux->id;
|
||||
|
||||
/* init ksym for this trampoline */
|
||||
bpf_struct_ops_ksym_init(tname, mname,
|
||||
image + trampoline_start,
|
||||
image_off - trampoline_start,
|
||||
ksym);
|
||||
}
|
||||
|
||||
if (st_ops->validate) {
|
||||
@@ -783,6 +848,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
||||
*/
|
||||
|
||||
reset_unlock:
|
||||
bpf_struct_ops_map_free_ksyms(st_map);
|
||||
bpf_struct_ops_map_free_image(st_map);
|
||||
bpf_struct_ops_map_put_progs(st_map);
|
||||
memset(uvalue, 0, map->value_size);
|
||||
@@ -790,6 +856,8 @@ reset_unlock:
|
||||
unlock:
|
||||
kfree(tlinks);
|
||||
mutex_unlock(&st_map->lock);
|
||||
if (!err)
|
||||
bpf_struct_ops_map_add_ksyms(st_map);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -849,7 +917,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
|
||||
|
||||
if (st_map->links)
|
||||
bpf_struct_ops_map_put_progs(st_map);
|
||||
if (st_map->ksyms)
|
||||
bpf_struct_ops_map_free_ksyms(st_map);
|
||||
bpf_map_area_free(st_map->links);
|
||||
bpf_map_area_free(st_map->ksyms);
|
||||
bpf_struct_ops_map_free_image(st_map);
|
||||
bpf_map_area_free(st_map->uvalue);
|
||||
bpf_map_area_free(st_map);
|
||||
@@ -866,6 +937,8 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
|
||||
if (btf_is_module(st_map->btf))
|
||||
module_put(st_map->st_ops_desc->st_ops->owner);
|
||||
|
||||
bpf_struct_ops_map_del_ksyms(st_map);
|
||||
|
||||
/* The struct_ops's function may switch to another struct_ops.
|
||||
*
|
||||
* For example, bpf_tcp_cc_x->init() may switch to
|
||||
@@ -895,6 +968,19 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Count the members of @t that btf_type_resolve_func_ptr() accepts.
 *
 * Iterates over every member of @t and increments the count whenever
 * btf_type_resolve_func_ptr(@btf, member->type, NULL) evaluates true.
 * The result is used in this file to size the struct_ops links and ksyms
 * arrays (st_map->funcs_cnt).
 */
static u32 count_func_ptrs(const struct btf *btf, const struct btf_type *t)
|
||||
{
|
||||
int i;
|
||||
u32 count;
|
||||
const struct btf_member *member;
|
||||
|
||||
count = 0;
|
||||
for_each_member(i, t, member)
|
||||
if (btf_type_resolve_func_ptr(btf, member->type, NULL))
|
||||
count++;
|
||||
return count;
|
||||
}
|
||||
|
||||
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
const struct bpf_struct_ops_desc *st_ops_desc;
|
||||
@@ -961,11 +1047,15 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
|
||||
map = &st_map->map;
|
||||
|
||||
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
|
||||
st_map->links_cnt = btf_type_vlen(t);
|
||||
st_map->funcs_cnt = count_func_ptrs(btf, t);
|
||||
st_map->links =
|
||||
bpf_map_area_alloc(st_map->links_cnt * sizeof(struct bpf_links *),
|
||||
bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_link *),
|
||||
NUMA_NO_NODE);
|
||||
if (!st_map->uvalue || !st_map->links) {
|
||||
|
||||
st_map->ksyms =
|
||||
bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksym *),
|
||||
NUMA_NO_NODE);
|
||||
if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
|
||||
ret = -ENOMEM;
|
||||
goto errout_free;
|
||||
}
|
||||
@@ -994,7 +1084,8 @@ static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
|
||||
usage = sizeof(*st_map) +
|
||||
vt->size - sizeof(struct bpf_struct_ops_value);
|
||||
usage += vt->size;
|
||||
usage += btf_type_vlen(vt) * sizeof(struct bpf_links *);
|
||||
usage += st_map->funcs_cnt * sizeof(struct bpf_link *);
|
||||
usage += st_map->funcs_cnt * sizeof(struct bpf_ksym *);
|
||||
usage += PAGE_SIZE;
|
||||
return usage;
|
||||
}
|
||||
|
||||
@@ -128,6 +128,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
|
||||
struct pid *pid;
|
||||
int fd, err;
|
||||
|
||||
if ((map_flags & BPF_F_LOCK) && btf_record_has_field(map->record, BPF_UPTR))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
fd = *(int *)key;
|
||||
pid = pidfd_get_pid(fd, &f_flags);
|
||||
if (IS_ERR(pid))
|
||||
@@ -146,7 +149,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
|
||||
bpf_task_storage_lock();
|
||||
sdata = bpf_local_storage_update(
|
||||
task, (struct bpf_local_storage_map *)map, value, map_flags,
|
||||
GFP_ATOMIC);
|
||||
true, GFP_ATOMIC);
|
||||
bpf_task_storage_unlock();
|
||||
|
||||
err = PTR_ERR_OR_ZERO(sdata);
|
||||
@@ -218,7 +221,7 @@ static void *__bpf_task_storage_get(struct bpf_map *map,
|
||||
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
|
||||
sdata = bpf_local_storage_update(
|
||||
task, (struct bpf_local_storage_map *)map, value,
|
||||
BPF_NOEXIST, gfp_flags);
|
||||
BPF_NOEXIST, false, gfp_flags);
|
||||
return IS_ERR(sdata) ? NULL : sdata->data;
|
||||
}
|
||||
|
||||
|
||||
@@ -2808,7 +2808,7 @@ static void btf_ref_type_log(struct btf_verifier_env *env,
|
||||
btf_verifier_log(env, "type_id=%u", t->type);
|
||||
}
|
||||
|
||||
static struct btf_kind_operations modifier_ops = {
|
||||
static const struct btf_kind_operations modifier_ops = {
|
||||
.check_meta = btf_ref_type_check_meta,
|
||||
.resolve = btf_modifier_resolve,
|
||||
.check_member = btf_modifier_check_member,
|
||||
@@ -2817,7 +2817,7 @@ static struct btf_kind_operations modifier_ops = {
|
||||
.show = btf_modifier_show,
|
||||
};
|
||||
|
||||
static struct btf_kind_operations ptr_ops = {
|
||||
static const struct btf_kind_operations ptr_ops = {
|
||||
.check_meta = btf_ref_type_check_meta,
|
||||
.resolve = btf_ptr_resolve,
|
||||
.check_member = btf_ptr_check_member,
|
||||
@@ -2858,7 +2858,7 @@ static void btf_fwd_type_log(struct btf_verifier_env *env,
|
||||
btf_verifier_log(env, "%s", btf_type_kflag(t) ? "union" : "struct");
|
||||
}
|
||||
|
||||
static struct btf_kind_operations fwd_ops = {
|
||||
static const struct btf_kind_operations fwd_ops = {
|
||||
.check_meta = btf_fwd_check_meta,
|
||||
.resolve = btf_df_resolve,
|
||||
.check_member = btf_df_check_member,
|
||||
@@ -3109,7 +3109,7 @@ static void btf_array_show(const struct btf *btf, const struct btf_type *t,
|
||||
__btf_array_show(btf, t, type_id, data, bits_offset, show);
|
||||
}
|
||||
|
||||
static struct btf_kind_operations array_ops = {
|
||||
static const struct btf_kind_operations array_ops = {
|
||||
.check_meta = btf_array_check_meta,
|
||||
.resolve = btf_array_resolve,
|
||||
.check_member = btf_array_check_member,
|
||||
@@ -3334,7 +3334,7 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
|
||||
}
|
||||
|
||||
static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
|
||||
u32 off, int sz, struct btf_field_info *info)
|
||||
u32 off, int sz, struct btf_field_info *info, u32 field_mask)
|
||||
{
|
||||
enum btf_field_type type;
|
||||
u32 res_id;
|
||||
@@ -3358,9 +3358,14 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
|
||||
type = BPF_KPTR_REF;
|
||||
else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
|
||||
type = BPF_KPTR_PERCPU;
|
||||
else if (!strcmp("uptr", __btf_name_by_offset(btf, t->name_off)))
|
||||
type = BPF_UPTR;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
if (!(type & field_mask))
|
||||
return BTF_FIELD_IGNORE;
|
||||
|
||||
/* Get the base type */
|
||||
t = btf_type_skip_modifiers(btf, t->type, &res_id);
|
||||
/* Only pointer to struct is allowed */
|
||||
@@ -3502,7 +3507,7 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
|
||||
field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
|
||||
|
||||
/* Only return BPF_KPTR when all other types with matchable names fail */
|
||||
if (field_mask & BPF_KPTR && !__btf_type_is_struct(var_type)) {
|
||||
if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) {
|
||||
type = BPF_KPTR_REF;
|
||||
goto end;
|
||||
}
|
||||
@@ -3535,6 +3540,7 @@ static int btf_repeat_fields(struct btf_field_info *info, int info_cnt,
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
case BPF_KPTR_PERCPU:
|
||||
case BPF_UPTR:
|
||||
case BPF_LIST_HEAD:
|
||||
case BPF_RB_ROOT:
|
||||
break;
|
||||
@@ -3667,8 +3673,9 @@ static int btf_find_field_one(const struct btf *btf,
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
case BPF_KPTR_PERCPU:
|
||||
case BPF_UPTR:
|
||||
ret = btf_find_kptr(btf, var_type, off, sz,
|
||||
info_cnt ? &info[0] : &tmp);
|
||||
info_cnt ? &info[0] : &tmp, field_mask);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
break;
|
||||
@@ -3991,6 +3998,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
case BPF_KPTR_PERCPU:
|
||||
case BPF_UPTR:
|
||||
ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
|
||||
if (ret < 0)
|
||||
goto end;
|
||||
@@ -4050,12 +4058,28 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
|
||||
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
|
||||
* does not form cycles.
|
||||
*/
|
||||
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT))
|
||||
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | BPF_UPTR)))
|
||||
return 0;
|
||||
for (i = 0; i < rec->cnt; i++) {
|
||||
struct btf_struct_meta *meta;
|
||||
const struct btf_type *t;
|
||||
u32 btf_id;
|
||||
|
||||
if (rec->fields[i].type == BPF_UPTR) {
|
||||
/* The uptr only supports pinning one page and cannot
|
||||
* point to a kernel struct
|
||||
*/
|
||||
if (btf_is_kernel(rec->fields[i].kptr.btf))
|
||||
return -EINVAL;
|
||||
t = btf_type_by_id(rec->fields[i].kptr.btf,
|
||||
rec->fields[i].kptr.btf_id);
|
||||
if (!t->size)
|
||||
return -EINVAL;
|
||||
if (t->size > PAGE_SIZE)
|
||||
return -E2BIG;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(rec->fields[i].type & BPF_GRAPH_ROOT))
|
||||
continue;
|
||||
btf_id = rec->fields[i].graph_root.value_btf_id;
|
||||
@@ -4191,7 +4215,7 @@ static void btf_struct_show(const struct btf *btf, const struct btf_type *t,
|
||||
__btf_struct_show(btf, t, type_id, data, bits_offset, show);
|
||||
}
|
||||
|
||||
static struct btf_kind_operations struct_ops = {
|
||||
static const struct btf_kind_operations struct_ops = {
|
||||
.check_meta = btf_struct_check_meta,
|
||||
.resolve = btf_struct_resolve,
|
||||
.check_member = btf_struct_check_member,
|
||||
@@ -4359,7 +4383,7 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
|
||||
btf_show_end_type(show);
|
||||
}
|
||||
|
||||
static struct btf_kind_operations enum_ops = {
|
||||
static const struct btf_kind_operations enum_ops = {
|
||||
.check_meta = btf_enum_check_meta,
|
||||
.resolve = btf_df_resolve,
|
||||
.check_member = btf_enum_check_member,
|
||||
@@ -4462,7 +4486,7 @@ static void btf_enum64_show(const struct btf *btf, const struct btf_type *t,
|
||||
btf_show_end_type(show);
|
||||
}
|
||||
|
||||
static struct btf_kind_operations enum64_ops = {
|
||||
static const struct btf_kind_operations enum64_ops = {
|
||||
.check_meta = btf_enum64_check_meta,
|
||||
.resolve = btf_df_resolve,
|
||||
.check_member = btf_enum_check_member,
|
||||
@@ -4540,7 +4564,7 @@ done:
|
||||
btf_verifier_log(env, ")");
|
||||
}
|
||||
|
||||
static struct btf_kind_operations func_proto_ops = {
|
||||
static const struct btf_kind_operations func_proto_ops = {
|
||||
.check_meta = btf_func_proto_check_meta,
|
||||
.resolve = btf_df_resolve,
|
||||
/*
|
||||
@@ -4598,7 +4622,7 @@ static int btf_func_resolve(struct btf_verifier_env *env,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct btf_kind_operations func_ops = {
|
||||
static const struct btf_kind_operations func_ops = {
|
||||
.check_meta = btf_func_check_meta,
|
||||
.resolve = btf_func_resolve,
|
||||
.check_member = btf_df_check_member,
|
||||
@@ -5566,7 +5590,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
|
||||
goto free_aof;
|
||||
}
|
||||
|
||||
ret = btf_find_kptr(btf, t, 0, 0, &tmp);
|
||||
ret = btf_find_kptr(btf, t, 0, 0, &tmp, BPF_KPTR);
|
||||
if (ret != BTF_FIELD_FOUND)
|
||||
continue;
|
||||
|
||||
@@ -6564,7 +6588,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
||||
if (prog_args_trusted(prog))
|
||||
info->reg_type |= PTR_TRUSTED;
|
||||
|
||||
if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
|
||||
/* Raw tracepoint arguments always get marked as maybe NULL */
|
||||
if (bpf_prog_is_raw_tp(prog))
|
||||
info->reg_type |= PTR_MAYBE_NULL;
|
||||
else if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
|
||||
info->reg_type |= PTR_MAYBE_NULL;
|
||||
|
||||
if (tgt_prog) {
|
||||
|
||||
@@ -131,6 +131,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
|
||||
INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
|
||||
#endif
|
||||
mutex_init(&fp->aux->used_maps_mutex);
|
||||
mutex_init(&fp->aux->ext_mutex);
|
||||
mutex_init(&fp->aux->dst_mutex);
|
||||
|
||||
return fp;
|
||||
@@ -3044,6 +3045,11 @@ bool __weak bpf_jit_supports_exceptions(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool __weak bpf_jit_supports_private_stack(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -154,7 +154,8 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
|
||||
d->image = NULL;
|
||||
goto out;
|
||||
}
|
||||
bpf_image_ksym_add(d->image, PAGE_SIZE, &d->ksym);
|
||||
bpf_image_ksym_init(d->image, PAGE_SIZE, &d->ksym);
|
||||
bpf_image_ksym_add(&d->ksym);
|
||||
}
|
||||
|
||||
prev_num_progs = d->num_progs;
|
||||
|
||||
@@ -896,9 +896,12 @@ find_first_elem:
|
||||
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
|
||||
{
|
||||
check_and_free_fields(htab, l);
|
||||
|
||||
migrate_disable();
|
||||
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
|
||||
bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
|
||||
bpf_mem_cache_free(&htab->ma, l);
|
||||
migrate_enable();
|
||||
}
|
||||
|
||||
static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
|
||||
@@ -948,7 +951,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
|
||||
if (htab_is_prealloc(htab)) {
|
||||
bpf_map_dec_elem_count(&htab->map);
|
||||
check_and_free_fields(htab, l);
|
||||
__pcpu_freelist_push(&htab->freelist, &l->fnode);
|
||||
pcpu_freelist_push(&htab->freelist, &l->fnode);
|
||||
} else {
|
||||
dec_elem_count(htab);
|
||||
htab_elem_free(htab, l);
|
||||
@@ -1018,7 +1021,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
|
||||
*/
|
||||
pl_new = this_cpu_ptr(htab->extra_elems);
|
||||
l_new = *pl_new;
|
||||
htab_put_fd_value(htab, old_elem);
|
||||
*pl_new = old_elem;
|
||||
} else {
|
||||
struct pcpu_freelist_node *l;
|
||||
@@ -1105,6 +1107,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
struct htab_elem *l_new = NULL, *l_old;
|
||||
struct hlist_nulls_head *head;
|
||||
unsigned long flags;
|
||||
void *old_map_ptr;
|
||||
struct bucket *b;
|
||||
u32 key_size, hash;
|
||||
int ret;
|
||||
@@ -1183,12 +1186,27 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
|
||||
if (l_old) {
|
||||
hlist_nulls_del_rcu(&l_old->hash_node);
|
||||
|
||||
/* l_old has already been stashed in htab->extra_elems, free
|
||||
* its special fields before it is available for reuse. Also
|
||||
* save the old map pointer in htab of maps before unlock
|
||||
* and release it after unlock.
|
||||
*/
|
||||
old_map_ptr = NULL;
|
||||
if (htab_is_prealloc(htab)) {
|
||||
if (map->ops->map_fd_put_ptr)
|
||||
old_map_ptr = fd_htab_map_get_ptr(map, l_old);
|
||||
check_and_free_fields(htab, l_old);
|
||||
}
|
||||
}
|
||||
htab_unlock_bucket(htab, b, hash, flags);
|
||||
if (l_old) {
|
||||
if (old_map_ptr)
|
||||
map->ops->map_fd_put_ptr(map, old_map_ptr, true);
|
||||
if (!htab_is_prealloc(htab))
|
||||
free_htab_elem(htab, l_old);
|
||||
else
|
||||
check_and_free_fields(htab, l_old);
|
||||
}
|
||||
ret = 0;
|
||||
return 0;
|
||||
err:
|
||||
htab_unlock_bucket(htab, b, hash, flags);
|
||||
return ret;
|
||||
@@ -1432,15 +1450,15 @@ static long htab_map_delete_elem(struct bpf_map *map, void *key)
|
||||
return ret;
|
||||
|
||||
l = lookup_elem_raw(head, hash, key, key_size);
|
||||
|
||||
if (l) {
|
||||
if (l)
|
||||
hlist_nulls_del_rcu(&l->hash_node);
|
||||
free_htab_elem(htab, l);
|
||||
} else {
|
||||
else
|
||||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
htab_unlock_bucket(htab, b, hash, flags);
|
||||
|
||||
if (l)
|
||||
free_htab_elem(htab, l);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1853,13 +1871,14 @@ again_nocopy:
|
||||
* may cause deadlock. See comments in function
|
||||
* prealloc_lru_pop(). Let us do bpf_lru_push_free()
|
||||
* after releasing the bucket lock.
|
||||
*
|
||||
* For htab of maps, htab_put_fd_value() in
|
||||
* free_htab_elem() may acquire a spinlock with bucket
|
||||
* lock being held and it violates the lock rule, so
|
||||
* invoke free_htab_elem() after unlock as well.
|
||||
*/
|
||||
if (is_lru_map) {
|
||||
l->batch_flink = node_to_free;
|
||||
node_to_free = l;
|
||||
} else {
|
||||
free_htab_elem(htab, l);
|
||||
}
|
||||
l->batch_flink = node_to_free;
|
||||
node_to_free = l;
|
||||
}
|
||||
dst_key += key_size;
|
||||
dst_val += value_size;
|
||||
@@ -1871,7 +1890,10 @@ again_nocopy:
|
||||
while (node_to_free) {
|
||||
l = node_to_free;
|
||||
node_to_free = node_to_free->batch_flink;
|
||||
htab_lru_push_free(htab, l);
|
||||
if (is_lru_map)
|
||||
htab_lru_push_free(htab, l);
|
||||
else
|
||||
free_htab_elem(htab, l);
|
||||
}
|
||||
|
||||
next_batch:
|
||||
|
||||
@@ -2521,6 +2521,25 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_task_from_vpid - Find a struct task_struct from its vpid by looking it up
|
||||
* in the pid namespace of the current task. If a task is returned, it must
|
||||
* either be stored in a map, or released with bpf_task_release().
|
||||
* @vpid: The vpid of the task being looked up.
|
||||
*/
|
||||
__bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
rcu_read_lock();
|
||||
p = find_task_by_vpid(vpid);
|
||||
if (p)
|
||||
p = bpf_task_acquire(p);
|
||||
rcu_read_unlock();
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
|
||||
* @p: The dynptr whose data slice to retrieve
|
||||
@@ -3068,7 +3087,9 @@ BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
|
||||
BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
|
||||
#endif
|
||||
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_from_vpid, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_throw)
|
||||
BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS)
|
||||
BTF_KFUNCS_END(generic_btf_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set generic_kfunc_set = {
|
||||
@@ -3086,8 +3107,8 @@ BTF_ID(func, bpf_cgroup_release_dtor)
|
||||
#endif
|
||||
|
||||
BTF_KFUNCS_START(common_btf_ids)
|
||||
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
|
||||
BTF_ID_FLAGS(func, bpf_rdonly_cast)
|
||||
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL)
|
||||
BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL)
|
||||
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
|
||||
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
|
||||
BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
|
||||
@@ -3124,6 +3145,10 @@ BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
|
||||
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
|
||||
BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
|
||||
BTF_ID_FLAGS(func, bpf_get_kmem_cache)
|
||||
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
|
||||
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
|
||||
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
|
||||
BTF_KFUNCS_END(common_btf_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set common_kfunc_set = {
|
||||
|
||||
238
kernel/bpf/kmem_cache_iter.c
Normal file
238
kernel/bpf/kmem_cache_iter.c
Normal file
@@ -0,0 +1,238 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2024 Google */
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include "../../mm/slab.h" /* kmem_cache, slab_caches and slab_mutex */
|
||||
|
||||
/* open-coded version */
|
||||
struct bpf_iter_kmem_cache {
|
||||
__u64 __opaque[1];
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_iter_kmem_cache_kern {
|
||||
struct kmem_cache *pos;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
#define KMEM_CACHE_POS_START ((void *)1L)
|
||||
|
||||
__bpf_kfunc_start_defs();
|
||||
|
||||
__bpf_kfunc int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it)
|
||||
{
|
||||
struct bpf_iter_kmem_cache_kern *kit = (void *)it;
|
||||
|
||||
BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
|
||||
BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
|
||||
|
||||
kit->pos = KMEM_CACHE_POS_START;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__bpf_kfunc struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it)
|
||||
{
|
||||
struct bpf_iter_kmem_cache_kern *kit = (void *)it;
|
||||
struct kmem_cache *prev = kit->pos;
|
||||
struct kmem_cache *next;
|
||||
bool destroy = false;
|
||||
|
||||
if (!prev)
|
||||
return NULL;
|
||||
|
||||
mutex_lock(&slab_mutex);
|
||||
|
||||
if (list_empty(&slab_caches)) {
|
||||
mutex_unlock(&slab_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (prev == KMEM_CACHE_POS_START)
|
||||
next = list_first_entry(&slab_caches, struct kmem_cache, list);
|
||||
else if (list_last_entry(&slab_caches, struct kmem_cache, list) == prev)
|
||||
next = NULL;
|
||||
else
|
||||
next = list_next_entry(prev, list);
|
||||
|
||||
/* boot_caches have negative refcount, don't touch them */
|
||||
if (next && next->refcount > 0)
|
||||
next->refcount++;
|
||||
|
||||
/* Skip kmem_cache_destroy() for active entries */
|
||||
if (prev && prev != KMEM_CACHE_POS_START) {
|
||||
if (prev->refcount > 1)
|
||||
prev->refcount--;
|
||||
else if (prev->refcount == 1)
|
||||
destroy = true;
|
||||
}
|
||||
|
||||
mutex_unlock(&slab_mutex);
|
||||
|
||||
if (destroy)
|
||||
kmem_cache_destroy(prev);
|
||||
|
||||
kit->pos = next;
|
||||
return next;
|
||||
}
|
||||
|
||||
__bpf_kfunc void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it)
|
||||
{
|
||||
struct bpf_iter_kmem_cache_kern *kit = (void *)it;
|
||||
struct kmem_cache *s = kit->pos;
|
||||
bool destroy = false;
|
||||
|
||||
if (s == NULL || s == KMEM_CACHE_POS_START)
|
||||
return;
|
||||
|
||||
mutex_lock(&slab_mutex);
|
||||
|
||||
/* Skip kmem_cache_destroy() for active entries */
|
||||
if (s->refcount > 1)
|
||||
s->refcount--;
|
||||
else if (s->refcount == 1)
|
||||
destroy = true;
|
||||
|
||||
mutex_unlock(&slab_mutex);
|
||||
|
||||
if (destroy)
|
||||
kmem_cache_destroy(s);
|
||||
}
|
||||
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
struct bpf_iter__kmem_cache {
|
||||
__bpf_md_ptr(struct bpf_iter_meta *, meta);
|
||||
__bpf_md_ptr(struct kmem_cache *, s);
|
||||
};
|
||||
|
||||
union kmem_cache_iter_priv {
|
||||
struct bpf_iter_kmem_cache it;
|
||||
struct bpf_iter_kmem_cache_kern kit;
|
||||
};
|
||||
|
||||
/* seq_file ->start: locate the slab cache at index *pos.
 *
 * Returns the matching kmem_cache (with a reference taken when its refcount
 * is positive) or NULL when *pos is past the end of slab_caches. The result
 * is also recorded as the iterator position in the seq private data.
 */
static void *kmem_cache_iter_seq_start(struct seq_file *seq, loff_t *pos)
{
	union kmem_cache_iter_priv *priv = seq->private;
	struct kmem_cache *hit = NULL;
	struct kmem_cache *s;
	loff_t idx = 0;

	mutex_lock(&slab_mutex);

	/* Find an entry at the given position in the slab_caches list instead
	 * of keeping a reference (of the last visited entry, if any) out of
	 * slab_mutex. It might miss something if one is deleted in the middle
	 * while it releases the lock. But it should be rare and there's not
	 * much we can do about it.
	 */
	list_for_each_entry(s, &slab_caches, list) {
		if (idx++ != *pos)
			continue;

		/* Make sure this entry remains in the list by getting
		 * a new reference count. Note that boot_cache entries
		 * have a negative refcount, so don't touch them.
		 */
		if (s->refcount > 0)
			s->refcount++;
		hit = s;
		break;
	}

	mutex_unlock(&slab_mutex);

	priv->kit.pos = hit;
	return hit;
}
|
||||
|
||||
static void kmem_cache_iter_seq_stop(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct bpf_iter_meta meta;
|
||||
struct bpf_iter__kmem_cache ctx = {
|
||||
.meta = &meta,
|
||||
.s = v,
|
||||
};
|
||||
union kmem_cache_iter_priv *p = seq->private;
|
||||
struct bpf_prog *prog;
|
||||
|
||||
meta.seq = seq;
|
||||
prog = bpf_iter_get_info(&meta, true);
|
||||
if (prog && !ctx.s)
|
||||
bpf_iter_run_prog(prog, &ctx);
|
||||
|
||||
bpf_iter_kmem_cache_destroy(&p->it);
|
||||
}
|
||||
|
||||
/* seq_file ->next: bump the position counter and step the shared iterator
 * state kept in the seq private data.
 */
static void *kmem_cache_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	union kmem_cache_iter_priv *priv = seq->private;

	*pos += 1;

	return bpf_iter_kmem_cache_next(&priv->it);
}
|
||||
|
||||
static int kmem_cache_iter_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct bpf_iter_meta meta;
|
||||
struct bpf_iter__kmem_cache ctx = {
|
||||
.meta = &meta,
|
||||
.s = v,
|
||||
};
|
||||
struct bpf_prog *prog;
|
||||
int ret = 0;
|
||||
|
||||
meta.seq = seq;
|
||||
prog = bpf_iter_get_info(&meta, false);
|
||||
if (prog)
|
||||
ret = bpf_iter_run_prog(prog, &ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct seq_operations kmem_cache_iter_seq_ops = {
|
||||
.start = kmem_cache_iter_seq_start,
|
||||
.next = kmem_cache_iter_seq_next,
|
||||
.stop = kmem_cache_iter_seq_stop,
|
||||
.show = kmem_cache_iter_seq_show,
|
||||
};
|
||||
|
||||
BTF_ID_LIST_GLOBAL_SINGLE(bpf_kmem_cache_btf_id, struct, kmem_cache)
|
||||
|
||||
static const struct bpf_iter_seq_info kmem_cache_iter_seq_info = {
|
||||
.seq_ops = &kmem_cache_iter_seq_ops,
|
||||
.seq_priv_size = sizeof(union kmem_cache_iter_priv),
|
||||
};
|
||||
|
||||
static void bpf_iter_kmem_cache_show_fdinfo(const struct bpf_iter_aux_info *aux,
|
||||
struct seq_file *seq)
|
||||
{
|
||||
seq_puts(seq, "kmem_cache iter\n");
|
||||
}
|
||||
|
||||
DEFINE_BPF_ITER_FUNC(kmem_cache, struct bpf_iter_meta *meta,
|
||||
struct kmem_cache *s)
|
||||
|
||||
static struct bpf_iter_reg bpf_kmem_cache_reg_info = {
|
||||
.target = "kmem_cache",
|
||||
.feature = BPF_ITER_RESCHED,
|
||||
.show_fdinfo = bpf_iter_kmem_cache_show_fdinfo,
|
||||
.ctx_arg_info_size = 1,
|
||||
.ctx_arg_info = {
|
||||
{ offsetof(struct bpf_iter__kmem_cache, s),
|
||||
PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
|
||||
},
|
||||
.seq_info = &kmem_cache_iter_seq_info,
|
||||
};
|
||||
|
||||
static int __init bpf_kmem_cache_iter_init(void)
|
||||
{
|
||||
bpf_kmem_cache_reg_info.ctx_arg_info[0].btf_id = bpf_kmem_cache_btf_id[0];
|
||||
return bpf_iter_reg_target(&bpf_kmem_cache_reg_info);
|
||||
}
|
||||
|
||||
late_initcall(bpf_kmem_cache_iter_init);
|
||||
@@ -254,11 +254,8 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic)
|
||||
|
||||
static void free_one(void *obj, bool percpu)
|
||||
{
|
||||
if (percpu) {
|
||||
if (percpu)
|
||||
free_percpu(((void __percpu **)obj)[1]);
|
||||
kfree(obj);
|
||||
return;
|
||||
}
|
||||
|
||||
kfree(obj);
|
||||
}
|
||||
|
||||
272
kernel/bpf/range_tree.c
Normal file
272
kernel/bpf/range_tree.c
Normal file
@@ -0,0 +1,272 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
|
||||
#include <linux/interval_tree_generic.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/bpf_mem_alloc.h>
|
||||
#include <linux/bpf.h>
|
||||
#include "range_tree.h"
|
||||
|
||||
/*
|
||||
* struct range_tree is a data structure used to allocate contiguous memory
|
||||
* ranges in bpf arena. It's a large bitmap. The contiguous sequence of bits is
|
||||
* represented by struct range_node or 'rn' for short.
|
||||
* rn->rn_rbnode links it into an interval tree while
|
||||
* rn->rb_range_size links it into a second rbtree sorted by size of the range.
|
||||
* __find_range() performs binary search and best fit algorithm to find the
|
||||
* smallest range whose size is greater than or equal to the requested size.
|
||||
* range_tree_clear/set() clears or sets a range of bits in this bitmap. The
|
||||
* adjacent ranges are merged or split at the same time.
|
||||
*
|
||||
* The split/merge logic is based/borrowed from XFS's xbitmap32 added
|
||||
* in commit 6772fcc8890a ("xfs: convert xbitmap to interval tree").
|
||||
*
|
||||
* The implementation relies on external lock to protect rbtree-s.
|
||||
* The alloc/free of range_node-s is done via bpf_mem_alloc.
|
||||
*
|
||||
* bpf arena is using range_tree to represent unallocated slots.
|
||||
* At init time:
|
||||
* range_tree_set(rt, 0, max);
|
||||
* Then:
|
||||
* start = range_tree_find(rt, len);
|
||||
* if (start >= 0)
|
||||
* range_tree_clear(rt, start, len);
|
||||
* to find free range and mark slots as allocated and later:
|
||||
* range_tree_set(rt, start, len);
|
||||
* to mark as unallocated after use.
|
||||
*/
|
||||
struct range_node {
|
||||
struct rb_node rn_rbnode;
|
||||
struct rb_node rb_range_size;
|
||||
u32 rn_start;
|
||||
u32 rn_last; /* inclusive */
|
||||
u32 __rn_subtree_last;
|
||||
};
|
||||
|
||||
static struct range_node *rb_to_range_node(struct rb_node *rb)
|
||||
{
|
||||
return rb_entry(rb, struct range_node, rb_range_size);
|
||||
}
|
||||
|
||||
static u32 rn_size(struct range_node *rn)
|
||||
{
|
||||
return rn->rn_last - rn->rn_start + 1;
|
||||
}
|
||||
|
||||
/* Find range that fits best to requested size */
|
||||
static inline struct range_node *__find_range(struct range_tree *rt, u32 len)
|
||||
{
|
||||
struct rb_node *rb = rt->range_size_root.rb_root.rb_node;
|
||||
struct range_node *best = NULL;
|
||||
|
||||
while (rb) {
|
||||
struct range_node *rn = rb_to_range_node(rb);
|
||||
|
||||
if (len <= rn_size(rn)) {
|
||||
best = rn;
|
||||
rb = rb->rb_right;
|
||||
} else {
|
||||
rb = rb->rb_left;
|
||||
}
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
/* Return the start of a range that can hold @len slots, or -ENOENT when
 * __find_range() finds none. The tree itself is not modified.
 */
s64 range_tree_find(struct range_tree *rt, u32 len)
{
	struct range_node *fit = __find_range(rt, len);

	if (fit)
		return fit->rn_start;

	return -ENOENT;
}
|
||||
|
||||
/* Insert the range into rbtree sorted by the range size */
|
||||
static inline void __range_size_insert(struct range_node *rn,
|
||||
struct rb_root_cached *root)
|
||||
{
|
||||
struct rb_node **link = &root->rb_root.rb_node, *rb = NULL;
|
||||
u64 size = rn_size(rn);
|
||||
bool leftmost = true;
|
||||
|
||||
while (*link) {
|
||||
rb = *link;
|
||||
if (size > rn_size(rb_to_range_node(rb))) {
|
||||
link = &rb->rb_left;
|
||||
} else {
|
||||
link = &rb->rb_right;
|
||||
leftmost = false;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&rn->rb_range_size, rb, link);
|
||||
rb_insert_color_cached(&rn->rb_range_size, root, leftmost);
|
||||
}
|
||||
|
||||
#define START(node) ((node)->rn_start)
|
||||
#define LAST(node) ((node)->rn_last)
|
||||
|
||||
INTERVAL_TREE_DEFINE(struct range_node, rn_rbnode, u32,
|
||||
__rn_subtree_last, START, LAST,
|
||||
static inline __maybe_unused,
|
||||
__range_it)
|
||||
|
||||
static inline __maybe_unused void
|
||||
range_it_insert(struct range_node *rn, struct range_tree *rt)
|
||||
{
|
||||
__range_size_insert(rn, &rt->range_size_root);
|
||||
__range_it_insert(rn, &rt->it_root);
|
||||
}
|
||||
|
||||
static inline __maybe_unused void
|
||||
range_it_remove(struct range_node *rn, struct range_tree *rt)
|
||||
{
|
||||
rb_erase_cached(&rn->rb_range_size, &rt->range_size_root);
|
||||
RB_CLEAR_NODE(&rn->rb_range_size);
|
||||
__range_it_remove(rn, &rt->it_root);
|
||||
}
|
||||
|
||||
static inline __maybe_unused struct range_node *
|
||||
range_it_iter_first(struct range_tree *rt, u32 start, u32 last)
|
||||
{
|
||||
return __range_it_iter_first(&rt->it_root, start, last);
|
||||
}
|
||||
|
||||
/* Clear the range in this range tree */
|
||||
int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
|
||||
{
|
||||
u32 last = start + len - 1;
|
||||
struct range_node *new_rn;
|
||||
struct range_node *rn;
|
||||
|
||||
while ((rn = range_it_iter_first(rt, start, last))) {
|
||||
if (rn->rn_start < start && rn->rn_last > last) {
|
||||
u32 old_last = rn->rn_last;
|
||||
|
||||
/* Overlaps with the entire clearing range */
|
||||
range_it_remove(rn, rt);
|
||||
rn->rn_last = start - 1;
|
||||
range_it_insert(rn, rt);
|
||||
|
||||
/* Add a range */
|
||||
migrate_disable();
|
||||
new_rn = bpf_mem_alloc(&bpf_global_ma, sizeof(struct range_node));
|
||||
migrate_enable();
|
||||
if (!new_rn)
|
||||
return -ENOMEM;
|
||||
new_rn->rn_start = last + 1;
|
||||
new_rn->rn_last = old_last;
|
||||
range_it_insert(new_rn, rt);
|
||||
} else if (rn->rn_start < start) {
|
||||
/* Overlaps with the left side of the clearing range */
|
||||
range_it_remove(rn, rt);
|
||||
rn->rn_last = start - 1;
|
||||
range_it_insert(rn, rt);
|
||||
} else if (rn->rn_last > last) {
|
||||
/* Overlaps with the right side of the clearing range */
|
||||
range_it_remove(rn, rt);
|
||||
rn->rn_start = last + 1;
|
||||
range_it_insert(rn, rt);
|
||||
break;
|
||||
} else {
|
||||
/* in the middle of the clearing range */
|
||||
range_it_remove(rn, rt);
|
||||
migrate_disable();
|
||||
bpf_mem_free(&bpf_global_ma, rn);
|
||||
migrate_enable();
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Is the whole range set ? */
|
||||
int is_range_tree_set(struct range_tree *rt, u32 start, u32 len)
|
||||
{
|
||||
u32 last = start + len - 1;
|
||||
struct range_node *left;
|
||||
|
||||
/* Is this whole range set ? */
|
||||
left = range_it_iter_first(rt, start, last);
|
||||
if (left && left->rn_start <= start && left->rn_last >= last)
|
||||
return 0;
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
/* Set the range in this range tree */
|
||||
int range_tree_set(struct range_tree *rt, u32 start, u32 len)
|
||||
{
|
||||
u32 last = start + len - 1;
|
||||
struct range_node *right;
|
||||
struct range_node *left;
|
||||
int err;
|
||||
|
||||
/* Is this whole range already set ? */
|
||||
left = range_it_iter_first(rt, start, last);
|
||||
if (left && left->rn_start <= start && left->rn_last >= last)
|
||||
return 0;
|
||||
|
||||
/* Clear out everything in the range we want to set. */
|
||||
err = range_tree_clear(rt, start, len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* Do we have a left-adjacent range ? */
|
||||
left = range_it_iter_first(rt, start - 1, start - 1);
|
||||
if (left && left->rn_last + 1 != start)
|
||||
return -EFAULT;
|
||||
|
||||
/* Do we have a right-adjacent range ? */
|
||||
right = range_it_iter_first(rt, last + 1, last + 1);
|
||||
if (right && right->rn_start != last + 1)
|
||||
return -EFAULT;
|
||||
|
||||
if (left && right) {
|
||||
/* Combine left and right adjacent ranges */
|
||||
range_it_remove(left, rt);
|
||||
range_it_remove(right, rt);
|
||||
left->rn_last = right->rn_last;
|
||||
range_it_insert(left, rt);
|
||||
migrate_disable();
|
||||
bpf_mem_free(&bpf_global_ma, right);
|
||||
migrate_enable();
|
||||
} else if (left) {
|
||||
/* Combine with the left range */
|
||||
range_it_remove(left, rt);
|
||||
left->rn_last = last;
|
||||
range_it_insert(left, rt);
|
||||
} else if (right) {
|
||||
/* Combine with the right range */
|
||||
range_it_remove(right, rt);
|
||||
right->rn_start = start;
|
||||
range_it_insert(right, rt);
|
||||
} else {
|
||||
migrate_disable();
|
||||
left = bpf_mem_alloc(&bpf_global_ma, sizeof(struct range_node));
|
||||
migrate_enable();
|
||||
if (!left)
|
||||
return -ENOMEM;
|
||||
left->rn_start = start;
|
||||
left->rn_last = last;
|
||||
range_it_insert(left, rt);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void range_tree_destroy(struct range_tree *rt)
|
||||
{
|
||||
struct range_node *rn;
|
||||
|
||||
while ((rn = range_it_iter_first(rt, 0, -1U))) {
|
||||
range_it_remove(rn, rt);
|
||||
migrate_disable();
|
||||
bpf_mem_free(&bpf_global_ma, rn);
|
||||
migrate_enable();
|
||||
}
|
||||
}
|
||||
|
||||
void range_tree_init(struct range_tree *rt)
|
||||
{
|
||||
rt->it_root = RB_ROOT_CACHED;
|
||||
rt->range_size_root = RB_ROOT_CACHED;
|
||||
}
|
||||
21
kernel/bpf/range_tree.h
Normal file
21
kernel/bpf/range_tree.h
Normal file
@@ -0,0 +1,21 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
|
||||
#ifndef _RANGE_TREE_H
|
||||
#define _RANGE_TREE_H 1
|
||||
|
||||
struct range_tree {
|
||||
/* root of interval tree */
|
||||
struct rb_root_cached it_root;
|
||||
/* root of rbtree of interval sizes */
|
||||
struct rb_root_cached range_size_root;
|
||||
};
|
||||
|
||||
void range_tree_init(struct range_tree *rt);
|
||||
void range_tree_destroy(struct range_tree *rt);
|
||||
|
||||
int range_tree_clear(struct range_tree *rt, u32 start, u32 len);
|
||||
int range_tree_set(struct range_tree *rt, u32 start, u32 len);
|
||||
int is_range_tree_set(struct range_tree *rt, u32 start, u32 len);
|
||||
s64 range_tree_find(struct range_tree *rt, u32 len);
|
||||
|
||||
#endif
|
||||
@@ -155,6 +155,89 @@ static void maybe_wait_bpf_programs(struct bpf_map *map)
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
/* Unpin the page backing a uptr's kernel address; a NULL address is a no-op. */
static void unpin_uptr_kaddr(void *kaddr)
{
	if (!kaddr)
		return;

	unpin_user_page(virt_to_page(kaddr));
}
|
||||
|
||||
/* Unpin the BPF_UPTR fields found among the first @cnt fields of @rec,
 * reading each stored kernel address from @obj at the field's offset.
 */
static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj)
{
	u32 idx;

	for (idx = 0; idx < cnt; idx++) {
		const struct btf_field *f = &rec->fields[idx];
		void **slot;

		if (f->type != BPF_UPTR)
			continue;

		slot = obj + f->offset;
		unpin_uptr_kaddr(*slot);
	}
}
|
||||
|
||||
static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj)
|
||||
{
|
||||
if (!btf_record_has_field(rec, BPF_UPTR))
|
||||
return;
|
||||
|
||||
__bpf_obj_unpin_uptrs(rec, rec->cnt, obj);
|
||||
}
|
||||
|
||||
static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj)
|
||||
{
|
||||
const struct btf_field *field;
|
||||
const struct btf_type *t;
|
||||
unsigned long start, end;
|
||||
struct page *page;
|
||||
void **uptr_addr;
|
||||
int i, err;
|
||||
|
||||
if (!btf_record_has_field(rec, BPF_UPTR))
|
||||
return 0;
|
||||
|
||||
for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) {
|
||||
if (field->type != BPF_UPTR)
|
||||
continue;
|
||||
|
||||
uptr_addr = obj + field->offset;
|
||||
start = *(unsigned long *)uptr_addr;
|
||||
if (!start)
|
||||
continue;
|
||||
|
||||
t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
|
||||
/* t->size was checked for zero before */
|
||||
if (check_add_overflow(start, t->size - 1, &end)) {
|
||||
err = -EFAULT;
|
||||
goto unpin_all;
|
||||
}
|
||||
|
||||
/* The uptr's struct cannot span across two pages */
|
||||
if ((start & PAGE_MASK) != (end & PAGE_MASK)) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto unpin_all;
|
||||
}
|
||||
|
||||
err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page);
|
||||
if (err != 1)
|
||||
goto unpin_all;
|
||||
|
||||
if (PageHighMem(page)) {
|
||||
err = -EOPNOTSUPP;
|
||||
unpin_user_page(page);
|
||||
goto unpin_all;
|
||||
}
|
||||
|
||||
*uptr_addr = page_address(page) + offset_in_page(start);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unpin_all:
|
||||
__bpf_obj_unpin_uptrs(rec, i, obj);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
|
||||
void *key, void *value, __u64 flags)
|
||||
{
|
||||
@@ -199,9 +282,14 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
|
||||
map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
|
||||
err = map->ops->map_push_elem(map, value, flags);
|
||||
} else {
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_update_elem(map, key, value, flags);
|
||||
rcu_read_unlock();
|
||||
err = bpf_obj_pin_uptrs(map->record, value);
|
||||
if (!err) {
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_update_elem(map, key, value, flags);
|
||||
rcu_read_unlock();
|
||||
if (err)
|
||||
bpf_obj_unpin_uptrs(map->record, value);
|
||||
}
|
||||
}
|
||||
bpf_enable_instrumentation();
|
||||
|
||||
@@ -548,6 +636,7 @@ void btf_record_free(struct btf_record *rec)
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
case BPF_KPTR_PERCPU:
|
||||
case BPF_UPTR:
|
||||
if (rec->fields[i].kptr.module)
|
||||
module_put(rec->fields[i].kptr.module);
|
||||
if (btf_is_kernel(rec->fields[i].kptr.btf))
|
||||
@@ -597,6 +686,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
case BPF_KPTR_PERCPU:
|
||||
case BPF_UPTR:
|
||||
if (btf_is_kernel(fields[i].kptr.btf))
|
||||
btf_get(fields[i].kptr.btf);
|
||||
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
|
||||
@@ -714,6 +804,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
|
||||
field->kptr.dtor(xchgd_field);
|
||||
}
|
||||
break;
|
||||
case BPF_UPTR:
|
||||
/* The caller ensured that no one is using the uptr */
|
||||
unpin_uptr_kaddr(*(void **)field_ptr);
|
||||
break;
|
||||
case BPF_LIST_HEAD:
|
||||
if (WARN_ON_ONCE(rec->spin_lock_off < 0))
|
||||
continue;
|
||||
@@ -1105,7 +1199,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
|
||||
|
||||
map->record = btf_parse_fields(btf, value_type,
|
||||
BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
|
||||
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE,
|
||||
BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
|
||||
map->value_size);
|
||||
if (!IS_ERR_OR_NULL(map->record)) {
|
||||
int i;
|
||||
@@ -1161,6 +1255,12 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
|
||||
goto free_map_tab;
|
||||
}
|
||||
break;
|
||||
case BPF_UPTR:
|
||||
if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free_map_tab;
|
||||
}
|
||||
break;
|
||||
case BPF_LIST_HEAD:
|
||||
case BPF_RB_ROOT:
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
@@ -3218,7 +3318,8 @@ static void bpf_tracing_link_release(struct bpf_link *link)
|
||||
container_of(link, struct bpf_tracing_link, link.link);
|
||||
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
|
||||
tr_link->trampoline));
|
||||
tr_link->trampoline,
|
||||
tr_link->tgt_prog));
|
||||
|
||||
bpf_trampoline_put(tr_link->trampoline);
|
||||
|
||||
@@ -3358,7 +3459,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
|
||||
* in prog->aux
|
||||
*
|
||||
* - if prog->aux->dst_trampoline is NULL, the program has already been
|
||||
* attached to a target and its initial target was cleared (below)
|
||||
* attached to a target and its initial target was cleared (below)
|
||||
*
|
||||
* - if tgt_prog != NULL, the caller specified tgt_prog_fd +
|
||||
* target_btf_id using the link_create API.
|
||||
@@ -3433,7 +3534,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
err = bpf_trampoline_link_prog(&link->link, tr);
|
||||
err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog);
|
||||
if (err) {
|
||||
bpf_link_cleanup(&link_primer);
|
||||
link = NULL;
|
||||
@@ -4002,10 +4103,14 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
|
||||
if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI &&
|
||||
attach_type != BPF_TRACE_UPROBE_MULTI)
|
||||
return -EINVAL;
|
||||
if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION &&
|
||||
attach_type != BPF_TRACE_UPROBE_SESSION)
|
||||
return -EINVAL;
|
||||
if (attach_type != BPF_PERF_EVENT &&
|
||||
attach_type != BPF_TRACE_KPROBE_MULTI &&
|
||||
attach_type != BPF_TRACE_KPROBE_SESSION &&
|
||||
attach_type != BPF_TRACE_UPROBE_MULTI)
|
||||
attach_type != BPF_TRACE_UPROBE_MULTI &&
|
||||
attach_type != BPF_TRACE_UPROBE_SESSION)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
@@ -5258,7 +5363,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
|
||||
else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI ||
|
||||
attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION)
|
||||
ret = bpf_kprobe_multi_link_attach(attr, prog);
|
||||
else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI)
|
||||
else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI ||
|
||||
attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION)
|
||||
ret = bpf_uprobe_multi_link_attach(attr, prog);
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -115,10 +115,14 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
|
||||
(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
|
||||
}
|
||||
|
||||
void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym)
|
||||
/*
 * Fill in the address range of @ksym.
 *
 * @data: start of the image; stored as ksym->start
 * @size: length of the image in bytes; ksym->end becomes start + size
 * @ksym: symbol entry to initialise
 *
 * Only the start and end fields are written here.
 */
void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
|
||||
{
|
||||
ksym->start = (unsigned long) data;
|
||||
ksym->end = ksym->start + size;
|
||||
}
|
||||
|
||||
void bpf_image_ksym_add(struct bpf_ksym *ksym)
|
||||
{
|
||||
bpf_ksym_add(ksym);
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
|
||||
PAGE_SIZE, false, ksym->name);
|
||||
@@ -377,7 +381,8 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
|
||||
ksym = &im->ksym;
|
||||
INIT_LIST_HEAD_RCU(&ksym->lnode);
|
||||
snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
|
||||
bpf_image_ksym_add(image, size, ksym);
|
||||
bpf_image_ksym_init(image, size, ksym);
|
||||
bpf_image_ksym_add(ksym);
|
||||
return im;
|
||||
|
||||
out_free_image:
|
||||
@@ -523,7 +528,27 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
|
||||
}
|
||||
}
|
||||
|
||||
static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
|
||||
/*
 * Check whether @tgt_prog may be extended and, if so, mark it as extended.
 *
 * Runs with tgt_prog->aux->ext_mutex taken through guard(mutex), so the
 * check and the flag update happen under the same lock.
 *
 * Returns -EBUSY when aux->prog_array_member_cnt is non-zero; otherwise
 * sets aux->is_extended to true and returns 0.
 */
static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
|
||||
{
|
||||
struct bpf_prog_aux *aux = tgt_prog->aux;
|
||||
|
||||
guard(mutex)(&aux->ext_mutex);
|
||||
if (aux->prog_array_member_cnt)
|
||||
/* Program extensions can not extend target prog when the target
|
||||
* prog has been updated to any prog_array map as tail callee.
|
||||
* It's to prevent a potential infinite loop like:
|
||||
* tgt prog entry -> tgt prog subprog -> freplace prog entry
|
||||
* --tailcall-> tgt prog entry.
|
||||
*/
|
||||
return -EBUSY;
|
||||
|
||||
aux->is_extended = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
|
||||
struct bpf_trampoline *tr,
|
||||
struct bpf_prog *tgt_prog)
|
||||
{
|
||||
enum bpf_tramp_prog_type kind;
|
||||
struct bpf_tramp_link *link_exiting;
|
||||
@@ -544,6 +569,9 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
|
||||
/* Cannot attach extension if fentry/fexit are in use. */
|
||||
if (cnt)
|
||||
return -EBUSY;
|
||||
err = bpf_freplace_check_tgt_prog(tgt_prog);
|
||||
if (err)
|
||||
return err;
|
||||
tr->extension_prog = link->link.prog;
|
||||
return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
|
||||
link->link.prog->bpf_func);
|
||||
@@ -570,17 +598,21 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
|
||||
/*
 * Locked entry point for linking @link into trampoline @tr: takes
 * tr->mutex, calls __bpf_trampoline_link_prog() and returns its result
 * after the mutex has been released. @tgt_prog is passed through to the
 * three-argument call unchanged.
 *
 * NOTE(review): two calls to __bpf_trampoline_link_prog() appear back to
 * back below (a two-argument and a three-argument form). This looks like
 * the before/after lines of a rendered diff whose +/- markers were lost;
 * confirm that only the three-argument call is live.
 */
int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
|
||||
struct bpf_trampoline *tr,
|
||||
struct bpf_prog *tgt_prog)
|
||||
{
|
||||
int err;
|
||||
|
||||
mutex_lock(&tr->mutex);
|
||||
err = __bpf_trampoline_link_prog(link, tr);
|
||||
err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
|
||||
mutex_unlock(&tr->mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
|
||||
static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
|
||||
struct bpf_trampoline *tr,
|
||||
struct bpf_prog *tgt_prog)
|
||||
{
|
||||
enum bpf_tramp_prog_type kind;
|
||||
int err;
|
||||
@@ -591,6 +623,8 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_
|
||||
err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
|
||||
tr->extension_prog->bpf_func, NULL);
|
||||
tr->extension_prog = NULL;
|
||||
guard(mutex)(&tgt_prog->aux->ext_mutex);
|
||||
tgt_prog->aux->is_extended = false;
|
||||
return err;
|
||||
}
|
||||
hlist_del_init(&link->tramp_hlist);
|
||||
@@ -599,12 +633,14 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_
|
||||
}
|
||||
|
||||
/* bpf_trampoline_unlink_prog() should never fail. */
|
||||
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
|
||||
/*
 * Locked entry point for unlinking @link from trampoline @tr: takes
 * tr->mutex, calls __bpf_trampoline_unlink_prog() and returns its result
 * after the mutex has been released. @tgt_prog is passed through to the
 * three-argument call unchanged; a caller visible in this file passes
 * NULL for it.
 *
 * NOTE(review): two calls to __bpf_trampoline_unlink_prog() appear back to
 * back below (a two-argument and a three-argument form). This looks like
 * the before/after lines of a rendered diff whose +/- markers were lost;
 * confirm that only the three-argument call is live.
 */
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
|
||||
struct bpf_trampoline *tr,
|
||||
struct bpf_prog *tgt_prog)
|
||||
{
|
||||
int err;
|
||||
|
||||
mutex_lock(&tr->mutex);
|
||||
err = __bpf_trampoline_unlink_prog(link, tr);
|
||||
err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
|
||||
mutex_unlock(&tr->mutex);
|
||||
return err;
|
||||
}
|
||||
@@ -619,7 +655,7 @@ static void bpf_shim_tramp_link_release(struct bpf_link *link)
|
||||
if (!shim_link->trampoline)
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL));
|
||||
bpf_trampoline_put(shim_link->trampoline);
|
||||
}
|
||||
|
||||
@@ -733,7 +769,7 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = __bpf_trampoline_link_prog(&shim_link->link, tr);
|
||||
err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
@@ -868,6 +904,8 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
|
||||
|
||||
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
|
||||
bpf_prog_inc_misses_counter(prog);
|
||||
if (prog->aux->recursion_detected)
|
||||
prog->aux->recursion_detected(prog);
|
||||
return 0;
|
||||
}
|
||||
return bpf_prog_start_time();
|
||||
@@ -944,6 +982,8 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
|
||||
|
||||
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
|
||||
bpf_prog_inc_misses_counter(prog);
|
||||
if (prog->aux->recursion_detected)
|
||||
prog->aux->recursion_detected(prog);
|
||||
return 0;
|
||||
}
|
||||
return bpf_prog_start_time();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -802,6 +802,8 @@ struct send_signal_irq_work {
|
||||
struct task_struct *task;
|
||||
u32 sig;
|
||||
enum pid_type type;
|
||||
bool has_siginfo;
|
||||
struct kernel_siginfo info;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
|
||||
@@ -809,27 +811,46 @@ static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
|
||||
/*
 * Deliver a signal that was queued in a send_signal_irq_work.
 *
 * @entry: the irq_work member embedded in the send_signal_irq_work
 *
 * Recovers the enclosing work item with container_of(), sends work->sig
 * to work->task with group_send_sig_info() using work->type, and then
 * calls put_task_struct() on work->task. The siginfo argument is
 * &work->info when work->has_siginfo is set and SEND_SIG_PRIV otherwise.
 *
 * NOTE(review): two group_send_sig_info() calls appear below, one with a
 * fixed SEND_SIG_PRIV and one with the computed siginfo. This looks like
 * the before/after lines of a rendered diff whose +/- markers were lost;
 * confirm that only the call using siginfo is live.
 */
static void do_bpf_send_signal(struct irq_work *entry)
|
||||
{
|
||||
struct send_signal_irq_work *work;
|
||||
struct kernel_siginfo *siginfo;
|
||||
|
||||
work = container_of(entry, struct send_signal_irq_work, irq_work);
|
||||
group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
|
||||
siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
|
||||
|
||||
group_send_sig_info(work->sig, siginfo, work->task, work->type);
|
||||
put_task_struct(work->task);
|
||||
}
|
||||
|
||||
static int bpf_send_signal_common(u32 sig, enum pid_type type)
|
||||
static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
|
||||
{
|
||||
struct send_signal_irq_work *work = NULL;
|
||||
struct kernel_siginfo info;
|
||||
struct kernel_siginfo *siginfo;
|
||||
|
||||
if (!task) {
|
||||
task = current;
|
||||
siginfo = SEND_SIG_PRIV;
|
||||
} else {
|
||||
clear_siginfo(&info);
|
||||
info.si_signo = sig;
|
||||
info.si_errno = 0;
|
||||
info.si_code = SI_KERNEL;
|
||||
info.si_pid = 0;
|
||||
info.si_uid = 0;
|
||||
info.si_value.sival_ptr = (void *)(unsigned long)value;
|
||||
siginfo = &info;
|
||||
}
|
||||
|
||||
/* Similar to bpf_probe_write_user, task needs to be
|
||||
* in a sound condition and kernel memory access be
|
||||
* permitted in order to send signal to the current
|
||||
* task.
|
||||
*/
|
||||
if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
|
||||
if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING)))
|
||||
return -EPERM;
|
||||
if (unlikely(!nmi_uaccess_okay()))
|
||||
return -EPERM;
|
||||
/* Task should not be pid=1 to avoid kernel panic. */
|
||||
if (unlikely(is_global_init(current)))
|
||||
if (unlikely(is_global_init(task)))
|
||||
return -EPERM;
|
||||
|
||||
if (irqs_disabled()) {
|
||||
@@ -847,19 +868,22 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
|
||||
* to the irq_work. The current task may change when queued
|
||||
* irq works get executed.
|
||||
*/
|
||||
work->task = get_task_struct(current);
|
||||
work->task = get_task_struct(task);
|
||||
work->has_siginfo = siginfo == &info;
|
||||
if (work->has_siginfo)
|
||||
copy_siginfo(&work->info, &info);
|
||||
work->sig = sig;
|
||||
work->type = type;
|
||||
irq_work_queue(&work->irq_work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
|
||||
return group_send_sig_info(sig, siginfo, task, type);
|
||||
}
|
||||
|
||||
/*
 * Helper body for bpf_send_signal: forwards @sig to
 * bpf_send_signal_common() with PIDTYPE_TGID. The four-argument call
 * passes a NULL task and a value of 0.
 *
 * NOTE(review): two return statements appear below (two-argument and
 * four-argument calls). This looks like the before/after lines of a
 * rendered diff whose +/- markers were lost; confirm that only the
 * four-argument call is live.
 */
BPF_CALL_1(bpf_send_signal, u32, sig)
|
||||
{
|
||||
return bpf_send_signal_common(sig, PIDTYPE_TGID);
|
||||
return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_send_signal_proto = {
|
||||
@@ -871,7 +895,7 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
|
||||
|
||||
/*
 * Helper body for bpf_send_signal_thread: forwards @sig to
 * bpf_send_signal_common() with PIDTYPE_PID. The four-argument call
 * passes a NULL task and a value of 0.
 *
 * NOTE(review): two return statements appear below (two-argument and
 * four-argument calls). This looks like the before/after lines of a
 * rendered diff whose +/- markers were lost; confirm that only the
 * four-argument call is live.
 */
BPF_CALL_1(bpf_send_signal_thread, u32, sig)
|
||||
{
|
||||
return bpf_send_signal_common(sig, PIDTYPE_PID);
|
||||
return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_send_signal_thread_proto = {
|
||||
@@ -1557,6 +1581,17 @@ static inline bool is_kprobe_session(const struct bpf_prog *prog)
|
||||
return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
|
||||
}
|
||||
|
||||
/*
 * True when @prog's expected attach type is BPF_TRACE_UPROBE_MULTI or
 * BPF_TRACE_UPROBE_SESSION, i.e. session programs also count as
 * uprobe-multi for callers of this predicate.
 */
static inline bool is_uprobe_multi(const struct bpf_prog *prog)
|
||||
{
|
||||
return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI ||
|
||||
prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
|
||||
}
|
||||
|
||||
/* True only when @prog's expected attach type is BPF_TRACE_UPROBE_SESSION. */
static inline bool is_uprobe_session(const struct bpf_prog *prog)
|
||||
{
|
||||
return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
@@ -1574,13 +1609,13 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
case BPF_FUNC_get_func_ip:
|
||||
if (is_kprobe_multi(prog))
|
||||
return &bpf_get_func_ip_proto_kprobe_multi;
|
||||
if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
|
||||
if (is_uprobe_multi(prog))
|
||||
return &bpf_get_func_ip_proto_uprobe_multi;
|
||||
return &bpf_get_func_ip_proto_kprobe;
|
||||
case BPF_FUNC_get_attach_cookie:
|
||||
if (is_kprobe_multi(prog))
|
||||
return &bpf_get_attach_cookie_proto_kmulti;
|
||||
if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
|
||||
if (is_uprobe_multi(prog))
|
||||
return &bpf_get_attach_cookie_proto_umulti;
|
||||
return &bpf_get_attach_cookie_proto_trace;
|
||||
default:
|
||||
@@ -3072,6 +3107,7 @@ struct bpf_uprobe {
|
||||
u64 cookie;
|
||||
struct uprobe *uprobe;
|
||||
struct uprobe_consumer consumer;
|
||||
bool session;
|
||||
};
|
||||
|
||||
struct bpf_uprobe_multi_link {
|
||||
@@ -3084,7 +3120,7 @@ struct bpf_uprobe_multi_link {
|
||||
};
|
||||
|
||||
struct bpf_uprobe_multi_run_ctx {
|
||||
struct bpf_run_ctx run_ctx;
|
||||
struct bpf_session_run_ctx session_ctx;
|
||||
unsigned long entry_ip;
|
||||
struct bpf_uprobe *uprobe;
|
||||
};
|
||||
@@ -3195,17 +3231,22 @@ static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
|
||||
|
||||
static int uprobe_prog_run(struct bpf_uprobe *uprobe,
|
||||
unsigned long entry_ip,
|
||||
struct pt_regs *regs)
|
||||
struct pt_regs *regs,
|
||||
bool is_return, void *data)
|
||||
{
|
||||
struct bpf_uprobe_multi_link *link = uprobe->link;
|
||||
struct bpf_uprobe_multi_run_ctx run_ctx = {
|
||||
.session_ctx = {
|
||||
.is_return = is_return,
|
||||
.data = data,
|
||||
},
|
||||
.entry_ip = entry_ip,
|
||||
.uprobe = uprobe,
|
||||
};
|
||||
struct bpf_prog *prog = link->link.prog;
|
||||
bool sleepable = prog->sleepable;
|
||||
struct bpf_run_ctx *old_run_ctx;
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
if (link->task && !same_thread_group(current, link->task))
|
||||
return 0;
|
||||
@@ -3217,7 +3258,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
|
||||
|
||||
migrate_disable();
|
||||
|
||||
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
|
||||
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
|
||||
err = bpf_prog_run(link->link.prog, regs);
|
||||
bpf_reset_run_ctx(old_run_ctx);
|
||||
|
||||
@@ -3244,9 +3285,13 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct bpf_uprobe *uprobe;
|
||||
int ret;
|
||||
|
||||
uprobe = container_of(con, struct bpf_uprobe, consumer);
|
||||
return uprobe_prog_run(uprobe, instruction_pointer(regs), regs);
|
||||
ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data);
|
||||
if (uprobe->session)
|
||||
return ret ? UPROBE_HANDLER_IGNORE : 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -3256,14 +3301,16 @@ uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, s
|
||||
struct bpf_uprobe *uprobe;
|
||||
|
||||
uprobe = container_of(con, struct bpf_uprobe, consumer);
|
||||
return uprobe_prog_run(uprobe, func, regs);
|
||||
uprobe_prog_run(uprobe, func, regs, true, data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Return the entry_ip stored in the uprobe-multi run context of the
 * current task.
 *
 * @ctx is not read; the run context is recovered from current->bpf_ctx
 * with container_of() on struct bpf_uprobe_multi_run_ctx.
 *
 * NOTE(review): run_ctx is assigned twice below, once through member
 * run_ctx and once through session_ctx.run_ctx. This looks like the
 * before/after lines of a rendered diff whose +/- markers were lost;
 * confirm that only the session_ctx.run_ctx form is live.
 */
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
|
||||
{
|
||||
struct bpf_uprobe_multi_run_ctx *run_ctx;
|
||||
|
||||
run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
|
||||
run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
|
||||
session_ctx.run_ctx);
|
||||
return run_ctx->entry_ip;
|
||||
}
|
||||
|
||||
@@ -3271,7 +3318,8 @@ static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
|
||||
{
|
||||
struct bpf_uprobe_multi_run_ctx *run_ctx;
|
||||
|
||||
run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
|
||||
run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
|
||||
session_ctx.run_ctx);
|
||||
return run_ctx->uprobe->cookie;
|
||||
}
|
||||
|
||||
@@ -3295,7 +3343,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
|
||||
if (sizeof(u64) != sizeof(void *))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
|
||||
if (!is_uprobe_multi(prog))
|
||||
return -EINVAL;
|
||||
|
||||
flags = attr->link_create.uprobe_multi.flags;
|
||||
@@ -3371,11 +3419,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
|
||||
|
||||
uprobes[i].link = link;
|
||||
|
||||
if (flags & BPF_F_UPROBE_MULTI_RETURN)
|
||||
uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
|
||||
else
|
||||
if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
|
||||
uprobes[i].consumer.handler = uprobe_multi_link_handler;
|
||||
|
||||
if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog))
|
||||
uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
|
||||
if (is_uprobe_session(prog))
|
||||
uprobes[i].session = true;
|
||||
if (pid)
|
||||
uprobes[i].consumer.filter = uprobe_multi_link_filter;
|
||||
}
|
||||
@@ -3464,7 +3513,7 @@ static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id)
|
||||
if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id))
|
||||
return 0;
|
||||
|
||||
if (!is_kprobe_session(prog))
|
||||
if (!is_kprobe_session(prog) && !is_uprobe_session(prog))
|
||||
return -EACCES;
|
||||
|
||||
return 0;
|
||||
@@ -3482,3 +3531,16 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
|
||||
}
|
||||
|
||||
late_initcall(bpf_kprobe_multi_kfuncs_init);
|
||||
|
||||
__bpf_kfunc_start_defs();
|
||||
|
||||
/*
 * kfunc: send signal @sig to @task.
 *
 * @task:  target task, handed to bpf_send_signal_common()
 * @sig:   signal number
 * @type:  must be PIDTYPE_PID or PIDTYPE_TGID
 * @value: handed to bpf_send_signal_common() together with @task
 *
 * Returns -EINVAL for any other @type; otherwise returns the result of
 * bpf_send_signal_common(sig, type, task, value).
 */
__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
|
||||
u64 value)
|
||||
{
|
||||
if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
|
||||
return -EINVAL;
|
||||
|
||||
return bpf_send_signal_common(sig, type, task, value);
|
||||
}
|
||||
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
Reference in New Issue
Block a user