From f0d74c4da1f060d2a66976193712a5e6abd361f5 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:53 -0700 Subject: [PATCH 1/5] bpf: Parameterize task iterators. Allow creating an iterator that loops through resources of one thread/process. People could only create iterators to loop through all resources of files, vma, and tasks in the system, even though they were interested in only the resources of a specific task or process. Passing the additional parameters, people can now create an iterator to go through all resources or only the resources of a task. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com --- include/linux/bpf.h | 25 +++++ include/uapi/linux/bpf.h | 6 ++ kernel/bpf/task_iter.c | 188 +++++++++++++++++++++++++++++---- tools/include/uapi/linux/bpf.h | 6 ++ 4 files changed, 203 insertions(+), 22 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5161fac0513f..0f3eaf3ed98c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } +/* + * The task type of iterators. + * + * For BPF task iterators, they can be parameterized with various + * parameters to visit only some of tasks. + * + * BPF_TASK_ITER_ALL (default) + * Iterate over resources of every task. + * + * BPF_TASK_ITER_TID + * Iterate over resources of a task/tid. + * + * BPF_TASK_ITER_TGID + * Iterate over resources of every task of a process / task group. + */ +enum bpf_iter_task_type { + BPF_TASK_ITER_ALL = 0, + BPF_TASK_ITER_TID, + BPF_TASK_ITER_TGID, +}; + struct bpf_iter_aux_info { /* for map_elem iter */ struct bpf_map *map; @@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info { struct cgroup *start; /* starting cgroup */ enum bpf_cgroup_iter_order order; } cgroup; + struct { + enum bpf_iter_task_type type; + u32 pid; + } task; }; typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d6bd10759eaf..455b21a53aac 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -110,6 +110,12 @@ union bpf_iter_link_info { __u32 cgroup_fd; __u64 cgroup_id; } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 8c921799def4..8b2f47e7139d 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -12,6 +12,9 @@ struct bpf_iter_seq_task_common { struct pid_namespace *ns; + enum bpf_iter_task_type type; + u32 pid; + u32 pid_visiting; }; struct bpf_iter_seq_task_info { @@ -22,18 +25,115 @@ struct bpf_iter_seq_task_info { u32 tid; }; -static struct task_struct *task_seq_get_next(struct pid_namespace *ns, +static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common, + u32 *tid, + bool skip_if_dup_files) +{ + struct task_struct *task, *next_task; + struct pid *pid; + u32 saved_tid; + + if (!*tid) { + /* The first time, the iterator calls this function. */ + pid = find_pid_ns(common->pid, common->ns); + if (!pid) + return NULL; + + task = get_pid_task(pid, PIDTYPE_TGID); + if (!task) + return NULL; + + *tid = common->pid; + common->pid_visiting = common->pid; + + return task; + } + + /* If the control returns to user space and comes back to the + * kernel again, *tid and common->pid_visiting should be the + * same for task_seq_start() to pick up the correct task. + */ + if (*tid == common->pid_visiting) { + pid = find_pid_ns(common->pid_visiting, common->ns); + task = get_pid_task(pid, PIDTYPE_PID); + + return task; + } + + pid = find_pid_ns(common->pid_visiting, common->ns); + if (!pid) + return NULL; + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return NULL; + +retry: + if (!pid_alive(task)) { + put_task_struct(task); + return NULL; + } + + next_task = next_thread(task); + put_task_struct(task); + if (!next_task) + return NULL; + + saved_tid = *tid; + *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns); + if (!*tid || *tid == common->pid) { + /* Run out of tasks of a process. The tasks of a + * thread_group are linked as circular linked list. + */ + *tid = saved_tid; + return NULL; + } + + get_task_struct(next_task); + common->pid_visiting = *tid; + + if (skip_if_dup_files && task->files == task->group_leader->files) { + task = next_task; + goto retry; + } + + return next_task; +} + +static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common, u32 *tid, bool skip_if_dup_files) { struct task_struct *task = NULL; struct pid *pid; + if (common->type == BPF_TASK_ITER_TID) { + if (*tid && *tid != common->pid) + return NULL; + rcu_read_lock(); + pid = find_pid_ns(common->pid, common->ns); + if (pid) { + task = get_pid_task(pid, PIDTYPE_TGID); + *tid = common->pid; + } + rcu_read_unlock(); + + return task; + } + + if (common->type == BPF_TASK_ITER_TGID) { + rcu_read_lock(); + task = task_group_seq_get_next(common, tid, skip_if_dup_files); + rcu_read_unlock(); + + return task; + } + rcu_read_lock(); retry: - pid = find_ge_pid(*tid, ns); + pid = find_ge_pid(*tid, common->ns); if (pid) { - *tid = pid_nr_ns(pid, ns); + *tid = pid_nr_ns(pid, common->ns); task = get_pid_task(pid, PIDTYPE_PID); if (!task) { ++*tid; @@ -56,7 +156,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos) struct bpf_iter_seq_task_info *info = seq->private; struct task_struct *task; - task = task_seq_get_next(info->common.ns, &info->tid, false); + task = task_seq_get_next(&info->common, &info->tid, false); if (!task) return NULL; @@ -73,7 +173,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; ++info->tid; put_task_struct((struct task_struct *)v); - task = task_seq_get_next(info->common.ns, &info->tid, false); + task = task_seq_get_next(&info->common, &info->tid, false); if (!task) return NULL; @@ -117,6 +217,41 @@ static void task_seq_stop(struct seq_file *seq, void *v) put_task_struct((struct task_struct *)v); } +static int bpf_iter_attach_task(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) +{ + unsigned int flags; + struct pid *pid; + pid_t tgid; + + if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1) + return -EINVAL; + + aux->task.type = BPF_TASK_ITER_ALL; + if (linfo->task.tid != 0) { + aux->task.type = BPF_TASK_ITER_TID; + aux->task.pid = linfo->task.tid; + } + if (linfo->task.pid != 0) { + aux->task.type = BPF_TASK_ITER_TGID; + aux->task.pid = linfo->task.pid; + } + if (linfo->task.pid_fd != 0) { + aux->task.type = BPF_TASK_ITER_TGID; + + pid = pidfd_get_pid(linfo->task.pid_fd, &flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + tgid = pid_nr_ns(pid, task_active_pid_ns(current)); + aux->task.pid = tgid; + put_pid(pid); + } + + return 0; +} + static const struct seq_operations task_seq_ops = { .start = task_seq_start, .next = task_seq_next, @@ -137,8 +272,7 @@ struct bpf_iter_seq_task_file_info { static struct file * task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) { - struct pid_namespace *ns = info->common.ns; - u32 curr_tid = info->tid; + u32 saved_tid = info->tid; struct task_struct *curr_task; unsigned int curr_fd = info->fd; @@ -151,21 +285,18 @@ again: curr_task = info->task; curr_fd = info->fd; } else { - curr_task = task_seq_get_next(ns, &curr_tid, true); + curr_task = task_seq_get_next(&info->common, &info->tid, true); if (!curr_task) { info->task = NULL; - info->tid = curr_tid; return NULL; } - /* set info->task and info->tid */ + /* set info->task */ info->task = curr_task; - if (curr_tid == info->tid) { + if (saved_tid == info->tid) curr_fd = info->fd; - } else { - info->tid = curr_tid; + else curr_fd = 0; - } } rcu_read_lock(); @@ -186,9 +317,15 @@ again: /* the current task is done, go to the next task */ rcu_read_unlock(); put_task_struct(curr_task); + + if (info->common.type == BPF_TASK_ITER_TID) { + info->task = NULL; + return NULL; + } + info->task = NULL; info->fd = 0; - curr_tid = ++(info->tid); + saved_tid = ++(info->tid); goto again; } @@ -269,6 +406,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux) struct bpf_iter_seq_task_common *common = priv_data; common->ns = get_pid_ns(task_active_pid_ns(current)); + common->type = aux->task.type; + common->pid = aux->task.pid; + return 0; } @@ -307,11 +447,10 @@ enum bpf_task_vma_iter_find_op { static struct vm_area_struct * task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) { - struct pid_namespace *ns = info->common.ns; enum bpf_task_vma_iter_find_op op; struct vm_area_struct *curr_vma; struct task_struct *curr_task; - u32 curr_tid = info->tid; + u32 saved_tid = info->tid; /* If this function returns a non-NULL vma, it holds a reference to * the task_struct, and holds read lock on vma->mm->mmap_lock. @@ -371,14 +510,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) } } else { again: - curr_task = task_seq_get_next(ns, &curr_tid, true); + curr_task = task_seq_get_next(&info->common, &info->tid, true); if (!curr_task) { - info->tid = curr_tid + 1; + info->tid++; goto finish; } - if (curr_tid != info->tid) { - info->tid = curr_tid; + if (saved_tid != info->tid) { /* new task, process the first vma */ op = task_vma_iter_first_vma; } else { @@ -430,9 +568,12 @@ again: return curr_vma; next_task: + if (info->common.type == BPF_TASK_ITER_TID) + goto finish; + put_task_struct(curr_task); info->task = NULL; - curr_tid++; + info->tid++; goto again; finish: @@ -533,6 +674,7 @@ static const struct bpf_iter_seq_info task_seq_info = { static struct bpf_iter_reg task_reg_info = { .target = "task", + .attach_target = bpf_iter_attach_task, .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 1, .ctx_arg_info = { @@ -551,6 +693,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = { static struct bpf_iter_reg task_file_reg_info = { .target = "task_file", + .attach_target = bpf_iter_attach_task, .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 2, .ctx_arg_info = { @@ -571,6 +714,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = { static struct bpf_iter_reg task_vma_reg_info = { .target = "task_vma", + .attach_target = bpf_iter_attach_task, .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 2, .ctx_arg_info = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d6bd10759eaf..455b21a53aac 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -110,6 +110,12 @@ union bpf_iter_link_info { __u32 cgroup_fd; __u64 cgroup_id; } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ From 21fb6f2aa3890b0d0abf88b7756d0098e9367a7c Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:54 -0700 Subject: [PATCH 2/5] bpf: Handle bpf_link_info for the parameterized task BPF iterators. Add new fields to bpf_link_info that users can query it through bpf_obj_get_info_by_fd(). Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-3-kuifeng@fb.com --- include/uapi/linux/bpf.h | 4 ++++ kernel/bpf/task_iter.c | 18 ++++++++++++++++++ tools/include/uapi/linux/bpf.h | 4 ++++ 3 files changed, 26 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 455b21a53aac..3075018a4ef8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6265,6 +6265,10 @@ struct bpf_link_info { __u64 cgroup_id; __u32 order; } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; }; } iter; struct { diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 8b2f47e7139d..46f836be22e2 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -672,6 +672,21 @@ static const struct bpf_iter_seq_info task_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_task_info), }; +static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info) +{ + switch (aux->task.type) { + case BPF_TASK_ITER_TID: + info->iter.task.tid = aux->task.pid; + break; + case BPF_TASK_ITER_TGID: + info->iter.task.pid = aux->task.pid; + break; + default: + break; + } + return 0; +} + static struct bpf_iter_reg task_reg_info = { .target = "task", .attach_target = bpf_iter_attach_task, @@ -682,6 +697,7 @@ static struct bpf_iter_reg task_reg_info = { PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &task_seq_info, + .fill_link_info = bpf_iter_fill_link_info, }; static const struct bpf_iter_seq_info task_file_seq_info = { @@ -703,6 +719,7 @@ static struct bpf_iter_reg task_file_reg_info = { PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &task_file_seq_info, + .fill_link_info = bpf_iter_fill_link_info, }; static const struct bpf_iter_seq_info task_vma_seq_info = { @@ -724,6 +741,7 @@ static struct bpf_iter_reg task_vma_reg_info = { PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &task_vma_seq_info, + .fill_link_info = bpf_iter_fill_link_info, }; BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 455b21a53aac..3075018a4ef8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6265,6 +6265,10 @@ struct bpf_link_info { __u64 cgroup_id; __u32 order; } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; }; } iter; struct { From 2c4fe44fb020f3cce904da2ba9e42bb1c118e8a3 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:55 -0700 Subject: [PATCH 3/5] bpf: Handle show_fdinfo for the parameterized task BPF iterators Show information of iterators in the respective files under /proc//fdinfo/. For example, for a task file iterator with 1723 as the value of tid parameter, its fdinfo would look like the following lines. pos: 0 flags: 02000000 mnt_id: 14 ino: 38 link_type: iter link_id: 51 prog_tag: a590ac96db22b825 prog_id: 299 target_name: task_file task_type: TID tid: 1723 This patch add the last three fields. task_type is the type of the task parameter. TID means the iterator visit only the thread specified by tid. The value of tid in the above example is 1723. For the case of PID task_type, it means the iterator visits only threads of a process and will show the pid value of the process instead of a tid. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-4-kuifeng@fb.com --- kernel/bpf/task_iter.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 46f836be22e2..67e03e1833ba 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -10,6 +10,12 @@ #include #include "mmap_unlock_work.h" +static const char * const iter_task_type_names[] = { + "ALL", + "TID", + "PID", +}; + struct bpf_iter_seq_task_common { struct pid_namespace *ns; enum bpf_iter_task_type type; @@ -687,6 +693,15 @@ static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct b return 0; } +static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq) +{ + seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]); + if (aux->task.type == BPF_TASK_ITER_TID) + seq_printf(seq, "tid:\t%u\n", aux->task.pid); + else if (aux->task.type == BPF_TASK_ITER_TGID) + seq_printf(seq, "pid:\t%u\n", aux->task.pid); +} + static struct bpf_iter_reg task_reg_info = { .target = "task", .attach_target = bpf_iter_attach_task, @@ -698,6 +713,7 @@ static struct bpf_iter_reg task_reg_info = { }, .seq_info = &task_seq_info, .fill_link_info = bpf_iter_fill_link_info, + .show_fdinfo = bpf_iter_task_show_fdinfo, }; static const struct bpf_iter_seq_info task_file_seq_info = { @@ -720,6 +736,7 @@ static struct bpf_iter_reg task_file_reg_info = { }, .seq_info = &task_file_seq_info, .fill_link_info = bpf_iter_fill_link_info, + .show_fdinfo = bpf_iter_task_show_fdinfo, }; static const struct bpf_iter_seq_info task_vma_seq_info = { @@ -742,6 +759,7 @@ static struct bpf_iter_reg task_vma_reg_info = { }, .seq_info = &task_vma_seq_info, .fill_link_info = bpf_iter_fill_link_info, + .show_fdinfo = bpf_iter_task_show_fdinfo, }; BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start, From b3e1331eb925a45df1cc5d02a725e5ea70da0e2e Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:56 -0700 Subject: [PATCH 4/5] selftests/bpf: Test parameterized task BPF iterators. Test iterators of vma, files and tasks. Ensure the API works appropriately to visit all tasks, tasks in a process, or a particular task. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-5-kuifeng@fb.com --- .../selftests/bpf/prog_tests/bpf_iter.c | 282 ++++++++++++++++-- .../selftests/bpf/prog_tests/btf_dump.c | 2 +- .../selftests/bpf/progs/bpf_iter_task.c | 9 + .../selftests/bpf/progs/bpf_iter_task_file.c | 9 +- .../selftests/bpf/progs/bpf_iter_task_vma.c | 7 +- .../selftests/bpf/progs/bpf_iter_vma_offset.c | 37 +++ 6 files changed, 322 insertions(+), 24 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index e89685bd587c..3369c5ec3a17 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #include +#include +#include #include "bpf_iter_ipv6_route.skel.h" #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" @@ -14,6 +16,7 @@ #include "bpf_iter_udp4.skel.h" #include "bpf_iter_udp6.skel.h" #include "bpf_iter_unix.skel.h" +#include "bpf_iter_vma_offset.skel.h" #include "bpf_iter_test_kern1.skel.h" #include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern3.skel.h" @@ -43,13 +46,13 @@ static void test_btf_id_or_null(void) } } -static void do_dummy_read(struct bpf_program *prog) +static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts) { struct bpf_link *link; char buf[16] = {}; int iter_fd, len; - link = bpf_program__attach_iter(prog, NULL); + link = bpf_program__attach_iter(prog, opts); if (!ASSERT_OK_PTR(link, "attach_iter")) return; @@ -68,6 +71,11 @@ free_link: bpf_link__destroy(link); } +static void do_dummy_read(struct bpf_program *prog) +{ + do_dummy_read_opts(prog, NULL); +} + static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog, struct bpf_map *map) { @@ -167,19 +175,140 @@ static void test_bpf_map(void) bpf_iter_bpf_map__destroy(skel); } -static void test_task(void) +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(SYS_pidfd_open, pid, flags); +} + +static void check_bpf_link_info(const struct bpf_program *prog) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct bpf_link_info info = {}; + struct bpf_link *link; + __u32 info_len; + int err; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(prog, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return; + + info_len = sizeof(info); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len); + ASSERT_OK(err, "bpf_obj_get_info_by_fd"); + ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid"); + + bpf_link__destroy(link); +} + +static pthread_mutex_t do_nothing_mutex; + +static void *do_nothing_wait(void *arg) +{ + pthread_mutex_lock(&do_nothing_mutex); + pthread_mutex_unlock(&do_nothing_mutex); + + pthread_exit(arg); +} + +static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, + int *num_unknown, int *num_known) { struct bpf_iter_task *skel; + pthread_t thread_id; + void *ret; skel = bpf_iter_task__open_and_load(); if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) return; - do_dummy_read(skel->progs.dump_task); + ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock"); + + ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), + "pthread_create"); + + skel->bss->tid = getpid(); + + do_dummy_read_opts(skel->progs.dump_task, opts); + + *num_unknown = skel->bss->num_unknown_tid; + *num_known = skel->bss->num_known_tid; + + ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock"); + ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL, + "pthread_join"); bpf_iter_task__destroy(skel); } +static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known) +{ + int num_unknown_tid, num_known_tid; + + test_task_common_nocheck(opts, &num_unknown_tid, &num_known_tid); + ASSERT_EQ(num_unknown_tid, num_unknown, "check_num_unknown_tid"); + ASSERT_EQ(num_known_tid, num_known, "check_num_known_tid"); +} + +static void test_task_tid(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + int num_unknown_tid, num_known_tid; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + test_task_common(&opts, 0, 1); + + linfo.task.tid = 0; + linfo.task.pid = getpid(); + test_task_common(&opts, 1, 1); + + test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid); + ASSERT_GT(num_unknown_tid, 1, "check_num_unknown_tid"); + ASSERT_EQ(num_known_tid, 1, "check_num_known_tid"); +} + +static void test_task_pid(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.pid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_common(&opts, 1, 1); +} + +static void test_task_pidfd(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + int pidfd; + + pidfd = pidfd_open(getpid(), 0); + if (!ASSERT_GT(pidfd, 0, "pidfd_open")) + return; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.pid_fd = pidfd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_common(&opts, 1, 1); + + close(pidfd); +} + static void test_task_sleepable(void) { struct bpf_iter_task *skel; @@ -212,14 +341,11 @@ static void test_task_stack(void) bpf_iter_task_stack__destroy(skel); } -static void *do_nothing(void *arg) -{ - pthread_exit(arg); -} - static void test_task_file(void) { + LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_task_file *skel; + union bpf_iter_link_info linfo; pthread_t thread_id; void *ret; @@ -229,19 +355,36 @@ static void test_task_file(void) skel->bss->tgid = getpid(); - if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing, NULL), - "pthread_create")) - goto done; + ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock"); + + ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), + "pthread_create"); + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + do_dummy_read_opts(skel->progs.dump_task_file, &opts); + + ASSERT_EQ(skel->bss->count, 0, "check_count"); + ASSERT_EQ(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count"); + + skel->bss->last_tgid = 0; + skel->bss->count = 0; + skel->bss->unique_tgid_count = 0; do_dummy_read(skel->progs.dump_task_file); - if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL, - "pthread_join")) - goto done; - ASSERT_EQ(skel->bss->count, 0, "check_count"); + ASSERT_GT(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count"); + + check_bpf_link_info(skel->progs.dump_task_file); + + ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock"); + ASSERT_OK(pthread_join(thread_id, &ret), "pthread_join"); + ASSERT_NULL(ret, "pthread_join"); -done: bpf_iter_task_file__destroy(skel); } @@ -1249,7 +1392,7 @@ static void str_strip_first_line(char *str) *dst = '\0'; } -static void test_task_vma(void) +static void test_task_vma_common(struct bpf_iter_attach_opts *opts) { int err, iter_fd = -1, proc_maps_fd = -1; struct bpf_iter_task_vma *skel; @@ -1261,13 +1404,14 @@ static void test_task_vma(void) return; skel->bss->pid = getpid(); + skel->bss->one_task = opts ? 1 : 0; err = bpf_iter_task_vma__load(skel); if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( - skel->progs.proc_maps, NULL); + skel->progs.proc_maps, opts); if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) { skel->links.proc_maps = NULL; @@ -1291,6 +1435,8 @@ static void test_task_vma(void) goto out; len += err; } + if (opts) + ASSERT_EQ(skel->bss->one_task_error, 0, "unexpected task"); /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */ snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid); @@ -1306,6 +1452,9 @@ static void test_task_vma(void) str_strip_first_line(proc_maps_output); ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output"); + + check_bpf_link_info(skel->progs.proc_maps); + out: close(proc_maps_fd); close(iter_fd); @@ -1325,8 +1474,93 @@ void test_bpf_sockmap_map_iter_fd(void) bpf_iter_sockmap__destroy(skel); } +static void test_task_vma(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.tid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_vma_common(&opts); + test_task_vma_common(NULL); +} + +/* uprobe attach point */ +static noinline int trigger_func(int arg) +{ + asm volatile (""); + return arg + 1; +} + +static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc) +{ + struct bpf_iter_vma_offset *skel; + struct bpf_link *link; + char buf[16] = {}; + int iter_fd, len; + int pgsz, shift; + + skel = bpf_iter_vma_offset__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_vma_offset__open_and_load")) + return; + + skel->bss->pid = getpid(); + skel->bss->address = (uintptr_t)trigger_func; + for (pgsz = getpagesize(), shift = 0; pgsz > 1; pgsz >>= 1, shift++) + ; + skel->bss->page_shift = shift; + + link = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GT(iter_fd, 0, "create_iter")) + goto exit; + + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + buf[15] = 0; + ASSERT_EQ(strcmp(buf, "OK\n"), 0, "strcmp"); + + ASSERT_EQ(skel->bss->offset, get_uprobe_offset(trigger_func), "offset"); + if (one_proc) + ASSERT_EQ(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count"); + else + ASSERT_GT(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count"); + + close(iter_fd); + +exit: + bpf_link__destroy(link); +} + +static void test_task_vma_offset(void) +{ + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.pid = getpid(); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + test_task_vma_offset_common(&opts, true); + + linfo.task.pid = 0; + linfo.task.tid = getpid(); + test_task_vma_offset_common(&opts, true); + + test_task_vma_offset_common(NULL, false); +} + void test_bpf_iter(void) { + ASSERT_OK(pthread_mutex_init(&do_nothing_mutex, NULL), "pthread_mutex_init"); + if (test__start_subtest("btf_id_or_null")) test_btf_id_or_null(); if (test__start_subtest("ipv6_route")) @@ -1335,8 +1569,12 @@ void test_bpf_iter(void) test_netlink(); if (test__start_subtest("bpf_map")) test_bpf_map(); - if (test__start_subtest("task")) - test_task(); + if (test__start_subtest("task_tid")) + test_task_tid(); + if (test__start_subtest("task_pid")) + test_task_pid(); + if (test__start_subtest("task_pidfd")) + test_task_pidfd(); if (test__start_subtest("task_sleepable")) test_task_sleepable(); if (test__start_subtest("task_stack")) @@ -1397,4 +1635,6 @@ void test_bpf_iter(void) test_ksym_iter(); if (test__start_subtest("bpf_sockmap_map_iter_fd")) test_bpf_sockmap_map_iter_fd(); + if (test__start_subtest("vma_offset")) + test_task_vma_offset(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index b1ca954ed1e5..24da335482d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -764,7 +764,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, /* union with nested struct */ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT, - "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}", + "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},.task = (struct){.tid = (__u32)1,.pid = (__u32)1,},}", { .cgroup = { .order = 1, .cgroup_fd = 1, }}); /* struct skb with nested structs/unions; because type output is so diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index d22741272692..96131b9a1caa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -6,6 +6,10 @@ char _license[] SEC("license") = "GPL"; +uint32_t tid = 0; +int num_unknown_tid = 0; +int num_known_tid = 0; + SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { @@ -18,6 +22,11 @@ int dump_task(struct bpf_iter__task *ctx) return 0; } + if (task->pid != tid) + num_unknown_tid++; + else + num_known_tid++; + if (ctx->meta->seq_num == 0) BPF_SEQ_PRINTF(seq, " tgid gid\n"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 6e7b400888fe..b0255080662d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -7,14 +7,16 @@ char _license[] SEC("license") = "GPL"; int count = 0; int tgid = 0; +int last_tgid = 0; +int unique_tgid_count = 0; SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file *ctx) { struct seq_file *seq = ctx->meta->seq; struct task_struct *task = ctx->task; - __u32 fd = ctx->fd; struct file *file = ctx->file; + __u32 fd = ctx->fd; if (task == (void *)0 || file == (void *)0) return 0; @@ -27,6 +29,11 @@ int dump_task_file(struct bpf_iter__task_file *ctx) if (tgid == task->tgid && task->tgid != task->pid) count++; + if (last_tgid != task->tgid) { + last_tgid = task->tgid; + unique_tgid_count++; + } + BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd, (long)file->f_op); return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c index 4ea6a37d1345..dd923dc637d5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c @@ -20,6 +20,8 @@ char _license[] SEC("license") = "GPL"; #define D_PATH_BUF_SIZE 1024 char d_path_buf[D_PATH_BUF_SIZE] = {}; __u32 pid = 0; +__u32 one_task = 0; +__u32 one_task_error = 0; SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) { @@ -33,8 +35,11 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) return 0; file = vma->vm_file; - if (task->tgid != pid) + if (task->tgid != pid) { + if (one_task) + one_task_error = 1; return 0; + } perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-'; perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-'; perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-'; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c new file mode 100644 index 000000000000..ee7455d2623a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "bpf_iter.h" +#include + +char _license[] SEC("license") = "GPL"; + +__u32 unique_tgid_cnt = 0; +uintptr_t address = 0; +uintptr_t offset = 0; +__u32 last_tgid = 0; +__u32 pid = 0; +__u32 page_shift = 0; + +SEC("iter/task_vma") +int get_vma_offset(struct bpf_iter__task_vma *ctx) +{ + struct vm_area_struct *vma = ctx->vma; + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + + if (task == NULL || vma == NULL) + return 0; + + if (last_tgid != task->tgid) + unique_tgid_cnt++; + last_tgid = task->tgid; + + if (task->tgid != pid) + return 0; + + if (vma->vm_start <= address && vma->vm_end > address) { + offset = address - vma->vm_start + (vma->vm_pgoff << page_shift); + BPF_SEQ_PRINTF(seq, "OK\n"); + } + return 0; +} From 6bdb6d6be019f697296f52c37865dd7b0ce80750 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:57 -0700 Subject: [PATCH 5/5] bpftool: Show parameters of BPF task iterators. Show tid or pid of iterators if giving an argument of tid or pid For example, the command `bpftool link list` may list following lines. 1: iter prog 2 target_name bpf_map 2: iter prog 3 target_name bpf_prog 33: iter prog 225 target_name task_file tid 1644 pids test_progs(1644) Link 33 is a task_file iterator with tid 1644. For now, only targets of task, task_file and task_vma may be with tid or pid to filter out tasks other than those belonging to a process (pid) or a thread (tid). Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-6-kuifeng@fb.com --- tools/bpf/bpftool/link.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index ef0dc2f8d5a2..2863639706dd 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -106,6 +106,13 @@ static const char *cgroup_order_string(__u32 order) } } +static bool is_iter_task_target(const char *target_name) +{ + return strcmp(target_name, "task") == 0 || + strcmp(target_name, "task_file") == 0 || + strcmp(target_name, "task_vma") == 0; +} + static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) { const char *target_name = u64_to_ptr(info->iter.target_name); @@ -114,6 +121,12 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) if (is_iter_map_target(target_name)) jsonw_uint_field(wtr, "map_id", info->iter.map.map_id); + else if (is_iter_task_target(target_name)) { + if (info->iter.task.tid) + jsonw_uint_field(wtr, "tid", info->iter.task.tid); + else if (info->iter.task.pid) + jsonw_uint_field(wtr, "pid", info->iter.task.pid); + } if (is_iter_cgroup_target(target_name)) { jsonw_lluint_field(wtr, "cgroup_id", info->iter.cgroup.cgroup_id); @@ -237,6 +250,12 @@ static void show_iter_plain(struct bpf_link_info *info) if (is_iter_map_target(target_name)) printf("map_id %u ", info->iter.map.map_id); + else if (is_iter_task_target(target_name)) { + if (info->iter.task.tid) + printf("tid %u ", info->iter.task.tid); + else if (info->iter.task.pid) + printf("pid %u ", info->iter.task.pid); + } if (is_iter_cgroup_target(target_name)) { printf("cgroup_id %llu ", info->iter.cgroup.cgroup_id);