linux

Go to file

Leon Hwang 116e04ba14 bpf, x64: Fix tailcall hierarchy

This patch fixes a tailcall issue caused by abusing tailcalls in the
bpf2bpf feature.

As we know, tail_call_cnt is propagated via rax from caller to callee
when calling a subprog in tailcall context. But, as in the following
example, the MAX_TAIL_CALL_CNT limit won't work because tail_call_cnt is
not propagated back from callee to caller.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

int count = 0;

static __noinline
int subprog_tail1(struct __sk_buff *skb)
{
	bpf_tail_call_static(skb, &jmp_table, 0);
	return 0;
}

static __noinline
int subprog_tail2(struct __sk_buff *skb)
{
	bpf_tail_call_static(skb, &jmp_table, 0);
	return 0;
}

SEC("tc")
int entry(struct __sk_buff *skb)
{
	volatile int ret = 1;

	count++;
	subprog_tail1(skb);
	subprog_tail2(skb);

	return ret;
}

char __license[] SEC("license") = "GPL";

At run time, the tail_call_cnt in entry() will be propagated to
subprog_tail1() and subprog_tail2(). But, when the tail_call_cnt in
subprog_tail1() is updated by bpf_tail_call_static(), the tail_call_cnt
in entry() is not updated at the same time. As a result, when the
tail_call_cnt in entry() is less than MAX_TAIL_CALL_CNT and
subprog_tail1() returns because of the MAX_TAIL_CALL_CNT limit,
bpf_tail_call_static() in subprog_tail2() is still able to run, because
the tail_call_cnt in subprog_tail2(), propagated from entry(), is less
than MAX_TAIL_CALL_CNT.

So, how many tailcalls are there for this case if no error happens?

Viewed top-down, doesn't it look like a hierarchy, layer upon layer?

With this view, there will be 2+4+8+...+2^33 = 2^34 - 2 = 17,179,869,182
tailcalls for this case.

What if there are N subprog_tail() calls in entry()? There will be
almost N^34 tailcalls.

This patch resolves this case on x86_64.

Instead of propagating tail_call_cnt from caller to callee, it
propagates its pointer, tail_call_cnt_ptr, tcc_ptr for short.

However, where does it store tail_call_cnt?

It stores tail_call_cnt on the stack of the main prog. When a tail call
happens in a subprog, it increments tail_call_cnt through tcc_ptr.

Meanwhile, it stores tail_call_cnt_ptr on the stack of main prog, too.

And, before jumping to the tail callee, it has to pop tail_call_cnt and
tail_call_cnt_ptr.

Then, at the prologue of a subprog, it must not set rax to a new
tail_call_cnt_ptr. It has to reuse the tail_call_cnt_ptr from the caller.

As a result, at run time, the prologue has to determine whether rax
holds tail_call_cnt or tail_call_cnt_ptr by:

1. rax is tail_call_cnt if rax is <= MAX_TAIL_CALL_CNT.
2. rax is tail_call_cnt_ptr if rax is > MAX_TAIL_CALL_CNT, because a
   pointer won't be <= MAX_TAIL_CALL_CNT.

Here's an example, followed by a dump of its JITed code.

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

int count = 0;

static __noinline
int subprog_tail(struct __sk_buff *skb)
{
	bpf_tail_call_static(skb, &jmp_table, 0);
	return 0;
}

SEC("tc")
int entry(struct __sk_buff *skb)
{
	int ret = 1;

	count++;
	subprog_tail(skb);
	subprog_tail(skb);

	return ret;
}

Output of "bpftool p d j id 42" (short for "bpftool prog dump jited id 42"):

int entry(struct __sk_buff * skb):
bpf_prog_0c0f4c2413ef19b1_entry:
; int entry(struct __sk_buff *skb)
   0:	endbr64
   4:	nopl	(%rax,%rax)
   9:	xorq	%rax, %rax		;; rax = 0 (tail_call_cnt)
   c:	pushq	%rbp
   d:	movq	%rsp, %rbp
  10:	endbr64
  14:	cmpq	$33, %rax		;; if rax > 33, rax = tcc_ptr
  18:	ja	0x20			;; if rax > 33 goto 0x20 ---+
  1a:	pushq	%rax			;; [rbp - 8] = rax = 0      |
  1b:	movq	%rsp, %rax		;; rax = rbp - 8            |
  1e:	jmp	0x21			;; ---------+               |
  20:	pushq	%rax			;; <--------|---------------+
  21:	pushq	%rax			;; <--------+ [rbp - 16] = rax
  22:	pushq	%rbx			;; callee saved
  23:	movq	%rdi, %rbx		;; rbx = skb (callee saved)
; count++;
  26:	movabsq	$-82417199407104, %rdi
  30:	movl	(%rdi), %esi
  33:	addl	$1, %esi
  36:	movl	%esi, (%rdi)
; subprog_tail(skb);
  39:	movq	%rbx, %rdi		;; rdi = skb
  3c:	movq	-16(%rbp), %rax		;; rax = tcc_ptr
  43:	callq	0x80			;; call subprog_tail()
; subprog_tail(skb);
  48:	movq	%rbx, %rdi		;; rdi = skb
  4b:	movq	-16(%rbp), %rax		;; rax = tcc_ptr
  52:	callq	0x80			;; call subprog_tail()
; return ret;
  57:	movl	$1, %eax
  5c:	popq	%rbx
  5d:	leave
  5e:	retq

int subprog_tail(struct __sk_buff * skb):
bpf_prog_3a140cef239a4b4f_subprog_tail:
; int subprog_tail(struct __sk_buff *skb)
   0:	endbr64
   4:	nopl	(%rax,%rax)
   9:	nopl	(%rax)			;; do not touch tail_call_cnt
   c:	pushq	%rbp
   d:	movq	%rsp, %rbp
  10:	endbr64
  14:	pushq	%rax			;; [rbp - 8]  = rax (tcc_ptr)
  15:	pushq	%rax			;; [rbp - 16] = rax (tcc_ptr)
  16:	pushq	%rbx			;; callee saved
  17:	pushq	%r13			;; callee saved
  19:	movq	%rdi, %rbx		;; rbx = skb
; asm volatile("r1 = %[ctx]\n\t"
  1c:	movabsq	$-105487587488768, %r13	;; r13 = jmp_table
  26:	movq	%rbx, %rdi		;; 1st arg, skb
  29:	movq	%r13, %rsi		;; 2nd arg, jmp_table
  2c:	xorl	%edx, %edx		;; 3rd arg, index = 0
  2e:	movq	-16(%rbp), %rax		;; rax = [rbp - 16] (tcc_ptr)
  35:	cmpq	$33, (%rax)
  39:	jae	0x4e			;; if *tcc_ptr >= 33 goto 0x4e --------+
  3b:	jmp	0x4e			;; jmp bypass, toggled by poking       |
  40:	addq	$1, (%rax)		;; (*tcc_ptr)++                        |
  44:	popq	%r13			;; callee saved                        |
  46:	popq	%rbx			;; callee saved                        |
  47:	popq	%rax			;; undo rbp-16 push                    |
  48:	popq	%rax			;; undo rbp-8  push                    |
  49:	nopl	(%rax,%rax)		;; tail call target, toggled by poking |
; return 0;				;;                                     |
  4e:	popq	%r13			;; restore callee saved <--------------+
  50:	popq	%rbx			;; restore callee saved
  51:	leave
  52:	retq

Furthermore, when a trampoline is the caller of a bpf prog that is
tail_call_reachable, rax must be propagated through the trampoline.

Fixes: ebf7d1f508 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT")
Fixes: e411901c0b ("bpf: allow for tailcalls in BPF subprograms for x64 JIT")
Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
Link: https://lore.kernel.org/r/20240714123902.32305-2-hffilwlqm@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

2024-07-29 12:53:31 -07:00

arch

bpf, x64: Fix tailcall hierarchy

2024-07-29 12:53:31 -07:00

block

Merge tag 'for-6.11/block-20240722' of git://git.kernel.dk/linux

2024-07-22 11:32:05 -07:00

certs

kbuild: use $(src) instead of $(srctree)/$(src) for source directory

2024-05-10 04:34:52 +09:00

crypto

crypto: testmgr - generate power-of-2 lengths more often

2024-07-13 11:50:28 +12:00

Documentation

Merge tag 'net-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

2024-07-25 13:32:25 -07:00

drivers

Merge tag 'net-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

2024-07-25 13:32:25 -07:00

Merge tag 'constfy-sysctl-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl

2024-07-25 12:58:36 -07:00

include

bpf: Track equal scalars history on per-instruction level

2024-07-29 12:53:10 -07:00

init

Merge tag 'kbuild-v6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild

2024-07-23 14:32:21 -07:00

io_uring

io_uring: fix error pbuf checking

2024-07-20 11:04:57 -06:00

ipc

sysctl: treewide: constify the ctl_table argument of proc_handlers

2024-07-24 20:59:29 +02:00

kernel

bpf: Remove mark_precise_scalar_ids()

2024-07-29 12:53:14 -07:00

lib

Merge tag 'printk-for-6.11-trivial' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux

2024-07-25 13:18:41 -07:00

LICENSES

LICENSES: Add the copyleft-next-0.3.1 license

2022-11-08 15:44:01 +01:00

sysctl: treewide: constify the ctl_table argument of proc_handlers

2024-07-24 20:59:29 +02:00

net

Merge tag 'net-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

2024-07-25 13:32:25 -07:00

rust

Merge tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

2024-07-25 12:33:08 -07:00

samples

Merge tag 'driver-core-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core

2024-07-25 10:42:22 -07:00

scripts

Merge tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

2024-07-25 12:33:08 -07:00

security

sysctl: treewide: constify the ctl_table argument of proc_handlers

2024-07-24 20:59:29 +02:00

sound

Merge tag 'driver-core-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core

2024-07-25 10:42:22 -07:00

tools

selftests/bpf: Update comments find_equal_scalars->sync_linked_regs

2024-07-29 12:53:24 -07:00

usr

initramfs: shorten cmd_initfs in usr/Makefile

2024-07-16 01:07:52 +09:00

virt

Merge tag 'kvm-x86-generic-6.11' of https://github.com/kvm-x86/linux into HEAD

2024-07-16 09:51:36 -04:00

.clang-format

Docs: Move clang-format from process/ to dev-tools/

2024-06-26 16:36:00 -06:00

.cocciconfig

…

.editorconfig

.editorconfig: remove trim_trailing_whitespace option

2024-06-13 16:47:52 +02:00

.get_maintainer.ignore

Add Jeff Kirsher to .get_maintainer.ignore

2024-03-08 11:36:54 +00:00

.gitattributes

.gitattributes: set diff driver for Rust source code files

2023-05-31 17:48:25 +02:00

.gitignore

kbuild: add script and target to generate pacman package

2024-07-22 01:24:22 +09:00

.mailmap

MAINTAINERS: Update email address of Naveen

2024-07-17 19:27:18 +02:00

.rustfmt.toml

rust: add .rustfmt.toml

2022-09-28 09:02:20 +02:00

COPYING

COPYING: state that all contributions really are covered by this file

2020-02-10 13:32:20 -08:00

CREDITS

Merge tag 'trace-v6.11-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

2024-07-18 14:08:42 -07:00

Kbuild

Merge tag 'kbuild-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild

2022-10-10 12:00:45 -07:00

Kconfig

kbuild: ensure full rebuild when the compiler is updated

2020-05-12 13:28:33 +09:00

MAINTAINERS

Merge tag 'net-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

2024-07-25 13:32:25 -07:00

Makefile

Merge tag 'kbuild-v6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild

2024-07-23 14:32:21 -07:00

README

README: Fix spelling

2024-03-18 03:36:32 -06:00

README

Linux kernel
============

There are several guides for kernel developers and users. These guides can
be rendered in a number of formats, like HTML and PDF. Please read
Documentation/admin-guide/README.rst first.

In order to build the documentation, use ``make htmldocs`` or
``make pdfdocs``.  The formatted documentation can also be read online at:

    https://www.kernel.org/doc/html/latest/

There are various text files in the Documentation/ subdirectory,
several of them using the reStructuredText markup notation.

Please read the Documentation/process/changes.rst file, as it contains the
requirements for building and running the kernel, and information about
the problems which may result by upgrading your kernel.

Languages

C 97.5%

Assembly 1%

Shell 0.6%

Python 0.3%

Makefile 0.3%