Historically, arm64 implemented raw_smp_processor_id() as a read of
current_thread_info()->cpu. This changed when arm64 moved thread_info into
task_struct, as at the time CONFIG_THREAD_INFO_IN_TASK made core code use
task_struct::cpu for the cpu number, and header dependencies prevented
using this in raw_smp_processor_id(). As a workaround, we moved to using a
percpu variable in commit:

  57c82954e7 ("arm64: make cpu number a percpu variable")

Since then, thread_info::cpu was reintroduced, and core code was made to
use this in commits:

  001430c191 ("arm64: add CPU field to struct thread_info")
  bcf9033e54 ("sched: move CPU field back into thread_info if THREAD_INFO_IN_TASK=y")

Consequently it is possible to use current_thread_info()->cpu again.

This decreases the number of emitted instructions, as in the following
example.

Before this patch:

Dump of assembler code for function bpf_get_smp_processor_id:
   0xffff8000802cd608 <+0>:     nop
   0xffff8000802cd60c <+4>:     nop
   0xffff8000802cd610 <+8>:     adrp    x0, 0xffff800082138000
   0xffff8000802cd614 <+12>:    mrs     x1, tpidr_el1
   0xffff8000802cd618 <+16>:    add     x0, x0, #0x8
   0xffff8000802cd61c <+20>:    ldrsw   x0, [x0, x1]
   0xffff8000802cd620 <+24>:    ret

After this patch:

Dump of assembler code for function bpf_get_smp_processor_id:
   0xffff8000802c9130 <+0>:     nop
   0xffff8000802c9134 <+4>:     nop
   0xffff8000802c9138 <+8>:     mrs     x0, sp_el0
   0xffff8000802c913c <+12>:    ldr     w0, [x0, #24]
   0xffff8000802c9140 <+16>:    ret

A microbenchmark[1] was built to measure the performance improvement
provided by this change.
It calls the following function a given number of times and measures the
runtime overhead:

static noinline int get_cpu_id(void)
{
	return smp_processor_id();
}

Run the benchmark like:
  modprobe smp_processor_id nr_function_calls=1000000000

+--------------------------+------------------------+
|        | Number of Calls |       Time taken       |
+--------+-----------------+------------------------+
| Before |   1000000000    |      1602888401ns      |
+--------+-----------------+------------------------+
| After  |   1000000000    |      1206212658ns      |
+--------+-----------------+------------------------+
|  Difference (decrease)   |  396675743ns (24.74%)  |
+---------------------------------------------------+

Remove the percpu variable cpu_number, as it is used only in
set_smp_ipi_range() as a dummy variable to be passed to ipi_handler().
Use irq_stat in place of cpu_number here, like arm32.

[1] https://github.com/puranjaymohan/linux/commit/77d3fdd

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20240503171847.68267-2-puranjay@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
139 lines
3.4 KiB
C
139 lines
3.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
*/
|
|
#ifndef __ASM_SMP_H
|
|
#define __ASM_SMP_H
|
|
|
|
#include <linux/const.h>
|
|
|
|
/*
 * Values for secondary_data.status
 *
 * CPU_BOOT_STATUS_MASK covers the low CPU_STUCK_REASON_SHIFT bits of the
 * value; the CPU_STUCK_REASON_* codes are encoded in the bits above them.
 */
#define CPU_STUCK_REASON_SHIFT		(8)
#define CPU_BOOT_STATUS_MASK		((UL(1) << CPU_STUCK_REASON_SHIFT) - 1)

#define CPU_MMU_OFF			(-1)
#define CPU_BOOT_SUCCESS		(0)
/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
#define CPU_KILL_ME			(1)
/* The cpu couldn't die gracefully and is looping in the kernel */
#define CPU_STUCK_IN_KERNEL		(2)
/* Fatal system error detected by secondary CPU, crash the system */
#define CPU_PANIC_KERNEL		(3)

#define CPU_STUCK_REASON_52_BIT_VA	(UL(1) << CPU_STUCK_REASON_SHIFT)
#define CPU_STUCK_REASON_NO_GRAN	(UL(2) << CPU_STUCK_REASON_SHIFT)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/threads.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/thread_info.h>
|
|
|
|
/*
 * The current CPU number, read from the cpu field of the structure
 * returned by current_thread_info().
 */
#define raw_smp_processor_id() (current_thread_info()->cpu)
|
|
|
|
/*
|
|
* Logical CPU mapping.
|
|
*/
|
|
extern u64 __cpu_logical_map[NR_CPUS];
|
|
extern u64 cpu_logical_map(unsigned int cpu);
|
|
|
|
/*
 * Store @hwid in the __cpu_logical_map[] slot for CPU index @cpu.
 *
 * @cpu is used directly as the array index with no range check, so the
 * caller must pass a value below NR_CPUS.
 */
static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
{
	__cpu_logical_map[cpu] = hwid;
}
|
|
|
|
/*
 * Forward declaration only.
 * NOTE(review): nothing in the visible part of this header refers to
 * struct seq_file - confirm whether this declaration is still needed.
 */
struct seq_file;
|
|
|
|
/*
 * Discover the set of possible CPUs and determine their
 * SMP operations.
 */
extern void smp_init_cpus(void);
|
|
|
|
/*
 * Register IPI interrupts with the arch SMP code.
 *
 * NOTE(review): @ipi_base is presumably the first IPI interrupt number and
 * @nr_ipi the number of consecutive IPIs - confirm against the definition.
 */
extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
|
|
|
|
/*
|
|
* Called from the secondary holding pen, this is the secondary CPU entry point.
|
|
*/
|
|
asmlinkage void secondary_start_kernel(void);
|
|
|
|
/*
 * Initial data for bringing up a secondary CPU.
 * @task - task pointer passed along for the secondary CPU
 *         (NOTE(review): presumably its idle task - confirm with the
 *         code that fills this in).
 * @status - Result passed back from the secondary CPU to
 *           indicate failure.
 */
struct secondary_data {
	struct task_struct *task;
	long status;
};
|
|
|
|
/*
 * Objects and entry point defined outside this header: the shared
 * struct secondary_data instance, a long-sized early boot status word and
 * the secondary_entry() routine.
 */
extern struct secondary_data secondary_data;
extern long __early_cpu_boot_status;
extern void secondary_entry(void);
|
|
|
|
/*
 * Call-function IPI senders: one takes a single CPU number, the other a
 * cpumask of target CPUs.
 */
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
|
|
|
|
/*
 * arch_send_wakeup_ipi() has a real implementation only when
 * CONFIG_ARM64_ACPI_PARKING_PROTOCOL is set. Otherwise it is a stub whose
 * body is BUILD_BUG(), which is meant to fail the build if a call to the
 * stub is not optimised away.
 */
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern void arch_send_wakeup_ipi(unsigned int cpu);
#else
static inline void arch_send_wakeup_ipi(unsigned int cpu)
{
	BUILD_BUG();
}
#endif
|
|
|
|
/* Defined outside this header; returns an int status. */
extern int __cpu_disable(void);

/* Nothing to do here on arm64: the body is intentionally empty. */
static inline void __cpu_die(unsigned int cpu) { }
|
|
extern void __noreturn cpu_die(void);
|
|
extern void __noreturn cpu_die_early(void);
|
|
|
|
/*
 * Park the calling CPU: loop forever, issuing wfe() followed by wfi() on
 * every iteration. Never returns.
 */
static inline void __noreturn cpu_park_loop(void)
{
	for (;;) {
		wfe();
		wfi();
	}
}
|
|
|
|
static inline void update_cpu_boot_status(int val)
|
|
{
|
|
WRITE_ONCE(secondary_data.status, val);
|
|
/* Ensure the visibility of the status update */
|
|
dsb(ishst);
|
|
}
|
|
|
|
/*
|
|
* The calling secondary CPU has detected serious configuration mismatch,
|
|
* which calls for a kernel panic. Update the boot status and park the calling
|
|
* CPU.
|
|
*/
|
|
static inline void __noreturn cpu_panic_kernel(void)
|
|
{
|
|
update_cpu_boot_status(CPU_PANIC_KERNEL);
|
|
cpu_park_loop();
|
|
}
|
|
|
|
/*
 * If a secondary CPU enters the kernel but fails to come online,
 * (e.g. due to mismatched features), and cannot exit the kernel,
 * we increment cpus_stuck_in_kernel and leave the CPU in a
 * quiescent loop within the kernel text. The memory containing
 * this loop must not be re-used for anything else as the 'stuck'
 * core is executing it.
 *
 * This function is used to inhibit features like kexec and hibernate.
 */
bool cpus_are_stuck_in_kernel(void);
|
|
|
|
/* Crash-stop helpers, defined outside this header. */
extern void crash_smp_send_stop(void);
extern bool smp_crash_stop_failed(void);
|
|
|
|
#endif /* ifndef __ASSEMBLY__ */
|
|
|
|
#endif /* ifndef __ASM_SMP_H */
|