Merge branches 'barrier.2012.05.09a', 'fixes.2012.04.26a', 'inline.2012.05.02b' and 'srcu.2012.05.07b' into HEAD

barrier:  Reduce the amount of disturbance by rcu_barrier() to the rest of
    	the system.  This branch also includes improvements to
    	RCU_FAST_NO_HZ, which are included here due to conflicts.
fixes:  Miscellaneous fixes.
inline:  Remaining changes from an abortive attempt to inline
    	preemptible RCU's __rcu_read_lock().  These are (1) making
    	exit_rcu() avoid unnecessary work and (2) avoiding having
    	preemptible RCU record a blocked thread when the scheduler
    	declines to do a context switch.
srcu:	Lai Jiangshan's algorithmic implementation of SRCU, including
    	call_srcu().
Paul E. McKenney
2012-05-11 10:14:09 -07:00
21 changed files with 1035 additions and 237 deletions

View File

@@ -51,6 +51,34 @@
#include "rcu.h"
#ifdef CONFIG_PREEMPT_RCU
/*
* Check for a task exiting while in a preemptible-RCU read-side
* critical section, clean up if so. No need to issue warnings,
* as debug_check_no_locks_held() already does this if lockdep
* is enabled.
*/
void exit_rcu(void)
{
struct task_struct *t = current;
if (likely(list_empty(&current->rcu_node_entry)))
return;
t->rcu_read_lock_nesting = 1;
barrier();
t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
__rcu_read_unlock();
}
#else /* #ifdef CONFIG_PREEMPT_RCU */
void exit_rcu(void)
{
}
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =

View File

@@ -851,22 +851,6 @@ int rcu_preempt_needs_cpu(void)
return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
}
/*
* Check for a task exiting while in a preemptible -RCU read-side
* critical section, clean up if so. No need to issue warnings,
* as debug_check_no_locks_held() already does this if lockdep
* is enabled.
*/
void exit_rcu(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0)
return;
t->rcu_read_lock_nesting = 1;
__rcu_read_unlock();
}
#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
#ifdef CONFIG_RCU_TRACE

View File

@@ -64,6 +64,7 @@ static int irqreader = 1; /* RCU readers from irq (timers). */
static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
static int fqs_holdoff; /* Hold time within burst (us). */
static int fqs_stutter = 3; /* Wait time between bursts (s). */
static int n_barrier_cbs; /* Number of callbacks to test RCU barriers. */
static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */
static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
@@ -96,6 +97,8 @@ module_param(fqs_holdoff, int, 0444);
MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
module_param(fqs_stutter, int, 0444);
MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
module_param(n_barrier_cbs, int, 0444);
MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing");
module_param(onoff_interval, int, 0444);
MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
module_param(onoff_holdoff, int, 0444);
@@ -139,6 +142,8 @@ static struct task_struct *shutdown_task;
static struct task_struct *onoff_task;
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static struct task_struct *stall_task;
static struct task_struct **barrier_cbs_tasks;
static struct task_struct *barrier_task;
#define RCU_TORTURE_PIPE_LEN 10
@@ -164,6 +169,7 @@ static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_error;
static long n_rcu_torture_barrier_error;
static long n_rcu_torture_boost_ktrerror;
static long n_rcu_torture_boost_rterror;
static long n_rcu_torture_boost_failure;
@@ -173,6 +179,8 @@ static long n_offline_attempts;
static long n_offline_successes;
static long n_online_attempts;
static long n_online_successes;
static long n_barrier_attempts;
static long n_barrier_successes;
static struct list_head rcu_torture_removed;
static cpumask_var_t shuffle_tmp_mask;
@@ -197,6 +205,10 @@ static unsigned long shutdown_time; /* jiffies to system shutdown. */
static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
@@ -327,6 +339,7 @@ struct rcu_torture_ops {
int (*completed)(void);
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
void (*cb_barrier)(void);
void (*fqs)(void);
int (*stats)(char *page);
@@ -417,6 +430,7 @@ static struct rcu_torture_ops rcu_ops = {
.completed = rcu_torture_completed,
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.call = call_rcu,
.cb_barrier = rcu_barrier,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
@@ -460,6 +474,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
.completed = rcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
.call = NULL,
.cb_barrier = NULL,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
@@ -477,6 +492,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
.completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_expedited,
.call = NULL,
.cb_barrier = NULL,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
@@ -519,6 +535,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_bh_torture_deferred_free,
.sync = synchronize_rcu_bh,
.call = call_rcu_bh,
.cb_barrier = rcu_barrier_bh,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
@@ -535,6 +552,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_bh,
.call = NULL,
.cb_barrier = NULL,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
@@ -551,6 +569,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = {
.completed = rcu_bh_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_bh_expedited,
.call = NULL,
.cb_barrier = NULL,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
@@ -606,6 +625,11 @@ static int srcu_torture_completed(void)
return srcu_batches_completed(&srcu_ctl);
}
static void srcu_torture_deferred_free(struct rcu_torture *rp)
{
call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb);
}
static void srcu_torture_synchronize(void)
{
synchronize_srcu(&srcu_ctl);
@@ -620,7 +644,7 @@ static int srcu_torture_stats(char *page)
cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
torture_type, TORTURE_FLAG, idx);
for_each_possible_cpu(cpu) {
cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu,
cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu,
per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
}
@@ -635,13 +659,29 @@ static struct rcu_torture_ops srcu_ops = {
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.call = NULL,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu"
};
static struct rcu_torture_ops srcu_sync_ops = {
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize,
.call = NULL,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu_sync"
};
static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl)
{
return srcu_read_lock_raw(&srcu_ctl);
@@ -659,13 +699,29 @@ static struct rcu_torture_ops srcu_raw_ops = {
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock_raw,
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.call = NULL,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu_raw"
};
static struct rcu_torture_ops srcu_raw_sync_ops = {
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock_raw,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock_raw,
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize,
.call = NULL,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu_raw_sync"
};
static void srcu_torture_synchronize_expedited(void)
{
synchronize_srcu_expedited(&srcu_ctl);
@@ -680,6 +736,7 @@ static struct rcu_torture_ops srcu_expedited_ops = {
.completed = srcu_torture_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = srcu_torture_synchronize_expedited,
.call = NULL,
.cb_barrier = NULL,
.stats = srcu_torture_stats,
.name = "srcu_expedited"
@@ -1129,7 +1186,8 @@ rcu_torture_printk(char *page)
"rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
"rtmbe: %d rtbke: %ld rtbre: %ld "
"rtbf: %ld rtb: %ld nt: %ld "
"onoff: %ld/%ld:%ld/%ld",
"onoff: %ld/%ld:%ld/%ld "
"barrier: %ld/%ld:%ld",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
@@ -1145,14 +1203,17 @@ rcu_torture_printk(char *page)
n_online_successes,
n_online_attempts,
n_offline_successes,
n_offline_attempts);
n_offline_attempts,
n_barrier_successes,
n_barrier_attempts,
n_rcu_torture_barrier_error);
cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
if (atomic_read(&n_rcu_torture_mberror) != 0 ||
n_rcu_torture_barrier_error != 0 ||
n_rcu_torture_boost_ktrerror != 0 ||
n_rcu_torture_boost_rterror != 0 ||
n_rcu_torture_boost_failure != 0)
cnt += sprintf(&page[cnt], " !!!");
cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
if (i > 1) {
n_rcu_torture_boost_failure != 0 ||
i > 1) {
cnt += sprintf(&page[cnt], "!!! ");
atomic_inc(&n_rcu_torture_error);
WARN_ON_ONCE(1);
@@ -1337,6 +1398,7 @@ static void rcutorture_booster_cleanup(int cpu)
/* This must be outside of the mutex, otherwise deadlock! */
kthread_stop(t);
boost_tasks[cpu] = NULL;
}
static int rcutorture_booster_init(int cpu)
@@ -1484,13 +1546,15 @@ static void rcu_torture_onoff_cleanup(void)
return;
VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
kthread_stop(onoff_task);
onoff_task = NULL;
}
#else /* #ifdef CONFIG_HOTPLUG_CPU */
static void
static int
rcu_torture_onoff_init(void)
{
return 0;
}
static void rcu_torture_onoff_cleanup(void)
@@ -1554,6 +1618,152 @@ static void rcu_torture_stall_cleanup(void)
return;
VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task.");
kthread_stop(stall_task);
stall_task = NULL;
}
/* Callback function for RCU barrier testing. */
void rcu_torture_barrier_cbf(struct rcu_head *rcu)
{
atomic_inc(&barrier_cbs_invoked);
}
/* kthread function to register callbacks used to test RCU barriers. */
static int rcu_torture_barrier_cbs(void *arg)
{
long myid = (long)arg;
struct rcu_head rcu;
init_rcu_head_on_stack(&rcu);
VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started");
set_user_nice(current, 19);
do {
wait_event(barrier_cbs_wq[myid],
atomic_read(&barrier_cbs_count) == n_barrier_cbs ||
kthread_should_stop() ||
fullstop != FULLSTOP_DONTSTOP);
if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
break;
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
if (atomic_dec_and_test(&barrier_cbs_count))
wake_up(&barrier_wq);
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping");
rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
while (!kthread_should_stop())
schedule_timeout_interruptible(1);
cur_ops->cb_barrier();
destroy_rcu_head_on_stack(&rcu);
return 0;
}
/* kthread function to drive and coordinate RCU barrier testing. */
static int rcu_torture_barrier(void *arg)
{
int i;
VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting");
do {
atomic_set(&barrier_cbs_invoked, 0);
atomic_set(&barrier_cbs_count, n_barrier_cbs);
/* wake_up() path contains the required barriers. */
for (i = 0; i < n_barrier_cbs; i++)
wake_up(&barrier_cbs_wq[i]);
wait_event(barrier_wq,
atomic_read(&barrier_cbs_count) == 0 ||
kthread_should_stop() ||
fullstop != FULLSTOP_DONTSTOP);
if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
break;
n_barrier_attempts++;
cur_ops->cb_barrier();
if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
n_rcu_torture_barrier_error++;
WARN_ON_ONCE(1);
}
n_barrier_successes++;
schedule_timeout_interruptible(HZ / 10);
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
while (!kthread_should_stop())
schedule_timeout_interruptible(1);
return 0;
}
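In summary (editorial sketch of the handshake implemented by the two kthreads above, not text from the patch), one barrier-test round proceeds roughly as follows:
/*
 * One barrier-test round:
 *
 * 1. rcu_torture_barrier() zeroes barrier_cbs_invoked, sets
 *    barrier_cbs_count = n_barrier_cbs, and wakes every
 *    rcu_torture_barrier_cbs() kthread.
 * 2. Each kthread posts one callback via cur_ops->call() and atomically
 *    decrements barrier_cbs_count; the kthread that decrements it to
 *    zero wakes rcu_torture_barrier().
 * 3. rcu_torture_barrier() invokes cur_ops->cb_barrier() (for example,
 *    rcu_barrier()) and then verifies that all n_barrier_cbs callbacks
 *    were invoked; any shortfall is counted as a barrier error.
 */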
/* Initialize RCU barrier testing. */
static int rcu_torture_barrier_init(void)
{
int i;
int ret;
if (n_barrier_cbs == 0)
return 0;
if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
printk(KERN_ALERT "%s" TORTURE_FLAG
" Call or barrier ops missing for %s,\n",
torture_type, cur_ops->name);
printk(KERN_ALERT "%s" TORTURE_FLAG
" RCU barrier testing omitted from run.\n",
torture_type);
return 0;
}
atomic_set(&barrier_cbs_count, 0);
atomic_set(&barrier_cbs_invoked, 0);
barrier_cbs_tasks =
kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
GFP_KERNEL);
barrier_cbs_wq =
kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
GFP_KERNEL);
if (barrier_cbs_tasks == NULL || barrier_cbs_wq == NULL)
return -ENOMEM;
for (i = 0; i < n_barrier_cbs; i++) {
init_waitqueue_head(&barrier_cbs_wq[i]);
barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs,
(void *)(long)i,
"rcu_torture_barrier_cbs");
if (IS_ERR(barrier_cbs_tasks[i])) {
ret = PTR_ERR(barrier_cbs_tasks[i]);
VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs");
barrier_cbs_tasks[i] = NULL;
return ret;
}
}
barrier_task = kthread_run(rcu_torture_barrier, NULL,
"rcu_torture_barrier");
if (IS_ERR(barrier_task)) {
ret = PTR_ERR(barrier_task);
VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier");
barrier_task = NULL;
}
return 0;
}
/* Clean up after RCU barrier testing. */
static void rcu_torture_barrier_cleanup(void)
{
int i;
if (barrier_task != NULL) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task");
kthread_stop(barrier_task);
barrier_task = NULL;
}
if (barrier_cbs_tasks != NULL) {
for (i = 0; i < n_barrier_cbs; i++) {
if (barrier_cbs_tasks[i] != NULL) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task");
kthread_stop(barrier_cbs_tasks[i]);
barrier_cbs_tasks[i] = NULL;
}
}
kfree(barrier_cbs_tasks);
barrier_cbs_tasks = NULL;
}
if (barrier_cbs_wq != NULL) {
kfree(barrier_cbs_wq);
barrier_cbs_wq = NULL;
}
}
static int rcutorture_cpu_notify(struct notifier_block *self,
@@ -1598,6 +1808,7 @@ rcu_torture_cleanup(void)
fullstop = FULLSTOP_RMMOD;
mutex_unlock(&fullstop_mutex);
unregister_reboot_notifier(&rcutorture_shutdown_nb);
rcu_torture_barrier_cleanup();
rcu_torture_stall_cleanup();
if (stutter_task) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
@@ -1665,6 +1876,7 @@ rcu_torture_cleanup(void)
VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
kthread_stop(shutdown_task);
}
shutdown_task = NULL;
rcu_torture_onoff_cleanup();
/* Wait for all RCU callbacks to fire. */
@@ -1676,7 +1888,7 @@ rcu_torture_cleanup(void)
if (cur_ops->cleanup)
cur_ops->cleanup();
if (atomic_read(&n_rcu_torture_error))
if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
else if (n_online_successes != n_online_attempts ||
n_offline_successes != n_offline_attempts)
@@ -1692,10 +1904,12 @@ rcu_torture_init(void)
int i;
int cpu;
int firsterr = 0;
int retval;
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
&srcu_ops, &srcu_raw_ops, &srcu_expedited_ops,
&srcu_ops, &srcu_sync_ops, &srcu_raw_ops,
&srcu_raw_sync_ops, &srcu_expedited_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
@@ -1749,6 +1963,7 @@ rcu_torture_init(void)
atomic_set(&n_rcu_torture_free, 0);
atomic_set(&n_rcu_torture_mberror, 0);
atomic_set(&n_rcu_torture_error, 0);
n_rcu_torture_barrier_error = 0;
n_rcu_torture_boost_ktrerror = 0;
n_rcu_torture_boost_rterror = 0;
n_rcu_torture_boost_failure = 0;
@@ -1872,7 +2087,6 @@ rcu_torture_init(void)
test_boost_duration = 2;
if ((test_boost == 1 && cur_ops->can_boost) ||
test_boost == 2) {
int retval;
boost_starttime = jiffies + test_boost_interval * HZ;
register_cpu_notifier(&rcutorture_cpu_nb);
@@ -1897,9 +2111,22 @@ rcu_torture_init(void)
goto unwind;
}
}
rcu_torture_onoff_init();
i = rcu_torture_onoff_init();
if (i != 0) {
firsterr = i;
goto unwind;
}
register_reboot_notifier(&rcutorture_shutdown_nb);
rcu_torture_stall_init();
i = rcu_torture_stall_init();
if (i != 0) {
firsterr = i;
goto unwind;
}
retval = rcu_torture_barrier_init();
if (retval != 0) {
firsterr = retval;
goto unwind;
}
rcutorture_record_test_transition();
mutex_unlock(&fullstop_mutex);
return 0;

View File

@@ -201,7 +201,6 @@ void rcu_note_context_switch(int cpu)
{
trace_rcu_utilization("Start context switch");
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch(cpu);
trace_rcu_utilization("End context switch");
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -1953,6 +1952,38 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
/*
* Because a context switch is a grace period for RCU-sched and RCU-bh,
* any blocking grace-period wait automatically implies a grace period
* if there is only one CPU online at any point in time during execution
* of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
* occasionally incorrectly indicate that there are multiple CPUs online
* when there was in fact only one the whole time, as this just adds
* some overhead: RCU still operates correctly.
*
* Of course, sampling num_online_cpus() with preemption enabled can
* give erroneous results if there are concurrent CPU-hotplug operations.
* For example, given a demonic sequence of preemptions in num_online_cpus()
* and CPU-hotplug operations, there could be two or more CPUs online at
* all times, but num_online_cpus() might well return one (or even zero).
*
* However, all such demonic sequences require at least one CPU-offline
* operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
* is only a problem if there is an RCU read-side critical section executing
* throughout. But RCU-sched and RCU-bh read-side critical sections
* disable either preemption or bh, which prevents a CPU from going offline.
* Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
* that there is only one CPU when in fact there was more than one throughout
* is when there were no RCU readers in the system. If there are no
* RCU readers, the grace period by definition can be of zero length,
* regardless of the number of online CPUs.
*/
static inline int rcu_blocking_is_gp(void)
{
might_sleep(); /* Check for RCU read-side critical section. */
return num_online_cpus() <= 1;
}
/**
* synchronize_sched - wait until an rcu-sched grace period has elapsed.
*
@@ -2543,7 +2574,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
for (i = NUM_RCU_LVLS - 1; i > 0; i--)
rsp->levelspread[i] = CONFIG_RCU_FANOUT;
rsp->levelspread[0] = RCU_FANOUT_LEAF;
rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
}
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
static void __init rcu_init_levelspread(struct rcu_state *rsp)

View File

@@ -29,18 +29,14 @@
#include <linux/seqlock.h>
/*
* Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
* Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
* CONFIG_RCU_FANOUT_LEAF.
* In theory, it should be possible to add more levels straightforwardly.
* In practice, this did work well going from three levels to four.
* Of course, your mileage may vary.
*/
#define MAX_RCU_LVLS 4
#if CONFIG_RCU_FANOUT > 16
#define RCU_FANOUT_LEAF 16
#else /* #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF)
#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
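For a concrete feel of these macros, here is a worked example assuming the common 64-bit defaults of CONFIG_RCU_FANOUT=64 and CONFIG_RCU_FANOUT_LEAF=16 (illustrative values, not mandated by this patch):
/*
 * RCU_FANOUT_1 = 16                       CPUs handled by one leaf rcu_node
 * RCU_FANOUT_2 = 16 * 64    = 1024        CPUs handled by a two-level tree
 * RCU_FANOUT_3 = 1024 * 64  = 65536       CPUs handled by a three-level tree
 * RCU_FANOUT_4 = 65536 * 64 = 4194304     CPUs handled by a four-level tree
 */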
@@ -434,7 +430,6 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
long rcu_batches_completed(void);
static void rcu_preempt_note_context_switch(int cpu);
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,

View File

@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu)
*
* Caller must disable preemption.
*/
static void rcu_preempt_note_context_switch(int cpu)
void rcu_preempt_note_context_switch(void)
{
struct task_struct *t = current;
unsigned long flags;
@@ -164,7 +164,7 @@ static void rcu_preempt_note_context_switch(int cpu)
(t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
/* Possibly blocking in an RCU read-side critical section. */
rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
rdp = __this_cpu_ptr(rcu_preempt_state.rda);
rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,7 +228,7 @@ static void rcu_preempt_note_context_switch(int cpu)
* means that we continue to block the current grace period.
*/
local_irq_save(flags);
rcu_preempt_qs(cpu);
rcu_preempt_qs(smp_processor_id());
local_irq_restore(flags);
}
@@ -969,22 +969,6 @@ static void __init __rcu_init_preempt(void)
rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
}
/*
* Check for a task exiting while in a preemptible-RCU read-side
* critical section, clean up if so. No need to issue warnings,
* as debug_check_no_locks_held() already does this if lockdep
* is enabled.
*/
void exit_rcu(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0)
return;
t->rcu_read_lock_nesting = 1;
__rcu_read_unlock();
}
#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
static struct rcu_state *rcu_state = &rcu_sched_state;
@@ -1017,14 +1001,6 @@ void rcu_force_quiescent_state(void)
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
/*
* Because preemptible RCU does not exist, we never have to check for
* CPUs being in quiescent states.
*/
static void rcu_preempt_note_context_switch(int cpu)
{
}
/*
* Because preemptible RCU does not exist, there are never any preempted
* RCU readers.

View File

@@ -2083,6 +2083,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
#endif
/* Here we just switch the register state and the stack. */
rcu_switch_from(prev);
switch_to(prev, next, prev);
barrier();

View File

@@ -34,10 +34,77 @@
#include <linux/delay.h>
#include <linux/srcu.h>
/*
* Initialize an rcu_batch structure to empty.
*/
static inline void rcu_batch_init(struct rcu_batch *b)
{
b->head = NULL;
b->tail = &b->head;
}
/*
* Enqueue a callback onto the tail of the specified rcu_batch structure.
*/
static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
{
*b->tail = head;
b->tail = &head->next;
}
/*
* Is the specified rcu_batch structure empty?
*/
static inline bool rcu_batch_empty(struct rcu_batch *b)
{
return b->tail == &b->head;
}
/*
* Remove the callback at the head of the specified rcu_batch structure
* and return a pointer to it, or return NULL if the structure is empty.
*/
static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
{
struct rcu_head *head;
if (rcu_batch_empty(b))
return NULL;
head = b->head;
b->head = head->next;
if (b->tail == &head->next)
rcu_batch_init(b);
return head;
}
/*
* Move all callbacks from the rcu_batch structure specified by "from" to
* the structure specified by "to".
*/
static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
{
if (!rcu_batch_empty(from)) {
*to->tail = from->head;
to->tail = from->tail;
rcu_batch_init(from);
}
}
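A minimal sketch of how these helpers fit together (rcu_batch_demo() and its callback heads are hypothetical, for illustration only; in real use call_srcu() sets ->next to NULL before queueing):
static void rcu_batch_demo(void)
{
	struct rcu_batch b;
	static struct rcu_head cb1, cb2;	/* static, so ->next starts NULL */
	struct rcu_head *p;

	rcu_batch_init(&b);			/* empty: tail points at head */
	rcu_batch_queue(&b, &cb1);		/* append at *tail, advance tail */
	rcu_batch_queue(&b, &cb2);
	while ((p = rcu_batch_dequeue(&b)) != NULL)
		;	/* a real caller would invoke p->func(p) here */
}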
/* single-thread state-machine */
static void process_srcu(struct work_struct *work);
static int init_srcu_struct_fields(struct srcu_struct *sp)
{
sp->completed = 0;
mutex_init(&sp->mutex);
spin_lock_init(&sp->queue_lock);
sp->running = false;
rcu_batch_init(&sp->batch_queue);
rcu_batch_init(&sp->batch_check0);
rcu_batch_init(&sp->batch_check1);
rcu_batch_init(&sp->batch_done);
INIT_DELAYED_WORK(&sp->work, process_srcu);
sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
return sp->per_cpu_ref ? 0 : -ENOMEM;
}
@@ -73,21 +140,116 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
* srcu_readers_active_idx -- returns approximate number of readers
* active on the specified rank of per-CPU counters.
* Returns approximate total of the readers' ->seq[] values for the
* rank of per-CPU counters specified by idx.
*/
static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
{
int cpu;
int sum;
unsigned long sum = 0;
unsigned long t;
sum = 0;
for_each_possible_cpu(cpu)
sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
for_each_possible_cpu(cpu) {
t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
sum += t;
}
return sum;
}
/*
* Returns approximate number of readers active on the specified rank
* of the per-CPU ->c[] counters.
*/
static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
{
int cpu;
unsigned long sum = 0;
unsigned long t;
for_each_possible_cpu(cpu) {
t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
sum += t;
}
return sum;
}
/*
* Return true if the number of pre-existing readers is determined to
* be stably zero. An example unstable zero can occur if the call
* to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
* but due to task migration, sees the corresponding __srcu_read_unlock()
* decrement. This can happen because srcu_readers_active_idx() takes
* time to sum the array, and might in fact be interrupted or preempted
* partway through the summation.
*/
static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
{
unsigned long seq;
seq = srcu_readers_seq_idx(sp, idx);
/*
* The following smp_mb() A pairs with the smp_mb() B located in
* __srcu_read_lock(). This pairing ensures that if an
* __srcu_read_lock() increments its counter after the summation
* in srcu_readers_active_idx(), then the corresponding SRCU read-side
* critical section will see any changes made prior to the start
* of the current SRCU grace period.
*
* Also, if the above call to srcu_readers_seq_idx() saw the
* increment of ->seq[], then the call to srcu_readers_active_idx()
* must see the increment of ->c[].
*/
smp_mb(); /* A */
/*
* Note that srcu_readers_active_idx() can incorrectly return
* zero even though there is a pre-existing reader throughout.
* To see this, suppose that task A is in a very long SRCU
* read-side critical section that started on CPU 0, and that
* no other reader exists, so that the sum of the counters
* is equal to one. Then suppose that task B starts executing
* srcu_readers_active_idx(), summing up to CPU 1, and then that
* task C starts reading on CPU 0, so that its increment is not
* summed, but finishes reading on CPU 2, so that its decrement
* -is- summed. Then when task B completes its sum, it will
* incorrectly get zero, despite the fact that task A has been
* in its SRCU read-side critical section the whole time.
*
* We therefore do a validation step should srcu_readers_active_idx()
* return zero.
*/
if (srcu_readers_active_idx(sp, idx) != 0)
return false;
/*
* The remainder of this function is the validation step.
* The following smp_mb() D pairs with the smp_mb() C in
* __srcu_read_unlock(). If the __srcu_read_unlock() was seen
* by srcu_readers_active_idx() above, then any destructive
* operation performed after the grace period will happen after
* the corresponding SRCU read-side critical section.
*
* Note that there can be at most NR_CPUS worth of readers using
* the old index, which is not enough to overflow even a 32-bit
* integer. (Yes, this does mean that systems having more than
* a billion or so CPUs need to be 64-bit systems.) Therefore,
* the sum of the ->seq[] counters cannot possibly overflow.
* Therefore, the only way that the return values of the two
* calls to srcu_readers_seq_idx() can be equal is if there were
* no increments of the corresponding rank of ->seq[] counts
* in the interim. But the missed-increment scenario laid out
* above includes an increment of the ->seq[] counter by
* the corresponding __srcu_read_lock(). Therefore, if this
* scenario occurs, the return values from the two calls to
* srcu_readers_seq_idx() will differ, and thus the validation
* step below suffices.
*/
smp_mb(); /* D */
return srcu_readers_seq_idx(sp, idx) == seq;
}
/**
* srcu_readers_active - returns approximate number of readers.
* @sp: which srcu_struct to count active readers (holding srcu_read_lock).
@@ -98,7 +260,14 @@ static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
*/
static int srcu_readers_active(struct srcu_struct *sp)
{
return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1);
int cpu;
unsigned long sum = 0;
for_each_possible_cpu(cpu) {
sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
}
return sum;
}
/**
@@ -131,10 +300,11 @@ int __srcu_read_lock(struct srcu_struct *sp)
int idx;
preempt_disable();
idx = sp->completed & 0x1;
barrier(); /* ensure compiler looks -once- at sp->completed. */
per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++;
srcu_barrier(); /* ensure compiler won't misorder critical section. */
idx = rcu_dereference_index_check(sp->completed,
rcu_read_lock_sched_held()) & 0x1;
ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
smp_mb(); /* B */ /* Avoid leaking the critical section. */
ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
preempt_enable();
return idx;
}
@@ -149,8 +319,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
preempt_disable();
srcu_barrier(); /* ensure compiler won't misorder critical section. */
per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
smp_mb(); /* C */ /* Avoid leaking the critical section. */
ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1;
preempt_enable();
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
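For reference, a reader-side sketch using the public srcu_read_lock()/srcu_read_unlock() wrappers (foo_srcu, foo_gp, struct foo, and do_something_with() are hypothetical names, not part of this patch):
static void foo_reader(void)
{
	int idx;
	struct foo *p;

	idx = srcu_read_lock(&foo_srcu);
	p = srcu_dereference(foo_gp, &foo_srcu);	/* fetch SRCU-protected pointer */
	if (p)
		do_something_with(p);
	srcu_read_unlock(&foo_srcu, idx);
}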
@@ -163,14 +333,86 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
* we repeatedly block for 1-millisecond time periods. This approach
* has done well in testing, so there is no need for a config parameter.
*/
#define SYNCHRONIZE_SRCU_READER_DELAY 10
#define SRCU_RETRY_CHECK_DELAY 5
#define SYNCHRONIZE_SRCU_TRYCOUNT 2
#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12
/*
* @@@ Wait until all pre-existing readers complete. Such readers
* will have used the index specified by "idx".
* The caller should ensure that ->completed is not changed while checking,
* and that idx = (->completed & 1) ^ 1.
*/
static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
{
for (;;) {
if (srcu_readers_active_idx_check(sp, idx))
return true;
if (--trycount <= 0)
return false;
udelay(SRCU_RETRY_CHECK_DELAY);
}
}
/*
* Increment the ->completed counter so that future SRCU readers will
* use the other rank of the ->c[] and ->seq[] arrays. This allows
* us to wait for pre-existing readers in a starvation-free manner.
*/
static void srcu_flip(struct srcu_struct *sp)
{
sp->completed++;
}
/*
* Enqueue an SRCU callback on the specified srcu_struct structure,
* initiating grace-period processing if it is not already running.
*/
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
void (*func)(struct rcu_head *head))
{
unsigned long flags;
head->next = NULL;
head->func = func;
spin_lock_irqsave(&sp->queue_lock, flags);
rcu_batch_queue(&sp->batch_queue, head);
if (!sp->running) {
sp->running = true;
queue_delayed_work(system_nrt_wq, &sp->work, 0);
}
spin_unlock_irqrestore(&sp->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(call_srcu);
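And an update-side sketch of the new call_srcu() interface, continuing the hypothetical foo_srcu/foo_gp example (struct foo is assumed to embed a struct rcu_head named rcu; foo_lock is likewise hypothetical):
static void foo_reclaim(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu);

	kfree(fp);
}

static void foo_replace(struct foo *newp)
{
	struct foo *oldp;

	spin_lock(&foo_lock);
	oldp = rcu_dereference_protected(foo_gp, lockdep_is_held(&foo_lock));
	rcu_assign_pointer(foo_gp, newp);
	spin_unlock(&foo_lock);
	if (oldp)
		call_srcu(&foo_srcu, &oldp->rcu, foo_reclaim);	/* deferred free */
}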
struct rcu_synchronize {
struct rcu_head head;
struct completion completion;
};
/*
* Awaken the corresponding synchronize_srcu() instance now that a
* grace period has elapsed.
*/
static void wakeme_after_rcu(struct rcu_head *head)
{
struct rcu_synchronize *rcu;
rcu = container_of(head, struct rcu_synchronize, head);
complete(&rcu->completion);
}
static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
static void srcu_reschedule(struct srcu_struct *sp);
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
*/
static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
{
int idx;
struct rcu_synchronize rcu;
struct rcu_head *head = &rcu.head;
bool done = false;
rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
!lock_is_held(&rcu_bh_lock_map) &&
@@ -178,91 +420,32 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
!lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
idx = sp->completed;
mutex_lock(&sp->mutex);
init_completion(&rcu.completion);
/*
* Check to see if someone else did the work for us while we were
* waiting to acquire the lock. We need -two- advances of
* the counter, not just one. If there was but one, we might have
* shown up -after- our helper's first synchronize_sched(), thus
* having failed to prevent CPU-reordering races with concurrent
* srcu_read_unlock()s on other CPUs (see comment below). So we
* either (1) wait for two or (2) supply the second ourselves.
*/
head->next = NULL;
head->func = wakeme_after_rcu;
spin_lock_irq(&sp->queue_lock);
if (!sp->running) {
/* steal the processing owner */
sp->running = true;
rcu_batch_queue(&sp->batch_check0, head);
spin_unlock_irq(&sp->queue_lock);
if ((sp->completed - idx) >= 2) {
mutex_unlock(&sp->mutex);
return;
srcu_advance_batches(sp, trycount);
if (!rcu_batch_empty(&sp->batch_done)) {
BUG_ON(sp->batch_done.head != head);
rcu_batch_dequeue(&sp->batch_done);
done = true;
}
/* give the processing owner to work_struct */
srcu_reschedule(sp);
} else {
rcu_batch_queue(&sp->batch_queue, head);
spin_unlock_irq(&sp->queue_lock);
}
sync_func(); /* Force memory barrier on all CPUs. */
/*
* The preceding synchronize_sched() ensures that any CPU that
* sees the new value of sp->completed will also see any preceding
* changes to data structures made by this CPU. This prevents
* some other CPU from reordering the accesses in its SRCU
* read-side critical section to precede the corresponding
* srcu_read_lock() -- ensuring that such references will in
* fact be protected.
*
* So it is now safe to do the flip.
*/
idx = sp->completed & 0x1;
sp->completed++;
sync_func(); /* Force memory barrier on all CPUs. */
/*
* At this point, because of the preceding synchronize_sched(),
* all srcu_read_lock() calls using the old counters have completed.
* Their corresponding critical sections might well be still
* executing, but the srcu_read_lock() primitives themselves
* will have finished executing. We initially give readers
* an arbitrarily chosen 10 microseconds to get out of their
* SRCU read-side critical sections, then loop waiting 1/HZ
* seconds per iteration. The 10-microsecond value has done
* very well in testing.
*/
if (srcu_readers_active_idx(sp, idx))
udelay(SYNCHRONIZE_SRCU_READER_DELAY);
while (srcu_readers_active_idx(sp, idx))
schedule_timeout_interruptible(1);
sync_func(); /* Force memory barrier on all CPUs. */
/*
* The preceding synchronize_sched() forces all srcu_read_unlock()
* primitives that were executing concurrently with the preceding
* for_each_possible_cpu() loop to have completed by this point.
* More importantly, it also forces the corresponding SRCU read-side
* critical sections to have also completed, and the corresponding
* references to SRCU-protected data items to be dropped.
*
* Note:
*
* Despite what you might think at first glance, the
* preceding synchronize_sched() -must- be within the
* critical section ended by the following mutex_unlock().
* Otherwise, a task taking the early exit can race
* with a srcu_read_unlock(), which might have executed
* just before the preceding srcu_readers_active() check,
* and whose CPU might have reordered the srcu_read_unlock()
* with the preceding critical section. In this case, there
* is nothing preventing the synchronize_sched() task that is
* taking the early exit from freeing a data structure that
* is still being referenced (out of order) by the task
* doing the srcu_read_unlock().
*
* Alternatively, the comparison with "2" on the early exit
* could be changed to "3", but this increases synchronize_srcu()
* latency for bulk loads. So the current code is preferred.
*/
mutex_unlock(&sp->mutex);
if (!done)
wait_for_completion(&rcu.completion);
}
/**
@@ -281,7 +464,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
*/
void synchronize_srcu(struct srcu_struct *sp)
{
__synchronize_srcu(sp, synchronize_sched);
__synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);
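Equivalently, an updater that may block can wait synchronously instead of using call_srcu() (same hypothetical names as the sketch above):
	rcu_assign_pointer(foo_gp, newp);
	synchronize_srcu(&foo_srcu);		/* wait for pre-existing readers */
	kfree(oldp);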
@@ -289,18 +472,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
* synchronize_srcu_expedited - Brute-force SRCU grace period
* @sp: srcu_struct with which to synchronize.
*
* Wait for an SRCU grace period to elapse, but use a "big hammer"
* approach to force the grace period to end quickly. This consumes
* significant time on all CPUs and is unfriendly to real-time workloads,
* so is thus not recommended for any sort of common-case code. In fact,
* if you are using synchronize_srcu_expedited() in a loop, please
* restructure your code to batch your updates, and then use a single
* synchronize_srcu() instead.
* Wait for an SRCU grace period to elapse, but be more aggressive about
* spinning rather than blocking when waiting.
*
* Note that it is illegal to call this function while holding any lock
* that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
* to call this function from a CPU-hotplug notifier. Failing to observe
* these restriction will result in deadlock. It is also illegal to call
* that is acquired by a CPU-hotplug notifier. It is also illegal to call
* synchronize_srcu_expedited() from the corresponding SRCU read-side
* critical section; doing so will result in deadlock. However, it is
* perfectly legal to call synchronize_srcu_expedited() on one srcu_struct
@@ -309,10 +485,19 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
*/
void synchronize_srcu_expedited(struct srcu_struct *sp)
{
__synchronize_srcu(sp, synchronize_sched_expedited);
__synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
}
EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
/**
* srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
*/
void srcu_barrier(struct srcu_struct *sp)
{
synchronize_srcu(sp);
}
EXPORT_SYMBOL_GPL(srcu_barrier);
/**
* srcu_batches_completed - return batches completed.
* @sp: srcu_struct on which to report batch completion.
@@ -320,9 +505,146 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
* Report the number of batches, correlated with, but not necessarily
* precisely the same as, the number of grace periods that have elapsed.
*/
long srcu_batches_completed(struct srcu_struct *sp)
{
return sp->completed;
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);
#define SRCU_CALLBACK_BATCH 10
#define SRCU_INTERVAL 1
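For orientation (editorial summary, not part of the original patch), the work-queue state machine implemented by the functions below moves callbacks through four stages:
/*
 *   call_srcu() --> batch_queue --> batch_check0 --> batch_check1 --> batch_done
 *
 * srcu_collect_new() pulls newly queued callbacks onto ->batch_check0;
 * srcu_advance_batches() waits for readers to drain and advances callbacks
 * from ->batch_check0 to ->batch_check1 and then to ->batch_done;
 * srcu_invoke_callbacks() invokes callbacks from ->batch_done; and
 * srcu_reschedule() re-queues the work if any stage is still non-empty.
 */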
/*
* Move any new SRCU callbacks to the first stage of the SRCU grace
* period pipeline.
*/
static void srcu_collect_new(struct srcu_struct *sp)
{
if (!rcu_batch_empty(&sp->batch_queue)) {
spin_lock_irq(&sp->queue_lock);
rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
spin_unlock_irq(&sp->queue_lock);
}
}
/*
* Core SRCU state machine. Advance callbacks from ->batch_check0 to
* ->batch_check1 and then to ->batch_done as readers drain.
*/
static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
{
int idx = 1 ^ (sp->completed & 1);
/*
* Because readers might be delayed for an extended period after
* fetching ->completed for their index, at any point in time there
* might well be readers using both idx=0 and idx=1. We therefore
* need to wait for readers to clear from both index values before
* invoking a callback.
*/
if (rcu_batch_empty(&sp->batch_check0) &&
rcu_batch_empty(&sp->batch_check1))
return; /* no callbacks need to be advanced */
if (!try_check_zero(sp, idx, trycount))
return; /* failed to advance, will try after SRCU_INTERVAL */
/*
* The callbacks in ->batch_check1 are already done with their
* first zero check and flip back when they were enqueued on
* ->batch_check0 in a previous invocation of srcu_advance_batches().
* (Presumably try_check_zero() returned false during that
* invocation, leaving the callbacks stranded on ->batch_check1.)
* They are therefore ready to invoke, so move them to ->batch_done.
*/
rcu_batch_move(&sp->batch_done, &sp->batch_check1);
if (rcu_batch_empty(&sp->batch_check0))
return; /* no callbacks need to be advanced */
srcu_flip(sp);
/*
* The callbacks in ->batch_check0 just finished their
* first zero check and flip, so move them to ->batch_check1
* for future checking on the other idx.
*/
rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
/*
* SRCU read-side critical sections are normally short, so check
* at least twice in quick succession after a flip.
*/
trycount = trycount < 2 ? 2 : trycount;
if (!try_check_zero(sp, idx^1, trycount))
return; /* failed to advance, will try after SRCU_INTERVAL */
/*
* The callbacks in ->batch_check1 have now waited for all
* pre-existing readers using both idx values. They are therefore
* ready to invoke, so move them to ->batch_done.
*/
rcu_batch_move(&sp->batch_done, &sp->batch_check1);
}
/*
* Invoke a limited number of SRCU callbacks that have passed through
* their grace period. If there are more to do, SRCU will reschedule
* the workqueue.
*/
static void srcu_invoke_callbacks(struct srcu_struct *sp)
{
int i;
struct rcu_head *head;
for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
head = rcu_batch_dequeue(&sp->batch_done);
if (!head)
break;
local_bh_disable();
head->func(head);
local_bh_enable();
}
}
/*
* Finished one round of SRCU grace period. Start another if there are
* more SRCU callbacks queued, otherwise put SRCU into not-running state.
*/
static void srcu_reschedule(struct srcu_struct *sp)
{
bool pending = true;
if (rcu_batch_empty(&sp->batch_done) &&
rcu_batch_empty(&sp->batch_check1) &&
rcu_batch_empty(&sp->batch_check0) &&
rcu_batch_empty(&sp->batch_queue)) {
spin_lock_irq(&sp->queue_lock);
if (rcu_batch_empty(&sp->batch_done) &&
rcu_batch_empty(&sp->batch_check1) &&
rcu_batch_empty(&sp->batch_check0) &&
rcu_batch_empty(&sp->batch_queue)) {
sp->running = false;
pending = false;
}
spin_unlock_irq(&sp->queue_lock);
}
if (pending)
queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL);
}
/*
* This is the work-queue function that handles SRCU grace periods.
*/
static void process_srcu(struct work_struct *work)
{
struct srcu_struct *sp;
sp = container_of(work, struct srcu_struct, work.work);
srcu_collect_new(sp);
srcu_advance_batches(sp, 1);
srcu_invoke_callbacks(sp);
srcu_reschedule(sp);
}

View File

@@ -861,7 +861,13 @@ EXPORT_SYMBOL(mod_timer);
*
* mod_timer_pinned() is a way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
* and not allow the timer to be migrated to a different CPU.
* and to ensure that the timer is scheduled on the current CPU.
*
* Note that this does not prevent the timer from being migrated
* when the current CPU goes offline. If this is a problem for
* you, use CPU-hotplug notifiers to handle it correctly, for
* example, cancelling the timer when the corresponding CPU goes
* offline.
*
* mod_timer_pinned(timer, expires) is equivalent to:
*