rcu: Create transitive rnp->lock acquisition functions
authorPeter Zijlstra <peterz@infradead.org>
Thu, 8 Oct 2015 10:24:23 +0000 (12:24 +0200)
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>
Mon, 23 Nov 2015 18:37:35 +0000 (10:37 -0800)
Providing RCU's memory-ordering guarantees requires that the rcu_node
tree's locking provide transitive memory ordering, which the Linux kernel's
spinlocks currently do not provide unless smp_mb__after_unlock_lock()
is used.  Having a separate smp_mb__after_unlock_lock() after each and
every lock acquisition is error-prone, hard to read, and a bit annoying,
so this commit provides wrapper functions that pull in the
smp_mb__after_unlock_lock() invocations.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_plugin.h

index f07343b54fe5a29d4fcabe7e78b9c3ea79029414..daf17e248757604500f542887f171e75146f3b92 100644 (file)
@@ -1534,10 +1534,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
         * hold it, acquire the root rcu_node structure's lock in order to
         * start one (if needed).
         */
-       if (rnp != rnp_root) {
-               raw_spin_lock(&rnp_root->lock);
-               smp_mb__after_unlock_lock();
-       }
+       if (rnp != rnp_root)
+               raw_spin_lock_rcu_node(rnp_root);
 
        /*
         * Get a new grace-period number.  If there really is no grace
@@ -1786,11 +1784,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
        if ((rdp->gpnum == READ_ONCE(rnp->gpnum) &&
             rdp->completed == READ_ONCE(rnp->completed) &&
             !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
-           !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
+           !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */
                local_irq_restore(flags);
                return;
        }
-       smp_mb__after_unlock_lock();
        needwake = __note_gp_changes(rsp, rnp, rdp);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
        if (needwake)
@@ -1814,8 +1811,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        WRITE_ONCE(rsp->gp_activity, jiffies);
-       raw_spin_lock_irq(&rnp->lock);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irq_rcu_node(rnp);
        if (!READ_ONCE(rsp->gp_flags)) {
                /* Spurious wakeup, tell caller to go back to sleep.  */
                raw_spin_unlock_irq(&rnp->lock);
@@ -1847,8 +1843,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
         */
        rcu_for_each_leaf_node(rsp, rnp) {
                rcu_gp_slow(rsp, gp_preinit_delay);
-               raw_spin_lock_irq(&rnp->lock);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irq_rcu_node(rnp);
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
                    !rnp->wait_blkd_tasks) {
                        /* Nothing to do on this leaf rcu_node structure. */
@@ -1904,8 +1899,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
                rcu_gp_slow(rsp, gp_init_delay);
-               raw_spin_lock_irq(&rnp->lock);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irq_rcu_node(rnp);
                rdp = this_cpu_ptr(rsp->rda);
                rcu_preempt_check_blocked_tasks(rnp);
                rnp->qsmask = rnp->qsmaskinit;
@@ -1973,8 +1967,7 @@ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
        }
        /* Clear flag to prevent immediate re-entry. */
        if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
-               raw_spin_lock_irq(&rnp->lock);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irq_rcu_node(rnp);
                WRITE_ONCE(rsp->gp_flags,
                           READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
                raw_spin_unlock_irq(&rnp->lock);
@@ -1993,8 +1986,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        WRITE_ONCE(rsp->gp_activity, jiffies);
-       raw_spin_lock_irq(&rnp->lock);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irq_rcu_node(rnp);
        gp_duration = jiffies - rsp->gp_start;
        if (gp_duration > rsp->gp_max)
                rsp->gp_max = gp_duration;
@@ -2019,8 +2011,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
         * grace period is recorded in any of the rcu_node structures.
         */
        rcu_for_each_node_breadth_first(rsp, rnp) {
-               raw_spin_lock_irq(&rnp->lock);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irq_rcu_node(rnp);
                WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
                WARN_ON_ONCE(rnp->qsmask);
                WRITE_ONCE(rnp->completed, rsp->gpnum);
@@ -2035,8 +2026,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
                rcu_gp_slow(rsp, gp_cleanup_delay);
        }
        rnp = rcu_get_root(rsp);
-       raw_spin_lock_irq(&rnp->lock);
-       smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */
+       raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
        rcu_nocb_gp_set(rnp, nocb);
 
        /* Declare grace period done. */
@@ -2284,8 +2274,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                rnp_c = rnp;
                rnp = rnp->parent;
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
                oldmask = rnp_c->qsmask;
        }
 
@@ -2332,8 +2321,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
        gps = rnp->gpnum;
        mask = rnp->grpmask;
        raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
-       raw_spin_lock(&rnp_p->lock);    /* irqs already disabled. */
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_rcu_node(rnp_p);  /* irqs already disabled. */
        rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
 }
 
@@ -2355,8 +2343,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
        struct rcu_node *rnp;
 
        rnp = rdp->mynode;
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
        if ((rdp->cpu_no_qs.b.norm &&
             rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
            rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
@@ -2582,8 +2569,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
                rnp = rnp->parent;
                if (!rnp)
                        break;
-               raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-               smp_mb__after_unlock_lock(); /* GP memory ordering. */
+               raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
                rnp->qsmaskinit &= ~mask;
                rnp->qsmask &= ~mask;
                if (rnp->qsmaskinit) {
@@ -2611,8 +2597,7 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
 
        /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
        mask = rdp->grpmask;
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();    /* Enforce GP memory-order guarantee. */
+       raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
        rnp->qsmaskinitnext &= ~mask;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
@@ -2809,8 +2794,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
        rcu_for_each_leaf_node(rsp, rnp) {
                cond_resched_rcu_qs();
                mask = 0;
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
                if (rnp->qsmask == 0) {
                        if (rcu_state_p == &rcu_sched_state ||
                            rsp != rcu_state_p ||
@@ -2881,8 +2865,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
        /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
 
        /* Reached the root of the rcu_node tree, acquire lock. */
-       raw_spin_lock_irqsave(&rnp_old->lock, flags);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
        raw_spin_unlock(&rnp_old->fqslock);
        if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                rsp->n_force_qs_lh++;
@@ -3005,8 +2988,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
                if (!rcu_gp_in_progress(rsp)) {
                        struct rcu_node *rnp_root = rcu_get_root(rsp);
 
-                       raw_spin_lock(&rnp_root->lock);
-                       smp_mb__after_unlock_lock();
+                       raw_spin_lock_rcu_node(rnp_root);
                        needwake = rcu_start_gp(rsp);
                        raw_spin_unlock(&rnp_root->lock);
                        if (needwake)
@@ -3426,8 +3408,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
         * CPUs for the current rcu_node structure up the rcu_node tree.
         */
        rcu_for_each_leaf_node(rsp, rnp) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
                if (rnp->expmaskinit == rnp->expmaskinitnext) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                        continue;  /* No new CPUs, nothing to do. */
@@ -3447,8 +3428,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
                rnp_up = rnp->parent;
                done = false;
                while (rnp_up) {
-                       raw_spin_lock_irqsave(&rnp_up->lock, flags);
-                       smp_mb__after_unlock_lock();
+                       raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
                        if (rnp_up->expmaskinit)
                                done = true;
                        rnp_up->expmaskinit |= mask;
@@ -3472,8 +3452,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
 
        sync_exp_reset_tree_hotplug(rsp);
        rcu_for_each_node_breadth_first(rsp, rnp) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
                WARN_ON_ONCE(rnp->expmask);
                rnp->expmask = rnp->expmaskinit;
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -3531,8 +3510,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
                mask = rnp->grpmask;
                raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
                rnp = rnp->parent;
-               raw_spin_lock(&rnp->lock); /* irqs already disabled */
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
                WARN_ON_ONCE(!(rnp->expmask & mask));
                rnp->expmask &= ~mask;
        }
@@ -3549,8 +3527,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
 {
        unsigned long flags;
 
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
        __rcu_report_exp_rnp(rsp, rnp, wake, flags);
 }
 
@@ -3564,8 +3541,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
 {
        unsigned long flags;
 
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
        if (!(rnp->expmask & mask)) {
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                return;
@@ -3708,8 +3684,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 
        sync_exp_reset_tree(rsp);
        rcu_for_each_leaf_node(rsp, rnp) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
 
                /* Each pass checks a CPU for identity, offline, and idle. */
                mask_ofl_test = 0;
@@ -4198,8 +4173,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
         */
        rnp = rdp->mynode;
        mask = rdp->grpmask;
-       raw_spin_lock(&rnp->lock);              /* irqs already disabled. */
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_rcu_node(rnp);            /* irqs already disabled. */
        rnp->qsmaskinitnext |= mask;
        rnp->expmaskinitnext |= mask;
        if (!rdp->beenonline)
index 9fb4e238d4dcaaed1565dd7bf08a3c71a7f518bf..f32bebb6bc90b5074398120653ab01ba407befe8 100644 (file)
@@ -664,3 +664,42 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
 #else /* #ifdef CONFIG_PPC */
 #define smp_mb__after_unlock_lock()    do { } while (0)
 #endif /* #else #ifdef CONFIG_PPC */
+
+/*
+ * Wrappers for the rcu_node::lock acquire.
+ *
+ * Because the rcu_nodes form a tree, the tree traversal locking will observe
+ * different lock values, this in turn means that an UNLOCK of one level
+ * followed by a LOCK of another level does not imply a full memory barrier;
+ * and most importantly transitivity is lost.
+ *
+ * In order to restore full ordering between tree levels, augment the regular
+ * lock acquire functions with smp_mb__after_unlock_lock().
+ */
+static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
+{
+       raw_spin_lock(&rnp->lock);
+       smp_mb__after_unlock_lock();
+}
+
+static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
+{
+       raw_spin_lock_irq(&rnp->lock);
+       smp_mb__after_unlock_lock();
+}
+
+#define raw_spin_lock_irqsave_rcu_node(rnp, flags)     \
+do {                                                   \
+       typecheck(unsigned long, flags);                \
+       raw_spin_lock_irqsave(&(rnp)->lock, flags);     \
+       smp_mb__after_unlock_lock();                    \
+} while (0)
+
+static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
+{
+       bool locked = raw_spin_trylock(&rnp->lock);
+
+       if (locked)
+               smp_mb__after_unlock_lock();
+       return locked;
+}
index 630c19772630cc0c2cac42e2cf751dec002e81e2..fa0e3b96a9edd44b55789ba182373064a6a21b59 100644 (file)
@@ -301,8 +301,7 @@ static void rcu_preempt_note_context_switch(void)
                /* Possibly blocking in an RCU read-side critical section. */
                rdp = this_cpu_ptr(rcu_state_p->rda);
                rnp = rdp->mynode;
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_irqsave_rcu_node(rnp, flags);
                t->rcu_read_unlock_special.b.blocked = true;
                t->rcu_blocked_node = rnp;
 
@@ -457,8 +456,7 @@ void rcu_read_unlock_special(struct task_struct *t)
                 */
                for (;;) {
                        rnp = t->rcu_blocked_node;
-                       raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
-                       smp_mb__after_unlock_lock();
+                       raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
                        if (rnp == t->rcu_blocked_node)
                                break;
                        WARN_ON_ONCE(1);
@@ -989,8 +987,7 @@ static int rcu_boost(struct rcu_node *rnp)
            READ_ONCE(rnp->boost_tasks) == NULL)
                return 0;  /* Nothing left to boost. */
 
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
 
        /*
         * Recheck under the lock: all tasks in need of boosting
@@ -1176,8 +1173,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
                           "rcub/%d", rnp_index);
        if (IS_ERR(t))
                return PTR_ERR(t);
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
        rnp->boost_kthread_task = t;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
        sp.sched_priority = kthread_prio;
@@ -1567,8 +1563,7 @@ static void rcu_prepare_for_idle(void)
                if (!*rdp->nxttail[RCU_DONE_TAIL])
                        continue;
                rnp = rdp->mynode;
-               raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-               smp_mb__after_unlock_lock();
+               raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
                needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
                raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
                if (needwake)
@@ -2068,8 +2063,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
        bool needwake;
        struct rcu_node *rnp = rdp->mynode;
 
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
        needwake = rcu_start_future_gp(rnp, rdp, &c);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
        if (needwake)