From: Paul E. McKenney
Date: Fri, 24 Jan 2020 18:37:27 +0000 (-0800)
Subject: Merge branches 'doc.2019.12.10a', 'exp.2019.12.09a', 'fixes.2020.01.24a', 'kfree_rcu...
X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=0e247386d9ed5ab8b7dad010cf4b183efeb1e47d;p=openwrt%2Fstaging%2Fblogic.git

Merge branches 'doc.2019.12.10a', 'exp.2019.12.09a', 'fixes.2020.01.24a', 'kfree_rcu.2020.01.24a', 'list.2020.01.10a', 'preempt.2020.01.24a' and 'torture.2019.12.09a' into HEAD

doc.2019.12.10a: Documentation updates
exp.2019.12.09a: Expedited grace-period updates
fixes.2020.01.24a: Miscellaneous fixes
kfree_rcu.2020.01.24a: Batch kfree_rcu() work
list.2020.01.10a: RCU-protected-list updates
preempt.2020.01.24a: Preemptible RCU updates
torture.2019.12.09a: Torture-test updates
---

0e247386d9ed5ab8b7dad010cf4b183efeb1e47d
diff --cc kernel/rcu/tree.c
index 1694a6b57ad8,6145e08a1407,878f62f218e9,31d2d9255d95,1694a6b57ad8,b0e0612392a9,1694a6b57ad8..d91c9156fab2
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@@@@@@ -2683,12 -2684,12 -2669,12 -2691,165 -2683,12 -2689,12 -2683,12 +2684,165 @@@@@@@@@ void call_rcu(struct rcu_head *head, rc
        }
        EXPORT_SYMBOL_GPL(call_rcu);
+++ +++
+++ +++/* Maximum number of jiffies to wait before draining a batch. */
+++ +++#define KFREE_DRAIN_JIFFIES (HZ / 50)
+++ +++#define KFREE_N_BATCHES 2
+++ +++
+++ +++/**
+++ +++ * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
+++ +++ * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
+++ +++ * @head_free: List of kfree_rcu() objects waiting for a grace period
+++ +++ * @krcp: Pointer to @kfree_rcu_cpu structure
+++ +++ */
+++ +++
+++ +++struct kfree_rcu_cpu_work {
+++ +++        struct rcu_work rcu_work;
+++ +++        struct rcu_head *head_free;
+++ +++        struct kfree_rcu_cpu *krcp;
+++ +++};
+++ +++
+++ +++/**
+++ +++ * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
+++ +++ * @head: List of kfree_rcu() objects not yet waiting for a grace period
+++ +++ * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
+++ +++ * @lock: Synchronize access to this structure
+++ +++ * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
+++ +++ * @monitor_todo: Tracks whether a @monitor_work delayed work is pending
+++ +++ * @initialized: The @lock and @rcu_work fields have been initialized
+++ +++ *
+++ +++ * This is a per-CPU structure.  The reason that it is not included in
+++ +++ * the rcu_data structure is to permit this code to be extracted from
+++ +++ * the RCU files.  Such extraction could allow further optimization of
+++ +++ * the interactions with the slab allocators.
+++ +++ */
+++ +++struct kfree_rcu_cpu {
+++ +++        struct rcu_head *head;
+++ +++        struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
+++ +++        spinlock_t lock;
+++ +++        struct delayed_work monitor_work;
+++ +++        bool monitor_todo;
+++ +++        bool initialized;
+++ +++};
+++ +++
+++ +++static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
+++ +++
+++ +++/*
+++ +++ * This function is invoked in workqueue context after a grace period.
+++ +++ * It frees all the objects queued on ->head_free.
+++ +++ */ +++ +++static void kfree_rcu_work(struct work_struct *work) +++ +++{ +++ +++ unsigned long flags; +++ +++ struct rcu_head *head, *next; +++ +++ struct kfree_rcu_cpu *krcp; +++ +++ struct kfree_rcu_cpu_work *krwp; +++ +++ +++ +++ krwp = container_of(to_rcu_work(work), +++ +++ struct kfree_rcu_cpu_work, rcu_work); +++ +++ krcp = krwp->krcp; +++ +++ spin_lock_irqsave(&krcp->lock, flags); +++ +++ head = krwp->head_free; +++ +++ krwp->head_free = NULL; +++ +++ spin_unlock_irqrestore(&krcp->lock, flags); +++ +++ +++ +++ // List "head" is now private, so traverse locklessly. +++ +++ for (; head; head = next) { +++ +++ unsigned long offset = (unsigned long)head->func; +++ +++ +++ +++ next = head->next; +++ +++ // Potentially optimize with kfree_bulk in future. +++ +++ debug_rcu_head_unqueue(head); +++ +++ rcu_lock_acquire(&rcu_callback_map); +++ +++ trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset); +++ +++ +++ +++ if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) { +++ +++ /* Could be optimized with kfree_bulk() in future. */ +++ +++ kfree((void *)head - offset); +++ +++ } +++ +++ +++ +++ rcu_lock_release(&rcu_callback_map); +++ +++ cond_resched_tasks_rcu_qs(); +++ +++ } +++ +++} +++ +++ + + /* -- - - * Queue an RCU callback for lazy invocation after a grace period. -- - - * This will likely be later named something like "call_rcu_lazy()", -- - - * but this change will require some way of tagging the lazy RCU -- - - * callbacks in the list of pending callbacks. Until then, this -- - - * function may only be called from __kfree_rcu(). +++ +++ * Schedule the kfree batch RCU work to run in workqueue context after a GP. +++ +++ * +++ +++ * This function is invoked by kfree_rcu_monitor() when the KFREE_DRAIN_JIFFIES +++ +++ * timeout has been reached. +++ +++ */ +++ +++static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp) +++ +++{ +++ +++ int i; +++ +++ struct kfree_rcu_cpu_work *krwp = NULL; +++ +++ +++ +++ lockdep_assert_held(&krcp->lock); +++ +++ for (i = 0; i < KFREE_N_BATCHES; i++) +++ +++ if (!krcp->krw_arr[i].head_free) { +++ +++ krwp = &(krcp->krw_arr[i]); +++ +++ break; +++ +++ } +++ +++ +++ +++ // If a previous RCU batch is in progress, we cannot immediately +++ +++ // queue another one, so return false to tell caller to retry. +++ +++ if (!krwp) +++ +++ return false; +++ +++ +++ +++ krwp->head_free = krcp->head; +++ +++ krcp->head = NULL; +++ +++ INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work); +++ +++ queue_rcu_work(system_wq, &krwp->rcu_work); +++ +++ return true; +++ +++} +++ +++ +++ +++static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp, +++ +++ unsigned long flags) +++ +++{ +++ +++ // Attempt to start a new batch. +++ +++ krcp->monitor_todo = false; +++ +++ if (queue_kfree_rcu_work(krcp)) { +++ +++ // Success! Our job is done here. +++ +++ spin_unlock_irqrestore(&krcp->lock, flags); +++ +++ return; +++ +++ } +++ +++ +++ +++ // Previous RCU batch still in progress, try again later. +++ +++ krcp->monitor_todo = true; +++ +++ schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); +++ +++ spin_unlock_irqrestore(&krcp->lock, flags); +++ +++} +++ +++ ++ +++/* - * Queue an RCU callback for lazy invocation after a grace period. - * This will likely be later named something like "call_rcu_lazy()", - * but this change will require some way of tagging the lazy RCU - * callbacks in the list of pending callbacks. Until then, this - * function may only be called from __kfree_rcu(). 
+++ +++ * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+++ +++ * It invokes kfree_rcu_drain_unlock() to attempt to start another batch.
+++ +++ */
+++ +++static void kfree_rcu_monitor(struct work_struct *work)
+++ +++{
+++ +++        unsigned long flags;
+++ +++        struct kfree_rcu_cpu *krcp = container_of(work, struct kfree_rcu_cpu,
+++ +++                                                  monitor_work.work);
+++ +++
+++ +++        spin_lock_irqsave(&krcp->lock, flags);
+++ +++        if (krcp->monitor_todo)
+++ +++                kfree_rcu_drain_unlock(krcp, flags);
+++ +++        else
+++ +++                spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++}
+++ +++
+++ + + /*
-         * Queue an RCU callback for lazy invocation after a grace period.
-         * This will likely be later named something like "call_rcu_lazy()",
-         * but this change will require some way of tagging the lazy RCU
-         * callbacks in the list of pending callbacks.  Until then, this
-         * function may only be called from __kfree_rcu().
+++ +++ * Queue a request for lazy invocation of kfree() after a grace period.
+++ +++ *
+++ +++ * Each kfree_call_rcu() request is added to a batch. The batch will be drained
+++ +++ * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch
+++ +++ * will be kfree'd in workqueue context. This allows us to:
+++ +++ *
+++ +++ * 1. Batch requests together to reduce the number of grace periods during
+++ +++ *    heavy kfree_rcu() load.
+++ +++ *
+++ +++ * 2. It makes it possible to use kfree_bulk() on a large number of
+++ +++ *    kfree_rcu() requests thus reducing cache misses and the per-object
+++ +++ *    overhead of kfree().
         */
        void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
        {
@@@@@@@@ -2696,11 -2697,11 -2682,11 -2886,31 -2696,11 -2702,11 -2696,11 +2879,31 @@@@@@@@@ unlock_return
        }
        EXPORT_SYMBOL_GPL(kfree_call_rcu);
+++ +++void __init kfree_rcu_scheduler_running(void)
+++ +++{
+++ +++        int cpu;
+++ +++        unsigned long flags;
+++ +++
+++ +++        for_each_online_cpu(cpu) {
+++ +++                struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+++ +++
+++ +++                spin_lock_irqsave(&krcp->lock, flags);
+++ +++                if (!krcp->head || krcp->monitor_todo) {
+++ +++                        spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++                        continue;
+++ +++                }
+++ +++                krcp->monitor_todo = true;
+++ +++                schedule_delayed_work_on(cpu, &krcp->monitor_work,
+++ +++                                         KFREE_DRAIN_JIFFIES);
+++ +++                spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++        }
+++ +++}
+++ +++
        /*
         * During early boot, any blocking grace-period wait automatically
----- -  * implies a grace period.  Later on, this is never the case for PREEMPT.
+++++ +  * implies a grace period.  Later on, this is never the case for PREEMPTION.
         *
----- -  * However, because a context switch is a grace period for !PREEMPT, any
+++++ +  * However, because a context switch is a grace period for !PREEMPTION, any
         * blocking grace-period wait automatically implies a grace period if
         * there is only one CPU online at any point in time during execution of
         * either synchronize_rcu() or synchronize_rcu_expedited().  It is OK to
diff --cc kernel/rcu/tree.h
index 055c31781d3a,f9253ed406ba,ce90c68c184b,15405420b40c,055c31781d3a,055c31781d3a,055c31781d3a..0c87e4c161c2
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@@@@@@@ -182,8 -182,9 -181,8 -182,7 -182,8 -182,8 -182,8 +181,8 @@@@@@@@@ struct rcu_data
                bool rcu_need_heavy_qs;         /* GP old, so heavy quiescent state! */
                bool rcu_urgent_qs;             /* GP old need light quiescent state. */
                bool rcu_forced_tick;           /* Forced tick to provide QS. */
+ +++++         bool rcu_forced_tick_exp;       /*  ... provide QS to expedited GP. */
        #ifdef CONFIG_RCU_FAST_NO_HZ
--- ---         bool all_lazy;                  /* All CPU's CBs lazy at idle start? */
                unsigned long last_accelerate;  /* Last jiffy CBs were accelerated. */
                unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */
                int tick_nohz_enabled_snap;     /* Previously seen value from sysfs. */
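
For readers coming to this merge cold, here is a minimal caller-side sketch of the interface that the kfree_rcu branch batches. It is illustrative only: struct foo, foo_lock, and release_foo() are hypothetical names invented for the example and are not part of this patch; kfree_rcu() is the existing wrapper that bottoms out in the kfree_call_rcu() shown above. With this series applied, each such call is appended to the per-CPU krc structure and the accumulated batch is handed to the grace-period machinery roughly KFREE_DRAIN_JIFFIES (HZ/50, about 20 ms) after it starts filling, so many requests can share a single grace period and a single workqueue pass.

/*
 * Illustrative only: a hypothetical user of kfree_rcu().  Neither struct foo
 * nor release_foo() exist in this patch; they just show the calling
 * convention that feeds kfree_call_rcu() above.
 */
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
        int key;
        struct list_head list;
        struct rcu_head rh;     /* storage used by kfree_rcu() */
};

static LIST_HEAD(foo_list);
static DEFINE_SPINLOCK(foo_lock);

/* Unpublish @fp; the actual kfree() happens after a grace period. */
static void release_foo(struct foo *fp)
{
        spin_lock(&foo_lock);
        list_del_rcu(&fp->list);
        spin_unlock(&foo_lock);

        /*
         * Expands to kfree_call_rcu(&fp->rh, <offset-encoded callback>).
         * With this series, the request joins the per-CPU krc batch
         * rather than immediately becoming an individual RCU callback,
         * and is kfree()d from workqueue context after a grace period.
         */
        kfree_rcu(fp, rh);
}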
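The two-slot handoff in queue_kfree_rcu_work() is easier to see in isolation. Below is a stand-alone model in plain C, with no kernel APIs and with all names invented for the example: a pending list is promoted into one of KFREE_N_BATCHES in-flight slots, and when both slots are still waiting on a grace period the caller backs off, just as kfree_rcu_drain_unlock() re-arms monitor_work and retries after KFREE_DRAIN_JIFFIES.

/* Stand-alone model of the batch handoff; names are illustrative only. */
#include <stdbool.h>
#include <stdio.h>

#define N_BATCHES 2                             /* mirrors KFREE_N_BATCHES */

struct node { struct node *next; };

struct batch { struct node *head_free; };       /* waiting for grace period */
struct cpu_state {
        struct node *head;                      /* not yet waiting */
        struct batch slots[N_BATCHES];
};

/* Move the pending list into a free slot; false means both slots are busy. */
static bool promote_pending(struct cpu_state *st)
{
        for (int i = 0; i < N_BATCHES; i++) {
                if (!st->slots[i].head_free) {
                        st->slots[i].head_free = st->head;
                        st->head = NULL;        /* new requests start a fresh batch */
                        return true;            /* kernel: queue_rcu_work() here */
                }
        }
        return false;                           /* kernel: re-arm monitor_work */
}

int main(void)
{
        struct cpu_state st = { .head = NULL };
        struct node a, b, c, d;

        /* Two requests arrive and form the first pending batch. */
        b.next = NULL;
        a.next = &b;
        st.head = &a;
        printf("first promote:  %s\n", promote_pending(&st) ? "queued" : "retry");

        /* Another request arrives while the first batch awaits its grace period. */
        c.next = NULL;
        st.head = &c;
        printf("second promote: %s\n", promote_pending(&st) ? "queued" : "retry");

        /* Both slots busy: the kernel code would retry after KFREE_DRAIN_JIFFIES. */
        d.next = NULL;
        st.head = &d;
        printf("third promote:  %s\n", promote_pending(&st) ? "queued" : "retry");
        return 0;
}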