workqueue: Allow modifying low level unbound workqueue cpumask

author Lai Jiangshan <laijs@cn.fujitsu.com>

Thu, 30 Apr 2015 09:16:12 +0000 (17:16 +0800)

committer Tejun Heo <tj@kernel.org>

Thu, 30 Apr 2015 13:24:29 +0000 (09:24 -0400)
author Lai Jiangshan <laijs@cn.fujitsu.com>
Thu, 30 Apr 2015 09:16:12 +0000 (17:16 +0800)
committer Tejun Heo <tj@kernel.org>
Thu, 30 Apr 2015 13:24:29 +0000 (09:24 -0400)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h

index deee212af8e0939a6d97189569cf8199fb840124..4618dd672d1b345cd8ab1e2acb7b97e65aebcaed 100644 (file)
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -424,6 +424,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
  void free_workqueue_attrs(struct workqueue_attrs *attrs);
  int apply_workqueue_attrs(struct workqueue_struct *wq,
                           const struct workqueue_attrs *attrs);
+int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
  
  extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
                         struct work_struct *work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 9be75e2a4da6cb041d10d65701d780fd46a38e89..a3915abc19833c3a1a37cafce0683272ea4fd300 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -299,7 +299,7 @@ static DEFINE_SPINLOCK(wq_mayday_lock);     /* protects wq->maydays list */
  static LIST_HEAD(workqueues);          /* PR: list of all workqueues */
  static bool workqueue_freezing;                /* PL: have wqs started freezing? */
  
-static cpumask_var_t wq_unbound_cpumask;
+static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
  
  /* the per-cpu worker pools */
  static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -3429,7 +3429,7 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
  
  /**
   * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node
- * @attrs: the wq_attrs of interest
+ * @attrs: the wq_attrs of the default pwq of the target workqueue
   * @node: the target NUMA node
   * @cpu_going_down: if >= 0, the CPU to consider as offline
   * @cpumask: outarg, the resulting cpumask
@@ -3493,6 +3493,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
  struct apply_wqattrs_ctx {
         struct workqueue_struct *wq;            /* target workqueue */
         struct workqueue_attrs  *attrs;         /* attrs to apply */
+       struct list_head        list;           /* queued for batching commit */
         struct pool_workqueue   *dfl_pwq;
         struct pool_workqueue   *pwq_tbl[];
  };
@@ -3532,9 +3533,15 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
         if (!ctx || !new_attrs || !tmp_attrs)
                 goto out_free;
  
-       /* make a copy of @attrs and sanitize it */
+       /*
+        * Calculate the attrs of the default pwq.
+        * If the user configured cpumask doesn't overlap with the
+        * wq_unbound_cpumask, we fall back to the wq_unbound_cpumask.
+        */
         copy_workqueue_attrs(new_attrs, attrs);
         cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
+       if (unlikely(cpumask_empty(new_attrs->cpumask)))
+               cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
  
         /*
          * We may create multiple pwqs with differing cpumasks.  Make a
@@ -3553,7 +3560,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                 goto out_free;
  
         for_each_node(node) {
-               if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
+               if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
                         ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
                         if (!ctx->pwq_tbl[node])
                                 goto out_free;
@@ -3563,7 +3570,11 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
                 }
         }
  
+       /* save the user configured attrs and sanitize them. */
+       copy_workqueue_attrs(new_attrs, attrs);
+       cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
         ctx->attrs = new_attrs;
+
         ctx->wq = wq;
         free_workqueue_attrs(tmp_attrs);
         return ctx;
@@ -3704,11 +3715,11 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
  
         /*
          * Let's determine what needs to be done.  If the target cpumask is
-        * different from wq's, we need to compare it to @pwq's and create
-        * a new one if they don't match.  If the target cpumask equals
-        * wq's, the default pwq should be used.
+        * different from the default pwq's, we need to compare it to @pwq's
+        * and create a new one if they don't match.  If the target cpumask
+        * equals the default pwq's, the default pwq should be used.
          */
-       if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
+       if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
                 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
                         goto out_unlock;
         } else {
@@ -4731,6 +4742,84 @@ out_unlock:
  }
  #endif /* CONFIG_FREEZER */
  
+static int workqueue_apply_unbound_cpumask(void)
+{
+       LIST_HEAD(ctxs);
+       int ret = 0;
+       struct workqueue_struct *wq;
+       struct apply_wqattrs_ctx *ctx, *n;
+
+       lockdep_assert_held(&wq_pool_mutex);
+
+       list_for_each_entry(wq, &workqueues, list) {
+               if (!(wq->flags & WQ_UNBOUND))
+                       continue;
+               /* creating multiple pwqs breaks ordering guarantee */
+               if (wq->flags & __WQ_ORDERED)
+                       continue;
+
+               ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
+               if (!ctx) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               list_add_tail(&ctx->list, &ctxs);
+       }
+
+       list_for_each_entry_safe(ctx, n, &ctxs, list) {
+               if (!ret)
+                       apply_wqattrs_commit(ctx);
+               apply_wqattrs_cleanup(ctx);
+       }
+
+       return ret;
+}
+
+/**
+ *  workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
+ *  @cpumask: the cpumask to set
+ *
+ *  The low-level workqueue cpumask is a global cpumask that limits
+ *  the affinity of all unbound workqueues.  This function checks the @cpumask,
+ *  applies it to all unbound workqueues and updates all of their pwqs.
+ *
+ *  Return:    0       - Success
+ *             -EINVAL - Invalid @cpumask
+ *             -ENOMEM - Failed to allocate memory for attrs or pwqs.
+ */
+int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
+{
+       int ret = -EINVAL;
+       cpumask_var_t saved_cpumask;
+
+       if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
+               return -ENOMEM;
+
+       get_online_cpus();
+       cpumask_and(cpumask, cpumask, cpu_possible_mask);
+       if (!cpumask_empty(cpumask)) {
+               mutex_lock(&wq_pool_mutex);
+
+               /* save the old wq_unbound_cpumask. */
+               cpumask_copy(saved_cpumask, wq_unbound_cpumask);
+
+               /* update wq_unbound_cpumask first, then apply it to the wqs. */
+               cpumask_copy(wq_unbound_cpumask, cpumask);
+               ret = workqueue_apply_unbound_cpumask();
+
+               /* restore the wq_unbound_cpumask when failed. */
+               if (ret < 0)
+                       cpumask_copy(wq_unbound_cpumask, saved_cpumask);
+
+               mutex_unlock(&wq_pool_mutex);
+       }
+       put_online_cpus();
+
+       free_cpumask_var(saved_cpumask);
+       return ret;
+}
+
  #ifdef CONFIG_SYSFS
  /*
   * Workqueues with WQ_SYSFS flag set is visible to userland via
@@ -4952,14 +5041,34 @@ static ssize_t wq_unbound_cpumask_show(struct device *dev,
  {
         int written;
  
+       mutex_lock(&wq_pool_mutex);
         written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
                             cpumask_pr_args(wq_unbound_cpumask));
+       mutex_unlock(&wq_pool_mutex);
  
         return written;
  }
  
+static ssize_t wq_unbound_cpumask_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       cpumask_var_t cpumask;
+       int ret;
+
+       if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+               return -ENOMEM;
+
+       ret = cpumask_parse(buf, cpumask);
+       if (!ret)
+               ret = workqueue_set_unbound_cpumask(cpumask);
+
+       free_cpumask_var(cpumask);
+       return ret ? ret : count;
+}
+
  static struct device_attribute wq_sysfs_cpumask_attr =
-       __ATTR(cpumask, 0444, wq_unbound_cpumask_show, NULL);
+       __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
+              wq_unbound_cpumask_store);
  
  static int __init wq_sysfs_init(void)
  {
author	Lai Jiangshan <laijs@cn.fujitsu.com>
	Thu, 30 Apr 2015 09:16:12 +0000 (17:16 +0800)
committer	Tejun Heo <tj@kernel.org>
	Thu, 30 Apr 2015 13:24:29 +0000 (09:24 -0400)
include/linux/workqueue.h		patch \| blob \| history
kernel/workqueue.c		patch \| blob \| history