net: Refactor XPS for CPUs and Rx queues
author     Amritha Nambiar <amritha.nambiar@intel.com>
           Sat, 30 Jun 2018 04:26:41 +0000 (21:26 -0700)
committer  David S. Miller <davem@davemloft.net>
           Mon, 2 Jul 2018 00:06:23 +0000 (09:06 +0900)
Refactor the XPS code to support Tx queue selection based on either a
CPU map or an Rx queue map.
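
As a rough illustration only (not part of this patch), a driver or a
follow-up change could call the new __netif_set_xps_queue() entry point
with an Rx-queue bitmap to steer a Tx queue. The helper name below is
hypothetical and locking requirements (e.g. rtnl) are elided:

/* Hypothetical sketch: map Tx queue 'txq' to Rx queue 'rxq' using the
 * rxqs variant of the interface added by this patch.
 */
static int example_map_rxq_to_txq(struct net_device *dev, u16 rxq, u16 txq)
{
	unsigned long *mask;
	int err;

	/* one bit per Rx queue on this device */
	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues),
		       sizeof(unsigned long), GFP_KERNEL);
	if (!mask)
		return -ENOMEM;

	set_bit(rxq, mask);
	err = __netif_set_xps_queue(dev, mask, txq, true); /* is_rxqs_map */
	kfree(mask);
	return err;
}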

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/cpumask.h
include/linux/netdevice.h
net/core/dev.c
net/core/net-sysfs.c
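
The netif_attrmask_* helpers added to include/linux/netdevice.h below
generalize the old cpumask iteration so the same loop works over either a
CPU mask or an Rx-queue mask. A minimal sketch of the iteration idiom,
with a made-up caller that is not part of the patch:

/* Hypothetical sketch: visit every ID set in 'mask', or every ID below
 * nr_ids when mask is NULL, mirroring the loop idiom used in net/core/dev.c.
 */
static void example_walk_attrmask(const unsigned long *mask,
				  unsigned int nr_ids)
{
	int j;

	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), j < nr_ids;)
		pr_info("attribute id %d is set\n", j);
}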

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bf53d893ad02bbe460dd64ce03d8cfe10d709931..57f20a0a7794908b47fdb151e530d53e3a598b54 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_active(cpu)                ((cpu) == 0)
 #endif
 
-/* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-       WARN_ON_ONCE(cpu >= nr_cpumask_bits);
+       WARN_ON_ONCE(cpu >= bits);
 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+}
+
+/* verify cpu argument to cpumask_* operators */
+static inline unsigned int cpumask_check(unsigned int cpu)
+{
+       cpu_max_bits_warn(cpu, nr_cpumask_bits);
        return cpu;
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c6b377a15869d67f1b702e2fa2be29ce9d0fdf62..8bf8d6149f79210163d6de2edaa60fa66c300b14 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -731,10 +731,15 @@ struct xps_map {
  */
 struct xps_dev_maps {
        struct rcu_head rcu;
-       struct xps_map __rcu *cpu_map[0];
+       struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */
 };
-#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
+
+#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
        (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
+
+#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
+       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
+
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE   16
@@ -1910,7 +1915,8 @@ struct net_device {
        int                     watchdog_timeo;
 
 #ifdef CONFIG_XPS
-       struct xps_dev_maps __rcu *xps_maps;
+       struct xps_dev_maps __rcu *xps_cpus_map;
+       struct xps_dev_maps __rcu *xps_rxqs_map;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
        struct mini_Qdisc __rcu *miniq_egress;
@@ -3259,6 +3265,92 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
                        u16 index);
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+                         u16 index, bool is_rxqs_map);
+
+/**
+ *     netif_attr_test_mask - Test a CPU or Rx queue set in a mask
+ *     @j: CPU/Rx queue index
+ *     @mask: bitmask of all cpus/rx queues
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
+ */
+static inline bool netif_attr_test_mask(unsigned long j,
+                                       const unsigned long *mask,
+                                       unsigned int nr_bits)
+{
+       cpu_max_bits_warn(j, nr_bits);
+       return test_bit(j, mask);
+}
+
+/**
+ *     netif_attr_test_online - Test for online CPU/Rx queue
+ *     @j: CPU/Rx queue index
+ *     @online_mask: bitmask for CPUs/Rx queues that are online
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Returns true if a CPU/Rx queue is online.
+ */
+static inline bool netif_attr_test_online(unsigned long j,
+                                         const unsigned long *online_mask,
+                                         unsigned int nr_bits)
+{
+       cpu_max_bits_warn(j, nr_bits);
+
+       if (online_mask)
+               return test_bit(j, online_mask);
+
+       return (j < nr_bits);
+}
+
+/**
+ *     netif_attrmask_next - get the next CPU/Rx queue in a cpu/Rx queues mask
+ *     @n: CPU/Rx queue index
+ *     @srcp: the cpumask/Rx queue mask pointer
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set.
+ */
+static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
+                                              unsigned int nr_bits)
+{
+       /* -1 is a legal arg here. */
+       if (n != -1)
+               cpu_max_bits_warn(n, nr_bits);
+
+       if (srcp)
+               return find_next_bit(srcp, nr_bits, n + 1);
+
+       return n + 1;
+}
+
+/**
+ *     netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p
+ *     @n: CPU/Rx queue index
+ *     @src1p: the first CPUs/Rx queues mask pointer
+ *     @src2p: the second CPUs/Rx queues mask pointer
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set in both.
+ */
+static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
+                                         const unsigned long *src2p,
+                                         unsigned int nr_bits)
+{
+       /* -1 is a legal arg here. */
+       if (n != -1)
+               cpu_max_bits_warn(n, nr_bits);
+
+       if (src1p && src2p)
+               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
+       else if (src1p)
+               return find_next_bit(src1p, nr_bits, n + 1);
+       else if (src2p)
+               return find_next_bit(src2p, nr_bits, n + 1);
+
+       return n + 1;
+}
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
                                      const struct cpumask *mask,
diff --git a/net/core/dev.c b/net/core/dev.c
index dffed642e68660439b4ac4fd1a2099ef4754d098..71059558dc39ff02f986575ad8510cecc6856276 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
        int pos;
 
        if (dev_maps)
-               map = xmap_dereference(dev_maps->cpu_map[tci]);
+               map = xmap_dereference(dev_maps->attr_map[tci]);
        if (!map)
                return false;
 
@@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
                        break;
                }
 
-               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
                kfree_rcu(map, rcu);
                return false;
        }
@@ -2135,31 +2135,58 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
        return active;
 }
 
+static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+                          struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+                          u16 offset, u16 count, bool is_rxqs_map)
+{
+       bool active = false;
+       int i, j;
+
+       for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
+            j < nr_ids;)
+               active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+                                              count);
+       if (!active) {
+               if (is_rxqs_map) {
+                       RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+               } else {
+                       RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+
+                       for (i = offset + (count - 1); count--; i--)
+                               netdev_queue_numa_node_write(
+                                       netdev_get_tx_queue(dev, i),
+                                                       NUMA_NO_NODE);
+               }
+               kfree_rcu(dev_maps, rcu);
+       }
+}
+
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
                                   u16 count)
 {
+       const unsigned long *possible_mask = NULL;
        struct xps_dev_maps *dev_maps;
-       int cpu, i;
-       bool active = false;
+       unsigned int nr_ids;
 
        mutex_lock(&xps_map_mutex);
-       dev_maps = xmap_dereference(dev->xps_maps);
 
-       if (!dev_maps)
-               goto out_no_maps;
-
-       for_each_possible_cpu(cpu)
-               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
-                                              offset, count);
+       dev_maps = xmap_dereference(dev->xps_rxqs_map);
+       if (dev_maps) {
+               nr_ids = dev->num_rx_queues;
+               clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset,
+                              count, true);
 
-       if (!active) {
-               RCU_INIT_POINTER(dev->xps_maps, NULL);
-               kfree_rcu(dev_maps, rcu);
        }
 
-       for (i = offset + (count - 1); count--; i--)
-               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
-                                            NUMA_NO_NODE);
+       dev_maps = xmap_dereference(dev->xps_cpus_map);
+       if (!dev_maps)
+               goto out_no_maps;
+
+       if (num_possible_cpus() > 1)
+               possible_mask = cpumask_bits(cpu_possible_mask);
+       nr_ids = nr_cpu_ids;
+       clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
+                      false);
 
 out_no_maps:
        mutex_unlock(&xps_map_mutex);
@@ -2170,8 +2197,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
        netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
 }
 
-static struct xps_map *expand_xps_map(struct xps_map *map,
-                                     int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+                                     u16 index, bool is_rxqs_map)
 {
        struct xps_map *new_map;
        int alloc_len = XPS_MIN_MAP_ALLOC;
@@ -2183,7 +2210,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
                return map;
        }
 
-       /* Need to add queue to this CPU's existing map */
+       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
        if (map) {
                if (pos < map->alloc_len)
                        return map;
@@ -2191,9 +2218,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
                alloc_len = map->alloc_len * 2;
        }
 
-       /* Need to allocate new map to store queue on this CPU's map */
-       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
-                              cpu_to_node(cpu));
+       /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
+        *  map
+        */
+       if (is_rxqs_map)
+               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+       else
+               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+                                      cpu_to_node(attr_index));
        if (!new_map)
                return NULL;
 
@@ -2205,14 +2237,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
        return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-                       u16 index)
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+                         u16 index, bool is_rxqs_map)
 {
+       const unsigned long *online_mask = NULL, *possible_mask = NULL;
        struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-       int i, cpu, tci, numa_node_id = -2;
+       int i, j, tci, numa_node_id = -2;
        int maps_sz, num_tc = 1, tc = 0;
        struct xps_map *map, *new_map;
        bool active = false;
+       unsigned int nr_ids;
 
        if (dev->num_tc) {
                num_tc = dev->num_tc;
@@ -2221,16 +2255,27 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
                        return -EINVAL;
        }
 
-       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
-       if (maps_sz < L1_CACHE_BYTES)
-               maps_sz = L1_CACHE_BYTES;
-
        mutex_lock(&xps_map_mutex);
+       if (is_rxqs_map) {
+               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+               dev_maps = xmap_dereference(dev->xps_rxqs_map);
+               nr_ids = dev->num_rx_queues;
+       } else {
+               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
+               if (num_possible_cpus() > 1) {
+                       online_mask = cpumask_bits(cpu_online_mask);
+                       possible_mask = cpumask_bits(cpu_possible_mask);
+               }
+               dev_maps = xmap_dereference(dev->xps_cpus_map);
+               nr_ids = nr_cpu_ids;
+       }
 
-       dev_maps = xmap_dereference(dev->xps_maps);
+       if (maps_sz < L1_CACHE_BYTES)
+               maps_sz = L1_CACHE_BYTES;
 
        /* allocate memory for queue storage */
-       for_each_cpu_and(cpu, cpu_online_mask, mask) {
+       for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
+            j < nr_ids;) {
                if (!new_dev_maps)
                        new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
                if (!new_dev_maps) {
@@ -2238,73 +2283,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
                        return -ENOMEM;
                }
 
-               tci = cpu * num_tc + tc;
-               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
+               tci = j * num_tc + tc;
+               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
                                 NULL;
 
-               map = expand_xps_map(map, cpu, index);
+               map = expand_xps_map(map, j, index, is_rxqs_map);
                if (!map)
                        goto error;
 
-               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
        }
 
        if (!new_dev_maps)
                goto out_no_new_maps;
 
-       for_each_possible_cpu(cpu) {
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
                /* copy maps belonging to foreign traffic classes */
-               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
                        /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
                }
 
                /* We need to explicitly update tci as previous loop
                 * could break out early if dev_maps is NULL.
                 */
-               tci = cpu * num_tc + tc;
+               tci = j * num_tc + tc;
 
-               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
-                       /* add queue to CPU maps */
+               if (netif_attr_test_mask(j, mask, nr_ids) &&
+                   netif_attr_test_online(j, online_mask, nr_ids)) {
+                       /* add tx-queue to CPU/rx-queue maps */
                        int pos = 0;
 
-                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
                        while ((pos < map->len) && (map->queues[pos] != index))
                                pos++;
 
                        if (pos == map->len)
                                map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-                       if (numa_node_id == -2)
-                               numa_node_id = cpu_to_node(cpu);
-                       else if (numa_node_id != cpu_to_node(cpu))
-                               numa_node_id = -1;
+                       if (!is_rxqs_map) {
+                               if (numa_node_id == -2)
+                                       numa_node_id = cpu_to_node(j);
+                               else if (numa_node_id != cpu_to_node(j))
+                                       numa_node_id = -1;
+                       }
 #endif
                } else if (dev_maps) {
                        /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
                }
 
                /* copy maps belonging to foreign traffic classes */
                for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
                        /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
                }
        }
 
-       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+       if (is_rxqs_map)
+               rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
+       else
+               rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
 
        /* Cleanup old maps */
        if (!dev_maps)
                goto out_no_old_maps;
 
-       for_each_possible_cpu(cpu) {
-               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
+               for (i = num_tc, tci = j * num_tc; i--; tci++) {
+                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
                        if (map && map != new_map)
                                kfree_rcu(map, rcu);
                }
@@ -2317,19 +2370,23 @@ out_no_old_maps:
        active = true;
 
 out_no_new_maps:
-       /* update Tx queue numa node */
-       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-                                    (numa_node_id >= 0) ? numa_node_id :
-                                    NUMA_NO_NODE);
+       if (!is_rxqs_map) {
+               /* update Tx queue numa node */
+               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+                                            (numa_node_id >= 0) ?
+                                            numa_node_id : NUMA_NO_NODE);
+       }
 
        if (!dev_maps)
                goto out_no_maps;
 
-       /* removes queue from unused CPUs */
-       for_each_possible_cpu(cpu) {
-               for (i = tc, tci = cpu * num_tc; i--; tci++)
+       /* removes tx-queue from unused CPUs/rx-queues */
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
+               for (i = tc, tci = j * num_tc; i--; tci++)
                        active |= remove_xps_queue(dev_maps, tci, index);
-               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+               if (!netif_attr_test_mask(j, mask, nr_ids) ||
+                   !netif_attr_test_online(j, online_mask, nr_ids))
                        active |= remove_xps_queue(dev_maps, tci, index);
                for (i = num_tc - tc, tci++; --i; tci++)
                        active |= remove_xps_queue(dev_maps, tci, index);
@@ -2337,7 +2394,10 @@ out_no_new_maps:
 
        /* free map if not active */
        if (!active) {
-               RCU_INIT_POINTER(dev->xps_maps, NULL);
+               if (is_rxqs_map)
+                       RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+               else
+                       RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
                kfree_rcu(dev_maps, rcu);
        }
 
@@ -2347,11 +2407,12 @@ out_no_maps:
        return 0;
 error:
        /* remove any maps that we added */
-       for_each_possible_cpu(cpu) {
-               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
+               for (i = num_tc, tci = j * num_tc; i--; tci++) {
+                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
                        map = dev_maps ?
-                             xmap_dereference(dev_maps->cpu_map[tci]) :
+                             xmap_dereference(dev_maps->attr_map[tci]) :
                              NULL;
                        if (new_map && new_map != map)
                                kfree(new_map);
@@ -2363,6 +2424,12 @@ error:
        kfree(new_dev_maps);
        return -ENOMEM;
 }
+
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+                       u16 index)
+{
+       return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+}
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
@@ -3384,7 +3451,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
        int queue_index = -1;
 
        rcu_read_lock();
-       dev_maps = rcu_dereference(dev->xps_maps);
+       dev_maps = rcu_dereference(dev->xps_cpus_map);
        if (dev_maps) {
                unsigned int tci = skb->sender_cpu - 1;
 
@@ -3393,7 +3460,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
                        tci += netdev_get_prio_tc_map(dev, skb->priority);
                }
 
-               map = rcu_dereference(dev_maps->cpu_map[tci]);
+               map = rcu_dereference(dev_maps->attr_map[tci]);
                if (map) {
                        if (map->len == 1)
                                queue_index = map->queues[0];
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bb7e80f4ced3746dc6946cfbc3e71520db503e50..b39987c81d53c2e0d6227aa4dea4e4198c11645d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
                return -ENOMEM;
 
        rcu_read_lock();
-       dev_maps = rcu_dereference(dev->xps_maps);
+       dev_maps = rcu_dereference(dev->xps_cpus_map);
        if (dev_maps) {
                for_each_possible_cpu(cpu) {
                        int i, tci = cpu * num_tc + tc;
                        struct xps_map *map;
 
-                       map = rcu_dereference(dev_maps->cpu_map[tci]);
+                       map = rcu_dereference(dev_maps->attr_map[tci]);
                        if (!map)
                                continue;