net: dynamic ingress_queue allocation
author Eric Dumazet <eric.dumazet@gmail.com>
Sat, 2 Oct 2010 06:11:55 +0000 (06:11 +0000)
committer David S. Miller <davem@davemloft.net>
Tue, 5 Oct 2010 07:23:44 +0000 (00:23 -0700)
Since ingress is not used very much, and net_device->ingress_queue is
quite a big object (128 or 256 bytes), allocate it dynamically, only
when needed (tc qdisc add dev eth0 ingress ...)

The dev_ingress_queue(dev) helper should be used only with RTNL held.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/netdevice.h
include/linux/rtnetlink.h
net/core/dev.c
net/sched/sch_api.c
net/sched/sch_generic.c

index ceed3474014af4d268a47f187de1eccc5b1eb5c5..92d81edd58087e4ac3ef835338c1bec1b7ab627a 100644 (file)
@@ -986,7 +986,7 @@ struct net_device {
        rx_handler_func_t       *rx_handler;
        void                    *rx_handler_data;
 
-       struct netdev_queue     ingress_queue; /* use two cache lines */
+       struct netdev_queue __rcu *ingress_queue;
 
 /*
  * Cache lines mostly used on transmit path
index 68c436bddc8871650768fb3c5e91f4e72a2e9981..0bb7b48632bd93b5fccc5e21007eadaa228877e2 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/if_link.h>
 #include <linux/if_addr.h>
 #include <linux/neighbour.h>
+#include <linux/netdevice.h>
 
 /* rtnetlink families. Values up to 127 are reserved for real address
  * families, values above 128 may be used arbitrarily.
@@ -769,6 +770,13 @@ extern int lockdep_rtnl_is_held(void);
 #define rtnl_dereference(p)                                    \
        rcu_dereference_check(p, lockdep_rtnl_is_held())
 
+static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
+{
+       return rtnl_dereference(dev->ingress_queue);
+}
+
+extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
+
 extern void rtnetlink_init(void);
 extern void __rtnl_unlock(void);
 
index a313bab1b754a75272c4912d48611edc11e980df..ce6ad88c980b8105531916fa3aa00ced83f437bd 100644 (file)
@@ -2702,11 +2702,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
        struct net_device *dev = skb->dev;
        u32 ttl = G_TC_RTTL(skb->tc_verd);
-       struct netdev_queue *rxq;
        int result = TC_ACT_OK;
        struct Qdisc *q;
 
@@ -2720,8 +2719,6 @@ static int ing_filter(struct sk_buff *skb)
        skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-       rxq = &dev->ingress_queue;
-
        q = rxq->qdisc;
        if (q != &noop_qdisc) {
                spin_lock(qdisc_lock(q));
@@ -2737,7 +2734,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                         struct packet_type **pt_prev,
                                         int *ret, struct net_device *orig_dev)
 {
-       if (skb->dev->ingress_queue.qdisc == &noop_qdisc)
+       struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+       if (!rxq || rxq->qdisc == &noop_qdisc)
                goto out;
 
        if (*pt_prev) {
@@ -2745,7 +2744,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                *pt_prev = NULL;
        }
 
-       switch (ing_filter(skb)) {
+       switch (ing_filter(skb, rxq)) {
        case TC_ACT_SHOT:
        case TC_ACT_STOLEN:
                kfree_skb(skb);
@@ -4940,7 +4939,6 @@ static void __netdev_init_queue_locks_one(struct net_device *dev,
 static void netdev_init_queue_locks(struct net_device *dev)
 {
        netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
-       __netdev_init_queue_locks_one(dev, &dev->ingress_queue, NULL);
 }
 
 unsigned long netdev_fix_features(unsigned long features, const char *name)
@@ -5452,11 +5450,29 @@ static void netdev_init_one_queue(struct net_device *dev,
 
 static void netdev_init_queues(struct net_device *dev)
 {
-       netdev_init_one_queue(dev, &dev->ingress_queue, NULL);
        netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
        spin_lock_init(&dev->tx_global_lock);
 }
 
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
+{
+       struct netdev_queue *queue = dev_ingress_queue(dev);
+
+#ifdef CONFIG_NET_CLS_ACT
+       if (queue)
+               return queue;
+       queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+       if (!queue)
+               return NULL;
+       netdev_init_one_queue(dev, queue, NULL);
+       __netdev_init_queue_locks_one(dev, queue, NULL);
+       queue->qdisc = &noop_qdisc;
+       queue->qdisc_sleeping = &noop_qdisc;
+       rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+       return queue;
+}
+
 /**
  *     alloc_netdev_mq - allocate network device
  *     @sizeof_priv:   size of private data to allocate space for
@@ -5559,6 +5575,8 @@ void free_netdev(struct net_device *dev)
 
        kfree(dev->_tx);
 
+       kfree(rcu_dereference_raw(dev->ingress_queue));
+
        /* Flush device addresses */
        dev_addr_flush(dev);
 
index b8020784d0e996114c86c648cd2d343b0e2f3995..b22ca2d1cebca4e2495f1fd15502e7f39fe82342 100644 (file)
@@ -240,7 +240,10 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
        if (q)
                goto out;
 
-       q = qdisc_match_from_root(dev->ingress_queue.qdisc_sleeping, handle);
+       if (dev_ingress_queue(dev))
+               q = qdisc_match_from_root(
+                       dev_ingress_queue(dev)->qdisc_sleeping,
+                       handle);
 out:
        return q;
 }
@@ -690,6 +693,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                    (new && new->flags & TCQ_F_INGRESS)) {
                        num_q = 1;
                        ingress = 1;
+                       if (!dev_ingress_queue(dev))
+                               return -ENOENT;
                }
 
                if (dev->flags & IFF_UP)
@@ -701,7 +706,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                }
 
                for (i = 0; i < num_q; i++) {
-                       struct netdev_queue *dev_queue = &dev->ingress_queue;
+                       struct netdev_queue *dev_queue = dev_ingress_queue(dev);
 
                        if (!ingress)
                                dev_queue = netdev_get_tx_queue(dev, i);
@@ -979,7 +984,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
                        } else { /* ingress */
-                               q = dev->ingress_queue.qdisc_sleeping;
+                               if (dev_ingress_queue(dev))
+                                       q = dev_ingress_queue(dev)->qdisc_sleeping;
                        }
                } else {
                        q = dev->qdisc;
@@ -1043,8 +1049,9 @@ replay:
                                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
-                       } else { /*ingress */
-                               q = dev->ingress_queue.qdisc_sleeping;
+                       } else { /* ingress */
+                               if (dev_ingress_queue_create(dev))
+                                       q = dev_ingress_queue(dev)->qdisc_sleeping;
                        }
                } else {
                        q = dev->qdisc;
@@ -1123,11 +1130,14 @@ replay:
 create_n_graft:
        if (!(n->nlmsg_flags&NLM_F_CREATE))
                return -ENOENT;
-       if (clid == TC_H_INGRESS)
-               q = qdisc_create(dev, &dev->ingress_queue, p,
-                                tcm->tcm_parent, tcm->tcm_parent,
-                                tca, &err);
-       else {
+       if (clid == TC_H_INGRESS) {
+               if (dev_ingress_queue(dev))
+                       q = qdisc_create(dev, dev_ingress_queue(dev), p,
+                                        tcm->tcm_parent, tcm->tcm_parent,
+                                        tca, &err);
+               else
+                       err = -ENOENT;
+       } else {
                struct netdev_queue *dev_queue;
 
                if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
@@ -1304,8 +1314,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
                if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
                        goto done;
 
-               dev_queue = &dev->ingress_queue;
-               if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
+               dev_queue = dev_ingress_queue(dev);
+               if (dev_queue &&
+                   tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
+                                      &q_idx, s_q_idx) < 0)
                        goto done;
 
 cont:
@@ -1595,8 +1607,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
        if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
                goto done;
 
-       dev_queue = &dev->ingress_queue;
-       if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
+       dev_queue = dev_ingress_queue(dev);
+       if (dev_queue &&
+           tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
+                               &t, s_t) < 0)
                goto done;
 
 done:
index 545278a1c4788eb113a8c91842e02f1443169c79..3d57681bdb7625240f9756be257bb35aa4e5a7d0 100644 (file)
@@ -753,7 +753,8 @@ void dev_activate(struct net_device *dev)
 
        need_watchdog = 0;
        netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
-       transition_one_qdisc(dev, &dev->ingress_queue, NULL);
+       if (dev_ingress_queue(dev))
+               transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
 
        if (need_watchdog) {
                dev->trans_start = jiffies;
@@ -812,7 +813,8 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 void dev_deactivate(struct net_device *dev)
 {
        netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
-       dev_deactivate_queue(dev, &dev->ingress_queue, &noop_qdisc);
+       if (dev_ingress_queue(dev))
+               dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
 
        dev_watchdog_down(dev);
 
@@ -838,7 +840,8 @@ void dev_init_scheduler(struct net_device *dev)
 {
        dev->qdisc = &noop_qdisc;
        netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
-       dev_init_scheduler_queue(dev, &dev->ingress_queue, &noop_qdisc);
+       if (dev_ingress_queue(dev))
+               dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
 
        setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
 }
@@ -861,7 +864,8 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 void dev_shutdown(struct net_device *dev)
 {
        netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
-       shutdown_scheduler_queue(dev, &dev->ingress_queue, &noop_qdisc);
+       if (dev_ingress_queue(dev))
+               shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
        qdisc_destroy(dev->qdisc);
        dev->qdisc = &noop_qdisc;