net: vrf: Flip IPv4 output path from FIB lookup hook to out hook
author David Ahern <dsa@cumulusnetworks.com>
Sat, 10 Sep 2016 19:09:55 +0000 (12:09 -0700)
committer David S. Miller <davem@davemloft.net>
Sun, 11 Sep 2016 06:12:52 +0000 (23:12 -0700)
Flip the IPv4 output path to use the l3mdev tx out hook. The VRF dst
is not returned on the first FIB lookup. Instead, the dst on the
skb is switched at the beginning of the IPv4 output processing to
send the packet to the VRF driver on xmit.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/vrf.c
net/ipv4/route.c

index 1ce7420322ee34365fe3994a94f7ad45bf6b80ea..08540b96ec18e43b84dbe048cc5dd706332eb72f 100644 (file)
@@ -227,6 +227,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 }
 #endif
 
+/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */
+static int vrf_ip_local_out(struct net *net, struct sock *sk,
+                           struct sk_buff *skb)
+{
+       int err;
+
+       err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+                     skb, NULL, skb_dst(skb)->dev, dst_output);
+       if (likely(err == 1))
+               err = dst_output(net, sk, skb);
+
+       return err;
+}
+
 static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                                           struct net_device *vrf_dev)
 {
@@ -292,7 +306,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                                               RT_SCOPE_LINK);
        }
 
-       ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+       ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
        if (unlikely(net_xmit_eval(ret)))
                vrf_dev->stats.tx_errors++;
        else
@@ -531,6 +545,53 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
+/* set dst on skb to send packet to us via dev_xmit path. Allows
+ * packet to go through device based features such as qdisc, netfilter
+ * hooks and packet sockets with skb->dev set to vrf device.
+ */
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb)
+{
+       struct net_vrf *vrf = netdev_priv(vrf_dev);
+       struct dst_entry *dst = NULL;
+       struct rtable *rth;
+
+       rcu_read_lock();
+
+       rth = rcu_dereference(vrf->rth);
+       if (likely(rth)) {
+               dst = &rth->dst;
+               dst_hold(dst);
+       }
+
+       rcu_read_unlock();
+
+       if (unlikely(!dst)) {
+               vrf_tx_error(vrf_dev, skb);
+               return NULL;
+       }
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+
+       return skb;
+}
+
+/* called with rcu lock held */
+static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb,
+                                 u16 proto)
+{
+       switch (proto) {
+       case AF_INET:
+               return vrf_ip_out(vrf_dev, sk, skb);
+       }
+
+       return skb;
+}
+
 /* holding rtnl */
 static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -1067,6 +1128,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
        .l3mdev_get_rtable      = vrf_get_rtable,
        .l3mdev_get_saddr       = vrf_get_saddr,
        .l3mdev_l3_rcv          = vrf_l3_rcv,
+       .l3mdev_l3_out          = vrf_l3_out,
 #if IS_ENABLED(CONFIG_IPV6)
        .l3mdev_get_rt6_dst     = vrf_get_rt6_dst,
        .l3mdev_get_saddr6      = vrf_get_saddr6,
index f49b2c534e92afc8ed71c14420c004d4430d8a11..ad83f85fb2400d205e1cd5249a972fd89781c9bc 100644 (file)
@@ -2246,10 +2246,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                                fl4->saddr = inet_select_addr(dev_out, 0,
                                                              RT_SCOPE_HOST);
                }
-
-               rth = l3mdev_get_rtable(dev_out, fl4);
-               if (rth)
-                       goto out;
        }
 
        if (!fl4->daddr) {