route: Per route IP tunnel metadata via lightweight tunnel
author Thomas Graf <tgraf@suug.ch>
Tue, 21 Jul 2015 08:44:00 +0000 (10:44 +0200)
committer David S. Miller <davem@davemloft.net>
Tue, 21 Jul 2015 17:39:06 +0000 (10:39 -0700)
This introduces a new lightweight tunnel type, IP tunnel, which allows
IP tunnel instructions to be specified per route. Only IPv4 is
supported at this point.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/vxlan.c
include/net/dst_metadata.h
include/net/ip_tunnels.h
include/uapi/linux/lwtunnel.h
include/uapi/linux/rtnetlink.h
net/ipv4/ip_tunnel_core.c
net/ipv4/route.c
net/openvswitch/vport.h

index 06c092b05a51350fcfd603c2fb7344cd3649ff66..9486d7ec128c1aeb06e6f230671ec76a0538c580 100644 (file)
@@ -1935,7 +1935,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
 static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                           struct vxlan_rdst *rdst, bool did_rsc)
 {
-       struct ip_tunnel_info *info = skb_tunnel_info(skb);
+       struct ip_tunnel_info *info;
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct sock *sk = vxlan->vn_sock->sock->sk;
        struct rtable *rt = NULL;
@@ -1952,6 +1952,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        int err;
        u32 flags = vxlan->flags;
 
+       /* FIXME: Support IPv6 */
+       info = skb_tunnel_info(skb, AF_INET);
+
        if (rdst) {
                dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port;
                vni = rdst->remote_vni;
@@ -2141,12 +2144,15 @@ tx_free:
 static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct vxlan_dev *vxlan = netdev_priv(dev);
-       const struct ip_tunnel_info *info = skb_tunnel_info(skb);
+       const struct ip_tunnel_info *info;
        struct ethhdr *eth;
        bool did_rsc = false;
        struct vxlan_rdst *rdst, *fdst = NULL;
        struct vxlan_fdb *f;
 
+       /* FIXME: Support IPv6 */
+       info = skb_tunnel_info(skb, AF_INET);
+
        skb_reset_mac_header(skb);
        eth = eth_hdr(skb);
 
index e843937fb30aee0006def9f3243c3b1ea744c5e3..7b03068946634932ca0cf8b1b06fd9ef912b9e17 100644 (file)
@@ -23,13 +23,23 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
        return NULL;
 }
 
-static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
+static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb,
+                                                    int family)
 {
        struct metadata_dst *md_dst = skb_metadata_dst(skb);
+       struct rtable *rt;
 
        if (md_dst)
                return &md_dst->u.tun_info;     /* a metadata dst, when present, wins over route state */
 
+       switch (family) {
+       case AF_INET:   /* only AF_INET is handled; any other family yields NULL below */
+               rt = (struct rtable *)skb_dst(skb);     /* NOTE(review): cast relies solely on the caller-supplied family — confirm the dst really is an IPv4 route */
+               if (rt && rt->rt_lwtstate)
+                       return lwt_tun_info(rt->rt_lwtstate);   /* per-route lightweight tunnel state */
+               break;
+       }
+
        return NULL;    /* neither a metadata dst nor route tunnel state was found */
 }
 
index d11530f1c1e21325487cb9cc2efbccd5c1d856d7..0b7e18cfa0b47a36b54249a715744f6855669c1e 100644 (file)
@@ -9,9 +9,9 @@
 #include <net/dsfield.h>
 #include <net/gro_cells.h>
 #include <net/inet_ecn.h>
-#include <net/ip.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
+#include <net/lwtunnel.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -298,6 +298,11 @@ static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n)
        return info + 1;
 }
 
+/* View the data member of a lightweight tunnel state as IP tunnel info. */
+static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
+{
+       void *payload = lwtstate->data;
+
+       return payload;
+}
+
 #endif /* CONFIG_INET */
 
 #endif /* __NET_IP_TUNNELS_H */
index aa611d931a313e222777f2201b3c97ab0fd697af..31377bbea3f8fc51caa41a8e7f34e7bf283da9ff 100644 (file)
@@ -6,6 +6,7 @@
 enum lwtunnel_encap_types {
        LWTUNNEL_ENCAP_NONE,
        LWTUNNEL_ENCAP_MPLS,
+       LWTUNNEL_ENCAP_IP,
        __LWTUNNEL_ENCAP_MAX,
 };
 
index 0d3d3cc43356e128bc618acbde912fe6b5524ff0..47d24cb3fbc1f8017f715dff06ab5aa5f977f6b0 100644 (file)
@@ -286,6 +286,21 @@ enum rt_class_t {
 
 /* Routing message attributes */
 
+enum ip_tunnel_t {     /* attribute types parsed by ip_tun_build_state() for LWTUNNEL_ENCAP_IP */
+       IP_TUN_UNSPEC,
+       IP_TUN_ID,      /* u64, stored in key.tun_id */
+       IP_TUN_DST,     /* be32, stored in key.ipv4_dst */
+       IP_TUN_SRC,     /* be32, stored in key.ipv4_src */
+       IP_TUN_TTL,     /* u8, stored in key.ipv4_ttl */
+       IP_TUN_TOS,     /* u8, stored in key.ipv4_tos */
+       IP_TUN_SPORT,   /* be16, stored in key.tp_src */
+       IP_TUN_DPORT,   /* be16, stored in key.tp_dst */
+       IP_TUN_FLAGS,   /* u16, stored in key.tun_flags */
+       __IP_TUN_MAX,   /* one past the last attribute type; not an attribute itself */
+};
+
+#define IP_TUN_MAX (__IP_TUN_MAX - 1)  /* highest valid IP_TUN_* attribute type */
+
 enum rtattr_type_t {
        RTA_UNSPEC,
        RTA_DST,
index 6a51a71a6c67a0f3e48523a37e3b559306885de9..025b76e803fde5c8f10245ce5613e42962a85b23 100644 (file)
@@ -190,3 +190,117 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
        return tot;
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
+
+static const struct nla_policy ip_tun_policy[IP_TUN_MAX + 1] = {       /* passed to nla_parse_nested() in ip_tun_build_state() */
+       [IP_TUN_ID]             = { .type = NLA_U64 },  /* -> key.tun_id */
+       [IP_TUN_DST]            = { .type = NLA_U32 },  /* -> key.ipv4_dst */
+       [IP_TUN_SRC]            = { .type = NLA_U32 },  /* -> key.ipv4_src */
+       [IP_TUN_TTL]            = { .type = NLA_U8 },   /* -> key.ipv4_ttl */
+       [IP_TUN_TOS]            = { .type = NLA_U8 },   /* -> key.ipv4_tos */
+       [IP_TUN_SPORT]          = { .type = NLA_U16 },  /* -> key.tp_src */
+       [IP_TUN_DPORT]          = { .type = NLA_U16 },  /* -> key.tp_dst */
+       [IP_TUN_FLAGS]          = { .type = NLA_U16 },  /* -> key.tun_flags */
+};
+
+/*
+ * Build the lightweight tunnel state for an LWTUNNEL_ENCAP_IP route.
+ *
+ * @dev:  not used by this function
+ * @attr: nested netlink attribute holding IP_TUN_* members
+ * @ts:   receives the newly allocated state on success
+ *
+ * Returns 0 on success, the error reported by nla_parse_nested() when the
+ * nested attributes do not parse, or -ENOMEM when the state cannot be
+ * allocated.  A key field is only written when its attribute is present.
+ */
+static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
+                             struct lwtunnel_state **ts)
+{
+       struct nlattr *attrs[IP_TUN_MAX + 1];
+       struct lwtunnel_state *lwt;
+       struct ip_tunnel_info *info;
+       int rc;
+
+       rc = nla_parse_nested(attrs, IP_TUN_MAX, attr, ip_tun_policy);
+       if (rc < 0)
+               return rc;
+
+       lwt = lwtunnel_state_alloc(sizeof(*info));
+       if (!lwt)
+               return -ENOMEM;
+
+       lwt->type = LWTUNNEL_ENCAP_IP;
+       info = lwt_tun_info(lwt);
+
+       /* Fixed part: marked as TX info and carrying no tunnel options. */
+       info->mode = IP_TUNNEL_INFO_TX;
+       info->options = NULL;
+       info->options_len = 0;
+
+       /* Optional part: copy each attribute that was supplied into the key. */
+       if (attrs[IP_TUN_ID])
+               info->key.tun_id = nla_get_u64(attrs[IP_TUN_ID]);
+       if (attrs[IP_TUN_DST])
+               info->key.ipv4_dst = nla_get_be32(attrs[IP_TUN_DST]);
+       if (attrs[IP_TUN_SRC])
+               info->key.ipv4_src = nla_get_be32(attrs[IP_TUN_SRC]);
+       if (attrs[IP_TUN_TTL])
+               info->key.ipv4_ttl = nla_get_u8(attrs[IP_TUN_TTL]);
+       if (attrs[IP_TUN_TOS])
+               info->key.ipv4_tos = nla_get_u8(attrs[IP_TUN_TOS]);
+       if (attrs[IP_TUN_SPORT])
+               info->key.tp_src = nla_get_be16(attrs[IP_TUN_SPORT]);
+       if (attrs[IP_TUN_DPORT])
+               info->key.tp_dst = nla_get_be16(attrs[IP_TUN_DPORT]);
+       if (attrs[IP_TUN_FLAGS])
+               info->key.tun_flags = nla_get_u16(attrs[IP_TUN_FLAGS]);
+
+       *ts = lwt;
+       return 0;
+}
+
+/*
+ * Dump the tunnel parameters held in @lwtstate into @skb as IP_TUN_*
+ * netlink attributes.  All eight attributes are always emitted.
+ *
+ * The port numbers are read with nla_get_be16() when the state is built,
+ * so they are written back with nla_put_be16() to keep the byte-order
+ * annotation consistent; the bytes placed in the message are unchanged.
+ *
+ * Returns 0 on success, or -ENOMEM if any attribute could not be added.
+ */
+static int ip_tun_fill_encap_info(struct sk_buff *skb,
+                                 struct lwtunnel_state *lwtstate)
+{
+       struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
+
+       if (nla_put_u64(skb, IP_TUN_ID, tun_info->key.tun_id) ||
+           nla_put_be32(skb, IP_TUN_DST, tun_info->key.ipv4_dst) ||
+           nla_put_be32(skb, IP_TUN_SRC, tun_info->key.ipv4_src) ||
+           nla_put_u8(skb, IP_TUN_TOS, tun_info->key.ipv4_tos) ||
+           nla_put_u8(skb, IP_TUN_TTL, tun_info->key.ipv4_ttl) ||
+           nla_put_be16(skb, IP_TUN_SPORT, tun_info->key.tp_src) ||
+           nla_put_be16(skb, IP_TUN_DPORT, tun_info->key.tp_dst) ||
+           nla_put_u16(skb, IP_TUN_FLAGS, tun_info->key.tun_flags))
+               return -ENOMEM;
+
+       return 0;
+}
+
+/*
+ * Netlink space needed for the attributes written by
+ * ip_tun_fill_encap_info().  The result does not depend on @lwtstate.
+ */
+static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+       int size = 0;
+
+       size += nla_total_size(8);      /* IP_TUN_ID */
+       size += nla_total_size(4);      /* IP_TUN_DST */
+       size += nla_total_size(4);      /* IP_TUN_SRC */
+       size += nla_total_size(1);      /* IP_TUN_TOS */
+       size += nla_total_size(1);      /* IP_TUN_TTL */
+       size += nla_total_size(2);      /* IP_TUN_SPORT */
+       size += nla_total_size(2);      /* IP_TUN_DPORT */
+       size += nla_total_size(2);      /* IP_TUN_FLAGS */
+
+       return size;
+}
+
+static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {      /* registered for LWTUNNEL_ENCAP_IP by ip_tunnel_core_init() */
+       .build_state = ip_tun_build_state,      /* parses IP_TUN_* attributes into a new state */
+       .fill_encap = ip_tun_fill_encap_info,   /* dumps a state as IP_TUN_* attributes */
+       .get_encap_size = ip_tun_encap_nlsize,  /* netlink space needed by the dump */
+};
+
+/*
+ * Register the IP tunnel encap operations for LWTUNNEL_ENCAP_IP.
+ *
+ * The result of lwtunnel_encap_add_ops() is returned to the caller
+ * instead of being discarded, so a failed registration is reported
+ * rather than silently treated as success.
+ */
+static int __init ip_tunnel_core_init(void)
+{
+       return lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
+}
+module_init(ip_tunnel_core_init);
+
+static void __exit ip_tunnel_core_exit(void)
+{
+       lwtunnel_encap_del_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);     /* drop the ops registered in ip_tunnel_core_init() */
+}
+module_exit(ip_tunnel_core_exit);
index 91da18be0a71eb07d8c962bc59ec48d0fa029ae9..519ec232818d406cbd2e9573e8a954e8b552943f 100644 (file)
@@ -1693,7 +1693,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
           by fib_lookup.
         */
 
-       tun_info = skb_tunnel_info(skb);
+       tun_info = skb_tunnel_info(skb, AF_INET);
        if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX)
                fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
        else
index 4750fb673a9fcbd9ca455530d8880098e39f4bb7..75d68248ba69161db38d6df76c9cb622d20da4ef 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/u64_stats_sync.h>
+#include <net/route.h>
 
 #include "datapath.h"