+++ /dev/null
-From: Steven Barth <steven@midlink.org>
-Subject: Add support for MAP-E FMRs (mesh mode)
-
-MAP-E FMRs (draft-ietf-softwire-map-10) are rules for IPv4-communication
-between MAP CEs (mesh mode) without the need to forward such data to a
-border relay. This is similar to how 6rd works but for IPv4 over IPv6.
-
-Signed-off-by: Steven Barth <cyrus@openwrt.org>
----
- include/net/ip6_tunnel.h | 13 ++
- include/uapi/linux/if_tunnel.h | 13 ++
- net/ipv6/ip6_tunnel.c | 276 +++++++++++++++++++++++++++++++++++++++--
- 3 files changed, 291 insertions(+), 11 deletions(-)
-
---- a/include/net/ip6_tunnel.h
-+++ b/include/net/ip6_tunnel.h
-@@ -18,6 +18,18 @@
- /* determine capability on a per-packet basis */
- #define IP6_TNL_F_CAP_PER_PACKET 0x40000
-
-+/* IPv6 tunnel FMR */
-+struct __ip6_tnl_fmr {
-+ struct __ip6_tnl_fmr *next; /* next fmr in list */
-+ struct in6_addr ip6_prefix;
-+ struct in_addr ip4_prefix;
-+
-+ __u8 ip6_prefix_len;
-+ __u8 ip4_prefix_len;
-+ __u8 ea_len;
-+ __u8 offset;
-+};
-+
- struct __ip6_tnl_parm {
- char name[IFNAMSIZ]; /* name of tunnel device */
- int link; /* ifindex of underlying L2 interface */
-@@ -29,6 +41,7 @@
- __u32 flags; /* tunnel flags */
- struct in6_addr laddr; /* local tunnel end-point address */
- struct in6_addr raddr; /* remote tunnel end-point address */
-+ struct __ip6_tnl_fmr *fmrs; /* FMRs */
-
- __be16 i_flags;
- __be16 o_flags;
---- a/include/uapi/linux/if_tunnel.h
-+++ b/include/uapi/linux/if_tunnel.h
-@@ -77,10 +77,23 @@
- IFLA_IPTUN_ENCAP_DPORT,
- IFLA_IPTUN_COLLECT_METADATA,
- IFLA_IPTUN_FWMARK,
-+ IFLA_IPTUN_FMRS,
- __IFLA_IPTUN_MAX,
- };
- #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
-
-+enum {
-+ IFLA_IPTUN_FMR_UNSPEC,
-+ IFLA_IPTUN_FMR_IP6_PREFIX,
-+ IFLA_IPTUN_FMR_IP4_PREFIX,
-+ IFLA_IPTUN_FMR_IP6_PREFIX_LEN,
-+ IFLA_IPTUN_FMR_IP4_PREFIX_LEN,
-+ IFLA_IPTUN_FMR_EA_LEN,
-+ IFLA_IPTUN_FMR_OFFSET,
-+ __IFLA_IPTUN_FMR_MAX,
-+};
-+#define IFLA_IPTUN_FMR_MAX (__IFLA_IPTUN_FMR_MAX - 1)
-+
- enum tunnel_encap_types {
- TUNNEL_ENCAP_NONE,
- TUNNEL_ENCAP_FOU,
---- a/net/ipv6/ip6_tunnel.c
-+++ b/net/ipv6/ip6_tunnel.c
-@@ -16,6 +16,8 @@
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
-+ * Changes:
-+ * Steven Barth <cyrus@openwrt.org>: MAP-E FMR support
- */
-
- #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-@@ -72,9 +74,9 @@
- module_param(log_ecn_error, bool, 0644);
- MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-
--static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
-+static u32 HASH(const struct in6_addr *addr)
- {
-- u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
-+ u32 hash = ipv6_addr_hash(addr);
-
- return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
- }
-@@ -141,20 +143,29 @@
- static struct ip6_tnl *
- ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
- {
-- unsigned int hash = HASH(remote, local);
-+ unsigned int hash = HASH(local);
- struct ip6_tnl *t;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- struct in6_addr any;
-+ struct __ip6_tnl_fmr *fmr;
-
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
-- if (ipv6_addr_equal(local, &t->parms.laddr) &&
-- ipv6_addr_equal(remote, &t->parms.raddr) &&
-- (t->dev->flags & IFF_UP))
-+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
-+ !(t->dev->flags & IFF_UP))
-+ continue;
-+
-+ if (ipv6_addr_equal(remote, &t->parms.raddr))
- return t;
-+
-+ for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
-+ if (ipv6_prefix_equal(remote, &fmr->ip6_prefix,
-+ fmr->ip6_prefix_len))
-+ return t;
-+ }
- }
-
- memset(&any, 0, sizeof(any));
-- hash = HASH(&any, local);
-+ hash = HASH(local);
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_any(&t->parms.raddr) &&
-@@ -162,7 +173,7 @@
- return t;
- }
-
-- hash = HASH(remote, &any);
-+ hash = HASH(&any);
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(remote, &t->parms.raddr) &&
- ipv6_addr_any(&t->parms.laddr) &&
-@@ -202,7 +213,7 @@
-
- if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
- prio = 1;
-- h = HASH(remote, local);
-+ h = HASH(local);
- }
- return &ip6n->tnls[prio][h];
- }
-@@ -383,6 +394,12 @@
- struct net *net = t->net;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-
-+ while (t->parms.fmrs) {
-+ struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
-+ kfree(t->parms.fmrs);
-+ t->parms.fmrs = next;
-+ }
-+
- if (dev == ip6n->fb_tnl_dev)
- RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
- else
-@@ -772,6 +789,107 @@
- }
- EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
-
-+/**
-+ * ip4ip6_fmr_calc - calculate target / source IPv6-address based on FMR
-+ * @dest: destination IPv6 address buffer
-+ * @skb: received socket buffer
-+ * @fmr: MAP FMR
-+ * @xmit: Calculate for xmit or rcv
-+ **/
-+static void ip4ip6_fmr_calc(struct in6_addr *dest,
-+ const struct iphdr *iph, const uint8_t *end,
-+ const struct __ip6_tnl_fmr *fmr, bool xmit)
-+{
-+ int psidlen = fmr->ea_len - (32 - fmr->ip4_prefix_len);
-+ u8 *portp = NULL;
-+ bool use_dest_addr;
-+ const struct iphdr *dsth = iph;
-+
-+ if ((u8*)dsth >= end)
-+ return;
-+
-+ /* find significant IP header */
-+ if (iph->protocol == IPPROTO_ICMP) {
-+ struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
-+ if (ih && ((u8*)&ih[1]) <= end && (
-+ ih->type == ICMP_DEST_UNREACH ||
-+ ih->type == ICMP_SOURCE_QUENCH ||
-+ ih->type == ICMP_TIME_EXCEEDED ||
-+ ih->type == ICMP_PARAMETERPROB ||
-+ ih->type == ICMP_REDIRECT))
-+ dsth = (const struct iphdr*)&ih[1];
-+ }
-+
-+ /* in xmit-path use dest port by default and source port only if
-+ this is an ICMP reply to something else; vice versa in rcv-path */
-+ use_dest_addr = (xmit && dsth == iph) || (!xmit && dsth != iph);
-+
-+ /* get dst port */
-+ if (((u8*)&dsth[1]) <= end && (
-+ dsth->protocol == IPPROTO_UDP ||
-+ dsth->protocol == IPPROTO_TCP ||
-+ dsth->protocol == IPPROTO_SCTP ||
-+ dsth->protocol == IPPROTO_DCCP)) {
-+ /* for UDP, TCP, SCTP and DCCP source and dest port
-+ follow IPv4 header directly */
-+ portp = ((u8*)dsth) + dsth->ihl * 4;
-+
-+ if (use_dest_addr)
-+ portp += sizeof(u16);
-+ } else if (iph->protocol == IPPROTO_ICMP) {
-+ struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
-+
-+ /* use icmp identifier as port */
-+ if (((u8*)&ih) <= end && (
-+ (use_dest_addr && (
-+ ih->type == ICMP_ECHOREPLY ||
-+ ih->type == ICMP_TIMESTAMPREPLY ||
-+ ih->type == ICMP_INFO_REPLY ||
-+ ih->type == ICMP_ADDRESSREPLY)) ||
-+ (!use_dest_addr && (
-+ ih->type == ICMP_ECHO ||
-+ ih->type == ICMP_TIMESTAMP ||
-+ ih->type == ICMP_INFO_REQUEST ||
-+ ih->type == ICMP_ADDRESS)
-+ )))
-+ portp = (u8*)&ih->un.echo.id;
-+ }
-+
-+ if ((portp && &portp[2] <= end) || psidlen == 0) {
-+ int frombyte = fmr->ip6_prefix_len / 8;
-+ int fromrem = fmr->ip6_prefix_len % 8;
-+ int bytes = sizeof(struct in6_addr) - frombyte;
-+ const u32 *addr = (use_dest_addr) ? &iph->daddr : &iph->saddr;
-+ u64 eabits = ((u64)ntohl(*addr)) << (32 + fmr->ip4_prefix_len);
-+ u64 t = 0;
-+
-+ /* extract PSID from port and add it to eabits */
-+ u16 psidbits = 0;
-+ if (psidlen > 0) {
-+ psidbits = ((u16)portp[0]) << 8 | ((u16)portp[1]);
-+ psidbits >>= 16 - psidlen - fmr->offset;
-+ psidbits = (u16)(psidbits << (16 - psidlen));
-+ eabits |= ((u64)psidbits) << (48 - (fmr->ea_len - psidlen));
-+ }
-+
-+ /* rewrite destination address */
-+ *dest = fmr->ip6_prefix;
-+ memcpy(&dest->s6_addr[10], addr, sizeof(*addr));
-+ dest->s6_addr16[7] = htons(psidbits >> (16 - psidlen));
-+
-+ if (bytes > sizeof(u64))
-+ bytes = sizeof(u64);
-+
-+ /* insert eabits */
-+ memcpy(&t, &dest->s6_addr[frombyte], bytes);
-+ t = be64_to_cpu(t) & ~(((((u64)1) << fmr->ea_len) - 1)
-+ << (64 - fmr->ea_len - fromrem));
-+ t = cpu_to_be64(t | (eabits >> fromrem));
-+ memcpy(&dest->s6_addr[frombyte], &t, bytes);
-+ }
-+}
-+
-+
- static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi,
- struct metadata_dst *tun_dst,
-@@ -824,6 +942,27 @@
- skb_reset_network_header(skb);
- memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
-
-+ if (tpi->proto == htons(ETH_P_IP) && tunnel->parms.fmrs &&
-+ !ipv6_addr_equal(&ipv6h->saddr, &tunnel->parms.raddr)) {
-+ /* Packet didn't come from BR, so lookup FMR */
-+ struct __ip6_tnl_fmr *fmr;
-+ struct in6_addr expected = tunnel->parms.raddr;
-+ for (fmr = tunnel->parms.fmrs; fmr; fmr = fmr->next)
-+ if (ipv6_prefix_equal(&ipv6h->saddr,
-+ &fmr->ip6_prefix, fmr->ip6_prefix_len))
-+ break;
-+
-+ /* Check that IPv6 matches IPv4 source to prevent spoofing */
-+ if (fmr)
-+ ip4ip6_fmr_calc(&expected, ip_hdr(skb),
-+ skb_tail_pointer(skb), fmr, false);
-+
-+ if (!ipv6_addr_equal(&ipv6h->saddr, &expected)) {
-+ rcu_read_unlock();
-+ goto drop;
-+ }
-+ }
-+
- __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
-
- err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
-@@ -955,6 +1094,7 @@
- opt->ops.opt_nflen = 8;
- }
-
-+
- /**
- * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
- * @t: the outgoing tunnel device
-@@ -1307,6 +1447,7 @@
- {
- struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-+ struct __ip6_tnl_fmr *fmr;
- int encap_limit = -1;
- __u16 offset;
- struct flowi6 fl6;
-@@ -1370,6 +1511,18 @@
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
-
-+ /* try to find matching FMR */
-+ for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
-+ unsigned mshift = 32 - fmr->ip4_prefix_len;
-+ if (ntohl(fmr->ip4_prefix.s_addr) >> mshift ==
-+ ntohl(ip_hdr(skb)->daddr) >> mshift)
-+ break;
-+ }
-+
-+ /* change dstaddr according to FMR */
-+ if (fmr)
-+ ip4ip6_fmr_calc(&fl6.daddr, ip_hdr(skb), skb_tail_pointer(skb), fmr, true);
-+
- if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
- return -1;
-
-@@ -1498,6 +1651,14 @@
- t->parms.link = p->link;
- t->parms.proto = p->proto;
- t->parms.fwmark = p->fwmark;
-+
-+ while (t->parms.fmrs) {
-+ struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
-+ kfree(t->parms.fmrs);
-+ t->parms.fmrs = next;
-+ }
-+ t->parms.fmrs = p->fmrs;
-+
- dst_cache_reset(&t->dst_cache);
- ip6_tnl_link_config(t);
- return 0;
-@@ -1536,6 +1697,7 @@
- p->flowinfo = u->flowinfo;
- p->link = u->link;
- p->proto = u->proto;
-+ p->fmrs = NULL;
- memcpy(p->name, u->name, sizeof(u->name));
- }
-
-@@ -1922,6 +2084,15 @@
- return 0;
- }
-
-+static const struct nla_policy ip6_tnl_fmr_policy[IFLA_IPTUN_FMR_MAX + 1] = {
-+ [IFLA_IPTUN_FMR_IP6_PREFIX] = { .len = sizeof(struct in6_addr) },
-+ [IFLA_IPTUN_FMR_IP4_PREFIX] = { .len = sizeof(struct in_addr) },
-+ [IFLA_IPTUN_FMR_IP6_PREFIX_LEN] = { .type = NLA_U8 },
-+ [IFLA_IPTUN_FMR_IP4_PREFIX_LEN] = { .type = NLA_U8 },
-+ [IFLA_IPTUN_FMR_EA_LEN] = { .type = NLA_U8 },
-+ [IFLA_IPTUN_FMR_OFFSET] = { .type = NLA_U8 }
-+};
-+
- static void ip6_tnl_netlink_parms(struct nlattr *data[],
- struct __ip6_tnl_parm *parms)
- {
-@@ -1959,6 +2130,46 @@
-
- if (data[IFLA_IPTUN_FWMARK])
- parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
-+
-+ if (data[IFLA_IPTUN_FMRS]) {
-+ unsigned rem;
-+ struct nlattr *fmr;
-+ nla_for_each_nested(fmr, data[IFLA_IPTUN_FMRS], rem) {
-+ struct nlattr *fmrd[IFLA_IPTUN_FMR_MAX + 1], *c;
-+ struct __ip6_tnl_fmr *nfmr;
-+
-+ nla_parse_nested(fmrd, IFLA_IPTUN_FMR_MAX,
-+ fmr, ip6_tnl_fmr_policy, NULL);
-+
-+ if (!(nfmr = kzalloc(sizeof(*nfmr), GFP_KERNEL)))
-+ continue;
-+
-+ nfmr->offset = 6;
-+
-+ if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX]))
-+ nla_memcpy(&nfmr->ip6_prefix, fmrd[IFLA_IPTUN_FMR_IP6_PREFIX],
-+ sizeof(nfmr->ip6_prefix));
-+
-+ if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX]))
-+ nla_memcpy(&nfmr->ip4_prefix, fmrd[IFLA_IPTUN_FMR_IP4_PREFIX],
-+ sizeof(nfmr->ip4_prefix));
-+
-+ if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX_LEN]))
-+ nfmr->ip6_prefix_len = nla_get_u8(c);
-+
-+ if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX_LEN]))
-+ nfmr->ip4_prefix_len = nla_get_u8(c);
-+
-+ if ((c = fmrd[IFLA_IPTUN_FMR_EA_LEN]))
-+ nfmr->ea_len = nla_get_u8(c);
-+
-+ if ((c = fmrd[IFLA_IPTUN_FMR_OFFSET]))
-+ nfmr->offset = nla_get_u8(c);
-+
-+ nfmr->next = parms->fmrs;
-+ parms->fmrs = nfmr;
-+ }
-+ }
- }
-
- static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
-@@ -2074,6 +2285,12 @@
-
- static size_t ip6_tnl_get_size(const struct net_device *dev)
- {
-+ const struct ip6_tnl *t = netdev_priv(dev);
-+ struct __ip6_tnl_fmr *c;
-+ int fmrs = 0;
-+ for (c = t->parms.fmrs; c; c = c->next)
-+ ++fmrs;
-+
- return
- /* IFLA_IPTUN_LINK */
- nla_total_size(4) +
-@@ -2103,6 +2320,24 @@
- nla_total_size(0) +
- /* IFLA_IPTUN_FWMARK */
- nla_total_size(4) +
-+ /* IFLA_IPTUN_FMRS */
-+ nla_total_size(0) +
-+ (
-+ /* nest */
-+ nla_total_size(0) +
-+ /* IFLA_IPTUN_FMR_IP6_PREFIX */
-+ nla_total_size(sizeof(struct in6_addr)) +
-+ /* IFLA_IPTUN_FMR_IP4_PREFIX */
-+ nla_total_size(sizeof(struct in_addr)) +
-+ /* IFLA_IPTUN_FMR_EA_LEN */
-+ nla_total_size(1) +
-+ /* IFLA_IPTUN_FMR_IP6_PREFIX_LEN */
-+ nla_total_size(1) +
-+ /* IFLA_IPTUN_FMR_IP4_PREFIX_LEN */
-+ nla_total_size(1) +
-+ /* IFLA_IPTUN_FMR_OFFSET */
-+ nla_total_size(1)
-+ ) * fmrs +
- 0;
- }
-
-@@ -2110,6 +2345,9 @@
- {
- struct ip6_tnl *tunnel = netdev_priv(dev);
- struct __ip6_tnl_parm *parm = &tunnel->parms;
-+ struct __ip6_tnl_fmr *c;
-+ int fmrcnt = 0;
-+ struct nlattr *fmrs;
-
- if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
- nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
-@@ -2119,9 +2357,27 @@
- nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
- nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
- nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
-- nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
-+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark) ||
-+ !(fmrs = nla_nest_start(skb, IFLA_IPTUN_FMRS)))
- goto nla_put_failure;
-
-+ for (c = parm->fmrs; c; c = c->next) {
-+ struct nlattr *fmr = nla_nest_start(skb, ++fmrcnt);
-+ if (!fmr ||
-+ nla_put(skb, IFLA_IPTUN_FMR_IP6_PREFIX,
-+ sizeof(c->ip6_prefix), &c->ip6_prefix) ||
-+ nla_put(skb, IFLA_IPTUN_FMR_IP4_PREFIX,
-+ sizeof(c->ip4_prefix), &c->ip4_prefix) ||
-+ nla_put_u8(skb, IFLA_IPTUN_FMR_IP6_PREFIX_LEN, c->ip6_prefix_len) ||
-+ nla_put_u8(skb, IFLA_IPTUN_FMR_IP4_PREFIX_LEN, c->ip4_prefix_len) ||
-+ nla_put_u8(skb, IFLA_IPTUN_FMR_EA_LEN, c->ea_len) ||
-+ nla_put_u8(skb, IFLA_IPTUN_FMR_OFFSET, c->offset))
-+ goto nla_put_failure;
-+
-+ nla_nest_end(skb, fmr);
-+ }
-+ nla_nest_end(skb, fmrs);
-+
- if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
-@@ -2161,6 +2417,7 @@
- [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
- [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
- [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
-+ [IFLA_IPTUN_FMRS] = { .type = NLA_NESTED },
- };
-
- static struct rtnl_link_ops ip6_link_ops __read_mostly = {