realtek: add driver support for routing offload
authorBirger Koblitz <git@birger-koblitz.de>
Fri, 10 Sep 2021 13:04:27 +0000 (15:04 +0200)
committerJohn Crispin <john@phrozen.org>
Sat, 9 Oct 2021 06:25:06 +0000 (08:25 +0200)
Add generic support for listening to FIB and Event notifier updates and
use this information to hook into the L3 hardware capabilities of the
RTL SoCs.

Signed-off-by: Birger Koblitz <git@birger-koblitz.de>
target/linux/realtek/files-5.10/drivers/net/dsa/rtl83xx/common.c
target/linux/realtek/files-5.10/drivers/net/dsa/rtl83xx/rtl838x.h

index 3049151c08bbcdfc1efbbcdf38fb0371e3ed0de8..2745ead06198b8ef217d48bf9490a2e3984b5532 100644 (file)
@@ -7,6 +7,7 @@
 #include <net/neighbour.h>
 #include <net/netevent.h>
 #include <linux/inetdevice.h>
+#include <linux/rhashtable.h>
 
 #include <asm/mach-rtl838x/mach-rtl83xx.h>
 #include "rtl83xx.h"
@@ -495,6 +496,105 @@ int rtl83xx_packet_cntr_alloc(struct rtl838x_switch_priv *priv)
        return idx;
 }
 
+/*
+ * Add an L2 nexthop entry for the L3 routing system / PIE forwarding in the SoC
+ * Use VID and MAC in rtl838x_l2_entry to identify either a free slot in the L2 hash table
+ * or mark an existing entry as a nexthop by setting it's nexthop bit
+ * Called from the L3 layer
+ * The index in the L2 hash table is filled into nh->l2_id;
+ */
+int rtl83xx_l2_nexthop_add(struct rtl838x_switch_priv *priv, struct rtl83xx_nexthop *nh)
+{
+       struct rtl838x_l2_entry e;
+       u64 seed = priv->r->l2_hash_seed(nh->mac, nh->rvid);
+       u32 key = priv->r->l2_hash_key(priv, seed);
+       int i, idx = -1;
+       u64 entry;
+
+       pr_debug("%s searching for %08llx vid %d with key %d, seed: %016llx\n",
+               __func__, nh->mac, nh->rvid, key, seed);
+
+       e.type = L2_UNICAST;
+       u64_to_ether_addr(nh->mac, &e.mac[0]);
+       e.port = nh->port;
+
+       // Loop over all entries in the hash-bucket and over the second block on 93xx SoCs
+       for (i = 0; i < priv->l2_bucket_size; i++) {
+               entry = priv->r->read_l2_entry_using_hash(key, i, &e);
+
+               if (!e.valid || ((entry & 0x0fffffffffffffffULL) == seed)) {
+                       idx = i > 3 ? ((key >> 14) & 0xffff) | i >> 1
+                                       : ((key << 2) | i) & 0xffff;
+                       break;
+               }
+       }
+
+       if (idx < 0) {
+               pr_err("%s: No more L2 forwarding entries available\n", __func__);
+               return -1;
+       }
+
+       // Found an existing (e->valid is true) or empty entry, make it a nexthop entry
+       nh->l2_id = idx;
+       if (e.valid) {
+               nh->port = e.port;
+               nh->vid = e.vid;                // Save VID
+               nh->rvid = e.rvid;
+               nh->dev_id = e.stack_dev;
+               // If the entry is already a valid next hop entry, don't change it
+               if (e.next_hop)
+                       return 0;
+       } else {
+               e.valid = true;
+               e.is_static = true;
+               e.rvid = nh->rvid;
+               e.is_ip_mc = false;
+               e.is_ipv6_mc = false;
+               e.block_da = false;
+               e.block_sa = false;
+               e.suspended = false;
+               e.age = 0;                      // With port-ignore
+               e.port = priv->port_ignore;
+               u64_to_ether_addr(nh->mac, &e.mac[0]);
+       }
+       e.next_hop = true;
+       e.nh_route_id = nh->id;                 // NH route ID takes place of VID
+       e.nh_vlan_target = false;
+
+       priv->r->write_l2_entry_using_hash(idx >> 2, idx & 0x3, &e);
+
+       return 0;
+}
+
+/*
+ * Removes a Layer 2 next hop entry in the forwarding database
+ * If it was static, the entire entry is removed, otherwise the nexthop bit is cleared
+ * and we wait until the entry ages out
+ */
+int rtl83xx_l2_nexthop_rm(struct rtl838x_switch_priv *priv, struct rtl83xx_nexthop *nh)
+{
+       struct rtl838x_l2_entry e;
+       u32 key = nh->l2_id >> 2;
+       int i = nh->l2_id & 0x3;
+       u64 entry = entry = priv->r->read_l2_entry_using_hash(key, i, &e);
+
+       pr_debug("%s: id %d, key %d, index %d\n", __func__, nh->l2_id, key, i);
+       if (!e.valid) {
+               dev_err(priv->dev, "unknown nexthop, id %x\n", nh->l2_id);
+               return -1;
+       }
+
+       if (e.is_static)
+               e.valid = false;
+       e.next_hop = false;
+       e.vid = nh->vid;                // Restore VID
+       e.rvid = nh->rvid;
+
+       priv->r->write_l2_entry_using_hash(key, i, &e);
+
+       return 0;
+}
+
 static int rtl83xx_handle_changeupper(struct rtl838x_switch_priv *priv,
                                      struct net_device *ndev,
                                      struct netdev_notifier_changeupper_info *info)
@@ -594,6 +694,671 @@ static int rtl83xx_netdevice_event(struct notifier_block *this,
        return NOTIFY_DONE;
 }
 
+const static struct rhashtable_params route_ht_params = {
+       .key_len     = sizeof(u32),
+       .key_offset  = offsetof(struct rtl83xx_route, gw_ip),
+       .head_offset = offsetof(struct rtl83xx_route, linkage),
+};
+
+/*
+ * Updates an L3 next hop entry in the ROUTING table
+ */
+static int rtl83xx_l3_nexthop_update(struct rtl838x_switch_priv *priv,  __be32 ip_addr, u64 mac)
+{
+       struct rtl83xx_route *r;
+       struct rhlist_head *tmp, *list;
+
+       rcu_read_lock();
+       list = rhltable_lookup(&priv->routes, &ip_addr, route_ht_params);
+       if (!list) {
+               rcu_read_unlock();
+               return -ENOENT;
+       }
+
+       rhl_for_each_entry_rcu(r, tmp, list, linkage) {
+               pr_info("%s: Setting up fwding: ip %pI4, GW mac %016llx\n",
+                       __func__, &ip_addr, mac);
+
+               // Reads the ROUTING table entry associated with the route
+               priv->r->route_read(r->id, r);
+               pr_info("Route with id %d to %pI4 / %d\n", r->id, &r->dst_ip, r->prefix_len);
+
+               r->nh.mac = r->nh.gw = mac;
+               r->nh.port = priv->port_ignore;
+               r->nh.id = r->id;
+
+               // Do we need to explicitly add a DMAC entry with the route's nh index?
+               if (priv->r->set_l3_egress_mac)
+                       priv->r->set_l3_egress_mac(r->id, mac);
+
+               // Update ROUTING table: map gateway-mac and switch-mac id to route id
+               rtl83xx_l2_nexthop_add(priv, &r->nh);
+
+               r->attr.valid = true;
+               r->attr.action = ROUTE_ACT_FORWARD;
+               r->attr.type = 0;
+               r->attr.hit = false; // Reset route-used indicator
+
+               // Add PIE entry with dst_ip and prefix_len
+               r->pr.dip = r->dst_ip;
+               r->pr.dip_m = inet_make_mask(r->prefix_len);
+
+               if (r->is_host_route) {
+                       int slot = priv->r->find_l3_slot(r, false);
+
+                       pr_info("%s: Got slot for route: %d\n", __func__, slot);
+                       priv->r->host_route_write(slot, r);
+               } else {
+                       priv->r->route_write(r->id, r);
+                       r->pr.fwd_sel = true;
+                       r->pr.fwd_data = r->nh.l2_id;
+                       r->pr.fwd_act = PIE_ACT_ROUTE_UC;
+               }
+
+               if (priv->r->set_l3_nexthop)
+                       priv->r->set_l3_nexthop(r->nh.id, r->nh.l2_id, r->nh.if_id);
+
+               if (r->pr.id < 0) {
+                       r->pr.packet_cntr = rtl83xx_packet_cntr_alloc(priv);
+                       if (r->pr.packet_cntr >= 0) {
+                               pr_info("Using packet counter %d\n", r->pr.packet_cntr);
+                               r->pr.log_sel = true;
+                               r->pr.log_data = r->pr.packet_cntr;
+                       }
+                       priv->r->pie_rule_add(priv, &r->pr);
+               } else {
+                       int pkts = priv->r->packet_cntr_read(r->pr.packet_cntr);
+                       pr_info("%s: total packets: %d\n", __func__, pkts);
+
+                       priv->r->pie_rule_write(priv, r->pr.id, &r->pr);
+               }
+       }
+       rcu_read_unlock();
+       return 0;
+}
+
+static int rtl83xx_port_ipv4_resolve(struct rtl838x_switch_priv *priv,
+                                    struct net_device *dev, __be32 ip_addr)
+{
+       struct neighbour *n = neigh_lookup(&arp_tbl, &ip_addr, dev);
+       int err = 0;
+       u64 mac;
+
+       if (!n) {
+               n = neigh_create(&arp_tbl, &ip_addr, dev);
+               if (IS_ERR(n))
+                       return PTR_ERR(n);
+       }
+
+       /* If the neigh is already resolved, then go ahead and
+        * install the entry, otherwise start the ARP process to
+        * resolve the neigh.
+        */
+       if (n->nud_state & NUD_VALID) {
+               mac = ether_addr_to_u64(n->ha);
+               pr_info("%s: resolved mac: %016llx\n", __func__, mac);
+               rtl83xx_l3_nexthop_update(priv, ip_addr, mac);
+       } else {
+               pr_info("%s: need to wait\n", __func__);
+               neigh_event_send(n, NULL);
+       }
+
+       neigh_release(n);
+       return err;
+}
+
+struct rtl83xx_walk_data {
+       struct rtl838x_switch_priv *priv;
+       int port;
+};
+
+static int rtl83xx_port_lower_walk(struct net_device *lower, struct netdev_nested_priv *_priv)
+{
+       struct rtl83xx_walk_data *data = (struct rtl83xx_walk_data *)_priv->data;
+       struct rtl838x_switch_priv *priv = data->priv;
+       int ret = 0;
+       int index;
+
+       index = rtl83xx_port_is_under(lower, priv);
+       data->port = index;
+       if (index >= 0) {
+               pr_debug("Found DSA-port, index %d\n", index);
+               ret = 1;
+       }
+
+       return ret;
+}
+
+int rtl83xx_port_dev_lower_find(struct net_device *dev, struct rtl838x_switch_priv *priv)
+{
+       struct rtl83xx_walk_data data;
+       struct netdev_nested_priv _priv;
+
+       data.priv = priv;
+       data.port = 0;
+       _priv.data = (void *)&data;
+
+       netdev_walk_all_lower_dev(dev, rtl83xx_port_lower_walk, &_priv);
+
+       return data.port;
+}
+
+static struct rtl83xx_route *rtl83xx_route_alloc(struct rtl838x_switch_priv *priv, u32 ip)
+{
+       struct rtl83xx_route *r;
+       int idx = 0, err;
+
+       mutex_lock(&priv->reg_mutex);
+
+       idx = find_first_zero_bit(priv->route_use_bm, MAX_ROUTES);
+       pr_debug("%s id: %d, ip %pI4\n", __func__, idx, &ip);
+
+       r = kzalloc(sizeof(*r), GFP_KERNEL);
+       if (!r) {
+               mutex_unlock(&priv->reg_mutex);
+               return r;
+       }
+
+       r->id = idx;
+       r->gw_ip = ip;
+       r->pr.id = -1; // We still need to allocate a rule in HW
+       r->is_host_route = false;
+
+       err = rhltable_insert(&priv->routes, &r->linkage, route_ht_params);
+       if (err) {
+               pr_err("Could not insert new rule\n");
+               mutex_unlock(&priv->reg_mutex);
+               goto out_free;
+       }
+
+       set_bit(idx, priv->route_use_bm);
+
+       mutex_unlock(&priv->reg_mutex);
+
+       return r;
+
+out_free:
+       kfree(r);
+       return NULL;
+}
+
+
+static struct rtl83xx_route *rtl83xx_host_route_alloc(struct rtl838x_switch_priv *priv, u32 ip)
+{
+       struct rtl83xx_route *r;
+       int idx = 0, err;
+
+       mutex_lock(&priv->reg_mutex);
+
+       idx = find_first_zero_bit(priv->host_route_use_bm, MAX_HOST_ROUTES);
+       pr_debug("%s id: %d, ip %pI4\n", __func__, idx, &ip);
+
+       r = kzalloc(sizeof(*r), GFP_KERNEL);
+       if (!r) {
+               mutex_unlock(&priv->reg_mutex);
+               return r;
+       }
+
+       /* We require a unique route ID irrespective of whether it is a prefix or host
+        * route (on RTL93xx) as we use this ID to associate a DMAC and next-hop entry */
+       r->id = idx + MAX_ROUTES;
+
+       r->gw_ip = ip;
+       r->pr.id = -1; // We still need to allocate a rule in HW
+       r->is_host_route = true;
+
+       err = rhltable_insert(&priv->routes, &r->linkage, route_ht_params);
+       if (err) {
+               pr_err("Could not insert new rule\n");
+               mutex_unlock(&priv->reg_mutex);
+               goto out_free;
+       }
+
+       set_bit(idx, priv->host_route_use_bm);
+
+       mutex_unlock(&priv->reg_mutex);
+
+       return r;
+
+out_free:
+       kfree(r);
+       return NULL;
+}
+
+
+
+static void rtl83xx_route_rm(struct rtl838x_switch_priv *priv, struct rtl83xx_route *r)
+{
+       int id;
+
+       if (rhltable_remove(&priv->routes, &r->linkage, route_ht_params))
+               dev_warn(priv->dev, "Could not remove route\n");
+
+       if (r->is_host_route) {
+               id = priv->r->find_l3_slot(r, false);
+               pr_debug("%s: Got id for host route: %d\n", __func__, id);
+               r->attr.valid = false;
+               priv->r->host_route_write(id, r);
+               clear_bit(r->id - MAX_ROUTES, priv->host_route_use_bm);
+       } else {
+               // If there is a HW representation of the route, delete it
+               if (priv->r->route_lookup_hw) {
+                       id = priv->r->route_lookup_hw(r);
+                       pr_info("%s: Got id for prefix route: %d\n", __func__, id);
+                       r->attr.valid = false;
+                       priv->r->route_write(id, r);
+               }
+               clear_bit(r->id, priv->route_use_bm);
+       }
+
+       kfree(r);
+}
+
+static int rtl83xx_fib4_del(struct rtl838x_switch_priv *priv,
+                           struct fib_entry_notifier_info *info)
+{
+       struct fib_nh *nh = fib_info_nh(info->fi, 0);
+       struct rtl83xx_route *r;
+       struct rhlist_head *tmp, *list;
+
+       pr_debug("In %s, ip %pI4, len %d\n", __func__, &info->dst, info->dst_len);
+       rcu_read_lock();
+       list = rhltable_lookup(&priv->routes, &nh->fib_nh_gw4, route_ht_params);
+       if (!list) {
+               rcu_read_unlock();
+               pr_err("%s: no such gateway: %pI4\n", __func__, &nh->fib_nh_gw4);
+               return -ENOENT;
+       }
+       rhl_for_each_entry_rcu(r, tmp, list, linkage) {
+               if (r->dst_ip == info->dst && r->prefix_len == info->dst_len) {
+                       pr_info("%s: found a route with id %d, nh-id %d\n",
+                               __func__, r->id, r->nh.id);
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       rtl83xx_l2_nexthop_rm(priv, &r->nh);
+
+       pr_debug("%s: Releasing packet counter %d\n", __func__, r->pr.packet_cntr);
+       set_bit(r->pr.packet_cntr, priv->packet_cntr_use_bm);
+       priv->r->pie_rule_rm(priv, &r->pr);
+
+       rtl83xx_route_rm(priv, r);
+
+       nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
+
+       return 0;
+}
+
+/*
+ * On the RTL93xx, an L3 termination endpoint MAC address on which the router waits
+ * for packets to be routed needs to be allocated.
+ */
+static int rtl83xx_alloc_router_mac(struct rtl838x_switch_priv *priv, u64 mac)
+{
+       int i, free_mac = -1;
+       struct rtl93xx_rt_mac m;
+
+       mutex_lock(&priv->reg_mutex);
+       for (i = 0; i < MAX_ROUTER_MACS; i++) {
+               priv->r->get_l3_router_mac(i, &m);
+               if (free_mac < 0 && !m.valid) {
+                       free_mac = i;
+                       continue;
+               }
+               if (m.valid && m.mac == mac) {
+                       free_mac = i;
+                       break;
+               }
+       }
+
+       if (free_mac < 0) {
+               pr_err("No free router MACs, cannot offload\n");
+               mutex_unlock(&priv->reg_mutex);
+               return -1;
+       }
+
+       m.valid = true;
+       m.mac = mac;
+       m.p_type = 0; // An individual port, not a trunk port
+       m.p_id = 0x3f;                  // Listen on any port
+       m.p_id_mask = 0;
+       m.vid = 0;                      // Listen on any VLAN...
+       m.vid_mask = 0;                 // ... so mask needs to be 0
+       m.mac_mask = 0xffffffffffffULL; // We want an exact match of the interface MAC
+       m.action = L3_FORWARD;          // Route the packet
+       priv->r->set_l3_router_mac(free_mac, &m);
+
+       mutex_unlock(&priv->reg_mutex);
+
+       return 0;
+}
+
+static int rtl83xx_alloc_egress_intf(struct rtl838x_switch_priv *priv, u64 mac, int vlan)
+{
+       int i, free_mac = -1;
+       struct rtl838x_l3_intf intf;
+       u64 m;
+
+       mutex_lock(&priv->reg_mutex);
+       for (i = 0; i < MAX_SMACS; i++) {
+               m = priv->r->get_l3_egress_mac(L3_EGRESS_DMACS + i);
+               if (free_mac < 0 && !m) {
+                       free_mac = i;
+                       continue;
+               }
+               if (m == mac) {
+                       mutex_unlock(&priv->reg_mutex);
+                       return i;
+               }
+       }
+
+       if (free_mac < 0) {
+               pr_err("No free egress interface, cannot offload\n");
+               return -1;
+       }
+
+       // Set up default egress interface 1
+       intf.vid = vlan;
+       intf.smac_idx = free_mac;
+       intf.ip4_mtu_id = 1;
+       intf.ip6_mtu_id = 1;
+       intf.ttl_scope = 1; // TTL
+       intf.hl_scope = 1;  // Hop Limit
+       intf.ip4_icmp_redirect = intf.ip6_icmp_redirect = 2;  // FORWARD
+       intf.ip4_pbr_icmp_redirect = intf.ip6_pbr_icmp_redirect = 2; // FORWARD;
+       priv->r->set_l3_egress_intf(free_mac, &intf);
+
+       priv->r->set_l3_egress_mac(L3_EGRESS_DMACS + free_mac, mac);
+
+       mutex_unlock(&priv->reg_mutex);
+
+       return free_mac;
+}
+
+static int rtl83xx_fib4_add(struct rtl838x_switch_priv *priv,
+                           struct fib_entry_notifier_info *info)
+{
+       struct fib_nh *nh = fib_info_nh(info->fi, 0);
+       struct net_device *dev = fib_info_nh(info->fi, 0)->fib_nh_dev;
+       int port;
+       struct rtl83xx_route *r;
+       bool to_localhost;
+       int vlan = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 0;
+
+       pr_debug("In %s, ip %pI4, len %d\n", __func__, &info->dst, info->dst_len);
+       if (!info->dst) {
+               pr_info("Not offloading default route for now\n");
+               return 0;
+       }
+
+       pr_debug("GW: %pI4, interface name %s, mac %016llx, vlan %d\n", &nh->fib_nh_gw4, dev->name,
+               ether_addr_to_u64(dev->dev_addr), vlan
+       );
+
+       port = rtl83xx_port_dev_lower_find(dev, priv);
+       if (port < 0)
+               return -1;
+
+       // For now we only work with routes that have a gateway and are not ourself
+//     if ((!nh->fib_nh_gw4) && (info->dst_len != 32))
+//             return 0;
+
+       if ((info->dst & 0xff) == 0xff)
+               return 0;
+
+       // Do not offload routes to 192.168.100.x
+       if ((info->dst & 0xffffff00) == 0xc0a86400)
+               return 0;
+
+       // Do not offload routes to 127.x.x.x
+       if ((info->dst & 0xff000000) == 0x7f000000)
+               return 0;
+
+       // Allocate route or host-route (entry if hardware supports this)
+       if (info->dst_len == 32 && priv->r->host_route_write)
+               r = rtl83xx_host_route_alloc(priv, nh->fib_nh_gw4);
+       else
+               r = rtl83xx_route_alloc(priv, nh->fib_nh_gw4);
+
+       if (!r) {
+               pr_err("%s: No more free route entries\n", __func__);
+               return -1;
+       }
+
+       r->dst_ip = info->dst;
+       r->prefix_len = info->dst_len;
+       r->nh.rvid = vlan;
+       to_localhost = !nh->fib_nh_gw4;
+
+       if (priv->r->set_l3_router_mac) {
+               u64 mac = ether_addr_to_u64(dev->dev_addr);
+
+               pr_debug("Local route and router mac %016llx\n", mac);
+
+               if (rtl83xx_alloc_router_mac(priv, mac))
+                       goto out_free_rt;
+
+               // vid = 0: Do not care about VID
+               r->nh.if_id = rtl83xx_alloc_egress_intf(priv, mac, vlan);
+               if (r->nh.if_id < 0)
+                       goto out_free_rmac;
+
+               if (to_localhost) {
+                       int slot;
+
+                       r->nh.mac = mac;
+                       r->nh.port = priv->port_ignore;
+                       r->attr.valid = true;
+                       r->attr.action = ROUTE_ACT_TRAP2CPU;
+                       r->attr.type = 0;
+
+                       slot = priv->r->find_l3_slot(r, false);
+                       pr_debug("%s: Got slot for route: %d\n", __func__, slot);
+                       priv->r->host_route_write(slot, r);
+               }
+       }
+
+       // We need to resolve the mac address of the GW
+       if (!to_localhost)
+               rtl83xx_port_ipv4_resolve(priv, dev, nh->fib_nh_gw4);
+
+       nh->fib_nh_flags |= RTNH_F_OFFLOAD;
+
+       return 0;
+
+out_free_rmac:
+out_free_rt:
+       return 0;
+}
+
+static int rtl83xx_fib6_add(struct rtl838x_switch_priv *priv,
+                           struct fib6_entry_notifier_info *info)
+{
+       pr_debug("In %s\n", __func__);
+//     nh->fib_nh_flags |= RTNH_F_OFFLOAD;
+       return 0;
+}
+
+struct net_event_work {
+       struct work_struct work;
+       struct rtl838x_switch_priv *priv;
+       u64 mac;
+       u32 gw_addr;
+};
+
+static void rtl83xx_net_event_work_do(struct work_struct *work)
+{
+       struct net_event_work *net_work =
+               container_of(work, struct net_event_work, work);
+       struct rtl838x_switch_priv *priv = net_work->priv;
+
+       rtl83xx_l3_nexthop_update(priv, net_work->gw_addr, net_work->mac);
+}
+
+static int rtl83xx_netevent_event(struct notifier_block *this,
+                                unsigned long event, void *ptr)
+{
+       struct rtl838x_switch_priv *priv;
+       struct net_device *dev;
+       struct neighbour *n = ptr;
+       int err, port;
+       struct net_event_work *net_work;
+
+       priv = container_of(this, struct rtl838x_switch_priv, ne_nb);
+
+       net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
+       if (!net_work)
+               return NOTIFY_BAD;
+
+       INIT_WORK(&net_work->work, rtl83xx_net_event_work_do);
+       net_work->priv = priv;
+
+       switch (event) {
+       case NETEVENT_NEIGH_UPDATE:
+               if (n->tbl != &arp_tbl)
+                       return NOTIFY_DONE;
+               dev = n->dev;
+               port = rtl83xx_port_dev_lower_find(dev, priv);
+               if (port < 0 || !(n->nud_state & NUD_VALID)) {
+                       pr_debug("%s: Neigbour invalid, not updating\n", __func__);
+                       kfree(net_work);
+                       return NOTIFY_DONE;
+               }
+
+               net_work->mac = ether_addr_to_u64(n->ha);
+               net_work->gw_addr = *(__be32 *) n->primary_key;
+
+               pr_debug("%s: updating neighbour on port %d, mac %016llx\n",
+                       __func__, port, net_work->mac);
+               schedule_work(&net_work->work);
+               if (err)
+                       netdev_warn(dev, "failed to handle neigh update (err %d)\n", err);
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+struct rtl83xx_fib_event_work {
+       struct work_struct work;
+       union {
+               struct fib_entry_notifier_info fen_info;
+               struct fib6_entry_notifier_info fen6_info;
+               struct fib_rule_notifier_info fr_info;
+       };
+       struct rtl838x_switch_priv *priv;
+       bool is_fib6;
+       unsigned long event;
+};
+
+static void rtl83xx_fib_event_work_do(struct work_struct *work)
+{
+       struct rtl83xx_fib_event_work *fib_work =
+               container_of(work, struct rtl83xx_fib_event_work, work);
+       struct rtl838x_switch_priv *priv = fib_work->priv;
+       struct fib_rule *rule;
+       int err;
+
+       /* Protect internal structures from changes */
+       rtnl_lock();
+       pr_debug("%s: doing work, event %ld\n", __func__, fib_work->event);
+       switch (fib_work->event) {
+       case FIB_EVENT_ENTRY_ADD:
+       case FIB_EVENT_ENTRY_REPLACE:
+       case FIB_EVENT_ENTRY_APPEND:
+               if (fib_work->is_fib6) {
+                       err = rtl83xx_fib6_add(priv, &fib_work->fen6_info);
+               } else {
+                       err = rtl83xx_fib4_add(priv, &fib_work->fen_info);
+                       fib_info_put(fib_work->fen_info.fi);
+               }
+               if (err)
+                       pr_err("%s: FIB4 failed\n", __func__);
+               break;
+       case FIB_EVENT_ENTRY_DEL:
+               rtl83xx_fib4_del(priv, &fib_work->fen_info);
+               fib_info_put(fib_work->fen_info.fi);
+               break;
+       case FIB_EVENT_RULE_ADD:
+       case FIB_EVENT_RULE_DEL:
+               rule = fib_work->fr_info.rule;
+               if (!fib4_rule_default(rule))
+                       pr_err("%s: FIB4 default rule failed\n", __func__);
+               fib_rule_put(rule);
+               break;
+       }
+       rtnl_unlock();
+       kfree(fib_work);
+}
+
+/* Called with rcu_read_lock() */
+static int rtl83xx_fib_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       struct fib_notifier_info *info = ptr;
+       struct rtl838x_switch_priv *priv;
+       struct rtl83xx_fib_event_work *fib_work;
+
+       if ((info->family != AF_INET && info->family != AF_INET6 &&
+            info->family != RTNL_FAMILY_IPMR &&
+            info->family != RTNL_FAMILY_IP6MR))
+               return NOTIFY_DONE;
+
+       priv = container_of(this, struct rtl838x_switch_priv, fib_nb);
+
+       fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+       if (!fib_work)
+               return NOTIFY_BAD;
+
+       INIT_WORK(&fib_work->work, rtl83xx_fib_event_work_do);
+       fib_work->priv = priv;
+       fib_work->event = event;
+       fib_work->is_fib6 = false;
+
+       switch (event) {
+       case FIB_EVENT_ENTRY_ADD:
+       case FIB_EVENT_ENTRY_REPLACE:
+       case FIB_EVENT_ENTRY_APPEND:
+       case FIB_EVENT_ENTRY_DEL:
+               pr_debug("%s: FIB_ENTRY ADD/DELL, event %ld\n", __func__, event);
+               if (info->family == AF_INET) {
+                       struct fib_entry_notifier_info *fen_info = ptr;
+
+                       if (fen_info->fi->fib_nh_is_v6) {
+                               NL_SET_ERR_MSG_MOD(info->extack,
+                                       "IPv6 gateway with IPv4 route is not supported");
+                               kfree(fib_work);
+                               return notifier_from_errno(-EINVAL);
+                       }
+
+                       memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
+                       /* Take referece on fib_info to prevent it from being
+                       * freed while work is queued. Release it afterwards.
+                       */
+                       fib_info_hold(fib_work->fen_info.fi);
+
+               } else if (info->family == AF_INET6) {
+                       struct fib6_entry_notifier_info *fen6_info = ptr;
+                       pr_warn("%s: FIB_RULE ADD/DELL for IPv6 not supported\n", __func__);
+                       kfree(fib_work);
+                       return notifier_from_errno(-EINVAL);
+               }
+               break;
+
+       case FIB_EVENT_RULE_ADD:
+       case FIB_EVENT_RULE_DEL:
+               pr_debug("%s: FIB_RULE ADD/DELL, event: %ld\n", __func__, event);
+               memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
+               fib_rule_get(fib_work->fr_info.rule);
+               break;
+       }
+
+       schedule_work(&fib_work->work);
+
+       return NOTIFY_DONE;
+}
+
 static int __init rtl83xx_sw_probe(struct platform_device *pdev)
 {
        int err = 0, i;
@@ -623,6 +1388,8 @@ static int __init rtl83xx_sw_probe(struct platform_device *pdev)
        priv->ds->ops = &rtl83xx_switch_ops;
        priv->dev = dev;
 
+       mutex_init(&priv->reg_mutex);
+
        priv->family_id = soc_info.family;
        priv->id = soc_info.id;
        switch(soc_info.family) {
@@ -751,18 +1518,51 @@ static int __init rtl83xx_sw_probe(struct platform_device *pdev)
 
        rtl83xx_setup_qos(priv);
 
+       priv->r->l3_setup(priv);
+
        /* Clear all destination ports for mirror groups */
        for (i = 0; i < 4; i++)
                priv->mirror_group_ports[i] = -1;
 
+       /*
+        * Register netdevice event callback to catch changes in link aggregation groups
+        */
        priv->nb.notifier_call = rtl83xx_netdevice_event;
-               if (register_netdevice_notifier(&priv->nb)) {
-                       priv->nb.notifier_call = NULL;
-                       dev_err(dev, "Failed to register LAG netdev notifier\n");
+       if (register_netdevice_notifier(&priv->nb)) {
+               priv->nb.notifier_call = NULL;
+               dev_err(dev, "Failed to register LAG netdev notifier\n");
+               goto err_register_nb;
+       }
+
+       // Initialize hash table for L3 routing
+       rhltable_init(&priv->routes, &route_ht_params);
+
+       /*
+        * Register netevent notifier callback to catch notifications about neighboring
+        * changes to update nexthop entries for L3 routing.
+        */
+       priv->ne_nb.notifier_call = rtl83xx_netevent_event;
+       if (register_netevent_notifier(&priv->ne_nb)) {
+               priv->ne_nb.notifier_call = NULL;
+               dev_err(dev, "Failed to register netevent notifier\n");
+               goto err_register_ne_nb;
        }
 
+       priv->fib_nb.notifier_call = rtl83xx_fib_event;
+
+       /*
+        * Register Forwarding Information Base notifier to offload routes where
+        * where possible
+        * Only FIBs pointing to our own netdevs are programmed into
+        * the device, so no need to pass a callback.
+        */
+       err = register_fib_notifier(&init_net, &priv->fib_nb, NULL, NULL);
+       if (err)
+               goto err_register_fib_nb;
+
+       // TODO: put this into l2_setup()
        // Flood BPDUs to all ports including cpu-port
-       if (soc_info.family != RTL9300_FAMILY_ID) { // TODO: Port this functionality
+       if (soc_info.family != RTL9300_FAMILY_ID) {
                bpdu_mask = soc_info.family == RTL8380_FAMILY_ID ? 0x1FFFFFFF : 0x1FFFFFFFFFFFFF;
                priv->r->set_port_reg_be(bpdu_mask, priv->r->rma_bpdu_fld_pmask);
 
@@ -772,6 +1572,13 @@ static int __init rtl83xx_sw_probe(struct platform_device *pdev)
                rtl838x_dbgfs_init(priv);
        }
 
+       return 0;
+
+err_register_fib_nb:
+       unregister_netevent_notifier(&priv->ne_nb);
+err_register_ne_nb:
+       unregister_netdevice_notifier(&priv->nb);
+err_register_nb:
        return err;
 }
 
index 6ff59c434823eea7a7086aa90c117c46449a6ca1..c9307458f866963b9dfc7be7a79030849f5c17bd 100644 (file)
 #define RTL838X_VLAN_PROFILE(idx)              (0x3A88 + ((idx) << 2))
 #define RTL838X_VLAN_PORT_EGR_FLTR             (0x3A84)
 #define RTL838X_VLAN_PORT_PB_VLAN              (0x3C00)
-#define RTL838X_VLAN_PORT_IGR_FLTR(port)       (0x3A7C + (((port >> 4) << 2)))
-#define RTL838X_VLAN_PORT_IGR_FLTR_0           (0x3A7C)
-#define RTL838X_VLAN_PORT_IGR_FLTR_1           (0x3A7C + 4)
+#define RTL838X_VLAN_PORT_IGR_FLTR             (0x3A7C)
 #define RTL838X_VLAN_PORT_TAG_STS_CTRL         (0xA530)
 
 #define RTL839X_VLAN_PROFILE(idx)              (0x25C0 + (((idx) << 3)))
 #define RTL839X_VLAN_CTRL                      (0x26D4)
 #define RTL839X_VLAN_PORT_PB_VLAN              (0x26D8)
-#define RTL839X_VLAN_PORT_IGR_FLTR(port)       (0x27B4 + (((port >> 4) << 2)))
-#define RTL839X_VLAN_PORT_EGR_FLTR(port)       (0x27C4 + (((port >> 5) << 2)))
+#define RTL839X_VLAN_PORT_IGR_FLTR             (0x27B4)
+#define RTL839X_VLAN_PORT_EGR_FLTR             (0x27C4)
+#define RTL839X_VLAN_PORT_TAG_STS_CTRL         (0x6828)
 #define RTL839X_VLAN_PORT_TAG_STS_CTRL         (0x6828)
 
 #define RTL930X_VLAN_PROFILE_SET(idx)          (0x9c60 + (((idx) * 20)))
 #define RTL930X_VLAN_CTRL                      (0x82D4)
 #define RTL930X_VLAN_PORT_PB_VLAN              (0x82D8)
-#define RTL930X_VLAN_PORT_IGR_FLTR(port)       (0x83C0 + (((port >> 4) << 2)))
+#define RTL930X_VLAN_PORT_IGR_FLTR             (0x83C0)
 #define RTL930X_VLAN_PORT_EGR_FLTR             (0x83C8)
 #define RTL930X_VLAN_PORT_TAG_STS_CTRL         (0xCE24)
 
 #define RTL931X_VLAN_PROFILE_SET(idx)          (0x9800 + (((idx) * 28)))
 #define RTL931X_VLAN_CTRL                      (0x94E4)
-#define RTL931X_VLAN_PORT_IGR_FLTR(port)       (0x96B4 + (((port >> 4) << 2)))
-#define RTL931X_VLAN_PORT_EGR_FLTR(port)       (0x96C4 + (((port >> 5) << 2)))
+#define RTL931X_VLAN_PORT_IGR_FLTR             (0x96B4)
+#define RTL931X_VLAN_PORT_EGR_FLTR             (0x96C4)
 #define RTL931X_VLAN_PORT_TAG_CTRL             (0x4860)
 
 /* Table access registers */
 #define PIE_ACT_ROUTE_UC       6
 #define PIE_ACT_VID_ASSIGN     0
 
+// L3 actions
+#define L3_FORWARD             0
+#define L3_DROP                        1
+#define L3_TRAP2CPU            2
+#define L3_COPY2CPU            3
+#define L3_TRAP2MASTERCPU      4
+#define L3_COPY2MASTERCPU      5
+#define L3_HARDDROP            6
+
+// Route actions
+#define ROUTE_ACT_FORWARD      0
+#define ROUTE_ACT_TRAP2CPU     1
+#define ROUTE_ACT_COPY2CPU     2
+#define ROUTE_ACT_DROP         3
+
+/* L3 Routing */
+#define RTL839X_ROUTING_SA_CTRL                0x6afc
+#define RTL930X_L3_HOST_TBL_CTRL               (0xAB48)
+#define RTL930X_L3_IPUC_ROUTE_CTRL             (0xAB4C)
+#define RTL930X_L3_IP6UC_ROUTE_CTRL            (0xAB50)
+#define RTL930X_L3_IPMC_ROUTE_CTRL             (0xAB54)
+#define RTL930X_L3_IP6MC_ROUTE_CTRL            (0xAB58)
+#define RTL930X_L3_IP_MTU_CTRL(i)              (0xAB5C + ((i >> 1) << 2))
+#define RTL930X_L3_IP6_MTU_CTRL(i)             (0xAB6C + ((i >> 1) << 2))
+#define RTL930X_L3_HW_LU_KEY_CTRL              (0xAC9C)
+#define RTL930X_L3_HW_LU_KEY_IP_CTRL           (0xACA0)
+#define RTL930X_L3_HW_LU_CTRL                  (0xACC0)
+#define RTL930X_L3_IP_ROUTE_CTRL               0xab44
+
 #define MAX_VLANS 4096
 #define MAX_LAGS 16
 #define MAX_PRIOS 8
 #define MAX_PIE_ENTRIES (18 * PIE_BLOCK_SIZE)
 #define N_FIXED_FIELDS 12
 #define MAX_COUNTERS 2048
+#define MAX_ROUTES 512
+#define MAX_HOST_ROUTES 1536
+#define MAX_INTF_MTUS 8
+#define DEFAULT_MTU 1536
+#define MAX_INTERFACES 100
+#define MAX_ROUTER_MACS 64
+#define L3_EGRESS_DMACS 2048
+#define MAX_SMACS 64
 
 enum phy_type {
        PHY_NONE = 0,
@@ -614,17 +650,52 @@ struct pie_rule {
        bool bypass_ibc_sc;     // Bypass Ingress Bandwidth Control and Storm Control
 };
 
-struct rtl838x_nexthop {
-       u16 id;         // ID in HW Nexthop table
-       u32 ip;         // IP Addres of nexthop
+struct rtl838x_l3_intf {
+       u16 vid;
+       u8 smac_idx;
+       u8 ip4_mtu_id;
+       u8 ip6_mtu_id;
+       u16 ip4_mtu;
+       u16 ip6_mtu;
+       u8 ttl_scope;
+       u8 hl_scope;
+       u8 ip4_icmp_redirect;
+       u8 ip6_icmp_redirect;
+       u8 ip4_pbr_icmp_redirect;
+       u8 ip6_pbr_icmp_redirect;
+};
+
+/*
+ * An entry in the RTL93XX SoC's ROUTER_MAC tables setting up a termination point
+ * for the L3 routing system. Packets arriving and matching an entry in this table
+ * will be considered for routing.
+ * Mask fields state whether the corresponding data fields matter for matching
+ */
+struct rtl93xx_rt_mac {
+       bool valid;     // Valid or not
+       bool p_type;    // Individual (0) or trunk (1) port
+       bool p_mask;    // Whether the port type is used
+       u8 p_id;
+       u8 p_id_mask;   // Mask for the port
+       u8 action;      // Routing action performed: 0: FORWARD, 1: DROP, 2: TRAP2CPU
+                       //   3: COPY2CPU, 4: TRAP2MASTERCPU, 5: COPY2MASTERCPU, 6: HARDDROP
+       u16 vid;
+       u16 vid_mask;
+       u64 mac;        // MAC address used as source MAC in the routed packet
+       u64 mac_mask;
+};
+
+struct rtl83xx_nexthop {
+       u16 id;         // ID: L3_NEXT_HOP table-index or route-index set in L2_NEXT_HOP
        u32 dev_id;
        u16 port;
-       u16 vid;
-       u16 fid;
-       u64 mac;
+       u16 vid;        // VLAN-ID for L2 table entry (saved from L2-UC entry)
+       u16 rvid;       // Relay VID/FID for the L2 table entry
+       u64 mac;        // The MAC address of the entry in the L2_NEXT_HOP table
        u16 mac_id;
        u16 l2_id;      // Index of this next hop forwarding entry in L2 FIB table
-       u16 if_id;
+       u64 gw;         // The gateway MAC address packets are forwarded to
+       int if_id;      // Interface (into L3_EGR_INTF_IDX)
 };
 
 struct rtl838x_switch_priv;
@@ -638,6 +709,32 @@ struct rtl83xx_flow {
        u32 flags;
 };
 
+struct rtl93xx_route_attr {
+       bool valid;
+       bool hit;
+       bool ttl_dec;
+       bool ttl_check;
+       bool dst_null;
+       bool qos_as;
+       u8 qos_prio;
+       u8 type;
+       u8 action;
+};
+
+struct rtl83xx_route {
+       u32 gw_ip;                      // IP of the route's gateway
+       u32 dst_ip;                     // IP of the destination net
+       struct in6_addr dst_ip6;
+       int prefix_len;                 // Network prefix len of the destination net
+       bool is_host_route;
+       int id;                         // ID number of this route
+       struct rhlist_head linkage;
+       u16 switch_mac_id;              // Index into switch's own MACs, RTL839X only
+       struct rtl83xx_nexthop nh;
+       struct pie_rule pr;
+       struct rtl93xx_route_attr attr;
+};
+
 struct rtl838x_reg {
        void (*mask_port_reg_be)(u64 clear, u64 set, int reg);
        void (*set_port_reg_be)(u64 set, int reg);
@@ -713,6 +810,19 @@ struct rtl838x_reg {
        void (*l2_learning_setup)(void);
        u32 (*packet_cntr_read)(int counter);
        void (*packet_cntr_clear)(int counter);
+       void (*route_read)(int idx, struct rtl83xx_route *rt);
+       void (*route_write)(int idx, struct rtl83xx_route *rt);
+       void (*host_route_write)(int idx, struct rtl83xx_route *rt);
+       int (*l3_setup)(struct rtl838x_switch_priv *priv);
+       void (*set_l3_nexthop)(int idx, u16 dmac_id, u16 interface);
+       void (*get_l3_nexthop)(int idx, u16 *dmac_id, u16 *interface);
+       u64 (*get_l3_egress_mac)(u32 idx);
+       void (*set_l3_egress_mac)(u32 idx, u64 mac);
+       int (*find_l3_slot)(struct rtl83xx_route *rt, bool must_exist);
+       int (*route_lookup_hw)(struct rtl83xx_route *rt);
+       void (*get_l3_router_mac)(u32 idx, struct rtl93xx_rt_mac *m);
+       void (*set_l3_router_mac)(u32 idx, struct rtl93xx_rt_mac *m);
+       void (*set_l3_egress_intf)(int idx, struct rtl838x_l3_intf *intf);
 };
 
 struct rtl838x_switch_priv {
@@ -740,7 +850,9 @@ struct rtl838x_switch_priv {
        int n_lags;
        u64 lags_port_members[MAX_LAGS];
        struct net_device *lag_devs[MAX_LAGS];
-       struct notifier_block nb;
+       struct notifier_block nb;  // TODO: change to different name
+       struct notifier_block ne_nb;
+       struct notifier_block fib_nb;
        bool eee_enabled;
        unsigned long int mc_group_bm[MAX_MC_GROUPS >> 5];
        int n_pie_blocks;
@@ -749,8 +861,15 @@ struct rtl838x_switch_priv {
        int n_counters;
        unsigned long int octet_cntr_use_bm[MAX_COUNTERS >> 5];
        unsigned long int packet_cntr_use_bm[MAX_COUNTERS >> 4];
+       struct rhltable routes;
+       unsigned long int route_use_bm[MAX_ROUTES >> 5];
+       unsigned long int host_route_use_bm[MAX_HOST_ROUTES >> 5];
+       struct rtl838x_l3_intf *interfaces[MAX_INTERFACES];
+       u16 intf_mtus[MAX_INTF_MTUS];
+       int intf_mtu_count[MAX_INTF_MTUS];
 };
 
 void rtl838x_dbgfs_init(struct rtl838x_switch_priv *priv);
+void rtl930x_dbgfs_init(struct rtl838x_switch_priv *priv);
 
 #endif /* _RTL838X_H */