net: Introduce generic failover module
authorSridhar Samudrala <sridhar.samudrala@intel.com>
Thu, 24 May 2018 16:55:13 +0000 (09:55 -0700)
committerDavid S. Miller <davem@davemloft.net>
Tue, 29 May 2018 02:59:54 +0000 (22:59 -0400)
The failover module provides a generic interface for paravirtual drivers
to register a netdev and a set of ops with a failover instance. The ops
are used as event handlers that get called to handle netdev register/
unregister/link change/name change events on slave pci ethernet devices
with the same mac address as the failover netdev.

This enables paravirtual drivers to use a VF as an accelerated low latency
datapath. It also allows migration of VMs with direct attached VFs by
failing over to the paravirtual datapath when the VF is unplugged.

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/failover.rst [new file with mode: 0644]
MAINTAINERS
include/linux/netdevice.h
include/net/failover.h [new file with mode: 0644]
net/Kconfig
net/core/Makefile
net/core/failover.c [new file with mode: 0644]

diff --git a/Documentation/networking/failover.rst b/Documentation/networking/failover.rst
new file mode 100644 (file)
index 0000000..f0c8483
--- /dev/null
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+FAILOVER
+========
+
+Overview
+========
+
+The failover module provides a generic interface for paravirtual drivers
+to register a netdev and a set of ops with a failover instance. The ops
+are used as event handlers that get called to handle netdev register/
+unregister/link change/name change events on slave PCI Ethernet devices
+with the same MAC address as the failover netdev.
+
+This enables paravirtual drivers to use a VF as an accelerated low latency
+datapath. It also allows live migration of VMs with direct attached VFs by
+failing over to the paravirtual datapath when the VF is unplugged.
index f492431b239b6e40a26c5bdb7fbfe1b4243abd5d..6c59bdf49a8a80eaf921d6a24aa27b3c8cc880f0 100644 (file)
@@ -5411,6 +5411,14 @@ S:       Maintained
 F:     Documentation/hwmon/f71805f
 F:     drivers/hwmon/f71805f.c
 
+FAILOVER MODULE
+M:     Sridhar Samudrala <sridhar.samudrala@intel.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     net/core/failover.c
+F:     include/net/failover.h
+F:     Documentation/networking/failover.rst
+
 FANOTIFY
 M:     Jan Kara <jack@suse.cz>
 R:     Amir Goldstein <amir73il@gmail.com>
index 8452f72087efa9678f57d9ec7cb85a2cc58318aa..f45b1a4e37ab64680987b4c09d5d6dbe8a03ae5c 100644 (file)
@@ -1425,6 +1425,8 @@ struct net_device_ops {
  *     entity (i.e. the master device for bridged veth)
  * @IFF_MACSEC: device is a MACsec device
  * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
+ * @IFF_FAILOVER: device is a failover master device
+ * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
  */
 enum netdev_priv_flags {
        IFF_802_1Q_VLAN                 = 1<<0,
@@ -1454,6 +1456,8 @@ enum netdev_priv_flags {
        IFF_PHONY_HEADROOM              = 1<<24,
        IFF_MACSEC                      = 1<<25,
        IFF_NO_RX_HANDLER               = 1<<26,
+       IFF_FAILOVER                    = 1<<27,
+       IFF_FAILOVER_SLAVE              = 1<<28,
 };
 
 #define IFF_802_1Q_VLAN                        IFF_802_1Q_VLAN
@@ -1482,6 +1486,8 @@ enum netdev_priv_flags {
 #define IFF_RXFH_CONFIGURED            IFF_RXFH_CONFIGURED
 #define IFF_MACSEC                     IFF_MACSEC
 #define IFF_NO_RX_HANDLER              IFF_NO_RX_HANDLER
+#define IFF_FAILOVER                   IFF_FAILOVER
+#define IFF_FAILOVER_SLAVE             IFF_FAILOVER_SLAVE
 
 /**
  *     struct net_device - The DEVICE structure.
@@ -4336,6 +4342,16 @@ static inline bool netif_is_rxfh_configured(const struct net_device *dev)
        return dev->priv_flags & IFF_RXFH_CONFIGURED;
 }
 
+/* Report whether @dev has the IFF_FAILOVER private flag set. */
+static inline bool netif_is_failover(const struct net_device *dev)
+{
+       return !!(dev->priv_flags & IFF_FAILOVER);
+}
+
+/* Report whether @dev has the IFF_FAILOVER_SLAVE private flag set. */
+static inline bool netif_is_failover_slave(const struct net_device *dev)
+{
+       return !!(dev->priv_flags & IFF_FAILOVER_SLAVE);
+}
+
 /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
 static inline void netif_keep_dst(struct net_device *dev)
 {
diff --git a/include/net/failover.h b/include/net/failover.h
new file mode 100644 (file)
index 0000000..bb15438
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _FAILOVER_H
+#define _FAILOVER_H
+
+#include <linux/netdevice.h>
+
+/**
+ * struct failover_ops - slave event handlers supplied by a failover driver
+ * @slave_pre_register: called before a matching slave is attached; a nonzero
+ *     return makes failover_slave_register() leave the slave untouched
+ * @slave_register: called after the rx handler and upper-device link are set
+ *     up; a nonzero return, or a missing handler, causes both to be torn
+ *     down again
+ * @slave_pre_unregister: called before a slave is detached; a nonzero return
+ *     aborts the detach
+ * @slave_unregister: called after the slave has been detached
+ * @slave_link_change: called on NETDEV_UP/NETDEV_DOWN/NETDEV_CHANGE for a
+ *     slave while the failover netdev is running
+ * @slave_name_change: called on NETDEV_CHANGENAME for a slave while the
+ *     failover netdev is running
+ * @slave_handle_frame: installed as the slave's rx handler through
+ *     netdev_rx_handler_register()
+ *
+ * Every callback except @slave_handle_frame is checked for NULL before it is
+ * called from net/core/failover.c.
+ */
+struct failover_ops {
+       int (*slave_pre_register)(struct net_device *slave_dev,
+                                 struct net_device *failover_dev);
+       int (*slave_register)(struct net_device *slave_dev,
+                             struct net_device *failover_dev);
+       int (*slave_pre_unregister)(struct net_device *slave_dev,
+                                   struct net_device *failover_dev);
+       int (*slave_unregister)(struct net_device *slave_dev,
+                               struct net_device *failover_dev);
+       int (*slave_link_change)(struct net_device *slave_dev,
+                                struct net_device *failover_dev);
+       int (*slave_name_change)(struct net_device *slave_dev,
+                                struct net_device *failover_dev);
+       rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb);
+};
+
+/**
+ * struct failover - one registered failover instance
+ * @list: linkage in the list of registered instances kept by
+ *     net/core/failover.c
+ * @failover_dev: the failover (master) netdev; __rcu annotated
+ * @ops: slave event handlers for this instance; __rcu annotated
+ */
+struct failover {
+       struct list_head list;
+       struct net_device __rcu *failover_dev;
+       struct failover_ops __rcu *ops;
+};
+
+/* Returns the new instance, or an ERR_PTR() value on failure. */
+struct failover *failover_register(struct net_device *dev,
+                                  struct failover_ops *ops);
+/* Frees @failover; the pointer must not be used afterwards. */
+void failover_unregister(struct failover *failover);
+/* Returns a NOTIFY_* code (NOTIFY_OK or NOTIFY_DONE), not an errno. */
+int failover_slave_unregister(struct net_device *slave_dev);
+
+#endif /* _FAILOVER_H */
index ba554cedb615fcd9e53c9605cf6b9c7d063ab96c..f738a6f27665515a67c88e0b1725a120560a9ce5 100644 (file)
@@ -432,6 +432,19 @@ config MAY_USE_DEVLINK
 config PAGE_POOL
        bool
 
+config FAILOVER
+       tristate "Generic failover module"
+       help
+         The failover module provides a generic interface for paravirtual
+         drivers to register a netdev and a set of ops with a failover
+         instance. The ops are used as event handlers that get called to
+         handle netdev register/unregister/link change/name change events
+         on slave PCI Ethernet devices with the same MAC address as the
+         failover netdev. This enables paravirtual drivers to use a
+         VF as an accelerated low latency datapath. It also allows live
+         migration of VMs with direct attached VFs by failing over to the
+         paravirtual datapath when the VF is unplugged.
+
 endif   # if NET
 
 # Used by archs to tell that they support BPF JIT compiler plus which flavour.
index 7080417f8bc8e991d5d38ac500fc9c18642baa4a..80175e6a2eb871087985cfcb6f8c0ee94aef979f 100644 (file)
@@ -31,3 +31,4 @@ obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
+obj-$(CONFIG_FAILOVER) += failover.o
diff --git a/net/core/failover.c b/net/core/failover.c
new file mode 100644 (file)
index 0000000..4a92a98
--- /dev/null
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* A common module to handle registrations and notifications for paravirtual
+ * drivers to enable accelerated datapath and support VF live migration.
+ *
+ * The notifier and event handling code is based on netvsc driver.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <uapi/linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <net/failover.h>
+
+/* All registered failover instances. Every walk of and change to the list in
+ * this file is done with failover_lock held.
+ */
+static LIST_HEAD(failover_list);
+static DEFINE_SPINLOCK(failover_lock);
+
+/* Find the failover netdev whose permanent MAC address equals @mac.
+ *
+ * Walks failover_list under failover_lock. On a match, stores the instance's
+ * ops in *@ops and returns its netdev; otherwise returns NULL and leaves
+ * *@ops untouched. Both pointers are read with rtnl_dereference().
+ */
+static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops)
+{
+       struct net_device *match = NULL;
+       struct failover *entry;
+
+       spin_lock(&failover_lock);
+       list_for_each_entry(entry, &failover_list, list) {
+               struct net_device *candidate;
+
+               candidate = rtnl_dereference(entry->failover_dev);
+               if (!ether_addr_equal(candidate->perm_addr, mac))
+                       continue;
+
+               *ops = rtnl_dereference(entry->ops);
+               match = candidate;
+               break;
+       }
+       spin_unlock(&failover_lock);
+
+       return match;
+}
+
+/**
+ * failover_slave_register - Register a slave netdev
+ *
+ * @slave_dev: slave netdev that is being registered
+ *
+ * Registers a slave device to a failover instance. Only ethernet devices
+ * are supported. The instance is looked up by the slave's permanent MAC
+ * address. On a match the slave gets the instance's rx handler, is linked
+ * below the failover netdev and is flagged IFF_FAILOVER_SLAVE; all of that
+ * is undone again if the slave_register op is missing or fails.
+ *
+ * Return: NOTIFY_OK if the slave was attached and the slave_register op
+ * returned 0, NOTIFY_DONE in every other case.
+ */
+static int failover_slave_register(struct net_device *slave_dev)
+{
+       struct netdev_lag_upper_info lag_upper_info;
+       struct net_device *failover_dev;
+       struct failover_ops *fops;
+       int err;
+
+       if (slave_dev->type != ARPHRD_ETHER)
+               goto done;
+
+       ASSERT_RTNL();
+
+       failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+       if (!failover_dev)
+               goto done;
+
+       /* fops->slave_handle_frame is dereferenced below, so an instance that
+        * was registered without ops cannot take slaves. Bail out instead of
+        * dereferencing a NULL pointer.
+        */
+       if (!fops)
+               goto done;
+
+       if (fops->slave_pre_register &&
+           fops->slave_pre_register(slave_dev, failover_dev))
+               goto done;
+
+       err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame,
+                                        failover_dev);
+       if (err) {
+               netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
+                          err);
+               goto done;
+       }
+
+       lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
+       err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL,
+                                          &lag_upper_info, NULL);
+       if (err) {
+               netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
+                          failover_dev->name, err);
+               goto err_upper_link;
+       }
+
+       slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+
+       /* Success requires a slave_register op that returns 0. */
+       if (fops->slave_register &&
+           !fops->slave_register(slave_dev, failover_dev))
+               return NOTIFY_OK;
+
+       /* Roll back in reverse order of setup. */
+       netdev_upper_dev_unlink(slave_dev, failover_dev);
+       slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+err_upper_link:
+       netdev_rx_handler_unregister(slave_dev);
+done:
+       return NOTIFY_DONE;
+}
+
+/**
+ * failover_slave_unregister - Unregister a slave netdev
+ *
+ * @slave_dev: slave netdev that is being unregistered
+ *
+ * Detaches @slave_dev from the failover instance that matches its permanent
+ * MAC address: the rx handler is removed, the upper-device link is dropped
+ * and IFF_FAILOVER_SLAVE is cleared. Nothing happens if @slave_dev is not
+ * flagged as a failover slave, if no instance matches, or if the
+ * slave_pre_unregister op returns nonzero.
+ *
+ * Return: NOTIFY_OK if the slave was detached and the slave_unregister op
+ * exists and returned 0, NOTIFY_DONE otherwise.
+ */
+int failover_slave_unregister(struct net_device *slave_dev)
+{
+       struct failover_ops *handlers;
+       struct net_device *master;
+
+       if (!netif_is_failover_slave(slave_dev))
+               return NOTIFY_DONE;
+
+       ASSERT_RTNL();
+
+       master = failover_get_bymac(slave_dev->perm_addr, &handlers);
+       if (!master)
+               return NOTIFY_DONE;
+
+       /* A nonzero pre-unregister result vetoes the detach. */
+       if (handlers && handlers->slave_pre_unregister) {
+               if (handlers->slave_pre_unregister(slave_dev, master))
+                       return NOTIFY_DONE;
+       }
+
+       netdev_rx_handler_unregister(slave_dev);
+       netdev_upper_dev_unlink(slave_dev, master);
+       slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+
+       if (!handlers || !handlers->slave_unregister)
+               return NOTIFY_DONE;
+
+       return handlers->slave_unregister(slave_dev, master) ?
+               NOTIFY_DONE : NOTIFY_OK;
+}
+EXPORT_SYMBOL_GPL(failover_slave_unregister);
+
+/* Relay a slave link event (NETDEV_UP/DOWN/CHANGE) to the failover driver.
+ *
+ * Returns NOTIFY_OK only when the slave_link_change op exists and returns 0;
+ * every other path, including a failover netdev that is not running,
+ * yields NOTIFY_DONE.
+ */
+static int failover_slave_link_change(struct net_device *slave_dev)
+{
+       struct failover_ops *handlers;
+       struct net_device *master;
+
+       if (!netif_is_failover_slave(slave_dev))
+               return NOTIFY_DONE;
+
+       ASSERT_RTNL();
+
+       master = failover_get_bymac(slave_dev->perm_addr, &handlers);
+       if (!master || !netif_running(master))
+               return NOTIFY_DONE;
+
+       if (!handlers || !handlers->slave_link_change)
+               return NOTIFY_DONE;
+
+       return handlers->slave_link_change(slave_dev, master) ?
+               NOTIFY_DONE : NOTIFY_OK;
+}
+
+/* Relay a slave rename (NETDEV_CHANGENAME) to the failover driver.
+ *
+ * Returns NOTIFY_OK only when the slave_name_change op exists and returns 0;
+ * every other path, including a failover netdev that is not running,
+ * yields NOTIFY_DONE.
+ */
+static int failover_slave_name_change(struct net_device *slave_dev)
+{
+       struct failover_ops *handlers;
+       struct net_device *master;
+
+       if (!netif_is_failover_slave(slave_dev))
+               return NOTIFY_DONE;
+
+       ASSERT_RTNL();
+
+       master = failover_get_bymac(slave_dev->perm_addr, &handlers);
+       if (!master || !netif_running(master))
+               return NOTIFY_DONE;
+
+       if (!handlers || !handlers->slave_name_change)
+               return NOTIFY_DONE;
+
+       return handlers->slave_name_change(slave_dev, master) ?
+               NOTIFY_DONE : NOTIFY_OK;
+}
+
+/* Netdevice notifier callback: dispatch events on non-failover netdevs to
+ * the matching slave handler and hand back its NOTIFY_* result.
+ */
+static int
+failover_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       int ret = NOTIFY_DONE;
+
+       /* Events raised on a failover master itself are not slave events. */
+       if (netif_is_failover(dev))
+               return ret;
+
+       switch (event) {
+       case NETDEV_REGISTER:
+               ret = failover_slave_register(dev);
+               break;
+       case NETDEV_UNREGISTER:
+               ret = failover_slave_unregister(dev);
+               break;
+       case NETDEV_UP:
+       case NETDEV_DOWN:
+       case NETDEV_CHANGE:
+               ret = failover_slave_link_change(dev);
+               break;
+       case NETDEV_CHANGENAME:
+               ret = failover_slave_name_change(dev);
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+/* Notifier block registered in failover_init() and removed in
+ * failover_exit(); delivers netdev events to failover_event().
+ */
+static struct notifier_block failover_notifier = {
+       .notifier_call = failover_event,
+};
+
+/* Attach netdevs that already exist in @failover_dev's network namespace and
+ * share its permanent MAC address. Failover masters are skipped. RTNL is
+ * taken for the duration of the walk.
+ */
+static void
+failover_existing_slave_register(struct net_device *failover_dev)
+{
+       struct net *ns = dev_net(failover_dev);
+       struct net_device *candidate;
+
+       rtnl_lock();
+       for_each_netdev(ns, candidate) {
+               if (!netif_is_failover(candidate) &&
+                   ether_addr_equal(failover_dev->perm_addr,
+                                    candidate->perm_addr))
+                       failover_slave_register(candidate);
+       }
+       rtnl_unlock();
+}
+
+/**
+ * failover_register - Register a failover instance
+ *
+ * @dev: failover netdev
+ * @ops: failover ops
+ *
+ * Allocates a failover instance for @dev, takes a reference on @dev, flags
+ * it IFF_FAILOVER and adds the instance to the list of registered
+ * instances. Netdevs that already exist with the same permanent MAC address
+ * are then offered to the instance as slaves. @ops provides the handlers for
+ * slave device register/unregister/link change/name change events.
+ *
+ * Return: pointer to the failover instance on success, ERR_PTR(-EINVAL) if
+ * @dev is not an ethernet device, ERR_PTR(-ENOMEM) if allocation fails.
+ */
+struct failover *failover_register(struct net_device *dev,
+                                  struct failover_ops *ops)
+{
+       struct failover *instance;
+
+       if (dev->type != ARPHRD_ETHER)
+               return ERR_PTR(-EINVAL);
+
+       instance = kzalloc(sizeof(*instance), GFP_KERNEL);
+       if (!instance)
+               return ERR_PTR(-ENOMEM);
+
+       /* Fill in the instance completely before it becomes reachable
+        * through failover_list.
+        */
+       dev_hold(dev);
+       dev->priv_flags |= IFF_FAILOVER;
+       rcu_assign_pointer(instance->failover_dev, dev);
+       rcu_assign_pointer(instance->ops, ops);
+
+       spin_lock(&failover_lock);
+       list_add_tail(&instance->list, &failover_list);
+       spin_unlock(&failover_lock);
+
+       netdev_info(dev, "failover master:%s registered\n", dev->name);
+
+       failover_existing_slave_register(dev);
+
+       return instance;
+}
+EXPORT_SYMBOL_GPL(failover_register);
+
+/**
+ * failover_unregister - Unregister a failover instance
+ *
+ * @failover: pointer to failover instance
+ *
+ * Removes the instance from the list of registered instances, clears
+ * IFF_FAILOVER on its netdev, drops the reference taken by
+ * failover_register() and frees the instance. @failover must not be used
+ * after this call.
+ */
+void failover_unregister(struct failover *failover)
+{
+       struct net_device *failover_dev;
+
+       /* NOTE(review): rcu_dereference() is used without a visible
+        * rcu_read_lock() in this function; confirm the locking the callers
+        * provide.
+        */
+       failover_dev = rcu_dereference(failover->failover_dev);
+
+       netdev_info(failover_dev, "failover master:%s unregistered\n",
+                   failover_dev->name);
+
+       /* Unpublish the instance first, mirroring failover_register() in
+        * reverse, so that failover_get_bymac() can no longer hand out
+        * failover_dev once its reference has been dropped.
+        */
+       spin_lock(&failover_lock);
+       list_del(&failover->list);
+       spin_unlock(&failover_lock);
+
+       failover_dev->priv_flags &= ~IFF_FAILOVER;
+       dev_put(failover_dev);
+
+       kfree(failover);
+}
+EXPORT_SYMBOL_GPL(failover_unregister);
+
+/* Module init: hook failover_event() into the netdevice notifier chain.
+ *
+ * Returns the result of register_netdevice_notifier() so that a failed
+ * registration fails the module load instead of leaving the module loaded
+ * without ever receiving slave events.
+ */
+static __init int
+failover_init(void)
+{
+       return register_netdevice_notifier(&failover_notifier);
+}
+module_init(failover_init);
+
+/* Module exit: remove the netdevice notifier installed by failover_init(). */
+static __exit
+void failover_exit(void)
+{
+       unregister_netdevice_notifier(&failover_notifier);
+}
+module_exit(failover_exit);
+
+MODULE_DESCRIPTION("Generic failover infrastructure/interface");
+MODULE_LICENSE("GPL v2");