CONFIG_NF_FLOW_TABLE_HW
DEPENDS:=+kmod-nf-conntrack @!LINUX_3_18 @!LINUX_4_4 @!LINUX_4_9
FILES:= \
- $(LINUX_DIR)/net/netfilter/nf_flow_table.ko
- AUTOLOAD:=$(call AutoProbe,nf_flow_table)
+ $(LINUX_DIR)/net/netfilter/nf_flow_table.ko \
+ $(LINUX_DIR)/net/netfilter/nf_flow_table_hw.ko
+ AUTOLOAD:=$(call AutoProbe,nf_flow_table nf_flow_table_hw)
endef
$(eval $(call KernelPackage,nf-flow))
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 11 Jan 2018 16:32:00 +0100
+Subject: [PATCH] netfilter: nf_flow_table: add hardware offload support
+
+This patch adds the infrastructure to offload flows to hardware, in case
+the NIC/switch comes with built-in flow table capabilities.
+
+If the hardware comes with no hardware flow tables or they have
+limitations in terms of features, the existing infrastructure falls back
+to the software flow table implementation.
+
+The software flow table garbage collector skips entries that reside in
+the hardware, so the hardware will be responsible for releasing this
+flow table entry too via flow_offload_dead().
+
+Hardware configuration, either to add or to delete entries, is done from
+the hardware offload workqueue, to ensure this is done from user context
+given that we may sleep when grabbing the mdio mutex.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/netfilter/nf_flow_table_hw.c
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -826,6 +826,13 @@ struct xfrmdev_ops {
+ };
+ #endif
+
++struct flow_offload;
++
++enum flow_offload_type {
++ FLOW_OFFLOAD_ADD = 0,
++ FLOW_OFFLOAD_DEL,
++};
++
+ /*
+ * This structure defines the management hooks for network devices.
+ * The following hooks can be defined; unless noted otherwise, they are
+@@ -1057,6 +1064,10 @@ struct xfrmdev_ops {
+ * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
+ * u16 flags);
+ *
++ * int (*ndo_flow_offload)(enum flow_offload_type type,
++ * struct flow_offload *flow);
++ * Adds/deletes flow entry to/from net device flowtable.
++ *
+ * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
+ * Called to change device carrier. Soft-devices (like dummy, team, etc)
+ * which do not represent real hardware may define this to allow their
+@@ -1281,6 +1292,8 @@ struct net_device_ops {
+ int (*ndo_bridge_dellink)(struct net_device *dev,
+ struct nlmsghdr *nlh,
+ u16 flags);
++ int (*ndo_flow_offload)(enum flow_offload_type type,
++ struct flow_offload *flow);
+ int (*ndo_change_carrier)(struct net_device *dev,
+ bool new_carrier);
+ int (*ndo_get_phys_port_id)(struct net_device *dev,
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -20,11 +20,17 @@ struct nf_flowtable_type {
+ struct module *owner;
+ };
+
++enum nf_flowtable_flags {
++ NF_FLOWTABLE_F_HW = 0x1,
++};
++
+ struct nf_flowtable {
+ struct list_head list;
+ struct rhashtable rhashtable;
+ const struct nf_flowtable_type *type;
++ u32 flags;
+ struct delayed_work gc_work;
++ possible_net_t ft_net;
+ };
+
+ enum flow_offload_tuple_dir {
+@@ -69,6 +75,7 @@ struct flow_offload_tuple_rhash {
+ #define FLOW_OFFLOAD_DNAT 0x2
+ #define FLOW_OFFLOAD_DYING 0x4
+ #define FLOW_OFFLOAD_TEARDOWN 0x8
++#define FLOW_OFFLOAD_HW 0x10
+
+ struct flow_offload {
+ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
+@@ -126,6 +133,22 @@ unsigned int nf_flow_offload_ip_hook(voi
+ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+
++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
++ struct nf_conn *ct);
++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow);
++
++struct nf_flow_table_hw {
++ struct module *owner;
++ void (*add)(struct net *net, struct flow_offload *flow,
++ struct nf_conn *ct);
++ void (*del)(struct net *net, struct flow_offload *flow);
++};
++
++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload);
++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload);
++
++extern struct work_struct nf_flow_offload_hw_work;
++
+ #define MODULE_ALIAS_NF_FLOWTABLE(family) \
+ MODULE_ALIAS("nf-flowtable-" __stringify(family))
+
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -1341,6 +1341,7 @@ enum nft_object_attributes {
+ * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
+ * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
+ * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
++ * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32)
+ */
+ enum nft_flowtable_attributes {
+ NFTA_FLOWTABLE_UNSPEC,
+@@ -1350,6 +1351,7 @@ enum nft_flowtable_attributes {
+ NFTA_FLOWTABLE_USE,
+ NFTA_FLOWTABLE_HANDLE,
+ NFTA_FLOWTABLE_PAD,
++ NFTA_FLOWTABLE_FLAGS,
+ __NFTA_FLOWTABLE_MAX
+ };
+ #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -686,6 +686,15 @@ config NF_FLOW_TABLE
+
+ To compile it as a module, choose M here.
+
++config NF_FLOW_TABLE_HW
++ tristate "Netfilter flow table hardware offload module"
++ depends on NF_FLOW_TABLE
++ help
++ This option adds hardware offload support for the flow table core
++ infrastructure.
++
++ To compile it as a module, choose M here.
++
+ config NETFILTER_XTABLES
+ tristate "Netfilter Xtables support (required for ip_tables)"
+ default m if NETFILTER_ADVANCED=n
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -116,6 +116,7 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_t
+ nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
+
+ obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
++obj-$(CONFIG_NF_FLOW_TABLE_HW) += nf_flow_table_hw.o
+
+ # generic X tables
+ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_add);
+
++static inline bool nf_flow_in_hw(const struct flow_offload *flow)
++{
++ return flow->flags & FLOW_OFFLOAD_HW;
++}
++
+ static void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+ {
+ struct flow_offload_entry *e;
++ struct net *net = read_pnet(&flow_table->ft_net);
+
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+@@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f
+ e = container_of(flow, struct flow_offload_entry, flow);
+ clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+
++ if (nf_flow_in_hw(flow))
++ nf_flow_offload_hw_del(net, flow);
++
+ flow_offload_free(flow);
+ }
+
+@@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
++ bool teardown;
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+@@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+- if (nf_flow_has_expired(flow) ||
+- (flow->flags & (FLOW_OFFLOAD_DYING |
+- FLOW_OFFLOAD_TEARDOWN)))
++ teardown = flow->flags & (FLOW_OFFLOAD_DYING |
++ FLOW_OFFLOAD_TEARDOWN);
++
++ if (nf_flow_in_hw(flow) && !teardown)
++ continue;
++
++ if (nf_flow_has_expired(flow) || teardown)
+ flow_offload_del(flow_table, flow);
+ }
+ out:
+@@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
++static const struct nf_flow_table_hw __rcu *nf_flow_table_hw_hook __read_mostly;
++
++static int nf_flow_offload_hw_init(struct nf_flowtable *flow_table)
++{
++ const struct nf_flow_table_hw *offload;
++
++ if (!rcu_access_pointer(nf_flow_table_hw_hook))
++ request_module("nf-flow-table-hw");
++
++ rcu_read_lock();
++ offload = rcu_dereference(nf_flow_table_hw_hook);
++ if (!offload)
++ goto err_no_hw_offload;
++
++ if (!try_module_get(offload->owner))
++ goto err_no_hw_offload;
++
++ rcu_read_unlock();
++
++ return 0;
++
++err_no_hw_offload:
++ rcu_read_unlock();
++
++ return -EOPNOTSUPP;
++}
++
+ int nf_flow_table_init(struct nf_flowtable *flowtable)
+ {
+ int err;
+
++ if (flowtable->flags & NF_FLOWTABLE_F_HW) {
++ err = nf_flow_offload_hw_init(flowtable);
++ if (err)
++ return err;
++ }
++
+ INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+
+ err = rhashtable_init(&flowtable->rhashtable,
+@@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu
+ {
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
+ flush_delayed_work(&flowtable->gc_work);
++ if (flowtable->flags & NF_FLOWTABLE_F_HW)
++ flush_work(&nf_flow_offload_hw_work);
+ }
+
+ void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+@@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+
++struct work_struct nf_flow_offload_hw_work;
++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_work);
++
++/* Give the hardware workqueue the chance to remove entries from hardware. */
++static void nf_flow_offload_hw_free(struct nf_flowtable *flowtable)
++{
++ const struct nf_flow_table_hw *offload;
++
++ flush_work(&nf_flow_offload_hw_work);
++
++ rcu_read_lock();
++ offload = rcu_dereference(nf_flow_table_hw_hook);
++ if (!offload) {
++ rcu_read_unlock();
++ return;
++ }
++ module_put(offload->owner);
++ rcu_read_unlock();
++}
++
+ void nf_flow_table_free(struct nf_flowtable *flow_table)
+ {
+ mutex_lock(&flowtable_lock);
+@@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ WARN_ON(!nf_flow_offload_gc_step(flow_table));
+ rhashtable_destroy(&flow_table->rhashtable);
++ if (flow_table->flags & NF_FLOWTABLE_F_HW)
++ nf_flow_offload_hw_free(flow_table);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_free);
+
++/* Must be called from user context. */
++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
++ struct nf_conn *ct)
++{
++ const struct nf_flow_table_hw *offload;
++
++ rcu_read_lock();
++ offload = rcu_dereference(nf_flow_table_hw_hook);
++ if (offload)
++ offload->add(net, flow, ct);
++ rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_add);
++
++/* Must be called from user context. */
++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow)
++{
++ const struct nf_flow_table_hw *offload;
++
++ rcu_read_lock();
++ offload = rcu_dereference(nf_flow_table_hw_hook);
++ if (offload)
++ offload->del(net, flow);
++ rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_del);
++
++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload)
++{
++ if (rcu_access_pointer(nf_flow_table_hw_hook))
++ return -EBUSY;
++
++ rcu_assign_pointer(nf_flow_table_hw_hook, offload);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_hw_register);
++
++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload)
++{
++ WARN_ON(rcu_access_pointer(nf_flow_table_hw_hook) != offload);
++ rcu_assign_pointer(nf_flow_table_hw_hook, NULL);
++
++ synchronize_rcu();
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_hw_unregister);
++
+ static int nf_flow_table_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+ {
+--- /dev/null
++++ b/net/netfilter/nf_flow_table_hw.c
+@@ -0,0 +1,169 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/netdevice.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_tuple.h>
++
++static DEFINE_SPINLOCK(flow_offload_hw_pending_list_lock);
++static LIST_HEAD(flow_offload_hw_pending_list);
++
++static DEFINE_MUTEX(nf_flow_offload_hw_mutex);
++
++struct flow_offload_hw {
++ struct list_head list;
++ enum flow_offload_type type;
++ struct flow_offload *flow;
++ struct nf_conn *ct;
++ possible_net_t flow_hw_net;
++};
++
++static int do_flow_offload_hw(struct net *net, struct flow_offload *flow,
++ int type)
++{
++ struct net_device *indev;
++ int ret, ifindex;
++
++ ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx;
++ indev = dev_get_by_index(net, ifindex);
++ if (WARN_ON(!indev))
++ return 0;
++
++ mutex_lock(&nf_flow_offload_hw_mutex);
++ ret = indev->netdev_ops->ndo_flow_offload(type, flow);
++ mutex_unlock(&nf_flow_offload_hw_mutex);
++
++ dev_put(indev);
++
++ return ret;
++}
++
++static void flow_offload_hw_work_add(struct flow_offload_hw *offload)
++{
++ struct net *net;
++ int ret;
++
++ if (nf_ct_is_dying(offload->ct))
++ return;
++
++ net = read_pnet(&offload->flow_hw_net);
++ ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD);
++ if (ret >= 0)
++ offload->flow->flags |= FLOW_OFFLOAD_HW;
++}
++
++static void flow_offload_hw_work_del(struct flow_offload_hw *offload)
++{
++ struct net *net = read_pnet(&offload->flow_hw_net);
++
++ do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_DEL);
++}
++
++static void flow_offload_hw_work(struct work_struct *work)
++{
++ struct flow_offload_hw *offload, *next;
++ LIST_HEAD(hw_offload_pending);
++
++ spin_lock_bh(&flow_offload_hw_pending_list_lock);
++ list_replace_init(&flow_offload_hw_pending_list, &hw_offload_pending);
++ spin_unlock_bh(&flow_offload_hw_pending_list_lock);
++
++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
++ switch (offload->type) {
++ case FLOW_OFFLOAD_ADD:
++ flow_offload_hw_work_add(offload);
++ break;
++ case FLOW_OFFLOAD_DEL:
++ flow_offload_hw_work_del(offload);
++ break;
++ }
++ if (offload->ct)
++ nf_conntrack_put(&offload->ct->ct_general);
++ list_del(&offload->list);
++ kfree(offload);
++ }
++}
++
++static void flow_offload_queue_work(struct flow_offload_hw *offload)
++{
++ spin_lock_bh(&flow_offload_hw_pending_list_lock);
++ list_add_tail(&offload->list, &flow_offload_hw_pending_list);
++ spin_unlock_bh(&flow_offload_hw_pending_list_lock);
++
++ schedule_work(&nf_flow_offload_hw_work);
++}
++
++static void flow_offload_hw_add(struct net *net, struct flow_offload *flow,
++ struct nf_conn *ct)
++{
++ struct flow_offload_hw *offload;
++
++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC);
++ if (!offload)
++ return;
++
++ nf_conntrack_get(&ct->ct_general);
++ offload->type = FLOW_OFFLOAD_ADD;
++ offload->ct = ct;
++ offload->flow = flow;
++ write_pnet(&offload->flow_hw_net, net);
++
++ flow_offload_queue_work(offload);
++}
++
++static void flow_offload_hw_del(struct net *net, struct flow_offload *flow)
++{
++ struct flow_offload_hw *offload;
++
++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC);
++ if (!offload)
++ return;
++
++ offload->type = FLOW_OFFLOAD_DEL;
++ offload->ct = NULL;
++ offload->flow = flow;
++ write_pnet(&offload->flow_hw_net, net);
++
++ flow_offload_queue_work(offload);
++}
++
++static const struct nf_flow_table_hw flow_offload_hw = {
++ .add = flow_offload_hw_add,
++ .del = flow_offload_hw_del,
++ .owner = THIS_MODULE,
++};
++
++static int __init nf_flow_table_hw_module_init(void)
++{
++ INIT_WORK(&nf_flow_offload_hw_work, flow_offload_hw_work);
++ nf_flow_table_hw_register(&flow_offload_hw);
++
++ return 0;
++}
++
++static void __exit nf_flow_table_hw_module_exit(void)
++{
++ struct flow_offload_hw *offload, *next;
++ LIST_HEAD(hw_offload_pending);
++
++ nf_flow_table_hw_unregister(&flow_offload_hw);
++ cancel_work_sync(&nf_flow_offload_hw_work);
++
++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
++ if (offload->ct)
++ nf_conntrack_put(&offload->ct->ct_general);
++ list_del(&offload->list);
++ kfree(offload);
++ }
++}
++
++module_init(nf_flow_table_hw_module_init);
++module_exit(nf_flow_table_hw_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_ALIAS("nf-flow-table-hw");
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4866,6 +4866,14 @@ static int nf_tables_flowtable_parse_hoo
+ if (err < 0)
+ goto err1;
+
++ for (i = 0; i < n; i++) {
++ if (flowtable->data.flags & NF_FLOWTABLE_F_HW &&
++ !dev_array[i]->netdev_ops->ndo_flow_offload) {
++ err = -EOPNOTSUPP;
++ goto err1;
++ }
++ }
++
+ ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
+ if (!ops) {
+ err = -ENOMEM;
+@@ -4996,10 +5004,19 @@ static int nf_tables_newflowtable(struct
+ }
+
+ flowtable->data.type = type;
++ write_pnet(&flowtable->data.ft_net, net);
++
+ err = type->init(&flowtable->data);
+ if (err < 0)
+ goto err3;
+
++ if (nla[NFTA_FLOWTABLE_FLAGS]) {
++ flowtable->data.flags =
++ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
++ if (flowtable->data.flags & ~NF_FLOWTABLE_F_HW)
++ goto err4;
++ }
++
+ err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+ flowtable);
+ if (err < 0)
+@@ -5097,7 +5114,8 @@ static int nf_tables_fill_flowtable_info
+ nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
+ nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
+- NFTA_FLOWTABLE_PAD))
++ NFTA_FLOWTABLE_PAD) ||
++ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -121,6 +121,9 @@ static void nft_flow_offload_eval(const
+ if (ret < 0)
+ goto err_flow_add;
+
++ if (flowtable->flags & NF_FLOWTABLE_F_HW)
++ nf_flow_offload_hw_add(nft_net(pkt), flow, ct);
++
+ return;
+
+ err_flow_add:
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 15 Mar 2018 20:46:31 +0100
+Subject: [PATCH] netfilter: nf_flow_table: support hw offload through
+ virtual interfaces
+
+There are hardware offload devices that support offloading VLANs and
+PPPoE devices. Additionally, it is useful to be able to offload packets
+routed through bridge interfaces as well.
+Add support for finding the path to the offload device through these
+virtual interfaces, while collecting useful parameters for the offload
+device, like VLAN ID/protocol, PPPoE session and Ethernet MAC address.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -827,6 +827,7 @@ struct xfrmdev_ops {
+ #endif
+
+ struct flow_offload;
++struct flow_offload_hw_path;
+
+ enum flow_offload_type {
+ FLOW_OFFLOAD_ADD = 0,
+@@ -1064,8 +1065,15 @@ enum flow_offload_type {
+ * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
+ * u16 flags);
+ *
++ * int (*ndo_flow_offload_check)(struct flow_offload_hw_path *path);
++ * For virtual devices like bridges, vlan, and pppoe, fill in the
++ * underlying network device that can be used for offloading connections.
++ * Return an error if offloading is not supported.
++ *
+ * int (*ndo_flow_offload)(enum flow_offload_type type,
+- * struct flow_offload *flow);
++ * struct flow_offload *flow,
++ * struct flow_offload_hw_path *src,
++ * struct flow_offload_hw_path *dest);
+ * Adds/deletes flow entry to/from net device flowtable.
+ *
+ * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
+@@ -1292,8 +1300,11 @@ struct net_device_ops {
+ int (*ndo_bridge_dellink)(struct net_device *dev,
+ struct nlmsghdr *nlh,
+ u16 flags);
++ int (*ndo_flow_offload_check)(struct flow_offload_hw_path *path);
+ int (*ndo_flow_offload)(enum flow_offload_type type,
+- struct flow_offload *flow);
++ struct flow_offload *flow,
++ struct flow_offload_hw_path *src,
++ struct flow_offload_hw_path *dest);
+ int (*ndo_change_carrier)(struct net_device *dev,
+ bool new_carrier);
+ int (*ndo_get_phys_port_id)(struct net_device *dev,
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -86,6 +86,21 @@ struct flow_offload {
+ };
+ };
+
++#define FLOW_OFFLOAD_PATH_ETHERNET BIT(0)
++#define FLOW_OFFLOAD_PATH_VLAN BIT(1)
++#define FLOW_OFFLOAD_PATH_PPPOE BIT(2)
++
++struct flow_offload_hw_path {
++ struct net_device *dev;
++ u32 flags;
++
++ u8 eth_src[ETH_ALEN];
++ u8 eth_dest[ETH_ALEN];
++ u16 vlan_proto;
++ u16 vlan_id;
++ u16 pppoe_sid;
++};
++
+ #define NF_FLOW_TIMEOUT (30 * HZ)
+
+ struct nf_flow_route {
+--- a/net/netfilter/nf_flow_table_hw.c
++++ b/net/netfilter/nf_flow_table_hw.c
+@@ -19,48 +19,75 @@ struct flow_offload_hw {
+ enum flow_offload_type type;
+ struct flow_offload *flow;
+ struct nf_conn *ct;
+- possible_net_t flow_hw_net;
++
++ struct flow_offload_hw_path src;
++ struct flow_offload_hw_path dest;
+ };
+
+-static int do_flow_offload_hw(struct net *net, struct flow_offload *flow,
+- int type)
++static void flow_offload_check_ethernet(struct flow_offload_tuple *tuple,
++ struct flow_offload_hw_path *path)
+ {
+- struct net_device *indev;
+- int ret, ifindex;
++ struct net_device *dev = path->dev;
++ struct neighbour *n;
+
+- ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx;
+- indev = dev_get_by_index(net, ifindex);
+- if (WARN_ON(!indev))
+- return 0;
++ if (dev->type != ARPHRD_ETHER)
++ return;
+
+- mutex_lock(&nf_flow_offload_hw_mutex);
+- ret = indev->netdev_ops->ndo_flow_offload(type, flow);
+- mutex_unlock(&nf_flow_offload_hw_mutex);
++ memcpy(path->eth_src, path->dev->dev_addr, ETH_ALEN);
++ n = dst_neigh_lookup(tuple->dst_cache, &tuple->src_v4);
++ if (!n)
++ return;
+
+- dev_put(indev);
++ memcpy(path->eth_dest, n->ha, ETH_ALEN);
++ path->flags |= FLOW_OFFLOAD_PATH_ETHERNET;
++ neigh_release(n);
++}
+
+- return ret;
++static int flow_offload_check_path(struct net *net,
++ struct flow_offload_tuple *tuple,
++ struct flow_offload_hw_path *path)
++{
++ struct net_device *dev;
++
++ dev = dev_get_by_index_rcu(net, tuple->iifidx);
++ if (!dev)
++ return -ENOENT;
++
++ path->dev = dev;
++ flow_offload_check_ethernet(tuple, path);
++
++ if (dev->netdev_ops->ndo_flow_offload_check)
++ return dev->netdev_ops->ndo_flow_offload_check(path);
++
++ return 0;
+ }
+
+-static void flow_offload_hw_work_add(struct flow_offload_hw *offload)
++static int do_flow_offload_hw(struct flow_offload_hw *offload)
+ {
+- struct net *net;
++ struct net_device *src_dev = offload->src.dev;
++ struct net_device *dest_dev = offload->dest.dev;
+ int ret;
+
+- if (nf_ct_is_dying(offload->ct))
+- return;
++ ret = src_dev->netdev_ops->ndo_flow_offload(offload->type,
++ offload->flow,
++ &offload->src,
++ &offload->dest);
+
+- net = read_pnet(&offload->flow_hw_net);
+- ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD);
+- if (ret >= 0)
+- offload->flow->flags |= FLOW_OFFLOAD_HW;
++ /* restore devices in case the driver mangled them */
++ offload->src.dev = src_dev;
++ offload->dest.dev = dest_dev;
++
++ return ret;
+ }
+
+-static void flow_offload_hw_work_del(struct flow_offload_hw *offload)
++static void flow_offload_hw_free(struct flow_offload_hw *offload)
+ {
+- struct net *net = read_pnet(&offload->flow_hw_net);
+-
+- do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_DEL);
++ dev_put(offload->src.dev);
++ dev_put(offload->dest.dev);
++ if (offload->ct)
++ nf_conntrack_put(&offload->ct->ct_general);
++ list_del(&offload->list);
++ kfree(offload);
+ }
+
+ static void flow_offload_hw_work(struct work_struct *work)
+@@ -73,18 +100,22 @@ static void flow_offload_hw_work(struct
+ spin_unlock_bh(&flow_offload_hw_pending_list_lock);
+
+ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
++ mutex_lock(&nf_flow_offload_hw_mutex);
+ switch (offload->type) {
+ case FLOW_OFFLOAD_ADD:
+- flow_offload_hw_work_add(offload);
++ if (nf_ct_is_dying(offload->ct))
++ break;
++
++ if (do_flow_offload_hw(offload) >= 0)
++ offload->flow->flags |= FLOW_OFFLOAD_HW;
+ break;
+ case FLOW_OFFLOAD_DEL:
+- flow_offload_hw_work_del(offload);
++ do_flow_offload_hw(offload);
+ break;
+ }
+- if (offload->ct)
+- nf_conntrack_put(&offload->ct->ct_general);
+- list_del(&offload->list);
+- kfree(offload);
++ mutex_unlock(&nf_flow_offload_hw_mutex);
++
++ flow_offload_hw_free(offload);
+ }
+ }
+
+@@ -97,20 +128,55 @@ static void flow_offload_queue_work(stru
+ schedule_work(&nf_flow_offload_hw_work);
+ }
+
++static struct flow_offload_hw *
++flow_offload_hw_prepare(struct net *net, struct flow_offload *flow)
++{
++ struct flow_offload_hw_path src = {};
++ struct flow_offload_hw_path dest = {};
++ struct flow_offload_tuple *tuple;
++ struct flow_offload_hw *offload = NULL;
++
++ rcu_read_lock_bh();
++
++ tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple;
++ if (flow_offload_check_path(net, tuple, &src))
++ goto out;
++
++ tuple = &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple;
++ if (flow_offload_check_path(net, tuple, &dest))
++ goto out;
++
++ if (!src.dev->netdev_ops->ndo_flow_offload)
++ goto out;
++
++ offload = kzalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC);
++ if (!offload)
++ goto out;
++
++ dev_hold(src.dev);
++ dev_hold(dest.dev);
++ offload->src = src;
++ offload->dest = dest;
++ offload->flow = flow;
++
++out:
++ rcu_read_unlock_bh();
++
++ return offload;
++}
++
+ static void flow_offload_hw_add(struct net *net, struct flow_offload *flow,
+ struct nf_conn *ct)
+ {
+ struct flow_offload_hw *offload;
+
+- offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC);
++ offload = flow_offload_hw_prepare(net, flow);
+ if (!offload)
+ return;
+
+ nf_conntrack_get(&ct->ct_general);
+ offload->type = FLOW_OFFLOAD_ADD;
+ offload->ct = ct;
+- offload->flow = flow;
+- write_pnet(&offload->flow_hw_net, net);
+
+ flow_offload_queue_work(offload);
+ }
+@@ -119,14 +185,11 @@ static void flow_offload_hw_del(struct n
+ {
+ struct flow_offload_hw *offload;
+
+- offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC);
++ offload = flow_offload_hw_prepare(net, flow);
+ if (!offload)
+ return;
+
+ offload->type = FLOW_OFFLOAD_DEL;
+- offload->ct = NULL;
+- offload->flow = flow;
+- write_pnet(&offload->flow_hw_net, net);
+
+ flow_offload_queue_work(offload);
+ }
+@@ -153,12 +216,8 @@ static void __exit nf_flow_table_hw_modu
+ nf_flow_table_hw_unregister(&flow_offload_hw);
+ cancel_work_sync(&nf_flow_offload_hw_work);
+
+- list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
+- if (offload->ct)
+- nf_conntrack_put(&offload->ct->ct_general);
+- list_del(&offload->list);
+- kfree(offload);
+- }
++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list)
++ flow_offload_hw_free(offload);
+ }
+
+ module_init(nf_flow_table_hw_module_init);
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 15 Mar 2018 20:49:58 +0100
+Subject: [PATCH] net: 8021q: support hardware flow table offload
+
+Add the VLAN ID and protocol information
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/8021q/vlan_dev.c
++++ b/net/8021q/vlan_dev.c
+@@ -29,8 +29,10 @@
+ #include <linux/net_tstamp.h>
+ #include <linux/etherdevice.h>
+ #include <linux/ethtool.h>
++#include <linux/netfilter.h>
+ #include <net/arp.h>
+ #include <net/switchdev.h>
++#include <net/netfilter/nf_flow_table.h>
+
+ #include "vlan.h"
+ #include "vlanproc.h"
+@@ -762,6 +764,25 @@ static int vlan_dev_get_iflink(const str
+ return real_dev->ifindex;
+ }
+
++static int vlan_dev_flow_offload_check(struct flow_offload_hw_path *path)
++{
++ struct net_device *dev = path->dev;
++ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
++
++ if (path->flags & FLOW_OFFLOAD_PATH_VLAN)
++ return -EEXIST;
++
++ path->flags |= FLOW_OFFLOAD_PATH_VLAN;
++ path->vlan_proto = vlan->vlan_proto;
++ path->vlan_id = vlan->vlan_id;
++ path->dev = vlan->real_dev;
++
++ if (vlan->real_dev->netdev_ops->ndo_flow_offload_check)
++ return vlan->real_dev->netdev_ops->ndo_flow_offload_check(path);
++
++ return 0;
++}
++
+ static const struct ethtool_ops vlan_ethtool_ops = {
+ .get_link_ksettings = vlan_ethtool_get_link_ksettings,
+ .get_drvinfo = vlan_ethtool_get_drvinfo,
+@@ -799,6 +820,7 @@ static const struct net_device_ops vlan_
+ .ndo_fix_features = vlan_dev_fix_features,
+ .ndo_get_lock_subclass = vlan_dev_get_lock_subclass,
+ .ndo_get_iflink = vlan_dev_get_iflink,
++ .ndo_flow_offload_check = vlan_dev_flow_offload_check,
+ };
+
+ static void vlan_dev_free(struct net_device *dev)
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 15 Mar 2018 20:50:37 +0100
+Subject: [PATCH] net: bridge: support hardware flow table offload
+
+Look up the real device and pass it on
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -18,6 +18,8 @@
+ #include <linux/ethtool.h>
+ #include <linux/list.h>
+ #include <linux/netfilter_bridge.h>
++#include <linux/netfilter.h>
++#include <net/netfilter/nf_flow_table.h>
+
+ #include <linux/uaccess.h>
+ #include "br_private.h"
+@@ -340,6 +342,26 @@ static const struct ethtool_ops br_ethto
+ .get_link = ethtool_op_get_link,
+ };
+
++static int br_flow_offload_check(struct flow_offload_hw_path *path)
++{
++ struct net_device *dev = path->dev;
++ struct net_bridge *br = netdev_priv(dev);
++ struct net_bridge_fdb_entry *dst;
++
++ if (!(path->flags & FLOW_OFFLOAD_PATH_ETHERNET))
++ return -EINVAL;
++
++ dst = br_fdb_find_rcu(br, path->eth_dest, path->vlan_id);
++ if (!dst || !dst->dst)
++ return -ENOENT;
++
++ path->dev = dst->dst->dev;
++ if (path->dev->netdev_ops->ndo_flow_offload_check)
++ return path->dev->netdev_ops->ndo_flow_offload_check(path);
++
++ return 0;
++}
++
+ static const struct net_device_ops br_netdev_ops = {
+ .ndo_open = br_dev_open,
+ .ndo_stop = br_dev_stop,
+@@ -367,6 +389,7 @@ static const struct net_device_ops br_ne
+ .ndo_bridge_setlink = br_setlink,
+ .ndo_bridge_dellink = br_dellink,
+ .ndo_features_check = passthru_features_check,
++ .ndo_flow_offload_check = br_flow_offload_check,
+ };
+
+ static struct device_type br_type = {
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 15 Mar 2018 21:15:00 +0100
+Subject: [PATCH] net: pppoe: support hardware flow table offload
+
+Pass on the PPPoE session ID and the remote MAC address
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -56,6 +56,9 @@
+ #include <net/net_namespace.h>
+ #include <net/netns/generic.h>
+
++#include <linux/netfilter.h>
++#include <net/netfilter/nf_flow_table.h>
++
+ #define PPP_VERSION "2.4.2"
+
+ /*
+@@ -1383,12 +1386,33 @@ static void ppp_dev_priv_destructor(stru
+ ppp_destroy_interface(ppp);
+ }
+
++static int ppp_flow_offload_check(struct flow_offload_hw_path *path)
++{
++ struct ppp *ppp = netdev_priv(path->dev);
++ struct ppp_channel *chan;
++ struct channel *pch;
++
++ if (ppp->flags & SC_MULTILINK)
++ return -EOPNOTSUPP;
++
++ if (list_empty(&ppp->channels))
++ return -ENODEV;
++
++ pch = list_first_entry(&ppp->channels, struct channel, clist);
++ chan = pch->chan;
++ if (!chan->ops->flow_offload_check)
++ return -EOPNOTSUPP;
++
++ return chan->ops->flow_offload_check(chan, path);
++}
++
+ static const struct net_device_ops ppp_netdev_ops = {
+ .ndo_init = ppp_dev_init,
+ .ndo_uninit = ppp_dev_uninit,
+ .ndo_start_xmit = ppp_start_xmit,
+ .ndo_do_ioctl = ppp_net_ioctl,
+ .ndo_get_stats64 = ppp_get_stats64,
++ .ndo_flow_offload_check = ppp_flow_offload_check,
+ };
+
+ static struct device_type ppp_type = {
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -77,6 +77,8 @@
+ #include <linux/file.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
++#include <linux/netfilter.h>
++#include <net/netfilter/nf_flow_table.h>
+
+ #include <linux/nsproxy.h>
+ #include <net/net_namespace.h>
+@@ -970,8 +972,32 @@ static int pppoe_xmit(struct ppp_channel
+ return __pppoe_xmit(sk, skb);
+ }
+
++static int pppoe_flow_offload_check(struct ppp_channel *chan,
++ struct flow_offload_hw_path *path)
++{
++ struct sock *sk = (struct sock *)chan->private;
++ struct pppox_sock *po = pppox_sk(sk);
++ struct net_device *dev = po->pppoe_dev;
++
++ if (sock_flag(sk, SOCK_DEAD) ||
++ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
++ return -ENODEV;
++
++ path->dev = po->pppoe_dev;
++ path->flags |= FLOW_OFFLOAD_PATH_PPPOE;
++ memcpy(path->eth_src, po->pppoe_dev->dev_addr, ETH_ALEN);
++ memcpy(path->eth_dest, po->pppoe_pa.remote, ETH_ALEN);
++ path->pppoe_sid = be16_to_cpu(po->num);
++
++ if (path->dev->netdev_ops->ndo_flow_offload_check)
++ return path->dev->netdev_ops->ndo_flow_offload_check(path);
++
++ return 0;
++}
++
+ static const struct ppp_channel_ops pppoe_chan_ops = {
+ .start_xmit = pppoe_xmit,
++ .flow_offload_check = pppoe_flow_offload_check,
+ };
+
+ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
+--- a/include/linux/ppp_channel.h
++++ b/include/linux/ppp_channel.h
+@@ -32,6 +32,8 @@ struct ppp_channel_ops {
+ int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
+ /* Handle an ioctl call that has come in via /dev/ppp. */
+ int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
++
++ int (*flow_offload_check)(struct ppp_channel *, struct flow_offload_hw_path *);
+ };
+
+ struct ppp_channel {
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 25 Mar 2018 21:10:55 +0200
+Subject: [PATCH] netfilter: nf_flow_table: rework hardware offload timeout
+ handling
+
+Some offload implementations send keepalive packets + explicit
+notifications of TCP FIN/RST packets. In this case it is more convenient
+to simply let the driver update flow->timeout handling and use the
+regular flow offload gc step.
+
+For drivers that manage their own lifetime, a separate flag can be set
+to avoid gc timeouts.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -76,6 +76,7 @@ struct flow_offload_tuple_rhash {
+ #define FLOW_OFFLOAD_DYING 0x4
+ #define FLOW_OFFLOAD_TEARDOWN 0x8
+ #define FLOW_OFFLOAD_HW 0x10
++#define FLOW_OFFLOAD_KEEP 0x20
+
+ struct flow_offload {
+ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -332,7 +332,7 @@ static int nf_flow_offload_gc_step(struc
+ teardown = flow->flags & (FLOW_OFFLOAD_DYING |
+ FLOW_OFFLOAD_TEARDOWN);
+
+- if (nf_flow_in_hw(flow) && !teardown)
++ if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown)
+ continue;
+
+ if (nf_flow_has_expired(flow) || teardown)