kernel: backport netfilter NAT offload support to 4.14
author Felix Fietkau <nbd@nbd.name>
Mon, 5 Feb 2018 12:35:24 +0000 (13:35 +0100)
committer Felix Fietkau <nbd@nbd.name>
Wed, 21 Feb 2018 19:12:42 +0000 (20:12 +0100)
This only works with nftables for now; iptables support will be added
later. Includes a number of related upstream nftables improvements to
simplify backporting follow-up changes.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
43 files changed:
package/kernel/linux/modules/netfilter.mk
target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/321-netfilter-nf_tables-add-flow-table-netlink-frontend.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/322-netfilter-add-generic-flow-table-infrastructure.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/324-netfilter-flow-table-support-for-IPv6.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/325-netfilter-flow-table-support-for-the-mixed-IPv4-IPv6.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/326-netfilter-nf_tables-flow-offload-expression.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/327-netfilter-nf_tables-remove-nhooks-field-from-struct-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/328-netfilter-nf_tables-fix-a-typo-in-nf_tables_getflowt.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/329-netfilter-improve-flow-table-Kconfig-dependencies.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/330-netfilter-nf_tables-remove-flag-field-from-struct-nf.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/331-netfilter-nf_tables-no-need-for-struct-nft_af_info-t.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/332-netfilter-nf_tables-remove-struct-nft_af_info-parame.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/333-netfilter-nf_tables-fix-chain-filter-in-nf_tables_du.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/334-netfilter-nf_tables-fix-potential-NULL-ptr-deref-in-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/335-netfilter-nf_tables-add-single-table-list-for-all-fa.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/336-netfilter-exit_net-cleanup-check-added.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/337-netfilter-nf_tables-get-rid-of-pernet-families.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/338-netfilter-nf_tables-get-rid-of-struct-nft_af_info-ab.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/339-netfilter-nft_flow_offload-wait-for-garbage-collecto.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/340-netfilter-nft_flow_offload-no-need-to-flush-entries-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/341-netfilter-nft_flow_offload-move-flowtable-cleanup-ro.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/342-netfilter-nf_tables-fix-flowtable-free.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/344-netfilter-nf_tables-allocate-handle-and-delete-objec.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/345-netfilter-nf_flow_offload-fix-use-after-free-and-a-r.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/346-netfilter-flowtable-infrastructure-depends-on-NETFIL.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/347-netfilter-remove-duplicated-include.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/348-netfilter-nf_flow_table-use-IP_CT_DIR_-values-for-FL.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/349-netfilter-nf_flow_table-clean-up-flow_offload_alloc.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/350-ipv6-make-ip6_dst_mtu_forward-inline.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/351-netfilter-nf_flow_table-cache-mtu-in-struct-flow_off.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/352-netfilter-nf_flow_table-rename-nf_flow_table.c-to-nf.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/353-netfilter-nf_flow_table-move-ipv4-offload-hook-code-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/354-netfilter-nf_flow_table-move-ip-header-check-out-of-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/355-netfilter-nf_flow_table-move-ipv6-offload-hook-code-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/356-netfilter-nf_flow_table-relax-mixed-ipv4-ipv6-flowta.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/357-netfilter-nf_flow_table-move-init-code-to-nf_flow_ta.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/358-netfilter-nf_flow_table-fix-priv-pointer-for-netdev-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/359-netfilter-nf_flow_table-track-flow-tables-in-nf_flow.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-add-hardware-offload-support.patch [new file with mode: 0644]
target/linux/generic/config-4.14

index 82d5604841b942663618202bd8c0eb0f3f2c038c..57d68d4a55b4ffbecf95e1b06f9604cc14e058bc 100644 (file)
@@ -140,6 +140,23 @@ endef
 $(eval $(call KernelPackage,nf-nat6))
 
 
+define KernelPackage/nf-flow
+  SUBMENU:=$(NF_MENU)
+  TITLE:=Netfilter flowtable support
+  KCONFIG:= \
+       CONFIG_NETFILTER_INGRESS=y \
+       CONFIG_NF_FLOW_TABLE \
+       CONFIG_NF_FLOW_TABLE_HW
+  DEPENDS:=+kmod-nf-conntrack +kmod-nft-core @!LINUX_3_18 @!LINUX_4_4 @!LINUX_4_9
+  FILES:= \
+       $(LINUX_DIR)/net/netfilter/nf_flow_table.ko \
+       $(LINUX_DIR)/net/netfilter/nf_flow_table_hw.ko
+  AUTOLOAD:=$(call AutoProbe,nf_flow_table nf_flow_table_hw)
+endef
+
+$(eval $(call KernelPackage,nf-flow))
+
+
 define AddDepends/ipt
   SUBMENU:=$(NF_MENU)
   DEPENDS+= +kmod-ipt-core $(1)
@@ -970,6 +987,26 @@ endef
 $(eval $(call KernelPackage,nft-nat))
 
 
+define KernelPackage/nft-offload
+  SUBMENU:=$(NF_MENU)
+  TITLE:=Netfilter nf_tables routing/NAT offload support
+  DEPENDS:=+kmod-nf-flow +kmod-nft-nat
+  KCONFIG:= \
+       CONFIG_NF_FLOW_TABLE_INET \
+       CONFIG_NF_FLOW_TABLE_IPV4 \
+       CONFIG_NF_FLOW_TABLE_IPV6 \
+       CONFIG_NFT_FLOW_OFFLOAD
+  FILES:= \
+       $(LINUX_DIR)/net/netfilter/nf_flow_table_inet.ko \
+       $(LINUX_DIR)/net/ipv4/netfilter/nf_flow_table_ipv4.ko \
+       $(LINUX_DIR)/net/ipv6/netfilter/nf_flow_table_ipv6.ko \
+       $(LINUX_DIR)/net/netfilter/nft_flow_offload.ko
+  AUTOLOAD:=$(call AutoProbe,nf_flow_table_inet nf_flow_table_ipv4 nf_flow_table_ipv6 nft_flow_offload)
+endef
+
+$(eval $(call KernelPackage,nft-offload))
+
+
 define KernelPackage/nft-nat6
   SUBMENU:=$(NF_MENU)
   TITLE:=Netfilter nf_tables IPv6-NAT support
diff --git a/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch b/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch
new file mode 100644 (file)
index 0000000..9d6ce98
--- /dev/null
@@ -0,0 +1,169 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:03:56 +0100
+Subject: [PATCH] netfilter: nf_conntrack: add IPS_OFFLOAD status bit
+
+This new bit tells us that the conntrack entry is owned by the flow
+table offload infrastructure.
+
+ # cat /proc/net/nf_conntrack
+ ipv4     2 tcp      6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2
+
+Note the [OFFLOAD] tag in the listing.
+
+The timer of such conntrack entries looks as if it is stopped from userspace.
+In practice, to make sure the conntrack entry does not go away, the
+conntrack timer is periodically set to an arbitrarily large value that
+gets refreshed on every iteration from the garbage collector, so it
+never expires - and they display no internal state in the case of TCP
+flows. This allows us to save a bitcheck from the packet path via
+nf_ct_is_expired().
+
+Conntrack entries that have been offloaded to the flow table
+infrastructure cannot be deleted/flushed via ctnetlink. The flow table
+infrastructure is also responsible for releasing this conntrack entry.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
+@@ -101,12 +101,16 @@ enum ip_conntrack_status {
+       IPS_HELPER_BIT = 13,
+       IPS_HELPER = (1 << IPS_HELPER_BIT),
++      /* Conntrack has been offloaded to flow table. */
++      IPS_OFFLOAD_BIT = 14,
++      IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
++
+       /* Be careful here, modifying these bits can make things messy,
+        * so don't let users modify them directly.
+        */
+       IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+                                IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+-                               IPS_SEQ_ADJUST | IPS_TEMPLATE),
++                               IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
+       __IPS_MAX_BIT = 14,
+ };
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -901,6 +901,9 @@ static unsigned int early_drop_list(stru
+       hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+               tmp = nf_ct_tuplehash_to_ctrack(h);
++              if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
++                      continue;
++
+               if (nf_ct_is_expired(tmp)) {
+                       nf_ct_gc_expired(tmp);
+                       continue;
+@@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(con
+       return false;
+ }
++#define       DAY     (86400 * HZ)
++
++/* Set an arbitrary timeout large enough not to ever expire, this save
++ * us a check for the IPS_OFFLOAD_BIT from the packet path via
++ * nf_ct_is_expired().
++ */
++static void nf_ct_offload_timeout(struct nf_conn *ct)
++{
++      if (nf_ct_expires(ct) < DAY / 2)
++              ct->timeout = nfct_time_stamp + DAY;
++}
++
+ static void gc_worker(struct work_struct *work)
+ {
+       unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
+@@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct
+                       tmp = nf_ct_tuplehash_to_ctrack(h);
+                       scanned++;
++                      if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
++                              nf_ct_offload_timeout(tmp);
++                              continue;
++                      }
++
+                       if (nf_ct_is_expired(tmp)) {
+                               nf_ct_gc_expired(tmp);
+                               expired_count++;
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -1105,6 +1105,14 @@ static const struct nla_policy ct_nla_po
+                                   .len = NF_CT_LABELS_MAX_SIZE },
+ };
++static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
++{
++      if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++              return 0;
++
++      return ctnetlink_filter_match(ct, data);
++}
++
+ static int ctnetlink_flush_conntrack(struct net *net,
+                                    const struct nlattr * const cda[],
+                                    u32 portid, int report)
+@@ -1117,7 +1125,7 @@ static int ctnetlink_flush_conntrack(str
+                       return PTR_ERR(filter);
+       }
+-      nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
++      nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
+                                 portid, report);
+       kfree(filter);
+@@ -1163,6 +1171,11 @@ static int ctnetlink_del_conntrack(struc
+       ct = nf_ct_tuplehash_to_ctrack(h);
++      if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
++              nf_ct_put(ct);
++              return -EBUSY;
++      }
++
+       if (cda[CTA_ID]) {
+               u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
+               if (id != (u32)(unsigned long)ct) {
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_c
+ /* Print out the private part of the conntrack. */
+ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+ {
++      if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++              return;
++
+       seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
+ }
+ #endif
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *
+       WARN_ON(!l4proto);
+       ret = -ENOSPC;
+-      seq_printf(s, "%-8s %u %-8s %u %ld ",
++      seq_printf(s, "%-8s %u %-8s %u ",
+                  l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+-                 l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
+-                 nf_ct_expires(ct)  / HZ);
++                 l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
++
++      if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
++              seq_printf(s, "%ld ", nf_ct_expires(ct)  / HZ);
+       if (l4proto->print_conntrack)
+               l4proto->print_conntrack(s, ct);
+@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *
+       if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+               goto release;
+-      if (test_bit(IPS_ASSURED_BIT, &ct->status))
++      if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++              seq_puts(s, "[OFFLOAD] ");
++      else if (test_bit(IPS_ASSURED_BIT, &ct->status))
+               seq_puts(s, "[ASSURED] ");
+       if (seq_has_overflowed(s))
diff --git a/target/linux/generic/backport-4.14/321-netfilter-nf_tables-add-flow-table-netlink-frontend.patch b/target/linux/generic/backport-4.14/321-netfilter-nf_tables-add-flow-table-netlink-frontend.patch
new file mode 100644 (file)
index 0000000..8a0d2f0
--- /dev/null
@@ -0,0 +1,1079 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:04:07 +0100
+Subject: [PATCH] netfilter: nf_tables: add flow table netlink frontend
+
+This patch introduces a netlink control plane to create, delete and dump
+flow tables. Flow tables are identified by name; this name is used from
+rules to refer to a specific flow table. Flow tables use the rhashtable
+class and a generic garbage collector to remove expired entries.
+
+This also adds the infrastructure to add different flow table types, so
+we can add one for each layer 3 protocol family.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 include/net/netfilter/nf_flow_table.h
+
+--- /dev/null
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -0,0 +1,23 @@
++#ifndef _NF_FLOW_TABLE_H
++#define _NF_FLOW_TABLE_H
++
++#include <linux/rhashtable.h>
++
++struct nf_flowtable;
++
++struct nf_flowtable_type {
++      struct list_head                list;
++      int                             family;
++      void                            (*gc)(struct work_struct *work);
++      const struct rhashtable_params  *params;
++      nf_hookfn                       *hook;
++      struct module                   *owner;
++};
++
++struct nf_flowtable {
++      struct rhashtable               rhashtable;
++      const struct nf_flowtable_type  *type;
++      struct delayed_work             gc_work;
++};
++
++#endif /* _FLOW_OFFLOAD_H */
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -9,6 +9,7 @@
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter/nf_tables.h>
+ #include <linux/u64_stats_sync.h>
++#include <net/netfilter/nf_flow_table.h>
+ #include <net/netlink.h>
+ #define NFT_JUMP_STACK_SIZE   16
+@@ -933,6 +934,7 @@ unsigned int nft_do_chain(struct nft_pkt
+  *    @chains: chains in the table
+  *    @sets: sets in the table
+  *    @objects: stateful objects in the table
++ *    @flowtables: flow tables in the table
+  *    @hgenerator: handle generator state
+  *    @use: number of chain references to this table
+  *    @flags: table flag (see enum nft_table_flags)
+@@ -944,6 +946,7 @@ struct nft_table {
+       struct list_head                chains;
+       struct list_head                sets;
+       struct list_head                objects;
++      struct list_head                flowtables;
+       u64                             hgenerator;
+       u32                             use;
+       u16                             flags:14,
+@@ -1075,6 +1078,44 @@ int nft_register_obj(struct nft_object_t
+ void nft_unregister_obj(struct nft_object_type *obj_type);
+ /**
++ *    struct nft_flowtable - nf_tables flow table
++ *
++ *    @list: flow table list node in table list
++ *    @table: the table the flow table is contained in
++ *    @name: name of this flow table
++ *    @hooknum: hook number
++ *    @priority: hook priority
++ *    @ops_len: number of hooks in array
++ *    @genmask: generation mask
++ *    @use: number of references to this flow table
++ *    @data: rhashtable and garbage collector
++ *    @ops: array of hooks
++ */
++struct nft_flowtable {
++      struct list_head                list;
++      struct nft_table                *table;
++      char                            *name;
++      int                             hooknum;
++      int                             priority;
++      int                             ops_len;
++      u32                             genmask:2,
++                                      use:30;
++      /* runtime data below here */
++      struct nf_hook_ops              *ops ____cacheline_aligned;
++      struct nf_flowtable             data;
++};
++
++struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
++                                               const struct nlattr *nla,
++                                               u8 genmask);
++void nft_flow_table_iterate(struct net *net,
++                          void (*iter)(struct nf_flowtable *flowtable, void *data),
++                          void *data);
++
++void nft_register_flowtable_type(struct nf_flowtable_type *type);
++void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
++
++/**
+  *    struct nft_traceinfo - nft tracing information and state
+  *
+  *    @pkt: pktinfo currently processed
+@@ -1310,4 +1351,11 @@ struct nft_trans_obj {
+ #define nft_trans_obj(trans)  \
+       (((struct nft_trans_obj *)trans->data)->obj)
++struct nft_trans_flowtable {
++      struct nft_flowtable            *flowtable;
++};
++
++#define nft_trans_flowtable(trans)    \
++      (((struct nft_trans_flowtable *)trans->data)->flowtable)
++
+ #endif /* _NET_NF_TABLES_H */
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -92,6 +92,9 @@ enum nft_verdicts {
+  * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes)
+  * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes)
+  * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes)
++ * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes)
++ * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes)
++ * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes)
+  */
+ enum nf_tables_msg_types {
+       NFT_MSG_NEWTABLE,
+@@ -116,6 +119,9 @@ enum nf_tables_msg_types {
+       NFT_MSG_GETOBJ,
+       NFT_MSG_DELOBJ,
+       NFT_MSG_GETOBJ_RESET,
++      NFT_MSG_NEWFLOWTABLE,
++      NFT_MSG_GETFLOWTABLE,
++      NFT_MSG_DELFLOWTABLE,
+       NFT_MSG_MAX,
+ };
+@@ -1310,6 +1316,53 @@ enum nft_object_attributes {
+ #define NFTA_OBJ_MAX          (__NFTA_OBJ_MAX - 1)
+ /**
++ * enum nft_flowtable_attributes - nf_tables flow table netlink attributes
++ *
++ * @NFTA_FLOWTABLE_TABLE: name of the table containing the expression (NLA_STRING)
++ * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING)
++ * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
++ * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
++ */
++enum nft_flowtable_attributes {
++      NFTA_FLOWTABLE_UNSPEC,
++      NFTA_FLOWTABLE_TABLE,
++      NFTA_FLOWTABLE_NAME,
++      NFTA_FLOWTABLE_HOOK,
++      NFTA_FLOWTABLE_USE,
++      __NFTA_FLOWTABLE_MAX
++};
++#define NFTA_FLOWTABLE_MAX    (__NFTA_FLOWTABLE_MAX - 1)
++
++/**
++ * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes
++ *
++ * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32)
++ * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
++ * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED)
++ */
++enum nft_flowtable_hook_attributes {
++      NFTA_FLOWTABLE_HOOK_UNSPEC,
++      NFTA_FLOWTABLE_HOOK_NUM,
++      NFTA_FLOWTABLE_HOOK_PRIORITY,
++      NFTA_FLOWTABLE_HOOK_DEVS,
++      __NFTA_FLOWTABLE_HOOK_MAX
++};
++#define NFTA_FLOWTABLE_HOOK_MAX       (__NFTA_FLOWTABLE_HOOK_MAX - 1)
++
++/**
++ * enum nft_device_attributes - nf_tables device netlink attributes
++ *
++ * @NFTA_DEVICE_NAME: name of this device (NLA_STRING)
++ */
++enum nft_devices_attributes {
++      NFTA_DEVICE_UNSPEC,
++      NFTA_DEVICE_NAME,
++      __NFTA_DEVICE_MAX
++};
++#define NFTA_DEVICE_MAX               (__NFTA_DEVICE_MAX - 1)
++
++
++/**
+  * enum nft_trace_attributes - nf_tables trace netlink attributes
+  *
+  * @NFTA_TRACE_TABLE: name of the table (NLA_STRING)
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -17,6 +17,7 @@
+ #include <linux/netfilter.h>
+ #include <linux/netfilter/nfnetlink.h>
+ #include <linux/netfilter/nf_tables.h>
++#include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_tables_core.h>
+ #include <net/netfilter/nf_tables.h>
+ #include <net/net_namespace.h>
+@@ -24,6 +25,7 @@
+ static LIST_HEAD(nf_tables_expressions);
+ static LIST_HEAD(nf_tables_objects);
++static LIST_HEAD(nf_tables_flowtables);
+ /**
+  *    nft_register_afinfo - register nf_tables address family info
+@@ -345,6 +347,40 @@ static int nft_delobj(struct nft_ctx *ct
+       return err;
+ }
++static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
++                                 struct nft_flowtable *flowtable)
++{
++      struct nft_trans *trans;
++
++      trans = nft_trans_alloc(ctx, msg_type,
++                              sizeof(struct nft_trans_flowtable));
++      if (trans == NULL)
++              return -ENOMEM;
++
++      if (msg_type == NFT_MSG_NEWFLOWTABLE)
++              nft_activate_next(ctx->net, flowtable);
++
++      nft_trans_flowtable(trans) = flowtable;
++      list_add_tail(&trans->list, &ctx->net->nft.commit_list);
++
++      return 0;
++}
++
++static int nft_delflowtable(struct nft_ctx *ctx,
++                          struct nft_flowtable *flowtable)
++{
++      int err;
++
++      err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
++      if (err < 0)
++              return err;
++
++      nft_deactivate_next(ctx->net, flowtable);
++      ctx->table->use--;
++
++      return err;
++}
++
+ /*
+  * Tables
+  */
+@@ -728,6 +764,7 @@ static int nf_tables_newtable(struct net
+       INIT_LIST_HEAD(&table->chains);
+       INIT_LIST_HEAD(&table->sets);
+       INIT_LIST_HEAD(&table->objects);
++      INIT_LIST_HEAD(&table->flowtables);
+       table->flags = flags;
+       nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+@@ -749,10 +786,11 @@ err1:
+ static int nft_flush_table(struct nft_ctx *ctx)
+ {
+-      int err;
++      struct nft_flowtable *flowtable, *nft;
+       struct nft_chain *chain, *nc;
+       struct nft_object *obj, *ne;
+       struct nft_set *set, *ns;
++      int err;
+       list_for_each_entry(chain, &ctx->table->chains, list) {
+               if (!nft_is_active_next(ctx->net, chain))
+@@ -778,6 +816,12 @@ static int nft_flush_table(struct nft_ct
+                       goto out;
+       }
++      list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
++              err = nft_delflowtable(ctx, flowtable);
++              if (err < 0)
++                      goto out;
++      }
++
+       list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
+               err = nft_delobj(ctx, obj);
+               if (err < 0)
+@@ -4765,6 +4809,605 @@ static void nf_tables_obj_notify(const s
+                      ctx->afi->family, ctx->report, GFP_KERNEL);
+ }
++/*
++ * Flow tables
++ */
++void nft_register_flowtable_type(struct nf_flowtable_type *type)
++{
++      nfnl_lock(NFNL_SUBSYS_NFTABLES);
++      list_add_tail_rcu(&type->list, &nf_tables_flowtables);
++      nfnl_unlock(NFNL_SUBSYS_NFTABLES);
++}
++EXPORT_SYMBOL_GPL(nft_register_flowtable_type);
++
++void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
++{
++      nfnl_lock(NFNL_SUBSYS_NFTABLES);
++      list_del_rcu(&type->list);
++      nfnl_unlock(NFNL_SUBSYS_NFTABLES);
++}
++EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);
++
++static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
++      [NFTA_FLOWTABLE_TABLE]          = { .type = NLA_STRING,
++                                          .len = NFT_NAME_MAXLEN - 1 },
++      [NFTA_FLOWTABLE_NAME]           = { .type = NLA_STRING,
++                                          .len = NFT_NAME_MAXLEN - 1 },
++      [NFTA_FLOWTABLE_HOOK]           = { .type = NLA_NESTED },
++};
++
++struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
++                                               const struct nlattr *nla,
++                                               u8 genmask)
++{
++      struct nft_flowtable *flowtable;
++
++      list_for_each_entry(flowtable, &table->flowtables, list) {
++              if (!nla_strcmp(nla, flowtable->name) &&
++                  nft_active_genmask(flowtable, genmask))
++                      return flowtable;
++      }
++      return ERR_PTR(-ENOENT);
++}
++EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
++
++#define NFT_FLOWTABLE_DEVICE_MAX      8
++
++static int nf_tables_parse_devices(const struct nft_ctx *ctx,
++                                 const struct nlattr *attr,
++                                 struct net_device *dev_array[], int *len)
++{
++      const struct nlattr *tmp;
++      struct net_device *dev;
++      char ifname[IFNAMSIZ];
++      int rem, n = 0, err;
++
++      nla_for_each_nested(tmp, attr, rem) {
++              if (nla_type(tmp) != NFTA_DEVICE_NAME) {
++                      err = -EINVAL;
++                      goto err1;
++              }
++
++              nla_strlcpy(ifname, tmp, IFNAMSIZ);
++              dev = dev_get_by_name(ctx->net, ifname);
++              if (!dev) {
++                      err = -ENOENT;
++                      goto err1;
++              }
++
++              dev_array[n++] = dev;
++              if (n == NFT_FLOWTABLE_DEVICE_MAX) {
++                      err = -EFBIG;
++                      goto err1;
++              }
++      }
++      if (!len)
++              return -EINVAL;
++
++      err = 0;
++err1:
++      *len = n;
++      return err;
++}
++
++static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
++      [NFTA_FLOWTABLE_HOOK_NUM]       = { .type = NLA_U32 },
++      [NFTA_FLOWTABLE_HOOK_PRIORITY]  = { .type = NLA_U32 },
++      [NFTA_FLOWTABLE_HOOK_DEVS]      = { .type = NLA_NESTED },
++};
++
++static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
++                                        const struct nlattr *attr,
++                                        struct nft_flowtable *flowtable)
++{
++      struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
++      struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
++      struct nf_hook_ops *ops;
++      int hooknum, priority;
++      int err, n = 0, i;
++
++      err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
++                             nft_flowtable_hook_policy, NULL);
++      if (err < 0)
++              return err;
++
++      if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
++          !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
++          !tb[NFTA_FLOWTABLE_HOOK_DEVS])
++              return -EINVAL;
++
++      hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
++      if (hooknum >= ctx->afi->nhooks)
++              return -EINVAL;
++
++      priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
++
++      err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
++                                    dev_array, &n);
++      if (err < 0)
++              goto err1;
++
++      ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
++      if (!ops) {
++              err = -ENOMEM;
++              goto err1;
++      }
++
++      flowtable->ops          = ops;
++      flowtable->ops_len      = n;
++
++      for (i = 0; i < n; i++) {
++              flowtable->ops[i].pf            = NFPROTO_NETDEV;
++              flowtable->ops[i].hooknum       = hooknum;
++              flowtable->ops[i].priority      = priority;
++              flowtable->ops[i].priv          = &flowtable->data.rhashtable;
++              flowtable->ops[i].hook          = flowtable->data.type->hook;
++              flowtable->ops[i].dev           = dev_array[i];
++      }
++
++      err = 0;
++err1:
++      for (i = 0; i < n; i++)
++              dev_put(dev_array[i]);
++
++      return err;
++}
++
++static const struct nf_flowtable_type *
++__nft_flowtable_type_get(const struct nft_af_info *afi)
++{
++      const struct nf_flowtable_type *type;
++
++      list_for_each_entry(type, &nf_tables_flowtables, list) {
++              if (afi->family == type->family)
++                      return type;
++      }
++      return NULL;
++}
++
++static const struct nf_flowtable_type *
++nft_flowtable_type_get(const struct nft_af_info *afi)
++{
++      const struct nf_flowtable_type *type;
++
++      type = __nft_flowtable_type_get(afi);
++      if (type != NULL && try_module_get(type->owner))
++              return type;
++
++#ifdef CONFIG_MODULES
++      if (type == NULL) {
++              nfnl_unlock(NFNL_SUBSYS_NFTABLES);
++              request_module("nf-flowtable-%u", afi->family);
++              nfnl_lock(NFNL_SUBSYS_NFTABLES);
++              if (__nft_flowtable_type_get(afi))
++                      return ERR_PTR(-EAGAIN);
++      }
++#endif
++      return ERR_PTR(-ENOENT);
++}
++
++void nft_flow_table_iterate(struct net *net,
++                          void (*iter)(struct nf_flowtable *flowtable, void *data),
++                          void *data)
++{
++      struct nft_flowtable *flowtable;
++      const struct nft_af_info *afi;
++      const struct nft_table *table;
++
++      rcu_read_lock();
++      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
++              list_for_each_entry_rcu(table, &afi->tables, list) {
++                      list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
++                              iter(&flowtable->data, data);
++                      }
++              }
++      }
++      rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
++
++static void nft_unregister_flowtable_net_hooks(struct net *net,
++                                             struct nft_flowtable *flowtable)
++{
++      int i;
++
++      for (i = 0; i < flowtable->ops_len; i++) {
++              if (!flowtable->ops[i].dev)
++                      continue;
++
++              nf_unregister_net_hook(net, &flowtable->ops[i]);
++      }
++}
++
++static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
++                                struct sk_buff *skb,
++                                const struct nlmsghdr *nlh,
++                                const struct nlattr * const nla[],
++                                struct netlink_ext_ack *extack)
++{
++      const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
++      const struct nf_flowtable_type *type;
++      u8 genmask = nft_genmask_next(net);
++      int family = nfmsg->nfgen_family;
++      struct nft_flowtable *flowtable;
++      struct nft_af_info *afi;
++      struct nft_table *table;
++      struct nft_ctx ctx;
++      int err, i, k;
++
++      if (!nla[NFTA_FLOWTABLE_TABLE] ||
++          !nla[NFTA_FLOWTABLE_NAME] ||
++          !nla[NFTA_FLOWTABLE_HOOK])
++              return -EINVAL;
++
++      afi = nf_tables_afinfo_lookup(net, family, true);
++      if (IS_ERR(afi))
++              return PTR_ERR(afi);
++
++      table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
++      if (IS_ERR(table))
++              return PTR_ERR(table);
++
++      flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
++                                             genmask);
++      if (IS_ERR(flowtable)) {
++              err = PTR_ERR(flowtable);
++              if (err != -ENOENT)
++                      return err;
++      } else {
++              if (nlh->nlmsg_flags & NLM_F_EXCL)
++                      return -EEXIST;
++
++              return 0;
++      }
++
++      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++
++      flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
++      if (!flowtable)
++              return -ENOMEM;
++
++      flowtable->table = table;
++      flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
++      if (!flowtable->name) {
++              err = -ENOMEM;
++              goto err1;
++      }
++
++      type = nft_flowtable_type_get(afi);
++      if (IS_ERR(type)) {
++              err = PTR_ERR(type);
++              goto err2;
++      }
++
++      flowtable->data.type = type;
++      err = rhashtable_init(&flowtable->data.rhashtable, type->params);
++      if (err < 0)
++              goto err3;
++
++      err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
++                                           flowtable);
++      if (err < 0)
++              goto err3;
++
++      for (i = 0; i < flowtable->ops_len; i++) {
++              err = nf_register_net_hook(net, &flowtable->ops[i]);
++              if (err < 0)
++                      goto err4;
++      }
++
++      err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
++      if (err < 0)
++              goto err5;
++
++      INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
++      queue_delayed_work(system_power_efficient_wq,
++                         &flowtable->data.gc_work, HZ);
++
++      list_add_tail_rcu(&flowtable->list, &table->flowtables);
++      table->use++;
++
++      return 0;
++err5:
++      i = flowtable->ops_len; /* every hook was registered; unwind them all */
++err4:
++      for (k = i - 1; k >= 0; k--)    /* unwind hooks [0, i) in reverse */
++              nf_unregister_net_hook(net, &flowtable->ops[k]);
++
++      kfree(flowtable->ops);
++err3:
++      module_put(type->owner);
++err2:
++      kfree(flowtable->name);
++err1:
++      kfree(flowtable);
++      return err;
++}
++
++static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
++                                struct sk_buff *skb,
++                                const struct nlmsghdr *nlh,
++                                const struct nlattr * const nla[],
++                                struct netlink_ext_ack *extack)
++{
++      const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
++      u8 genmask = nft_genmask_next(net);
++      int family = nfmsg->nfgen_family;
++      struct nft_flowtable *flowtable;
++      struct nft_af_info *afi;
++      struct nft_table *table;
++      struct nft_ctx ctx;
++
++      afi = nf_tables_afinfo_lookup(net, family, true);
++      if (IS_ERR(afi))
++              return PTR_ERR(afi);
++
++      table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
++      if (IS_ERR(table))
++              return PTR_ERR(table);
++
++      flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
++                                             genmask);
++      if (IS_ERR(flowtable))
++                return PTR_ERR(flowtable);
++      if (flowtable->use > 0)
++              return -EBUSY;
++
++      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++
++      return nft_delflowtable(&ctx, flowtable);
++}
++
++static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
++                                       u32 portid, u32 seq, int event,
++                                       u32 flags, int family,
++                                       struct nft_flowtable *flowtable)
++{
++      struct nlattr *nest, *nest_devs;
++      struct nfgenmsg *nfmsg;
++      struct nlmsghdr *nlh;
++      int i;
++
++      event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
++      nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
++      if (nlh == NULL)
++              goto nla_put_failure;
++
++      nfmsg = nlmsg_data(nlh);
++      nfmsg->nfgen_family     = family;
++      nfmsg->version          = NFNETLINK_V0;
++      nfmsg->res_id           = htons(net->nft.base_seq & 0xffff);
++
++      if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
++          nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
++          nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
++              goto nla_put_failure;
++
++      nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
++      if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
++          nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
++              goto nla_put_failure;
++
++      nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS);
++      if (!nest_devs)
++              goto nla_put_failure;
++
++      for (i = 0; i < flowtable->ops_len; i++) {
++              if (flowtable->ops[i].dev &&
++                  nla_put_string(skb, NFTA_DEVICE_NAME,
++                                 flowtable->ops[i].dev->name))
++                      goto nla_put_failure;
++      }
++      nla_nest_end(skb, nest_devs);
++      nla_nest_end(skb, nest);
++
++      nlmsg_end(skb, nlh);
++      return 0;
++
++nla_put_failure:
++      nlmsg_trim(skb, nlh);
++      return -1;
++}
++
++struct nft_flowtable_filter {
++      char            *table;
++};
++
++static int nf_tables_dump_flowtable(struct sk_buff *skb,
++                                  struct netlink_callback *cb)
++{
++      const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
++      struct nft_flowtable_filter *filter = cb->data;
++      unsigned int idx = 0, s_idx = cb->args[0];
++      struct net *net = sock_net(skb->sk);
++      int family = nfmsg->nfgen_family;
++      struct nft_flowtable *flowtable;
++      const struct nft_af_info *afi;
++      const struct nft_table *table;
++
++      rcu_read_lock();
++      cb->seq = net->nft.base_seq;
++
++      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
++              if (family != NFPROTO_UNSPEC && family != afi->family)
++                      continue;
++
++              list_for_each_entry_rcu(table, &afi->tables, list) {
++                      list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
++                              if (!nft_is_active(net, flowtable))
++                                      goto cont;
++                              if (idx < s_idx)
++                                      goto cont;
++                              if (idx > s_idx)
++                                      memset(&cb->args[1], 0,
++                                             sizeof(cb->args) - sizeof(cb->args[0]));
++                              if (filter && filter->table[0] &&
++                                  strcmp(filter->table, table->name))
++                                      goto cont;
++
++                              if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
++                                                                cb->nlh->nlmsg_seq,
++                                                                NFT_MSG_NEWFLOWTABLE,
++                                                                NLM_F_MULTI | NLM_F_APPEND,
++                                                                afi->family, flowtable) < 0)
++                                      goto done;
++
++                              nl_dump_check_consistent(cb, nlmsg_hdr(skb));
++cont:
++                              idx++;
++                      }
++              }
++      }
++done:
++      rcu_read_unlock();
++
++      cb->args[0] = idx;
++      return skb->len;
++}
++
++static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
++{
++      struct nft_flowtable_filter *filter = cb->data;
++
++      if (!filter)
++              return 0;
++
++      kfree(filter->table);
++      kfree(filter);
++
++      return 0;
++}
++
++static struct nft_flowtable_filter *
++nft_flowtable_filter_alloc(const struct nlattr * const nla[])
++{
++      struct nft_flowtable_filter *filter;
++
++      filter = kzalloc(sizeof(*filter), GFP_KERNEL);
++      if (!filter)
++              return ERR_PTR(-ENOMEM);
++
++      if (nla[NFTA_FLOWTABLE_TABLE]) {
++              filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
++                                         GFP_KERNEL);
++              if (!filter->table) {
++                      kfree(filter);
++                      return ERR_PTR(-ENOMEM);
++              }
++      }
++      return filter;
++}
++
++static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
++                                struct sk_buff *skb,
++                                const struct nlmsghdr *nlh,
++                                const struct nlattr * const nla[],
++                                struct netlink_ext_ack *extack)
++{
++      const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
++      u8 genmask = nft_genmask_cur(net);
++      int family = nfmsg->nfgen_family;
++      struct nft_flowtable *flowtable;
++      const struct nft_af_info *afi;
++      const struct nft_table *table;
++      struct sk_buff *skb2;
++      int err;
++
++      if (nlh->nlmsg_flags & NLM_F_DUMP) {
++              struct netlink_dump_control c = {
++                      .dump = nf_tables_dump_flowtable,
++                      .done = nf_tables_dump_flowtable_done,
++              };
++
++              if (nla[NFTA_FLOWTABLE_TABLE]) {
++                      struct nft_flowtable_filter *filter;
++
++                      filter = nft_flowtable_filter_alloc(nla);
++                      if (IS_ERR(filter))
++                              return -ENOMEM;
++
++                      c.data = filter;
++              }
++              return netlink_dump_start(nlsk, skb, nlh, &c);
++      }
++
++      if (!nla[NFTA_FLOWTABLE_NAME])
++              return -EINVAL;
++
++      afi = nf_tables_afinfo_lookup(net, family, false);
++      if (IS_ERR(afi))
++              return PTR_ERR(afi);
++
++      table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
++      if (IS_ERR(table))
++              return PTR_ERR(table);
++
++      flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
++                                             genmask);
++      if (IS_ERR(table))
++              return PTR_ERR(flowtable);
++
++      skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
++      if (!skb2)
++              return -ENOMEM;
++
++      err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
++                                          nlh->nlmsg_seq,
++                                          NFT_MSG_NEWFLOWTABLE, 0, family,
++                                          flowtable);
++      if (err < 0)
++              goto err;
++
++      return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
++err:
++      kfree_skb(skb2);
++      return err;
++}
++
++static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
++                                     struct nft_flowtable *flowtable,
++                                     int event)
++{
++      struct sk_buff *skb;
++      int err;
++
++      if (!ctx->report &&
++          !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
++              return; /* no echo requested and no group listeners */
++
++      skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
++      if (skb == NULL)
++              goto err;
++
++      err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
++                                          ctx->seq, event, 0,
++                                          ctx->afi->family, flowtable);
++      if (err < 0) {
++              kfree_skb(skb);
++              goto err;
++      }
++
++      nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
++                     ctx->report, GFP_KERNEL);
++      return;
++err:
++      nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
++}
++
++static void nft_flowtable_destroy(void *ptr, void *arg)
++{
++      kfree(ptr);
++}
++
++static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
++{
++      cancel_delayed_work_sync(&flowtable->data.gc_work);
++      kfree(flowtable->name);
++      rhashtable_free_and_destroy(&flowtable->data.rhashtable,
++                                  nft_flowtable_destroy, NULL);
++      module_put(flowtable->data.type->owner);
++}
++
+ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+                                  u32 portid, u32 seq)
+ {
+@@ -4795,6 +5438,49 @@ nla_put_failure:
+       return -EMSGSIZE;
+ }
++static void nft_flowtable_event(unsigned long event, struct net_device *dev,
++                              struct nft_flowtable *flowtable)
++{
++      int i;
++
++      for (i = 0; i < flowtable->ops_len; i++) {
++              if (flowtable->ops[i].dev != dev)
++                      continue;
++
++              nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
++              flowtable->ops[i].dev = NULL;
++              break;
++      }
++}
++
++static int nf_tables_flowtable_event(struct notifier_block *this,
++                                   unsigned long event, void *ptr)
++{
++      struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++      struct nft_flowtable *flowtable;
++      struct nft_table *table;
++      struct nft_af_info *afi;
++
++      if (event != NETDEV_UNREGISTER)
++              return 0;
++
++      nfnl_lock(NFNL_SUBSYS_NFTABLES);
++      list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
++              list_for_each_entry(table, &afi->tables, list) {
++                      list_for_each_entry(flowtable, &table->flowtables, list) {
++                              nft_flowtable_event(event, dev, flowtable);
++                      }
++              }
++      }
++      nfnl_unlock(NFNL_SUBSYS_NFTABLES);
++
++      return NOTIFY_DONE;
++}
++
++static struct notifier_block nf_tables_flowtable_notifier = {
++      .notifier_call  = nf_tables_flowtable_event,
++};
++
+ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
+                                int event)
+ {
+@@ -4947,6 +5633,21 @@ static const struct nfnl_callback nf_tab
+               .attr_count     = NFTA_OBJ_MAX,
+               .policy         = nft_obj_policy,
+       },
++      [NFT_MSG_NEWFLOWTABLE] = {
++              .call_batch     = nf_tables_newflowtable,
++              .attr_count     = NFTA_FLOWTABLE_MAX,
++              .policy         = nft_flowtable_policy,
++      },
++      [NFT_MSG_GETFLOWTABLE] = {
++              .call           = nf_tables_getflowtable,
++              .attr_count     = NFTA_FLOWTABLE_MAX,
++              .policy         = nft_flowtable_policy,
++      },
++      [NFT_MSG_DELFLOWTABLE] = {
++              .call_batch     = nf_tables_delflowtable,
++              .attr_count     = NFTA_FLOWTABLE_MAX,
++              .policy         = nft_flowtable_policy,
++      },
+ };
+ static void nft_chain_commit_update(struct nft_trans *trans)
+@@ -4992,6 +5693,9 @@ static void nf_tables_commit_release(str
+       case NFT_MSG_DELOBJ:
+               nft_obj_destroy(nft_trans_obj(trans));
+               break;
++      case NFT_MSG_DELFLOWTABLE:
++              nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
++              break;
+       }
+       kfree(trans);
+ }
+@@ -5109,6 +5813,21 @@ static int nf_tables_commit(struct net *
+                       nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
+                                            NFT_MSG_DELOBJ);
+                       break;
++              case NFT_MSG_NEWFLOWTABLE:
++                      nft_clear(net, nft_trans_flowtable(trans));
++                      nf_tables_flowtable_notify(&trans->ctx,
++                                                 nft_trans_flowtable(trans),
++                                                 NFT_MSG_NEWFLOWTABLE);
++                      nft_trans_destroy(trans);
++                      break;
++              case NFT_MSG_DELFLOWTABLE:
++                      list_del_rcu(&nft_trans_flowtable(trans)->list);
++                      nf_tables_flowtable_notify(&trans->ctx,
++                                                 nft_trans_flowtable(trans),
++                                                 NFT_MSG_DELFLOWTABLE);
++                      nft_unregister_flowtable_net_hooks(net,
++                                      nft_trans_flowtable(trans));
++                      break;
+               }
+       }
+@@ -5146,6 +5865,9 @@ static void nf_tables_abort_release(stru
+       case NFT_MSG_NEWOBJ:
+               nft_obj_destroy(nft_trans_obj(trans));
+               break;
++      case NFT_MSG_NEWFLOWTABLE:
++              nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
++              break;
+       }
+       kfree(trans);
+ }
+@@ -5235,6 +5957,17 @@ static int nf_tables_abort(struct net *n
+                       nft_clear(trans->ctx.net, nft_trans_obj(trans));
+                       nft_trans_destroy(trans);
+                       break;
++              case NFT_MSG_NEWFLOWTABLE:
++                      trans->ctx.table->use--;
++                      list_del_rcu(&nft_trans_flowtable(trans)->list);
++                      nft_unregister_flowtable_net_hooks(net,
++                                      nft_trans_flowtable(trans));
++                      break;
++              case NFT_MSG_DELFLOWTABLE:
++                      trans->ctx.table->use++;
++                      nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
++                      nft_trans_destroy(trans);
++                      break;
+               }
+       }
+@@ -5785,6 +6518,7 @@ EXPORT_SYMBOL_GPL(__nft_release_basechai
+ /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
+ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
+ {
++      struct nft_flowtable *flowtable, *nf;
+       struct nft_table *table, *nt;
+       struct nft_chain *chain, *nc;
+       struct nft_object *obj, *ne;
+@@ -5798,6 +6532,9 @@ static void __nft_release_afinfo(struct
+       list_for_each_entry_safe(table, nt, &afi->tables, list) {
+               list_for_each_entry(chain, &table->chains, list)
+                       nf_tables_unregister_hook(net, table, chain);
++              list_for_each_entry(flowtable, &table->flowtables, list)
++                      nf_unregister_net_hooks(net, flowtable->ops,
++                                              flowtable->ops_len);
+               /* No packets are walking on these chains anymore. */
+               ctx.table = table;
+               list_for_each_entry(chain, &table->chains, list) {
+@@ -5808,6 +6545,11 @@ static void __nft_release_afinfo(struct
+                               nf_tables_rule_destroy(&ctx, rule);
+                       }
+               }
++              list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
++                      list_del(&flowtable->list);
++                      table->use--;
++                      nf_tables_flowtable_destroy(flowtable);
++              }
+               list_for_each_entry_safe(set, ns, &table->sets, list) {
+                       list_del(&set->list);
+                       table->use--;
+@@ -5851,6 +6593,8 @@ static int __init nf_tables_module_init(
+       if (err < 0)
+               goto err3;
++      register_netdevice_notifier(&nf_tables_flowtable_notifier);
++
+       pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+       return register_pernet_subsys(&nf_tables_net_ops);
+ err3:
+@@ -5865,6 +6609,7 @@ static void __exit nf_tables_module_exit
+ {
+       unregister_pernet_subsys(&nf_tables_net_ops);
+       nfnetlink_subsys_unregister(&nf_tables_subsys);
++      unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+       rcu_barrier();
+       nf_tables_core_module_exit();
+       kfree(info);
diff --git a/target/linux/generic/backport-4.14/322-netfilter-add-generic-flow-table-infrastructure.patch b/target/linux/generic/backport-4.14/322-netfilter-add-generic-flow-table-infrastructure.patch
new file mode 100644 (file)
index 0000000..d811ef0
--- /dev/null
@@ -0,0 +1,586 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:04:11 +0100
+Subject: [PATCH] netfilter: add generic flow table infrastructure
+
+This patch defines the API to interact with flow tables; it allows adding,
+deleting and looking up entries in the flow table. This also adds the
+generic garbage collection code that removes entries that have expired,
+i.e. no traffic has been seen for a while.
+
+Users of the flow table infrastructure can delete entries via
+flow_offload_dead(), which sets the dying bit, this signals the garbage
+collector to release an entry from user context.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/netfilter/nf_flow_table.c
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -1,7 +1,12 @@
+ #ifndef _NF_FLOW_TABLE_H
+ #define _NF_FLOW_TABLE_H
++#include <linux/in.h>
++#include <linux/in6.h>
++#include <linux/netdevice.h>
+ #include <linux/rhashtable.h>
++#include <linux/rcupdate.h>
++#include <net/dst.h>
+ struct nf_flowtable;
+@@ -20,4 +25,93 @@ struct nf_flowtable {
+       struct delayed_work             gc_work;
+ };
++enum flow_offload_tuple_dir {
++      FLOW_OFFLOAD_DIR_ORIGINAL,
++      FLOW_OFFLOAD_DIR_REPLY,
++      __FLOW_OFFLOAD_DIR_MAX          = FLOW_OFFLOAD_DIR_REPLY,
++};
++#define FLOW_OFFLOAD_DIR_MAX  (__FLOW_OFFLOAD_DIR_MAX + 1)
++
++struct flow_offload_tuple {
++      union {
++              struct in_addr          src_v4;
++              struct in6_addr         src_v6;
++      };
++      union {
++              struct in_addr          dst_v4;
++              struct in6_addr         dst_v6;
++      };
++      struct {
++              __be16                  src_port;
++              __be16                  dst_port;
++      };
++
++      int                             iifidx;
++
++      u8                              l3proto;
++      u8                              l4proto;
++      u8                              dir;
++
++      int                             oifidx;
++
++      struct dst_entry                *dst_cache;
++};
++
++struct flow_offload_tuple_rhash {
++      struct rhash_head               node;
++      struct flow_offload_tuple       tuple;
++};
++
++#define FLOW_OFFLOAD_SNAT     0x1
++#define FLOW_OFFLOAD_DNAT     0x2
++#define FLOW_OFFLOAD_DYING    0x4
++
++struct flow_offload {
++      struct flow_offload_tuple_rhash         tuplehash[FLOW_OFFLOAD_DIR_MAX];
++      u32                                     flags;
++      union {
++              /* Your private driver data here. */
++              u32             timeout;
++      };
++};
++
++#define NF_FLOW_TIMEOUT (30 * HZ)
++
++struct nf_flow_route {
++      struct {
++              struct dst_entry        *dst;
++              int                     ifindex;
++      } tuple[FLOW_OFFLOAD_DIR_MAX];
++};
++
++struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
++                                      struct nf_flow_route *route);
++void flow_offload_free(struct flow_offload *flow);
++
++int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
++void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
++struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
++                                                   struct flow_offload_tuple *tuple);
++int nf_flow_table_iterate(struct nf_flowtable *flow_table,
++                        void (*iter)(struct flow_offload *flow, void *data),
++                        void *data);
++void nf_flow_offload_work_gc(struct work_struct *work);
++extern const struct rhashtable_params nf_flow_offload_rhash_params;
++
++void flow_offload_dead(struct flow_offload *flow);
++
++int nf_flow_snat_port(const struct flow_offload *flow,
++                    struct sk_buff *skb, unsigned int thoff,
++                    u8 protocol, enum flow_offload_tuple_dir dir);
++int nf_flow_dnat_port(const struct flow_offload *flow,
++                    struct sk_buff *skb, unsigned int thoff,
++                    u8 protocol, enum flow_offload_tuple_dir dir);
++
++struct flow_ports {
++      __be16 source, dest;
++};
++
++#define MODULE_ALIAS_NF_FLOWTABLE(family)     \
++      MODULE_ALIAS("nf-flowtable-" __stringify(family))
++
+ #endif /* _FLOW_OFFLOAD_H */
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -661,6 +661,13 @@ endif # NF_TABLES_NETDEV
+ endif # NF_TABLES
++config NF_FLOW_TABLE
++      tristate "Netfilter flow table module"
++      help
++        This option adds the flow table core infrastructure.
++
++        To compile it as a module, choose M here.
++
+ config NETFILTER_XTABLES
+       tristate "Netfilter Xtables support (required for ip_tables)"
+       default m if NETFILTER_ADVANCED=n
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -110,6 +110,9 @@ obj-$(CONFIG_NFT_FIB_NETDEV)       += nft_fib_
+ obj-$(CONFIG_NFT_DUP_NETDEV)  += nft_dup_netdev.o
+ obj-$(CONFIG_NFT_FWD_NETDEV)  += nft_fwd_netdev.o
++# flow table infrastructure
++obj-$(CONFIG_NF_FLOW_TABLE)   += nf_flow_table.o
++
+ # generic X tables 
+ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+--- /dev/null
++++ b/net/netfilter/nf_flow_table.c
+@@ -0,0 +1,429 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/netdevice.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_tuple.h>
++
++struct flow_offload_entry {
++      struct flow_offload     flow;
++      struct nf_conn          *ct;
++      struct rcu_head         rcu_head;
++};
++
++struct flow_offload *
++flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
++{
++      struct flow_offload_entry *entry;
++      struct flow_offload *flow;
++
++      if (unlikely(nf_ct_is_dying(ct) ||
++          !atomic_inc_not_zero(&ct->ct_general.use)))
++              return NULL;
++
++      entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
++      if (!entry)
++              goto err_ct_refcnt;
++
++      flow = &entry->flow;
++
++      if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
++              goto err_dst_cache_original;
++
++      if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
++              goto err_dst_cache_reply;
++
++      entry->ct = ct;
++
++      switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
++      case NFPROTO_IPV4:
++              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
++                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
++              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
++                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
++              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
++                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
++              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
++                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
++              break;
++      case NFPROTO_IPV6:
++              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
++                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
++              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
++                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
++              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
++                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
++              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
++                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
++              break;
++      }
++
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
++              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
++              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
++              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
++              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
++
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
++                route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
++                route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
++
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
++              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
++              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
++              ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
++              ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
++
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
++                                              FLOW_OFFLOAD_DIR_ORIGINAL;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
++                                              FLOW_OFFLOAD_DIR_REPLY;
++
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
++              route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
++              route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
++              route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
++      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
++              route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
++
++      if (ct->status & IPS_SRC_NAT)
++              flow->flags |= FLOW_OFFLOAD_SNAT;
++      if (ct->status & IPS_DST_NAT)   /* not exclusive with SNAT: both may be set */
++              flow->flags |= FLOW_OFFLOAD_DNAT;
++
++      return flow;
++
++err_dst_cache_reply:
++      dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
++err_dst_cache_original:
++      kfree(entry);
++err_ct_refcnt:
++      nf_ct_put(ct);
++
++      return NULL;
++}
++EXPORT_SYMBOL_GPL(flow_offload_alloc);
++
++void flow_offload_free(struct flow_offload *flow)
++{
++      struct flow_offload_entry *e;
++
++      dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
++      dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
++      e = container_of(flow, struct flow_offload_entry, flow);
++      kfree(e);
++}
++EXPORT_SYMBOL_GPL(flow_offload_free);
++
++void flow_offload_dead(struct flow_offload *flow)
++{
++      flow->flags |= FLOW_OFFLOAD_DYING;
++}
++EXPORT_SYMBOL_GPL(flow_offload_dead);
++
++/*
++ * Insert both directions of @flow into @flow_table.
++ *
++ * Returns 0 on success, or the negative error reported by
++ * rhashtable_insert_fast(). If the reply direction cannot be inserted, the
++ * original direction is removed again so the table never holds a
++ * half-registered flow.
++ */
++int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
++{
++      int err;
++
++      flow->timeout = (u32)jiffies;
++
++      err = rhashtable_insert_fast(&flow_table->rhashtable,
++                                   &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
++                                   *flow_table->type->params);
++      if (err < 0)
++              return err;
++
++      err = rhashtable_insert_fast(&flow_table->rhashtable,
++                                   &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
++                                   *flow_table->type->params);
++      if (err < 0) {
++              /* undo the first insert; the caller still owns @flow */
++              rhashtable_remove_fast(&flow_table->rhashtable,
++                                     &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
++                                     *flow_table->type->params);
++              return err;
++      }
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(flow_offload_add);
++
++void flow_offload_del(struct nf_flowtable *flow_table,
++                    struct flow_offload *flow)
++{
++      struct flow_offload_entry *e;
++
++      rhashtable_remove_fast(&flow_table->rhashtable,
++                             &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
++                             *flow_table->type->params);
++      rhashtable_remove_fast(&flow_table->rhashtable,
++                             &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
++                             *flow_table->type->params);
++
++      e = container_of(flow, struct flow_offload_entry, flow);
++      kfree_rcu(e, rcu_head);
++}
++EXPORT_SYMBOL_GPL(flow_offload_del);
++
++struct flow_offload_tuple_rhash *
++flow_offload_lookup(struct nf_flowtable *flow_table,
++                  struct flow_offload_tuple *tuple)
++{
++      return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
++                                    *flow_table->type->params);
++}
++EXPORT_SYMBOL_GPL(flow_offload_lookup);
++
++static void nf_flow_release_ct(const struct flow_offload *flow)
++{
++      struct flow_offload_entry *e;
++
++      e = container_of(flow, struct flow_offload_entry, flow);
++      nf_ct_delete(e->ct, 0, 0);
++      nf_ct_put(e->ct);
++}
++
++/*
++ * Call @iter once for every flow in @flow_table.
++ *
++ * Each flow is hashed twice (one node per direction); only the node whose
++ * tuple.dir is 0 is used, so @iter sees every flow exactly once.
++ *
++ * Returns 0 when the walk completed, the error from rhashtable_walk_init(),
++ * or the first error other than -EAGAIN reported by rhashtable_walk_next().
++ */
++int nf_flow_table_iterate(struct nf_flowtable *flow_table,
++                        void (*iter)(struct flow_offload *flow, void *data),
++                        void *data)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct rhashtable_iter hti;
++      struct flow_offload *flow;
++      int err;
++
++      err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
++      if (err)
++              return err;
++
++      rhashtable_walk_start(&hti);
++
++      while ((tuplehash = rhashtable_walk_next(&hti))) {
++              if (IS_ERR(tuplehash)) {
++                      /*
++                       * -EAGAIN is not a failure, the walk simply goes on.
++                       * Do not store it in err, otherwise a walk that
++                       * completes would still return -EAGAIN.
++                       */
++                      if (PTR_ERR(tuplehash) != -EAGAIN) {
++                              err = PTR_ERR(tuplehash);
++                              goto out;
++                      }
++
++                      continue;
++              }
++              if (tuplehash->tuple.dir)
++                      continue;
++
++              flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
++
++              iter(flow, data);
++      }
++out:
++      rhashtable_walk_stop(&hti);
++      rhashtable_walk_exit(&hti);
++
++      return err;
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
++
++static inline bool nf_flow_has_expired(const struct flow_offload *flow)
++{
++      return (__s32)(flow->timeout - (u32)jiffies) <= 0;
++}
++
++static inline bool nf_flow_is_dying(const struct flow_offload *flow)
++{
++      return flow->flags & FLOW_OFFLOAD_DYING;
++}
++
++void nf_flow_offload_work_gc(struct work_struct *work)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct nf_flowtable *flow_table;
++      struct rhashtable_iter hti;
++      struct flow_offload *flow;
++      int err;
++
++      flow_table = container_of(work, struct nf_flowtable, gc_work.work);
++
++      err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
++      if (err)
++              goto schedule;
++
++      rhashtable_walk_start(&hti);
++
++      while ((tuplehash = rhashtable_walk_next(&hti))) {
++              if (IS_ERR(tuplehash)) {
++                      err = PTR_ERR(tuplehash);
++                      if (err != -EAGAIN)
++                              goto out;
++
++                      continue;
++              }
++              if (tuplehash->tuple.dir)
++                      continue;
++
++              flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
++
++              if (nf_flow_has_expired(flow) ||
++                  nf_flow_is_dying(flow)) {
++                      flow_offload_del(flow_table, flow);
++                      nf_flow_release_ct(flow);
++              }
++      }
++out:
++      rhashtable_walk_stop(&hti);
++      rhashtable_walk_exit(&hti);
++schedule:
++      queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
++
++static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
++{
++      const struct flow_offload_tuple *tuple = data;
++
++      return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
++}
++
++static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
++{
++      const struct flow_offload_tuple_rhash *tuplehash = data;
++
++      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
++}
++
++static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
++                                      const void *ptr)
++{
++      const struct flow_offload_tuple *tuple = arg->key;
++      const struct flow_offload_tuple_rhash *x = ptr;
++
++      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
++              return 1;
++
++      return 0;
++}
++
++const struct rhashtable_params nf_flow_offload_rhash_params = {
++      .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
++      .hashfn                 = flow_offload_hash,
++      .obj_hashfn             = flow_offload_hash_obj,
++      .obj_cmpfn              = flow_offload_hash_cmp,
++      .automatic_shrinking    = true,
++};
++EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
++
++static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
++                              __be16 port, __be16 new_port)
++{
++      struct tcphdr *tcph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*tcph)))
++              return -1;
++
++      tcph = (void *)(skb_network_header(skb) + thoff);
++      inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
++
++      return 0;
++}
++
++static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
++                              __be16 port, __be16 new_port)
++{
++      struct udphdr *udph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*udph)))
++              return -1;
++
++      udph = (void *)(skb_network_header(skb) + thoff);
++      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
++              inet_proto_csum_replace2(&udph->check, skb, port,
++                                       new_port, true);
++              if (!udph->check)
++                      udph->check = CSUM_MANGLED_0;
++      }
++
++      return 0;
++}
++
++/*
++ * Fix up the layer 4 checksum after a port rewrite.
++ *
++ * Returns 0 on success (protocols other than TCP/UDP are left untouched)
++ * and -1 if the transport header could not be pulled or made writable.
++ * The error value has to be negative: nf_flow_snat_port() and
++ * nf_flow_dnat_port() hand it to callers that test "< 0", and NF_DROP is
++ * defined as 0, which would be read as success.
++ */
++static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
++                          u8 protocol, __be16 port, __be16 new_port)
++{
++      switch (protocol) {
++      case IPPROTO_TCP:
++              if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
++                      return -1;
++              break;
++      case IPPROTO_UDP:
++              if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
++                      return -1;
++              break;
++      }
++
++      return 0;
++}
++
++int nf_flow_snat_port(const struct flow_offload *flow,
++                    struct sk_buff *skb, unsigned int thoff,
++                    u8 protocol, enum flow_offload_tuple_dir dir)
++{
++      struct flow_ports *hdr;
++      __be16 port, new_port;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*hdr)))
++              return -1;
++
++      hdr = (void *)(skb_network_header(skb) + thoff);
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              port = hdr->source;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
++              hdr->source = new_port;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              port = hdr->dest;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
++              hdr->dest = new_port;
++              break;
++      default:
++              return -1;
++      }
++
++      return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
++}
++EXPORT_SYMBOL_GPL(nf_flow_snat_port);
++
++int nf_flow_dnat_port(const struct flow_offload *flow,
++                    struct sk_buff *skb, unsigned int thoff,
++                    u8 protocol, enum flow_offload_tuple_dir dir)
++{
++      struct flow_ports *hdr;
++      __be16 port, new_port;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*hdr)))
++              return -1;
++
++      hdr = (void *)(skb_network_header(skb) + thoff);
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              port = hdr->dest;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
++              hdr->dest = new_port;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              port = hdr->source;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
++              hdr->source = new_port;
++              break;
++      default:
++              return -1;
++      }
++
++      return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
++}
++EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch b/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch
new file mode 100644 (file)
index 0000000..6f36171
--- /dev/null
@@ -0,0 +1,334 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:04:15 +0100
+Subject: [PATCH] netfilter: flow table support for IPv4
+
+This patch adds the IPv4 flow table type, that implements the datapath
+flow table to forward IPv4 traffic. Rationale is:
+
+1) Look up the packet in the flow table, from the ingress hook.
+2) If there's a hit, decrement ttl and pass it on to the neighbour layer
+   for transmission.
+3) If there's a miss, packet is passed up to the classic forwarding
+   path.
+
+This patch also supports layer 3 source and destination NAT.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c
+
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -77,6 +77,14 @@ config NF_TABLES_ARP
+ endif # NF_TABLES
++config NF_FLOW_TABLE_IPV4
++      select NF_FLOW_TABLE
++      tristate "Netfilter flow table IPv4 module"
++      help
++        This option adds the flow table IPv4 support.
++
++        To compile it as a module, choose M here.
++
+ config NF_DUP_IPV4
+       tristate "Netfilter IPv4 packet duplication to alternate destination"
+       depends on !NF_CONNTRACK || NF_CONNTRACK
+--- a/net/ipv4/netfilter/Makefile
++++ b/net/ipv4/netfilter/Makefile
+@@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redi
+ obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
+ obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
++# flow table support
++obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
++
+ # generic IP tables 
+ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+--- /dev/null
++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
+@@ -0,0 +1,283 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/ip.h>
++#include <linux/netdevice.h>
++#include <net/ip.h>
++#include <net/neighbour.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_tables.h>
++/* For layer 4 checksum field offset. */
++#include <linux/tcp.h>
++#include <linux/udp.h>
++
++static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
++                            __be32 addr, __be32 new_addr)
++{
++      struct tcphdr *tcph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*tcph)))
++              return -1;
++
++      tcph = (void *)(skb_network_header(skb) + thoff);
++      inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
++
++      return 0;
++}
++
++static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
++                            __be32 addr, __be32 new_addr)
++{
++      struct udphdr *udph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*udph)))
++              return -1;
++
++      udph = (void *)(skb_network_header(skb) + thoff);
++      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
++              inet_proto_csum_replace4(&udph->check, skb, addr,
++                                       new_addr, true);
++              if (!udph->check)
++                      udph->check = CSUM_MANGLED_0;
++      }
++
++      return 0;
++}
++
++/*
++ * Fix up the TCP/UDP checksum after an IPv4 address rewrite.
++ *
++ * Returns 0 on success (other protocols are left untouched) and -1 if the
++ * transport header could not be pulled or made writable. The error value
++ * has to be negative: the result is tested with "< 0" in nf_flow_nat_ip(),
++ * and NF_DROP is defined as 0, which would be read as success.
++ */
++static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
++                                unsigned int thoff, __be32 addr,
++                                __be32 new_addr)
++{
++      switch (iph->protocol) {
++      case IPPROTO_TCP:
++              if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
++                      return -1;
++              break;
++      case IPPROTO_UDP:
++              if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
++                      return -1;
++              break;
++      }
++
++      return 0;
++}
++
++static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++                         struct iphdr *iph, unsigned int thoff,
++                         enum flow_offload_tuple_dir dir)
++{
++      __be32 addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = iph->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
++              iph->saddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = iph->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
++              iph->daddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++      csum_replace4(&iph->check, addr, new_addr);
++
++      return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++}
++
++/*
++ * Apply destination NAT to the IPv4 header of an offloaded packet.
++ *
++ * In the original direction the destination address is replaced by the
++ * source address of the reply tuple; in the reply direction the source
++ * address is replaced by the destination address of the original tuple.
++ * Both the IPv4 header checksum and the layer 4 checksum are adjusted.
++ *
++ * Returns -1 for an unknown @dir, otherwise the result of
++ * nf_flow_nat_ip_l4proto().
++ */
++static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++                         struct iphdr *iph, unsigned int thoff,
++                         enum flow_offload_tuple_dir dir)
++{
++      __be32 addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = iph->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
++              iph->daddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = iph->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
++              iph->saddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++      /* the address change must be reflected in iph->check, as for SNAT */
++      csum_replace4(&iph->check, addr, new_addr);
++
++      return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++}
++
++/*
++ * Apply the source and/or destination NAT recorded in @flow to @skb.
++ *
++ * The port helpers call pskb_may_pull() and skb_try_make_writable(), which
++ * may reallocate the packet header, so the IPv4 header pointer is fetched
++ * again with ip_hdr() after each of them instead of being cached.
++ *
++ * Returns 0 on success and -1 if any rewrite step failed.
++ */
++static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++                        enum flow_offload_tuple_dir dir)
++{
++      struct iphdr *iph = ip_hdr(skb);
++      unsigned int thoff = iph->ihl * 4;
++
++      if (flow->flags & FLOW_OFFLOAD_SNAT &&
++          (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
++           nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
++              return -1;
++
++      iph = ip_hdr(skb);
++      if (flow->flags & FLOW_OFFLOAD_DNAT &&
++          (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
++           nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
++              return -1;
++
++      return 0;
++}
++
++static bool ip_has_options(unsigned int thoff)
++{
++      return thoff != sizeof(struct iphdr);
++}
++
++/*
++ * Build the flow table lookup key for an IPv4 packet received on @dev.
++ *
++ * Fills the addresses, ports, l3proto, l4proto and iifidx of @tuple.
++ * Returns 0 on success and -1 if the packet is not eligible: header not
++ * available, fragment, IP options present, or neither TCP nor UDP.
++ */
++static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
++                          struct flow_offload_tuple *tuple)
++{
++      struct flow_ports *ports;
++      unsigned int thoff;
++      struct iphdr *iph;
++
++      if (!pskb_may_pull(skb, sizeof(*iph)))
++              return -1;
++
++      iph = ip_hdr(skb);
++      thoff = iph->ihl * 4;
++
++      if (ip_is_fragment(iph) ||
++          unlikely(ip_has_options(thoff)))
++              return -1;
++
++      if (iph->protocol != IPPROTO_TCP &&
++          iph->protocol != IPPROTO_UDP)
++              return -1;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++              return -1;
++
++      /* pskb_may_pull() may have reallocated the header: reload iph */
++      iph = ip_hdr(skb);
++      ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
++
++      tuple->src_v4.s_addr    = iph->saddr;
++      tuple->dst_v4.s_addr    = iph->daddr;
++      tuple->src_port         = ports->source;
++      tuple->dst_port         = ports->dest;
++      tuple->l3proto          = AF_INET;
++      tuple->l4proto          = iph->protocol;
++      tuple->iifidx           = dev->ifindex;
++
++      return 0;
++}
++
++/* Based on ip_exceeds_mtu(). */
++static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
++{
++      if (skb->len <= mtu)
++              return false;
++
++      if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
++              return false;
++
++      if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
++              return false;
++
++      return true;
++}
++
++static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
++{
++      u32 mtu;
++
++      mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
++      if (__nf_flow_exceeds_mtu(skb, mtu))
++              return true;
++
++      return false;
++}
++
++static unsigned int
++nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
++                      const struct nf_hook_state *state)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct nf_flowtable *flow_table = priv;
++      struct flow_offload_tuple tuple = {};
++      enum flow_offload_tuple_dir dir;
++      struct flow_offload *flow;
++      struct net_device *outdev;
++      const struct rtable *rt;
++      struct iphdr *iph;
++      __be32 nexthop;
++
++      if (skb->protocol != htons(ETH_P_IP))
++              return NF_ACCEPT;
++
++      if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
++              return NF_ACCEPT;
++
++      tuplehash = flow_offload_lookup(flow_table, &tuple);
++      if (tuplehash == NULL)
++              return NF_ACCEPT;
++
++      outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
++      if (!outdev)
++              return NF_ACCEPT;
++
++      dir = tuplehash->tuple.dir;
++      flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
++
++      rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
++      if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
++              return NF_ACCEPT;
++
++      if (skb_try_make_writable(skb, sizeof(*iph)))
++              return NF_DROP;
++
++      if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
++          nf_flow_nat_ip(flow, skb, dir) < 0)
++              return NF_DROP;
++
++      flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++      iph = ip_hdr(skb);
++      ip_decrease_ttl(iph);
++
++      skb->dev = outdev;
++      nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
++      neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
++
++      return NF_STOLEN;
++}
++
++static struct nf_flowtable_type flowtable_ipv4 = {
++      .family         = NFPROTO_IPV4,
++      .params         = &nf_flow_offload_rhash_params,
++      .gc             = nf_flow_offload_work_gc,
++      .hook           = nf_flow_offload_ip_hook,
++      .owner          = THIS_MODULE,
++};
++
++static int __init nf_flow_ipv4_module_init(void)
++{
++      nft_register_flowtable_type(&flowtable_ipv4);
++
++      return 0;
++}
++
++static void __exit nf_flow_ipv4_module_exit(void)
++{
++      nft_unregister_flowtable_type(&flowtable_ipv4);
++}
++
++module_init(nf_flow_ipv4_module_init);
++module_exit(nf_flow_ipv4_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
diff --git a/target/linux/generic/backport-4.14/324-netfilter-flow-table-support-for-IPv6.patch b/target/linux/generic/backport-4.14/324-netfilter-flow-table-support-for-IPv6.patch
new file mode 100644 (file)
index 0000000..a5bbac4
--- /dev/null
@@ -0,0 +1,354 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:04:19 +0100
+Subject: [PATCH] netfilter: flow table support for IPv6
+
+This patch adds the IPv6 flow table type, that implements the datapath
+flow table to forward IPv6 traffic.
+
+This patch exports ip6_dst_mtu_forward() that is required to check for
+mtu to pass up packets that need PMTUD handling to the classic
+forwarding path.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c
+
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -913,6 +913,8 @@ static inline struct sk_buff *ip6_finish
+                             &inet6_sk(sk)->cork);
+ }
++unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
++
+ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
+                  struct flowi6 *fl6);
+ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -370,7 +370,7 @@ static inline int ip6_forward_finish(str
+       return dst_output(net, sk, skb);
+ }
+-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
++unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+ {
+       unsigned int mtu;
+       struct inet6_dev *idev;
+@@ -390,6 +390,7 @@ static unsigned int ip6_dst_mtu_forward(
+       return mtu;
+ }
++EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
+ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+ {
+--- a/net/ipv6/netfilter/Kconfig
++++ b/net/ipv6/netfilter/Kconfig
+@@ -71,6 +71,14 @@ config NFT_FIB_IPV6
+ endif # NF_TABLES_IPV6
+ endif # NF_TABLES
++config NF_FLOW_TABLE_IPV6
++      select NF_FLOW_TABLE
++      tristate "Netfilter flow table IPv6 module"
++      help
++        This option adds the flow table IPv6 support.
++
++        To compile it as a module, choose M here.
++
+ config NF_DUP_IPV6
+       tristate "Netfilter IPv6 packet duplication to alternate destination"
+       depends on !NF_CONNTRACK || NF_CONNTRACK
+--- a/net/ipv6/netfilter/Makefile
++++ b/net/ipv6/netfilter/Makefile
+@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redi
+ obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
+ obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
++# flow table support
++obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
++
+ # matches
+ obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
+ obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
+--- /dev/null
++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
+@@ -0,0 +1,277 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/ipv6.h>
++#include <linux/netdevice.h>
++#include <linux/ipv6.h>
++#include <net/ipv6.h>
++#include <net/ip6_route.h>
++#include <net/neighbour.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_tables.h>
++/* For layer 4 checksum field offset. */
++#include <linux/tcp.h>
++#include <linux/udp.h>
++
++static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
++                              struct in6_addr *addr,
++                              struct in6_addr *new_addr)
++{
++      struct tcphdr *tcph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*tcph)))
++              return -1;
++
++      tcph = (void *)(skb_network_header(skb) + thoff);
++      inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
++                                new_addr->s6_addr32, true);
++
++      return 0;
++}
++
++static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
++                              struct in6_addr *addr,
++                              struct in6_addr *new_addr)
++{
++      struct udphdr *udph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*udph)))
++              return -1;
++
++      udph = (void *)(skb_network_header(skb) + thoff);
++      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
++              inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
++                                        new_addr->s6_addr32, true);
++              if (!udph->check)
++                      udph->check = CSUM_MANGLED_0;
++      }
++
++      return 0;
++}
++
++/*
++ * Fix up the TCP/UDP checksum after an IPv6 address rewrite.
++ *
++ * Returns 0 on success (other protocols are left untouched) and -1 if the
++ * transport header could not be pulled or made writable. The error value
++ * has to be negative: the result is tested with "< 0" in
++ * nf_flow_nat_ipv6(), and NF_DROP is defined as 0, which would be read as
++ * success.
++ */
++static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
++                                  unsigned int thoff, struct in6_addr *addr,
++                                  struct in6_addr *new_addr)
++{
++      switch (ip6h->nexthdr) {
++      case IPPROTO_TCP:
++              if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
++                      return -1;
++              break;
++      case IPPROTO_UDP:
++              if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
++                      return -1;
++              break;
++      }
++
++      return 0;
++}
++
++static int nf_flow_snat_ipv6(const struct flow_offload *flow,
++                           struct sk_buff *skb, struct ipv6hdr *ip6h,
++                           unsigned int thoff,
++                           enum flow_offload_tuple_dir dir)
++{
++      struct in6_addr addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = ip6h->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
++              ip6h->saddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = ip6h->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
++              ip6h->daddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++
++      return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++}
++
++static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
++                           struct sk_buff *skb, struct ipv6hdr *ip6h,
++                           unsigned int thoff,
++                           enum flow_offload_tuple_dir dir)
++{
++      struct in6_addr addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = ip6h->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
++              ip6h->daddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = ip6h->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
++              ip6h->saddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++
++      return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++}
++
++/*
++ * Apply the source and/or destination NAT recorded in @flow to @skb.
++ *
++ * The port helpers call pskb_may_pull() and skb_try_make_writable(), which
++ * may reallocate the packet header, so the IPv6 header pointer is fetched
++ * again with ipv6_hdr() after each of them instead of being cached.
++ *
++ * Returns 0 on success and -1 if any rewrite step failed.
++ */
++static int nf_flow_nat_ipv6(const struct flow_offload *flow,
++                          struct sk_buff *skb,
++                          enum flow_offload_tuple_dir dir)
++{
++      struct ipv6hdr *ip6h = ipv6_hdr(skb);
++      unsigned int thoff = sizeof(*ip6h);
++
++      if (flow->flags & FLOW_OFFLOAD_SNAT &&
++          (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
++           nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
++              return -1;
++
++      ip6h = ipv6_hdr(skb);
++      if (flow->flags & FLOW_OFFLOAD_DNAT &&
++          (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
++           nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
++              return -1;
++
++      return 0;
++}
++
++/*
++ * Build the flow table lookup key for an IPv6 packet received on @dev.
++ *
++ * Fills the addresses, ports, l3proto, l4proto and iifidx of @tuple.
++ * Returns 0 on success and -1 if the header is not available or the next
++ * header is neither TCP nor UDP.
++ */
++static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
++                            struct flow_offload_tuple *tuple)
++{
++      struct flow_ports *ports;
++      struct ipv6hdr *ip6h;
++      unsigned int thoff;
++
++      if (!pskb_may_pull(skb, sizeof(*ip6h)))
++              return -1;
++
++      ip6h = ipv6_hdr(skb);
++
++      if (ip6h->nexthdr != IPPROTO_TCP &&
++          ip6h->nexthdr != IPPROTO_UDP)
++              return -1;
++
++      thoff = sizeof(*ip6h);
++      if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++              return -1;
++
++      /* pskb_may_pull() may have reallocated the header: reload ip6h */
++      ip6h = ipv6_hdr(skb);
++      ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
++
++      tuple->src_v6           = ip6h->saddr;
++      tuple->dst_v6           = ip6h->daddr;
++      tuple->src_port         = ports->source;
++      tuple->dst_port         = ports->dest;
++      tuple->l3proto          = AF_INET6;
++      tuple->l4proto          = ip6h->nexthdr;
++      tuple->iifidx           = dev->ifindex;
++
++      return 0;
++}
++
++/* Based on ip_exceeds_mtu(). */
++static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
++{
++      if (skb->len <= mtu)
++              return false;
++
++      if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
++              return false;
++
++      return true;
++}
++
++static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
++{
++      u32 mtu;
++
++      mtu = ip6_dst_mtu_forward(&rt->dst);
++      if (__nf_flow_exceeds_mtu(skb, mtu))
++              return true;
++
++      return false;
++}
++
++static unsigned int
++nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
++                        const struct nf_hook_state *state)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct nf_flowtable *flow_table = priv;
++      struct flow_offload_tuple tuple = {};
++      enum flow_offload_tuple_dir dir;
++      struct flow_offload *flow;
++      struct net_device *outdev;
++      struct in6_addr *nexthop;
++      struct ipv6hdr *ip6h;
++      struct rt6_info *rt;
++
++      if (skb->protocol != htons(ETH_P_IPV6))
++              return NF_ACCEPT;
++
++      if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
++              return NF_ACCEPT;
++
++      tuplehash = flow_offload_lookup(flow_table, &tuple);
++      if (tuplehash == NULL)
++              return NF_ACCEPT;
++
++      outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
++      if (!outdev)
++              return NF_ACCEPT;
++
++      dir = tuplehash->tuple.dir;
++      flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
++
++      rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
++      if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
++              return NF_ACCEPT;
++
++      if (skb_try_make_writable(skb, sizeof(*ip6h)))
++              return NF_DROP;
++
++      if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
++          nf_flow_nat_ipv6(flow, skb, dir) < 0)
++              return NF_DROP;
++
++      flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++      ip6h = ipv6_hdr(skb);
++      ip6h->hop_limit--;
++
++      skb->dev = outdev;
++      nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
++      neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
++
++      return NF_STOLEN;
++}
++
++static struct nf_flowtable_type flowtable_ipv6 = {
++      .family         = NFPROTO_IPV6,
++      .params         = &nf_flow_offload_rhash_params,
++      .gc             = nf_flow_offload_work_gc,
++      .hook           = nf_flow_offload_ipv6_hook,
++      .owner          = THIS_MODULE,
++};
++
++static int __init nf_flow_ipv6_module_init(void)
++{
++      nft_register_flowtable_type(&flowtable_ipv6);
++
++      return 0;
++}
++
++static void __exit nf_flow_ipv6_module_exit(void)
++{
++      nft_unregister_flowtable_type(&flowtable_ipv6);
++}
++
++module_init(nf_flow_ipv6_module_init);
++module_exit(nf_flow_ipv6_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
diff --git a/target/linux/generic/backport-4.14/325-netfilter-flow-table-support-for-the-mixed-IPv4-IPv6.patch b/target/linux/generic/backport-4.14/325-netfilter-flow-table-support-for-the-mixed-IPv4-IPv6.patch
new file mode 100644 (file)
index 0000000..9fcb1be
--- /dev/null
@@ -0,0 +1,141 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:04:22 +0100
+Subject: [PATCH] netfilter: flow table support for the mixed IPv4/IPv6 family
+
+This patch adds the mixed IPv4/IPv6 (inet) flow table type, whose hook
+hands packets to the IPv4 or IPv6 flow table datapath by skb protocol.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/netfilter/nf_flow_table_inet.c
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -111,6 +111,11 @@ struct flow_ports {
+       __be16 source, dest;
+ };
++unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
++                                   const struct nf_hook_state *state);
++unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
++                                     const struct nf_hook_state *state);
++
+ #define MODULE_ALIAS_NF_FLOWTABLE(family)     \
+       MODULE_ALIAS("nf-flowtable-" __stringify(family))
+--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
+@@ -202,7 +202,7 @@ static bool nf_flow_exceeds_mtu(struct s
+       return false;
+ }
+-static unsigned int
++unsigned int
+ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+                       const struct nf_hook_state *state)
+ {
+@@ -254,6 +254,7 @@ nf_flow_offload_ip_hook(void *priv, stru
+       return NF_STOLEN;
+ }
++EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+ static struct nf_flowtable_type flowtable_ipv4 = {
+       .family         = NFPROTO_IPV4,
+--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
+@@ -196,7 +196,7 @@ static bool nf_flow_exceeds_mtu(struct s
+       return false;
+ }
+-static unsigned int
++unsigned int
+ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+                         const struct nf_hook_state *state)
+ {
+@@ -248,6 +248,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
+       return NF_STOLEN;
+ }
++EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+ static struct nf_flowtable_type flowtable_ipv6 = {
+       .family         = NFPROTO_IPV6,
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -661,6 +661,14 @@ endif # NF_TABLES_NETDEV
+ endif # NF_TABLES
++config NF_FLOW_TABLE_INET
++      select NF_FLOW_TABLE
++      tristate "Netfilter flow table mixed IPv4/IPv6 module"
++      help
++          This option adds the flow table mixed IPv4/IPv6 support.
++
++        To compile it as a module, choose M here.
++
+ config NF_FLOW_TABLE
+       tristate "Netfilter flow table module"
+       help
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -112,6 +112,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV)       += nft_fwd_
+ # flow table infrastructure
+ obj-$(CONFIG_NF_FLOW_TABLE)   += nf_flow_table.o
++obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+ # generic X tables 
+ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+--- /dev/null
++++ b/net/netfilter/nf_flow_table_inet.c
+@@ -0,0 +1,48 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_tables.h>
++
++/*
++ * Hook of the mixed IPv4/IPv6 flow table: forward the packet to the
++ * per-family hook selected by skb->protocol. Packets that are neither
++ * IPv4 nor IPv6 get NF_ACCEPT.
++ */
++static unsigned int
++nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
++                        const struct nf_hook_state *state)
++{
++      const __be16 proto = skb->protocol;
++
++      if (proto == htons(ETH_P_IP))
++              return nf_flow_offload_ip_hook(priv, skb, state);
++      if (proto == htons(ETH_P_IPV6))
++              return nf_flow_offload_ipv6_hook(priv, skb, state);
++
++      /* Not handled by either per-family hook. */
++      return NF_ACCEPT;
++}
++
++/*
++ * Flow table type for the mixed IPv4/IPv6 (NFPROTO_INET) family. Its hook,
++ * nf_flow_offload_inet_hook(), dispatches to the IPv4 or IPv6 hook.
++ */
++static struct nf_flowtable_type flowtable_inet = {
++      .family         = NFPROTO_INET,
++      .params         = &nf_flow_offload_rhash_params,
++      .gc             = nf_flow_offload_work_gc,
++      .hook           = nf_flow_offload_inet_hook,
++      .owner          = THIS_MODULE,
++};
++
++/*
++ * Module load: register the mixed IPv4/IPv6 flow table type through
++ * nft_register_flowtable_type(). Always returns 0.
++ */
++static int __init nf_flow_inet_module_init(void)
++{
++      nft_register_flowtable_type(&flowtable_inet);
++
++      return 0;
++}
++
++/* Module unload: unregister the mixed IPv4/IPv6 flow table type. */
++static void __exit nf_flow_inet_module_exit(void)
++{
++      nft_unregister_flowtable_type(&flowtable_inet);
++}
++
++module_init(nf_flow_inet_module_init);
++module_exit(nf_flow_inet_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
diff --git a/target/linux/generic/backport-4.14/326-netfilter-nf_tables-flow-offload-expression.patch b/target/linux/generic/backport-4.14/326-netfilter-nf_tables-flow-offload-expression.patch
new file mode 100644 (file)
index 0000000..86f1f8a
--- /dev/null
@@ -0,0 +1,332 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:04:26 +0100
+Subject: [PATCH] netfilter: nf_tables: flow offload expression
+
+Add new instruction for the nf_tables VM that allows us to specify what
+flows are offloaded into a given flow table via name. This new
+instruction creates the flow entry and adds it to the flow table.
+
+Only established flows, ie. we have seen traffic in both directions, are
+added to the flow table. You can still decide to offload entries at a
+later stage via packet counting or checking the ct status in case you
+want to offload assured conntracks.
+
+This new extension depends on the conntrack subsystem.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/netfilter/nft_flow_offload.c
+
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -957,6 +957,17 @@ enum nft_ct_attributes {
+ };
+ #define NFTA_CT_MAX           (__NFTA_CT_MAX - 1)
++/**
++ * enum nft_offload_attributes - flow offload expression netlink attributes
++ *
++ * @NFTA_FLOW_UNSPEC: unspecified attribute (first enumerator, value 0)
++ * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING)
++ * @__NFTA_FLOW_MAX: end marker, used only to derive NFTA_FLOW_MAX
++ */
++enum nft_offload_attributes {
++      NFTA_FLOW_UNSPEC,
++      NFTA_FLOW_TABLE_NAME,
++      __NFTA_FLOW_MAX,
++};
++#define NFTA_FLOW_MAX         (__NFTA_FLOW_MAX - 1)
++
++
+ enum nft_limit_type {
+       NFT_LIMIT_PKTS,
+       NFT_LIMIT_PKT_BYTES
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -509,6 +509,13 @@ config NFT_CT
+         This option adds the "ct" expression that you can use to match
+         connection tracking information such as the flow state.
++config NFT_FLOW_OFFLOAD
++      depends on NF_CONNTRACK
++      tristate "Netfilter nf_tables hardware flow offload module"
++      help
++        This option adds the "flow_offload" expression that you can use to
++        choose what flows are placed into the hardware.
++
+ config NFT_SET_RBTREE
+       tristate "Netfilter nf_tables rbtree set module"
+       help
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_META)               += nft_meta.o
+ obj-$(CONFIG_NFT_RT)          += nft_rt.o
+ obj-$(CONFIG_NFT_NUMGEN)      += nft_numgen.o
+ obj-$(CONFIG_NFT_CT)          += nft_ct.o
++obj-$(CONFIG_NFT_FLOW_OFFLOAD)        += nft_flow_offload.o
+ obj-$(CONFIG_NFT_LIMIT)               += nft_limit.o
+ obj-$(CONFIG_NFT_NAT)         += nft_nat.o
+ obj-$(CONFIG_NFT_OBJREF)      += nft_objref.o
+--- /dev/null
++++ b/net/netfilter/nft_flow_offload.c
+@@ -0,0 +1,264 @@
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/netlink.h>
++#include <linux/netfilter.h>
++#include <linux/workqueue.h>
++#include <linux/spinlock.h>
++#include <linux/netfilter/nf_tables.h>
++#include <net/ip.h> /* for ipv4 options. */
++#include <net/netfilter/nf_tables.h>
++#include <net/netfilter/nf_tables_core.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <linux/netfilter/nf_conntrack_common.h>
++#include <net/netfilter/nf_flow_table.h>
++
++/*
++ * Private data of one "flow_offload" expression: the flow table that
++ * nft_flow_offload_init() looked up by name and that
++ * nft_flow_offload_eval() adds new flow entries to.
++ */
++struct nft_flow_offload {
++      struct nft_flowtable    *flowtable;
++};
++
++/*
++ * Fill @route with the dst and interface index for both directions of @ct.
++ *
++ * Direction @dir uses the dst already attached to the packet and the input
++ * device; the opposite direction uses a route looked up with nf_route()
++ * towards the destination address of ct->tuplehash[!dir] and the output
++ * device. For families other than IPv4/IPv6 the lookup key stays zeroed.
++ *
++ * Returns 0 on success, -ENOENT when nf_route() yields no dst.
++ */
++static int nft_flow_route(const struct nft_pktinfo *pkt,
++                        const struct nf_conn *ct,
++                        struct nf_flow_route *route,
++                        enum ip_conntrack_dir dir)
++{
++      struct dst_entry *pkt_dst = skb_dst(pkt->skb);
++      struct dst_entry *reverse_dst = NULL;
++      struct flowi fl;
++
++      memset(&fl, 0, sizeof(fl));
++      if (nft_pf(pkt) == NFPROTO_IPV4)
++              fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
++      else if (nft_pf(pkt) == NFPROTO_IPV6)
++              fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
++
++      nf_route(nft_net(pkt), &reverse_dst, &fl, false, nft_pf(pkt));
++      if (!reverse_dst)
++              return -ENOENT;
++
++      /* This direction: what the packet itself carries. */
++      route->tuple[dir].dst           = pkt_dst;
++      route->tuple[dir].ifindex       = nft_in(pkt)->ifindex;
++
++      /* Opposite direction: the freshly looked-up route. */
++      route->tuple[!dir].dst          = reverse_dst;
++      route->tuple[!dir].ifindex      = nft_out(pkt)->ifindex;
++
++      return 0;
++}
++
++/*
++ * Decide whether a packet must not be used to set up an offloaded flow.
++ *
++ * Returns true when the packet has a security path attached or, for IPv4
++ * packets, when IP options are present; false otherwise.
++ *
++ * IPCB() interprets skb->cb as IPv4 control data, so the options length is
++ * only read when skb->protocol says the packet is IPv4. This expression
++ * also handles IPv6 flows, for which skb->cb does not hold a
++ * struct inet_skb_parm.
++ */
++static bool nft_flow_offload_skip(struct sk_buff *skb)
++{
++      if (skb_sec_path(skb))
++              return true;
++
++      if (skb->protocol == htons(ETH_P_IP)) {
++              const struct ip_options *opt = &(IPCB(skb)->opt);
++
++              if (unlikely(opt->optlen))
++                      return true;
++      }
++
++      return false;
++}
++
++/*
++ * Evaluate the "flow_offload" expression for one packet.
++ *
++ * If the packet's conntrack entry qualifies, a flow entry is allocated and
++ * added to the flow table this expression refers to, and the function
++ * returns without touching the verdict. On every other path (packet
++ * skipped, conntrack not eligible, already offloaded, or any failure) the
++ * verdict is set to NFT_BREAK.
++ *
++ * NOTE(review): whether flow_offload_alloc() takes its own references on
++ * the route's dst entries, and therefore whether the reference obtained by
++ * nft_flow_route() for the reverse direction should also be dropped on the
++ * success path, cannot be determined from this file - confirm against
++ * flow_offload_alloc()/flow_offload_free().
++ */
++static void nft_flow_offload_eval(const struct nft_expr *expr,
++                                struct nft_regs *regs,
++                                const struct nft_pktinfo *pkt)
++{
++      struct nft_flow_offload *priv = nft_expr_priv(expr);
++      struct nf_flowtable *flowtable = &priv->flowtable->data;
++      enum ip_conntrack_info ctinfo;
++      struct nf_flow_route route;
++      struct flow_offload *flow;
++      enum ip_conntrack_dir dir;
++      struct nf_conn *ct;
++      int ret;
++
++      if (nft_flow_offload_skip(pkt->skb))
++              goto out;
++
++      /* Nothing to offload without a conntrack entry. */
++      ct = nf_ct_get(pkt->skb, &ctinfo);
++      if (!ct)
++              goto out;
++
++      /* Only TCP and UDP connections are considered. */
++      switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
++      case IPPROTO_TCP:
++      case IPPROTO_UDP:
++              break;
++      default:
++              goto out;
++      }
++
++      /* Connections with the helper status bit set are left alone. */
++      if (test_bit(IPS_HELPER_BIT, &ct->status))
++              goto out;
++
++      /* Packets in the NEW or RELATED conntrack state do not qualify. */
++      if (ctinfo == IP_CT_NEW ||
++          ctinfo == IP_CT_RELATED)
++              goto out;
++
++      /*
++       * Atomically claim the connection; if the bit was already set there
++       * is nothing more to do for this packet.
++       */
++      if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
++              goto out;
++
++      dir = CTINFO2DIR(ctinfo);
++      if (nft_flow_route(pkt, ct, &route, dir) < 0)
++              goto err_flow_route;
++
++      flow = flow_offload_alloc(ct, &route);
++      if (!flow)
++              goto err_flow_alloc;
++
++      ret = flow_offload_add(flowtable, flow);
++      if (ret < 0)
++              goto err_flow_add;
++
++      return;
++
++      /* Unwind in reverse order of acquisition; labels fall through. */
++err_flow_add:
++      flow_offload_free(flow);
++err_flow_alloc:
++      /* Drop the dst that nft_flow_route() looked up for the other direction. */
++      dst_release(route.tuple[!dir].dst);
++err_flow_route:
++      /* Give the connection back so a later packet can retry. */
++      clear_bit(IPS_OFFLOAD_BIT, &ct->status);
++out:
++      regs->verdict.code = NFT_BREAK;
++}
++
++/*
++ * Chain validation callback: the expression is only accepted in chains
++ * attached to the forward hook. Returns the result of
++ * nft_chain_validate_hooks().
++ */
++static int nft_flow_offload_validate(const struct nft_ctx *ctx,
++                                   const struct nft_expr *expr,
++                                   const struct nft_data **data)
++{
++      return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_FORWARD);
++}
++
++/*
++ * Initialise a "flow_offload" expression from its netlink attributes.
++ *
++ * Looks up the flow table named by NFTA_FLOW_TABLE_NAME in the rule's table
++ * (next generation), acquires conntrack for the table's family through
++ * nf_ct_netns_get() and, once that succeeded, stores the flow table in the
++ * expression and bumps its use counter.
++ *
++ * Returns 0 on success, -EINVAL if the name attribute is missing, the
++ * lookup error if the flow table does not exist, or the error returned by
++ * nf_ct_netns_get().
++ *
++ * The use counter is taken last so that a failing nf_ct_netns_get() cannot
++ * leave it incremented.
++ * NOTE(review): this relies on ->destroy() not being invoked for an
++ * expression whose ->init() failed - confirm in nf_tables_api.c.
++ */
++static int nft_flow_offload_init(const struct nft_ctx *ctx,
++                               const struct nft_expr *expr,
++                               const struct nlattr * const tb[])
++{
++      struct nft_flow_offload *priv = nft_expr_priv(expr);
++      u8 genmask = nft_genmask_next(ctx->net);
++      struct nft_flowtable *flowtable;
++      int err;
++
++      if (!tb[NFTA_FLOW_TABLE_NAME])
++              return -EINVAL;
++
++      flowtable = nf_tables_flowtable_lookup(ctx->table,
++                                             tb[NFTA_FLOW_TABLE_NAME],
++                                             genmask);
++      if (IS_ERR(flowtable))
++              return PTR_ERR(flowtable);
++
++      err = nf_ct_netns_get(ctx->net, ctx->afi->family);
++      if (err)
++              return err;
++
++      priv->flowtable = flowtable;
++      flowtable->use++;
++
++      return 0;
++}
++
++/*
++ * Tear down a "flow_offload" expression: drop the use count held on its
++ * flow table and release the conntrack reference for the table's family.
++ */
++static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
++                                   const struct nft_expr *expr)
++{
++      const struct nft_flow_offload *data = nft_expr_priv(expr);
++      struct nft_flowtable *ft = data->flowtable;
++
++      ft->use--;
++      nf_ct_netns_put(ctx->net, ctx->afi->family);
++}
++
++/*
++ * Dump the expression to netlink: emits the flow table name as
++ * NFTA_FLOW_TABLE_NAME. Returns 0 on success, -1 if the attribute could
++ * not be added to @skb.
++ */
++static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
++{
++      const struct nft_flow_offload *data = nft_expr_priv(expr);
++
++      return nla_put_string(skb, NFTA_FLOW_TABLE_NAME,
++                            data->flowtable->name) ? -1 : 0;
++}
++
++/*
++ * Forward declaration: the ops table and the expression type defined
++ * after it point at each other.
++ */
++static struct nft_expr_type nft_flow_offload_type;
++/* Callbacks implementing the "flow_offload" expression. */
++static const struct nft_expr_ops nft_flow_offload_ops = {
++      .type           = &nft_flow_offload_type,
++      .size           = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
++      .eval           = nft_flow_offload_eval,
++      .init           = nft_flow_offload_init,
++      .destroy        = nft_flow_offload_destroy,
++      .validate       = nft_flow_offload_validate,
++      .dump           = nft_flow_offload_dump,
++};
++
++/*
++ * Expression type registered under the name "flow_offload"; accepts
++ * attributes up to NFTA_FLOW_MAX.
++ */
++static struct nft_expr_type nft_flow_offload_type __read_mostly = {
++      .name           = "flow_offload",
++      .ops            = &nft_flow_offload_ops,
++      .maxattr        = NFTA_FLOW_MAX,
++      .owner          = THIS_MODULE,
++};
++
++/*
++ * Per-flow iterator callback: call flow_offload_dead() on @flow.
++ *
++ * @data is either NULL, in which case every flow is affected, or a
++ * struct net_device, in which case only flows whose tuplehash[0] input
++ * interface index equals that device's ifindex are affected.
++ */
++static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
++{
++      const struct net_device *netdev = data;
++
++      if (!netdev || flow->tuplehash[0].tuple.iifidx == netdev->ifindex)
++              flow_offload_dead(flow);
++}
++
++/*
++ * Per-flowtable iterator callback: run flow_offload_iterate_cleanup() over
++ * every flow of @flowtable, passing @data through unchanged.
++ */
++static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
++                                           void *data)
++{
++      nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
++}
++
++/*
++ * Netdevice notifier callback. On NETDEV_DOWN, walk the flow tables of the
++ * device's network namespace and run the cleanup callback with the device
++ * as argument. All other events are ignored. Always returns NOTIFY_DONE.
++ */
++static int flow_offload_netdev_event(struct notifier_block *this,
++                                   unsigned long event, void *ptr)
++{
++      if (event == NETDEV_DOWN) {
++              struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++
++              nft_flow_table_iterate(dev_net(netdev),
++                                     nft_flow_offload_iterate_cleanup,
++                                     netdev);
++      }
++
++      return NOTIFY_DONE;
++}
++
++/* Notifier block that routes netdevice events to flow_offload_netdev_event(). */
++static struct notifier_block flow_offload_netdev_notifier = {
++      .notifier_call  = flow_offload_netdev_event,
++};
++
++/*
++ * Module load: subscribe to netdevice events, then register the
++ * "flow_offload" expression type.
++ *
++ * Returns 0 on success or the negative error of whichever registration
++ * failed. A failing notifier registration is reported to the caller rather
++ * than ignored, so the expression is never registered without its
++ * notifier; if the expression registration fails, the notifier is
++ * unregistered again.
++ */
++static int __init nft_flow_offload_module_init(void)
++{
++      int err;
++
++      err = register_netdevice_notifier(&flow_offload_netdev_notifier);
++      if (err < 0)
++              return err;
++
++      err = nft_register_expr(&nft_flow_offload_type);
++      if (err < 0)
++              goto register_expr;
++
++      return 0;
++
++register_expr:
++      unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++      return err;
++}
++
++/*
++ * Module unload: unregister the expression type and the netdevice
++ * notifier, then, holding the rtnl lock, walk the flow tables of every
++ * network namespace with a NULL device argument, which makes the cleanup
++ * callback call flow_offload_dead() on every flow.
++ */
++static void __exit nft_flow_offload_module_exit(void)
++{
++      struct net *net;
++
++      nft_unregister_expr(&nft_flow_offload_type);
++      unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++      rtnl_lock();
++      for_each_net(net)
++              nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
++      rtnl_unlock();
++}
++
++module_init(nft_flow_offload_module_init);
++module_exit(nft_flow_offload_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_ALIAS_NFT_EXPR("flow_offload");
diff --git a/target/linux/generic/backport-4.14/327-netfilter-nf_tables-remove-nhooks-field-from-struct-.patch b/target/linux/generic/backport-4.14/327-netfilter-nf_tables-remove-nhooks-field-from-struct-.patch
new file mode 100644 (file)
index 0000000..3446719
--- /dev/null
@@ -0,0 +1,113 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 19 Dec 2017 13:53:45 +0100
+Subject: [PATCH] netfilter: nf_tables: remove nhooks field from struct
+ nft_af_info
+
+We already validate the hook through bitmask, so this check is
+superfluous. When removing this, this patch is also fixing a bug in the
+new flowtable codebase, since ctx->afi points to the table family
+instead of the netdev family which is where the flowtable is really
+hooked in.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -963,7 +963,6 @@ enum nft_af_flags {
+  *
+  *    @list: used internally
+  *    @family: address family
+- *    @nhooks: number of hooks in this family
+  *    @owner: module owner
+  *    @tables: used internally
+  *    @flags: family flags
+@@ -971,7 +970,6 @@ enum nft_af_flags {
+ struct nft_af_info {
+       struct list_head                list;
+       int                             family;
+-      unsigned int                    nhooks;
+       struct module                   *owner;
+       struct list_head                tables;
+       u32                             flags;
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -44,7 +44,6 @@ nft_do_chain_bridge(void *priv,
+ static struct nft_af_info nft_af_bridge __read_mostly = {
+       .family         = NFPROTO_BRIDGE,
+-      .nhooks         = NF_BR_NUMHOOKS,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/ipv4/netfilter/nf_tables_arp.c
++++ b/net/ipv4/netfilter/nf_tables_arp.c
+@@ -29,7 +29,6 @@ nft_do_chain_arp(void *priv,
+ static struct nft_af_info nft_af_arp __read_mostly = {
+       .family         = NFPROTO_ARP,
+-      .nhooks         = NF_ARP_NUMHOOKS,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -32,7 +32,6 @@ static unsigned int nft_do_chain_ipv4(vo
+ static struct nft_af_info nft_af_ipv4 __read_mostly = {
+       .family         = NFPROTO_IPV4,
+-      .nhooks         = NF_INET_NUMHOOKS,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -30,7 +30,6 @@ static unsigned int nft_do_chain_ipv6(vo
+ static struct nft_af_info nft_af_ipv6 __read_mostly = {
+       .family         = NFPROTO_IPV6,
+-      .nhooks         = NF_INET_NUMHOOKS,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1328,9 +1328,6 @@ static int nft_chain_parse_hook(struct n
+               return -EINVAL;
+       hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+-      if (hook->num >= afi->nhooks)
+-              return -EINVAL;
+-
+       hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+       type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+@@ -4917,7 +4914,7 @@ static int nf_tables_flowtable_parse_hoo
+               return -EINVAL;
+       hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+-      if (hooknum >= ctx->afi->nhooks)
++      if (hooknum != NF_NETDEV_INGRESS)
+               return -EINVAL;
+       priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+--- a/net/netfilter/nf_tables_inet.c
++++ b/net/netfilter/nf_tables_inet.c
+@@ -40,7 +40,6 @@ static unsigned int nft_do_chain_inet(vo
+ static struct nft_af_info nft_af_inet __read_mostly = {
+       .family         = NFPROTO_INET,
+-      .nhooks         = NF_INET_NUMHOOKS,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -40,7 +40,6 @@ nft_do_chain_netdev(void *priv, struct s
+ static struct nft_af_info nft_af_netdev __read_mostly = {
+       .family         = NFPROTO_NETDEV,
+-      .nhooks         = NF_NETDEV_NUMHOOKS,
+       .owner          = THIS_MODULE,
+       .flags          = NFT_AF_NEEDS_DEV,
+ };
diff --git a/target/linux/generic/backport-4.14/328-netfilter-nf_tables-fix-a-typo-in-nf_tables_getflowt.patch b/target/linux/generic/backport-4.14/328-netfilter-nf_tables-fix-a-typo-in-nf_tables_getflowt.patch
new file mode 100644 (file)
index 0000000..b5c1b19
--- /dev/null
@@ -0,0 +1,22 @@
+From: Wei Yongjun <weiyongjun1@huawei.com>
+Date: Wed, 10 Jan 2018 07:04:54 +0000
+Subject: [PATCH] netfilter: nf_tables: fix a typo in nf_tables_getflowtable()
+
+Fix a typo, we should check 'flowtable' instead of 'table'.
+
+Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend")
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -5341,7 +5341,7 @@ static int nf_tables_getflowtable(struct
+       flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+                                              genmask);
+-      if (IS_ERR(table))
++      if (IS_ERR(flowtable))
+               return PTR_ERR(flowtable);
+       skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
diff --git a/target/linux/generic/backport-4.14/329-netfilter-improve-flow-table-Kconfig-dependencies.patch b/target/linux/generic/backport-4.14/329-netfilter-improve-flow-table-Kconfig-dependencies.patch
new file mode 100644 (file)
index 0000000..c897c36
--- /dev/null
@@ -0,0 +1,106 @@
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 10 Jan 2018 18:10:59 +0100
+Subject: [PATCH] netfilter: improve flow table Kconfig dependencies
+
+The newly added NF_FLOW_TABLE options cause some build failures in
+randconfig kernels:
+
+- when CONFIG_NF_CONNTRACK is disabled, or is a loadable module but
+  NF_FLOW_TABLE is built-in:
+
+  In file included from net/netfilter/nf_flow_table.c:8:0:
+  include/net/netfilter/nf_conntrack.h:59:22: error: field 'ct_general' has incomplete type
+    struct nf_conntrack ct_general;
+  include/net/netfilter/nf_conntrack.h: In function 'nf_ct_get':
+  include/net/netfilter/nf_conntrack.h:148:15: error: 'const struct sk_buff' has no member named '_nfct'
+  include/net/netfilter/nf_conntrack.h: In function 'nf_ct_put':
+  include/net/netfilter/nf_conntrack.h:157:2: error: implicit declaration of function 'nf_conntrack_put'; did you mean 'nf_ct_put'? [-Werror=implicit-function-declaration]
+
+  net/netfilter/nf_flow_table.o: In function `nf_flow_offload_work_gc':
+  (.text+0x1540): undefined reference to `nf_ct_delete'
+
+- when CONFIG_NF_TABLES is disabled:
+
+  In file included from net/ipv6/netfilter/nf_flow_table_ipv6.c:13:0:
+  include/net/netfilter/nf_tables.h: In function 'nft_gencursor_next':
+  include/net/netfilter/nf_tables.h:1189:14: error: 'const struct net' has no member named 'nft'; did you mean 'nf'?
+
+- when CONFIG_NF_FLOW_TABLE_INET is enabled, but NF_FLOW_TABLE_IPV4
+  or NF_FLOW_TABLE_IPV6 are not, or are loadable modules
+
+  net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook':
+  nf_flow_table_inet.c:(.text+0x94): undefined reference to `nf_flow_offload_ipv6_hook'
+  nf_flow_table_inet.c:(.text+0x40): undefined reference to `nf_flow_offload_ip_hook'
+
+- when CONFIG_NF_FLOW_TABLES is disabled, but the other options are
+  enabled:
+
+  net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook':
+  nf_flow_table_inet.c:(.text+0x6c): undefined reference to `nf_flow_offload_ipv6_hook'
+  net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_exit':
+  nf_flow_table_inet.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type'
+  net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_init':
+  nf_flow_table_inet.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type'
+  net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_exit':
+  nf_flow_table_ipv4.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type'
+  net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_init':
+  nf_flow_table_ipv4.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type'
+
+This adds additional Kconfig dependencies to ensure that NF_CONNTRACK and NF_TABLES
+are always visible from NF_FLOW_TABLE, and that the internal dependencies between
+the four new modules are met.
+
+Fixes: 7c23b629a808 ("netfilter: flow table support for the mixed IPv4/IPv6 family")
+Fixes: 0995210753a2 ("netfilter: flow table support for IPv6")
+Fixes: 97add9f0d66d ("netfilter: flow table support for IPv4")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -78,8 +78,9 @@ config NF_TABLES_ARP
+ endif # NF_TABLES
+ config NF_FLOW_TABLE_IPV4
+-      select NF_FLOW_TABLE
+       tristate "Netfilter flow table IPv4 module"
++      depends on NF_CONNTRACK && NF_TABLES
++      select NF_FLOW_TABLE
+       help
+         This option adds the flow table IPv4 support.
+--- a/net/ipv6/netfilter/Kconfig
++++ b/net/ipv6/netfilter/Kconfig
+@@ -72,8 +72,9 @@ endif # NF_TABLES_IPV6
+ endif # NF_TABLES
+ config NF_FLOW_TABLE_IPV6
+-      select NF_FLOW_TABLE
+       tristate "Netfilter flow table IPv6 module"
++      depends on NF_CONNTRACK && NF_TABLES
++      select NF_FLOW_TABLE
+       help
+         This option adds the flow table IPv6 support.
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -669,8 +669,9 @@ endif # NF_TABLES_NETDEV
+ endif # NF_TABLES
+ config NF_FLOW_TABLE_INET
+-      select NF_FLOW_TABLE
+       tristate "Netfilter flow table mixed IPv4/IPv6 module"
++      depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
++      select NF_FLOW_TABLE
+       help
+           This option adds the flow table mixed IPv4/IPv6 support.
+@@ -678,6 +679,7 @@ config NF_FLOW_TABLE_INET
+ config NF_FLOW_TABLE
+       tristate "Netfilter flow table module"
++      depends on NF_CONNTRACK && NF_TABLES
+       help
+         This option adds the flow table core infrastructure.
diff --git a/target/linux/generic/backport-4.14/330-netfilter-nf_tables-remove-flag-field-from-struct-nf.patch b/target/linux/generic/backport-4.14/330-netfilter-nf_tables-remove-flag-field-from-struct-nf.patch
new file mode 100644 (file)
index 0000000..42aa7b1
--- /dev/null
@@ -0,0 +1,59 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 19 Dec 2017 14:07:52 +0100
+Subject: [PATCH] netfilter: nf_tables: remove flag field from struct
+ nft_af_info
+
+Replace it by a direct check for the netdev protocol family.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -954,10 +954,6 @@ struct nft_table {
+       char                            *name;
+ };
+-enum nft_af_flags {
+-      NFT_AF_NEEDS_DEV        = (1 << 0),
+-};
+-
+ /**
+  *    struct nft_af_info - nf_tables address family info
+  *
+@@ -965,14 +961,12 @@ enum nft_af_flags {
+  *    @family: address family
+  *    @owner: module owner
+  *    @tables: used internally
+- *    @flags: family flags
+  */
+ struct nft_af_info {
+       struct list_head                list;
+       int                             family;
+       struct module                   *owner;
+       struct list_head                tables;
+-      u32                             flags;
+ };
+ int nft_register_afinfo(struct net *, struct nft_af_info *);
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1345,7 +1345,7 @@ static int nft_chain_parse_hook(struct n
+       hook->type = type;
+       hook->dev = NULL;
+-      if (afi->flags & NFT_AF_NEEDS_DEV) {
++      if (afi->family == NFPROTO_NETDEV) {
+               char ifname[IFNAMSIZ];
+               if (!ha[NFTA_HOOK_DEV]) {
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -41,7 +41,6 @@ nft_do_chain_netdev(void *priv, struct s
+ static struct nft_af_info nft_af_netdev __read_mostly = {
+       .family         = NFPROTO_NETDEV,
+       .owner          = THIS_MODULE,
+-      .flags          = NFT_AF_NEEDS_DEV,
+ };
+ static int nf_tables_netdev_init_net(struct net *net)
diff --git a/target/linux/generic/backport-4.14/331-netfilter-nf_tables-no-need-for-struct-nft_af_info-t.patch b/target/linux/generic/backport-4.14/331-netfilter-nf_tables-no-need-for-struct-nft_af_info-t.patch
new file mode 100644 (file)
index 0000000..a9f13c4
--- /dev/null
@@ -0,0 +1,80 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 19 Dec 2017 12:17:52 +0100
+Subject: [PATCH] netfilter: nf_tables: no need for struct nft_af_info to
+ enable/disable table
+
+nf_tables_table_enable() and nf_tables_table_disable() take a pointer to
+struct nft_af_info that is never used, remove it.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -611,10 +611,7 @@ err:
+       return err;
+ }
+-static void _nf_tables_table_disable(struct net *net,
+-                                   const struct nft_af_info *afi,
+-                                   struct nft_table *table,
+-                                   u32 cnt)
++static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
+ {
+       struct nft_chain *chain;
+       u32 i = 0;
+@@ -632,9 +629,7 @@ static void _nf_tables_table_disable(str
+       }
+ }
+-static int nf_tables_table_enable(struct net *net,
+-                                const struct nft_af_info *afi,
+-                                struct nft_table *table)
++static int nf_tables_table_enable(struct net *net, struct nft_table *table)
+ {
+       struct nft_chain *chain;
+       int err, i = 0;
+@@ -654,15 +649,13 @@ static int nf_tables_table_enable(struct
+       return 0;
+ err:
+       if (i)
+-              _nf_tables_table_disable(net, afi, table, i);
++              nft_table_disable(net, table, i);
+       return err;
+ }
+-static void nf_tables_table_disable(struct net *net,
+-                                  const struct nft_af_info *afi,
+-                                  struct nft_table *table)
++static void nf_tables_table_disable(struct net *net, struct nft_table *table)
+ {
+-      _nf_tables_table_disable(net, afi, table, 0);
++      nft_table_disable(net, table, 0);
+ }
+ static int nf_tables_updtable(struct nft_ctx *ctx)
+@@ -691,7 +684,7 @@ static int nf_tables_updtable(struct nft
+               nft_trans_table_enable(trans) = false;
+       } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+                  ctx->table->flags & NFT_TABLE_F_DORMANT) {
+-              ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
++              ret = nf_tables_table_enable(ctx->net, ctx->table);
+               if (ret >= 0) {
+                       ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+                       nft_trans_table_enable(trans) = true;
+@@ -5719,7 +5712,6 @@ static int nf_tables_commit(struct net *
+                       if (nft_trans_table_update(trans)) {
+                               if (!nft_trans_table_enable(trans)) {
+                                       nf_tables_table_disable(net,
+-                                                              trans->ctx.afi,
+                                                               trans->ctx.table);
+                                       trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+                               }
+@@ -5881,7 +5873,6 @@ static int nf_tables_abort(struct net *n
+                       if (nft_trans_table_update(trans)) {
+                               if (nft_trans_table_enable(trans)) {
+                                       nf_tables_table_disable(net,
+-                                                              trans->ctx.afi,
+                                                               trans->ctx.table);
+                                       trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+                               }
diff --git a/target/linux/generic/backport-4.14/332-netfilter-nf_tables-remove-struct-nft_af_info-parame.patch b/target/linux/generic/backport-4.14/332-netfilter-nf_tables-remove-struct-nft_af_info-parame.patch
new file mode 100644 (file)
index 0000000..158f987
--- /dev/null
@@ -0,0 +1,60 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 19 Dec 2017 13:40:22 +0100
+Subject: [PATCH] netfilter: nf_tables: remove struct nft_af_info parameter in
+ nf_tables_chain_type_lookup()
+
+Pass the family number instead; this comes in preparation for the removal
+of struct nft_af_info.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -423,7 +423,7 @@ static inline u64 nf_tables_alloc_handle
+ static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
+ static const struct nf_chain_type *
+-__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
++__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
+ {
+       int i;
+@@ -436,22 +436,20 @@ __nf_tables_chain_type_lookup(int family
+ }
+ static const struct nf_chain_type *
+-nf_tables_chain_type_lookup(const struct nft_af_info *afi,
+-                          const struct nlattr *nla,
+-                          bool autoload)
++nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
+ {
+       const struct nf_chain_type *type;
+-      type = __nf_tables_chain_type_lookup(afi->family, nla);
++      type = __nf_tables_chain_type_lookup(nla, family);
+       if (type != NULL)
+               return type;
+ #ifdef CONFIG_MODULES
+       if (autoload) {
+               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+-              request_module("nft-chain-%u-%.*s", afi->family,
++              request_module("nft-chain-%u-%.*s", family,
+                              nla_len(nla), (const char *)nla_data(nla));
+               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-              type = __nf_tables_chain_type_lookup(afi->family, nla);
++              type = __nf_tables_chain_type_lookup(nla, family);
+               if (type != NULL)
+                       return ERR_PTR(-EAGAIN);
+       }
+@@ -1325,8 +1323,8 @@ static int nft_chain_parse_hook(struct n
+       type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+       if (nla[NFTA_CHAIN_TYPE]) {
+-              type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
+-                                                 create);
++              type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
++                                                 afi->family, create);
+               if (IS_ERR(type))
+                       return PTR_ERR(type);
+       }
diff --git a/target/linux/generic/backport-4.14/333-netfilter-nf_tables-fix-chain-filter-in-nf_tables_du.patch b/target/linux/generic/backport-4.14/333-netfilter-nf_tables-fix-chain-filter-in-nf_tables_du.patch
new file mode 100644 (file)
index 0000000..a123d23
--- /dev/null
@@ -0,0 +1,24 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 19 Dec 2017 12:01:21 +0100
+Subject: [PATCH] netfilter: nf_tables: fix chain filter in
+ nf_tables_dump_rules()
+
+ctx->chain may be null now that we have very large object names,
+so we cannot check for ctx->chain[0] here.
+
+Fixes: b7263e071aba7 ("netfilter: nf_tables: Allow table names of up to 255 chars")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Acked-by: Phil Sutter <phil@nwl.cc>
+---
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -2090,7 +2090,7 @@ static int nf_tables_dump_rules(struct s
+                               continue;
+                       list_for_each_entry_rcu(chain, &table->chains, list) {
+-                              if (ctx && ctx->chain[0] &&
++                              if (ctx && ctx->chain &&
+                                   strcmp(ctx->chain, chain->name) != 0)
+                                       continue;
diff --git a/target/linux/generic/backport-4.14/334-netfilter-nf_tables-fix-potential-NULL-ptr-deref-in-.patch b/target/linux/generic/backport-4.14/334-netfilter-nf_tables-fix-potential-NULL-ptr-deref-in-.patch
new file mode 100644 (file)
index 0000000..57e9b53
--- /dev/null
@@ -0,0 +1,30 @@
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Mon, 25 Dec 2017 11:34:54 +0800
+Subject: [PATCH] netfilter: nf_tables: fix potential NULL-ptr deref in
+ nf_tables_dump_obj_done()
+
+If there is no NFTA_OBJ_TABLE and NFTA_OBJ_TYPE, c.data will be NULL in
+nf_tables_getobj(). So before freeing filter->table in
+nf_tables_dump_obj_done(), we need to check whether filter is NULL first.
+
+Fixes: e46abbcc05aa ("netfilter: nf_tables: Allow table names of up to 255 chars")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Acked-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4614,8 +4614,10 @@ static int nf_tables_dump_obj_done(struc
+ {
+       struct nft_obj_filter *filter = cb->data;
+-      kfree(filter->table);
+-      kfree(filter);
++      if (filter) {
++              kfree(filter->table);
++              kfree(filter);
++      }
+       return 0;
+ }
diff --git a/target/linux/generic/backport-4.14/335-netfilter-nf_tables-add-single-table-list-for-all-fa.patch b/target/linux/generic/backport-4.14/335-netfilter-nf_tables-add-single-table-list-for-all-fa.patch
new file mode 100644 (file)
index 0000000..bae2e28
--- /dev/null
@@ -0,0 +1,1449 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 9 Jan 2018 02:38:03 +0100
+Subject: [PATCH] netfilter: nf_tables: add single table list for all families
+
+Place all existing user defined tables in struct net *, instead of
+having one list per family. This saves us from one level of indentation
+in netlink dump functions.
+
+Place a pointer to struct nft_af_info in struct nft_table temporarily, as
+we still need it to put back the module reference counter on table
+removal.
+
+This patch comes in preparation for the removal of struct nft_af_info.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -143,22 +143,22 @@ static inline void nft_data_debug(const
+  *    struct nft_ctx - nf_tables rule/set context
+  *
+  *    @net: net namespace
+- *    @afi: address family info
+  *    @table: the table the chain is contained in
+  *    @chain: the chain the rule is contained in
+  *    @nla: netlink attributes
+  *    @portid: netlink portID of the original message
+  *    @seq: netlink sequence number
++ *    @family: protocol family
+  *    @report: notify via unicast netlink message
+  */
+ struct nft_ctx {
+       struct net                      *net;
+-      struct nft_af_info              *afi;
+       struct nft_table                *table;
+       struct nft_chain                *chain;
+       const struct nlattr * const     *nla;
+       u32                             portid;
+       u32                             seq;
++      u8                              family;
+       bool                            report;
+ };
+@@ -939,6 +939,7 @@ unsigned int nft_do_chain(struct nft_pkt
+  *    @use: number of chain references to this table
+  *    @flags: table flag (see enum nft_table_flags)
+  *    @genmask: generation mask
++ *    @afinfo: address family info
+  *    @name: name of the table
+  */
+ struct nft_table {
+@@ -951,6 +952,7 @@ struct nft_table {
+       u32                             use;
+       u16                             flags:14,
+                                       genmask:2;
++      struct nft_af_info              *afi;
+       char                            *name;
+ };
+@@ -960,13 +962,11 @@ struct nft_table {
+  *    @list: used internally
+  *    @family: address family
+  *    @owner: module owner
+- *    @tables: used internally
+  */
+ struct nft_af_info {
+       struct list_head                list;
+       int                             family;
+       struct module                   *owner;
+-      struct list_head                tables;
+ };
+ int nft_register_afinfo(struct net *, struct nft_af_info *);
+--- a/include/net/netns/nftables.h
++++ b/include/net/netns/nftables.h
+@@ -8,6 +8,7 @@ struct nft_af_info;
+ struct netns_nftables {
+       struct list_head        af_info;
++      struct list_head        tables;
+       struct list_head        commit_list;
+       struct nft_af_info      *ipv4;
+       struct nft_af_info      *ipv6;
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -37,7 +37,6 @@ static LIST_HEAD(nf_tables_flowtables);
+  */
+ int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
+ {
+-      INIT_LIST_HEAD(&afi->tables);
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       list_add_tail_rcu(&afi->list, &net->nft.af_info);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+@@ -99,13 +98,13 @@ static void nft_ctx_init(struct nft_ctx
+                        struct net *net,
+                        const struct sk_buff *skb,
+                        const struct nlmsghdr *nlh,
+-                       struct nft_af_info *afi,
++                       u8 family,
+                        struct nft_table *table,
+                        struct nft_chain *chain,
+                        const struct nlattr * const *nla)
+ {
+       ctx->net        = net;
+-      ctx->afi        = afi;
++      ctx->family     = family;
+       ctx->table      = table;
+       ctx->chain      = chain;
+       ctx->nla        = nla;
+@@ -385,30 +384,31 @@ static int nft_delflowtable(struct nft_c
+  * Tables
+  */
+-static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
++static struct nft_table *nft_table_lookup(const struct net *net,
+                                         const struct nlattr *nla,
+-                                        u8 genmask)
++                                        u8 family, u8 genmask)
+ {
+       struct nft_table *table;
+-      list_for_each_entry(table, &afi->tables, list) {
++      list_for_each_entry(table, &net->nft.tables, list) {
+               if (!nla_strcmp(nla, table->name) &&
++                  table->afi->family == family &&
+                   nft_active_genmask(table, genmask))
+                       return table;
+       }
+       return NULL;
+ }
+-static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
++static struct nft_table *nf_tables_table_lookup(const struct net *net,
+                                               const struct nlattr *nla,
+-                                              u8 genmask)
++                                              u8 family, u8 genmask)
+ {
+       struct nft_table *table;
+       if (nla == NULL)
+               return ERR_PTR(-EINVAL);
+-      table = nft_table_lookup(afi, nla, genmask);
++      table = nft_table_lookup(net, nla, family, genmask);
+       if (table != NULL)
+               return table;
+@@ -507,7 +507,7 @@ static void nf_tables_table_notify(const
+               goto err;
+       err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+-                                      event, 0, ctx->afi->family, ctx->table);
++                                      event, 0, ctx->family, ctx->table);
+       if (err < 0) {
+               kfree_skb(skb);
+               goto err;
+@@ -524,7 +524,6 @@ static int nf_tables_dump_tables(struct
+                                struct netlink_callback *cb)
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       unsigned int idx = 0, s_idx = cb->args[0];
+       struct net *net = sock_net(skb->sk);
+@@ -533,30 +532,27 @@ static int nf_tables_dump_tables(struct
+       rcu_read_lock();
+       cb->seq = net->nft.base_seq;
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              if (family != NFPROTO_UNSPEC && family != afi->family)
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              if (family != NFPROTO_UNSPEC && family != table->afi->family)
+                       continue;
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
+-                      if (idx < s_idx)
+-                              goto cont;
+-                      if (idx > s_idx)
+-                              memset(&cb->args[1], 0,
+-                                     sizeof(cb->args) - sizeof(cb->args[0]));
+-                      if (!nft_is_active(net, table))
+-                              continue;
+-                      if (nf_tables_fill_table_info(skb, net,
+-                                                    NETLINK_CB(cb->skb).portid,
+-                                                    cb->nlh->nlmsg_seq,
+-                                                    NFT_MSG_NEWTABLE,
+-                                                    NLM_F_MULTI,
+-                                                    afi->family, table) < 0)
+-                              goto done;
++              if (idx < s_idx)
++                      goto cont;
++              if (idx > s_idx)
++                      memset(&cb->args[1], 0,
++                             sizeof(cb->args) - sizeof(cb->args[0]));
++              if (!nft_is_active(net, table))
++                      continue;
++              if (nf_tables_fill_table_info(skb, net,
++                                            NETLINK_CB(cb->skb).portid,
++                                            cb->nlh->nlmsg_seq,
++                                            NFT_MSG_NEWTABLE, NLM_F_MULTI,
++                                            table->afi->family, table) < 0)
++                      goto done;
+-                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
++              nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ cont:
+-                      idx++;
+-              }
++              idx++;
+       }
+ done:
+       rcu_read_unlock();
+@@ -588,7 +584,8 @@ static int nf_tables_gettable(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -719,7 +716,7 @@ static int nf_tables_newtable(struct net
+               return PTR_ERR(afi);
+       name = nla[NFTA_TABLE_NAME];
+-      table = nf_tables_table_lookup(afi, name, genmask);
++      table = nf_tables_table_lookup(net, name, afi->family, genmask);
+       if (IS_ERR(table)) {
+               if (PTR_ERR(table) != -ENOENT)
+                       return PTR_ERR(table);
+@@ -729,7 +726,7 @@ static int nf_tables_newtable(struct net
+               if (nlh->nlmsg_flags & NLM_F_REPLACE)
+                       return -EOPNOTSUPP;
+-              nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++              nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
+               return nf_tables_updtable(&ctx);
+       }
+@@ -756,14 +753,15 @@ static int nf_tables_newtable(struct net
+       INIT_LIST_HEAD(&table->sets);
+       INIT_LIST_HEAD(&table->objects);
+       INIT_LIST_HEAD(&table->flowtables);
++      table->afi = afi;
+       table->flags = flags;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+       if (err < 0)
+               goto err4;
+-      list_add_tail_rcu(&table->list, &afi->tables);
++      list_add_tail_rcu(&table->list, &net->nft.tables);
+       return 0;
+ err4:
+       kfree(table->name);
+@@ -837,30 +835,28 @@ out:
+ static int nft_flush(struct nft_ctx *ctx, int family)
+ {
+-      struct nft_af_info *afi;
+       struct nft_table *table, *nt;
+       const struct nlattr * const *nla = ctx->nla;
+       int err = 0;
+-      list_for_each_entry(afi, &ctx->net->nft.af_info, list) {
+-              if (family != AF_UNSPEC && afi->family != family)
++      list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
++              if (family != AF_UNSPEC && table->afi->family != family)
+                       continue;
+-              ctx->afi = afi;
+-              list_for_each_entry_safe(table, nt, &afi->tables, list) {
+-                      if (!nft_is_active_next(ctx->net, table))
+-                              continue;
++              ctx->family = table->afi->family;
+-                      if (nla[NFTA_TABLE_NAME] &&
+-                          nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
+-                              continue;
++              if (!nft_is_active_next(ctx->net, table))
++                      continue;
+-                      ctx->table = table;
++              if (nla[NFTA_TABLE_NAME] &&
++                  nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
++                      continue;
+-                      err = nft_flush_table(ctx);
+-                      if (err < 0)
+-                              goto out;
+-              }
++              ctx->table = table;
++
++              err = nft_flush_table(ctx);
++              if (err < 0)
++                      goto out;
+       }
+ out:
+       return err;
+@@ -878,7 +874,7 @@ static int nf_tables_deltable(struct net
+       int family = nfmsg->nfgen_family;
+       struct nft_ctx ctx;
+-      nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
+       if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+               return nft_flush(&ctx, family);
+@@ -886,7 +882,8 @@ static int nf_tables_deltable(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -894,7 +891,7 @@ static int nf_tables_deltable(struct net
+           table->use > 0)
+               return -EBUSY;
+-      ctx.afi = afi;
++      ctx.family = afi->family;
+       ctx.table = table;
+       return nft_flush_table(&ctx);
+@@ -906,7 +903,7 @@ static void nf_tables_table_destroy(stru
+       kfree(ctx->table->name);
+       kfree(ctx->table);
+-      module_put(ctx->afi->owner);
++      module_put(ctx->table->afi->owner);
+ }
+ int nft_register_chain_type(const struct nf_chain_type *ctype)
+@@ -1107,7 +1104,7 @@ static void nf_tables_chain_notify(const
+               goto err;
+       err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+-                                      event, 0, ctx->afi->family, ctx->table,
++                                      event, 0, ctx->family, ctx->table,
+                                       ctx->chain);
+       if (err < 0) {
+               kfree_skb(skb);
+@@ -1125,7 +1122,6 @@ static int nf_tables_dump_chains(struct
+                                struct netlink_callback *cb)
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       unsigned int idx = 0, s_idx = cb->args[0];
+@@ -1135,31 +1131,30 @@ static int nf_tables_dump_chains(struct
+       rcu_read_lock();
+       cb->seq = net->nft.base_seq;
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              if (family != NFPROTO_UNSPEC && family != afi->family)
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              if (family != NFPROTO_UNSPEC && family != table->afi->family)
+                       continue;
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
+-                      list_for_each_entry_rcu(chain, &table->chains, list) {
+-                              if (idx < s_idx)
+-                                      goto cont;
+-                              if (idx > s_idx)
+-                                      memset(&cb->args[1], 0,
+-                                             sizeof(cb->args) - sizeof(cb->args[0]));
+-                              if (!nft_is_active(net, chain))
+-                                      continue;
+-                              if (nf_tables_fill_chain_info(skb, net,
+-                                                            NETLINK_CB(cb->skb).portid,
+-                                                            cb->nlh->nlmsg_seq,
+-                                                            NFT_MSG_NEWCHAIN,
+-                                                            NLM_F_MULTI,
+-                                                            afi->family, table, chain) < 0)
+-                                      goto done;
++              list_for_each_entry_rcu(chain, &table->chains, list) {
++                      if (idx < s_idx)
++                              goto cont;
++                      if (idx > s_idx)
++                              memset(&cb->args[1], 0,
++                                     sizeof(cb->args) - sizeof(cb->args[0]));
++                      if (!nft_is_active(net, chain))
++                              continue;
++                      if (nf_tables_fill_chain_info(skb, net,
++                                                    NETLINK_CB(cb->skb).portid,
++                                                    cb->nlh->nlmsg_seq,
++                                                    NFT_MSG_NEWCHAIN,
++                                                    NLM_F_MULTI,
++                                                    table->afi->family, table,
++                                                    chain) < 0)
++                              goto done;
+-                              nl_dump_check_consistent(cb, nlmsg_hdr(skb));
++                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ cont:
+-                              idx++;
+-                      }
++                      idx++;
+               }
+       }
+ done:
+@@ -1193,7 +1188,8 @@ static int nf_tables_getchain(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -1301,8 +1297,8 @@ struct nft_chain_hook {
+ static int nft_chain_parse_hook(struct net *net,
+                               const struct nlattr * const nla[],
+-                              struct nft_af_info *afi,
+-                              struct nft_chain_hook *hook, bool create)
++                              struct nft_chain_hook *hook, u8 family,
++                              bool create)
+ {
+       struct nlattr *ha[NFTA_HOOK_MAX + 1];
+       const struct nf_chain_type *type;
+@@ -1321,10 +1317,10 @@ static int nft_chain_parse_hook(struct n
+       hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+       hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+-      type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
++      type = chain_type[family][NFT_CHAIN_T_DEFAULT];
+       if (nla[NFTA_CHAIN_TYPE]) {
+               type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+-                                                 afi->family, create);
++                                                 family, create);
+               if (IS_ERR(type))
+                       return PTR_ERR(type);
+       }
+@@ -1336,7 +1332,7 @@ static int nft_chain_parse_hook(struct n
+       hook->type = type;
+       hook->dev = NULL;
+-      if (afi->family == NFPROTO_NETDEV) {
++      if (family == NFPROTO_NETDEV) {
+               char ifname[IFNAMSIZ];
+               if (!ha[NFTA_HOOK_DEV]) {
+@@ -1371,7 +1367,6 @@ static int nf_tables_addchain(struct nft
+ {
+       const struct nlattr * const *nla = ctx->nla;
+       struct nft_table *table = ctx->table;
+-      struct nft_af_info *afi = ctx->afi;
+       struct nft_base_chain *basechain;
+       struct nft_stats __percpu *stats;
+       struct net *net = ctx->net;
+@@ -1385,7 +1380,7 @@ static int nf_tables_addchain(struct nft
+               struct nft_chain_hook hook;
+               struct nf_hook_ops *ops;
+-              err = nft_chain_parse_hook(net, nla, afi, &hook, create);
++              err = nft_chain_parse_hook(net, nla, &hook, family, create);
+               if (err < 0)
+                       return err;
+@@ -1478,7 +1473,7 @@ static int nf_tables_updchain(struct nft
+               if (!nft_is_base_chain(chain))
+                       return -EBUSY;
+-              err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
++              err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
+                                          create);
+               if (err < 0)
+                       return err;
+@@ -1571,7 +1566,8 @@ static int nf_tables_newchain(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -1611,7 +1607,7 @@ static int nf_tables_newchain(struct net
+               }
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
+       if (chain != NULL) {
+               if (nlh->nlmsg_flags & NLM_F_EXCL)
+@@ -1645,7 +1641,8 @@ static int nf_tables_delchain(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -1657,7 +1654,7 @@ static int nf_tables_delchain(struct net
+           chain->use > 0)
+               return -EBUSY;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
+       use = chain->use;
+       list_for_each_entry(rule, &chain->rules, list) {
+@@ -1822,7 +1819,7 @@ static int nf_tables_expr_parse(const st
+       if (err < 0)
+               return err;
+-      type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
++      type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
+       if (IS_ERR(type))
+               return PTR_ERR(type);
+@@ -2045,7 +2042,7 @@ static void nf_tables_rule_notify(const
+               goto err;
+       err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+-                                     event, 0, ctx->afi->family, ctx->table,
++                                     event, 0, ctx->family, ctx->table,
+                                      ctx->chain, rule);
+       if (err < 0) {
+               kfree_skb(skb);
+@@ -2069,7 +2066,6 @@ static int nf_tables_dump_rules(struct s
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+       const struct nft_rule_dump_ctx *ctx = cb->data;
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       const struct nft_rule *rule;
+@@ -2080,39 +2076,37 @@ static int nf_tables_dump_rules(struct s
+       rcu_read_lock();
+       cb->seq = net->nft.base_seq;
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              if (family != NFPROTO_UNSPEC && family != afi->family)
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              if (family != NFPROTO_UNSPEC && family != table->afi->family)
+                       continue;
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
+-                      if (ctx && ctx->table &&
+-                          strcmp(ctx->table, table->name) != 0)
+-                              continue;
++              if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
++                      continue;
+-                      list_for_each_entry_rcu(chain, &table->chains, list) {
+-                              if (ctx && ctx->chain &&
+-                                  strcmp(ctx->chain, chain->name) != 0)
+-                                      continue;
++              list_for_each_entry_rcu(chain, &table->chains, list) {
++                      if (ctx && ctx->chain &&
++                          strcmp(ctx->chain, chain->name) != 0)
++                              continue;
+-                              list_for_each_entry_rcu(rule, &chain->rules, list) {
+-                                      if (!nft_is_active(net, rule))
+-                                              goto cont;
+-                                      if (idx < s_idx)
+-                                              goto cont;
+-                                      if (idx > s_idx)
+-                                              memset(&cb->args[1], 0,
+-                                                     sizeof(cb->args) - sizeof(cb->args[0]));
+-                                      if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
+-                                                                    cb->nlh->nlmsg_seq,
+-                                                                    NFT_MSG_NEWRULE,
+-                                                                    NLM_F_MULTI | NLM_F_APPEND,
+-                                                                    afi->family, table, chain, rule) < 0)
+-                                              goto done;
++                      list_for_each_entry_rcu(rule, &chain->rules, list) {
++                              if (!nft_is_active(net, rule))
++                                      goto cont;
++                              if (idx < s_idx)
++                                      goto cont;
++                              if (idx > s_idx)
++                                      memset(&cb->args[1], 0,
++                                             sizeof(cb->args) - sizeof(cb->args[0]));
++                              if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
++                                                            cb->nlh->nlmsg_seq,
++                                                            NFT_MSG_NEWRULE,
++                                                            NLM_F_MULTI | NLM_F_APPEND,
++                                                            table->afi->family,
++                                                            table, chain, rule) < 0)
++                                      goto done;
+-                                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
++                              nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ cont:
+-                                      idx++;
+-                              }
++                              idx++;
+                       }
+               }
+       }
+@@ -2190,7 +2184,8 @@ static int nf_tables_getrule(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -2267,7 +2262,8 @@ static int nf_tables_newrule(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -2306,7 +2302,7 @@ static int nf_tables_newrule(struct net
+                       return PTR_ERR(old_rule);
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
+       n = 0;
+       size = 0;
+@@ -2441,7 +2437,8 @@ static int nf_tables_delrule(struct net
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -2452,7 +2449,7 @@ static int nf_tables_delrule(struct net
+                       return PTR_ERR(chain);
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
+       if (chain) {
+               if (nla[NFTA_RULE_HANDLE]) {
+@@ -2650,13 +2647,13 @@ static int nft_ctx_init_from_setattr(str
+               if (afi == NULL)
+                       return -EAFNOSUPPORT;
+-              table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE],
+-                                             genmask);
++              table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE],
++                                             afi->family, genmask);
+               if (IS_ERR(table))
+                       return PTR_ERR(table);
+       }
+-      nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       return 0;
+ }
+@@ -2783,7 +2780,7 @@ static int nf_tables_fill_set(struct sk_
+               goto nla_put_failure;
+       nfmsg = nlmsg_data(nlh);
+-      nfmsg->nfgen_family     = ctx->afi->family;
++      nfmsg->nfgen_family     = ctx->family;
+       nfmsg->version          = NFNETLINK_V0;
+       nfmsg->res_id           = htons(ctx->net->nft.base_seq & 0xffff);
+@@ -2875,10 +2872,8 @@ static int nf_tables_dump_sets(struct sk
+ {
+       const struct nft_set *set;
+       unsigned int idx, s_idx = cb->args[0];
+-      struct nft_af_info *afi;
+       struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
+       struct net *net = sock_net(skb->sk);
+-      int cur_family = cb->args[3];
+       struct nft_ctx *ctx = cb->data, ctx_set;
+       if (cb->args[1])
+@@ -2887,51 +2882,44 @@ static int nf_tables_dump_sets(struct sk
+       rcu_read_lock();
+       cb->seq = net->nft.base_seq;
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              if (ctx->afi && ctx->afi != afi)
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              if (ctx->family != NFPROTO_UNSPEC &&
++                  ctx->family != table->afi->family)
+                       continue;
+-              if (cur_family) {
+-                      if (afi->family != cur_family)
++              if (ctx->table && ctx->table != table)
++                      continue;
++
++              if (cur_table) {
++                      if (cur_table != table)
+                               continue;
+-                      cur_family = 0;
++                      cur_table = NULL;
+               }
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
+-                      if (ctx->table && ctx->table != table)
+-                              continue;
++              idx = 0;
++              list_for_each_entry_rcu(set, &table->sets, list) {
++                      if (idx < s_idx)
++                              goto cont;
++                      if (!nft_is_active(net, set))
++                              goto cont;
+-                      if (cur_table) {
+-                              if (cur_table != table)
+-                                      continue;
++                      ctx_set = *ctx;
++                      ctx_set.table = table;
++                      ctx_set.family = table->afi->family;
+-                              cur_table = NULL;
++                      if (nf_tables_fill_set(skb, &ctx_set, set,
++                                             NFT_MSG_NEWSET,
++                                             NLM_F_MULTI) < 0) {
++                              cb->args[0] = idx;
++                              cb->args[2] = (unsigned long) table;
++                              goto done;
+                       }
+-                      idx = 0;
+-                      list_for_each_entry_rcu(set, &table->sets, list) {
+-                              if (idx < s_idx)
+-                                      goto cont;
+-                              if (!nft_is_active(net, set))
+-                                      goto cont;
+-
+-                              ctx_set = *ctx;
+-                              ctx_set.table = table;
+-                              ctx_set.afi = afi;
+-                              if (nf_tables_fill_set(skb, &ctx_set, set,
+-                                                     NFT_MSG_NEWSET,
+-                                                     NLM_F_MULTI) < 0) {
+-                                      cb->args[0] = idx;
+-                                      cb->args[2] = (unsigned long) table;
+-                                      cb->args[3] = afi->family;
+-                                      goto done;
+-                              }
+-                              nl_dump_check_consistent(cb, nlmsg_hdr(skb));
++                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ cont:
+-                              idx++;
+-                      }
+-                      if (s_idx)
+-                              s_idx = 0;
++                      idx++;
+               }
++              if (s_idx)
++                      s_idx = 0;
+       }
+       cb->args[1] = 1;
+ done:
+@@ -3141,11 +3129,12 @@ static int nf_tables_newset(struct net *
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+       if (IS_ERR(set)) {
+@@ -3410,12 +3399,12 @@ static int nft_ctx_init_from_elemattr(st
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE],
+-                                     genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE],
++                                     afi->family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+-      nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       return 0;
+ }
+@@ -3520,7 +3509,6 @@ static int nf_tables_dump_set(struct sk_
+ {
+       struct nft_set_dump_ctx *dump_ctx = cb->data;
+       struct net *net = sock_net(skb->sk);
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_set *set;
+       struct nft_set_dump_args args;
+@@ -3532,21 +3520,19 @@ static int nf_tables_dump_set(struct sk_
+       int event;
+       rcu_read_lock();
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              if (afi != dump_ctx->ctx.afi)
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
++                  dump_ctx->ctx.family != table->afi->family)
+                       continue;
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
+-                      if (table != dump_ctx->ctx.table)
+-                              continue;
++              if (table != dump_ctx->ctx.table)
++                      continue;
+-                      list_for_each_entry_rcu(set, &table->sets, list) {
+-                              if (set == dump_ctx->set) {
+-                                      set_found = true;
+-                                      break;
+-                              }
++              list_for_each_entry_rcu(set, &table->sets, list) {
++                      if (set == dump_ctx->set) {
++                              set_found = true;
++                              break;
+                       }
+-                      break;
+               }
+               break;
+       }
+@@ -3566,7 +3552,7 @@ static int nf_tables_dump_set(struct sk_
+               goto nla_put_failure;
+       nfmsg = nlmsg_data(nlh);
+-      nfmsg->nfgen_family = afi->family;
++      nfmsg->nfgen_family = table->afi->family;
+       nfmsg->version      = NFNETLINK_V0;
+       nfmsg->res_id       = htons(net->nft.base_seq & 0xffff);
+@@ -3668,7 +3654,7 @@ static int nf_tables_fill_setelem_info(s
+               goto nla_put_failure;
+       nfmsg = nlmsg_data(nlh);
+-      nfmsg->nfgen_family     = ctx->afi->family;
++      nfmsg->nfgen_family     = ctx->family;
+       nfmsg->version          = NFNETLINK_V0;
+       nfmsg->res_id           = htons(ctx->net->nft.base_seq & 0xffff);
+@@ -3912,7 +3898,7 @@ static int nft_add_set_elem(struct nft_c
+               list_for_each_entry(binding, &set->bindings, list) {
+                       struct nft_ctx bind_ctx = {
+                               .net    = ctx->net,
+-                              .afi    = ctx->afi,
++                              .family = ctx->family,
+                               .table  = ctx->table,
+                               .chain  = (struct nft_chain *)binding->chain,
+                       };
+@@ -4459,7 +4445,8 @@ static int nf_tables_newobj(struct net *
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -4477,7 +4464,7 @@ static int nf_tables_newobj(struct net *
+               return 0;
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       type = nft_obj_type_get(objtype);
+       if (IS_ERR(type))
+@@ -4554,7 +4541,6 @@ struct nft_obj_filter {
+ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       unsigned int idx = 0, s_idx = cb->args[0];
+       struct nft_obj_filter *filter = cb->data;
+@@ -4569,38 +4555,37 @@ static int nf_tables_dump_obj(struct sk_
+       rcu_read_lock();
+       cb->seq = net->nft.base_seq;
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              if (family != NFPROTO_UNSPEC && family != afi->family)
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              if (family != NFPROTO_UNSPEC && family != table->afi->family)
+                       continue;
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
+-                      list_for_each_entry_rcu(obj, &table->objects, list) {
+-                              if (!nft_is_active(net, obj))
+-                                      goto cont;
+-                              if (idx < s_idx)
+-                                      goto cont;
+-                              if (idx > s_idx)
+-                                      memset(&cb->args[1], 0,
+-                                             sizeof(cb->args) - sizeof(cb->args[0]));
+-                              if (filter && filter->table[0] &&
+-                                  strcmp(filter->table, table->name))
+-                                      goto cont;
+-                              if (filter &&
+-                                  filter->type != NFT_OBJECT_UNSPEC &&
+-                                  obj->ops->type->type != filter->type)
+-                                      goto cont;
++              list_for_each_entry_rcu(obj, &table->objects, list) {
++                      if (!nft_is_active(net, obj))
++                              goto cont;
++                      if (idx < s_idx)
++                              goto cont;
++                      if (idx > s_idx)
++                              memset(&cb->args[1], 0,
++                                     sizeof(cb->args) - sizeof(cb->args[0]));
++                      if (filter && filter->table[0] &&
++                          strcmp(filter->table, table->name))
++                              goto cont;
++                      if (filter &&
++                          filter->type != NFT_OBJECT_UNSPEC &&
++                          obj->ops->type->type != filter->type)
++                              goto cont;
+-                              if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
+-                                                          cb->nlh->nlmsg_seq,
+-                                                          NFT_MSG_NEWOBJ,
+-                                                          NLM_F_MULTI | NLM_F_APPEND,
+-                                                          afi->family, table, obj, reset) < 0)
+-                                      goto done;
++                      if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
++                                                  cb->nlh->nlmsg_seq,
++                                                  NFT_MSG_NEWOBJ,
++                                                  NLM_F_MULTI | NLM_F_APPEND,
++                                                  table->afi->family, table,
++                                                  obj, reset) < 0)
++                              goto done;
+-                              nl_dump_check_consistent(cb, nlmsg_hdr(skb));
++                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ cont:
+-                              idx++;
+-                      }
++                      idx++;
+               }
+       }
+ done:
+@@ -4687,7 +4672,8 @@ static int nf_tables_getobj(struct net *
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -4747,7 +4733,8 @@ static int nf_tables_delobj(struct net *
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family,
++                                     genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -4758,7 +4745,7 @@ static int nf_tables_delobj(struct net *
+       if (obj->use > 0)
+               return -EBUSY;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       return nft_delobj(&ctx, obj);
+ }
+@@ -4796,7 +4783,7 @@ static void nf_tables_obj_notify(const s
+                                struct nft_object *obj, int event)
+ {
+       nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
+-                     ctx->afi->family, ctx->report, GFP_KERNEL);
++                     ctx->family, ctx->report, GFP_KERNEL);
+ }
+ /*
+@@ -4986,7 +4973,7 @@ void nft_flow_table_iterate(struct net *
+       rcu_read_lock();
+       list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
++              list_for_each_entry_rcu(table, &net->nft.tables, list) {
+                       list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+                               iter(&flowtable->data, data);
+                       }
+@@ -5034,7 +5021,8 @@ static int nf_tables_newflowtable(struct
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
++                                     afi->family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -5051,7 +5039,7 @@ static int nf_tables_newflowtable(struct
+               return 0;
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+       if (!flowtable)
+@@ -5132,7 +5120,8 @@ static int nf_tables_delflowtable(struct
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
++                                     afi->family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -5143,7 +5132,7 @@ static int nf_tables_delflowtable(struct
+       if (flowtable->use > 0)
+               return -EBUSY;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
+       return nft_delflowtable(&ctx, flowtable);
+ }
+@@ -5212,40 +5201,37 @@ static int nf_tables_dump_flowtable(stru
+       struct net *net = sock_net(skb->sk);
+       int family = nfmsg->nfgen_family;
+       struct nft_flowtable *flowtable;
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       rcu_read_lock();
+       cb->seq = net->nft.base_seq;
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              if (family != NFPROTO_UNSPEC && family != afi->family)
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              if (family != NFPROTO_UNSPEC && family != table->afi->family)
+                       continue;
+-              list_for_each_entry_rcu(table, &afi->tables, list) {
+-                      list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+-                              if (!nft_is_active(net, flowtable))
+-                                      goto cont;
+-                              if (idx < s_idx)
+-                                      goto cont;
+-                              if (idx > s_idx)
+-                                      memset(&cb->args[1], 0,
+-                                             sizeof(cb->args) - sizeof(cb->args[0]));
+-                              if (filter && filter->table[0] &&
+-                                  strcmp(filter->table, table->name))
+-                                      goto cont;
++              list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
++                      if (!nft_is_active(net, flowtable))
++                              goto cont;
++                      if (idx < s_idx)
++                              goto cont;
++                      if (idx > s_idx)
++                              memset(&cb->args[1], 0,
++                                     sizeof(cb->args) - sizeof(cb->args[0]));
++                      if (filter && filter->table[0] &&
++                          strcmp(filter->table, table->name))
++                              goto cont;
+-                              if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
+-                                                                cb->nlh->nlmsg_seq,
+-                                                                NFT_MSG_NEWFLOWTABLE,
+-                                                                NLM_F_MULTI | NLM_F_APPEND,
+-                                                                afi->family, flowtable) < 0)
+-                                      goto done;
++                      if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
++                                                        cb->nlh->nlmsg_seq,
++                                                        NFT_MSG_NEWFLOWTABLE,
++                                                        NLM_F_MULTI | NLM_F_APPEND,
++                                                        table->afi->family, flowtable) < 0)
++                              goto done;
+-                              nl_dump_check_consistent(cb, nlmsg_hdr(skb));
++                      nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ cont:
+-                              idx++;
+-                      }
++                      idx++;
+               }
+       }
+ done:
+@@ -5328,7 +5314,8 @@ static int nf_tables_getflowtable(struct
+       if (IS_ERR(afi))
+               return PTR_ERR(afi);
+-      table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
++      table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
++                                     afi->family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -5371,7 +5358,7 @@ static void nf_tables_flowtable_notify(s
+       err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
+                                           ctx->seq, event, 0,
+-                                          ctx->afi->family, flowtable);
++                                          ctx->family, flowtable);
+       if (err < 0) {
+               kfree_skb(skb);
+               goto err;
+@@ -5449,17 +5436,14 @@ static int nf_tables_flowtable_event(str
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct nft_flowtable *flowtable;
+       struct nft_table *table;
+-      struct nft_af_info *afi;
+       if (event != NETDEV_UNREGISTER)
+               return 0;
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-      list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
+-              list_for_each_entry(table, &afi->tables, list) {
+-                      list_for_each_entry(flowtable, &table->flowtables, list) {
+-                              nft_flowtable_event(event, dev, flowtable);
+-                      }
++      list_for_each_entry(table, &dev_net(dev)->nft.tables, list) {
++              list_for_each_entry(flowtable, &table->flowtables, list) {
++                      nft_flowtable_event(event, dev, flowtable);
+               }
+       }
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+@@ -6478,6 +6462,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump);
+ static int __net_init nf_tables_init_net(struct net *net)
+ {
+       INIT_LIST_HEAD(&net->nft.af_info);
++      INIT_LIST_HEAD(&net->nft.tables);
+       INIT_LIST_HEAD(&net->nft.commit_list);
+       net->nft.base_seq = 1;
+       return 0;
+@@ -6514,10 +6499,10 @@ static void __nft_release_afinfo(struct
+       struct nft_set *set, *ns;
+       struct nft_ctx ctx = {
+               .net    = net,
+-              .afi    = afi,
++              .family = afi->family,
+       };
+-      list_for_each_entry_safe(table, nt, &afi->tables, list) {
++      list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+               list_for_each_entry(chain, &table->chains, list)
+                       nf_tables_unregister_hook(net, table, chain);
+               list_for_each_entry(flowtable, &table->flowtables, list)
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -107,7 +107,6 @@ static int nf_tables_netdev_event(struct
+                                 unsigned long event, void *ptr)
+ {
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_chain *chain, *nr;
+       struct nft_ctx ctx = {
+@@ -119,20 +118,18 @@ static int nf_tables_netdev_event(struct
+               return NOTIFY_DONE;
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-      list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
+-              ctx.afi = afi;
+-              if (afi->family != NFPROTO_NETDEV)
++      list_for_each_entry(table, &ctx.net->nft.tables, list) {
++              if (table->afi->family != NFPROTO_NETDEV)
+                       continue;
+-              list_for_each_entry(table, &afi->tables, list) {
+-                      ctx.table = table;
+-                      list_for_each_entry_safe(chain, nr, &table->chains, list) {
+-                              if (!nft_is_base_chain(chain))
+-                                      continue;
++              ctx.family = table->afi->family;
++              ctx.table = table;
++              list_for_each_entry_safe(chain, nr, &table->chains, list) {
++                      if (!nft_is_base_chain(chain))
++                              continue;
+-                              ctx.chain = chain;
+-                              nft_netdev_event(event, dev, &ctx);
+-                      }
++                      ctx.chain = chain;
++                      nft_netdev_event(event, dev, &ctx);
+               }
+       }
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -144,7 +144,7 @@ nft_target_set_tgchk_param(struct xt_tgc
+ {
+       par->net        = ctx->net;
+       par->table      = ctx->table->name;
+-      switch (ctx->afi->family) {
++      switch (ctx->family) {
+       case AF_INET:
+               entry->e4.ip.proto = proto;
+               entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+@@ -175,7 +175,7 @@ nft_target_set_tgchk_param(struct xt_tgc
+       } else {
+               par->hook_mask = 0;
+       }
+-      par->family     = ctx->afi->family;
++      par->family     = ctx->family;
+       par->nft_compat = true;
+ }
+@@ -267,7 +267,7 @@ nft_target_destroy(const struct nft_ctx
+       par.net = ctx->net;
+       par.target = target;
+       par.targinfo = info;
+-      par.family = ctx->afi->family;
++      par.family = ctx->family;
+       if (par.target->destroy != NULL)
+               par.target->destroy(&par);
+@@ -358,7 +358,7 @@ nft_match_set_mtchk_param(struct xt_mtch
+ {
+       par->net        = ctx->net;
+       par->table      = ctx->table->name;
+-      switch (ctx->afi->family) {
++      switch (ctx->family) {
+       case AF_INET:
+               entry->e4.ip.proto = proto;
+               entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+@@ -389,7 +389,7 @@ nft_match_set_mtchk_param(struct xt_mtch
+       } else {
+               par->hook_mask = 0;
+       }
+-      par->family     = ctx->afi->family;
++      par->family     = ctx->family;
+       par->nft_compat = true;
+ }
+@@ -446,7 +446,7 @@ nft_match_destroy(const struct nft_ctx *
+       par.net = ctx->net;
+       par.match = match;
+       par.matchinfo = info;
+-      par.family = ctx->afi->family;
++      par.family = ctx->family;
+       if (par.match->destroy != NULL)
+               par.match->destroy(&par);
+@@ -648,7 +648,7 @@ nft_match_select_ops(const struct nft_ct
+       mt_name = nla_data(tb[NFTA_MATCH_NAME]);
+       rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
+-      family = ctx->afi->family;
++      family = ctx->family;
+       /* Re-use the existing match if it's already loaded. */
+       list_for_each_entry(nft_match, &nft_match_list, head) {
+@@ -733,7 +733,7 @@ nft_target_select_ops(const struct nft_c
+       tg_name = nla_data(tb[NFTA_TARGET_NAME]);
+       rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
+-      family = ctx->afi->family;
++      family = ctx->family;
+       /* Re-use the existing target if it's already loaded. */
+       list_for_each_entry(nft_target, &nft_target_list, head) {
+--- a/net/netfilter/nft_ct.c
++++ b/net/netfilter/nft_ct.c
+@@ -405,7 +405,7 @@ static int nft_ct_get_init(const struct
+               if (tb[NFTA_CT_DIRECTION] == NULL)
+                       return -EINVAL;
+-              switch (ctx->afi->family) {
++              switch (ctx->family) {
+               case NFPROTO_IPV4:
+                       len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+                                          src.u3.ip);
+@@ -456,7 +456,7 @@ static int nft_ct_get_init(const struct
+       if (err < 0)
+               return err;
+-      err = nf_ct_netns_get(ctx->net, ctx->afi->family);
++      err = nf_ct_netns_get(ctx->net, ctx->family);
+       if (err < 0)
+               return err;
+@@ -550,7 +550,7 @@ static int nft_ct_set_init(const struct
+       if (err < 0)
+               goto err1;
+-      err = nf_ct_netns_get(ctx->net, ctx->afi->family);
++      err = nf_ct_netns_get(ctx->net, ctx->family);
+       if (err < 0)
+               goto err1;
+@@ -564,7 +564,7 @@ err1:
+ static void nft_ct_get_destroy(const struct nft_ctx *ctx,
+                              const struct nft_expr *expr)
+ {
+-      nf_ct_netns_put(ctx->net, ctx->afi->family);
++      nf_ct_netns_put(ctx->net, ctx->family);
+ }
+ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
+@@ -573,7 +573,7 @@ static void nft_ct_set_destroy(const str
+       struct nft_ct *priv = nft_expr_priv(expr);
+       __nft_ct_set_destroy(ctx, priv);
+-      nf_ct_netns_put(ctx->net, ctx->afi->family);
++      nf_ct_netns_put(ctx->net, ctx->family);
+ }
+ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
+@@ -734,7 +734,7 @@ static int nft_ct_helper_obj_init(const
+       struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+       struct nf_conntrack_helper *help4, *help6;
+       char name[NF_CT_HELPER_NAME_LEN];
+-      int family = ctx->afi->family;
++      int family = ctx->family;
+       if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
+               return -EINVAL;
+@@ -753,14 +753,14 @@ static int nft_ct_helper_obj_init(const
+       switch (family) {
+       case NFPROTO_IPV4:
+-              if (ctx->afi->family == NFPROTO_IPV6)
++              if (ctx->family == NFPROTO_IPV6)
+                       return -EINVAL;
+               help4 = nf_conntrack_helper_try_module_get(name, family,
+                                                          priv->l4proto);
+               break;
+       case NFPROTO_IPV6:
+-              if (ctx->afi->family == NFPROTO_IPV4)
++              if (ctx->family == NFPROTO_IPV4)
+                       return -EINVAL;
+               help6 = nf_conntrack_helper_try_module_get(name, family,
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -151,7 +151,7 @@ static int nft_flow_offload_init(const s
+       priv->flowtable = flowtable;
+       flowtable->use++;
+-      return nf_ct_netns_get(ctx->net, ctx->afi->family);
++      return nf_ct_netns_get(ctx->net, ctx->family);
+ }
+ static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+@@ -160,7 +160,7 @@ static void nft_flow_offload_destroy(con
+       struct nft_flow_offload *priv = nft_expr_priv(expr);
+       priv->flowtable->use--;
+-      nf_ct_netns_put(ctx->net, ctx->afi->family);
++      nf_ct_netns_put(ctx->net, ctx->family);
+ }
+ static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+--- a/net/netfilter/nft_log.c
++++ b/net/netfilter/nft_log.c
+@@ -112,7 +112,7 @@ static int nft_log_init(const struct nft
+               break;
+       }
+-      err = nf_logger_find_get(ctx->afi->family, li->type);
++      err = nf_logger_find_get(ctx->family, li->type);
+       if (err < 0)
+               goto err1;
+@@ -133,7 +133,7 @@ static void nft_log_destroy(const struct
+       if (priv->prefix != nft_log_null_prefix)
+               kfree(priv->prefix);
+-      nf_logger_put(ctx->afi->family, li->type);
++      nf_logger_put(ctx->family, li->type);
+ }
+ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+--- a/net/netfilter/nft_masq.c
++++ b/net/netfilter/nft_masq.c
+@@ -73,7 +73,7 @@ int nft_masq_init(const struct nft_ctx *
+               }
+       }
+-      return nf_ct_netns_get(ctx->net, ctx->afi->family);
++      return nf_ct_netns_get(ctx->net, ctx->family);
+ }
+ EXPORT_SYMBOL_GPL(nft_masq_init);
+--- a/net/netfilter/nft_meta.c
++++ b/net/netfilter/nft_meta.c
+@@ -339,7 +339,7 @@ static int nft_meta_get_validate(const s
+       if (priv->key != NFT_META_SECPATH)
+               return 0;
+-      switch (ctx->afi->family) {
++      switch (ctx->family) {
+       case NFPROTO_NETDEV:
+               hooks = 1 << NF_NETDEV_INGRESS;
+               break;
+@@ -370,7 +370,7 @@ int nft_meta_set_validate(const struct n
+       if (priv->key != NFT_META_PKTTYPE)
+               return 0;
+-      switch (ctx->afi->family) {
++      switch (ctx->family) {
+       case NFPROTO_BRIDGE:
+               hooks = 1 << NF_BR_PRE_ROUTING;
+               break;
+--- a/net/netfilter/nft_nat.c
++++ b/net/netfilter/nft_nat.c
+@@ -142,7 +142,7 @@ static int nft_nat_init(const struct nft
+               return -EINVAL;
+       family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+-      if (family != ctx->afi->family)
++      if (family != ctx->family)
+               return -EOPNOTSUPP;
+       switch (family) {
+--- a/net/netfilter/nft_redir.c
++++ b/net/netfilter/nft_redir.c
+@@ -75,7 +75,7 @@ int nft_redir_init(const struct nft_ctx
+                       return -EINVAL;
+       }
+-      return nf_ct_netns_get(ctx->net, ctx->afi->family);
++      return nf_ct_netns_get(ctx->net, ctx->family);
+ }
+ EXPORT_SYMBOL_GPL(nft_redir_init);
diff --git a/target/linux/generic/backport-4.14/336-netfilter-exit_net-cleanup-check-added.patch b/target/linux/generic/backport-4.14/336-netfilter-exit_net-cleanup-check-added.patch
new file mode 100644 (file)
index 0000000..5bfa5aa
--- /dev/null
@@ -0,0 +1,100 @@
+From: Vasily Averin <vvs@virtuozzo.com>
+Date: Sun, 12 Nov 2017 14:32:37 +0300
+Subject: [PATCH] netfilter: exit_net cleanup check added
+
+Be sure that lists initialized in the net_init hook were returned to
+their initial state.
+
+Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
++++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
+@@ -819,6 +819,7 @@ static void clusterip_net_exit(struct ne
+       cn->procdir = NULL;
+ #endif
+       nf_unregister_net_hook(net, &cip_arp_ops);
++      WARN_ON_ONCE(!list_empty(&cn->configs));
+ }
+ static struct pernet_operations clusterip_net_ops = {
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6468,6 +6468,12 @@ static int __net_init nf_tables_init_net
+       return 0;
+ }
++static void __net_exit nf_tables_exit_net(struct net *net)
++{
++      WARN_ON_ONCE(!list_empty(&net->nft.af_info));
++      WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
++}
++
+ int __nft_release_basechain(struct nft_ctx *ctx)
+ {
+       struct nft_rule *rule, *nr;
+@@ -6545,6 +6551,7 @@ static void __nft_release_afinfo(struct
+ static struct pernet_operations nf_tables_net_ops = {
+       .init   = nf_tables_init_net,
++      .exit   = nf_tables_exit_net,
+ };
+ static int __init nf_tables_module_init(void)
+--- a/net/netfilter/nfnetlink_log.c
++++ b/net/netfilter/nfnetlink_log.c
+@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(
+ static void __net_exit nfnl_log_net_exit(struct net *net)
+ {
++      struct nfnl_log_net *log = nfnl_log_pernet(net);
++      unsigned int i;
++
+ #ifdef CONFIG_PROC_FS
+       remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
+ #endif
+       nf_log_unset(net, &nfulnl_logger);
++      for (i = 0; i < INSTANCE_BUCKETS; i++)
++              WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
+ }
+ static struct pernet_operations nfnl_log_net_ops = {
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_ini
+ static void __net_exit nfnl_queue_net_exit(struct net *net)
+ {
++      struct nfnl_queue_net *q = nfnl_queue_pernet(net);
++      unsigned int i;
++
+       nf_unregister_queue_handler(net);
+ #ifdef CONFIG_PROC_FS
+       remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
+ #endif
++      for (i = 0; i < INSTANCE_BUCKETS; i++)
++              WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
+ }
+ static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
+--- a/net/netfilter/x_tables.c
++++ b/net/netfilter/x_tables.c
+@@ -1714,8 +1714,17 @@ static int __net_init xt_net_init(struct
+       return 0;
+ }
++static void __net_exit xt_net_exit(struct net *net)
++{
++      int i;
++
++      for (i = 0; i < NFPROTO_NUMPROTO; i++)
++              WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
++}
++
+ static struct pernet_operations xt_net_ops = {
+       .init = xt_net_init,
++      .exit = xt_net_exit,
+ };
+ static int __init xt_init(void)
diff --git a/target/linux/generic/backport-4.14/337-netfilter-nf_tables-get-rid-of-pernet-families.patch b/target/linux/generic/backport-4.14/337-netfilter-nf_tables-get-rid-of-pernet-families.patch
new file mode 100644 (file)
index 0000000..28ce2dc
--- /dev/null
@@ -0,0 +1,598 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 9 Jan 2018 02:42:11 +0100
+Subject: [PATCH] netfilter: nf_tables: get rid of pernet families
+
+Now that we have a single table list for each netns, we can get rid of
+one pointer per family and the global afinfo list, thus, shrinking
+struct netns for nftables that now becomes 64 bytes smaller.
+
+Also call __nft_release_afinfo() from the __net_exit path to release
+the network namespace's objects on removal.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -969,8 +969,8 @@ struct nft_af_info {
+       struct module                   *owner;
+ };
+-int nft_register_afinfo(struct net *, struct nft_af_info *);
+-void nft_unregister_afinfo(struct net *, struct nft_af_info *);
++int nft_register_afinfo(struct nft_af_info *);
++void nft_unregister_afinfo(struct nft_af_info *);
+ int nft_register_chain_type(const struct nf_chain_type *);
+ void nft_unregister_chain_type(const struct nf_chain_type *);
+--- a/include/net/netns/nftables.h
++++ b/include/net/netns/nftables.h
+@@ -7,15 +7,8 @@
+ struct nft_af_info;
+ struct netns_nftables {
+-      struct list_head        af_info;
+       struct list_head        tables;
+       struct list_head        commit_list;
+-      struct nft_af_info      *ipv4;
+-      struct nft_af_info      *ipv6;
+-      struct nft_af_info      *inet;
+-      struct nft_af_info      *arp;
+-      struct nft_af_info      *bridge;
+-      struct nft_af_info      *netdev;
+       unsigned int            base_seq;
+       u8                      gencursor;
+ };
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -47,34 +47,6 @@ static struct nft_af_info nft_af_bridge
+       .owner          = THIS_MODULE,
+ };
+-static int nf_tables_bridge_init_net(struct net *net)
+-{
+-      net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+-      if (net->nft.bridge == NULL)
+-              return -ENOMEM;
+-
+-      memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
+-
+-      if (nft_register_afinfo(net, net->nft.bridge) < 0)
+-              goto err;
+-
+-      return 0;
+-err:
+-      kfree(net->nft.bridge);
+-      return -ENOMEM;
+-}
+-
+-static void nf_tables_bridge_exit_net(struct net *net)
+-{
+-      nft_unregister_afinfo(net, net->nft.bridge);
+-      kfree(net->nft.bridge);
+-}
+-
+-static struct pernet_operations nf_tables_bridge_net_ops = {
+-      .init   = nf_tables_bridge_init_net,
+-      .exit   = nf_tables_bridge_exit_net,
+-};
+-
+ static const struct nf_chain_type filter_bridge = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -98,17 +70,17 @@ static int __init nf_tables_bridge_init(
+ {
+       int ret;
+-      ret = nft_register_chain_type(&filter_bridge);
++      ret = nft_register_afinfo(&nft_af_bridge);
+       if (ret < 0)
+               return ret;
+-      ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
++      ret = nft_register_chain_type(&filter_bridge);
+       if (ret < 0)
+-              goto err_register_subsys;
++              goto err_register_chain;
+       return ret;
+-err_register_subsys:
++err_register_chain:
+       nft_unregister_chain_type(&filter_bridge);
+       return ret;
+@@ -116,8 +88,8 @@ err_register_subsys:
+ static void __exit nf_tables_bridge_exit(void)
+ {
+-      unregister_pernet_subsys(&nf_tables_bridge_net_ops);
+       nft_unregister_chain_type(&filter_bridge);
++      nft_unregister_afinfo(&nft_af_bridge);
+ }
+ module_init(nf_tables_bridge_init);
+--- a/net/ipv4/netfilter/nf_tables_arp.c
++++ b/net/ipv4/netfilter/nf_tables_arp.c
+@@ -32,34 +32,6 @@ static struct nft_af_info nft_af_arp __r
+       .owner          = THIS_MODULE,
+ };
+-static int nf_tables_arp_init_net(struct net *net)
+-{
+-      net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+-      if (net->nft.arp== NULL)
+-              return -ENOMEM;
+-
+-      memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
+-
+-      if (nft_register_afinfo(net, net->nft.arp) < 0)
+-              goto err;
+-
+-      return 0;
+-err:
+-      kfree(net->nft.arp);
+-      return -ENOMEM;
+-}
+-
+-static void nf_tables_arp_exit_net(struct net *net)
+-{
+-      nft_unregister_afinfo(net, net->nft.arp);
+-      kfree(net->nft.arp);
+-}
+-
+-static struct pernet_operations nf_tables_arp_net_ops = {
+-      .init   = nf_tables_arp_init_net,
+-      .exit   = nf_tables_arp_exit_net,
+-};
+-
+ static const struct nf_chain_type filter_arp = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -77,21 +49,26 @@ static int __init nf_tables_arp_init(voi
+ {
+       int ret;
+-      ret = nft_register_chain_type(&filter_arp);
++      ret = nft_register_afinfo(&nft_af_arp);
+       if (ret < 0)
+               return ret;
+-      ret = register_pernet_subsys(&nf_tables_arp_net_ops);
++      ret = nft_register_chain_type(&filter_arp);
+       if (ret < 0)
+-              nft_unregister_chain_type(&filter_arp);
++              goto err_register_chain;
++
++      return 0;
++
++err_register_chain:
++      nft_unregister_chain_type(&filter_arp);
+       return ret;
+ }
+ static void __exit nf_tables_arp_exit(void)
+ {
+-      unregister_pernet_subsys(&nf_tables_arp_net_ops);
+       nft_unregister_chain_type(&filter_arp);
++      nft_unregister_afinfo(&nft_af_arp);
+ }
+ module_init(nf_tables_arp_init);
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -35,34 +35,6 @@ static struct nft_af_info nft_af_ipv4 __
+       .owner          = THIS_MODULE,
+ };
+-static int nf_tables_ipv4_init_net(struct net *net)
+-{
+-      net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+-      if (net->nft.ipv4 == NULL)
+-              return -ENOMEM;
+-
+-      memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
+-
+-      if (nft_register_afinfo(net, net->nft.ipv4) < 0)
+-              goto err;
+-
+-      return 0;
+-err:
+-      kfree(net->nft.ipv4);
+-      return -ENOMEM;
+-}
+-
+-static void nf_tables_ipv4_exit_net(struct net *net)
+-{
+-      nft_unregister_afinfo(net, net->nft.ipv4);
+-      kfree(net->nft.ipv4);
+-}
+-
+-static struct pernet_operations nf_tables_ipv4_net_ops = {
+-      .init   = nf_tables_ipv4_init_net,
+-      .exit   = nf_tables_ipv4_exit_net,
+-};
+-
+ static const struct nf_chain_type filter_ipv4 = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -86,21 +58,25 @@ static int __init nf_tables_ipv4_init(vo
+ {
+       int ret;
+-      ret = nft_register_chain_type(&filter_ipv4);
++      ret = nft_register_afinfo(&nft_af_ipv4);
+       if (ret < 0)
+               return ret;
+-      ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
++      ret = nft_register_chain_type(&filter_ipv4);
+       if (ret < 0)
+-              nft_unregister_chain_type(&filter_ipv4);
++              goto err_register_chain;
++
++      return 0;
++err_register_chain:
++      nft_unregister_afinfo(&nft_af_ipv4);
+       return ret;
+ }
+ static void __exit nf_tables_ipv4_exit(void)
+ {
+-      unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
+       nft_unregister_chain_type(&filter_ipv4);
++      nft_unregister_afinfo(&nft_af_ipv4);
+ }
+ module_init(nf_tables_ipv4_init);
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -33,34 +33,6 @@ static struct nft_af_info nft_af_ipv6 __
+       .owner          = THIS_MODULE,
+ };
+-static int nf_tables_ipv6_init_net(struct net *net)
+-{
+-      net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+-      if (net->nft.ipv6 == NULL)
+-              return -ENOMEM;
+-
+-      memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
+-
+-      if (nft_register_afinfo(net, net->nft.ipv6) < 0)
+-              goto err;
+-
+-      return 0;
+-err:
+-      kfree(net->nft.ipv6);
+-      return -ENOMEM;
+-}
+-
+-static void nf_tables_ipv6_exit_net(struct net *net)
+-{
+-      nft_unregister_afinfo(net, net->nft.ipv6);
+-      kfree(net->nft.ipv6);
+-}
+-
+-static struct pernet_operations nf_tables_ipv6_net_ops = {
+-      .init   = nf_tables_ipv6_init_net,
+-      .exit   = nf_tables_ipv6_exit_net,
+-};
+-
+ static const struct nf_chain_type filter_ipv6 = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -84,20 +56,24 @@ static int __init nf_tables_ipv6_init(vo
+ {
+       int ret;
+-      ret = nft_register_chain_type(&filter_ipv6);
++      ret = nft_register_afinfo(&nft_af_ipv6);
+       if (ret < 0)
+               return ret;
+-      ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
++      ret = nft_register_chain_type(&filter_ipv6);
+       if (ret < 0)
+-              nft_unregister_chain_type(&filter_ipv6);
++              goto err_register_chain;
++
++      return 0;
++err_register_chain:
++      nft_unregister_afinfo(&nft_af_ipv6);
+       return ret;
+ }
+ static void __exit nf_tables_ipv6_exit(void)
+ {
+-      unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
++      nft_unregister_afinfo(&nft_af_ipv6);
+       nft_unregister_chain_type(&filter_ipv6);
+ }
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -26,6 +26,7 @@
+ static LIST_HEAD(nf_tables_expressions);
+ static LIST_HEAD(nf_tables_objects);
+ static LIST_HEAD(nf_tables_flowtables);
++static LIST_HEAD(nf_tables_af_info);
+ /**
+  *    nft_register_afinfo - register nf_tables address family info
+@@ -35,17 +36,15 @@ static LIST_HEAD(nf_tables_flowtables);
+  *    Register the address family for use with nf_tables. Returns zero on
+  *    success or a negative errno code otherwise.
+  */
+-int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
++int nft_register_afinfo(struct nft_af_info *afi)
+ {
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-      list_add_tail_rcu(&afi->list, &net->nft.af_info);
++      list_add_tail_rcu(&afi->list, &nf_tables_af_info);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(nft_register_afinfo);
+-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
+-
+ /**
+  *    nft_unregister_afinfo - unregister nf_tables address family info
+  *
+@@ -53,10 +52,9 @@ static void __nft_release_afinfo(struct
+  *
+  *    Unregister the address family for use with nf_tables.
+  */
+-void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
++void nft_unregister_afinfo(struct nft_af_info *afi)
+ {
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-      __nft_release_afinfo(net, afi);
+       list_del_rcu(&afi->list);
+       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ }
+@@ -66,7 +64,7 @@ static struct nft_af_info *nft_afinfo_lo
+ {
+       struct nft_af_info *afi;
+-      list_for_each_entry(afi, &net->nft.af_info, list) {
++      list_for_each_entry(afi, &nf_tables_af_info, list) {
+               if (afi->family == family)
+                       return afi;
+       }
+@@ -4968,15 +4966,12 @@ void nft_flow_table_iterate(struct net *
+                           void *data)
+ {
+       struct nft_flowtable *flowtable;
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       rcu_read_lock();
+-      list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+-              list_for_each_entry_rcu(table, &net->nft.tables, list) {
+-                      list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+-                              iter(&flowtable->data, data);
+-                      }
++      list_for_each_entry_rcu(table, &net->nft.tables, list) {
++              list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
++                      iter(&flowtable->data, data);
+               }
+       }
+       rcu_read_unlock();
+@@ -6459,21 +6454,6 @@ int nft_data_dump(struct sk_buff *skb, i
+ }
+ EXPORT_SYMBOL_GPL(nft_data_dump);
+-static int __net_init nf_tables_init_net(struct net *net)
+-{
+-      INIT_LIST_HEAD(&net->nft.af_info);
+-      INIT_LIST_HEAD(&net->nft.tables);
+-      INIT_LIST_HEAD(&net->nft.commit_list);
+-      net->nft.base_seq = 1;
+-      return 0;
+-}
+-
+-static void __net_exit nf_tables_exit_net(struct net *net)
+-{
+-      WARN_ON_ONCE(!list_empty(&net->nft.af_info));
+-      WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+-}
+-
+ int __nft_release_basechain(struct nft_ctx *ctx)
+ {
+       struct nft_rule *rule, *nr;
+@@ -6494,8 +6474,7 @@ int __nft_release_basechain(struct nft_c
+ }
+ EXPORT_SYMBOL_GPL(__nft_release_basechain);
+-/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
+-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
++static void __nft_release_afinfo(struct net *net)
+ {
+       struct nft_flowtable *flowtable, *nf;
+       struct nft_table *table, *nt;
+@@ -6505,10 +6484,11 @@ static void __nft_release_afinfo(struct
+       struct nft_set *set, *ns;
+       struct nft_ctx ctx = {
+               .net    = net,
+-              .family = afi->family,
+       };
+       list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
++              ctx.family = table->afi->family;
++
+               list_for_each_entry(chain, &table->chains, list)
+                       nf_tables_unregister_hook(net, table, chain);
+               list_for_each_entry(flowtable, &table->flowtables, list)
+@@ -6549,6 +6529,21 @@ static void __nft_release_afinfo(struct
+       }
+ }
++static int __net_init nf_tables_init_net(struct net *net)
++{
++      INIT_LIST_HEAD(&net->nft.tables);
++      INIT_LIST_HEAD(&net->nft.commit_list);
++      net->nft.base_seq = 1;
++      return 0;
++}
++
++static void __net_exit nf_tables_exit_net(struct net *net)
++{
++      __nft_release_afinfo(net);
++      WARN_ON_ONCE(!list_empty(&net->nft.tables));
++      WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
++}
++
+ static struct pernet_operations nf_tables_net_ops = {
+       .init   = nf_tables_init_net,
+       .exit   = nf_tables_exit_net,
+--- a/net/netfilter/nf_tables_inet.c
++++ b/net/netfilter/nf_tables_inet.c
+@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_inet __
+       .owner          = THIS_MODULE,
+ };
+-static int __net_init nf_tables_inet_init_net(struct net *net)
+-{
+-      net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+-      if (net->nft.inet == NULL)
+-              return -ENOMEM;
+-      memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet));
+-
+-      if (nft_register_afinfo(net, net->nft.inet) < 0)
+-              goto err;
+-
+-      return 0;
+-
+-err:
+-      kfree(net->nft.inet);
+-      return -ENOMEM;
+-}
+-
+-static void __net_exit nf_tables_inet_exit_net(struct net *net)
+-{
+-      nft_unregister_afinfo(net, net->nft.inet);
+-      kfree(net->nft.inet);
+-}
+-
+-static struct pernet_operations nf_tables_inet_net_ops = {
+-      .init   = nf_tables_inet_init_net,
+-      .exit   = nf_tables_inet_exit_net,
+-};
+-
+ static const struct nf_chain_type filter_inet = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -94,21 +66,24 @@ static int __init nf_tables_inet_init(vo
+ {
+       int ret;
+-      ret = nft_register_chain_type(&filter_inet);
+-      if (ret < 0)
++      if (nft_register_afinfo(&nft_af_inet) < 0)
+               return ret;
+-      ret = register_pernet_subsys(&nf_tables_inet_net_ops);
++      ret = nft_register_chain_type(&filter_inet);
+       if (ret < 0)
+-              nft_unregister_chain_type(&filter_inet);
++              goto err_register_chain;
++
++      return ret;
++err_register_chain:
++      nft_unregister_afinfo(&nft_af_inet);
+       return ret;
+ }
+ static void __exit nf_tables_inet_exit(void)
+ {
+-      unregister_pernet_subsys(&nf_tables_inet_net_ops);
+       nft_unregister_chain_type(&filter_inet);
++      nft_unregister_afinfo(&nft_af_inet);
+ }
+ module_init(nf_tables_inet_init);
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_netdev
+       .owner          = THIS_MODULE,
+ };
+-static int nf_tables_netdev_init_net(struct net *net)
+-{
+-      net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+-      if (net->nft.netdev == NULL)
+-              return -ENOMEM;
+-
+-      memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
+-
+-      if (nft_register_afinfo(net, net->nft.netdev) < 0)
+-              goto err;
+-
+-      return 0;
+-err:
+-      kfree(net->nft.netdev);
+-      return -ENOMEM;
+-}
+-
+-static void nf_tables_netdev_exit_net(struct net *net)
+-{
+-      nft_unregister_afinfo(net, net->nft.netdev);
+-      kfree(net->nft.netdev);
+-}
+-
+-static struct pernet_operations nf_tables_netdev_net_ops = {
+-      .init   = nf_tables_netdev_init_net,
+-      .exit   = nf_tables_netdev_exit_net,
+-};
+-
+ static const struct nf_chain_type nft_filter_chain_netdev = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -145,32 +117,32 @@ static int __init nf_tables_netdev_init(
+ {
+       int ret;
+-      ret = nft_register_chain_type(&nft_filter_chain_netdev);
+-      if (ret)
++      if (nft_register_afinfo(&nft_af_netdev) < 0)
+               return ret;
+-      ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
++      ret = nft_register_chain_type(&nft_filter_chain_netdev);
+       if (ret)
+-              goto err1;
++              goto err_register_chain_type;
+       ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
+       if (ret)
+-              goto err2;
++              goto err_register_netdevice_notifier;
+       return 0;
+-err2:
+-      unregister_pernet_subsys(&nf_tables_netdev_net_ops);
+-err1:
++err_register_netdevice_notifier:
+       nft_unregister_chain_type(&nft_filter_chain_netdev);
++err_register_chain_type:
++      nft_unregister_afinfo(&nft_af_netdev);
++
+       return ret;
+ }
+ static void __exit nf_tables_netdev_exit(void)
+ {
+       unregister_netdevice_notifier(&nf_tables_netdev_notifier);
+-      unregister_pernet_subsys(&nf_tables_netdev_net_ops);
+       nft_unregister_chain_type(&nft_filter_chain_netdev);
++      nft_unregister_afinfo(&nft_af_netdev);
+ }
+ module_init(nf_tables_netdev_init);
diff --git a/target/linux/generic/backport-4.14/338-netfilter-nf_tables-get-rid-of-struct-nft_af_info-ab.patch b/target/linux/generic/backport-4.14/338-netfilter-nf_tables-get-rid-of-struct-nft_af_info-ab.patch
new file mode 100644 (file)
index 0000000..9e72b7b
--- /dev/null
@@ -0,0 +1,1204 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 9 Jan 2018 02:48:47 +0100
+Subject: [PATCH] netfilter: nf_tables: get rid of struct nft_af_info
+ abstraction
+
+Remove the infrastructure to register/unregister nft_af_info structure,
+this structure stores no useful information anymore.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -950,28 +950,12 @@ struct nft_table {
+       struct list_head                flowtables;
+       u64                             hgenerator;
+       u32                             use;
+-      u16                             flags:14,
++      u16                             family:6,
++                                      flags:8,
+                                       genmask:2;
+-      struct nft_af_info              *afi;
+       char                            *name;
+ };
+-/**
+- *    struct nft_af_info - nf_tables address family info
+- *
+- *    @list: used internally
+- *    @family: address family
+- *    @owner: module owner
+- */
+-struct nft_af_info {
+-      struct list_head                list;
+-      int                             family;
+-      struct module                   *owner;
+-};
+-
+-int nft_register_afinfo(struct nft_af_info *);
+-void nft_unregister_afinfo(struct nft_af_info *);
+-
+ int nft_register_chain_type(const struct nf_chain_type *);
+ void nft_unregister_chain_type(const struct nf_chain_type *);
+@@ -1139,9 +1123,6 @@ void nft_trace_notify(struct nft_tracein
+ #define nft_dereference(p)                                    \
+       nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
+-#define MODULE_ALIAS_NFT_FAMILY(family)       \
+-      MODULE_ALIAS("nft-afinfo-" __stringify(family))
+-
+ #define MODULE_ALIAS_NFT_CHAIN(family, name) \
+       MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -42,11 +42,6 @@ nft_do_chain_bridge(void *priv,
+       return nft_do_chain(&pkt, priv);
+ }
+-static struct nft_af_info nft_af_bridge __read_mostly = {
+-      .family         = NFPROTO_BRIDGE,
+-      .owner          = THIS_MODULE,
+-};
+-
+ static const struct nf_chain_type filter_bridge = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -68,28 +63,12 @@ static const struct nf_chain_type filter
+ static int __init nf_tables_bridge_init(void)
+ {
+-      int ret;
+-
+-      ret = nft_register_afinfo(&nft_af_bridge);
+-      if (ret < 0)
+-              return ret;
+-
+-      ret = nft_register_chain_type(&filter_bridge);
+-      if (ret < 0)
+-              goto err_register_chain;
+-
+-      return ret;
+-
+-err_register_chain:
+-      nft_unregister_chain_type(&filter_bridge);
+-
+-      return ret;
++      return nft_register_chain_type(&filter_bridge);
+ }
+ static void __exit nf_tables_bridge_exit(void)
+ {
+       nft_unregister_chain_type(&filter_bridge);
+-      nft_unregister_afinfo(&nft_af_bridge);
+ }
+ module_init(nf_tables_bridge_init);
+@@ -97,4 +76,4 @@ module_exit(nf_tables_bridge_exit);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+-MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
++MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
+--- a/net/ipv4/netfilter/nf_tables_arp.c
++++ b/net/ipv4/netfilter/nf_tables_arp.c
+@@ -27,11 +27,6 @@ nft_do_chain_arp(void *priv,
+       return nft_do_chain(&pkt, priv);
+ }
+-static struct nft_af_info nft_af_arp __read_mostly = {
+-      .family         = NFPROTO_ARP,
+-      .owner          = THIS_MODULE,
+-};
+-
+ static const struct nf_chain_type filter_arp = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -47,28 +42,12 @@ static const struct nf_chain_type filter
+ static int __init nf_tables_arp_init(void)
+ {
+-      int ret;
+-
+-      ret = nft_register_afinfo(&nft_af_arp);
+-      if (ret < 0)
+-              return ret;
+-
+-      ret = nft_register_chain_type(&filter_arp);
+-      if (ret < 0)
+-              goto err_register_chain;
+-
+-      return 0;
+-
+-err_register_chain:
+-      nft_unregister_chain_type(&filter_arp);
+-
+-      return ret;
++      return nft_register_chain_type(&filter_arp);
+ }
+ static void __exit nf_tables_arp_exit(void)
+ {
+       nft_unregister_chain_type(&filter_arp);
+-      nft_unregister_afinfo(&nft_af_arp);
+ }
+ module_init(nf_tables_arp_init);
+@@ -76,4 +55,4 @@ module_exit(nf_tables_arp_exit);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+-MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
++MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -30,11 +30,6 @@ static unsigned int nft_do_chain_ipv4(vo
+       return nft_do_chain(&pkt, priv);
+ }
+-static struct nft_af_info nft_af_ipv4 __read_mostly = {
+-      .family         = NFPROTO_IPV4,
+-      .owner          = THIS_MODULE,
+-};
+-
+ static const struct nf_chain_type filter_ipv4 = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -56,27 +51,12 @@ static const struct nf_chain_type filter
+ static int __init nf_tables_ipv4_init(void)
+ {
+-      int ret;
+-
+-      ret = nft_register_afinfo(&nft_af_ipv4);
+-      if (ret < 0)
+-              return ret;
+-
+-      ret = nft_register_chain_type(&filter_ipv4);
+-      if (ret < 0)
+-              goto err_register_chain;
+-
+-      return 0;
+-
+-err_register_chain:
+-      nft_unregister_afinfo(&nft_af_ipv4);
+-      return ret;
++      return nft_register_chain_type(&filter_ipv4);
+ }
+ static void __exit nf_tables_ipv4_exit(void)
+ {
+       nft_unregister_chain_type(&filter_ipv4);
+-      nft_unregister_afinfo(&nft_af_ipv4);
+ }
+ module_init(nf_tables_ipv4_init);
+@@ -84,4 +64,4 @@ module_exit(nf_tables_ipv4_exit);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+-MODULE_ALIAS_NFT_FAMILY(AF_INET);
++MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -28,11 +28,6 @@ static unsigned int nft_do_chain_ipv6(vo
+       return nft_do_chain(&pkt, priv);
+ }
+-static struct nft_af_info nft_af_ipv6 __read_mostly = {
+-      .family         = NFPROTO_IPV6,
+-      .owner          = THIS_MODULE,
+-};
+-
+ static const struct nf_chain_type filter_ipv6 = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -54,26 +49,11 @@ static const struct nf_chain_type filter
+ static int __init nf_tables_ipv6_init(void)
+ {
+-      int ret;
+-
+-      ret = nft_register_afinfo(&nft_af_ipv6);
+-      if (ret < 0)
+-              return ret;
+-
+-      ret = nft_register_chain_type(&filter_ipv6);
+-      if (ret < 0)
+-              goto err_register_chain;
+-
+-      return 0;
+-
+-err_register_chain:
+-      nft_unregister_afinfo(&nft_af_ipv6);
+-      return ret;
++      return nft_register_chain_type(&filter_ipv6);
+ }
+ static void __exit nf_tables_ipv6_exit(void)
+ {
+-      nft_unregister_afinfo(&nft_af_ipv6);
+       nft_unregister_chain_type(&filter_ipv6);
+ }
+@@ -82,4 +62,4 @@ module_exit(nf_tables_ipv6_exit);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+-MODULE_ALIAS_NFT_FAMILY(AF_INET6);
++MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -26,71 +26,6 @@
+ static LIST_HEAD(nf_tables_expressions);
+ static LIST_HEAD(nf_tables_objects);
+ static LIST_HEAD(nf_tables_flowtables);
+-static LIST_HEAD(nf_tables_af_info);
+-
+-/**
+- *    nft_register_afinfo - register nf_tables address family info
+- *
+- *    @afi: address family info to register
+- *
+- *    Register the address family for use with nf_tables. Returns zero on
+- *    success or a negative errno code otherwise.
+- */
+-int nft_register_afinfo(struct nft_af_info *afi)
+-{
+-      nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-      list_add_tail_rcu(&afi->list, &nf_tables_af_info);
+-      nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+-      return 0;
+-}
+-EXPORT_SYMBOL_GPL(nft_register_afinfo);
+-
+-/**
+- *    nft_unregister_afinfo - unregister nf_tables address family info
+- *
+- *    @afi: address family info to unregister
+- *
+- *    Unregister the address family for use with nf_tables.
+- */
+-void nft_unregister_afinfo(struct nft_af_info *afi)
+-{
+-      nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-      list_del_rcu(&afi->list);
+-      nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+-}
+-EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+-
+-static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
+-{
+-      struct nft_af_info *afi;
+-
+-      list_for_each_entry(afi, &nf_tables_af_info, list) {
+-              if (afi->family == family)
+-                      return afi;
+-      }
+-      return NULL;
+-}
+-
+-static struct nft_af_info *
+-nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
+-{
+-      struct nft_af_info *afi;
+-
+-      afi = nft_afinfo_lookup(net, family);
+-      if (afi != NULL)
+-              return afi;
+-#ifdef CONFIG_MODULES
+-      if (autoload) {
+-              nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+-              request_module("nft-afinfo-%u", family);
+-              nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-              afi = nft_afinfo_lookup(net, family);
+-              if (afi != NULL)
+-                      return ERR_PTR(-EAGAIN);
+-      }
+-#endif
+-      return ERR_PTR(-EAFNOSUPPORT);
+-}
+ static void nft_ctx_init(struct nft_ctx *ctx,
+                        struct net *net,
+@@ -390,7 +325,7 @@ static struct nft_table *nft_table_looku
+       list_for_each_entry(table, &net->nft.tables, list) {
+               if (!nla_strcmp(nla, table->name) &&
+-                  table->afi->family == family &&
++                  table->family == family &&
+                   nft_active_genmask(table, genmask))
+                       return table;
+       }
+@@ -531,7 +466,7 @@ static int nf_tables_dump_tables(struct
+       cb->seq = net->nft.base_seq;
+       list_for_each_entry_rcu(table, &net->nft.tables, list) {
+-              if (family != NFPROTO_UNSPEC && family != table->afi->family)
++              if (family != NFPROTO_UNSPEC && family != table->family)
+                       continue;
+               if (idx < s_idx)
+@@ -545,7 +480,7 @@ static int nf_tables_dump_tables(struct
+                                             NETLINK_CB(cb->skb).portid,
+                                             cb->nlh->nlmsg_seq,
+                                             NFT_MSG_NEWTABLE, NLM_F_MULTI,
+-                                            table->afi->family, table) < 0)
++                                            table->family, table) < 0)
+                       goto done;
+               nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+@@ -565,7 +500,6 @@ static int nf_tables_gettable(struct net
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_cur(net);
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       struct sk_buff *skb2;
+       int family = nfmsg->nfgen_family;
+@@ -578,11 +512,7 @@ static int nf_tables_gettable(struct net
+               return netlink_dump_start(nlsk, skb, nlh, &c);
+       }
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -702,19 +632,14 @@ static int nf_tables_newtable(struct net
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_next(net);
+       const struct nlattr *name;
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       int family = nfmsg->nfgen_family;
+       u32 flags = 0;
+       struct nft_ctx ctx;
+       int err;
+-      afi = nf_tables_afinfo_lookup(net, family, true);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+       name = nla[NFTA_TABLE_NAME];
+-      table = nf_tables_table_lookup(net, name, afi->family, genmask);
++      table = nf_tables_table_lookup(net, name, family, genmask);
+       if (IS_ERR(table)) {
+               if (PTR_ERR(table) != -ENOENT)
+                       return PTR_ERR(table);
+@@ -724,7 +649,7 @@ static int nf_tables_newtable(struct net
+               if (nlh->nlmsg_flags & NLM_F_REPLACE)
+                       return -EOPNOTSUPP;
+-              nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
++              nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+               return nf_tables_updtable(&ctx);
+       }
+@@ -734,40 +659,34 @@ static int nf_tables_newtable(struct net
+                       return -EINVAL;
+       }
+-      err = -EAFNOSUPPORT;
+-      if (!try_module_get(afi->owner))
+-              goto err1;
+-
+       err = -ENOMEM;
+       table = kzalloc(sizeof(*table), GFP_KERNEL);
+       if (table == NULL)
+-              goto err2;
++              goto err_kzalloc;
+       table->name = nla_strdup(name, GFP_KERNEL);
+       if (table->name == NULL)
+-              goto err3;
++              goto err_strdup;
+       INIT_LIST_HEAD(&table->chains);
+       INIT_LIST_HEAD(&table->sets);
+       INIT_LIST_HEAD(&table->objects);
+       INIT_LIST_HEAD(&table->flowtables);
+-      table->afi = afi;
++      table->family = family;
+       table->flags = flags;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+       err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+       if (err < 0)
+-              goto err4;
++              goto err_trans;
+       list_add_tail_rcu(&table->list, &net->nft.tables);
+       return 0;
+-err4:
++err_trans:
+       kfree(table->name);
+-err3:
++err_strdup:
+       kfree(table);
+-err2:
+-      module_put(afi->owner);
+-err1:
++err_kzalloc:
+       return err;
+ }
+@@ -838,10 +757,10 @@ static int nft_flush(struct nft_ctx *ctx
+       int err = 0;
+       list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
+-              if (family != AF_UNSPEC && table->afi->family != family)
++              if (family != AF_UNSPEC && table->family != family)
+                       continue;
+-              ctx->family = table->afi->family;
++              ctx->family = table->family;
+               if (!nft_is_active_next(ctx->net, table))
+                       continue;
+@@ -867,7 +786,6 @@ static int nf_tables_deltable(struct net
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_next(net);
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       int family = nfmsg->nfgen_family;
+       struct nft_ctx ctx;
+@@ -876,11 +794,7 @@ static int nf_tables_deltable(struct net
+       if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+               return nft_flush(&ctx, family);
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -889,7 +803,7 @@ static int nf_tables_deltable(struct net
+           table->use > 0)
+               return -EBUSY;
+-      ctx.family = afi->family;
++      ctx.family = family;
+       ctx.table = table;
+       return nft_flush_table(&ctx);
+@@ -901,7 +815,6 @@ static void nf_tables_table_destroy(stru
+       kfree(ctx->table->name);
+       kfree(ctx->table);
+-      module_put(ctx->table->afi->owner);
+ }
+ int nft_register_chain_type(const struct nf_chain_type *ctype)
+@@ -1130,7 +1043,7 @@ static int nf_tables_dump_chains(struct
+       cb->seq = net->nft.base_seq;
+       list_for_each_entry_rcu(table, &net->nft.tables, list) {
+-              if (family != NFPROTO_UNSPEC && family != table->afi->family)
++              if (family != NFPROTO_UNSPEC && family != table->family)
+                       continue;
+               list_for_each_entry_rcu(chain, &table->chains, list) {
+@@ -1146,7 +1059,7 @@ static int nf_tables_dump_chains(struct
+                                                     cb->nlh->nlmsg_seq,
+                                                     NFT_MSG_NEWCHAIN,
+                                                     NLM_F_MULTI,
+-                                                    table->afi->family, table,
++                                                    table->family, table,
+                                                     chain) < 0)
+                               goto done;
+@@ -1168,7 +1081,6 @@ static int nf_tables_getchain(struct net
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_cur(net);
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       struct sk_buff *skb2;
+@@ -1182,11 +1094,7 @@ static int nf_tables_getchain(struct net
+               return netlink_dump_start(nlsk, skb, nlh, &c);
+       }
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -1550,7 +1458,6 @@ static int nf_tables_newchain(struct net
+       const struct nlattr * uninitialized_var(name);
+       u8 genmask = nft_genmask_next(net);
+       int family = nfmsg->nfgen_family;
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_chain *chain;
+       u8 policy = NF_ACCEPT;
+@@ -1560,11 +1467,7 @@ static int nf_tables_newchain(struct net
+       create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+-      afi = nf_tables_afinfo_lookup(net, family, true);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -1605,7 +1508,7 @@ static int nf_tables_newchain(struct net
+               }
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+       if (chain != NULL) {
+               if (nlh->nlmsg_flags & NLM_F_EXCL)
+@@ -1626,7 +1529,6 @@ static int nf_tables_delchain(struct net
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_next(net);
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_chain *chain;
+       struct nft_rule *rule;
+@@ -1635,11 +1537,7 @@ static int nf_tables_delchain(struct net
+       u32 use;
+       int err;
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -1652,7 +1550,7 @@ static int nf_tables_delchain(struct net
+           chain->use > 0)
+               return -EBUSY;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+       use = chain->use;
+       list_for_each_entry(rule, &chain->rules, list) {
+@@ -2075,7 +1973,7 @@ static int nf_tables_dump_rules(struct s
+       cb->seq = net->nft.base_seq;
+       list_for_each_entry_rcu(table, &net->nft.tables, list) {
+-              if (family != NFPROTO_UNSPEC && family != table->afi->family)
++              if (family != NFPROTO_UNSPEC && family != table->family)
+                       continue;
+               if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+@@ -2098,7 +1996,7 @@ static int nf_tables_dump_rules(struct s
+                                                             cb->nlh->nlmsg_seq,
+                                                             NFT_MSG_NEWRULE,
+                                                             NLM_F_MULTI | NLM_F_APPEND,
+-                                                            table->afi->family,
++                                                            table->family,
+                                                             table, chain, rule) < 0)
+                                       goto done;
+@@ -2134,7 +2032,6 @@ static int nf_tables_getrule(struct net
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_cur(net);
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       const struct nft_chain *chain;
+       const struct nft_rule *rule;
+@@ -2178,11 +2075,7 @@ static int nf_tables_getrule(struct net
+               return netlink_dump_start(nlsk, skb, nlh, &c);
+       }
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -2240,7 +2133,7 @@ static int nf_tables_newrule(struct net
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_next(net);
+-      struct nft_af_info *afi;
++      int family = nfmsg->nfgen_family;
+       struct nft_table *table;
+       struct nft_chain *chain;
+       struct nft_rule *rule, *old_rule = NULL;
+@@ -2256,11 +2149,7 @@ static int nf_tables_newrule(struct net
+       create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+-      afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -2300,7 +2189,7 @@ static int nf_tables_newrule(struct net
+                       return PTR_ERR(old_rule);
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+       n = 0;
+       size = 0;
+@@ -2424,18 +2313,13 @@ static int nf_tables_delrule(struct net
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_next(net);
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_chain *chain = NULL;
+       struct nft_rule *rule;
+       int family = nfmsg->nfgen_family, err = 0;
+       struct nft_ctx ctx;
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -2447,7 +2331,7 @@ static int nf_tables_delrule(struct net
+                       return PTR_ERR(chain);
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+       if (chain) {
+               if (nla[NFTA_RULE_HANDLE]) {
+@@ -2632,26 +2516,17 @@ static int nft_ctx_init_from_setattr(str
+                                    u8 genmask)
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+-      struct nft_af_info *afi = NULL;
++      int family = nfmsg->nfgen_family;
+       struct nft_table *table = NULL;
+-      if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
+-              afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+-              if (IS_ERR(afi))
+-                      return PTR_ERR(afi);
+-      }
+-
+       if (nla[NFTA_SET_TABLE] != NULL) {
+-              if (afi == NULL)
+-                      return -EAFNOSUPPORT;
+-
+               table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE],
+-                                             afi->family, genmask);
++                                             family, genmask);
+               if (IS_ERR(table))
+                       return PTR_ERR(table);
+       }
+-      nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
+       return 0;
+ }
+@@ -2882,7 +2757,7 @@ static int nf_tables_dump_sets(struct sk
+       list_for_each_entry_rcu(table, &net->nft.tables, list) {
+               if (ctx->family != NFPROTO_UNSPEC &&
+-                  ctx->family != table->afi->family)
++                  ctx->family != table->family)
+                       continue;
+               if (ctx->table && ctx->table != table)
+@@ -2903,7 +2778,7 @@ static int nf_tables_dump_sets(struct sk
+                       ctx_set = *ctx;
+                       ctx_set.table = table;
+-                      ctx_set.family = table->afi->family;
++                      ctx_set.family = table->family;
+                       if (nf_tables_fill_set(skb, &ctx_set, set,
+                                              NFT_MSG_NEWSET,
+@@ -3015,8 +2890,8 @@ static int nf_tables_newset(struct net *
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_next(net);
++      int family = nfmsg->nfgen_family;
+       const struct nft_set_ops *ops;
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_set *set;
+       struct nft_ctx ctx;
+@@ -3123,16 +2998,12 @@ static int nf_tables_newset(struct net *
+       create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+-      afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+       set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+       if (IS_ERR(set)) {
+@@ -3390,19 +3261,15 @@ static int nft_ctx_init_from_elemattr(st
+                                     u8 genmask)
+ {
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+-      struct nft_af_info *afi;
++      int family = nfmsg->nfgen_family;
+       struct nft_table *table;
+-      afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+       table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE],
+-                                     afi->family, genmask);
++                                     family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+-      nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
+       return 0;
+ }
+@@ -3520,7 +3387,7 @@ static int nf_tables_dump_set(struct sk_
+       rcu_read_lock();
+       list_for_each_entry_rcu(table, &net->nft.tables, list) {
+               if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
+-                  dump_ctx->ctx.family != table->afi->family)
++                  dump_ctx->ctx.family != table->family)
+                       continue;
+               if (table != dump_ctx->ctx.table)
+@@ -3550,7 +3417,7 @@ static int nf_tables_dump_set(struct sk_
+               goto nla_put_failure;
+       nfmsg = nlmsg_data(nlh);
+-      nfmsg->nfgen_family = table->afi->family;
++      nfmsg->nfgen_family = table->family;
+       nfmsg->version      = NFNETLINK_V0;
+       nfmsg->res_id       = htons(net->nft.base_seq & 0xffff);
+@@ -4427,7 +4294,6 @@ static int nf_tables_newobj(struct net *
+       const struct nft_object_type *type;
+       u8 genmask = nft_genmask_next(net);
+       int family = nfmsg->nfgen_family;
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_object *obj;
+       struct nft_ctx ctx;
+@@ -4439,11 +4305,7 @@ static int nf_tables_newobj(struct net *
+           !nla[NFTA_OBJ_DATA])
+               return -EINVAL;
+-      afi = nf_tables_afinfo_lookup(net, family, true);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -4462,7 +4324,7 @@ static int nf_tables_newobj(struct net *
+               return 0;
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+       type = nft_obj_type_get(objtype);
+       if (IS_ERR(type))
+@@ -4554,7 +4416,7 @@ static int nf_tables_dump_obj(struct sk_
+       cb->seq = net->nft.base_seq;
+       list_for_each_entry_rcu(table, &net->nft.tables, list) {
+-              if (family != NFPROTO_UNSPEC && family != table->afi->family)
++              if (family != NFPROTO_UNSPEC && family != table->family)
+                       continue;
+               list_for_each_entry_rcu(obj, &table->objects, list) {
+@@ -4577,7 +4439,7 @@ static int nf_tables_dump_obj(struct sk_
+                                                   cb->nlh->nlmsg_seq,
+                                                   NFT_MSG_NEWOBJ,
+                                                   NLM_F_MULTI | NLM_F_APPEND,
+-                                                  table->afi->family, table,
++                                                  table->family, table,
+                                                   obj, reset) < 0)
+                               goto done;
+@@ -4635,7 +4497,6 @@ static int nf_tables_getobj(struct net *
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_cur(net);
+       int family = nfmsg->nfgen_family;
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       struct nft_object *obj;
+       struct sk_buff *skb2;
+@@ -4666,11 +4527,7 @@ static int nf_tables_getobj(struct net *
+           !nla[NFTA_OBJ_TYPE])
+               return -EINVAL;
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -4717,7 +4574,6 @@ static int nf_tables_delobj(struct net *
+       const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+       u8 genmask = nft_genmask_next(net);
+       int family = nfmsg->nfgen_family;
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_object *obj;
+       struct nft_ctx ctx;
+@@ -4727,11 +4583,7 @@ static int nf_tables_delobj(struct net *
+           !nla[NFTA_OBJ_NAME])
+               return -EINVAL;
+-      afi = nf_tables_afinfo_lookup(net, family, true);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+-      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family,
++      table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+                                      genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -4743,7 +4595,7 @@ static int nf_tables_delobj(struct net *
+       if (obj->use > 0)
+               return -EBUSY;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+       return nft_delobj(&ctx, obj);
+ }
+@@ -4928,33 +4780,31 @@ err1:
+       return err;
+ }
+-static const struct nf_flowtable_type *
+-__nft_flowtable_type_get(const struct nft_af_info *afi)
++static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
+ {
+       const struct nf_flowtable_type *type;
+       list_for_each_entry(type, &nf_tables_flowtables, list) {
+-              if (afi->family == type->family)
++              if (family == type->family)
+                       return type;
+       }
+       return NULL;
+ }
+-static const struct nf_flowtable_type *
+-nft_flowtable_type_get(const struct nft_af_info *afi)
++static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
+ {
+       const struct nf_flowtable_type *type;
+-      type = __nft_flowtable_type_get(afi);
++      type = __nft_flowtable_type_get(family);
+       if (type != NULL && try_module_get(type->owner))
+               return type;
+ #ifdef CONFIG_MODULES
+       if (type == NULL) {
+               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+-              request_module("nf-flowtable-%u", afi->family);
++              request_module("nf-flowtable-%u", family);
+               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-              if (__nft_flowtable_type_get(afi))
++              if (__nft_flowtable_type_get(family))
+                       return ERR_PTR(-EAGAIN);
+       }
+ #endif
+@@ -5002,7 +4852,6 @@ static int nf_tables_newflowtable(struct
+       u8 genmask = nft_genmask_next(net);
+       int family = nfmsg->nfgen_family;
+       struct nft_flowtable *flowtable;
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_ctx ctx;
+       int err, i, k;
+@@ -5012,12 +4861,8 @@ static int nf_tables_newflowtable(struct
+           !nla[NFTA_FLOWTABLE_HOOK])
+               return -EINVAL;
+-      afi = nf_tables_afinfo_lookup(net, family, true);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+       table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+-                                     afi->family, genmask);
++                                     family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -5034,7 +4879,7 @@ static int nf_tables_newflowtable(struct
+               return 0;
+       }
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+       flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+       if (!flowtable)
+@@ -5047,7 +4892,7 @@ static int nf_tables_newflowtable(struct
+               goto err1;
+       }
+-      type = nft_flowtable_type_get(afi);
++      type = nft_flowtable_type_get(family);
+       if (IS_ERR(type)) {
+               err = PTR_ERR(type);
+               goto err2;
+@@ -5107,16 +4952,11 @@ static int nf_tables_delflowtable(struct
+       u8 genmask = nft_genmask_next(net);
+       int family = nfmsg->nfgen_family;
+       struct nft_flowtable *flowtable;
+-      struct nft_af_info *afi;
+       struct nft_table *table;
+       struct nft_ctx ctx;
+-      afi = nf_tables_afinfo_lookup(net, family, true);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+       table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+-                                     afi->family, genmask);
++                                     family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -5127,7 +4967,7 @@ static int nf_tables_delflowtable(struct
+       if (flowtable->use > 0)
+               return -EBUSY;
+-      nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla);
++      nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+       return nft_delflowtable(&ctx, flowtable);
+ }
+@@ -5202,7 +5042,7 @@ static int nf_tables_dump_flowtable(stru
+       cb->seq = net->nft.base_seq;
+       list_for_each_entry_rcu(table, &net->nft.tables, list) {
+-              if (family != NFPROTO_UNSPEC && family != table->afi->family)
++              if (family != NFPROTO_UNSPEC && family != table->family)
+                       continue;
+               list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+@@ -5221,7 +5061,7 @@ static int nf_tables_dump_flowtable(stru
+                                                         cb->nlh->nlmsg_seq,
+                                                         NFT_MSG_NEWFLOWTABLE,
+                                                         NLM_F_MULTI | NLM_F_APPEND,
+-                                                        table->afi->family, flowtable) < 0)
++                                                        table->family, flowtable) < 0)
+                               goto done;
+                       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+@@ -5279,7 +5119,6 @@ static int nf_tables_getflowtable(struct
+       u8 genmask = nft_genmask_cur(net);
+       int family = nfmsg->nfgen_family;
+       struct nft_flowtable *flowtable;
+-      const struct nft_af_info *afi;
+       const struct nft_table *table;
+       struct sk_buff *skb2;
+       int err;
+@@ -5305,12 +5144,8 @@ static int nf_tables_getflowtable(struct
+       if (!nla[NFTA_FLOWTABLE_NAME])
+               return -EINVAL;
+-      afi = nf_tables_afinfo_lookup(net, family, false);
+-      if (IS_ERR(afi))
+-              return PTR_ERR(afi);
+-
+       table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+-                                     afi->family, genmask);
++                                     family, genmask);
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -6474,7 +6309,7 @@ int __nft_release_basechain(struct nft_c
+ }
+ EXPORT_SYMBOL_GPL(__nft_release_basechain);
+-static void __nft_release_afinfo(struct net *net)
++static void __nft_release_tables(struct net *net)
+ {
+       struct nft_flowtable *flowtable, *nf;
+       struct nft_table *table, *nt;
+@@ -6487,7 +6322,7 @@ static void __nft_release_afinfo(struct
+       };
+       list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+-              ctx.family = table->afi->family;
++              ctx.family = table->family;
+               list_for_each_entry(chain, &table->chains, list)
+                       nf_tables_unregister_hook(net, table, chain);
+@@ -6539,7 +6374,7 @@ static int __net_init nf_tables_init_net
+ static void __net_exit nf_tables_exit_net(struct net *net)
+ {
+-      __nft_release_afinfo(net);
++      __nft_release_tables(net);
+       WARN_ON_ONCE(!list_empty(&net->nft.tables));
+       WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+ }
+--- a/net/netfilter/nf_tables_inet.c
++++ b/net/netfilter/nf_tables_inet.c
+@@ -38,11 +38,6 @@ static unsigned int nft_do_chain_inet(vo
+       return nft_do_chain(&pkt, priv);
+ }
+-static struct nft_af_info nft_af_inet __read_mostly = {
+-      .family         = NFPROTO_INET,
+-      .owner          = THIS_MODULE,
+-};
+-
+ static const struct nf_chain_type filter_inet = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -64,26 +59,12 @@ static const struct nf_chain_type filter
+ static int __init nf_tables_inet_init(void)
+ {
+-      int ret;
+-
+-      if (nft_register_afinfo(&nft_af_inet) < 0)
+-              return ret;
+-
+-      ret = nft_register_chain_type(&filter_inet);
+-      if (ret < 0)
+-              goto err_register_chain;
+-
+-      return ret;
+-
+-err_register_chain:
+-      nft_unregister_afinfo(&nft_af_inet);
+-      return ret;
++      return nft_register_chain_type(&filter_inet);
+ }
+ static void __exit nf_tables_inet_exit(void)
+ {
+       nft_unregister_chain_type(&filter_inet);
+-      nft_unregister_afinfo(&nft_af_inet);
+ }
+ module_init(nf_tables_inet_init);
+@@ -91,4 +72,4 @@ module_exit(nf_tables_inet_exit);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+-MODULE_ALIAS_NFT_FAMILY(1);
++MODULE_ALIAS_NFT_CHAIN(1, "filter");
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -38,11 +38,6 @@ nft_do_chain_netdev(void *priv, struct s
+       return nft_do_chain(&pkt, priv);
+ }
+-static struct nft_af_info nft_af_netdev __read_mostly = {
+-      .family         = NFPROTO_NETDEV,
+-      .owner          = THIS_MODULE,
+-};
+-
+ static const struct nf_chain_type nft_filter_chain_netdev = {
+       .name           = "filter",
+       .type           = NFT_CHAIN_T_DEFAULT,
+@@ -91,10 +86,10 @@ static int nf_tables_netdev_event(struct
+       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       list_for_each_entry(table, &ctx.net->nft.tables, list) {
+-              if (table->afi->family != NFPROTO_NETDEV)
++              if (table->family != NFPROTO_NETDEV)
+                       continue;
+-              ctx.family = table->afi->family;
++              ctx.family = table->family;
+               ctx.table = table;
+               list_for_each_entry_safe(chain, nr, &table->chains, list) {
+                       if (!nft_is_base_chain(chain))
+@@ -117,12 +112,9 @@ static int __init nf_tables_netdev_init(
+ {
+       int ret;
+-      if (nft_register_afinfo(&nft_af_netdev) < 0)
+-              return ret;
+-
+       ret = nft_register_chain_type(&nft_filter_chain_netdev);
+       if (ret)
+-              goto err_register_chain_type;
++              return ret;
+       ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
+       if (ret)
+@@ -132,8 +124,6 @@ static int __init nf_tables_netdev_init(
+ err_register_netdevice_notifier:
+       nft_unregister_chain_type(&nft_filter_chain_netdev);
+-err_register_chain_type:
+-      nft_unregister_afinfo(&nft_af_netdev);
+       return ret;
+ }
+@@ -142,7 +132,6 @@ static void __exit nf_tables_netdev_exit
+ {
+       unregister_netdevice_notifier(&nf_tables_netdev_notifier);
+       nft_unregister_chain_type(&nft_filter_chain_netdev);
+-      nft_unregister_afinfo(&nft_af_netdev);
+ }
+ module_init(nf_tables_netdev_init);
+@@ -150,4 +139,4 @@ module_exit(nf_tables_netdev_exit);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+-MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */
++MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */
diff --git a/target/linux/generic/backport-4.14/339-netfilter-nft_flow_offload-wait-for-garbage-collecto.patch b/target/linux/generic/backport-4.14/339-netfilter-nft_flow_offload-wait-for-garbage-collecto.patch
new file mode 100644 (file)
index 0000000..acca41a
--- /dev/null
@@ -0,0 +1,47 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 1 Feb 2018 18:49:00 +0100
+Subject: [PATCH] netfilter: nft_flow_offload: wait for garbage collector
+ to run after cleanup
+
+If a netdevice goes down, then flowtable entries are scheduled to be
+removed. Wait for the garbage collector to have a chance to run so it can
+delete them from the hashtable.
+
+The flush call might sleep, so hold the nfnl mutex in
+nft_flow_table_iterate() instead of the RCU read-side lock. Using the
+nfnl mutex also implicitly fixes races between updates via nfnetlink
+and netdevice events.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4818,13 +4818,13 @@ void nft_flow_table_iterate(struct net *
+       struct nft_flowtable *flowtable;
+       const struct nft_table *table;
+-      rcu_read_lock();
+-      list_for_each_entry_rcu(table, &net->nft.tables, list) {
+-              list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
++      nfnl_lock(NFNL_SUBSYS_NFTABLES);
++      list_for_each_entry(table, &net->nft.tables, list) {
++              list_for_each_entry(flowtable, &table->flowtables, list) {
+                       iter(&flowtable->data, data);
+               }
+       }
+-      rcu_read_unlock();
++      nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ }
+ EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -208,6 +208,7 @@ static void nft_flow_offload_iterate_cle
+                                            void *data)
+ {
+       nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
++      flush_delayed_work(&flowtable->gc_work);
+ }
+ static int flow_offload_netdev_event(struct notifier_block *this,
diff --git a/target/linux/generic/backport-4.14/340-netfilter-nft_flow_offload-no-need-to-flush-entries-.patch b/target/linux/generic/backport-4.14/340-netfilter-nft_flow_offload-no-need-to-flush-entries-.patch
new file mode 100644 (file)
index 0000000..539550d
--- /dev/null
@@ -0,0 +1,29 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 1 Feb 2018 18:49:01 +0100
+Subject: [PATCH] netfilter: nft_flow_offload: no need to flush entries on
+ module removal
+
+Removing the nft_flow_offload module does not require flushing existing
+flowtables; it is valid to remove this module while keeping flowtables
+around.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -247,14 +247,8 @@ register_expr:
+ static void __exit nft_flow_offload_module_exit(void)
+ {
+-      struct net *net;
+-
+       nft_unregister_expr(&nft_flow_offload_type);
+       unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+-      rtnl_lock();
+-      for_each_net(net)
+-              nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
+-      rtnl_unlock();
+ }
+ module_init(nft_flow_offload_module_init);
diff --git a/target/linux/generic/backport-4.14/341-netfilter-nft_flow_offload-move-flowtable-cleanup-ro.patch b/target/linux/generic/backport-4.14/341-netfilter-nft_flow_offload-move-flowtable-cleanup-ro.patch
new file mode 100644 (file)
index 0000000..9ee0ad5
--- /dev/null
@@ -0,0 +1,97 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 23 Jan 2018 17:46:09 +0100
+Subject: [PATCH] netfilter: nft_flow_offload: move flowtable cleanup
+ routines to nf_flow_table
+
+Move the flowtable cleanup routines to nf_flow_table and expose the
+nf_flow_table_cleanup() helper function.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -95,6 +95,9 @@ struct flow_offload_tuple_rhash *flow_of
+ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+                         void (*iter)(struct flow_offload *flow, void *data),
+                         void *data);
++
++void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
++
+ void nf_flow_offload_work_gc(struct work_struct *work);
+ extern const struct rhashtable_params nf_flow_offload_rhash_params;
+--- a/net/netfilter/nf_flow_table.c
++++ b/net/netfilter/nf_flow_table.c
+@@ -4,6 +4,7 @@
+ #include <linux/netfilter.h>
+ #include <linux/rhashtable.h>
+ #include <linux/netdevice.h>
++#include <net/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/netfilter/nf_conntrack_core.h>
+@@ -425,5 +426,28 @@ int nf_flow_dnat_port(const struct flow_
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
++static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
++{
++      struct net_device *dev = data;
++
++      if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
++              return;
++
++      flow_offload_dead(flow);
++}
++
++static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
++                                        void *data)
++{
++      nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
++      flush_delayed_work(&flowtable->gc_work);
++}
++
++void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
++{
++      nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
++
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -194,23 +194,6 @@ static struct nft_expr_type nft_flow_off
+       .owner          = THIS_MODULE,
+ };
+-static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
+-{
+-      struct net_device *dev = data;
+-
+-      if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+-              return;
+-
+-      flow_offload_dead(flow);
+-}
+-
+-static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
+-                                           void *data)
+-{
+-      nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
+-      flush_delayed_work(&flowtable->gc_work);
+-}
+-
+ static int flow_offload_netdev_event(struct notifier_block *this,
+                                    unsigned long event, void *ptr)
+ {
+@@ -219,7 +202,7 @@ static int flow_offload_netdev_event(str
+       if (event != NETDEV_DOWN)
+               return NOTIFY_DONE;
+-      nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
++      nf_flow_table_cleanup(dev_net(dev), dev);
+       return NOTIFY_DONE;
+ }
diff --git a/target/linux/generic/backport-4.14/342-netfilter-nf_tables-fix-flowtable-free.patch b/target/linux/generic/backport-4.14/342-netfilter-nf_tables-fix-flowtable-free.patch
new file mode 100644 (file)
index 0000000..334a814
--- /dev/null
@@ -0,0 +1,140 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 5 Feb 2018 21:44:50 +0100
+Subject: [PATCH] netfilter: nf_tables: fix flowtable free
+
+Every flow_offload entry is added into the table twice. Because of this,
+rhashtable_free_and_destroy can't be used, since it would call kfree for
+each flow_offload object twice.
+
+This patch adds a call to nf_flow_table_iterate_cleanup() to schedule
+removal of entries, followed by an explicit invocation of the garbage
+collector to clean up resources.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -14,6 +14,7 @@ struct nf_flowtable_type {
+       struct list_head                list;
+       int                             family;
+       void                            (*gc)(struct work_struct *work);
++      void                            (*free)(struct nf_flowtable *ft);
+       const struct rhashtable_params  *params;
+       nf_hookfn                       *hook;
+       struct module                   *owner;
+@@ -98,6 +99,7 @@ int nf_flow_table_iterate(struct nf_flow
+ void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
++void nf_flow_table_free(struct nf_flowtable *flow_table);
+ void nf_flow_offload_work_gc(struct work_struct *work);
+ extern const struct rhashtable_params nf_flow_offload_rhash_params;
+--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
+@@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtabl
+       .family         = NFPROTO_IPV4,
+       .params         = &nf_flow_offload_rhash_params,
+       .gc             = nf_flow_offload_work_gc,
++      .free           = nf_flow_table_free,
+       .hook           = nf_flow_offload_ip_hook,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
+@@ -254,6 +254,7 @@ static struct nf_flowtable_type flowtabl
+       .family         = NFPROTO_IPV6,
+       .params         = &nf_flow_offload_rhash_params,
+       .gc             = nf_flow_offload_work_gc,
++      .free           = nf_flow_table_free,
+       .hook           = nf_flow_offload_ipv6_hook,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/netfilter/nf_flow_table.c
++++ b/net/netfilter/nf_flow_table.c
+@@ -232,19 +232,16 @@ static inline bool nf_flow_is_dying(cons
+       return flow->flags & FLOW_OFFLOAD_DYING;
+ }
+-void nf_flow_offload_work_gc(struct work_struct *work)
++static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+ {
+       struct flow_offload_tuple_rhash *tuplehash;
+-      struct nf_flowtable *flow_table;
+       struct rhashtable_iter hti;
+       struct flow_offload *flow;
+       int err;
+-      flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+-
+       err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+       if (err)
+-              goto schedule;
++              return 0;
+       rhashtable_walk_start(&hti);
+@@ -270,7 +267,16 @@ void nf_flow_offload_work_gc(struct work
+ out:
+       rhashtable_walk_stop(&hti);
+       rhashtable_walk_exit(&hti);
+-schedule:
++
++      return 1;
++}
++
++void nf_flow_offload_work_gc(struct work_struct *work)
++{
++      struct nf_flowtable *flow_table;
++
++      flow_table = container_of(work, struct nf_flowtable, gc_work.work);
++      nf_flow_offload_gc_step(flow_table);
+       queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+@@ -449,5 +455,12 @@ void nf_flow_table_cleanup(struct net *n
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
++void nf_flow_table_free(struct nf_flowtable *flow_table)
++{
++      nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
++      WARN_ON(!nf_flow_offload_gc_step(flow_table));
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_free);
++
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+--- a/net/netfilter/nf_flow_table_inet.c
++++ b/net/netfilter/nf_flow_table_inet.c
+@@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtabl
+       .family         = NFPROTO_INET,
+       .params         = &nf_flow_offload_rhash_params,
+       .gc             = nf_flow_offload_work_gc,
++      .free           = nf_flow_table_free,
+       .hook           = nf_flow_offload_inet_hook,
+       .owner          = THIS_MODULE,
+ };
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -5201,17 +5201,12 @@ err:
+       nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+ }
+-static void nft_flowtable_destroy(void *ptr, void *arg)
+-{
+-      kfree(ptr);
+-}
+-
+ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
+ {
+       cancel_delayed_work_sync(&flowtable->data.gc_work);
+       kfree(flowtable->name);
+-      rhashtable_free_and_destroy(&flowtable->data.rhashtable,
+-                                  nft_flowtable_destroy, NULL);
++      flowtable->data.type->free(&flowtable->data);
++      rhashtable_destroy(&flowtable->data.rhashtable);
+       module_put(flowtable->data.type->owner);
+ }
diff --git a/target/linux/generic/backport-4.14/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch b/target/linux/generic/backport-4.14/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch
new file mode 100644 (file)
index 0000000..7f35cd7
--- /dev/null
@@ -0,0 +1,96 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 25 Jan 2018 12:58:55 +0100
+Subject: [PATCH] netfilter: nft_flow_offload: handle netdevice events from
+ nf_flow_table
+
+Move the code that deals with device events to the core.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nf_flow_table.c
++++ b/net/netfilter/nf_flow_table.c
+@@ -462,5 +462,35 @@ void nf_flow_table_free(struct nf_flowta
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_free);
++static int nf_flow_table_netdev_event(struct notifier_block *this,
++                                    unsigned long event, void *ptr)
++{
++      struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++
++      if (event != NETDEV_DOWN)
++              return NOTIFY_DONE;
++
++      nf_flow_table_cleanup(dev_net(dev), dev);
++
++      return NOTIFY_DONE;
++}
++
++static struct notifier_block flow_offload_netdev_notifier = {
++      .notifier_call  = nf_flow_table_netdev_event,
++};
++
++static int __init nf_flow_table_module_init(void)
++{
++      return register_netdevice_notifier(&flow_offload_netdev_notifier);
++}
++
++static void __exit nf_flow_table_module_exit(void)
++{
++      unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++}
++
++module_init(nf_flow_table_module_init);
++module_exit(nf_flow_table_module_exit);
++
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -194,44 +194,14 @@ static struct nft_expr_type nft_flow_off
+       .owner          = THIS_MODULE,
+ };
+-static int flow_offload_netdev_event(struct notifier_block *this,
+-                                   unsigned long event, void *ptr)
+-{
+-      struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+-
+-      if (event != NETDEV_DOWN)
+-              return NOTIFY_DONE;
+-
+-      nf_flow_table_cleanup(dev_net(dev), dev);
+-
+-      return NOTIFY_DONE;
+-}
+-
+-static struct notifier_block flow_offload_netdev_notifier = {
+-      .notifier_call  = flow_offload_netdev_event,
+-};
+-
+ static int __init nft_flow_offload_module_init(void)
+ {
+-      int err;
+-
+-      register_netdevice_notifier(&flow_offload_netdev_notifier);
+-
+-      err = nft_register_expr(&nft_flow_offload_type);
+-      if (err < 0)
+-              goto register_expr;
+-
+-      return 0;
+-
+-register_expr:
+-      unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+-      return err;
++      return nft_register_expr(&nft_flow_offload_type);
+ }
+ static void __exit nft_flow_offload_module_exit(void)
+ {
+       nft_unregister_expr(&nft_flow_offload_type);
+-      unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ }
+ module_init(nft_flow_offload_module_init);
diff --git a/target/linux/generic/backport-4.14/344-netfilter-nf_tables-allocate-handle-and-delete-objec.patch b/target/linux/generic/backport-4.14/344-netfilter-nf_tables-allocate-handle-and-delete-objec.patch
new file mode 100644 (file)
index 0000000..97778a9
--- /dev/null
@@ -0,0 +1,468 @@
+From: Harsha Sharma <harshasharmaiitr@gmail.com>
+Date: Wed, 27 Dec 2017 00:59:00 +0530
+Subject: [PATCH] netfilter: nf_tables: allocate handle and delete objects via
+ handle
+
+This patch allows deletion of objects via a unique handle, which can be
+listed via the '-a' option.
+
+Signed-off-by: Harsha Sharma <harshasharmaiitr@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -369,6 +369,7 @@ void nft_unregister_set(struct nft_set_t
+  *    @list: table set list node
+  *    @bindings: list of set bindings
+  *    @name: name of the set
++ *    @handle: unique handle of the set
+  *    @ktype: key type (numeric type defined by userspace, not used in the kernel)
+  *    @dtype: data type (verdict or numeric type defined by userspace)
+  *    @objtype: object type (see NFT_OBJECT_* definitions)
+@@ -391,6 +392,7 @@ struct nft_set {
+       struct list_head                list;
+       struct list_head                bindings;
+       char                            *name;
++      u64                             handle;
+       u32                             ktype;
+       u32                             dtype;
+       u32                             objtype;
+@@ -936,6 +938,7 @@ unsigned int nft_do_chain(struct nft_pkt
+  *    @objects: stateful objects in the table
+  *    @flowtables: flow tables in the table
+  *    @hgenerator: handle generator state
++ *    @handle: table handle
+  *    @use: number of chain references to this table
+  *    @flags: table flag (see enum nft_table_flags)
+  *    @genmask: generation mask
+@@ -949,6 +952,7 @@ struct nft_table {
+       struct list_head                objects;
+       struct list_head                flowtables;
+       u64                             hgenerator;
++      u64                             handle;
+       u32                             use;
+       u16                             family:6,
+                                       flags:8,
+@@ -973,9 +977,9 @@ int nft_verdict_dump(struct sk_buff *skb
+  *    @name: name of this stateful object
+  *    @genmask: generation mask
+  *    @use: number of references to this stateful object
+- *    @data: object data, layout depends on type
++ *    @handle: unique object handle
+  *    @ops: object operations
+- *    @data: pointer to object data
++ *    @data: object data, layout depends on type
+  */
+ struct nft_object {
+       struct list_head                list;
+@@ -983,6 +987,7 @@ struct nft_object {
+       struct nft_table                *table;
+       u32                             genmask:2,
+                                       use:30;
++      u64                             handle;
+       /* runtime data below here */
+       const struct nft_object_ops     *ops ____cacheline_aligned;
+       unsigned char                   data[]
+@@ -1064,6 +1069,7 @@ void nft_unregister_obj(struct nft_objec
+  *    @ops_len: number of hooks in array
+  *    @genmask: generation mask
+  *    @use: number of references to this flow table
++ *    @handle: unique object handle
+  *    @data: rhashtable and garbage collector
+  *    @ops: array of hooks
+  */
+@@ -1076,6 +1082,7 @@ struct nft_flowtable {
+       int                             ops_len;
+       u32                             genmask:2,
+                                       use:30;
++      u64                             handle;
+       /* runtime data below here */
+       struct nf_hook_ops              *ops ____cacheline_aligned;
+       struct nf_flowtable             data;
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -174,6 +174,8 @@ enum nft_table_attributes {
+       NFTA_TABLE_NAME,
+       NFTA_TABLE_FLAGS,
+       NFTA_TABLE_USE,
++      NFTA_TABLE_HANDLE,
++      NFTA_TABLE_PAD,
+       __NFTA_TABLE_MAX
+ };
+ #define NFTA_TABLE_MAX                (__NFTA_TABLE_MAX - 1)
+@@ -317,6 +319,7 @@ enum nft_set_desc_attributes {
+  * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
+  * @NFTA_SET_USERDATA: user data (NLA_BINARY)
+  * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
++ * @NFTA_SET_HANDLE: set handle (NLA_U64)
+  */
+ enum nft_set_attributes {
+       NFTA_SET_UNSPEC,
+@@ -335,6 +338,7 @@ enum nft_set_attributes {
+       NFTA_SET_USERDATA,
+       NFTA_SET_PAD,
+       NFTA_SET_OBJ_TYPE,
++      NFTA_SET_HANDLE,
+       __NFTA_SET_MAX
+ };
+ #define NFTA_SET_MAX          (__NFTA_SET_MAX - 1)
+@@ -1314,6 +1318,7 @@ enum nft_ct_helper_attributes {
+  * @NFTA_OBJ_TYPE: stateful object type (NLA_U32)
+  * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
+  * @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
++ * @NFTA_OBJ_HANDLE: object handle (NLA_U64)
+  */
+ enum nft_object_attributes {
+       NFTA_OBJ_UNSPEC,
+@@ -1322,6 +1327,8 @@ enum nft_object_attributes {
+       NFTA_OBJ_TYPE,
+       NFTA_OBJ_DATA,
+       NFTA_OBJ_USE,
++      NFTA_OBJ_HANDLE,
++      NFTA_OBJ_PAD,
+       __NFTA_OBJ_MAX
+ };
+ #define NFTA_OBJ_MAX          (__NFTA_OBJ_MAX - 1)
+@@ -1333,6 +1340,7 @@ enum nft_object_attributes {
+  * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING)
+  * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
+  * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
++ * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
+  */
+ enum nft_flowtable_attributes {
+       NFTA_FLOWTABLE_UNSPEC,
+@@ -1340,6 +1348,8 @@ enum nft_flowtable_attributes {
+       NFTA_FLOWTABLE_NAME,
+       NFTA_FLOWTABLE_HOOK,
+       NFTA_FLOWTABLE_USE,
++      NFTA_FLOWTABLE_HANDLE,
++      NFTA_FLOWTABLE_PAD,
+       __NFTA_FLOWTABLE_MAX
+ };
+ #define NFTA_FLOWTABLE_MAX    (__NFTA_FLOWTABLE_MAX - 1)
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -26,6 +26,7 @@
+ static LIST_HEAD(nf_tables_expressions);
+ static LIST_HEAD(nf_tables_objects);
+ static LIST_HEAD(nf_tables_flowtables);
++static u64 table_handle;
+ static void nft_ctx_init(struct nft_ctx *ctx,
+                        struct net *net,
+@@ -332,6 +333,20 @@ static struct nft_table *nft_table_looku
+       return NULL;
+ }
++static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
++                                                 const struct nlattr *nla,
++                                                 u8 genmask)
++{
++      struct nft_table *table;
++
++      list_for_each_entry(table, &net->nft.tables, list) {
++              if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
++                  nft_active_genmask(table, genmask))
++                      return table;
++      }
++      return NULL;
++}
++
+ static struct nft_table *nf_tables_table_lookup(const struct net *net,
+                                               const struct nlattr *nla,
+                                               u8 family, u8 genmask)
+@@ -348,6 +363,22 @@ static struct nft_table *nf_tables_table
+       return ERR_PTR(-ENOENT);
+ }
++static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
++                                                       const struct nlattr *nla,
++                                                       u8 genmask)
++{
++      struct nft_table *table;
++
++      if (nla == NULL)
++              return ERR_PTR(-EINVAL);
++
++      table = nft_table_lookup_byhandle(net, nla, genmask);
++      if (table != NULL)
++              return table;
++
++      return ERR_PTR(-ENOENT);
++}
++
+ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+ {
+       return ++table->hgenerator;
+@@ -394,6 +425,7 @@ static const struct nla_policy nft_table
+       [NFTA_TABLE_NAME]       = { .type = NLA_STRING,
+                                   .len = NFT_TABLE_MAXNAMELEN - 1 },
+       [NFTA_TABLE_FLAGS]      = { .type = NLA_U32 },
++      [NFTA_TABLE_HANDLE]     = { .type = NLA_U64 },
+ };
+ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+@@ -415,7 +447,9 @@ static int nf_tables_fill_table_info(str
+       if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
+           nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
+-          nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
++          nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
++          nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
++                       NFTA_TABLE_PAD))
+               goto nla_put_failure;
+       nlmsg_end(skb, nlh);
+@@ -674,6 +708,7 @@ static int nf_tables_newtable(struct net
+       INIT_LIST_HEAD(&table->flowtables);
+       table->family = family;
+       table->flags = flags;
++      table->handle = ++table_handle;
+       nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+       err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+@@ -791,11 +826,18 @@ static int nf_tables_deltable(struct net
+       struct nft_ctx ctx;
+       nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
+-      if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
++      if (family == AF_UNSPEC ||
++          (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
+               return nft_flush(&ctx, family);
+-      table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
+-                                     genmask);
++      if (nla[NFTA_TABLE_HANDLE])
++              table = nf_tables_table_lookup_byhandle(net,
++                                                      nla[NFTA_TABLE_HANDLE],
++                                                      genmask);
++      else
++              table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
++                                             family, genmask);
++
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+@@ -1534,6 +1576,7 @@ static int nf_tables_delchain(struct net
+       struct nft_rule *rule;
+       int family = nfmsg->nfgen_family;
+       struct nft_ctx ctx;
++      u64 handle;
+       u32 use;
+       int err;
+@@ -1542,7 +1585,12 @@ static int nf_tables_delchain(struct net
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+-      chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
++      if (nla[NFTA_CHAIN_HANDLE]) {
++              handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
++              chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
++      } else {
++              chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
++      }
+       if (IS_ERR(chain))
+               return PTR_ERR(chain);
+@@ -2503,6 +2551,7 @@ static const struct nla_policy nft_set_p
+       [NFTA_SET_USERDATA]             = { .type = NLA_BINARY,
+                                           .len  = NFT_USERDATA_MAXLEN },
+       [NFTA_SET_OBJ_TYPE]             = { .type = NLA_U32 },
++      [NFTA_SET_HANDLE]               = { .type = NLA_U64 },
+ };
+ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+@@ -2546,6 +2595,22 @@ static struct nft_set *nf_tables_set_loo
+       return ERR_PTR(-ENOENT);
+ }
++static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
++                                                   const struct nlattr *nla, u8 genmask)
++{
++      struct nft_set *set;
++
++      if (nla == NULL)
++              return ERR_PTR(-EINVAL);
++
++      list_for_each_entry(set, &table->sets, list) {
++              if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
++                  nft_active_genmask(set, genmask))
++                      return set;
++      }
++      return ERR_PTR(-ENOENT);
++}
++
+ static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+                                                const struct nlattr *nla,
+                                                u8 genmask)
+@@ -2661,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_
+               goto nla_put_failure;
+       if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+               goto nla_put_failure;
++      if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
++                       NFTA_SET_PAD))
++              goto nla_put_failure;
+       if (set->flags != 0)
+               if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
+                       goto nla_put_failure;
+@@ -3069,6 +3137,7 @@ static int nf_tables_newset(struct net *
+       set->udata  = udata;
+       set->timeout = timeout;
+       set->gc_int = gc_int;
++      set->handle = nf_tables_alloc_handle(table);
+       err = ops->init(set, &desc, nla);
+       if (err < 0)
+@@ -3126,7 +3195,10 @@ static int nf_tables_delset(struct net *
+       if (err < 0)
+               return err;
+-      set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
++      if (nla[NFTA_SET_HANDLE])
++              set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
++      else
++              set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+       if (IS_ERR(set))
+               return PTR_ERR(set);
+@@ -4182,6 +4254,21 @@ struct nft_object *nf_tables_obj_lookup(
+ }
+ EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
++struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
++                                               const struct nlattr *nla,
++                                               u32 objtype, u8 genmask)
++{
++      struct nft_object *obj;
++
++      list_for_each_entry(obj, &table->objects, list) {
++              if (be64_to_cpu(nla_get_be64(nla)) == obj->handle &&
++                  objtype == obj->ops->type->type &&
++                  nft_active_genmask(obj, genmask))
++                      return obj;
++      }
++      return ERR_PTR(-ENOENT);
++}
++
+ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
+       [NFTA_OBJ_TABLE]        = { .type = NLA_STRING,
+                                   .len = NFT_TABLE_MAXNAMELEN - 1 },
+@@ -4189,6 +4276,7 @@ static const struct nla_policy nft_obj_p
+                                   .len = NFT_OBJ_MAXNAMELEN - 1 },
+       [NFTA_OBJ_TYPE]         = { .type = NLA_U32 },
+       [NFTA_OBJ_DATA]         = { .type = NLA_NESTED },
++      [NFTA_OBJ_HANDLE]       = { .type = NLA_U64},
+ };
+ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
+@@ -4336,6 +4424,8 @@ static int nf_tables_newobj(struct net *
+               goto err1;
+       }
+       obj->table = table;
++      obj->handle = nf_tables_alloc_handle(table);
++
+       obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+       if (!obj->name) {
+               err = -ENOMEM;
+@@ -4382,7 +4472,9 @@ static int nf_tables_fill_obj_info(struc
+           nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
+           nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
+           nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
+-          nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
++          nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
++          nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
++                       NFTA_OBJ_PAD))
+               goto nla_put_failure;
+       nlmsg_end(skb, nlh);
+@@ -4580,7 +4672,7 @@ static int nf_tables_delobj(struct net *
+       u32 objtype;
+       if (!nla[NFTA_OBJ_TYPE] ||
+-          !nla[NFTA_OBJ_NAME])
++          (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
+               return -EINVAL;
+       table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+@@ -4589,7 +4681,12 @@ static int nf_tables_delobj(struct net *
+               return PTR_ERR(table);
+       objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+-      obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
++      if (nla[NFTA_OBJ_HANDLE])
++              obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
++                                                  objtype, genmask);
++      else
++              obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
++                                         objtype, genmask);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+       if (obj->use > 0)
+@@ -4661,6 +4758,7 @@ static const struct nla_policy nft_flowt
+       [NFTA_FLOWTABLE_NAME]           = { .type = NLA_STRING,
+                                           .len = NFT_NAME_MAXLEN - 1 },
+       [NFTA_FLOWTABLE_HOOK]           = { .type = NLA_NESTED },
++      [NFTA_FLOWTABLE_HANDLE]         = { .type = NLA_U64 },
+ };
+ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
+@@ -4678,6 +4776,20 @@ struct nft_flowtable *nf_tables_flowtabl
+ }
+ EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
++struct nft_flowtable *
++nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
++                                  const struct nlattr *nla, u8 genmask)
++{
++       struct nft_flowtable *flowtable;
++
++       list_for_each_entry(flowtable, &table->flowtables, list) {
++               if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
++                   nft_active_genmask(flowtable, genmask))
++                       return flowtable;
++       }
++       return ERR_PTR(-ENOENT);
++}
++
+ #define NFT_FLOWTABLE_DEVICE_MAX      8
+ static int nf_tables_parse_devices(const struct nft_ctx *ctx,
+@@ -4886,6 +4998,8 @@ static int nf_tables_newflowtable(struct
+               return -ENOMEM;
+       flowtable->table = table;
++      flowtable->handle = nf_tables_alloc_handle(table);
++
+       flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
+       if (!flowtable->name) {
+               err = -ENOMEM;
+@@ -4960,8 +5074,14 @@ static int nf_tables_delflowtable(struct
+       if (IS_ERR(table))
+               return PTR_ERR(table);
+-      flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+-                                             genmask);
++      if (nla[NFTA_FLOWTABLE_HANDLE])
++              flowtable = nf_tables_flowtable_lookup_byhandle(table,
++                                                              nla[NFTA_FLOWTABLE_HANDLE],
++                                                              genmask);
++      else
++              flowtable = nf_tables_flowtable_lookup(table,
++                                                     nla[NFTA_FLOWTABLE_NAME],
++                                                     genmask);
+       if (IS_ERR(flowtable))
+                 return PTR_ERR(flowtable);
+       if (flowtable->use > 0)
+@@ -4994,7 +5114,9 @@ static int nf_tables_fill_flowtable_info
+       if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
+           nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
+-          nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
++          nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
++          nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
++                       NFTA_FLOWTABLE_PAD))
+               goto nla_put_failure;
+       nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
diff --git a/target/linux/generic/backport-4.14/345-netfilter-nf_flow_offload-fix-use-after-free-and-a-r.patch b/target/linux/generic/backport-4.14/345-netfilter-nf_flow_offload-fix-use-after-free-and-a-r.patch
new file mode 100644 (file)
index 0000000..331f22d
--- /dev/null
@@ -0,0 +1,95 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 7 Feb 2018 09:23:25 +0100
+Subject: [PATCH] netfilter: nf_flow_offload: fix use-after-free and a resource
+ leak
+
+flow_offload_del frees the flow, so all associated resources must be
+freed before that happens.
+
+Since the ct entry in struct flow_offload_entry was allocated by
+flow_offload_alloc, it should be freed by flow_offload_free to take care
+of the error handling path when flow_offload_add fails.
+
+While at it, make flow_offload_del static, since it should never be
+called directly, only from the gc step.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -90,7 +90,6 @@ struct flow_offload *flow_offload_alloc(
+ void flow_offload_free(struct flow_offload *flow);
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
+-void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
+ struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+                                                    struct flow_offload_tuple *tuple);
+ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+--- a/net/netfilter/nf_flow_table.c
++++ b/net/netfilter/nf_flow_table.c
+@@ -125,7 +125,9 @@ void flow_offload_free(struct flow_offlo
+       dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+       dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+       e = container_of(flow, struct flow_offload_entry, flow);
+-      kfree(e);
++      nf_ct_delete(e->ct, 0, 0);
++      nf_ct_put(e->ct);
++      kfree_rcu(e, rcu_head);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_free);
+@@ -149,11 +151,9 @@ int flow_offload_add(struct nf_flowtable
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_add);
+-void flow_offload_del(struct nf_flowtable *flow_table,
+-                    struct flow_offload *flow)
++static void flow_offload_del(struct nf_flowtable *flow_table,
++                           struct flow_offload *flow)
+ {
+-      struct flow_offload_entry *e;
+-
+       rhashtable_remove_fast(&flow_table->rhashtable,
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+                              *flow_table->type->params);
+@@ -161,10 +161,8 @@ void flow_offload_del(struct nf_flowtabl
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+                              *flow_table->type->params);
+-      e = container_of(flow, struct flow_offload_entry, flow);
+-      kfree_rcu(e, rcu_head);
++      flow_offload_free(flow);
+ }
+-EXPORT_SYMBOL_GPL(flow_offload_del);
+ struct flow_offload_tuple_rhash *
+ flow_offload_lookup(struct nf_flowtable *flow_table,
+@@ -175,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_lookup);
+-static void nf_flow_release_ct(const struct flow_offload *flow)
+-{
+-      struct flow_offload_entry *e;
+-
+-      e = container_of(flow, struct flow_offload_entry, flow);
+-      nf_ct_delete(e->ct, 0, 0);
+-      nf_ct_put(e->ct);
+-}
+-
+ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+                         void (*iter)(struct flow_offload *flow, void *data),
+                         void *data)
+@@ -259,10 +248,8 @@ static int nf_flow_offload_gc_step(struc
+               flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+               if (nf_flow_has_expired(flow) ||
+-                  nf_flow_is_dying(flow)) {
++                  nf_flow_is_dying(flow))
+                       flow_offload_del(flow_table, flow);
+-                      nf_flow_release_ct(flow);
+-              }
+       }
+ out:
+       rhashtable_walk_stop(&hti);
diff --git a/target/linux/generic/backport-4.14/346-netfilter-flowtable-infrastructure-depends-on-NETFIL.patch b/target/linux/generic/backport-4.14/346-netfilter-flowtable-infrastructure-depends-on-NETFIL.patch
new file mode 100644 (file)
index 0000000..5267fd2
--- /dev/null
@@ -0,0 +1,73 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 31 Jan 2018 18:13:39 +0100
+Subject: [PATCH] netfilter: flowtable infrastructure depends on
+ NETFILTER_INGRESS
+
+config NF_FLOW_TABLE depends on NETFILTER_INGRESS. If users forget to
+enable this toggle, flowtable registration fails with EOPNOTSUPP.
+
+Moreover, turn 'select NF_FLOW_TABLE' in every flowtable family flavour
+into dependency instead, otherwise this new dependency on
+NETFILTER_INGRESS causes a warning. This also allows us to remove the
+explicit dependency between family flowtables <-> NF_TABLES and
+NF_CONNTRACK, given they depend on the NF_FLOW_TABLE core that already
+expresses the general dependencies for this new infrastructure.
+
+Moreover, NF_FLOW_TABLE_INET depends on NF_FLOW_TABLE_IPV4 and
+NF_FLOW_TABLE_IPV6, which already depend on NF_FLOW_TABLE. So we can get
+rid of the direct dependency on NF_FLOW_TABLE.
+
+In general, let's avoid 'select', it just makes things more complicated.
+
+Reported-by: John Crispin <john@phrozen.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -79,8 +79,7 @@ endif # NF_TABLES
+ config NF_FLOW_TABLE_IPV4
+       tristate "Netfilter flow table IPv4 module"
+-      depends on NF_CONNTRACK && NF_TABLES
+-      select NF_FLOW_TABLE
++      depends on NF_FLOW_TABLE
+       help
+         This option adds the flow table IPv4 support.
+--- a/net/ipv6/netfilter/Kconfig
++++ b/net/ipv6/netfilter/Kconfig
+@@ -73,8 +73,7 @@ endif # NF_TABLES
+ config NF_FLOW_TABLE_IPV6
+       tristate "Netfilter flow table IPv6 module"
+-      depends on NF_CONNTRACK && NF_TABLES
+-      select NF_FLOW_TABLE
++      depends on NF_FLOW_TABLE
+       help
+         This option adds the flow table IPv6 support.
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -670,8 +670,8 @@ endif # NF_TABLES
+ config NF_FLOW_TABLE_INET
+       tristate "Netfilter flow table mixed IPv4/IPv6 module"
+-      depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
+-      select NF_FLOW_TABLE
++      depends on NF_FLOW_TABLE_IPV4
++      depends on NF_FLOW_TABLE_IPV6
+       help
+           This option adds the flow table mixed IPv4/IPv6 support.
+@@ -679,7 +679,9 @@ config NF_FLOW_TABLE_INET
+ config NF_FLOW_TABLE
+       tristate "Netfilter flow table module"
+-      depends on NF_CONNTRACK && NF_TABLES
++      depends on NETFILTER_INGRESS
++      depends on NF_CONNTRACK
++      depends on NF_TABLES
+       help
+         This option adds the flow table core infrastructure.
diff --git a/target/linux/generic/backport-4.14/347-netfilter-remove-duplicated-include.patch b/target/linux/generic/backport-4.14/347-netfilter-remove-duplicated-include.patch
new file mode 100644 (file)
index 0000000..c8a0972
--- /dev/null
@@ -0,0 +1,29 @@
+From: Wei Yongjun <weiyongjun1@huawei.com>
+Date: Wed, 10 Jan 2018 13:06:46 +0000
+Subject: [PATCH] netfilter: remove duplicated include
+
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
+@@ -5,7 +5,6 @@
+ #include <linux/rhashtable.h>
+ #include <linux/ipv6.h>
+ #include <linux/netdevice.h>
+-#include <linux/ipv6.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+ #include <net/neighbour.h>
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -15,8 +15,6 @@
+ #include <linux/netfilter_bridge.h>
+ #include <linux/seq_file.h>
+ #include <linux/rcupdate.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <linux/netfilter_ipv6.h>
+ #include <net/protocol.h>
+ #include <net/netfilter/nf_queue.h>
+ #include <net/dst.h>
diff --git a/target/linux/generic/backport-4.14/348-netfilter-nf_flow_table-use-IP_CT_DIR_-values-for-FL.patch b/target/linux/generic/backport-4.14/348-netfilter-nf_flow_table-use-IP_CT_DIR_-values-for-FL.patch
new file mode 100644 (file)
index 0000000..382b33c
--- /dev/null
@@ -0,0 +1,35 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 16 Feb 2018 09:41:18 +0100
+Subject: [PATCH] netfilter: nf_flow_table: use IP_CT_DIR_* values for
+ FLOW_OFFLOAD_DIR_*
+
+Simplifies further code cleanups
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -6,6 +6,7 @@
+ #include <linux/netdevice.h>
+ #include <linux/rhashtable.h>
+ #include <linux/rcupdate.h>
++#include <linux/netfilter/nf_conntrack_tuple_common.h>
+ #include <net/dst.h>
+ struct nf_flowtable;
+@@ -27,11 +28,10 @@ struct nf_flowtable {
+ };
+ enum flow_offload_tuple_dir {
+-      FLOW_OFFLOAD_DIR_ORIGINAL,
+-      FLOW_OFFLOAD_DIR_REPLY,
+-      __FLOW_OFFLOAD_DIR_MAX          = FLOW_OFFLOAD_DIR_REPLY,
++      FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
++      FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
++      FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
+ };
+-#define FLOW_OFFLOAD_DIR_MAX  (__FLOW_OFFLOAD_DIR_MAX + 1)
+ struct flow_offload_tuple {
+       union {
diff --git a/target/linux/generic/backport-4.14/349-netfilter-nf_flow_table-clean-up-flow_offload_alloc.patch b/target/linux/generic/backport-4.14/349-netfilter-nf_flow_table-clean-up-flow_offload_alloc.patch
new file mode 100644 (file)
index 0000000..39ea757
--- /dev/null
@@ -0,0 +1,118 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 16 Feb 2018 09:42:32 +0100
+Subject: [PATCH] netfilter: nf_flow_table: clean up flow_offload_alloc
+
+Reduce code duplication and make it much easier to read
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_flow_table.c
++++ b/net/netfilter/nf_flow_table.c
+@@ -16,6 +16,38 @@ struct flow_offload_entry {
+       struct rcu_head         rcu_head;
+ };
++static void
++flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
++                    struct nf_flow_route *route,
++                    enum flow_offload_tuple_dir dir)
++{
++      struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
++      struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
++
++      ft->dir = dir;
++
++      switch (ctt->src.l3num) {
++      case NFPROTO_IPV4:
++              ft->src_v4 = ctt->src.u3.in;
++              ft->dst_v4 = ctt->dst.u3.in;
++              break;
++      case NFPROTO_IPV6:
++              ft->src_v6 = ctt->src.u3.in6;
++              ft->dst_v6 = ctt->dst.u3.in6;
++              break;
++      }
++
++      ft->l3proto = ctt->src.l3num;
++      ft->l4proto = ctt->dst.protonum;
++      ft->src_port = ctt->src.u.tcp.port;
++      ft->dst_port = ctt->dst.u.tcp.port;
++
++      ft->iifidx = route->tuple[dir].ifindex;
++      ft->oifidx = route->tuple[!dir].ifindex;
++
++      ft->dst_cache = route->tuple[dir].dst;
++}
++
+ struct flow_offload *
+ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+ {
+@@ -40,65 +72,8 @@ flow_offload_alloc(struct nf_conn *ct, s
+       entry->ct = ct;
+-      switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
+-      case NFPROTO_IPV4:
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
+-                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
+-                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
+-                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
+-                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
+-              break;
+-      case NFPROTO_IPV6:
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
+-                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
+-                      ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
+-                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
+-              flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
+-                      ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
+-              break;
+-      }
+-
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
+-              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
+-              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
+-              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
+-              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+-
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
+-                route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
+-                route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
+-
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
+-              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
+-              ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
+-              ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
+-              ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+-
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
+-                                              FLOW_OFFLOAD_DIR_ORIGINAL;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
+-                                              FLOW_OFFLOAD_DIR_REPLY;
+-
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
+-              route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
+-              route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
+-              route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+-      flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
+-              route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
++      flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
++      flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
+       if (ct->status & IPS_SRC_NAT)
+               flow->flags |= FLOW_OFFLOAD_SNAT;
diff --git a/target/linux/generic/backport-4.14/350-ipv6-make-ip6_dst_mtu_forward-inline.patch b/target/linux/generic/backport-4.14/350-ipv6-make-ip6_dst_mtu_forward-inline.patch
new file mode 100644 (file)
index 0000000..3a0275b
--- /dev/null
@@ -0,0 +1,80 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 16 Feb 2018 10:54:24 +0100
+Subject: [PATCH] ipv6: make ip6_dst_mtu_forward inline
+
+Removes a direct dependency on ipv6.ko
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -252,4 +252,26 @@ static inline bool rt6_duplicate_nexthop
+              ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
+              !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
+ }
++
++static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
++{
++      unsigned int mtu;
++      struct inet6_dev *idev;
++
++      if (dst_metric_locked(dst, RTAX_MTU)) {
++              mtu = dst_metric_raw(dst, RTAX_MTU);
++              if (mtu)
++                      return mtu;
++      }
++
++      mtu = IPV6_MIN_MTU;
++      rcu_read_lock();
++      idev = __in6_dev_get(dst->dev);
++      if (idev)
++              mtu = idev->cnf.mtu6;
++      rcu_read_unlock();
++
++      return mtu;
++}
++
+ #endif
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -913,8 +913,6 @@ static inline struct sk_buff *ip6_finish
+                             &inet6_sk(sk)->cork);
+ }
+-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
+-
+ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
+                  struct flowi6 *fl6);
+ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -370,28 +370,6 @@ static inline int ip6_forward_finish(str
+       return dst_output(net, sk, skb);
+ }
+-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+-{
+-      unsigned int mtu;
+-      struct inet6_dev *idev;
+-
+-      if (dst_metric_locked(dst, RTAX_MTU)) {
+-              mtu = dst_metric_raw(dst, RTAX_MTU);
+-              if (mtu)
+-                      return mtu;
+-      }
+-
+-      mtu = IPV6_MIN_MTU;
+-      rcu_read_lock();
+-      idev = __in6_dev_get(dst->dev);
+-      if (idev)
+-              mtu = idev->cnf.mtu6;
+-      rcu_read_unlock();
+-
+-      return mtu;
+-}
+-EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
+-
+ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+ {
+       if (skb->len <= mtu)
diff --git a/target/linux/generic/backport-4.14/351-netfilter-nf_flow_table-cache-mtu-in-struct-flow_off.patch b/target/linux/generic/backport-4.14/351-netfilter-nf_flow_table-cache-mtu-in-struct-flow_off.patch
new file mode 100644 (file)
index 0000000..e2015e7
--- /dev/null
@@ -0,0 +1,145 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 16 Feb 2018 10:57:23 +0100
+Subject: [PATCH] netfilter: nf_flow_table: cache mtu in struct
+ flow_offload_tuple
+
+Reduces the number of cache lines touched in the offload forwarding path
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -55,6 +55,8 @@ struct flow_offload_tuple {
+       int                             oifidx;
++      u16                             mtu;
++
+       struct dst_entry                *dst_cache;
+ };
+--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
+@@ -177,7 +177,7 @@ static int nf_flow_tuple_ip(struct sk_bu
+ }
+ /* Based on ip_exceeds_mtu(). */
+-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
++static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+ {
+       if (skb->len <= mtu)
+               return false;
+@@ -191,17 +191,6 @@ static bool __nf_flow_exceeds_mtu(const
+       return true;
+ }
+-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
+-{
+-      u32 mtu;
+-
+-      mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+-      if (__nf_flow_exceeds_mtu(skb, mtu))
+-              return true;
+-
+-      return false;
+-}
+-
+ unsigned int
+ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+                       const struct nf_hook_state *state)
+@@ -232,9 +221,9 @@ nf_flow_offload_ip_hook(void *priv, stru
+       dir = tuplehash->tuple.dir;
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+-
+       rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+-      if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
++
++      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+               return NF_ACCEPT;
+       if (skb_try_make_writable(skb, sizeof(*iph)))
+--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
+@@ -173,7 +173,7 @@ static int nf_flow_tuple_ipv6(struct sk_
+ }
+ /* Based on ip_exceeds_mtu(). */
+-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
++static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+ {
+       if (skb->len <= mtu)
+               return false;
+@@ -184,17 +184,6 @@ static bool __nf_flow_exceeds_mtu(const
+       return true;
+ }
+-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+-{
+-      u32 mtu;
+-
+-      mtu = ip6_dst_mtu_forward(&rt->dst);
+-      if (__nf_flow_exceeds_mtu(skb, mtu))
+-              return true;
+-
+-      return false;
+-}
+-
+ unsigned int
+ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+                         const struct nf_hook_state *state)
+@@ -225,9 +214,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
+       dir = tuplehash->tuple.dir;
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+-
+       rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+-      if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
++
++      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+               return NF_ACCEPT;
+       if (skb_try_make_writable(skb, sizeof(*ip6h)))
+--- a/net/netfilter/nf_flow_table.c
++++ b/net/netfilter/nf_flow_table.c
+@@ -4,6 +4,8 @@
+ #include <linux/netfilter.h>
+ #include <linux/rhashtable.h>
+ #include <linux/netdevice.h>
++#include <net/ip.h>
++#include <net/ip6_route.h>
+ #include <net/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_conntrack.h>
+@@ -23,6 +25,7 @@ flow_offload_fill_dir(struct flow_offloa
+ {
+       struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+       struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
++      struct dst_entry *dst = route->tuple[dir].dst;
+       ft->dir = dir;
+@@ -30,10 +33,12 @@ flow_offload_fill_dir(struct flow_offloa
+       case NFPROTO_IPV4:
+               ft->src_v4 = ctt->src.u3.in;
+               ft->dst_v4 = ctt->dst.u3.in;
++              ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
+               break;
+       case NFPROTO_IPV6:
+               ft->src_v6 = ctt->src.u3.in6;
+               ft->dst_v6 = ctt->dst.u3.in6;
++              ft->mtu = ip6_dst_mtu_forward(dst);
+               break;
+       }
+@@ -44,8 +49,7 @@ flow_offload_fill_dir(struct flow_offloa
+       ft->iifidx = route->tuple[dir].ifindex;
+       ft->oifidx = route->tuple[!dir].ifindex;
+-
+-      ft->dst_cache = route->tuple[dir].dst;
++      ft->dst_cache = dst;
+ }
+ struct flow_offload *
diff --git a/target/linux/generic/backport-4.14/352-netfilter-nf_flow_table-rename-nf_flow_table.c-to-nf.patch b/target/linux/generic/backport-4.14/352-netfilter-nf_flow_table-rename-nf_flow_table.c-to-nf.patch
new file mode 100644 (file)
index 0000000..5df56dd
--- /dev/null
@@ -0,0 +1,952 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 16 Feb 2018 11:08:47 +0100
+Subject: [PATCH] netfilter: nf_flow_table: rename nf_flow_table.c to
+ nf_flow_table_core.c
+
+Preparation for adding more code to the same module
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+ rename net/netfilter/{nf_flow_table.c => nf_flow_table_core.c} (100%)
+
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -113,6 +113,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV)       += nft_fwd_
+ # flow table infrastructure
+ obj-$(CONFIG_NF_FLOW_TABLE)   += nf_flow_table.o
++nf_flow_table-objs := nf_flow_table_core.o
++
+ obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+ # generic X tables 
+--- a/net/netfilter/nf_flow_table.c
++++ /dev/null
+@@ -1,462 +0,0 @@
+-#include <linux/kernel.h>
+-#include <linux/init.h>
+-#include <linux/module.h>
+-#include <linux/netfilter.h>
+-#include <linux/rhashtable.h>
+-#include <linux/netdevice.h>
+-#include <net/ip.h>
+-#include <net/ip6_route.h>
+-#include <net/netfilter/nf_tables.h>
+-#include <net/netfilter/nf_flow_table.h>
+-#include <net/netfilter/nf_conntrack.h>
+-#include <net/netfilter/nf_conntrack_core.h>
+-#include <net/netfilter/nf_conntrack_tuple.h>
+-
+-struct flow_offload_entry {
+-      struct flow_offload     flow;
+-      struct nf_conn          *ct;
+-      struct rcu_head         rcu_head;
+-};
+-
+-static void
+-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+-                    struct nf_flow_route *route,
+-                    enum flow_offload_tuple_dir dir)
+-{
+-      struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+-      struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+-      struct dst_entry *dst = route->tuple[dir].dst;
+-
+-      ft->dir = dir;
+-
+-      switch (ctt->src.l3num) {
+-      case NFPROTO_IPV4:
+-              ft->src_v4 = ctt->src.u3.in;
+-              ft->dst_v4 = ctt->dst.u3.in;
+-              ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
+-              break;
+-      case NFPROTO_IPV6:
+-              ft->src_v6 = ctt->src.u3.in6;
+-              ft->dst_v6 = ctt->dst.u3.in6;
+-              ft->mtu = ip6_dst_mtu_forward(dst);
+-              break;
+-      }
+-
+-      ft->l3proto = ctt->src.l3num;
+-      ft->l4proto = ctt->dst.protonum;
+-      ft->src_port = ctt->src.u.tcp.port;
+-      ft->dst_port = ctt->dst.u.tcp.port;
+-
+-      ft->iifidx = route->tuple[dir].ifindex;
+-      ft->oifidx = route->tuple[!dir].ifindex;
+-      ft->dst_cache = dst;
+-}
+-
+-struct flow_offload *
+-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+-{
+-      struct flow_offload_entry *entry;
+-      struct flow_offload *flow;
+-
+-      if (unlikely(nf_ct_is_dying(ct) ||
+-          !atomic_inc_not_zero(&ct->ct_general.use)))
+-              return NULL;
+-
+-      entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+-      if (!entry)
+-              goto err_ct_refcnt;
+-
+-      flow = &entry->flow;
+-
+-      if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+-              goto err_dst_cache_original;
+-
+-      if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+-              goto err_dst_cache_reply;
+-
+-      entry->ct = ct;
+-
+-      flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+-      flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
+-
+-      if (ct->status & IPS_SRC_NAT)
+-              flow->flags |= FLOW_OFFLOAD_SNAT;
+-      else if (ct->status & IPS_DST_NAT)
+-              flow->flags |= FLOW_OFFLOAD_DNAT;
+-
+-      return flow;
+-
+-err_dst_cache_reply:
+-      dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+-err_dst_cache_original:
+-      kfree(entry);
+-err_ct_refcnt:
+-      nf_ct_put(ct);
+-
+-      return NULL;
+-}
+-EXPORT_SYMBOL_GPL(flow_offload_alloc);
+-
+-void flow_offload_free(struct flow_offload *flow)
+-{
+-      struct flow_offload_entry *e;
+-
+-      dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+-      dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+-      e = container_of(flow, struct flow_offload_entry, flow);
+-      nf_ct_delete(e->ct, 0, 0);
+-      nf_ct_put(e->ct);
+-      kfree_rcu(e, rcu_head);
+-}
+-EXPORT_SYMBOL_GPL(flow_offload_free);
+-
+-void flow_offload_dead(struct flow_offload *flow)
+-{
+-      flow->flags |= FLOW_OFFLOAD_DYING;
+-}
+-EXPORT_SYMBOL_GPL(flow_offload_dead);
+-
+-int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+-{
+-      flow->timeout = (u32)jiffies;
+-
+-      rhashtable_insert_fast(&flow_table->rhashtable,
+-                             &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+-                             *flow_table->type->params);
+-      rhashtable_insert_fast(&flow_table->rhashtable,
+-                             &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+-                             *flow_table->type->params);
+-      return 0;
+-}
+-EXPORT_SYMBOL_GPL(flow_offload_add);
+-
+-static void flow_offload_del(struct nf_flowtable *flow_table,
+-                           struct flow_offload *flow)
+-{
+-      rhashtable_remove_fast(&flow_table->rhashtable,
+-                             &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+-                             *flow_table->type->params);
+-      rhashtable_remove_fast(&flow_table->rhashtable,
+-                             &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+-                             *flow_table->type->params);
+-
+-      flow_offload_free(flow);
+-}
+-
+-struct flow_offload_tuple_rhash *
+-flow_offload_lookup(struct nf_flowtable *flow_table,
+-                  struct flow_offload_tuple *tuple)
+-{
+-      return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+-                                    *flow_table->type->params);
+-}
+-EXPORT_SYMBOL_GPL(flow_offload_lookup);
+-
+-int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+-                        void (*iter)(struct flow_offload *flow, void *data),
+-                        void *data)
+-{
+-      struct flow_offload_tuple_rhash *tuplehash;
+-      struct rhashtable_iter hti;
+-      struct flow_offload *flow;
+-      int err;
+-
+-      err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+-      if (err)
+-              return err;
+-
+-      rhashtable_walk_start(&hti);
+-
+-      while ((tuplehash = rhashtable_walk_next(&hti))) {
+-              if (IS_ERR(tuplehash)) {
+-                      err = PTR_ERR(tuplehash);
+-                      if (err != -EAGAIN)
+-                              goto out;
+-
+-                      continue;
+-              }
+-              if (tuplehash->tuple.dir)
+-                      continue;
+-
+-              flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+-
+-              iter(flow, data);
+-      }
+-out:
+-      rhashtable_walk_stop(&hti);
+-      rhashtable_walk_exit(&hti);
+-
+-      return err;
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+-
+-static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+-{
+-      return (__s32)(flow->timeout - (u32)jiffies) <= 0;
+-}
+-
+-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
+-{
+-      return flow->flags & FLOW_OFFLOAD_DYING;
+-}
+-
+-static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+-{
+-      struct flow_offload_tuple_rhash *tuplehash;
+-      struct rhashtable_iter hti;
+-      struct flow_offload *flow;
+-      int err;
+-
+-      err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+-      if (err)
+-              return 0;
+-
+-      rhashtable_walk_start(&hti);
+-
+-      while ((tuplehash = rhashtable_walk_next(&hti))) {
+-              if (IS_ERR(tuplehash)) {
+-                      err = PTR_ERR(tuplehash);
+-                      if (err != -EAGAIN)
+-                              goto out;
+-
+-                      continue;
+-              }
+-              if (tuplehash->tuple.dir)
+-                      continue;
+-
+-              flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+-
+-              if (nf_flow_has_expired(flow) ||
+-                  nf_flow_is_dying(flow))
+-                      flow_offload_del(flow_table, flow);
+-      }
+-out:
+-      rhashtable_walk_stop(&hti);
+-      rhashtable_walk_exit(&hti);
+-
+-      return 1;
+-}
+-
+-void nf_flow_offload_work_gc(struct work_struct *work)
+-{
+-      struct nf_flowtable *flow_table;
+-
+-      flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+-      nf_flow_offload_gc_step(flow_table);
+-      queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+-
+-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+-{
+-      const struct flow_offload_tuple *tuple = data;
+-
+-      return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+-}
+-
+-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+-{
+-      const struct flow_offload_tuple_rhash *tuplehash = data;
+-
+-      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+-}
+-
+-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+-                                      const void *ptr)
+-{
+-      const struct flow_offload_tuple *tuple = arg->key;
+-      const struct flow_offload_tuple_rhash *x = ptr;
+-
+-      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+-              return 1;
+-
+-      return 0;
+-}
+-
+-const struct rhashtable_params nf_flow_offload_rhash_params = {
+-      .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
+-      .hashfn                 = flow_offload_hash,
+-      .obj_hashfn             = flow_offload_hash_obj,
+-      .obj_cmpfn              = flow_offload_hash_cmp,
+-      .automatic_shrinking    = true,
+-};
+-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
+-
+-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+-                              __be16 port, __be16 new_port)
+-{
+-      struct tcphdr *tcph;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+-              return -1;
+-
+-      tcph = (void *)(skb_network_header(skb) + thoff);
+-      inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+-
+-      return 0;
+-}
+-
+-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+-                              __be16 port, __be16 new_port)
+-{
+-      struct udphdr *udph;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*udph)))
+-              return -1;
+-
+-      udph = (void *)(skb_network_header(skb) + thoff);
+-      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+-              inet_proto_csum_replace2(&udph->check, skb, port,
+-                                       new_port, true);
+-              if (!udph->check)
+-                      udph->check = CSUM_MANGLED_0;
+-      }
+-
+-      return 0;
+-}
+-
+-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+-                          u8 protocol, __be16 port, __be16 new_port)
+-{
+-      switch (protocol) {
+-      case IPPROTO_TCP:
+-              if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+-                      return NF_DROP;
+-              break;
+-      case IPPROTO_UDP:
+-              if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+-                      return NF_DROP;
+-              break;
+-      }
+-
+-      return 0;
+-}
+-
+-int nf_flow_snat_port(const struct flow_offload *flow,
+-                    struct sk_buff *skb, unsigned int thoff,
+-                    u8 protocol, enum flow_offload_tuple_dir dir)
+-{
+-      struct flow_ports *hdr;
+-      __be16 port, new_port;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+-              return -1;
+-
+-      hdr = (void *)(skb_network_header(skb) + thoff);
+-
+-      switch (dir) {
+-      case FLOW_OFFLOAD_DIR_ORIGINAL:
+-              port = hdr->source;
+-              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+-              hdr->source = new_port;
+-              break;
+-      case FLOW_OFFLOAD_DIR_REPLY:
+-              port = hdr->dest;
+-              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+-              hdr->dest = new_port;
+-              break;
+-      default:
+-              return -1;
+-      }
+-
+-      return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+-
+-int nf_flow_dnat_port(const struct flow_offload *flow,
+-                    struct sk_buff *skb, unsigned int thoff,
+-                    u8 protocol, enum flow_offload_tuple_dir dir)
+-{
+-      struct flow_ports *hdr;
+-      __be16 port, new_port;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+-              return -1;
+-
+-      hdr = (void *)(skb_network_header(skb) + thoff);
+-
+-      switch (dir) {
+-      case FLOW_OFFLOAD_DIR_ORIGINAL:
+-              port = hdr->dest;
+-              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
+-              hdr->dest = new_port;
+-              break;
+-      case FLOW_OFFLOAD_DIR_REPLY:
+-              port = hdr->source;
+-              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+-              hdr->source = new_port;
+-              break;
+-      default:
+-              return -1;
+-      }
+-
+-      return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+-
+-static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+-{
+-      struct net_device *dev = data;
+-
+-      if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+-              return;
+-
+-      flow_offload_dead(flow);
+-}
+-
+-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+-                                        void *data)
+-{
+-      nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+-      flush_delayed_work(&flowtable->gc_work);
+-}
+-
+-void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+-{
+-      nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+-
+-void nf_flow_table_free(struct nf_flowtable *flow_table)
+-{
+-      nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+-      WARN_ON(!nf_flow_offload_gc_step(flow_table));
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_table_free);
+-
+-static int nf_flow_table_netdev_event(struct notifier_block *this,
+-                                    unsigned long event, void *ptr)
+-{
+-      struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+-
+-      if (event != NETDEV_DOWN)
+-              return NOTIFY_DONE;
+-
+-      nf_flow_table_cleanup(dev_net(dev), dev);
+-
+-      return NOTIFY_DONE;
+-}
+-
+-static struct notifier_block flow_offload_netdev_notifier = {
+-      .notifier_call  = nf_flow_table_netdev_event,
+-};
+-
+-static int __init nf_flow_table_module_init(void)
+-{
+-      return register_netdevice_notifier(&flow_offload_netdev_notifier);
+-}
+-
+-static void __exit nf_flow_table_module_exit(void)
+-{
+-      unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+-}
+-
+-module_init(nf_flow_table_module_init);
+-module_exit(nf_flow_table_module_exit);
+-
+-MODULE_LICENSE("GPL");
+-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+--- /dev/null
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -0,0 +1,462 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/netdevice.h>
++#include <net/ip.h>
++#include <net/ip6_route.h>
++#include <net/netfilter/nf_tables.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_tuple.h>
++
++struct flow_offload_entry {
++      struct flow_offload     flow;           /* container_of() maps this back to the entry */
++      struct nf_conn          *ct;            /* reference taken in flow_offload_alloc() */
++      struct rcu_head         rcu_head;       /* used by kfree_rcu() in flow_offload_free() */
++};
++
++/* Fill flow->tuplehash[dir].tuple from ct's tuple and the route entry for dir. */
++static void
++flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
++                    struct nf_flow_route *route,
++                    enum flow_offload_tuple_dir dir)
++{
++      struct nf_conntrack_tuple *ct_tuple = &ct->tuplehash[dir].tuple;
++      struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
++      struct dst_entry *dst = route->tuple[dir].dst;
++
++      tuple->dir = dir;
++      tuple->dst_cache = dst;
++      tuple->iifidx = route->tuple[dir].ifindex;
++      /* the output ifindex comes from the opposite direction's route entry */
++      tuple->oifidx = route->tuple[!dir].ifindex;
++
++      /* ports are read through the tcp member of the conntrack union */
++      tuple->l3proto = ct_tuple->src.l3num;
++      tuple->l4proto = ct_tuple->dst.protonum;
++      tuple->src_port = ct_tuple->src.u.tcp.port;
++      tuple->dst_port = ct_tuple->dst.u.tcp.port;
++
++      /* addresses and MTU depend on the L3 family; other families are untouched */
++      if (ct_tuple->src.l3num == NFPROTO_IPV4) {
++              tuple->src_v4 = ct_tuple->src.u3.in;
++              tuple->dst_v4 = ct_tuple->dst.u3.in;
++              tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
++      } else if (ct_tuple->src.l3num == NFPROTO_IPV6) {
++              tuple->src_v6 = ct_tuple->src.u3.in6;
++              tuple->dst_v6 = ct_tuple->dst.u3.in6;
++              tuple->mtu = ip6_dst_mtu_forward(dst);
++      }
++}
++
++/* Build a flow for ct; holds a ref on ct and on both route dsts. NULL on failure. */
++struct flow_offload *
++flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
++{
++      struct flow_offload_entry *entry;
++      struct flow_offload *flow;
++
++      if (unlikely(nf_ct_is_dying(ct) ||
++          !atomic_inc_not_zero(&ct->ct_general.use)))
++              return NULL;
++
++      entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
++      if (!entry)
++              goto err_ct_refcnt;
++
++      flow = &entry->flow;
++      if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
++              goto err_dst_cache_original;
++
++      if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
++              goto err_dst_cache_reply;
++
++      entry->ct = ct;
++
++      flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
++      flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
++      /* the two NAT bits are independent: a connection may carry both */
++      if (ct->status & IPS_SRC_NAT)
++              flow->flags |= FLOW_OFFLOAD_SNAT;
++      if (ct->status & IPS_DST_NAT)
++              flow->flags |= FLOW_OFFLOAD_DNAT;
++
++      return flow;
++
++err_dst_cache_reply:
++      dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
++err_dst_cache_original:
++      kfree(entry);
++err_ct_refcnt:
++      nf_ct_put(ct);
++
++      return NULL;
++}
++EXPORT_SYMBOL_GPL(flow_offload_alloc);
++
++void flow_offload_free(struct flow_offload *flow)
++{
++      struct flow_offload_entry *entry;
++
++      entry = container_of(flow, struct flow_offload_entry, flow);
++      dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
++      dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
++      nf_ct_delete(entry->ct, 0, 0);
++      nf_ct_put(entry->ct);           /* pairs with the ref taken in flow_offload_alloc() */
++      kfree_rcu(entry, rcu_head);     /* the entry itself is freed via RCU */
++}
++EXPORT_SYMBOL_GPL(flow_offload_free);
++
++void flow_offload_dead(struct flow_offload *flow)
++{
++      flow->flags |= FLOW_OFFLOAD_DYING;      /* nf_flow_offload_gc_step() deletes such flows */
++}
++EXPORT_SYMBOL_GPL(flow_offload_dead);
++
++int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
++{
++      flow->timeout = (u32)jiffies;
++      /* NOTE(review): the insert results are not checked; 0 is returned regardless */
++      rhashtable_insert_fast(&flow_table->rhashtable,
++                             &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
++                             *flow_table->type->params);
++      rhashtable_insert_fast(&flow_table->rhashtable,
++                             &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
++                             *flow_table->type->params);
++      return 0;
++}
++EXPORT_SYMBOL_GPL(flow_offload_add);
++
++static void flow_offload_del(struct nf_flowtable *flow_table,
++                           struct flow_offload *flow)
++{
++      rhashtable_remove_fast(&flow_table->rhashtable,
++                             &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
++                             *flow_table->type->params);
++      rhashtable_remove_fast(&flow_table->rhashtable,
++                             &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
++                             *flow_table->type->params);
++      /* after unlinking both direction nodes, release the flow's refs and memory */
++      flow_offload_free(flow);
++}
++
++struct flow_offload_tuple_rhash *
++flow_offload_lookup(struct nf_flowtable *flow_table,
++                  struct flow_offload_tuple *tuple)
++{
++      return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
++                                    *flow_table->type->params);       /* NULL when no node matches */
++}
++EXPORT_SYMBOL_GPL(flow_offload_lookup);
++
++/* Call iter(flow, data) once for each flow in the table; returns 0 or -errno. */
++int nf_flow_table_iterate(struct nf_flowtable *flow_table,
++                        void (*iter)(struct flow_offload *flow, void *data),
++                        void *data)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct rhashtable_iter hti;
++      struct flow_offload *flow;
++      int err;
++
++      err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
++      if (err)
++              return err;
++
++      rhashtable_walk_start(&hti);
++
++      while ((tuplehash = rhashtable_walk_next(&hti))) {
++              if (IS_ERR(tuplehash)) {
++                      /* -EAGAIN is not a failure: keep it out of the result */
++                      if (PTR_ERR(tuplehash) == -EAGAIN)
++                              continue;
++                      err = PTR_ERR(tuplehash);
++                      goto out;
++              }
++              if (tuplehash->tuple.dir)       /* skip the flow's second node */
++                      continue;
++
++              flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
++              iter(flow, data);
++      }
++out:
++      rhashtable_walk_stop(&hti);
++      rhashtable_walk_exit(&hti);
++
++      return err;
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
++
++static inline bool nf_flow_has_expired(const struct flow_offload *flow)
++{
++      return (__s32)(flow->timeout - (u32)jiffies) <= 0;      /* signed delta: timeout is now or in the past */
++}
++
++static inline bool nf_flow_is_dying(const struct flow_offload *flow)
++{
++      return flow->flags & FLOW_OFFLOAD_DYING;        /* bit set by flow_offload_dead() */
++}
++
++static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct rhashtable_iter hti;
++      struct flow_offload *flow;
++      int err;
++
++      err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
++      if (err)
++              return 0;       /* 0: the walk could not be set up */
++      /* One gc pass: delete every flow that has expired or is marked dying. */
++      rhashtable_walk_start(&hti);
++
++      while ((tuplehash = rhashtable_walk_next(&hti))) {
++              if (IS_ERR(tuplehash)) {
++                      err = PTR_ERR(tuplehash);
++                      if (err != -EAGAIN)
++                              goto out;
++
++                      continue;
++              }
++              if (tuplehash->tuple.dir)       /* handle each flow once, via tuplehash[0] */
++                      continue;
++
++              flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
++
++              if (nf_flow_has_expired(flow) ||
++                  nf_flow_is_dying(flow))
++                      flow_offload_del(flow_table, flow);
++      }
++out:
++      rhashtable_walk_stop(&hti);
++      rhashtable_walk_exit(&hti);
++
++      return 1;       /* also returned when the walk ended on an error */
++}
++
++void nf_flow_offload_work_gc(struct work_struct *work)
++{
++      struct nf_flowtable *flow_table;
++      /* delayed-work handler: run one gc pass, then queue itself again with a delay of HZ */
++      flow_table = container_of(work, struct nf_flowtable, gc_work.work);
++      nf_flow_offload_gc_step(flow_table);
++      queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
++
++static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
++{
++      /* data is a struct flow_offload_tuple used as lookup key; only the
++       * bytes laid out before its .dir member take part in the hash. */
++      return jhash(data, offsetof(struct flow_offload_tuple, dir), seed);
++}
++
++static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
++{
++      const struct flow_offload_tuple_rhash *tuplehash = data;
++      /* hash the same leading tuple bytes that flow_offload_hash() uses for a key */
++      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
++}
++
++static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
++                                      const void *ptr)
++{
++      const struct flow_offload_tuple_rhash *node = ptr;
++      const struct flow_offload_tuple *key = arg->key;
++      const size_t key_len = offsetof(struct flow_offload_tuple, dir);
++
++      /* Compare only the tuple bytes laid out before .dir:
++       * 0 when they are equal, 1 otherwise. */
++      return memcmp(&node->tuple, key, key_len) ? 1 : 0;
++}
++
++const struct rhashtable_params nf_flow_offload_rhash_params = {
++      .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
++      .hashfn                 = flow_offload_hash,            /* hashes a bare lookup key */
++      .obj_hashfn             = flow_offload_hash_obj,        /* hashes a stored node */
++      .obj_cmpfn              = flow_offload_hash_cmp,        /* returns 0 on match */
++      .automatic_shrinking    = true,
++};
++EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
++
++/* Patch the TCP checksum for a port change; -1 if the header is unusable. */
++static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
++                              __be16 port, __be16 new_port)
++{
++      const unsigned int hdr_end = thoff + sizeof(struct tcphdr);
++      struct tcphdr *tcph;
++
++      if (!pskb_may_pull(skb, hdr_end) || skb_try_make_writable(skb, hdr_end))
++              return -1;
++
++      tcph = (void *)(skb_network_header(skb) + thoff);
++      inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
++      return 0;
++}
++
++/* Patch the UDP checksum for a port change; -1 if the header is unusable. */
++static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
++                              __be16 port, __be16 new_port)
++{
++      const unsigned int hdr_end = thoff + sizeof(struct udphdr);
++      struct udphdr *udph;
++
++      if (!pskb_may_pull(skb, hdr_end) || skb_try_make_writable(skb, hdr_end))
++              return -1;
++
++      udph = (void *)(skb_network_header(skb) + thoff);
++      /* a zero checksum field is left alone unless ip_summed is CHECKSUM_PARTIAL */
++      if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
++              return 0;
++      inet_proto_csum_replace2(&udph->check, skb, port, new_port, true);
++      if (!udph->check)
++              udph->check = CSUM_MANGLED_0;
++      return 0;
++}
++
++/* Fix the L4 checksum after a port rewrite; 0 on success, -1 on failure (not NF_DROP, which is 0). */
++static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
++                          u8 protocol, __be16 port, __be16 new_port)
++{
++      switch (protocol) {
++      case IPPROTO_TCP:
++              if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
++                      return -1;
++              break;
++      case IPPROTO_UDP:
++              if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
++                      return -1;
++              break;
++      }
++      return 0;       /* also reached for protocols other than TCP/UDP */
++}
++
++/* Rewrite the port changed by SNAT for this direction, then fix the L4 checksum. */
++int nf_flow_snat_port(const struct flow_offload *flow,
++                    struct sk_buff *skb, unsigned int thoff,
++                    u8 protocol, enum flow_offload_tuple_dir dir)
++{
++      __be16 *field, old_port, new_port;
++      struct flow_ports *hdr;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*hdr)))
++              return -1;
++
++      hdr = (void *)(skb_network_header(skb) + thoff);
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:        /* source port <- reply tuple's dst_port */
++              field = &hdr->source;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:           /* dest port <- original tuple's src_port */
++              field = &hdr->dest;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
++              break;
++      default:
++              return -1;
++      }
++      old_port = *field;
++      *field = new_port;
++
++      return nf_flow_nat_port(skb, thoff, protocol, old_port, new_port);
++}
++EXPORT_SYMBOL_GPL(nf_flow_snat_port);
++
++/* Rewrite the port changed by DNAT for this direction, then fix the L4 checksum. */
++int nf_flow_dnat_port(const struct flow_offload *flow,
++                    struct sk_buff *skb, unsigned int thoff,
++                    u8 protocol, enum flow_offload_tuple_dir dir)
++{
++      __be16 *field, old_port, new_port;
++      struct flow_ports *hdr;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*hdr)))
++              return -1;
++
++      hdr = (void *)(skb_network_header(skb) + thoff);
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:        /* dest port <- reply tuple's src_port */
++              field = &hdr->dest;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:           /* source port <- original tuple's dst_port */
++              field = &hdr->source;
++              new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
++              break;
++      default:
++              return -1;
++      }
++      old_port = *field;
++      *field = new_port;
++
++      return nf_flow_nat_port(skb, thoff, protocol, old_port, new_port);
++}
++EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
++
++static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
++{
++      const struct net_device *dev = data;
++
++      /* dev == NULL selects every flow; otherwise only flows whose
++       * tuplehash[0] input ifindex equals dev's are marked dying. */
++      if (!dev || flow->tuplehash[0].tuple.iifidx == dev->ifindex)
++              flow_offload_dead(flow);
++}
++
++static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
++                                        void *data)
++{
++      nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);      /* mark matching flows dying */
++      flush_delayed_work(&flowtable->gc_work);        /* then flush the gc work so they get reaped */
++}
++
++void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
++{
++      nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);        /* dev is handed to the callback as its data */
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
++
++void nf_flow_table_free(struct nf_flowtable *flow_table)
++{
++      nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);      /* NULL dev: mark every flow dying */
++      WARN_ON(!nf_flow_offload_gc_step(flow_table));  /* reap now; 0 means the walk could not start */
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_free);
++
++/* Netdevice notifier: on NETDEV_DOWN, clean up the flows tied to that device. */
++static int nf_flow_table_netdev_event(struct notifier_block *this,
++                                    unsigned long event, void *ptr)
++{
++      struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++
++      /* every other event is ignored */
++      if (event == NETDEV_DOWN)
++              nf_flow_table_cleanup(dev_net(dev), dev);
++
++      return NOTIFY_DONE;
++}
++
++static struct notifier_block flow_offload_netdev_notifier = {
++      .notifier_call  = nf_flow_table_netdev_event,   /* registered in nf_flow_table_module_init() */
++};
++
++static int __init nf_flow_table_module_init(void)
++{
++      return register_netdevice_notifier(&flow_offload_netdev_notifier);      /* its result is the module init status */
++}
++
++static void __exit nf_flow_table_module_exit(void)
++{
++      unregister_netdevice_notifier(&flow_offload_netdev_notifier);   /* undoes the registration made at init */
++}
++
++module_init(nf_flow_table_module_init);
++module_exit(nf_flow_table_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/target/linux/generic/backport-4.14/353-netfilter-nf_flow_table-move-ipv4-offload-hook-code-.patch b/target/linux/generic/backport-4.14/353-netfilter-nf_flow_table-move-ipv4-offload-hook-code-.patch
new file mode 100644 (file)
index 0000000..e25a66f
--- /dev/null
@@ -0,0 +1,522 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 17 Feb 2018 11:49:44 +0100
+Subject: [PATCH] netfilter: nf_flow_table: move ipv4 offload hook code to
+ nf_flow_table
+
+Allows some minor code sharing with the ipv6 hook code and is also
+useful as preparation for adding iptables support for offload
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+ create mode 100644 net/netfilter/nf_flow_table_ip.c
+
+--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
+@@ -2,248 +2,8 @@
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/netfilter.h>
+-#include <linux/rhashtable.h>
+-#include <linux/ip.h>
+-#include <linux/netdevice.h>
+-#include <net/ip.h>
+-#include <net/neighbour.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_tables.h>
+-/* For layer 4 checksum field offset. */
+-#include <linux/tcp.h>
+-#include <linux/udp.h>
+-
+-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+-                            __be32 addr, __be32 new_addr)
+-{
+-      struct tcphdr *tcph;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+-              return -1;
+-
+-      tcph = (void *)(skb_network_header(skb) + thoff);
+-      inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+-
+-      return 0;
+-}
+-
+-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+-                            __be32 addr, __be32 new_addr)
+-{
+-      struct udphdr *udph;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*udph)))
+-              return -1;
+-
+-      udph = (void *)(skb_network_header(skb) + thoff);
+-      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+-              inet_proto_csum_replace4(&udph->check, skb, addr,
+-                                       new_addr, true);
+-              if (!udph->check)
+-                      udph->check = CSUM_MANGLED_0;
+-      }
+-
+-      return 0;
+-}
+-
+-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+-                                unsigned int thoff, __be32 addr,
+-                                __be32 new_addr)
+-{
+-      switch (iph->protocol) {
+-      case IPPROTO_TCP:
+-              if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+-                      return NF_DROP;
+-              break;
+-      case IPPROTO_UDP:
+-              if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+-                      return NF_DROP;
+-              break;
+-      }
+-
+-      return 0;
+-}
+-
+-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+-                         struct iphdr *iph, unsigned int thoff,
+-                         enum flow_offload_tuple_dir dir)
+-{
+-      __be32 addr, new_addr;
+-
+-      switch (dir) {
+-      case FLOW_OFFLOAD_DIR_ORIGINAL:
+-              addr = iph->saddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+-              iph->saddr = new_addr;
+-              break;
+-      case FLOW_OFFLOAD_DIR_REPLY:
+-              addr = iph->daddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+-              iph->daddr = new_addr;
+-              break;
+-      default:
+-              return -1;
+-      }
+-      csum_replace4(&iph->check, addr, new_addr);
+-
+-      return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+-}
+-
+-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+-                         struct iphdr *iph, unsigned int thoff,
+-                         enum flow_offload_tuple_dir dir)
+-{
+-      __be32 addr, new_addr;
+-
+-      switch (dir) {
+-      case FLOW_OFFLOAD_DIR_ORIGINAL:
+-              addr = iph->daddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+-              iph->daddr = new_addr;
+-              break;
+-      case FLOW_OFFLOAD_DIR_REPLY:
+-              addr = iph->saddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+-              iph->saddr = new_addr;
+-              break;
+-      default:
+-              return -1;
+-      }
+-
+-      return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+-}
+-
+-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+-                        enum flow_offload_tuple_dir dir)
+-{
+-      struct iphdr *iph = ip_hdr(skb);
+-      unsigned int thoff = iph->ihl * 4;
+-
+-      if (flow->flags & FLOW_OFFLOAD_SNAT &&
+-          (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+-           nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+-              return -1;
+-      if (flow->flags & FLOW_OFFLOAD_DNAT &&
+-          (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+-           nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+-              return -1;
+-
+-      return 0;
+-}
+-
+-static bool ip_has_options(unsigned int thoff)
+-{
+-      return thoff != sizeof(struct iphdr);
+-}
+-
+-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+-                          struct flow_offload_tuple *tuple)
+-{
+-      struct flow_ports *ports;
+-      unsigned int thoff;
+-      struct iphdr *iph;
+-
+-      if (!pskb_may_pull(skb, sizeof(*iph)))
+-              return -1;
+-
+-      iph = ip_hdr(skb);
+-      thoff = iph->ihl * 4;
+-
+-      if (ip_is_fragment(iph) ||
+-          unlikely(ip_has_options(thoff)))
+-              return -1;
+-
+-      if (iph->protocol != IPPROTO_TCP &&
+-          iph->protocol != IPPROTO_UDP)
+-              return -1;
+-
+-      thoff = iph->ihl * 4;
+-      if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+-              return -1;
+-
+-      ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+-
+-      tuple->src_v4.s_addr    = iph->saddr;
+-      tuple->dst_v4.s_addr    = iph->daddr;
+-      tuple->src_port         = ports->source;
+-      tuple->dst_port         = ports->dest;
+-      tuple->l3proto          = AF_INET;
+-      tuple->l4proto          = iph->protocol;
+-      tuple->iifidx           = dev->ifindex;
+-
+-      return 0;
+-}
+-
+-/* Based on ip_exceeds_mtu(). */
+-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+-{
+-      if (skb->len <= mtu)
+-              return false;
+-
+-      if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+-              return false;
+-
+-      if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+-              return false;
+-
+-      return true;
+-}
+-
+-unsigned int
+-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+-                      const struct nf_hook_state *state)
+-{
+-      struct flow_offload_tuple_rhash *tuplehash;
+-      struct nf_flowtable *flow_table = priv;
+-      struct flow_offload_tuple tuple = {};
+-      enum flow_offload_tuple_dir dir;
+-      struct flow_offload *flow;
+-      struct net_device *outdev;
+-      const struct rtable *rt;
+-      struct iphdr *iph;
+-      __be32 nexthop;
+-
+-      if (skb->protocol != htons(ETH_P_IP))
+-              return NF_ACCEPT;
+-
+-      if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+-              return NF_ACCEPT;
+-
+-      tuplehash = flow_offload_lookup(flow_table, &tuple);
+-      if (tuplehash == NULL)
+-              return NF_ACCEPT;
+-
+-      outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+-      if (!outdev)
+-              return NF_ACCEPT;
+-
+-      dir = tuplehash->tuple.dir;
+-      flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+-      rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+-
+-      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+-              return NF_ACCEPT;
+-
+-      if (skb_try_make_writable(skb, sizeof(*iph)))
+-              return NF_DROP;
+-
+-      if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+-          nf_flow_nat_ip(flow, skb, dir) < 0)
+-              return NF_DROP;
+-
+-      flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+-      iph = ip_hdr(skb);
+-      ip_decrease_ttl(iph);
+-
+-      skb->dev = outdev;
+-      nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+-      neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+-
+-      return NF_STOLEN;
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+ static struct nf_flowtable_type flowtable_ipv4 = {
+       .family         = NFPROTO_IPV4,
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -113,7 +113,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV)       += nft_fwd_
+ # flow table infrastructure
+ obj-$(CONFIG_NF_FLOW_TABLE)   += nf_flow_table.o
+-nf_flow_table-objs := nf_flow_table_core.o
++nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
+ obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+--- /dev/null
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -0,0 +1,245 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/ip.h>
++#include <linux/netdevice.h>
++#include <net/ip.h>
++#include <net/neighbour.h>
++#include <net/netfilter/nf_flow_table.h>
++/* For layer 4 checksum field offset. */
++#include <linux/tcp.h>
++#include <linux/udp.h>
++
++/* Patch the TCP checksum for an IPv4 address change; -1 if the header is unusable. */
++static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
++                            __be32 addr, __be32 new_addr)
++{
++      const unsigned int hdr_end = thoff + sizeof(struct tcphdr);
++      struct tcphdr *tcph;
++
++      if (!pskb_may_pull(skb, hdr_end) || skb_try_make_writable(skb, hdr_end))
++              return -1;
++
++      tcph = (void *)(skb_network_header(skb) + thoff);
++      inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
++      return 0;
++}
++
++static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
++                            __be32 addr, __be32 new_addr)
++{
++      struct udphdr *udph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*udph)))
++              return -1;
++
++      udph = (void *)(skb_network_header(skb) + thoff);
++      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
++              inet_proto_csum_replace4(&udph->check, skb, addr,
++                                       new_addr, true);
++              if (!udph->check)
++                      udph->check = CSUM_MANGLED_0;
++      }
++
++      return 0;
++}
++
++static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
++                                unsigned int thoff, __be32 addr,
++                                __be32 new_addr)
++{
++      switch (iph->protocol) {
++      case IPPROTO_TCP:
++              if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
++                      return -1; /* NF_DROP is 0; callers test < 0 */
++              break;
++      case IPPROTO_UDP:
++              if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
++                      return -1; /* NF_DROP is 0; callers test < 0 */
++              break;
++      }
++
++      return 0;
++}
++
++static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++                         struct iphdr *iph, unsigned int thoff,
++                         enum flow_offload_tuple_dir dir)
++{
++      __be32 addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = iph->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
++              iph->saddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = iph->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
++              iph->daddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++      csum_replace4(&iph->check, addr, new_addr);
++
++      return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++}
++
++static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++                         struct iphdr *iph, unsigned int thoff,
++                         enum flow_offload_tuple_dir dir)
++{
++      __be32 addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = iph->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
++              iph->daddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = iph->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
++              iph->saddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++
++      return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++}
++
++static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++                        enum flow_offload_tuple_dir dir)
++{
++      struct iphdr *iph = ip_hdr(skb);
++      unsigned int thoff = iph->ihl * 4;
++
++      if (flow->flags & FLOW_OFFLOAD_SNAT &&
++          (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
++           nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
++              return -1;
++      if (flow->flags & FLOW_OFFLOAD_DNAT &&
++          (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
++           nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
++              return -1;
++
++      return 0;
++}
++
++static bool ip_has_options(unsigned int thoff)
++{
++      return thoff != sizeof(struct iphdr);
++}
++
++static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
++                          struct flow_offload_tuple *tuple)
++{
++      struct flow_ports *ports;
++      unsigned int thoff;
++      struct iphdr *iph;
++
++      if (!pskb_may_pull(skb, sizeof(*iph)))
++              return -1;
++
++      iph = ip_hdr(skb);
++      thoff = iph->ihl * 4;
++
++      if (ip_is_fragment(iph) ||
++          unlikely(ip_has_options(thoff)))
++              return -1;
++
++      if (iph->protocol != IPPROTO_TCP &&
++          iph->protocol != IPPROTO_UDP)
++              return -1;
++
++      thoff = iph->ihl * 4;
++      if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++              return -1;
++
++      ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
++
++      tuple->src_v4.s_addr    = iph->saddr;
++      tuple->dst_v4.s_addr    = iph->daddr;
++      tuple->src_port         = ports->source;
++      tuple->dst_port         = ports->dest;
++      tuple->l3proto          = AF_INET;
++      tuple->l4proto          = iph->protocol;
++      tuple->iifidx           = dev->ifindex;
++
++      return 0;
++}
++
++/* Based on ip_exceeds_mtu(). */
++static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
++{
++      if (skb->len <= mtu)
++              return false;
++
++      if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
++              return false;
++
++      if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
++              return false;
++
++      return true;
++}
++
++unsigned int
++nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
++                      const struct nf_hook_state *state)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct nf_flowtable *flow_table = priv;
++      struct flow_offload_tuple tuple = {};
++      enum flow_offload_tuple_dir dir;
++      struct flow_offload *flow;
++      struct net_device *outdev;
++      const struct rtable *rt;
++      struct iphdr *iph;
++      __be32 nexthop;
++
++      if (skb->protocol != htons(ETH_P_IP))
++              return NF_ACCEPT;
++
++      if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
++              return NF_ACCEPT;
++
++      tuplehash = flow_offload_lookup(flow_table, &tuple);
++      if (tuplehash == NULL)
++              return NF_ACCEPT;
++
++      outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
++      if (!outdev)
++              return NF_ACCEPT;
++
++      dir = tuplehash->tuple.dir;
++      flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
++      rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
++
++      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
++              return NF_ACCEPT;
++
++      if (skb_try_make_writable(skb, sizeof(*iph)))
++              return NF_DROP;
++
++      if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
++          nf_flow_nat_ip(flow, skb, dir) < 0)
++              return NF_DROP;
++
++      flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++      iph = ip_hdr(skb);
++      ip_decrease_ttl(iph);
++
++      skb->dev = outdev;
++      nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
++      neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
++
++      return NF_STOLEN;
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
diff --git a/target/linux/generic/backport-4.14/354-netfilter-nf_flow_table-move-ip-header-check-out-of-.patch b/target/linux/generic/backport-4.14/354-netfilter-nf_flow_table-move-ip-header-check-out-of-.patch
new file mode 100644 (file)
index 0000000..4ee5532
--- /dev/null
@@ -0,0 +1,32 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 17 Feb 2018 11:51:20 +0100
+Subject: [PATCH] netfilter: nf_flow_table: move ip header check out of
+ nf_flow_exceeds_mtu
+
+Allows the function to be shared with the IPv6 hook code
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -181,9 +181,6 @@ static bool nf_flow_exceeds_mtu(const st
+       if (skb->len <= mtu)
+               return false;
+-      if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+-              return false;
+-
+       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+               return false;
+@@ -222,7 +219,8 @@ nf_flow_offload_ip_hook(void *priv, stru
+       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+       rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+-      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
++      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
++          (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
+               return NF_ACCEPT;
+       if (skb_try_make_writable(skb, sizeof(*iph)))
diff --git a/target/linux/generic/backport-4.14/355-netfilter-nf_flow_table-move-ipv6-offload-hook-code-.patch b/target/linux/generic/backport-4.14/355-netfilter-nf_flow_table-move-ipv6-offload-hook-code-.patch
new file mode 100644 (file)
index 0000000..20ab0ed
--- /dev/null
@@ -0,0 +1,483 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 17 Feb 2018 11:55:51 +0100
+Subject: [PATCH] netfilter: nf_flow_table: move ipv6 offload hook code to
+ nf_flow_table
+
+Useful as preparation for adding iptables support for offload
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
+@@ -3,240 +3,8 @@
+ #include <linux/module.h>
+ #include <linux/netfilter.h>
+ #include <linux/rhashtable.h>
+-#include <linux/ipv6.h>
+-#include <linux/netdevice.h>
+-#include <net/ipv6.h>
+-#include <net/ip6_route.h>
+-#include <net/neighbour.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_tables.h>
+-/* For layer 4 checksum field offset. */
+-#include <linux/tcp.h>
+-#include <linux/udp.h>
+-
+-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+-                              struct in6_addr *addr,
+-                              struct in6_addr *new_addr)
+-{
+-      struct tcphdr *tcph;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+-              return -1;
+-
+-      tcph = (void *)(skb_network_header(skb) + thoff);
+-      inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+-                                new_addr->s6_addr32, true);
+-
+-      return 0;
+-}
+-
+-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+-                              struct in6_addr *addr,
+-                              struct in6_addr *new_addr)
+-{
+-      struct udphdr *udph;
+-
+-      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+-          skb_try_make_writable(skb, thoff + sizeof(*udph)))
+-              return -1;
+-
+-      udph = (void *)(skb_network_header(skb) + thoff);
+-      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+-              inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+-                                        new_addr->s6_addr32, true);
+-              if (!udph->check)
+-                      udph->check = CSUM_MANGLED_0;
+-      }
+-
+-      return 0;
+-}
+-
+-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+-                                  unsigned int thoff, struct in6_addr *addr,
+-                                  struct in6_addr *new_addr)
+-{
+-      switch (ip6h->nexthdr) {
+-      case IPPROTO_TCP:
+-              if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+-                      return NF_DROP;
+-              break;
+-      case IPPROTO_UDP:
+-              if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+-                      return NF_DROP;
+-              break;
+-      }
+-
+-      return 0;
+-}
+-
+-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+-                           struct sk_buff *skb, struct ipv6hdr *ip6h,
+-                           unsigned int thoff,
+-                           enum flow_offload_tuple_dir dir)
+-{
+-      struct in6_addr addr, new_addr;
+-
+-      switch (dir) {
+-      case FLOW_OFFLOAD_DIR_ORIGINAL:
+-              addr = ip6h->saddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+-              ip6h->saddr = new_addr;
+-              break;
+-      case FLOW_OFFLOAD_DIR_REPLY:
+-              addr = ip6h->daddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+-              ip6h->daddr = new_addr;
+-              break;
+-      default:
+-              return -1;
+-      }
+-
+-      return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+-}
+-
+-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+-                           struct sk_buff *skb, struct ipv6hdr *ip6h,
+-                           unsigned int thoff,
+-                           enum flow_offload_tuple_dir dir)
+-{
+-      struct in6_addr addr, new_addr;
+-
+-      switch (dir) {
+-      case FLOW_OFFLOAD_DIR_ORIGINAL:
+-              addr = ip6h->daddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+-              ip6h->daddr = new_addr;
+-              break;
+-      case FLOW_OFFLOAD_DIR_REPLY:
+-              addr = ip6h->saddr;
+-              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+-              ip6h->saddr = new_addr;
+-              break;
+-      default:
+-              return -1;
+-      }
+-
+-      return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+-}
+-
+-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+-                          struct sk_buff *skb,
+-                          enum flow_offload_tuple_dir dir)
+-{
+-      struct ipv6hdr *ip6h = ipv6_hdr(skb);
+-      unsigned int thoff = sizeof(*ip6h);
+-
+-      if (flow->flags & FLOW_OFFLOAD_SNAT &&
+-          (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+-           nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+-              return -1;
+-      if (flow->flags & FLOW_OFFLOAD_DNAT &&
+-          (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+-           nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+-              return -1;
+-
+-      return 0;
+-}
+-
+-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+-                            struct flow_offload_tuple *tuple)
+-{
+-      struct flow_ports *ports;
+-      struct ipv6hdr *ip6h;
+-      unsigned int thoff;
+-
+-      if (!pskb_may_pull(skb, sizeof(*ip6h)))
+-              return -1;
+-
+-      ip6h = ipv6_hdr(skb);
+-
+-      if (ip6h->nexthdr != IPPROTO_TCP &&
+-          ip6h->nexthdr != IPPROTO_UDP)
+-              return -1;
+-
+-      thoff = sizeof(*ip6h);
+-      if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+-              return -1;
+-
+-      ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+-
+-      tuple->src_v6           = ip6h->saddr;
+-      tuple->dst_v6           = ip6h->daddr;
+-      tuple->src_port         = ports->source;
+-      tuple->dst_port         = ports->dest;
+-      tuple->l3proto          = AF_INET6;
+-      tuple->l4proto          = ip6h->nexthdr;
+-      tuple->iifidx           = dev->ifindex;
+-
+-      return 0;
+-}
+-
+-/* Based on ip_exceeds_mtu(). */
+-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+-{
+-      if (skb->len <= mtu)
+-              return false;
+-
+-      if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+-              return false;
+-
+-      return true;
+-}
+-
+-unsigned int
+-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+-                        const struct nf_hook_state *state)
+-{
+-      struct flow_offload_tuple_rhash *tuplehash;
+-      struct nf_flowtable *flow_table = priv;
+-      struct flow_offload_tuple tuple = {};
+-      enum flow_offload_tuple_dir dir;
+-      struct flow_offload *flow;
+-      struct net_device *outdev;
+-      struct in6_addr *nexthop;
+-      struct ipv6hdr *ip6h;
+-      struct rt6_info *rt;
+-
+-      if (skb->protocol != htons(ETH_P_IPV6))
+-              return NF_ACCEPT;
+-
+-      if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+-              return NF_ACCEPT;
+-
+-      tuplehash = flow_offload_lookup(flow_table, &tuple);
+-      if (tuplehash == NULL)
+-              return NF_ACCEPT;
+-
+-      outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+-      if (!outdev)
+-              return NF_ACCEPT;
+-
+-      dir = tuplehash->tuple.dir;
+-      flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+-      rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+-
+-      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+-              return NF_ACCEPT;
+-
+-      if (skb_try_make_writable(skb, sizeof(*ip6h)))
+-              return NF_DROP;
+-
+-      if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+-          nf_flow_nat_ipv6(flow, skb, dir) < 0)
+-              return NF_DROP;
+-
+-      flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+-      ip6h = ipv6_hdr(skb);
+-      ip6h->hop_limit--;
+-
+-      skb->dev = outdev;
+-      nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+-      neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+-
+-      return NF_STOLEN;
+-}
+-EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+ static struct nf_flowtable_type flowtable_ipv6 = {
+       .family         = NFPROTO_IPV6,
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -4,8 +4,11 @@
+ #include <linux/netfilter.h>
+ #include <linux/rhashtable.h>
+ #include <linux/ip.h>
++#include <linux/ipv6.h>
+ #include <linux/netdevice.h>
+ #include <net/ip.h>
++#include <net/ipv6.h>
++#include <net/ip6_route.h>
+ #include <net/neighbour.h>
+ #include <net/netfilter/nf_flow_table.h>
+ /* For layer 4 checksum field offset. */
+@@ -241,3 +244,215 @@ nf_flow_offload_ip_hook(void *priv, stru
+       return NF_STOLEN;
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
++
++static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
++                              struct in6_addr *addr,
++                              struct in6_addr *new_addr)
++{
++      struct tcphdr *tcph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*tcph)))
++              return -1;
++
++      tcph = (void *)(skb_network_header(skb) + thoff);
++      inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
++                                new_addr->s6_addr32, true);
++
++      return 0;
++}
++
++static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
++                              struct in6_addr *addr,
++                              struct in6_addr *new_addr)
++{
++      struct udphdr *udph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
++          skb_try_make_writable(skb, thoff + sizeof(*udph)))
++              return -1;
++
++      udph = (void *)(skb_network_header(skb) + thoff);
++      if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
++              inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
++                                        new_addr->s6_addr32, true);
++              if (!udph->check)
++                      udph->check = CSUM_MANGLED_0;
++      }
++
++      return 0;
++}
++
++static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
++                                  unsigned int thoff, struct in6_addr *addr,
++                                  struct in6_addr *new_addr)
++{
++      switch (ip6h->nexthdr) {
++      case IPPROTO_TCP:
++              if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
++                      return -1; /* NF_DROP is 0; callers test < 0 */
++              break;
++      case IPPROTO_UDP:
++              if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
++                      return -1; /* NF_DROP is 0; callers test < 0 */
++              break;
++      }
++
++      return 0;
++}
++
++static int nf_flow_snat_ipv6(const struct flow_offload *flow,
++                           struct sk_buff *skb, struct ipv6hdr *ip6h,
++                           unsigned int thoff,
++                           enum flow_offload_tuple_dir dir)
++{
++      struct in6_addr addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = ip6h->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
++              ip6h->saddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = ip6h->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
++              ip6h->daddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++
++      return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++}
++
++static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
++                           struct sk_buff *skb, struct ipv6hdr *ip6h,
++                           unsigned int thoff,
++                           enum flow_offload_tuple_dir dir)
++{
++      struct in6_addr addr, new_addr;
++
++      switch (dir) {
++      case FLOW_OFFLOAD_DIR_ORIGINAL:
++              addr = ip6h->daddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
++              ip6h->daddr = new_addr;
++              break;
++      case FLOW_OFFLOAD_DIR_REPLY:
++              addr = ip6h->saddr;
++              new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
++              ip6h->saddr = new_addr;
++              break;
++      default:
++              return -1;
++      }
++
++      return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++}
++
++static int nf_flow_nat_ipv6(const struct flow_offload *flow,
++                          struct sk_buff *skb,
++                          enum flow_offload_tuple_dir dir)
++{
++      struct ipv6hdr *ip6h = ipv6_hdr(skb);
++      unsigned int thoff = sizeof(*ip6h);
++
++      if (flow->flags & FLOW_OFFLOAD_SNAT &&
++          (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
++           nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
++              return -1;
++      if (flow->flags & FLOW_OFFLOAD_DNAT &&
++          (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
++           nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
++              return -1;
++
++      return 0;
++}
++
++static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
++                            struct flow_offload_tuple *tuple)
++{
++      struct flow_ports *ports;
++      struct ipv6hdr *ip6h;
++      unsigned int thoff;
++
++      if (!pskb_may_pull(skb, sizeof(*ip6h)))
++              return -1;
++
++      ip6h = ipv6_hdr(skb);
++
++      if (ip6h->nexthdr != IPPROTO_TCP &&
++          ip6h->nexthdr != IPPROTO_UDP)
++              return -1;
++
++      thoff = sizeof(*ip6h);
++      if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++              return -1;
++
++      ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
++
++      tuple->src_v6           = ip6h->saddr;
++      tuple->dst_v6           = ip6h->daddr;
++      tuple->src_port         = ports->source;
++      tuple->dst_port         = ports->dest;
++      tuple->l3proto          = AF_INET6;
++      tuple->l4proto          = ip6h->nexthdr;
++      tuple->iifidx           = dev->ifindex;
++
++      return 0;
++}
++
++unsigned int
++nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
++                        const struct nf_hook_state *state)
++{
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct nf_flowtable *flow_table = priv;
++      struct flow_offload_tuple tuple = {};
++      enum flow_offload_tuple_dir dir;
++      struct flow_offload *flow;
++      struct net_device *outdev;
++      struct in6_addr *nexthop;
++      struct ipv6hdr *ip6h;
++      struct rt6_info *rt;
++
++      if (skb->protocol != htons(ETH_P_IPV6))
++              return NF_ACCEPT;
++
++      if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
++              return NF_ACCEPT;
++
++      tuplehash = flow_offload_lookup(flow_table, &tuple);
++      if (tuplehash == NULL)
++              return NF_ACCEPT;
++
++      outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
++      if (!outdev)
++              return NF_ACCEPT;
++
++      dir = tuplehash->tuple.dir;
++      flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
++      rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
++
++      if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
++              return NF_ACCEPT;
++
++      if (skb_try_make_writable(skb, sizeof(*ip6h)))
++              return NF_DROP;
++
++      if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
++          nf_flow_nat_ipv6(flow, skb, dir) < 0)
++              return NF_DROP;
++
++      flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++      ip6h = ipv6_hdr(skb);
++      ip6h->hop_limit--;
++
++      skb->dev = outdev;
++      nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
++      neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
++
++      return NF_STOLEN;
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/target/linux/generic/backport-4.14/356-netfilter-nf_flow_table-relax-mixed-ipv4-ipv6-flowta.patch b/target/linux/generic/backport-4.14/356-netfilter-nf_flow_table-relax-mixed-ipv4-ipv6-flowta.patch
new file mode 100644 (file)
index 0000000..7d4bdc6
--- /dev/null
@@ -0,0 +1,23 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 17 Feb 2018 12:02:28 +0100
+Subject: [PATCH] netfilter: nf_flow_table: relax mixed ipv4/ipv6 flowtable
+ dependencies
+
+Since the offload hook code was moved, this table no longer depends on
+the IPv4 and IPv6 flowtable modules
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -670,8 +670,7 @@ endif # NF_TABLES
+ config NF_FLOW_TABLE_INET
+       tristate "Netfilter flow table mixed IPv4/IPv6 module"
+-      depends on NF_FLOW_TABLE_IPV4
+-      depends on NF_FLOW_TABLE_IPV6
++      depends on NF_FLOW_TABLE
+       help
+           This option adds the flow table mixed IPv4/IPv6 support.
diff --git a/target/linux/generic/backport-4.14/357-netfilter-nf_flow_table-move-init-code-to-nf_flow_ta.patch b/target/linux/generic/backport-4.14/357-netfilter-nf_flow_table-move-init-code-to-nf_flow_ta.patch
new file mode 100644 (file)
index 0000000..75cbda6
--- /dev/null
@@ -0,0 +1,298 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 18 Feb 2018 18:16:31 +0100
+Subject: [PATCH] netfilter: nf_flow_table: move init code to
+ nf_flow_table_core.c
+
+Reduces duplication of .gc and .params in flowtable type definitions and
+makes the API clearer
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -14,9 +14,8 @@ struct nf_flowtable;
+ struct nf_flowtable_type {
+       struct list_head                list;
+       int                             family;
+-      void                            (*gc)(struct work_struct *work);
++      int                             (*init)(struct nf_flowtable *ft);
+       void                            (*free)(struct nf_flowtable *ft);
+-      const struct rhashtable_params  *params;
+       nf_hookfn                       *hook;
+       struct module                   *owner;
+ };
+@@ -100,9 +99,8 @@ int nf_flow_table_iterate(struct nf_flow
+ void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
++int nf_flow_table_init(struct nf_flowtable *flow_table);
+ void nf_flow_table_free(struct nf_flowtable *flow_table);
+-void nf_flow_offload_work_gc(struct work_struct *work);
+-extern const struct rhashtable_params nf_flow_offload_rhash_params;
+ void flow_offload_dead(struct flow_offload *flow);
+--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
+@@ -7,8 +7,7 @@
+ static struct nf_flowtable_type flowtable_ipv4 = {
+       .family         = NFPROTO_IPV4,
+-      .params         = &nf_flow_offload_rhash_params,
+-      .gc             = nf_flow_offload_work_gc,
++      .init           = nf_flow_table_init,
+       .free           = nf_flow_table_free,
+       .hook           = nf_flow_offload_ip_hook,
+       .owner          = THIS_MODULE,
+--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
+@@ -8,8 +8,7 @@
+ static struct nf_flowtable_type flowtable_ipv6 = {
+       .family         = NFPROTO_IPV6,
+-      .params         = &nf_flow_offload_rhash_params,
+-      .gc             = nf_flow_offload_work_gc,
++      .init           = nf_flow_table_init,
+       .free           = nf_flow_table_free,
+       .hook           = nf_flow_offload_ipv6_hook,
+       .owner          = THIS_MODULE,
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -116,16 +116,50 @@ void flow_offload_dead(struct flow_offlo
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_dead);
++/*
++ * rhashtable .hashfn: hash a lookup key, which is a bare
++ * struct flow_offload_tuple. Only the bytes that precede the 'dir' member
++ * are hashed; @len is unused.
++ */
++static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
++{
++      const struct flow_offload_tuple *tuple = data;
++
++      return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
++}
++
++/*
++ * rhashtable .obj_hashfn: hash a stored object, which is a
++ * struct flow_offload_tuple_rhash. The embedded tuple is hashed over the
++ * same byte range as in flow_offload_hash() (everything before 'dir');
++ * @len is unused.
++ */
++static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
++{
++      const struct flow_offload_tuple_rhash *tuplehash = data;
++
++      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
++}
++
++/*
++ * rhashtable .obj_cmpfn: compare the lookup key in @arg->key (a
++ * struct flow_offload_tuple) with the tuple embedded in the stored object
++ * @ptr. Only the bytes before the 'dir' member take part in the comparison.
++ *
++ * Returns 0 on a match and 1 otherwise.
++ */
++static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
++                                      const void *ptr)
++{
++      const struct flow_offload_tuple *tuple = arg->key;
++      const struct flow_offload_tuple_rhash *x = ptr;
++
++      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
++              return 1;
++
++      return 0;
++}
++
++/*
++ * rhashtable parameters used for every flow table in this file. Objects
++ * are linked through the 'node' member of struct flow_offload_tuple_rhash
++ * and are hashed/compared with the flow_offload_hash*() helpers.
++ */
++static const struct rhashtable_params nf_flow_offload_rhash_params = {
++      .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
++      .hashfn                 = flow_offload_hash,
++      .obj_hashfn             = flow_offload_hash_obj,
++      .obj_cmpfn              = flow_offload_hash_cmp,
++      .automatic_shrinking    = true,
++};
++
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ {
+       flow->timeout = (u32)jiffies;
+       rhashtable_insert_fast(&flow_table->rhashtable,
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+-                             *flow_table->type->params);
++                             nf_flow_offload_rhash_params);
+       rhashtable_insert_fast(&flow_table->rhashtable,
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+-                             *flow_table->type->params);
++                             nf_flow_offload_rhash_params);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_add);
+@@ -135,10 +169,10 @@ static void flow_offload_del(struct nf_f
+ {
+       rhashtable_remove_fast(&flow_table->rhashtable,
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+-                             *flow_table->type->params);
++                             nf_flow_offload_rhash_params);
+       rhashtable_remove_fast(&flow_table->rhashtable,
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+-                             *flow_table->type->params);
++                             nf_flow_offload_rhash_params);
+       flow_offload_free(flow);
+ }
+@@ -148,7 +182,7 @@ flow_offload_lookup(struct nf_flowtable
+                   struct flow_offload_tuple *tuple)
+ {
+       return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+-                                    *flow_table->type->params);
++                                    nf_flow_offload_rhash_params);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_lookup);
+@@ -237,7 +271,7 @@ out:
+       return 1;
+ }
+-void nf_flow_offload_work_gc(struct work_struct *work)
++static void nf_flow_offload_work_gc(struct work_struct *work)
+ {
+       struct nf_flowtable *flow_table;
+@@ -245,42 +279,6 @@ void nf_flow_offload_work_gc(struct work
+       nf_flow_offload_gc_step(flow_table);
+       queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+ }
+-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+-
+-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+-{
+-      const struct flow_offload_tuple *tuple = data;
+-
+-      return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+-}
+-
+-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+-{
+-      const struct flow_offload_tuple_rhash *tuplehash = data;
+-
+-      return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+-}
+-
+-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+-                                      const void *ptr)
+-{
+-      const struct flow_offload_tuple *tuple = arg->key;
+-      const struct flow_offload_tuple_rhash *x = ptr;
+-
+-      if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+-              return 1;
+-
+-      return 0;
+-}
+-
+-const struct rhashtable_params nf_flow_offload_rhash_params = {
+-      .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
+-      .hashfn                 = flow_offload_hash,
+-      .obj_hashfn             = flow_offload_hash_obj,
+-      .obj_cmpfn              = flow_offload_hash_cmp,
+-      .automatic_shrinking    = true,
+-};
+-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
+ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+                               __be16 port, __be16 new_port)
+@@ -398,6 +396,24 @@ int nf_flow_dnat_port(const struct flow_
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
++/*
++ * Common flow table initialisation: prepare the garbage collection work
++ * item, initialise the rhashtable and schedule the first garbage
++ * collection run HZ jiffies from now.
++ *
++ * Returns 0 on success or the negative error from rhashtable_init(), in
++ * which case the garbage collection work has not been queued.
++ */
++int nf_flow_table_init(struct nf_flowtable *flowtable)
++{
++      int err;
++
++      INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
++
++      err = rhashtable_init(&flowtable->rhashtable,
++                            &nf_flow_offload_rhash_params);
++      if (err < 0)
++              return err;
++
++      queue_delayed_work(system_power_efficient_wq,
++                         &flowtable->gc_work, HZ);
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_init);
++
+ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+ {
+       struct net_device *dev = data;
+@@ -423,8 +439,10 @@ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup)
+ void nf_flow_table_free(struct nf_flowtable *flow_table)
+ {
++      cancel_delayed_work_sync(&flow_table->gc_work);
+       nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+       WARN_ON(!nf_flow_offload_gc_step(flow_table));
++      rhashtable_destroy(&flow_table->rhashtable);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_free);
+--- a/net/netfilter/nf_flow_table_inet.c
++++ b/net/netfilter/nf_flow_table_inet.c
+@@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, st
+ static struct nf_flowtable_type flowtable_inet = {
+       .family         = NFPROTO_INET,
+-      .params         = &nf_flow_offload_rhash_params,
+-      .gc             = nf_flow_offload_work_gc,
++      .init           = nf_flow_table_init,
+       .free           = nf_flow_table_free,
+       .hook           = nf_flow_offload_inet_hook,
+       .owner          = THIS_MODULE,
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -5013,40 +5013,38 @@ static int nf_tables_newflowtable(struct
+       }
+       flowtable->data.type = type;
+-      err = rhashtable_init(&flowtable->data.rhashtable, type->params);
++      err = type->init(&flowtable->data);
+       if (err < 0)
+               goto err3;
+       err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+                                            flowtable);
+       if (err < 0)
+-              goto err3;
++              goto err4;
+       for (i = 0; i < flowtable->ops_len; i++) {
+               err = nf_register_net_hook(net, &flowtable->ops[i]);
+               if (err < 0)
+-                      goto err4;
++                      goto err5;
+       }
+       err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+       if (err < 0)
+-              goto err5;
+-
+-      INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
+-      queue_delayed_work(system_power_efficient_wq,
+-                         &flowtable->data.gc_work, HZ);
++              goto err6;
+       list_add_tail_rcu(&flowtable->list, &table->flowtables);
+       table->use++;
+       return 0;
+-err5:
++err6:
+       i = flowtable->ops_len;
+-err4:
++err5:
+       for (k = i - 1; k >= 0; k--)
+               nf_unregister_net_hook(net, &flowtable->ops[i]);
+       kfree(flowtable->ops);
++err4:
++      flowtable->data.type->free(&flowtable->data);
+ err3:
+       module_put(type->owner);
+ err2:
+@@ -5325,10 +5323,8 @@ err:
+ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
+ {
+-      cancel_delayed_work_sync(&flowtable->data.gc_work);
+       kfree(flowtable->name);
+       flowtable->data.type->free(&flowtable->data);
+-      rhashtable_destroy(&flowtable->data.rhashtable);
+       module_put(flowtable->data.type->owner);
+ }
diff --git a/target/linux/generic/backport-4.14/358-netfilter-nf_flow_table-fix-priv-pointer-for-netdev-.patch b/target/linux/generic/backport-4.14/358-netfilter-nf_flow_table-fix-priv-pointer-for-netdev-.patch
new file mode 100644 (file)
index 0000000..d4b746d
--- /dev/null
@@ -0,0 +1,22 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Tue, 20 Feb 2018 14:48:51 +0100
+Subject: [PATCH] netfilter: nf_flow_table: fix priv pointer for netdev hook
+
+The offload ip hook expects a pointer to the flowtable, not to the
+rhashtable. Since the rhashtable is the first member, this is safe for
+the moment, but breaks as soon as the structure layout changes
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4879,7 +4879,7 @@ static int nf_tables_flowtable_parse_hoo
+               flowtable->ops[i].pf            = NFPROTO_NETDEV;
+               flowtable->ops[i].hooknum       = hooknum;
+               flowtable->ops[i].priority      = priority;
+-              flowtable->ops[i].priv          = &flowtable->data.rhashtable;
++              flowtable->ops[i].priv          = &flowtable->data;
+               flowtable->ops[i].hook          = flowtable->data.type->hook;
+               flowtable->ops[i].dev           = dev_array[i];
+       }
diff --git a/target/linux/generic/backport-4.14/359-netfilter-nf_flow_table-track-flow-tables-in-nf_flow.patch b/target/linux/generic/backport-4.14/359-netfilter-nf_flow_table-track-flow-tables-in-nf_flow.patch
new file mode 100644 (file)
index 0000000..ad7d81f
--- /dev/null
@@ -0,0 +1,114 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Tue, 20 Feb 2018 14:08:14 +0100
+Subject: [PATCH] netfilter: nf_flow_table: track flow tables in nf_flow_table
+ directly
+
+Avoids having nf_flow_table depend on nftables (useful for future
+iptables backport work)
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -21,6 +21,7 @@ struct nf_flowtable_type {
+ };
+ struct nf_flowtable {
++      struct list_head                list;
+       struct rhashtable               rhashtable;
+       const struct nf_flowtable_type  *type;
+       struct delayed_work             gc_work;
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1091,9 +1091,6 @@ struct nft_flowtable {
+ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
+                                                const struct nlattr *nla,
+                                                u8 genmask);
+-void nft_flow_table_iterate(struct net *net,
+-                          void (*iter)(struct nf_flowtable *flowtable, void *data),
+-                          void *data);
+ void nft_register_flowtable_type(struct nf_flowtable_type *type);
+ void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -18,6 +18,9 @@ struct flow_offload_entry {
+       struct rcu_head         rcu_head;
+ };
++static DEFINE_MUTEX(flowtable_lock);
++static LIST_HEAD(flowtables);
++
+ static void
+ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+                     struct nf_flow_route *route,
+@@ -410,6 +413,10 @@ int nf_flow_table_init(struct nf_flowtab
+       queue_delayed_work(system_power_efficient_wq,
+                          &flowtable->gc_work, HZ);
++      mutex_lock(&flowtable_lock);
++      list_add(&flowtable->list, &flowtables);
++      mutex_unlock(&flowtable_lock);
++
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_init);
+@@ -425,20 +432,28 @@ static void nf_flow_table_do_cleanup(str
+ }
+ static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+-                                        void *data)
++                                        struct net_device *dev)
+ {
+-      nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
++      nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
+       flush_delayed_work(&flowtable->gc_work);
+ }
+ void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+ {
+-      nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
++      struct nf_flowtable *flowtable;
++
++      mutex_lock(&flowtable_lock);
++      list_for_each_entry(flowtable, &flowtables, list)
++              nf_flow_table_iterate_cleanup(flowtable, dev);
++      mutex_unlock(&flowtable_lock);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+ void nf_flow_table_free(struct nf_flowtable *flow_table)
+ {
++      mutex_lock(&flowtable_lock);
++      list_del(&flow_table->list);
++      mutex_unlock(&flowtable_lock);
+       cancel_delayed_work_sync(&flow_table->gc_work);
+       nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+       WARN_ON(!nf_flow_offload_gc_step(flow_table));
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4923,23 +4923,6 @@ static const struct nf_flowtable_type *n
+       return ERR_PTR(-ENOENT);
+ }
+-void nft_flow_table_iterate(struct net *net,
+-                          void (*iter)(struct nf_flowtable *flowtable, void *data),
+-                          void *data)
+-{
+-      struct nft_flowtable *flowtable;
+-      const struct nft_table *table;
+-
+-      nfnl_lock(NFNL_SUBSYS_NFTABLES);
+-      list_for_each_entry(table, &net->nft.tables, list) {
+-              list_for_each_entry(flowtable, &table->flowtables, list) {
+-                      iter(&flowtable->data, data);
+-              }
+-      }
+-      nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+-}
+-EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
+-
+ static void nft_unregister_flowtable_net_hooks(struct net *net,
+                                              struct nft_flowtable *flowtable)
+ {
diff --git a/target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-add-hardware-offload-support.patch b/target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-add-hardware-offload-support.patch
new file mode 100644 (file)
index 0000000..f3c2ec8
--- /dev/null
@@ -0,0 +1,566 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 11 Jan 2018 16:32:00 +0100
+Subject: [PATCH] netfilter: nf_flow_table: add hardware offload support
+
+This patch adds the infrastructure to offload flows to hardware, in case
+the nic/switch comes with built-in flow tables capabilities.
+
+If the hardware comes with no hardware flow tables or they have
+limitations in terms of features, the existing infrastructure falls back
+to the software flow table implementation.
+
+The software flow table garbage collector skips entries that reside in
+the hardware, so the hardware will be responsible for releasing this
+flow table entry too via flow_offload_dead().
+
+Hardware configuration, either to add or to delete entries, is done from
+the hardware offload workqueue, to ensure this is done from user context
+given that we may sleep when grabbing the mdio mutex.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/netfilter/nf_flow_table_hw.c
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -826,6 +826,13 @@ struct xfrmdev_ops {
+ };
+ #endif
++struct flow_offload;
++
++enum flow_offload_type {
++      FLOW_OFFLOAD_ADD        = 0,
++      FLOW_OFFLOAD_DEL,
++};
++
+ /*
+  * This structure defines the management hooks for network devices.
+  * The following hooks can be defined; unless noted otherwise, they are
+@@ -1057,6 +1064,10 @@ struct xfrmdev_ops {
+  * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
+  *                         u16 flags);
+  *
++ * int (*ndo_flow_offload)(enum flow_offload_type type,
++ *                       struct flow_offload *flow);
++ *    Adds/deletes flow entry to/from net device flowtable.
++ *
+  * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
+  *    Called to change device carrier. Soft-devices (like dummy, team, etc)
+  *    which do not represent real hardware may define this to allow their
+@@ -1281,6 +1292,8 @@ struct net_device_ops {
+       int                     (*ndo_bridge_dellink)(struct net_device *dev,
+                                                     struct nlmsghdr *nlh,
+                                                     u16 flags);
++      int                     (*ndo_flow_offload)(enum flow_offload_type type,
++                                                  struct flow_offload *flow);
+       int                     (*ndo_change_carrier)(struct net_device *dev,
+                                                     bool new_carrier);
+       int                     (*ndo_get_phys_port_id)(struct net_device *dev,
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -20,11 +20,17 @@ struct nf_flowtable_type {
+       struct module                   *owner;
+ };
++enum nf_flowtable_flags {
++      NF_FLOWTABLE_F_HW               = 0x1,
++};
++
+ struct nf_flowtable {
+       struct list_head                list;
+       struct rhashtable               rhashtable;
+       const struct nf_flowtable_type  *type;
++      u32                             flags;
+       struct delayed_work             gc_work;
++      possible_net_t                  ft_net;
+ };
+ enum flow_offload_tuple_dir {
+@@ -68,6 +74,7 @@ struct flow_offload_tuple_rhash {
+ #define FLOW_OFFLOAD_SNAT     0x1
+ #define FLOW_OFFLOAD_DNAT     0x2
+ #define FLOW_OFFLOAD_DYING    0x4
++#define FLOW_OFFLOAD_HW               0x8
+ struct flow_offload {
+       struct flow_offload_tuple_rhash         tuplehash[FLOW_OFFLOAD_DIR_MAX];
+@@ -121,6 +128,22 @@ unsigned int nf_flow_offload_ip_hook(voi
+ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+                                      const struct nf_hook_state *state);
++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
++                          struct nf_conn *ct);
++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow);
++
++struct nf_flow_table_hw {
++      struct module   *owner;
++      void            (*add)(struct net *net, struct flow_offload *flow,
++                             struct nf_conn *ct);
++      void            (*del)(struct net *net, struct flow_offload *flow);
++};
++
++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload);
++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload);
++
++extern struct work_struct nf_flow_offload_hw_work;
++
+ #define MODULE_ALIAS_NF_FLOWTABLE(family)     \
+       MODULE_ALIAS("nf-flowtable-" __stringify(family))
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -1341,6 +1341,7 @@ enum nft_object_attributes {
+  * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
+  * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
+  * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
++ * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32)
+  */
+ enum nft_flowtable_attributes {
+       NFTA_FLOWTABLE_UNSPEC,
+@@ -1350,6 +1351,7 @@ enum nft_flowtable_attributes {
+       NFTA_FLOWTABLE_USE,
+       NFTA_FLOWTABLE_HANDLE,
+       NFTA_FLOWTABLE_PAD,
++      NFTA_FLOWTABLE_FLAGS,
+       __NFTA_FLOWTABLE_MAX
+ };
+ #define NFTA_FLOWTABLE_MAX    (__NFTA_FLOWTABLE_MAX - 1)
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -686,6 +686,15 @@ config NF_FLOW_TABLE
+         To compile it as a module, choose M here.
++config NF_FLOW_TABLE_HW
++      tristate "Netfilter flow table hardware offload module"
++      depends on NF_FLOW_TABLE
++      help
++        This option adds hardware offload support for the flow table core
++        infrastructure.
++
++        To compile it as a module, choose M here.
++
+ config NETFILTER_XTABLES
+       tristate "Netfilter Xtables support (required for ip_tables)"
+       default m if NETFILTER_ADVANCED=n
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -116,6 +116,7 @@ obj-$(CONFIG_NF_FLOW_TABLE)        += nf_flow_t
+ nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
+ obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
++obj-$(CONFIG_NF_FLOW_TABLE_HW)        += nf_flow_table_hw.o
+ # generic X tables 
+ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -167,9 +167,16 @@ int flow_offload_add(struct nf_flowtable
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_add);
++/* True when @flow carries the FLOW_OFFLOAD_HW flag. */
++static inline bool nf_flow_in_hw(const struct flow_offload *flow)
++{
++      return flow->flags & FLOW_OFFLOAD_HW;
++}
++
+ static void flow_offload_del(struct nf_flowtable *flow_table,
+                            struct flow_offload *flow)
+ {
++      struct net *net = read_pnet(&flow_table->ft_net);
++
+       rhashtable_remove_fast(&flow_table->rhashtable,
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+                              nf_flow_offload_rhash_params);
+@@ -177,6 +184,9 @@ static void flow_offload_del(struct nf_f
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+                              nf_flow_offload_rhash_params);
++      if (nf_flow_in_hw(flow))
++              nf_flow_offload_hw_del(net, flow);
++
+       flow_offload_free(flow);
+ }
+@@ -263,6 +273,10 @@ static int nf_flow_offload_gc_step(struc
+               flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
++              if (nf_flow_in_hw(flow) &&
++                  !nf_flow_is_dying(flow))
++                      continue;
++
+               if (nf_flow_has_expired(flow) ||
+                   nf_flow_is_dying(flow))
+                       flow_offload_del(flow_table, flow);
+@@ -399,10 +413,43 @@ int nf_flow_dnat_port(const struct flow_
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
++static const struct nf_flow_table_hw __rcu *nf_flow_table_hw_hook __read_mostly;
++
++/*
++ * Make sure a hardware offload backend is available. If none is
++ * registered yet, try to load one with request_module("nf-flow-table-hw"),
++ * then pin the registered backend's module with try_module_get().
++ *
++ * @flow_table is currently unused.
++ *
++ * Returns 0 with a module reference held, or -EOPNOTSUPP if no backend is
++ * registered or its module reference could not be taken.
++ */
++static int nf_flow_offload_hw_init(struct nf_flowtable *flow_table)
++{
++      const struct nf_flow_table_hw *offload;
++
++      if (!rcu_access_pointer(nf_flow_table_hw_hook))
++              request_module("nf-flow-table-hw");
++
++      rcu_read_lock();
++      offload = rcu_dereference(nf_flow_table_hw_hook);
++      if (!offload)
++              goto err_no_hw_offload;
++
++      if (!try_module_get(offload->owner))
++              goto err_no_hw_offload;
++
++      rcu_read_unlock();
++
++      return 0;
++
++err_no_hw_offload:
++      rcu_read_unlock();
++
++      return -EOPNOTSUPP;
++}
++
+ int nf_flow_table_init(struct nf_flowtable *flowtable)
+ {
+       int err;
++      if (flowtable->flags & NF_FLOWTABLE_F_HW) {
++              err = nf_flow_offload_hw_init(flowtable);
++              if (err)
++                      return err;
++      }
++
+       INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+       err = rhashtable_init(&flowtable->rhashtable,
+@@ -436,6 +483,8 @@ static void nf_flow_table_iterate_cleanu
+ {
+       nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
+       flush_delayed_work(&flowtable->gc_work);
++      if (flowtable->flags & NF_FLOWTABLE_F_HW)
++              flush_work(&nf_flow_offload_hw_work);
+ }
+ void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+@@ -449,6 +498,26 @@ void nf_flow_table_cleanup(struct net *n
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
++struct work_struct nf_flow_offload_hw_work;
++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_work);
++
++/*
++ * Tear-down counterpart of nf_flow_offload_hw_init(): flush
++ * nf_flow_offload_hw_work to give the hardware workqueue the chance to
++ * remove entries from hardware, then drop the module reference on the
++ * currently registered backend, if there is one.
++ *
++ * @flowtable is currently unused.
++ */
++static void nf_flow_offload_hw_free(struct nf_flowtable *flowtable)
++{
++      const struct nf_flow_table_hw *offload;
++
++      flush_work(&nf_flow_offload_hw_work);
++
++      rcu_read_lock();
++      offload = rcu_dereference(nf_flow_table_hw_hook);
++      if (!offload) {
++              rcu_read_unlock();
++              return;
++      }
++      module_put(offload->owner);
++      rcu_read_unlock();
++}
++
+ void nf_flow_table_free(struct nf_flowtable *flow_table)
+ {
+       mutex_lock(&flowtable_lock);
+@@ -458,9 +527,58 @@ void nf_flow_table_free(struct nf_flowta
+       nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+       WARN_ON(!nf_flow_offload_gc_step(flow_table));
+       rhashtable_destroy(&flow_table->rhashtable);
++      if (flow_table->flags & NF_FLOWTABLE_F_HW)
++              nf_flow_offload_hw_free(flow_table);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_free);
++/*
++ * Forward an "add flow" request for @flow/@ct to the registered hardware
++ * offload backend. Does nothing when no backend is registered. The backend
++ * callback is invoked inside an RCU read-side critical section.
++ *
++ * Must be called from user context.
++ */
++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
++                          struct nf_conn *ct)
++{
++      const struct nf_flow_table_hw *offload;
++
++      rcu_read_lock();
++      offload = rcu_dereference(nf_flow_table_hw_hook);
++      if (offload)
++              offload->add(net, flow, ct);
++      rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_add);
++
++/*
++ * Forward a "delete flow" request for @flow to the registered hardware
++ * offload backend. Does nothing when no backend is registered. The backend
++ * callback is invoked inside an RCU read-side critical section.
++ *
++ * Must be called from user context.
++ */
++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow)
++{
++      const struct nf_flow_table_hw *offload;
++
++      rcu_read_lock();
++      offload = rcu_dereference(nf_flow_table_hw_hook);
++      if (offload)
++              offload->del(net, flow);
++      rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_del);
++
++/*
++ * Install @offload as the hardware offload backend. Only one backend can
++ * be registered at a time.
++ *
++ * Returns 0 on success or -EBUSY if a backend is already registered.
++ *
++ * NOTE(review): the "already registered" check and the assignment are two
++ * separate steps with no lock visible here - confirm that callers cannot
++ * race with each other.
++ */
++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload)
++{
++      if (rcu_access_pointer(nf_flow_table_hw_hook))
++              return -EBUSY;
++
++      rcu_assign_pointer(nf_flow_table_hw_hook, offload);
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_hw_register);
++
++/*
++ * Remove the hardware offload backend. Warns if @offload is not the
++ * backend that is currently registered, but clears the hook in either
++ * case. synchronize_rcu() makes sure that RCU readers that may still be
++ * using the old hook have finished before this function returns.
++ */
++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload)
++{
++      WARN_ON(rcu_access_pointer(nf_flow_table_hw_hook) != offload);
++      rcu_assign_pointer(nf_flow_table_hw_hook, NULL);
++
++      synchronize_rcu();
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_hw_unregister);
++
+ static int nf_flow_table_netdev_event(struct notifier_block *this,
+                                     unsigned long event, void *ptr)
+ {
+--- /dev/null
++++ b/net/netfilter/nf_flow_table_hw.c
+@@ -0,0 +1,169 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/netdevice.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_tuple.h>
++
++/* Requests waiting for the worker; the list is protected by the spinlock. */
++static DEFINE_SPINLOCK(flow_offload_hw_pending_list_lock);
++static LIST_HEAD(flow_offload_hw_pending_list);
++
++/* Held around every ndo_flow_offload() call made from this file. */
++static DEFINE_MUTEX(nf_flow_offload_hw_mutex);
++
++/* One queued hardware offload request, processed by flow_offload_hw_work(). */
++struct flow_offload_hw {
++      /* Link in the pending list. */
++      struct list_head        list;
++      /* FLOW_OFFLOAD_ADD or FLOW_OFFLOAD_DEL. */
++      enum flow_offload_type  type;
++      /* Flow handed to the driver; stored as a plain pointer. */
++      struct flow_offload     *flow;
++      /* Conntrack entry with a reference held for ADD, NULL for DEL. */
++      struct nf_conn          *ct;
++      /* Namespace used to look up the flow's input device. */
++      possible_net_t          flow_hw_net;
++};
++
++/*
++ * Ask the driver of the flow's original-direction input device to add or
++ * remove @flow from its hardware flow table.
++ *
++ * @net:  namespace used to resolve the input interface index
++ * @flow: flow entry passed through to the driver
++ * @type: FLOW_OFFLOAD_ADD or FLOW_OFFLOAD_DEL
++ *
++ * Returns the driver's ndo_flow_offload() result, 0 if the input device
++ * cannot be found (after a WARN), or -EOPNOTSUPP if the device does not
++ * implement ndo_flow_offload.
++ */
++static int do_flow_offload_hw(struct net *net, struct flow_offload *flow,
++                            int type)
++{
++      struct net_device *indev;
++      int ret, ifindex;
++
++      ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx;
++      indev = dev_get_by_index(net, ifindex);
++      if (WARN_ON(!indev))
++              return 0;
++
++      /* ndo_flow_offload is an optional hook; calling it unconditionally
++       * would dereference NULL on drivers without hardware flow tables.
++       * The negative return keeps flow_offload_hw_work_add() from marking
++       * the flow as FLOW_OFFLOAD_HW.
++       */
++      if (!indev->netdev_ops->ndo_flow_offload) {
++              ret = -EOPNOTSUPP;
++              goto out;
++      }
++
++      /* Driver offload requests are serialized by one global mutex. */
++      mutex_lock(&nf_flow_offload_hw_mutex);
++      ret = indev->netdev_ops->ndo_flow_offload(type, flow);
++      mutex_unlock(&nf_flow_offload_hw_mutex);
++
++out:
++      /* Drop the reference taken by dev_get_by_index(). */
++      dev_put(indev);
++
++      return ret;
++}
++
++/*
++ * Handle one queued FLOW_OFFLOAD_ADD request: skip it when the conntrack
++ * entry is already dying, otherwise hand the flow to the driver and, on a
++ * non-negative result, flag the flow as FLOW_OFFLOAD_HW.
++ */
++static void flow_offload_hw_work_add(struct flow_offload_hw *offload)
++{
++      struct net *net;
++      int ret;
++
++      if (nf_ct_is_dying(offload->ct))
++              return;
++
++      net = read_pnet(&offload->flow_hw_net);
++      ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD);
++      if (ret >= 0)
++              offload->flow->flags |= FLOW_OFFLOAD_HW;
++}
++
++/*
++ * Handle one queued FLOW_OFFLOAD_DEL request by asking the driver to drop
++ * the flow. The driver's return value is not used.
++ */
++static void flow_offload_hw_work_del(struct flow_offload_hw *offload)
++{
++      struct flow_offload *flow = offload->flow;
++      struct net *net;
++
++      net = read_pnet(&offload->flow_hw_net);
++      do_flow_offload_hw(net, flow, FLOW_OFFLOAD_DEL);
++}
++
++/*
++ * Work handler installed on nf_flow_offload_hw_work: detach the whole
++ * pending list while holding the spinlock, then process the requests
++ * without it. After a request has been handled, its conntrack reference
++ * (if it holds one) is dropped and the request is freed.
++ */
++static void flow_offload_hw_work(struct work_struct *work)
++{
++      struct flow_offload_hw *offload, *next;
++      LIST_HEAD(hw_offload_pending);
++
++      /* Move all pending requests to a private list in one step. */
++      spin_lock_bh(&flow_offload_hw_pending_list_lock);
++      list_replace_init(&flow_offload_hw_pending_list, &hw_offload_pending);
++      spin_unlock_bh(&flow_offload_hw_pending_list_lock);
++
++      list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
++              switch (offload->type) {
++              case FLOW_OFFLOAD_ADD:
++                      flow_offload_hw_work_add(offload);
++                      break;
++              case FLOW_OFFLOAD_DEL:
++                      flow_offload_hw_work_del(offload);
++                      break;
++              }
++              /* Only ADD requests carry a conntrack reference. */
++              if (offload->ct)
++                      nf_conntrack_put(&offload->ct->ct_general);
++              list_del(&offload->list);
++              kfree(offload);
++      }
++}
++
++/*
++ * Queue a hardware offload request and kick the worker.
++ *
++ * The request is appended to flow_offload_hw_pending_list under
++ * flow_offload_hw_pending_list_lock; flow_offload_hw_work() later takes it
++ * off the list and frees it.
++ */
++static void flow_offload_queue_work(struct flow_offload_hw *offload)
++{
++      spin_lock_bh(&flow_offload_hw_pending_list_lock);
++      list_add_tail(&offload->list, &flow_offload_hw_pending_list);
++      spin_unlock_bh(&flow_offload_hw_pending_list_lock);
++
++      schedule_work(&nf_flow_offload_hw_work);
++}
++
++/*
++ * Request that @flow be added to hardware.
++ *
++ * Allocates a request with GFP_ATOMIC, takes a reference on @ct for it and
++ * queues it for the worker.  If the allocation fails the request is silently
++ * dropped and no reference is taken.
++ */
++static void flow_offload_hw_add(struct net *net, struct flow_offload *flow,
++                              struct nf_conn *ct)
++{
++      struct flow_offload_hw *req = kmalloc(sizeof(*req), GFP_ATOMIC);
++
++      if (!req)
++              return;
++
++      req->type = FLOW_OFFLOAD_ADD;
++      req->flow = flow;
++      write_pnet(&req->flow_hw_net, net);
++
++      /* Released by the worker once the request has been processed. */
++      nf_conntrack_get(&ct->ct_general);
++      req->ct = ct;
++
++      flow_offload_queue_work(req);
++}
++
++/*
++ * Request that @flow be removed from hardware.
++ *
++ * Allocates a request with GFP_ATOMIC and queues it for the worker.  No
++ * conntrack is attached (ct is NULL).  If the allocation fails the request
++ * is silently dropped.
++ */
++static void flow_offload_hw_del(struct net *net, struct flow_offload *flow)
++{
++      struct flow_offload_hw *req = kmalloc(sizeof(*req), GFP_ATOMIC);
++
++      if (!req)
++              return;
++
++      req->type = FLOW_OFFLOAD_DEL;
++      req->flow = flow;
++      req->ct = NULL;
++      write_pnet(&req->flow_hw_net, net);
++
++      flow_offload_queue_work(req);
++}
++
++/* Add/del callbacks passed to nf_flow_table_hw_register() at module init. */
++static const struct nf_flow_table_hw flow_offload_hw = {
++      .add    = flow_offload_hw_add,
++      .del    = flow_offload_hw_del,
++      .owner  = THIS_MODULE,
++};
++
++/*
++ * Module init: bind the work item to flow_offload_hw_work() and register the
++ * add/del callbacks.  Always returns 0.
++ */
++static int __init nf_flow_table_hw_module_init(void)
++{
++      INIT_WORK(&nf_flow_offload_hw_work, flow_offload_hw_work);
++      nf_flow_table_hw_register(&flow_offload_hw);
++
++      return 0;
++}
++
++/*
++ * Module exit: unregister the callbacks, wait for a running work item to
++ * finish and release every request that is still queued.
++ */
++static void __exit nf_flow_table_hw_module_exit(void)
++{
++      struct flow_offload_hw *offload, *next;
++      LIST_HEAD(hw_offload_pending);
++
++      nf_flow_table_hw_unregister(&flow_offload_hw);
++      cancel_work_sync(&nf_flow_offload_hw_work);
++
++      /*
++       * Take over whatever is still queued.  The local list starts out
++       * empty, so walking it without this step would free nothing and leak
++       * the requests together with their conntrack references.
++       */
++      spin_lock_bh(&flow_offload_hw_pending_list_lock);
++      list_replace_init(&flow_offload_hw_pending_list, &hw_offload_pending);
++      spin_unlock_bh(&flow_offload_hw_pending_list_lock);
++
++      list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
++              if (offload->ct)
++                      nf_conntrack_put(&offload->ct->ct_general);
++              list_del(&offload->list);
++              kfree(offload);
++      }
++}
++
++/* Module entry/exit hooks and metadata. */
++module_init(nf_flow_table_hw_module_init);
++module_exit(nf_flow_table_hw_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_ALIAS("nf-flow-table-hw");
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4866,6 +4866,14 @@ static int nf_tables_flowtable_parse_hoo
+       if (err < 0)
+               goto err1;
++      for (i = 0; i < n; i++) {
++              if (flowtable->data.flags & NF_FLOWTABLE_F_HW &&
++                  !dev_array[i]->netdev_ops->ndo_flow_offload) {
++                      err = -EOPNOTSUPP;
++                      goto err1;
++              }
++      }
++
+       ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
+       if (!ops) {
+               err = -ENOMEM;
+@@ -4996,10 +5004,22 @@ static int nf_tables_newflowtable(struct
+       }
+       flowtable->data.type = type;
++      write_pnet(&flowtable->data.ft_net, net);
++
+       err = type->init(&flowtable->data);
+       if (err < 0)
+               goto err3;
++      if (nla[NFTA_FLOWTABLE_FLAGS]) {
++              flowtable->data.flags =
++                      ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
++              if (flowtable->data.flags & ~NF_FLOWTABLE_F_HW) {
++                      /* err is still >= 0 from init(); report failure. */
++                      err = -EOPNOTSUPP;
++                      goto err4;
++              }
++      }
++
+       err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+                                            flowtable);
+       if (err < 0)
+@@ -5097,7 +5114,8 @@ static int nf_tables_fill_flowtable_info
+           nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
+           nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
+           nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
+-                       NFTA_FLOWTABLE_PAD))
++                       NFTA_FLOWTABLE_PAD) ||
++          nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
+               goto nla_put_failure;
+       nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -110,6 +110,9 @@ static void nft_flow_offload_eval(const
+       if (ret < 0)
+               goto err_flow_add;
++      if (flowtable->flags & NF_FLOWTABLE_F_HW)
++              nf_flow_offload_hw_add(nft_net(pkt), flow, ct);
++
+       return;
+ err_flow_add:
index f5580f29ce1cf573e5e46be8ce66316bb059afed..5069f4e7382e8630ce6c1b8cf62fd7ac60fd334d 100644 (file)
@@ -3142,6 +3142,7 @@ CONFIG_NFS_V3=y
 # CONFIG_NFT_DUP_IPV6 is not set
 # CONFIG_NFT_FIB_IPV4 is not set
 # CONFIG_NFT_FIB_IPV6 is not set
+# CONFIG_NFT_FLOW_OFFLOAD is not set
 # CONFIG_NFT_OBJREF is not set
 # CONFIG_NFT_RT is not set
 # CONFIG_NFT_SET_BITMAP is not set
@@ -3174,6 +3175,7 @@ CONFIG_NF_CONNTRACK_PROCFS=y
 # CONFIG_NF_DEFRAG_IPV4 is not set
 # CONFIG_NF_DUP_IPV4 is not set
 # CONFIG_NF_DUP_IPV6 is not set
+# CONFIG_NF_FLOW_TABLE is not set
 # CONFIG_NF_LOG_ARP is not set
 # CONFIG_NF_LOG_IPV4 is not set
 # CONFIG_NF_LOG_NETDEV is not set