From: Felix Fietkau
Date: Tue, 20 Feb 2018 14:58:42 +0000 (+0100)
Subject: netfilter: add a xt_FLOWOFFLOAD target for NAT/routing offload support
X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=820f03099894bd48638fb5be326b5c551f0f2b98;p=openwrt%2Fstaging%2Fdangole.git

netfilter: add a xt_FLOWOFFLOAD target for NAT/routing offload support

This makes it possible to add an iptables rule that offloads routing/NAT
packet processing to a software fast path. This fast path is much quicker
than running packets through the regular tables/chains.

Requires Linux 4.14

Signed-off-by: Felix Fietkau
---

diff --git a/include/netfilter.mk b/include/netfilter.mk
index bad599b378..c4e43a84a1 100644
--- a/include/netfilter.mk
+++ b/include/netfilter.mk
@@ -143,6 +143,8 @@ $(eval $(call nf_add,IPT_IPSEC,CONFIG_IP_NF_MATCH_AH, $(P_V4)ipt_ah))
 $(eval $(call nf_add,IPT_IPSEC,CONFIG_NETFILTER_XT_MATCH_ESP, $(P_XT)xt_esp))
 $(eval $(call nf_add,IPT_IPSEC,CONFIG_NETFILTER_XT_MATCH_POLICY, $(P_XT)xt_policy))
 
+# flow offload support
+$(eval $(call nf_add,IPT_FLOW,CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD, $(P_XT)xt_FLOWOFFLOAD))
 
 # IPv6
 
@@ -370,6 +372,7 @@ IPT_BUILTIN += $(IPT_CONNTRACK-y)
 IPT_BUILTIN += $(IPT_CONNTRACK_EXTRA-y)
 IPT_BUILTIN += $(IPT_EXTRA-y)
 IPT_BUILTIN += $(IPT_FILTER-y)
+IPT_BUILTIN += $(IPT_FLOW-y) $(IPT_FLOW-m)
 IPT_BUILTIN += $(IPT_IPOPT-y)
 IPT_BUILTIN += $(IPT_IPRANGE-y)
 IPT_BUILTIN += $(IPT_CLUSTER-y)
diff --git a/package/kernel/linux/modules/netfilter.mk b/package/kernel/linux/modules/netfilter.mk
index 57d68d4a55..f296a9096e 100644
--- a/package/kernel/linux/modules/netfilter.mk
+++ b/package/kernel/linux/modules/netfilter.mk
@@ -147,7 +147,7 @@ define KernelPackage/nf-flow
 	CONFIG_NETFILTER_INGRESS=y \
 	CONFIG_NF_FLOW_TABLE \
 	CONFIG_NF_FLOW_TABLE_HW
-  DEPENDS:=+kmod-nf-conntrack +kmod-nft-core @!LINUX_3_18 @!LINUX_4_4 @!LINUX_4_9
+  DEPENDS:=+kmod-nf-conntrack @!LINUX_3_18 @!LINUX_4_4 @!LINUX_4_9
   FILES:= \
 	$(LINUX_DIR)/net/netfilter/nf_flow_table.ko \
 	$(LINUX_DIR)/net/netfilter/nf_flow_table_hw.ko
@@ -237,6 +237,17 @@ endef
 
 $(eval $(call KernelPackage,ipt-filter))
 
+define KernelPackage/ipt-offload
+  TITLE:=Netfilter routing/NAT offload support
+  KCONFIG:=CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD
+  FILES:=$(foreach mod,$(IPT_FLOW-m),$(LINUX_DIR)/net/$(mod).ko)
+  AUTOLOAD:=$(call AutoProbe,$(notdir $(IPT_FLOW-m)))
+  $(call AddDepends/ipt,+kmod-nf-flow)
+endef
+
+$(eval $(call KernelPackage,ipt-offload))
+
+
 define KernelPackage/ipt-ipopt
   TITLE:=Modules for matching/changing IP packet options
   KCONFIG:=$(KCONFIG_IPT_IPOPT)
diff --git a/package/network/utils/iptables/patches/800-flowoffload_target.patch b/package/network/utils/iptables/patches/800-flowoffload_target.patch
new file mode 100644
index 0000000000..c6fe65cd3e
--- /dev/null
+++ b/package/network/utils/iptables/patches/800-flowoffload_target.patch
@@ -0,0 +1,18 @@
+--- /dev/null
++++ b/extensions/libxt_FLOWOFFLOAD.c
+@@ -0,0 +1,15 @@
++#include <xtables.h>
++
++static struct xtables_target offload_tg_reg[] = {
++	{
++		.family		= NFPROTO_UNSPEC,
++		.name		= "FLOWOFFLOAD",
++		.revision	= 0,
++		.version	= XTABLES_VERSION,
++	},
++};
++
++void _init(void)
++{
++	xtables_register_targets(offload_tg_reg, ARRAY_SIZE(offload_tg_reg));
++}
diff --git a/target/linux/generic/hack-4.14/650-netfilter-add-xt_OFFLOAD-target.patch b/target/linux/generic/hack-4.14/650-netfilter-add-xt_OFFLOAD-target.patch
new file mode 100644
index 0000000000..cb05cf8e1d
--- /dev/null
+++ b/target/linux/generic/hack-4.14/650-netfilter-add-xt_OFFLOAD-target.patch
@@ -0,0 +1,446 @@
+From: Felix Fietkau +Date: Tue, 20 Feb 2018 15:56:02 +0100 +Subject: [PATCH] netfilter: add xt_OFFLOAD target + +Signed-off-by: Felix Fietkau +--- + create mode 100644 net/netfilter/xt_OFFLOAD.c + +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -75,8 +75,6 @@ config NF_TABLES_ARP + help + This option enables the ARP support for nf_tables. + +-endif # NF_TABLES +- + config NF_FLOW_TABLE_IPV4 + tristate "Netfilter flow table IPv4 module" + depends on NF_FLOW_TABLE +@@ -85,6 +83,8 @@ config NF_FLOW_TABLE_IPV4 + + To compile it as a module, choose M here. + ++endif # NF_TABLES ++ + config NF_DUP_IPV4 + tristate "Netfilter IPv4 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK +--- a/net/ipv6/netfilter/Kconfig ++++ b/net/ipv6/netfilter/Kconfig +@@ -69,7 +69,6 @@ config NFT_FIB_IPV6 + multicast or blackhole. + + endif # NF_TABLES_IPV6 +-endif # NF_TABLES + + config NF_FLOW_TABLE_IPV6 + tristate "Netfilter flow table IPv6 module" +@@ -79,6 +78,8 @@ config NF_FLOW_TABLE_IPV6 + + To compile it as a module, choose M here. + ++endif # NF_TABLES ++ + config NF_DUP_IPV6 + tristate "Netfilter IPv6 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -665,8 +665,6 @@ config NFT_FIB_NETDEV + + endif # NF_TABLES_NETDEV + +-endif # NF_TABLES +- + config NF_FLOW_TABLE_INET + tristate "Netfilter flow table mixed IPv4/IPv6 module" + depends on NF_FLOW_TABLE +@@ -675,11 +673,12 @@ config NF_FLOW_TABLE_INET + + To compile it as a module, choose M here. + ++endif # NF_TABLES ++ + config NF_FLOW_TABLE + tristate "Netfilter flow table module" + depends on NETFILTER_INGRESS + depends on NF_CONNTRACK +- depends on NF_TABLES + help + This option adds the flow table core infrastructure. + +@@ -968,6 +967,15 @@ config NETFILTER_XT_TARGET_NOTRACK + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_CT + ++config NETFILTER_XT_TARGET_FLOWOFFLOAD ++ tristate '"FLOWOFFLOAD" target support' ++ depends on NF_FLOW_TABLE ++ depends on NETFILTER_INGRESS ++ help ++ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload ++ module to speed up processing of packets by bypassing the usual ++ netfilter chains ++ + config NETFILTER_XT_TARGET_RATEEST + tristate '"RATEEST" target support' + depends on NETFILTER_ADVANCED +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -134,6 +134,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF + obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o + obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o + obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o ++obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o + obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o + obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o + obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o +--- /dev/null ++++ b/net/netfilter/xt_FLOWOFFLOAD.c +@@ -0,0 +1,335 @@ ++/* ++ * Copyright (C) 2018 Felix Fietkau ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct nf_flowtable nf_flowtable; ++static HLIST_HEAD(hooks); ++static DEFINE_SPINLOCK(hooks_lock); ++static struct delayed_work hook_work; ++ ++struct xt_flowoffload_hook { ++ struct hlist_node list; ++ struct nf_hook_ops ops; ++ bool registered; ++ bool used; ++}; ++ ++static unsigned int ++xt_flowoffload_net_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ switch (skb->protocol) { ++ case htons(ETH_P_IP): ++ return nf_flow_offload_ip_hook(priv, skb, state); ++ case htons(ETH_P_IPV6): ++ return nf_flow_offload_ipv6_hook(priv, skb, state); ++ } ++ ++ return NF_ACCEPT; ++} ++ ++static int ++xt_flowoffload_create_hook(struct net_device *dev) ++{ ++ struct xt_flowoffload_hook *hook; ++ struct nf_hook_ops *ops; ++ ++ hook = kzalloc(sizeof(*hook), GFP_ATOMIC); ++ if (!hook) ++ return -ENOMEM; ++ ++ ops = &hook->ops; ++ ops->pf = NFPROTO_NETDEV; ++ ops->hooknum = NF_NETDEV_INGRESS; ++ ops->priority = 10; ++ ops->priv = &nf_flowtable; ++ ops->hook = xt_flowoffload_net_hook; ++ ops->dev = dev; ++ ++ hlist_add_head(&hook->list, &hooks); ++ mod_delayed_work(system_power_efficient_wq, &hook_work, 0); ++ ++ return 0; ++} ++ ++static struct xt_flowoffload_hook * ++flow_offload_lookup_hook(struct net_device *dev) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->ops.dev == dev) ++ return hook; ++ } ++ ++ return NULL; ++} ++ ++static void ++xt_flowoffload_check_device(struct net_device *dev) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++ spin_lock_bh(&hooks_lock); ++ hook = flow_offload_lookup_hook(dev); ++ if (hook) ++ hook->used = true; ++ else ++ xt_flowoffload_create_hook(dev); ++ spin_unlock_bh(&hooks_lock); ++} ++ ++static void ++xt_flowoffload_register_hooks(void) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++restart: ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->registered) ++ continue; ++ ++ hook->registered = true; ++ spin_unlock_bh(&hooks_lock); ++ nf_register_net_hook(dev_net(hook->ops.dev), &hook->ops); ++ spin_lock_bh(&hooks_lock); ++ goto restart; ++ } ++ ++} ++ ++static void ++xt_flowoffload_cleanup_hooks(void) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++restart: ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->used) ++ continue; ++ ++ hlist_del(&hook->list); ++ spin_unlock_bh(&hooks_lock); ++ nf_unregister_net_hook(dev_net(hook->ops.dev), &hook->ops); ++ kfree(hook); ++ spin_lock_bh(&hooks_lock); ++ goto restart; ++ } ++ ++} ++ ++static void ++xt_flowoffload_check_hook(struct flow_offload *flow, void *data) ++{ ++ struct flow_offload_tuple *tuple = &flow->tuplehash[0].tuple; ++ struct xt_flowoffload_hook *hook; ++ bool *found = data; ++ ++ spin_lock_bh(&hooks_lock); ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->ops.dev->ifindex != tuple->iifidx && ++ hook->ops.dev->ifindex != tuple->oifidx) ++ continue; ++ ++ hook->used = true; ++ *found = true; ++ } ++ spin_unlock_bh(&hooks_lock); ++} ++ ++static void ++xt_flowoffload_hook_work(struct work_struct *work) ++{ ++ struct xt_flowoffload_hook *hook; ++ bool found = false; ++ int err; ++ ++ spin_lock_bh(&hooks_lock); ++ xt_flowoffload_register_hooks(); ++ hlist_for_each_entry(hook, &hooks, list) ++ hook->used = false; ++ spin_unlock_bh(&hooks_lock); ++ ++ err = nf_flow_table_iterate(&nf_flowtable, xt_flowoffload_check_hook, ++ &found); ++ if (err && err != -EAGAIN) ++ goto out; ++ ++ spin_lock_bh(&hooks_lock); ++ 
xt_flowoffload_cleanup_hooks(); ++ spin_unlock_bh(&hooks_lock); ++ ++out: ++ if (found) ++ queue_delayed_work(system_power_efficient_wq, &hook_work, HZ); ++} ++ ++static bool ++xt_flowoffload_skip(struct sk_buff *skb) ++{ ++ struct ip_options *opt = &(IPCB(skb)->opt); ++ ++ if (unlikely(opt->optlen)) ++ return true; ++ if (skb_sec_path(skb)) ++ return true; ++ ++ return false; ++} ++ ++static int ++xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct, ++ const struct xt_action_param *par, ++ struct nf_flow_route *route, enum ip_conntrack_dir dir) ++{ ++ struct dst_entry *this_dst = skb_dst(skb); ++ struct dst_entry *other_dst = NULL; ++ struct flowi fl; ++ ++ memset(&fl, 0, sizeof(fl)); ++ switch (xt_family(par)) { ++ case NFPROTO_IPV4: ++ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; ++ break; ++ case NFPROTO_IPV6: ++ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; ++ break; ++ } ++ ++ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par)); ++ if (!other_dst) ++ return -ENOENT; ++ ++ route->tuple[dir].dst = this_dst; ++ route->tuple[dir].ifindex = xt_in(par)->ifindex; ++ route->tuple[!dir].dst = other_dst; ++ route->tuple[!dir].ifindex = xt_out(par)->ifindex; ++ ++ return 0; ++} ++ ++static unsigned int ++flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par) ++{ ++ enum ip_conntrack_info ctinfo; ++ enum ip_conntrack_dir dir; ++ struct nf_flow_route route; ++ struct flow_offload *flow; ++ struct nf_conn *ct; ++ ++ if (xt_flowoffload_skip(skb)) ++ return XT_CONTINUE; ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (ct == NULL) ++ return XT_CONTINUE; ++ ++ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ break; ++ default: ++ return XT_CONTINUE; ++ } ++ ++ if (test_bit(IPS_HELPER_BIT, &ct->status)) ++ return XT_CONTINUE; ++ ++ if (ctinfo == IP_CT_NEW || ++ ctinfo == IP_CT_RELATED) ++ return XT_CONTINUE; ++ ++ if (!xt_in(par) || !xt_out(par)) ++ return XT_CONTINUE; ++ ++ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ return XT_CONTINUE; ++ ++ dir = CTINFO2DIR(ctinfo); ++ ++ if (xt_flowoffload_route(skb, ct, par, &route, dir) < 0) ++ goto err_flow_route; ++ ++ flow = flow_offload_alloc(ct, &route); ++ if (!flow) ++ goto err_flow_alloc; ++ ++ if (flow_offload_add(&nf_flowtable, flow) < 0) ++ goto err_flow_add; ++ ++ xt_flowoffload_check_device(xt_in(par)); ++ xt_flowoffload_check_device(xt_out(par)); ++ ++ return XT_CONTINUE; ++ ++err_flow_add: ++ flow_offload_free(flow); ++err_flow_alloc: ++ dst_release(route.tuple[!dir].dst); ++err_flow_route: ++ clear_bit(IPS_OFFLOAD_BIT, &ct->status); ++ return XT_CONTINUE; ++} ++ ++ ++static int flowoffload_chk(const struct xt_tgchk_param *par) ++{ ++ return 0; ++} ++ ++static struct xt_target offload_tg_reg __read_mostly = { ++ .family = NFPROTO_UNSPEC, ++ .name = "FLOWOFFLOAD", ++ .revision = 0, ++ .checkentry = flowoffload_chk, ++ .target = flowoffload_tg, ++ .me = THIS_MODULE, ++}; ++ ++static int xt_flowoffload_table_init(struct nf_flowtable *table) ++{ ++ nf_flow_table_init(table); ++ return 0; ++} ++ ++static void xt_flowoffload_table_cleanup(struct nf_flowtable *table) ++{ ++ nf_flow_table_free(table); ++} ++ ++static int __init xt_flowoffload_tg_init(void) ++{ ++ int ret; ++ ++ INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work); ++ ++ ret = xt_flowoffload_table_init(&nf_flowtable); ++ if (ret) ++ return ret; ++ ++ ret = xt_register_target(&offload_tg_reg); ++ if (ret) ++ xt_flowoffload_table_cleanup(&nf_flowtable); ++ ++ return 
ret;
++}
++
++static void __exit xt_flowoffload_tg_exit(void)
++{
++	xt_unregister_target(&offload_tg_reg);
++	xt_flowoffload_table_cleanup(&nf_flowtable);
++}
++
++MODULE_LICENSE("GPL");
++module_init(xt_flowoffload_tg_init);
++module_exit(xt_flowoffload_tg_exit);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -6,7 +6,6 @@
+ #include <linux/netdevice.h>
+ #include <net/ip.h>
+ #include <net/ip6_route.h>
+-#include <net/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/netfilter/nf_conntrack_core.h>
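
Usage example (illustrative; the rule below is an assumption for this note, not part of the commit): with the kmod-ipt-offload package installed, forwarded traffic can be handed to the software fast path by inserting a single rule that sends established connections to the new target, for example:

    iptables -I FORWARD -m conntrack --ctstate ESTABLISHED -j FLOWOFFLOAD

The target only offloads TCP/UDP flows that have both an input and an output device and no conntrack helper; packets with IP options, an IPsec path, or NEW/RELATED conntrack state are returned unchanged (XT_CONTINUE), so such a rule can safely sit in front of the regular filter rules.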