+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 15 Mar 2018 18:21:43 +0100
-Subject: [PATCH] netfilter: nf_flow_table: clean up and fix dst handling
-
-dst handling in the code is inconsistent and possibly wrong. In my test,
-skb_dst(skb) holds the dst entry after routing but before NAT, so the
-code could possibly return the same dst entry for both directions of a
-connection.
-Additionally, there was some confusion over the dst entry vs the address
-passed as parameter to rt_nexthop/rt6_nexthop.
-
-Do an explicit dst lookup for both ends of the connection and always use
-the source address for it. When running the IP hook, use the dst entry
-for the opposite direction for determining the route.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/net/netfilter/nf_flow_table_ip.c
-+++ b/net/netfilter/nf_flow_table_ip.c
-@@ -238,7 +238,7 @@ nf_flow_offload_ip_hook(void *priv, stru
-
- dir = tuplehash->tuple.dir;
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-+ rt = (const struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
-
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
- (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
-@@ -455,7 +455,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
-
- dir = tuplehash->tuple.dir;
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-+ rt = (struct rt6_info *)flow->tuplehash[!dir].tuple.dst_cache;
-
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
- return NF_ACCEPT;
---- a/net/netfilter/nft_flow_offload.c
-+++ b/net/netfilter/nft_flow_offload.c
-@@ -17,27 +17,38 @@ struct nft_flow_offload {
- struct nft_flowtable *flowtable;
- };
-
--static int nft_flow_route(const struct nft_pktinfo *pkt,
-- const struct nf_conn *ct,
-- struct nf_flow_route *route,
-- enum ip_conntrack_dir dir)
-+static struct dst_entry *
-+nft_flow_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
-+ const struct nft_pktinfo *pkt)
- {
-- struct dst_entry *this_dst = skb_dst(pkt->skb);
-- struct dst_entry *other_dst = NULL;
-+ struct dst_entry *dst;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
- switch (nft_pf(pkt)) {
- case NFPROTO_IPV4:
-- fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
-+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
- break;
- case NFPROTO_IPV6:
-- fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
-+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
- break;
- }
-
-- nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
-- if (!other_dst)
-+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
-+
-+ return dst;
-+}
-+
-+static int nft_flow_route(const struct nft_pktinfo *pkt,
-+ const struct nf_conn *ct,
-+ struct nf_flow_route *route,
-+ enum ip_conntrack_dir dir)
-+{
-+ struct dst_entry *this_dst, *other_dst;
-+
-+ this_dst = nft_flow_dst(ct, dir, pkt);
-+ other_dst = nft_flow_dst(ct, !dir, pkt);
-+ if (!this_dst || !other_dst)
- return -ENOENT;
-
- route->tuple[dir].dst = this_dst;
--- /dev/null
+From: wenxu <wenxu@ucloud.cn>
+Date: Wed, 9 Jan 2019 10:40:11 +0800
+Subject: [PATCH] netfilter: nft_flow_offload: Fix reverse route lookup
+
+Using the following example:
+
+ client 1.1.1.7 ---> 2.2.2.7, which is DNATed to server 10.0.0.7
+
+The first reply packet (i.e. the SYN+ACK) uses an incorrect destination
+address for the reverse route lookup, since it uses:
+
+ daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+
+which is 2.2.2.7 in the scenario that is described above, while this
+should be:
+
+ daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+
+that is 10.0.0.7.
+
+Signed-off-by: wenxu <wenxu@ucloud.cn>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -29,10 +29,10 @@ static int nft_flow_route(const struct n
+ memset(&fl, 0, sizeof(fl));
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+- fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
++ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+- fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
++ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ break;
+ }
+
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-- rt = (const struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
-+ rt = (struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
+- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
++ rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
-@@ -28,9 +28,11 @@ nft_flow_dst(const struct nf_conn *ct, e
+@@ -30,9 +30,11 @@ static int nft_flow_route(const struct n
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
break;
}
-@@ -52,9 +54,7 @@ static int nft_flow_route(const struct n
+@@ -41,9 +43,7 @@ static int nft_flow_route(const struct n
return -ENOENT;
route->tuple[dir].dst = this_dst;
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 15 Mar 2018 18:21:43 +0100
-Subject: [PATCH] netfilter: nf_flow_table: clean up and fix dst handling
-
-dst handling in the code is inconsistent and possibly wrong. In my test,
-skb_dst(skb) holds the dst entry after routing but before NAT, so the
-code could possibly return the same dst entry for both directions of a
-connection.
-Additionally, there was some confusion over the dst entry vs the address
-passed as parameter to rt_nexthop/rt6_nexthop.
-
-Do an explicit dst lookup for both ends of the connection and always use
-the source address for it. When running the IP hook, use the dst entry
-for the opposite direction for determining the route.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/net/netfilter/nf_flow_table_ip.c
-+++ b/net/netfilter/nf_flow_table_ip.c
-@@ -244,7 +244,7 @@ nf_flow_offload_ip_hook(void *priv, stru
-
- dir = tuplehash->tuple.dir;
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-+ rt = (struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
-
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
- return NF_ACCEPT;
-@@ -464,7 +464,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
-
- dir = tuplehash->tuple.dir;
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-+ rt = (struct rt6_info *)flow->tuplehash[!dir].tuple.dst_cache;
-
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
- return NF_ACCEPT;
---- a/net/netfilter/nft_flow_offload.c
-+++ b/net/netfilter/nft_flow_offload.c
-@@ -17,13 +17,11 @@ struct nft_flow_offload {
- struct nft_flowtable *flowtable;
- };
-
--static int nft_flow_route(const struct nft_pktinfo *pkt,
-- const struct nf_conn *ct,
-- struct nf_flow_route *route,
-- enum ip_conntrack_dir dir)
-+static struct dst_entry *
-+nft_flow_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
-+ const struct nft_pktinfo *pkt)
- {
-- struct dst_entry *this_dst = skb_dst(pkt->skb);
-- struct dst_entry *other_dst = NULL;
-+ struct dst_entry *dst;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
-@@ -38,8 +36,21 @@ static int nft_flow_route(const struct n
- break;
- }
-
-- nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
-- if (!other_dst)
-+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
-+
-+ return dst;
-+}
-+
-+static int nft_flow_route(const struct nft_pktinfo *pkt,
-+ const struct nf_conn *ct,
-+ struct nf_flow_route *route,
-+ enum ip_conntrack_dir dir)
-+{
-+ struct dst_entry *this_dst, *other_dst;
-+
-+ this_dst = nft_flow_dst(ct, dir, pkt);
-+ other_dst = nft_flow_dst(ct, !dir, pkt);
-+ if (!this_dst || !other_dst)
- return -ENOENT;
-
- route->tuple[dir].dst = this_dst;
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
--- /dev/null
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
-@@ -0,0 +1,419 @@
+@@ -0,0 +1,421 @@
+/*
+ * Copyright (C) 2018 Felix Fietkau <nbd@nbd.name>
+ *
+ switch (xt_family(par)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
++ fl.u.ip4.flowi4_oif = ifindex;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
++ fl.u.ip6.flowi6_oif = ifindex;
+ break;
+ }
+
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
--- /dev/null
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
-@@ -0,0 +1,387 @@
+@@ -0,0 +1,421 @@
+/*
+ * Copyright (C) 2018 Felix Fietkau <nbd@nbd.name>
+ *
+ switch (xt_family(par)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
++ fl.u.ip4.flowi4_oif = ifindex;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
++ fl.u.ip6.flowi6_oif = ifindex;
+ break;
+ }
+
+ nf_flow_table_free(table);
+}
+
++static int flow_offload_netdev_event(struct notifier_block *this,
++ unsigned long event, void *ptr)
++{
++ struct xt_flowoffload_hook *hook = NULL;
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++
++ if (event != NETDEV_UNREGISTER)
++ return NOTIFY_DONE;
++
++ spin_lock_bh(&hooks_lock);
++ hook = flow_offload_lookup_hook(dev);
++ if (hook) {
++ hlist_del(&hook->list);
++ }
++ spin_unlock_bh(&hooks_lock);
++ if (hook) {
++ nf_unregister_net_hook(hook->net, &hook->ops);
++ kfree(hook);
++ }
++
++ nf_flow_table_cleanup(dev_net(dev), dev);
++
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block flow_offload_netdev_notifier = {
++ .notifier_call = flow_offload_netdev_event,
++};
++
+static int __init xt_flowoffload_tg_init(void)
+{
+ int ret;
+
++ register_netdevice_notifier(&flow_offload_netdev_notifier);
++
+ INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work);
+
+ ret = xt_flowoffload_table_init(&nf_flowtable);
+{
+ xt_unregister_target(&offload_tg_reg);
+ xt_flowoffload_table_cleanup(&nf_flowtable);
++ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+}
+
+MODULE_LICENSE("GPL");
+++ /dev/null
-From ae56e27e30122f82d244f9eb35fcab8fa60e0d31 Mon Sep 17 00:00:00 2001
-From: Chen Minqiang <ptpt52@gmail.com>
-Date: Sun, 29 Apr 2018 14:08:57 +0800
-Subject: [PATCH] cleanup offload hooks on netdev unregister
-
-This should fix crashdump on reboot when FLOWOFFLOAD enabled
-
-kmsg:
-[ 84.188081] Workqueue: events_power_efficient xt_flowoffload_hook_work [xt_FLOWOFFLOAD]
-[ 84.209326] task: ffff88000ecd0c80 task.stack: ffffc90000068000
-[ 84.224706] RIP: 0010:__nf_unregister_net_hook+0x1/0x90
-[ 84.242911] RSP: 0018:ffffc9000006be30 EFLAGS: 00010202
-[ 84.257405] RAX: 0000000000000000 RBX: ffff88000c5b3228 RCX: 0000000100170001
-[ 84.292175] RDX: ffff88000ecd0c80 RSI: ffff88000c5b3228 RDI: 6b6b6b6b6b6b6b6b
-[ 84.305095] RBP: ffffc9000006be58 R08: ffff88000c5b3578 R09: ffff88000c5b3538
-[ 84.325980] R10: ffffc9000006be50 R11: ffff88000fc1f310 R12: ffffffff81e6c580
-[ 84.396514] R13: ffff88000d1723d0 R14: ffff88000ec0fc00 R15: 0000000000000000
-[ 84.459500] FS: 0000000000000000(0000) GS:ffff88000fc00000(0000) knlGS:0000000000000000
-[ 84.525121] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
-[ 84.565460] CR2: 0000000000a931d8 CR3: 0000000001e08006 CR4: 00000000000606f0
-[ 84.638311] Call Trace:
-[ 84.655229] ? nf_unregister_net_hook+0x88/0xd0
-[ 84.706898] xt_flowoffload_hook_work+0x12a/0x17a [xt_FLOWOFFLOAD]
-[ 84.765504] process_one_work+0x1c4/0x310
-[ 84.799558] worker_thread+0x20b/0x3c0
-[ 84.850119] kthread+0x112/0x120
-[ 84.884839] ? process_one_work+0x310/0x310
-[ 84.923571] ? kthread_create_on_node+0x40/0x40
-[ 84.966100] ret_from_fork+0x35/0x40
-[ 84.981738] Code: 41 5c 41 5d 41 5e 41 5f 5d c3 48 8b 05 c1 f1 99 00 55 48 89 e5 48 85 c0 75 02 0f 0b e8 b9 f6 30 00 5d c3 0f 1f 80 00 00 00 00 55 <0f> b7 0f 48 89 e5 48 89 c8 48 c1 e0 04 48 8d 54 07 08 31 c0 eb
-[ 85.100453] RIP: __nf_unregister_net_hook+0x1/0x90 RSP: ffffc9000006be30
-[ 85.111658] ---[ end trace 5c25a390045cac75 ]---
-[ 85.124535] Kernel panic - not syncing: Fatal exception
-
-Signed-off-by: Chen Minqiang <ptpt52@gmail.com>
----
- net/netfilter/xt_FLOWOFFLOAD.c | 32 ++++++++++++++++++++++++++++++++
- 1 file changed, 32 insertions(+)
-
---- a/net/netfilter/xt_FLOWOFFLOAD.c
-+++ b/net/netfilter/xt_FLOWOFFLOAD.c
-@@ -343,10 +343,41 @@ static void xt_flowoffload_table_cleanup
- nf_flow_table_free(table);
- }
-
-+static int flow_offload_netdev_event(struct notifier_block *this,
-+ unsigned long event, void *ptr)
-+{
-+ struct xt_flowoffload_hook *hook = NULL;
-+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-+
-+ if (event != NETDEV_UNREGISTER)
-+ return NOTIFY_DONE;
-+
-+ spin_lock_bh(&hooks_lock);
-+ hook = flow_offload_lookup_hook(dev);
-+ if (hook) {
-+ hlist_del(&hook->list);
-+ }
-+ spin_unlock_bh(&hooks_lock);
-+ if (hook) {
-+ nf_unregister_net_hook(hook->net, &hook->ops);
-+ kfree(hook);
-+ }
-+
-+ nf_flow_table_cleanup(dev_net(dev), dev);
-+
-+ return NOTIFY_DONE;
-+}
-+
-+static struct notifier_block flow_offload_netdev_notifier = {
-+ .notifier_call = flow_offload_netdev_event,
-+};
-+
- static int __init xt_flowoffload_tg_init(void)
- {
- int ret;
-
-+ register_netdevice_notifier(&flow_offload_netdev_notifier);
-+
- INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work);
-
- ret = xt_flowoffload_table_init(&nf_flowtable);
-@@ -364,6 +395,7 @@ static void __exit xt_flowoffload_tg_exi
- {
- xt_unregister_target(&offload_tg_reg);
- xt_flowoffload_table_cleanup(&nf_flowtable);
-+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
- }
-
- MODULE_LICENSE("GPL");
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
-@@ -126,6 +133,22 @@ unsigned int nf_flow_offload_ip_hook(voi
+@@ -125,6 +132,22 @@ unsigned int nf_flow_offload_ip_hook(voi
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
-@@ -218,10 +218,16 @@ int flow_offload_add(struct nf_flowtable
+@@ -219,10 +219,16 @@ int flow_offload_add(struct nf_flowtable
}
EXPORT_SYMBOL_GPL(flow_offload_add);
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-@@ -236,6 +242,9 @@ static void flow_offload_del(struct nf_f
+@@ -237,6 +243,9 @@ static void flow_offload_del(struct nf_f
if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
flow_offload_fixup_ct_state(e->ct);
flow_offload_free(flow);
}
-@@ -349,6 +358,9 @@ static int nf_flow_offload_gc_step(struc
+@@ -350,6 +359,9 @@ static int nf_flow_offload_gc_step(struc
if (!teardown)
nf_ct_offload_timeout(flow);
if (nf_flow_has_expired(flow) || teardown)
flow_offload_del(flow_table, flow);
}
-@@ -484,10 +496,43 @@ int nf_flow_dnat_port(const struct flow_
+@@ -485,10 +497,43 @@ int nf_flow_dnat_port(const struct flow_
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
err = rhashtable_init(&flowtable->rhashtable,
-@@ -525,6 +570,8 @@ static void nf_flow_table_iterate_cleanu
+@@ -526,6 +571,8 @@ static void nf_flow_table_iterate_cleanu
{
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
}
void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
-@@ -538,6 +585,26 @@ void nf_flow_table_cleanup(struct net *n
+@@ -539,6 +586,26 @@ void nf_flow_table_cleanup(struct net *n
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
mutex_lock(&flowtable_lock);
-@@ -547,9 +614,58 @@ void nf_flow_table_free(struct nf_flowta
+@@ -548,9 +615,58 @@ void nf_flow_table_free(struct nf_flowta
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
rhashtable_destroy(&flow_table->rhashtable);
nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
-@@ -121,6 +121,9 @@ static void nft_flow_offload_eval(const
+@@ -110,6 +110,9 @@ static void nft_flow_offload_eval(const
if (ret < 0)
goto err_flow_add;
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
-@@ -358,7 +358,7 @@ static int nf_flow_offload_gc_step(struc
+@@ -359,7 +359,7 @@ static int nf_flow_offload_gc_step(struc
if (!teardown)
nf_ct_offload_timeout(flow);
nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
-@@ -138,6 +138,9 @@ static void nft_flow_offload_eval(const
+@@ -127,6 +127,9 @@ static void nft_flow_offload_eval(const
if (ret < 0)
goto err_flow_add;