kernel: Backport upstream flowtable patches from 5.15
author Hauke Mehrtens <hauke@hauke-m.de>
Sat, 5 Mar 2022 18:08:27 +0000 (19:08 +0100)
committer Hauke Mehrtens <hauke@hauke-m.de>
Sat, 13 Aug 2022 22:29:20 +0000 (00:29 +0200)
This backports some patches from kernel 5.15 to fix issues with
flowtable offloading in kernel 5.10. OpenWrt backports most of the
patches related to flowtable offloading from kernel 5.15 already, but we
are missing some of the extra fixes.

This fixes some connection tracking problems when a flow gets removed
from the offload and added to the normal SW path again.

The patch 614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch was
extended manually with the nf_conntrack_tcp_established() function.

All changes are already included in kernel 5.15.

Fixes: #8776
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
(cherry picked from commit 96ef2dabce1a5f102d53a15f33383193b47fd297)

target/linux/generic/backport-5.10/610-v5.13-58-netfilter-flowtable-Add-FLOW_OFFLOAD_XMIT_UNSPEC-xmi.patch [new file with mode: 0644]
target/linux/generic/backport-5.10/610-v5.15-58-netfilter-flowtable-avoid-possible-false-sharing.patch
target/linux/generic/backport-5.10/610-v5.18-netfilter-flowtable-move-dst_check-to-packet-path.patch
target/linux/generic/backport-5.10/613-v5.15-01-netfilter-flowtable-remove-nf_ct_l4proto_find-call.patch [new file with mode: 0644]
target/linux/generic/backport-5.10/613-v5.15-02-netfilter-conntrack-remove-offload_pickup-sysctl-aga.patch [new file with mode: 0644]
target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch [new file with mode: 0644]
target/linux/generic/hack-5.10/650-netfilter-add-xt_FLOWOFFLOAD-target.patch
target/linux/generic/pending-5.10/613-netfilter_optional_tcp_window_check.patch
target/linux/generic/pending-5.10/704-00-netfilter-flowtable-fix-excessive-hw-offload-attempt.patch

diff --git a/target/linux/generic/backport-5.10/610-v5.13-58-netfilter-flowtable-Add-FLOW_OFFLOAD_XMIT_UNSPEC-xmi.patch b/target/linux/generic/backport-5.10/610-v5.13-58-netfilter-flowtable-Add-FLOW_OFFLOAD_XMIT_UNSPEC-xmi.patch
new file mode 100644 (file)
index 0000000..62edb2c
--- /dev/null
@@ -0,0 +1,41 @@
+From 78ed0a9bc6db76f8e5f5f4cb0d2b2f0d1bb21b24 Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Tue, 13 Apr 2021 11:06:05 +0300
+Subject: [PATCH] netfilter: flowtable: Add FLOW_OFFLOAD_XMIT_UNSPEC xmit type
+
+The xmit type might not be set, in which case it defaults to
+FLOW_OFFLOAD_XMIT_NEIGH, and for this type the gc expects route info.
+Fix that by adding FLOW_OFFLOAD_XMIT_UNSPEC which defaults to 0.
+
+Fixes: 8b9229d15877 ("netfilter: flowtable: dst_check() from garbage collector path")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/net/netfilter/nf_flow_table.h | 3 ++-
+ net/netfilter/nf_flow_table_core.c    | 3 +++
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -90,7 +90,8 @@ enum flow_offload_tuple_dir {
+ #define FLOW_OFFLOAD_DIR_MAX  IP_CT_DIR_MAX
+ enum flow_offload_xmit_type {
+-      FLOW_OFFLOAD_XMIT_NEIGH         = 0,
++      FLOW_OFFLOAD_XMIT_UNSPEC        = 0,
++      FLOW_OFFLOAD_XMIT_NEIGH,
+       FLOW_OFFLOAD_XMIT_XFRM,
+       FLOW_OFFLOAD_XMIT_DIRECT,
+ };
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -130,6 +130,9 @@ static int flow_offload_fill_route(struc
+               flow_tuple->dst_cache = dst;
+               flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
+               break;
++      default:
++              WARN_ON_ONCE(1);
++              break;
+       }
+       flow_tuple->xmit_type = route->tuple[dir].xmit_type;
index 69c06c51d8b80473b7d19876a778b74a1c19a8fe..a3d0a359231bee849b3f499a31fbd105783999b4 100644 (file)
@@ -12,7 +12,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
 
 --- a/net/netfilter/nf_flow_table_core.c
 +++ b/net/netfilter/nf_flow_table_core.c
-@@ -328,7 +328,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
+@@ -331,7 +331,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
  void flow_offload_refresh(struct nf_flowtable *flow_table,
                          struct flow_offload *flow)
  {
index 99e40b9378680e1813c2e36402342dbc34d44f2d..53118939a3def0831bda1c1ebedc5b4de10baf8d 100644 (file)
@@ -20,7 +20,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
 
 --- a/net/netfilter/nf_flow_table_core.c
 +++ b/net/netfilter/nf_flow_table_core.c
-@@ -433,33 +433,12 @@ nf_flow_table_iterate(struct nf_flowtabl
+@@ -436,33 +436,12 @@ nf_flow_table_iterate(struct nf_flowtabl
        return err;
  }
  
diff --git a/target/linux/generic/backport-5.10/613-v5.15-01-netfilter-flowtable-remove-nf_ct_l4proto_find-call.patch b/target/linux/generic/backport-5.10/613-v5.15-01-netfilter-flowtable-remove-nf_ct_l4proto_find-call.patch
new file mode 100644 (file)
index 0000000..72accec
--- /dev/null
@@ -0,0 +1,47 @@
+From 92fb15513edc6ae1eb51f717e70d4d3d538c2d09 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 19 Jul 2021 18:04:01 +0200
+Subject: [PATCH] netfilter: flowtable: remove nf_ct_l4proto_find() call
+
+TCP and UDP are built-in conntrack protocol trackers and the flowtable
+only supports TCP and UDP, so remove this call.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ net/netfilter/nf_flow_table_core.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -180,15 +180,10 @@ static void flow_offload_fixup_tcp(struc
+ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+ {
+-      const struct nf_conntrack_l4proto *l4proto;
+       struct net *net = nf_ct_net(ct);
+       int l4num = nf_ct_protonum(ct);
+       unsigned int timeout;
+-      l4proto = nf_ct_l4proto_find(l4num);
+-      if (!l4proto)
+-              return;
+-
+       if (l4num == IPPROTO_TCP) {
+               struct nf_tcp_net *tn = nf_tcp_pernet(net);
+@@ -273,15 +268,10 @@ static const struct rhashtable_params nf
+ unsigned long flow_offload_get_timeout(struct flow_offload *flow)
+ {
+-      const struct nf_conntrack_l4proto *l4proto;
+       unsigned long timeout = NF_FLOW_TIMEOUT;
+       struct net *net = nf_ct_net(flow->ct);
+       int l4num = nf_ct_protonum(flow->ct);
+-      l4proto = nf_ct_l4proto_find(l4num);
+-      if (!l4proto)
+-              return timeout;
+-
+       if (l4num == IPPROTO_TCP) {
+               struct nf_tcp_net *tn = nf_tcp_pernet(net);
diff --git a/target/linux/generic/backport-5.10/613-v5.15-02-netfilter-conntrack-remove-offload_pickup-sysctl-aga.patch b/target/linux/generic/backport-5.10/613-v5.15-02-netfilter-conntrack-remove-offload_pickup-sysctl-aga.patch
new file mode 100644 (file)
index 0000000..0ed23e9
--- /dev/null
@@ -0,0 +1,184 @@
+From 4592ee7f525c4683ec9e290381601fdee50ae110 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Wed, 4 Aug 2021 15:02:15 +0200
+Subject: [PATCH] netfilter: conntrack: remove offload_pickup sysctl again
+
+These two sysctls were added because the hardcoded defaults (2 minutes,
+tcp, 30 seconds, udp) turned out to be too low for some setups.
+
+They appeared in 5.14-rc1 so it should be fine to remove it again.
+
+Marcelo convinced me that there should be no difference between a flow
+that was offloaded vs. a flow that was not wrt. timeout handling.
+Thus the default is changed to those for TCP established and UDP stream,
+5 days and 120 seconds, respectively.
+
+Marcelo also suggested to account for the timeout value used for the
+offloading, this avoids increase beyond the value in the conntrack-sysctl
+and will also instantly expire the conntrack entry with altered sysctls.
+
+Example:
+   nf_conntrack_udp_timeout_stream=60
+   nf_flowtable_udp_timeout=60
+
+This will remove offloaded udp flows after one minute, rather than two.
+
+An earlier version of this patch also cleared the ASSURED bit to
+allow nf_conntrack to evict the entry via early_drop (i.e., table full).
+However, it looks like we can safely assume that connection timed out
+via HW is still in established state, so this isn't needed.
+
+Quoting Oz:
+ [..] the hardware sends all packets with a set FIN flags to sw.
+ [..] Connections that are aged in hardware are expected to be in the
+ established state.
+
+In case it turns out that back-to-sw-path transition can occur for
+'dodgy' connections too (e.g., one side disappeared while software-path
+would have been in RETRANS timeout), we can adjust this later.
+
+Cc: Oz Shlomo <ozsh@nvidia.com>
+Cc: Paul Blakey <paulb@nvidia.com>
+Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ Documentation/networking/nf_conntrack-sysctl.rst | 10 ----------
+ include/net/netns/conntrack.h                    |  2 --
+ net/netfilter/nf_conntrack_proto_tcp.c           |  1 -
+ net/netfilter/nf_conntrack_proto_udp.c           |  1 -
+ net/netfilter/nf_conntrack_standalone.c          | 16 ----------------
+ net/netfilter/nf_flow_table_core.c               | 11 ++++++++---
+ 6 files changed, 8 insertions(+), 33 deletions(-)
+
+--- a/include/net/netns/conntrack.h
++++ b/include/net/netns/conntrack.h
+@@ -29,7 +29,6 @@ struct nf_tcp_net {
+       int tcp_max_retrans;
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       unsigned int offload_timeout;
+-      unsigned int offload_pickup;
+ #endif
+ };
+@@ -43,7 +42,6 @@ struct nf_udp_net {
+       unsigned int timeouts[UDP_CT_MAX];
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       unsigned int offload_timeout;
+-      unsigned int offload_pickup;
+ #endif
+ };
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -1450,7 +1450,6 @@ void nf_conntrack_tcp_init_net(struct ne
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       tn->offload_timeout = 30 * HZ;
+-      tn->offload_pickup = 120 * HZ;
+ #endif
+ }
+--- a/net/netfilter/nf_conntrack_proto_udp.c
++++ b/net/netfilter/nf_conntrack_proto_udp.c
+@@ -276,7 +276,6 @@ void nf_conntrack_udp_init_net(struct ne
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       un->offload_timeout = 30 * HZ;
+-      un->offload_pickup = 30 * HZ;
+ #endif
+ }
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -569,7 +569,6 @@ enum nf_ct_sysctl_index {
+       NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
+-      NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
+ #endif
+       NF_SYSCTL_CT_PROTO_TCP_LOOSE,
+       NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+@@ -578,7 +577,6 @@ enum nf_ct_sysctl_index {
+       NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
+-      NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
+ #endif
+       NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
+       NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
+@@ -773,12 +771,6 @@ static struct ctl_table nf_ct_sysctl_tab
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+-      [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
+-              .procname       = "nf_flowtable_tcp_pickup",
+-              .maxlen         = sizeof(unsigned int),
+-              .mode           = 0644,
+-              .proc_handler   = proc_dointvec_jiffies,
+-      },
+ #endif
+       [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
+               .procname       = "nf_conntrack_tcp_loose",
+@@ -821,12 +813,6 @@ static struct ctl_table nf_ct_sysctl_tab
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+-      [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
+-              .procname       = "nf_flowtable_udp_pickup",
+-              .maxlen         = sizeof(unsigned int),
+-              .mode           = 0644,
+-              .proc_handler   = proc_dointvec_jiffies,
+-      },
+ #endif
+       [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
+               .procname       = "nf_conntrack_icmp_timeout",
+@@ -1006,7 +992,6 @@ static void nf_conntrack_standalone_init
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
+-      table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
+ #endif
+ }
+@@ -1098,7 +1083,6 @@ static int nf_conntrack_standalone_init_
+       table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
+-      table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
+ #endif
+       nf_conntrack_standalone_init_tcp_sysctl(net, table);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -182,20 +182,25 @@ static void flow_offload_fixup_ct_timeou
+ {
+       struct net *net = nf_ct_net(ct);
+       int l4num = nf_ct_protonum(ct);
+-      unsigned int timeout;
++      s32 timeout;
+       if (l4num == IPPROTO_TCP) {
+               struct nf_tcp_net *tn = nf_tcp_pernet(net);
+-              timeout = tn->offload_pickup;
++              timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++              timeout -= tn->offload_timeout;
+       } else if (l4num == IPPROTO_UDP) {
+               struct nf_udp_net *tn = nf_udp_pernet(net);
+-              timeout = tn->offload_pickup;
++              timeout = tn->timeouts[UDP_CT_REPLIED];
++              timeout -= tn->offload_timeout;
+       } else {
+               return;
+       }
++      if (timeout < 0)
++              timeout = 0;
++
+       if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
+               WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
diff --git a/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch b/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
new file mode 100644 (file)
index 0000000..1b422ca
--- /dev/null
@@ -0,0 +1,166 @@
+From b8835ba8c029b5c9ada5666754526c2b00f7ea80 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 17 May 2022 10:44:14 +0200
+Subject: netfilter: flowtable: fix TCP flow teardown
+
+[ Upstream commit e5eaac2beb54f0a16ff851125082d9faeb475572 ]
+
+This patch addresses three possible problems:
+
+1. ct gc may race to undo the timeout adjustment of the packet path, leaving
+   the conntrack entry in place with the internal offload timeout (one day).
+
+2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE
+   timeout is reached before the flow offload del.
+
+3. tcp ct is always set to ESTABLISHED with a very long timeout
+   in flow offload teardown/delete even though the state might be already
+   CLOSED. Also as a remark we cannot assume that the FIN or RST packet
+   is hitting flow table teardown as the packet might get bumped to the
+   slow path in nftables.
+
+This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so
+conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN
+state transition.
+
+Moreover, return the connection's ownership to conntrack upon teardown
+by clearing the offload flag and fixing the established timeout value.
+The flow table GC thread will asynchronously free the flow table and
+hardware offload entries.
+
+Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows,
+which is also misleading since the flow is back on the classic
+conntrack path.
+
+If nf_ct_delete() removes the entry from the conntrack table, then it
+calls nf_ct_put() which decrements the refcnt. This is not a problem
+because the flowtable holds a reference to the conntrack object from
+flow_offload_alloc() path which is released via flow_offload_free().
+
+This patch also updates nft_flow_offload to skip packets in SYN_RECV
+state. Since we might miss or bump packets to slow path, we do not know
+what will happen there while we are still in SYN_RECV, this patch
+postpones offload up to the next packet which also aligns to the
+existing behaviour in tc-ct.
+
+flow_offload_teardown() does not reset the existing tcp state from
+flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bumped to slow
+path might have already updated the state to CLOSE/FIN.
+
+Joint work with Oz and Sven.
+
+Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race")
+Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
+Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_flow_table_core.c | 33 +++++++-----------------------
+ net/netfilter/nft_flow_offload.c   |  3 ++-
+ 2 files changed, 9 insertions(+), 27 deletions(-)
+
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -173,12 +173,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_ini
+ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+ {
+-      tcp->state = TCP_CONNTRACK_ESTABLISHED;
+       tcp->seen[0].td_maxwin = 0;
+       tcp->seen[1].td_maxwin = 0;
+ }
+-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
++static void flow_offload_fixup_ct(struct nf_conn *ct)
+ {
+       struct net *net = nf_ct_net(ct);
+       int l4num = nf_ct_protonum(ct);
+@@ -187,7 +186,9 @@ static void flow_offload_fixup_ct_timeou
+       if (l4num == IPPROTO_TCP) {
+               struct nf_tcp_net *tn = nf_tcp_pernet(net);
+-              timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++              flow_offload_fixup_tcp(&ct->proto.tcp);
++
++              timeout = tn->timeouts[ct->proto.tcp.state];
+               timeout -= tn->offload_timeout;
+       } else if (l4num == IPPROTO_UDP) {
+               struct nf_udp_net *tn = nf_udp_pernet(net);
+@@ -205,18 +206,6 @@ static void flow_offload_fixup_ct_timeou
+               WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
+-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+-{
+-      if (nf_ct_protonum(ct) == IPPROTO_TCP)
+-              flow_offload_fixup_tcp(&ct->proto.tcp);
+-}
+-
+-static void flow_offload_fixup_ct(struct nf_conn *ct)
+-{
+-      flow_offload_fixup_ct_state(ct);
+-      flow_offload_fixup_ct_timeout(ct);
+-}
+-
+ static void flow_offload_route_release(struct flow_offload *flow)
+ {
+       nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+@@ -353,22 +342,14 @@ static void flow_offload_del(struct nf_f
+       rhashtable_remove_fast(&flow_table->rhashtable,
+                              &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+                              nf_flow_offload_rhash_params);
+-
+-      clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+-
+-      if (nf_flow_has_expired(flow))
+-              flow_offload_fixup_ct(flow->ct);
+-      else
+-              flow_offload_fixup_ct_timeout(flow->ct);
+-
+       flow_offload_free(flow);
+ }
+ void flow_offload_teardown(struct flow_offload *flow)
+ {
++      clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+       set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+-
+-      flow_offload_fixup_ct_state(flow->ct);
++      flow_offload_fixup_ct(flow->ct);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_teardown);
+@@ -437,7 +418,7 @@ static void nf_flow_offload_gc_step(stru
+       if (nf_flow_has_expired(flow) ||
+           nf_ct_is_dying(flow->ct))
+-              set_bit(NF_FLOW_TEARDOWN, &flow->flags);
++              flow_offload_teardown(flow);
+       if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+               if (test_bit(NF_FLOW_HW, &flow->flags)) {
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -268,6 +268,12 @@ static bool nft_flow_offload_skip(struct
+       return false;
+ }
++static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
++{
++      return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
++             test_bit(IPS_ASSURED_BIT, &ct->status);
++}
++
+ static void nft_flow_offload_eval(const struct nft_expr *expr,
+                                 struct nft_regs *regs,
+                                 const struct nft_pktinfo *pkt)
+@@ -293,7 +299,8 @@ static void nft_flow_offload_eval(const
+       case IPPROTO_TCP:
+               tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
+                                         sizeof(_tcph), &_tcph);
+-              if (unlikely(!tcph || tcph->fin || tcph->rst))
++              if (unlikely(!tcph || tcph->fin || tcph->rst ||
++                           !nf_conntrack_tcp_established(ct)))
+                       goto out;
+               break;
+       case IPPROTO_UDP:
index 6252f9a41847a14626b9eb39144eb087c470700a..7037194eee8f31ad2eec1a00afbf0548c39aa136 100644 (file)
@@ -821,7 +821,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  #include <net/netfilter/nf_flow_table.h>
  #include <net/netfilter/nf_conntrack.h>
  #include <net/netfilter/nf_conntrack_core.h>
-@@ -401,8 +400,7 @@ flow_offload_lookup(struct nf_flowtable
+@@ -380,8 +379,7 @@ flow_offload_lookup(struct nf_flowtable
  }
  EXPORT_SYMBOL_GPL(flow_offload_lookup);
  
@@ -831,7 +831,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
                      void (*iter)(struct flow_offload *flow, void *data),
                      void *data)
  {
-@@ -434,6 +432,7 @@ nf_flow_table_iterate(struct nf_flowtabl
+@@ -413,6 +411,7 @@ nf_flow_table_iterate(struct nf_flowtabl
  
        return err;
  }
@@ -861,7 +861,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 +#endif /* _XT_FLOWOFFLOAD_H */
 --- a/include/net/netfilter/nf_flow_table.h
 +++ b/include/net/netfilter/nf_flow_table.h
-@@ -270,6 +270,10 @@ void nf_flow_table_free(struct nf_flowta
+@@ -271,6 +271,10 @@ void nf_flow_table_free(struct nf_flowta
  
  void flow_offload_teardown(struct flow_offload *flow);
  
index 4cf07a3e2ca23739b60325f2f40c64f5bf510ce2..458b6761bc8a38f9f187c22e0ff04edd8cde2fd0 100644 (file)
@@ -49,7 +49,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  static bool enable_hooks __read_mostly;
  MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks");
  module_param(enable_hooks, bool, 0000);
-@@ -660,6 +663,7 @@ enum nf_ct_sysctl_index {
+@@ -658,6 +661,7 @@ enum nf_ct_sysctl_index {
        NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
  #endif
  
@@ -57,7 +57,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
        __NF_SYSCTL_CT_LAST_SYSCTL,
  };
  
-@@ -1014,6 +1018,13 @@ static struct ctl_table nf_ct_sysctl_tab
+@@ -1000,6 +1004,13 @@ static struct ctl_table nf_ct_sysctl_tab
                .proc_handler   = proc_dointvec_jiffies,
        },
  #endif
index c15f090f710ef3fe3ef51eb6ec50e0ceac4434e9..67a72f825a5dd71cb34fae75ec1f1516180d27f0 100644 (file)
@@ -13,7 +13,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
 
 --- a/net/netfilter/nf_flow_table_core.c
 +++ b/net/netfilter/nf_flow_table_core.c
-@@ -331,8 +331,10 @@ void flow_offload_refresh(struct nf_flow
+@@ -318,8 +318,10 @@ void flow_offload_refresh(struct nf_flow
        u32 timeout;
  
        timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);