1 From 4592ee7f525c4683ec9e290381601fdee50ae110 Mon Sep 17 00:00:00 2001
2 From: Florian Westphal <fw@strlen.de>
3 Date: Wed, 4 Aug 2021 15:02:15 +0200
4 Subject: [PATCH] netfilter: conntrack: remove offload_pickup sysctl again
6 These two sysctls were added because the hardcoded defaults (2 minutes
7 for tcp, 30 seconds for udp) turned out to be too low for some setups.
9 They appeared in 5.14-rc1 so it should be fine to remove them again.
11 Marcelo convinced me that there should be no difference between a flow
12 that was offloaded vs. a flow that was not wrt. timeout handling.
13 Thus the defaults are changed to those for TCP established and UDP stream,
14 5 days and 120 seconds, respectively.
16 Marcelo also suggested accounting for the timeout value used for the
17 offloading; this avoids an increase beyond the value in the conntrack-sysctl
18 and will also instantly expire the conntrack entry with altered sysctls.
20 Example:
21 nf_conntrack_udp_timeout_stream=60
22 nf_flowtable_udp_timeout=60
24 This will remove offloaded udp flows after one minute, rather than two.
26 An earlier version of this patch also cleared the ASSURED bit to
27 allow nf_conntrack to evict the entry via early_drop (i.e., table full).
28 However, it looks like we can safely assume that a connection timed out
29 via HW is still in established state, so this isn't needed.
32 [..] the hardware sends all packets with a set FIN flags to sw.
33 [..] Connections that are aged in hardware are expected to be in the
34 established state.
36 In case it turns out that back-to-sw-path transition can occur for
37 'dodgy' connections too (e.g., one side disappeared while software-path
38 would have been in RETRANS timeout), we can adjust this later.
40 Cc: Oz Shlomo <ozsh@nvidia.com>
41 Cc: Paul Blakey <paulb@nvidia.com>
42 Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
43 Signed-off-by: Florian Westphal <fw@strlen.de>
44 Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
45 Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
46 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
48 Documentation/networking/nf_conntrack-sysctl.rst | 10 ----------
49 include/net/netns/conntrack.h | 2 --
50 net/netfilter/nf_conntrack_proto_tcp.c | 1 -
51 net/netfilter/nf_conntrack_proto_udp.c | 1 -
52 net/netfilter/nf_conntrack_standalone.c | 16 ----------------
53 net/netfilter/nf_flow_table_core.c | 11 ++++++++---
54 6 files changed, 8 insertions(+), 33 deletions(-)
56 --- a/include/net/netns/conntrack.h
57 +++ b/include/net/netns/conntrack.h
58 @@ -29,7 +29,6 @@ struct nf_tcp_net {
60 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
61 unsigned int offload_timeout;
62 - unsigned int offload_pickup;
66 @@ -43,7 +42,6 @@ struct nf_udp_net {
67 unsigned int timeouts[UDP_CT_MAX];
68 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
69 unsigned int offload_timeout;
70 - unsigned int offload_pickup;
74 --- a/net/netfilter/nf_conntrack_proto_tcp.c
75 +++ b/net/netfilter/nf_conntrack_proto_tcp.c
76 @@ -1450,7 +1450,6 @@ void nf_conntrack_tcp_init_net(struct ne
78 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
79 tn->offload_timeout = 30 * HZ;
80 - tn->offload_pickup = 120 * HZ;
84 --- a/net/netfilter/nf_conntrack_proto_udp.c
85 +++ b/net/netfilter/nf_conntrack_proto_udp.c
86 @@ -276,7 +276,6 @@ void nf_conntrack_udp_init_net(struct ne
88 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
89 un->offload_timeout = 30 * HZ;
90 - un->offload_pickup = 30 * HZ;
94 --- a/net/netfilter/nf_conntrack_standalone.c
95 +++ b/net/netfilter/nf_conntrack_standalone.c
96 @@ -569,7 +569,6 @@ enum nf_ct_sysctl_index {
97 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
98 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
99 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
100 - NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
102 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
103 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
104 @@ -578,7 +577,6 @@ enum nf_ct_sysctl_index {
105 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
106 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
107 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
108 - NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
110 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
111 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
112 @@ -773,12 +771,6 @@ static struct ctl_table nf_ct_sysctl_tab
114 .proc_handler = proc_dointvec_jiffies,
116 - [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
117 - .procname = "nf_flowtable_tcp_pickup",
118 - .maxlen = sizeof(unsigned int),
120 - .proc_handler = proc_dointvec_jiffies,
123 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
124 .procname = "nf_conntrack_tcp_loose",
125 @@ -821,12 +813,6 @@ static struct ctl_table nf_ct_sysctl_tab
127 .proc_handler = proc_dointvec_jiffies,
129 - [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
130 - .procname = "nf_flowtable_udp_pickup",
131 - .maxlen = sizeof(unsigned int),
133 - .proc_handler = proc_dointvec_jiffies,
136 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
137 .procname = "nf_conntrack_icmp_timeout",
138 @@ -1006,7 +992,6 @@ static void nf_conntrack_standalone_init
140 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
141 table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
142 - table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
146 @@ -1098,7 +1083,6 @@ static int nf_conntrack_standalone_init_
147 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
148 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
149 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
150 - table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
153 nf_conntrack_standalone_init_tcp_sysctl(net, table);
154 --- a/net/netfilter/nf_flow_table_core.c
155 +++ b/net/netfilter/nf_flow_table_core.c
156 @@ -182,20 +182,25 @@ static void flow_offload_fixup_ct_timeou
158 struct net *net = nf_ct_net(ct);
159 int l4num = nf_ct_protonum(ct);
160 - unsigned int timeout;
163 if (l4num == IPPROTO_TCP) {
164 struct nf_tcp_net *tn = nf_tcp_pernet(net);
166 - timeout = tn->offload_pickup;
167 + timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
168 + timeout -= tn->offload_timeout;
169 } else if (l4num == IPPROTO_UDP) {
170 struct nf_udp_net *tn = nf_udp_pernet(net);
172 - timeout = tn->offload_pickup;
173 + timeout = tn->timeouts[UDP_CT_REPLIED];
174 + timeout -= tn->offload_timeout;
182 if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
183 WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);