--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 13 Jun 2018 12:33:39 +0200
+Subject: [PATCH] netfilter: nf_flow_table: fix offloaded connection timeout
+ corner case
+
+The full teardown of offloaded flows is deferred to a gc work item,
+however processing of packets by netfilter needs to happen immediately
+after a teardown is requested, because the conntrack state needs to be
+fixed up.
+
+Since the IPS_OFFLOAD_BIT is still kept until the teardown is complete,
+the netfilter conntrack gc can accidentally bump the timeout of a
+connection where offload was just stopped, causing a conntrack entry
+leak.
+
+Fix this by moving the conntrack timeout bumping from conntrack core to
+the nf_flow_offload and add a check to prevent bogus timeout bumps.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -978,18 +978,6 @@ static bool gc_worker_can_early_drop(con
+ return false;
+ }
+
+-#define DAY (86400 * HZ)
+-
+-/* Set an arbitrary timeout large enough not to ever expire, this save
+- * us a check for the IPS_OFFLOAD_BIT from the packet path via
+- * nf_ct_is_expired().
+- */
+-static void nf_ct_offload_timeout(struct nf_conn *ct)
+-{
+- if (nf_ct_expires(ct) < DAY / 2)
+- ct->timeout = nfct_time_stamp + DAY;
+-}
+-
+ static void gc_worker(struct work_struct *work)
+ {
+ unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
+@@ -1026,10 +1014,8 @@ static void gc_worker(struct work_struct
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ scanned++;
+- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+- nf_ct_offload_timeout(tmp);
++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
+ continue;
+- }
+
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -185,8 +185,27 @@ static const struct rhashtable_params nf
+ .automatic_shrinking = true,
+ };
+
++#define DAY (86400 * HZ)
++
++/* Set an arbitrary timeout large enough not to ever expire, this save
++ * us a check for the IPS_OFFLOAD_BIT from the packet path via
++ * nf_ct_is_expired().
++ */
++static void nf_ct_offload_timeout(struct flow_offload *flow)
++{
++ struct flow_offload_entry *entry;
++ struct nf_conn *ct;
++
++ entry = container_of(flow, struct flow_offload_entry, flow);
++ ct = entry->ct;
++
++ if (nf_ct_expires(ct) < DAY / 2)
++ ct->timeout = nfct_time_stamp + DAY;
++}
++
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ {
++ nf_ct_offload_timeout(flow);
+ flow->timeout = (u32)jiffies;
+
+ rhashtable_insert_fast(&flow_table->rhashtable,
+@@ -307,6 +326,8 @@ static int nf_flow_offload_gc_step(struc
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
++ bool teardown;
++
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+@@ -319,9 +340,13 @@ static int nf_flow_offload_gc_step(struc
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+- if (nf_flow_has_expired(flow) ||
+- (flow->flags & (FLOW_OFFLOAD_DYING |
+- FLOW_OFFLOAD_TEARDOWN)))
++ teardown = flow->flags & (FLOW_OFFLOAD_DYING |
++ FLOW_OFFLOAD_TEARDOWN);
++
++ if (!teardown)
++ nf_ct_offload_timeout(flow);
++
++ if (nf_flow_has_expired(flow) || teardown)
+ flow_offload_del(flow_table, flow);
+ }
+ out:
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
-@@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable
+@@ -218,10 +218,16 @@ int flow_offload_add(struct nf_flowtable
}
EXPORT_SYMBOL_GPL(flow_offload_add);
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-@@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f
+@@ -233,6 +239,9 @@ static void flow_offload_del(struct nf_f
e = container_of(flow, struct flow_offload_entry, flow);
clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
flow_offload_free(flow);
}
-@@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc
- rhashtable_walk_start(&hti);
+@@ -346,6 +355,9 @@ static int nf_flow_offload_gc_step(struc
+ if (!teardown)
+ nf_ct_offload_timeout(flow);
- while ((tuplehash = rhashtable_walk_next(&hti))) {
-+ bool teardown;
- if (IS_ERR(tuplehash)) {
- err = PTR_ERR(tuplehash);
- if (err != -EAGAIN)
-@@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc
-
- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
-
-- if (nf_flow_has_expired(flow) ||
-- (flow->flags & (FLOW_OFFLOAD_DYING |
-- FLOW_OFFLOAD_TEARDOWN)))
-+ teardown = flow->flags & (FLOW_OFFLOAD_DYING |
-+ FLOW_OFFLOAD_TEARDOWN);
-+
+ if (nf_flow_in_hw(flow) && !teardown)
+ continue;
+
-+ if (nf_flow_has_expired(flow) || teardown)
+ if (nf_flow_has_expired(flow) || teardown)
flow_offload_del(flow_table, flow);
}
- out:
-@@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_
+@@ -481,10 +493,43 @@ int nf_flow_dnat_port(const struct flow_
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
err = rhashtable_init(&flowtable->rhashtable,
-@@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu
+@@ -522,6 +567,8 @@ static void nf_flow_table_iterate_cleanu
{
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
}
void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
-@@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n
+@@ -535,6 +582,26 @@ void nf_flow_table_cleanup(struct net *n
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
mutex_lock(&flowtable_lock);
-@@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta
+@@ -544,9 +611,58 @@ void nf_flow_table_free(struct nf_flowta
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
rhashtable_destroy(&flow_table->rhashtable);
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
-@@ -332,7 +332,7 @@ static int nf_flow_offload_gc_step(struc
- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
- FLOW_OFFLOAD_TEARDOWN);
+@@ -355,7 +355,7 @@ static int nf_flow_offload_gc_step(struc
+ if (!teardown)
+ nf_ct_offload_timeout(flow);
- if (nf_flow_in_hw(flow) && !teardown)
+ if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown)