git.ipfire.org Git - thirdparty/openwrt.git/commitdiff
kernel: fix WED offloaded flow timeout refresh 23611/head
author Qingfang Deng <dqfext@gmail.com>
Mon, 1 Jun 2026 05:32:17 +0000 (13:32 +0800)
committer Robert Marko <robimarko@gmail.com>
Thu, 4 Jun 2026 20:42:02 +0000 (22:42 +0200)
Add a pending patch to fix the WED flow timeout.

OpenWrt has recently migrated many platforms to kernel 6.18. On the
MediaTek platform, which supports hardware network offloading, WiFi
connections accelerated via the WED path were observed to drop after
roughly 300 seconds.

After several debugging sessions, assisted by the Claude LLM, the
problem was narrowed down as follows:

nf_flow_table_extend_ct_timeout() extends ct->timeout for offloaded
flows using:

cmpxchg(&ct->timeout, expires, new_timeout);

'expires' comes from nf_ct_expires(ct) and is a relative value, while
ct->timeout holds an absolute timestamp. The two are never equal, so
the cmpxchg always fails and the timeout is never extended.

This goes unnoticed for most flows, but a long-lived hardware (WED)
offloaded flow on MediaTek MT7986 eventually has ct->timeout decay to
zero, the conntrack entry is reaped and the connection breaks.

Open-code the relative value from a single READ_ONCE(ct->timeout)
snapshot and compare against that same absolute snapshot in the
cmpxchg, so the timeout extension actually takes effect while the
datapath remains authoritative if it updates ct->timeout concurrently.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Link: https://github.com/openwrt/openwrt/pull/23611
Signed-off-by: Robert Marko <robimarko@gmail.com>
target/linux/generic/hack-6.18/650-netfilter-add-xt_FLOWOFFLOAD-target.patch
target/linux/generic/pending-6.18/704-netfilter-flowtable-fix-offloaded-ct-timeout-never-b.patch [new file with mode: 0644]

index cdd491890917e011808069347bdac3695702b412..fb24eb99469455b858364a094bf444f35e026c44 100644 (file)
@@ -768,7 +768,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
                      void (*iter)(struct nf_flowtable *flowtable,
                                   struct flow_offload *flow, void *data),
                      void *data)
-@@ -582,6 +580,7 @@ static void nf_flow_offload_gc_step(stru
+@@ -587,6 +585,7 @@ static void nf_flow_offload_gc_step(stru
                nf_flow_offload_stats(flow_table, flow);
        }
  }
diff --git a/target/linux/generic/pending-6.18/704-netfilter-flowtable-fix-offloaded-ct-timeout-never-b.patch b/target/linux/generic/pending-6.18/704-netfilter-flowtable-fix-offloaded-ct-timeout-never-b.patch
new file mode 100644 (file)
index 0000000..852a0d4
--- /dev/null
@@ -0,0 +1,75 @@
+From: Adrian Bente <adibente@gmail.com>
+Date: Thu, 28 May 2026 10:08:51 +0300
+Subject: [PATCH v2 net] netfilter: flowtable: fix offloaded ct timeout never being extended
+Message-ID: <20260528070851.3913-1-adibente@gmail.com>
+
+OpenWrt has recently migrated many platforms to kernel 6.18. On the
+MediaTek platform, which supports hardware network offloading, WiFi
+connections accelerated via the WED path were observed to drop after
+roughly 300 seconds.
+
+After several debugging sessions, assisted by the Claude LLM, the
+problem was narrowed down as follows:
+
+nf_flow_table_extend_ct_timeout() extends ct->timeout for offloaded
+flows using:
+
+       cmpxchg(&ct->timeout, expires, new_timeout);
+
+'expires' comes from nf_ct_expires(ct) and is a relative value, while
+ct->timeout holds an absolute timestamp. The two are never equal, so
+the cmpxchg always fails and the timeout is never extended.
+
+This goes unnoticed for most flows, but a long-lived hardware (WED)
+offloaded flow on MediaTek MT7986 eventually has ct->timeout decay to
+zero, the conntrack entry is reaped and the connection breaks.
+
+Open-code the relative value from a single READ_ONCE(ct->timeout)
+snapshot and compare against that same absolute snapshot in the
+cmpxchg, so the timeout extension actually takes effect while the
+datapath remains authoritative if it updates ct->timeout concurrently.
+
+Suggested-by: Florian Westphal <fw@strlen.de>
+Fixes: 03428ca5cee9 ("netfilter: conntrack: rework offload nf_conn timeout extension logic")
+Cc: stable@vger.kernel.org
+Signed-off-by: Adrian Bente <adibente@gmail.com>
+---
+ net/netfilter/nf_flow_table_core.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -500,8 +500,13 @@ static u32 nf_flow_table_tcp_timeout(con
+  */
+ static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
+ {
+-      static const u32 min_timeout = 5 * 60 * HZ;
+-      u32 expires = nf_ct_expires(ct);
++      static const s32 min_timeout = 5 * 60 * HZ;
++      u32 ct_timeout = READ_ONCE(ct->timeout);
++      s32 expires;
++
++      expires = ct_timeout - nfct_time_stamp;
++      if (expires <= 0) /* already expired */
++              return;
+ 
+       /* normal case: large enough timeout, nothing to do. */
+       if (likely(expires >= min_timeout))
+@@ -519,7 +524,7 @@ static void nf_flow_table_extend_ct_time
+       if (nf_ct_is_confirmed(ct) &&
+           test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
+               u8 l4proto = nf_ct_protonum(ct);
+-              u32 new_timeout = true;
++              u32 new_timeout = 1;
+ 
+               switch (l4proto) {
+               case IPPROTO_UDP:
+@@ -544,7 +549,7 @@ static void nf_flow_table_extend_ct_time
+                */
+               if (new_timeout) {
+                       new_timeout += nfct_time_stamp;
+-                      cmpxchg(&ct->timeout, expires, new_timeout);
++                      cmpxchg(&ct->timeout, ct_timeout, new_timeout);
+               }
+      }
+ 