]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
authorSasha Levin <sashal@kernel.org>
Sun, 23 Mar 2025 17:44:06 +0000 (13:44 -0400)
committerSasha Levin <sashal@kernel.org>
Sun, 23 Mar 2025 17:44:06 +0000 (13:44 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.4/bluetooth-fix-error-code-in-chan_alloc_skb_cb.patch [new file with mode: 0644]
queue-5.4/firmware-imx-scu-fix-of-node-leak-in-.probe.patch [new file with mode: 0644]
queue-5.4/ipv6-fix-memleak-of-nhc_pcpu_rth_output-in-fib_check.patch [new file with mode: 0644]
queue-5.4/ipv6-set-errno-after-ip_fib_metrics_init-in-ip6_rout.patch [new file with mode: 0644]
queue-5.4/net-atm-fix-use-after-free-in-lec_send.patch [new file with mode: 0644]
queue-5.4/net-neighbor-add-missing-policy-for-ndtpa_queue_lenb.patch [new file with mode: 0644]
queue-5.4/rdma-bnxt_re-avoid-clearing-vlan_id-mask-in-modify-q.patch [new file with mode: 0644]
queue-5.4/rdma-hns-fix-wrong-value-of-max_sge_rd.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/xfrm_output-force-software-gso-only-in-tunnel-mode.patch [new file with mode: 0644]

diff --git a/queue-5.4/bluetooth-fix-error-code-in-chan_alloc_skb_cb.patch b/queue-5.4/bluetooth-fix-error-code-in-chan_alloc_skb_cb.patch
new file mode 100644 (file)
index 0000000..f4d16eb
--- /dev/null
@@ -0,0 +1,45 @@
+From aeaded79113964a5083cb0103dc87557f1bf52ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Mar 2025 22:46:56 +0300
+Subject: Bluetooth: Fix error code in chan_alloc_skb_cb()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 72d061ee630d0dbb45c2920d8d19b3861c413e54 ]
+
+The chan_alloc_skb_cb() function is supposed to return error pointers on
+error.  Returning NULL will lead to a NULL dereference.
+
+Fixes: 6b8d4a6a0314 ("Bluetooth: 6LoWPAN: Use connected oriented channel instead of fixed one")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/6lowpan.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
+index bccad8c048dad..12052b7664414 100644
+--- a/net/bluetooth/6lowpan.c
++++ b/net/bluetooth/6lowpan.c
+@@ -847,11 +847,16 @@ static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan,
+                                        unsigned long hdr_len,
+                                        unsigned long len, int nb)
+ {
++      struct sk_buff *skb;
++
+       /* Note that we must allocate using GFP_ATOMIC here as
+        * this function is called originally from netdev hard xmit
+        * function in atomic context.
+        */
+-      return bt_skb_alloc(hdr_len + len, GFP_ATOMIC);
++      skb = bt_skb_alloc(hdr_len + len, GFP_ATOMIC);
++      if (!skb)
++              return ERR_PTR(-ENOMEM);
++      return skb;
+ }
+ static void chan_suspend_cb(struct l2cap_chan *chan)
+-- 
+2.39.5
+
diff --git a/queue-5.4/firmware-imx-scu-fix-of-node-leak-in-.probe.patch b/queue-5.4/firmware-imx-scu-fix-of-node-leak-in-.probe.patch
new file mode 100644 (file)
index 0000000..4d35137
--- /dev/null
@@ -0,0 +1,36 @@
+From 283804defefebaeb132f62fe1d81e1480bd4765c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Dec 2024 12:34:56 +0900
+Subject: firmware: imx-scu: fix OF node leak in .probe()
+
+From: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+
+[ Upstream commit fbf10b86f6057cf79300720da4ea4b77e6708b0d ]
+
+imx_scu_probe() calls of_parse_phandle_with_args(), but does not
+release the OF node reference obtained by it. Add a of_node_put() call
+after done with the node.
+
+Fixes: f25a066d1a07 ("firmware: imx-scu: Support one TX and one RX")
+Signed-off-by: Joe Hattori <joe@pf.is.s.u-tokyo.ac.jp>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/imx/imx-scu.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/firmware/imx/imx-scu.c b/drivers/firmware/imx/imx-scu.c
+index a3b11bc71dcb8..c973760ed9490 100644
+--- a/drivers/firmware/imx/imx-scu.c
++++ b/drivers/firmware/imx/imx-scu.c
+@@ -266,6 +266,7 @@ static int imx_scu_probe(struct platform_device *pdev)
+               return ret;
+       sc_ipc->fast_ipc = of_device_is_compatible(args.np, "fsl,imx8-mu-scu");
++      of_node_put(args.np);
+       num_channel = sc_ipc->fast_ipc ? 2 : SCU_MU_CHAN_NUM;
+       for (i = 0; i < num_channel; i++) {
+-- 
+2.39.5
+
diff --git a/queue-5.4/ipv6-fix-memleak-of-nhc_pcpu_rth_output-in-fib_check.patch b/queue-5.4/ipv6-fix-memleak-of-nhc_pcpu_rth_output-in-fib_check.patch
new file mode 100644 (file)
index 0000000..3bf76a1
--- /dev/null
@@ -0,0 +1,49 @@
+From 7507c31badba189bb05db9ba78f731ebbc34ae43 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 18:03:25 -0700
+Subject: ipv6: Fix memleak of nhc_pcpu_rth_output in fib_check_nh_v6_gw().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 9740890ee20e01f99ff1dde84c63dcf089fabb98 ]
+
+fib_check_nh_v6_gw() expects that fib6_nh_init() cleans up everything
+when it fails.
+
+Commit 7dd73168e273 ("ipv6: Always allocate pcpu memory in a fib6_nh")
+moved fib_nh_common_init() before alloc_percpu_gfp() within fib6_nh_init()
+but forgot to add cleanup for fib6_nh->nh_common.nhc_pcpu_rth_output in
+case it fails to allocate fib6_nh->rt6i_pcpu, resulting in memleak.
+
+Let's call fib_nh_common_release() and clear nhc_pcpu_rth_output in the
+error path.
+
+Note that we can remove the fib6_nh_release() call in nh_create_ipv6()
+later in net-next.git.
+
+Fixes: 7dd73168e273 ("ipv6: Always allocate pcpu memory in a fib6_nh")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250312010333.56001-1-kuniyu@amazon.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/route.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 99908861246d3..d037979f6579f 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -3549,7 +3549,8 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+               in6_dev_put(idev);
+       if (err) {
+-              lwtstate_put(fib6_nh->fib_nh_lws);
++              fib_nh_common_release(&fib6_nh->nh_common);
++              fib6_nh->nh_common.nhc_pcpu_rth_output = NULL;
+               fib6_nh->fib_nh_lws = NULL;
+               if (dev)
+                       dev_put(dev);
+-- 
+2.39.5
+
diff --git a/queue-5.4/ipv6-set-errno-after-ip_fib_metrics_init-in-ip6_rout.patch b/queue-5.4/ipv6-set-errno-after-ip_fib_metrics_init-in-ip6_rout.patch
new file mode 100644 (file)
index 0000000..d23d2ee
--- /dev/null
@@ -0,0 +1,57 @@
+From c9ba3a092ab1a9eff8e7d15141ffc6456f9aa4dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 18:38:48 -0700
+Subject: ipv6: Set errno after ip_fib_metrics_init() in
+ ip6_route_info_create().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 9a81fc3480bf5dbe2bf80e278c440770f6ba2692 ]
+
+While creating a new IPv6, we could get a weird -ENOMEM when
+RTA_NH_ID is set and either of the conditions below is true:
+
+  1) CONFIG_IPV6_SUBTREES is enabled and rtm_src_len is specified
+  2) nexthop_get() fails
+
+e.g.)
+
+  # strace ip -6 route add fe80::dead:beef:dead:beef nhid 1 from ::
+  recvmsg(3, {msg_iov=[{iov_base=[...[
+    {error=-ENOMEM, msg=[... [...]]},
+    [{nla_len=49, nla_type=NLMSGERR_ATTR_MSG}, "Nexthops can not be used with so"...]
+  ]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 148
+
+Let's set err explicitly after ip_fib_metrics_init() in
+ip6_route_info_create().
+
+Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20250312013854.61125-1-kuniyu@amazon.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/route.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index d037979f6579f..b321cec711270 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -3729,10 +3729,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
+       if (nh) {
+               if (rt->fib6_src.plen) {
+                       NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
++                      err = -EINVAL;
+                       goto out_free;
+               }
+               if (!nexthop_get(nh)) {
+                       NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
++                      err = -ENOENT;
+                       goto out_free;
+               }
+               rt->nh = nh;
+-- 
+2.39.5
+
diff --git a/queue-5.4/net-atm-fix-use-after-free-in-lec_send.patch b/queue-5.4/net-atm-fix-use-after-free-in-lec_send.patch
new file mode 100644 (file)
index 0000000..b3fa601
--- /dev/null
@@ -0,0 +1,46 @@
+From 364d348de401e6c3df14df003da07353d243e273 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Mar 2025 13:10:57 +0300
+Subject: net: atm: fix use after free in lec_send()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit f3009d0d6ab78053117f8857b921a8237f4d17b3 ]
+
+The ->send() operation frees skb so save the length before calling
+->send() to avoid a use after free.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/c751531d-4af4-42fe-affe-6104b34b791d@stanley.mountain
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/atm/lec.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/atm/lec.c b/net/atm/lec.c
+index 3625a04a6c701..07d4f256c38c1 100644
+--- a/net/atm/lec.c
++++ b/net/atm/lec.c
+@@ -181,6 +181,7 @@ static void
+ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
+ {
+       struct net_device *dev = skb->dev;
++      unsigned int len = skb->len;
+       ATM_SKB(skb)->vcc = vcc;
+       atm_account_tx(vcc, skb);
+@@ -191,7 +192,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
+       }
+       dev->stats.tx_packets++;
+-      dev->stats.tx_bytes += skb->len;
++      dev->stats.tx_bytes += len;
+ }
+ static void lec_tx_timeout(struct net_device *dev)
+-- 
+2.39.5
+
diff --git a/queue-5.4/net-neighbor-add-missing-policy-for-ndtpa_queue_lenb.patch b/queue-5.4/net-neighbor-add-missing-policy-for-ndtpa_queue_lenb.patch
new file mode 100644 (file)
index 0000000..295cd56
--- /dev/null
@@ -0,0 +1,39 @@
+From a9ff18554bc6d93f822f66450826f655b3265f36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 16 Mar 2025 00:51:13 +0800
+Subject: net/neighbor: add missing policy for NDTPA_QUEUE_LENBYTES
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit 90a7138619a0c55e2aefaad27b12ffc2ddbeed78 ]
+
+Previous commit 8b5c171bb3dc ("neigh: new unresolved queue limits")
+introduces new netlink attribute NDTPA_QUEUE_LENBYTES to represent
+approximative value for deprecated QUEUE_LEN. However, it forgot to add
+the associated nla_policy in nl_ntbl_parm_policy array. Fix it with one
+simple NLA_U32 type policy.
+
+Fixes: 8b5c171bb3dc ("neigh: new unresolved queue limits")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Link: https://patch.msgid.link/20250315165113.37600-1-linma@zju.edu.cn
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/neighbour.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 7ef3630ea20d7..005cc630f74f3 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -2176,6 +2176,7 @@ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
+ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
+       [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
+       [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
++      [NDTPA_QUEUE_LENBYTES]          = { .type = NLA_U32 },
+       [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
+       [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
+       [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
+-- 
+2.39.5
+
diff --git a/queue-5.4/rdma-bnxt_re-avoid-clearing-vlan_id-mask-in-modify-q.patch b/queue-5.4/rdma-bnxt_re-avoid-clearing-vlan_id-mask-in-modify-q.patch
new file mode 100644 (file)
index 0000000..9f757db
--- /dev/null
@@ -0,0 +1,42 @@
+From aef2d7aec885502fe70e367ad74eb0f8fcca277e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Mar 2025 22:16:36 -0700
+Subject: RDMA/bnxt_re: Avoid clearing VLAN_ID mask in modify qp path
+
+From: Saravanan Vajravel <saravanan.vajravel@broadcom.com>
+
+[ Upstream commit 81c0db302a674f8004ed805393d17fd76f552e83 ]
+
+Driver is always clearing the mask that sets the VLAN ID/Service Level
+in the adapter. Recent change for supporting multiple traffic class
+exposed this issue.
+
+Allow setting SL and VLAN_ID while QP is moved from INIT to RTR state.
+
+Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver")
+Fixes: c64b16a37b6d ("RDMA/bnxt_re: Support different traffic class")
+Signed-off-by: Saravanan Vajravel <saravanan.vajravel@broadcom.com>
+Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
+Link: https://patch.msgid.link/1741670196-2919-1-git-send-email-selvin.xavier@broadcom.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+index 18b579c8a8c55..e14c9f83cf62b 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
++++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+@@ -1105,8 +1105,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
+                       qp->path_mtu =
+                               CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+               }
+-              qp->modify_flags &=
+-                      ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+               /* Bono FW require the max_dest_rd_atomic to be >= 1 */
+               if (qp->max_dest_rd_atomic < 1)
+                       qp->max_dest_rd_atomic = 1;
+-- 
+2.39.5
+
diff --git a/queue-5.4/rdma-hns-fix-wrong-value-of-max_sge_rd.patch b/queue-5.4/rdma-hns-fix-wrong-value-of-max_sge_rd.patch
new file mode 100644 (file)
index 0000000..8b0c395
--- /dev/null
@@ -0,0 +1,38 @@
+From eb05ea65ad1d8acc78dc9959d5efdaf8b06687d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 16:48:57 +0800
+Subject: RDMA/hns: Fix wrong value of max_sge_rd
+
+From: Junxian Huang <huangjunxian6@hisilicon.com>
+
+[ Upstream commit 6b5e41a8b51fce520bb09bd651a29ef495e990de ]
+
+There is no difference between the sge of READ and non-READ
+operations in hns RoCE. Set max_sge_rd to the same value as
+max_send_sge.
+
+Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
+Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
+Link: https://patch.msgid.link/20250311084857.3803665-8-huangjunxian6@hisilicon.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/hns/hns_roce_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
+index 70991649dc693..4c5179e45dadc 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_main.c
++++ b/drivers/infiniband/hw/hns/hns_roce_main.c
+@@ -198,7 +198,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
+                                 IB_DEVICE_RC_RNR_NAK_GEN;
+       props->max_send_sge = hr_dev->caps.max_sq_sg;
+       props->max_recv_sge = hr_dev->caps.max_rq_sg;
+-      props->max_sge_rd = 1;
++      props->max_sge_rd = hr_dev->caps.max_sq_sg;
+       props->max_cq = hr_dev->caps.num_cqs;
+       props->max_cqe = hr_dev->caps.max_cqes;
+       props->max_mr = hr_dev->caps.num_mtpts;
+-- 
+2.39.5
+
index b8e5f3aaa0223881c6993c9b678a95259743790e..cd802312011ea65569686f430d692a404950aef9 100644 (file)
@@ -48,3 +48,12 @@ asoc-codecs-wm0010-fix-error-handling-path-in-wm0010.patch
 i2c-ali1535-fix-an-error-handling-path-in-ali1535_pr.patch
 i2c-ali15x3-fix-an-error-handling-path-in-ali15x3_pr.patch
 i2c-sis630-fix-an-error-handling-path-in-sis630_prob.patch
+firmware-imx-scu-fix-of-node-leak-in-.probe.patch
+xfrm_output-force-software-gso-only-in-tunnel-mode.patch
+rdma-bnxt_re-avoid-clearing-vlan_id-mask-in-modify-q.patch
+rdma-hns-fix-wrong-value-of-max_sge_rd.patch
+bluetooth-fix-error-code-in-chan_alloc_skb_cb.patch
+ipv6-fix-memleak-of-nhc_pcpu_rth_output-in-fib_check.patch
+ipv6-set-errno-after-ip_fib_metrics_init-in-ip6_rout.patch
+net-atm-fix-use-after-free-in-lec_send.patch
+net-neighbor-add-missing-policy-for-ndtpa_queue_lenb.patch
diff --git a/queue-5.4/xfrm_output-force-software-gso-only-in-tunnel-mode.patch b/queue-5.4/xfrm_output-force-software-gso-only-in-tunnel-mode.patch
new file mode 100644 (file)
index 0000000..7aa6c6e
--- /dev/null
@@ -0,0 +1,77 @@
+From 8307cb0af241b70036a84c5838808ed0e6a52a83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Feb 2025 12:52:48 +0200
+Subject: xfrm_output: Force software GSO only in tunnel mode
+
+From: Cosmin Ratiu <cratiu@nvidia.com>
+
+[ Upstream commit 0aae2867aa6067f73d066bc98385e23c8454a1d7 ]
+
+The cited commit fixed a software GSO bug with VXLAN + IPSec in tunnel
+mode. Unfortunately, it is slightly broader than necessary, as it also
+severely affects performance for Geneve + IPSec transport mode over a
+device capable of both HW GSO and IPSec crypto offload. In this case,
+xfrm_output unnecessarily triggers software GSO instead of letting the
+HW do it. In simple iperf3 tests over Geneve + IPSec transport mode over
+a back-2-back pair of NICs with MTU 1500, the performance was observed
+to be up to 6x worse when doing software GSO compared to leaving it to
+the hardware.
+
+This commit makes xfrm_output only trigger software GSO in crypto
+offload cases for already encapsulated packets in tunnel mode, as not
+doing so would then cause the inner tunnel skb->inner_networking_header
+to be overwritten and break software GSO for that packet later if the
+device turns out to not be capable of HW GSO.
+
+Taking a closer look at the conditions for the original bug, to better
+understand the reasons for this change:
+- vxlan_build_skb -> iptunnel_handle_offloads sets inner_protocol and
+  inner network header.
+- then, udp_tunnel_xmit_skb -> ip_tunnel_xmit adds outer transport and
+  network headers.
+- later in the xmit path, xfrm_output -> xfrm_outer_mode_output ->
+  xfrm4_prepare_output -> xfrm4_tunnel_encap_add overwrites the inner
+  network header with the one set in ip_tunnel_xmit before adding the
+  second outer header.
+- __dev_queue_xmit -> validate_xmit_skb checks whether GSO segmentation
+  needs to happen based on dev features. In the original bug, the hw
+  couldn't segment the packets, so skb_gso_segment was invoked.
+- deep in the .gso_segment callback machinery, __skb_udp_tunnel_segment
+  tries to use the wrong inner network header, expecting the one set in
+  iptunnel_handle_offloads but getting the one set by xfrm instead.
+- a bit later, ipv6_gso_segment accesses the wrong memory based on that
+  wrong inner network header.
+
+With the new change, the original bug (or similar ones) cannot happen
+again, as xfrm will now trigger software GSO before applying a tunnel.
+This concern doesn't exist in packet offload mode, when the HW adds
+encapsulation headers. For the non-offloaded packets (crypto in SW),
+software GSO is still done unconditionally in the else branch.
+
+Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Yael Chemla <ychemla@nvidia.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Fixes: a204aef9fd77 ("xfrm: call xfrm_output_gso when inner_protocol is set in xfrm_output")
+Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
+index 9a6a8c4008ab8..4499ff95e0e8c 100644
+--- a/net/xfrm/xfrm_output.c
++++ b/net/xfrm/xfrm_output.c
+@@ -586,7 +586,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
+               xfrm_state_hold(x);
+               if (skb_is_gso(skb)) {
+-                      if (skb->inner_protocol)
++                      if (skb->inner_protocol && x->props.mode == XFRM_MODE_TUNNEL)
+                               return xfrm_output_gso(net, sk, skb);
+                       skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
+-- 
+2.39.5
+