--- /dev/null
+From e42e70ad6ae2ae511a6143d2e8da929366e58bd9 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 18:23:58 -0800
+Subject: af_packet: fix data-race in packet_setsockopt / packet_setsockopt
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit e42e70ad6ae2ae511a6143d2e8da929366e58bd9 upstream.
+
+When packet_setsockopt( PACKET_FANOUT_DATA ) reads po->fanout,
+no lock is held, meaning that another thread can change po->fanout.
+
+Given that po->fanout can only be set once during the socket lifetime
+(it is only cleared from fanout_release()), we can use
+READ_ONCE()/WRITE_ONCE() to document the race.
+
+BUG: KCSAN: data-race in packet_setsockopt / packet_setsockopt
+
+write to 0xffff88813ae8e300 of 8 bytes by task 14653 on cpu 0:
+ fanout_add net/packet/af_packet.c:1791 [inline]
+ packet_setsockopt+0x22fe/0x24a0 net/packet/af_packet.c:3931
+ __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
+ __do_sys_setsockopt net/socket.c:2191 [inline]
+ __se_sys_setsockopt net/socket.c:2188 [inline]
+ __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+read to 0xffff88813ae8e300 of 8 bytes by task 14654 on cpu 1:
+ packet_setsockopt+0x691/0x24a0 net/packet/af_packet.c:3935
+ __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
+ __do_sys_setsockopt net/socket.c:2191 [inline]
+ __se_sys_setsockopt net/socket.c:2188 [inline]
+ __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+value changed: 0x0000000000000000 -> 0xffff888106f8c000
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 14654 Comm: syz-executor.3 Not tainted 5.16.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: 47dceb8ecdc1 ("packet: add classic BPF fanout mode")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220201022358.330621-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1753,7 +1753,10 @@ static int fanout_add(struct sock *sk, s
+ err = -ENOSPC;
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
+ __dev_remove_pack(&po->prot_hook);
+- po->fanout = match;
++
++ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
++ WRITE_ONCE(po->fanout, match);
++
+ po->rollover = rollover;
+ rollover = NULL;
+ refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
+@@ -3906,7 +3909,8 @@ packet_setsockopt(struct socket *sock, i
+ }
+ case PACKET_FANOUT_DATA:
+ {
+- if (!po->fanout)
++ /* Paired with the WRITE_ONCE() in fanout_add() */
++ if (!READ_ONCE(po->fanout))
+ return -EINVAL;
+
+ return fanout_set_data(po, optval, optlen);
--- /dev/null
+From 24f6008564183aa120d07c03d9289519c2fe02af Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 20 Jan 2022 11:04:01 -0600
+Subject: cgroup-v1: Require capabilities to set release_agent
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 24f6008564183aa120d07c03d9289519c2fe02af upstream.
+
+The cgroup release_agent is called with call_usermodehelper. The function
+call_usermodehelper starts the release_agent with a full set of capabilities.
+Therefore require capabilities when setting the release_agent.
+
+Reported-by: Tabitha Sable <tabitha.c.sable@gmail.com>
+Tested-by: Tabitha Sable <tabitha.c.sable@gmail.com>
+Fixes: 81a6a5cdd2c5 ("Task Control Groups: automatic userspace notification of idle cgroups")
+Cc: stable@vger.kernel.org # v2.6.24+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-v1.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -552,6 +552,14 @@ static ssize_t cgroup_release_agent_writ
+
+ BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+
++ /*
++ * Release agent gets called with all capabilities,
++ * require capabilities to set release agent.
++ */
++ if ((of->file->f_cred->user_ns != &init_user_ns) ||
++ !capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
+ cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!cgrp)
+ return -ENODEV;
+@@ -963,6 +971,12 @@ int cgroup1_parse_param(struct fs_contex
+ /* Specifying two release agents is forbidden */
+ if (ctx->release_agent)
+ return invalfc(fc, "release_agent respecified");
++ /*
++ * Release agent gets called with all capabilities,
++ * require capabilities to set release agent.
++ */
++ if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
++ return invalfc(fc, "Setting release_agent not allowed");
+ ctx->release_agent = param->string;
+ param->string = NULL;
+ break;
--- /dev/null
+From c80d401c52a2d1baf2a5afeb06f0ffe678e56d23 Mon Sep 17 00:00:00 2001
+From: Tianchen Ding <dtcccc@linux.alibaba.com>
+Date: Tue, 18 Jan 2022 18:05:18 +0800
+Subject: cpuset: Fix the bug that subpart_cpus updated wrongly in update_cpumask()
+
+From: Tianchen Ding <dtcccc@linux.alibaba.com>
+
+commit c80d401c52a2d1baf2a5afeb06f0ffe678e56d23 upstream.
+
+subparts_cpus should be limited as a subset of cpus_allowed, but it is
+updated wrongly by using cpumask_andnot(). Use cpumask_and() instead to
+fix it.
+
+Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag")
+Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1597,8 +1597,7 @@ static int update_cpumask(struct cpuset
+ * Make sure that subparts_cpus is a subset of cpus_allowed.
+ */
+ if (cs->nr_subparts_cpus) {
+- cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
+- cs->cpus_allowed);
++ cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
+ cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
+ }
+ spin_unlock_irq(&callback_lock);
--- /dev/null
+From cad014b7b5a6897d8c4fad13e2888978bfb7a53f Mon Sep 17 00:00:00 2001
+From: Sasha Neftin <sasha.neftin@intel.com>
+Date: Tue, 7 Dec 2021 13:23:42 +0200
+Subject: e1000e: Handshake with CSME starts from ADL platforms
+
+From: Sasha Neftin <sasha.neftin@intel.com>
+
+commit cad014b7b5a6897d8c4fad13e2888978bfb7a53f upstream.
+
+Handshake with CSME/AMT on non-provisioned platforms during S0ix flow
+is not supported on TGL platform and can cause a HW unit hang. Update
+the handshake with CSME flow to start from the ADL platform.
+
+Fixes: 3e55d231716e ("e1000e: Add handshake with the CSME to support S0ix")
+Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
+Tested-by: Nechama Kraus <nechamax.kraus@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/e1000e/netdev.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -6346,7 +6346,8 @@ static void e1000e_s0ix_entry_flow(struc
+ u32 mac_data;
+ u16 phy_data;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
+ /* Request ME configure the device for S0ix */
+ mac_data = er32(H2ME);
+ mac_data |= E1000_H2ME_START_DPG;
+@@ -6495,7 +6496,8 @@ static void e1000e_s0ix_exit_flow(struct
+ u16 phy_data;
+ u32 i = 0;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
+ /* Request ME unconfigure the device from S0ix */
+ mac_data = er32(H2ME);
+ mac_data &= ~E1000_H2ME_START_DPG;
--- /dev/null
+From ee12595147ac1fbfb5bcb23837e26dd58d94b15d Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 28 Jan 2022 22:57:01 +0300
+Subject: fanotify: Fix stale file descriptor in copy_event_to_user()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit ee12595147ac1fbfb5bcb23837e26dd58d94b15d upstream.
+
+This code calls fd_install() which gives the userspace access to the fd.
+Then if copy_info_records_to_user() fails it calls put_unused_fd(fd) but
+that will not release it and leads to a stale entry in the file
+descriptor table.
+
+Generally you can't trust the fd after a call to fd_install(). The fix
+is to delay the fd_install() until everything else has succeeded.
+
+Fortunately it requires CAP_SYS_ADMIN to reach this code so the security
+impact is less.
+
+Fixes: f644bc449b37 ("fanotify: fix copy_event_to_user() fid error clean up")
+Link: https://lore.kernel.org/r/20220128195656.GA26981@kili
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Mathias Krause <minipli@grsecurity.net>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/notify/fanotify/fanotify_user.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -611,9 +611,6 @@ static ssize_t copy_event_to_user(struct
+ if (fanotify_is_perm_event(event->mask))
+ FANOTIFY_PERM(event)->fd = fd;
+
+- if (f)
+- fd_install(fd, f);
+-
+ if (info_mode) {
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
+@@ -621,6 +618,9 @@ static ssize_t copy_event_to_user(struct
+ goto out_close_fd;
+ }
+
++ if (f)
++ fd_install(fd, f);
++
+ return metadata.event_len;
+
+ out_close_fd:
--- /dev/null
+From 3d2504663c41104b4359a15f35670cfa82de1bbf Mon Sep 17 00:00:00 2001
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Date: Tue, 14 Dec 2021 10:08:22 +0000
+Subject: i40e: Fix reset bw limit when DCB enabled with 1 TC
+
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+
+commit 3d2504663c41104b4359a15f35670cfa82de1bbf upstream.
+
+There was an AQ error I40E_AQ_RC_EINVAL when trying
+to reset bw limit as part of bw allocation setup.
+This was caused by trying to reset bw limit with
+DCB enabled. Bw limit should not be reset when
+DCB is enabled. The code was relying on the pf->flags
+to check if DCB is enabled but if only 1 TC is available
+this flag will not be set even though DCB is enabled.
+Add a check for number of TC and if it is 1
+don't try to reset bw limit even if pf->flags shows
+DCB as disabled.
+
+Fixes: fa38e30ac73f ("i40e: Fix for Tx timeouts when interface is brought up if DCB is enabled")
+Suggested-by: Alexander Lobakin <alexandr.lobakin@intel.com> # Flatten the condition
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Tested-by: Imam Hassan Reza Biswas <imam.hassan.reza.biswas@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -5372,7 +5372,15 @@ static int i40e_vsi_configure_bw_alloc(s
+ /* There is no need to reset BW when mqprio mode is on. */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return 0;
+- if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
++
++ if (!vsi->mqprio_qopt.qopt.hw) {
++ if (pf->flags & I40E_FLAG_DCB_ENABLED)
++ goto skip_reset;
++
++ if (IS_ENABLED(CONFIG_I40E_DCB) &&
++ i40e_dcb_hw_get_num_tc(&pf->hw) == 1)
++ goto skip_reset;
++
+ ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+ if (ret)
+ dev_info(&pf->pdev->dev,
+@@ -5380,6 +5388,8 @@ static int i40e_vsi_configure_bw_alloc(s
+ vsi->seid);
+ return ret;
+ }
++
++skip_reset:
+ memset(&bw_data, 0, sizeof(bw_data));
+ bw_data.tc_valid_bits = enabled_tc;
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
--- /dev/null
+From 6533e558c6505e94c3e0ed4281ed5e31ec985f4d Mon Sep 17 00:00:00 2001
+From: Karen Sornek <karen.sornek@intel.com>
+Date: Wed, 12 Jan 2022 10:19:47 +0100
+Subject: i40e: Fix reset path while removing the driver
+
+From: Karen Sornek <karen.sornek@intel.com>
+
+commit 6533e558c6505e94c3e0ed4281ed5e31ec985f4d upstream.
+
+Fix the crash in kernel while dereferencing the NULL pointer,
+when the driver is unloaded and simultaneously the VSI rings
+are being stopped.
+
+The hardware requires 50msec in order to finish RX queues
+disable. For this purpose the driver spins in mdelay function
+for the operation to be completed.
+
+For example changing number of queues which requires reset would
+fail in the following call stack:
+
+1) i40e_prep_for_reset
+2) i40e_pf_quiesce_all_vsi
+3) i40e_quiesce_vsi
+4) i40e_vsi_close
+5) i40e_down
+6) i40e_vsi_stop_rings
+7) i40e_vsi_control_rx -> disable requires the delay of 50msecs
+8) continue back in i40e_down function where
+ i40e_clean_tx_ring(vsi->tx_rings[i]) is going to crash
+
+When the driver was spinning vsi_release called
+i40e_vsi_free_arrays where the vsi->tx_rings resources
+were freed and the pointer was set to NULL.
+
+Fixes: 5b6d4a7f20b0 ("i40e: Fix crash during removing i40e driver")
+Signed-off-by: Slawomir Laba <slawomirx.laba@intel.com>
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Karen Sornek <karen.sornek@intel.com>
+Tested-by: Gurucharan G <gurucharanx.g@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e.h | 1 +
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 19 ++++++++++++++++++-
+ 2 files changed, 19 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e.h
++++ b/drivers/net/ethernet/intel/i40e/i40e.h
+@@ -144,6 +144,7 @@ enum i40e_state_t {
+ __I40E_VIRTCHNL_OP_PENDING,
+ __I40E_RECOVERY_MODE,
+ __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */
++ __I40E_IN_REMOVE,
+ __I40E_VFS_RELEASING,
+ /* This must be last as it determines the size of the BITMAP */
+ __I40E_STATE_SIZE__,
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -10863,6 +10863,9 @@ static void i40e_reset_and_rebuild(struc
+ bool lock_acquired)
+ {
+ int ret;
++
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
+ /* Now we wait for GRST to settle out.
+ * We don't have to delete the VEBs or VSIs from the hw switch
+ * because the reset will make them disappear.
+@@ -12222,6 +12225,8 @@ int i40e_reconfig_rss_queues(struct i40e
+
+ vsi->req_queue_pairs = queue_count;
+ i40e_prep_for_reset(pf);
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return pf->alloc_rss_size;
+
+ pf->alloc_rss_size = new_rss_size;
+
+@@ -13048,6 +13053,10 @@ static int i40e_xdp_setup(struct i40e_vs
+ if (need_reset)
+ i40e_prep_for_reset(pf);
+
++ /* VSI shall be deleted in a moment, just return EINVAL */
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return -EINVAL;
++
+ old_prog = xchg(&vsi->xdp_prog, prog);
+
+ if (need_reset) {
+@@ -15938,8 +15947,13 @@ static void i40e_remove(struct pci_dev *
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
+
+- while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
++ /* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
++ * flags, once they are set, i40e_rebuild should not be called as
++ * i40e_prep_for_reset always returns early.
++ */
++ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+ usleep_range(1000, 2000);
++ set_bit(__I40E_IN_REMOVE, pf->state);
+
+ if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+ set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
+@@ -16138,6 +16152,9 @@ static void i40e_pci_error_reset_done(st
+ {
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
++
+ i40e_reset_and_rebuild(pf, false, false);
+ }
+
--- /dev/null
+From 63e4b45c82ed1bde979da7052229a4229ce9cabf Mon Sep 17 00:00:00 2001
+From: Georgi Valkov <gvalkov@abv.bg>
+Date: Tue, 1 Feb 2022 08:16:18 +0100
+Subject: ipheth: fix EOVERFLOW in ipheth_rcvbulk_callback
+
+From: Georgi Valkov <gvalkov@abv.bg>
+
+commit 63e4b45c82ed1bde979da7052229a4229ce9cabf upstream.
+
+When rx_buf is allocated we need to account for IPHETH_IP_ALIGN,
+which reduces the usable size by 2 bytes. Otherwise we have 1512
+bytes usable instead of 1514, and if we receive more than 1512
+bytes, ipheth_rcvbulk_callback is called with status -EOVERFLOW,
+after which the driver malfunctions and all communication stops.
+
+Resolves ipheth 2-1:4.2: ipheth_rcvbulk_callback: urb status: -75
+
+Fixes: f33d9e2b48a3 ("usbnet: ipheth: fix connectivity with iOS 14")
+Signed-off-by: Georgi Valkov <gvalkov@abv.bg>
+Tested-by: Jan Kiszka <jan.kiszka@siemens.com>
+Link: https://lore.kernel.org/all/B60B8A4B-92A0-49B3-805D-809A2433B46C@abv.bg/
+Link: https://lore.kernel.org/all/24851bd2769434a5fc24730dce8e8a984c5a4505.1643699778.git.jan.kiszka@siemens.com/
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/ipheth.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/usb/ipheth.c
++++ b/drivers/net/usb/ipheth.c
+@@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct iphe
+ if (tx_buf == NULL)
+ goto free_rx_urb;
+
+- rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE,
++ rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ GFP_KERNEL, &rx_urb->transfer_dma);
+ if (rx_buf == NULL)
+ goto free_tx_buf;
+@@ -146,7 +146,7 @@ error_nomem:
+
+ static void ipheth_free_urbs(struct ipheth_device *iphone)
+ {
+- usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf,
++ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf,
+ iphone->rx_urb->transfer_dma);
+ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf,
+ iphone->tx_urb->transfer_dma);
+@@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct iphet
+
+ usb_fill_bulk_urb(dev->rx_urb, udev,
+ usb_rcvbulkpipe(udev, dev->bulk_in),
+- dev->rx_buf, IPHETH_BUF_SIZE,
++ dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ ipheth_rcvbulk_callback,
+ dev);
+ dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
--- /dev/null
+From d19a7af73b5ecaac8168712d18be72b9db166768 Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 18 Jan 2022 17:00:51 -0500
+Subject: lockd: fix failure to cleanup client locks
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+commit d19a7af73b5ecaac8168712d18be72b9db166768 upstream.
+
+In my testing, we're sometimes hitting the request->fl_flags & FL_EXISTS
+case in posix_lock_inode, presumably just by random luck since we're not
+actually initializing fl_flags here.
+
+This probably didn't matter before commit 7f024fcd5c97 ("Keep read and
+write fds with each nlm_file") since we wouldn't previously unlock
+unless we knew there were locks.
+
+But now it causes lockd to give up on removing more locks.
+
+We could just initialize fl_flags, but really it seems dubious to be
+calling vfs_lock_file with random values in some of the fields.
+
+Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file")
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+[ cel: fixed checkpatch.pl nit ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/lockd/svcsubs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -180,6 +180,7 @@ static int nlm_unlock_files(struct nlm_f
+ {
+ struct file_lock lock;
+
++ locks_init_lock(&lock);
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
--- /dev/null
+From 6e7f90d163afa8fc2efd6ae318e7c20156a5621f Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 18 Jan 2022 17:00:16 -0500
+Subject: lockd: fix server crash on reboot of client holding lock
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+commit 6e7f90d163afa8fc2efd6ae318e7c20156a5621f upstream.
+
+I thought I was iterating over the array when actually the iteration is
+over the values contained in the array?
+
+Ugh, keep it simple.
+
+Symptoms were a null deference in vfs_lock_file() when an NFSv3 client
+that previously held a lock came back up and sent a notify.
+
+Reported-by: Jonathan Woithe <jwoithe@just42.net>
+Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file")
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/lockd/svcsubs.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -179,19 +179,20 @@ nlm_delete_file(struct nlm_file *file)
+ static int nlm_unlock_files(struct nlm_file *file)
+ {
+ struct file_lock lock;
+- struct file *f;
+
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
+- for (f = file->f_file[0]; f <= file->f_file[1]; f++) {
+- if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
+- pr_warn("lockd: unlock failure in %s:%d\n",
+- __FILE__, __LINE__);
+- return 1;
+- }
+- }
++ if (file->f_file[O_RDONLY] &&
++ vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL))
++ goto out_err;
++ if (file->f_file[O_WRONLY] &&
++ vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL))
++ goto out_err;
+ return 0;
++out_err:
++ pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__);
++ return 1;
+ }
+
+ /*
--- /dev/null
+From 7674b7b559b683478c3832527c59bceb169e701d Mon Sep 17 00:00:00 2001
+From: Raju Rangoju <Raju.Rangoju@amd.com>
+Date: Thu, 27 Jan 2022 11:32:22 +0530
+Subject: net: amd-xgbe: ensure to reset the tx_timer_active flag
+
+From: Raju Rangoju <Raju.Rangoju@amd.com>
+
+commit 7674b7b559b683478c3832527c59bceb169e701d upstream.
+
+Ensure to reset the tx_timer_active flag in xgbe_stop(),
+otherwise a port restart may result in tx timeout due to
+uncleared flag.
+
+Fixes: c635eaacbf77 ("amd-xgbe: Remove Tx coalescing")
+Co-developed-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20220127060222.453371-1-Raju.Rangoju@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe
+ if (!channel->tx_ring)
+ break;
+
++ /* Deactivate the Tx timer */
+ del_timer_sync(&channel->tx_timer);
++ channel->tx_timer_active = 0;
+ }
+ }
+
--- /dev/null
+From 5aac9108a180fc06e28d4e7fb00247ce603b72ee Mon Sep 17 00:00:00 2001
+From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+Date: Thu, 27 Jan 2022 14:50:03 +0530
+Subject: net: amd-xgbe: Fix skb data length underflow
+
+From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+
+commit 5aac9108a180fc06e28d4e7fb00247ce603b72ee upstream.
+
+There will be BUG_ON() triggered in include/linux/skbuff.h leading to
+intermittent kernel panic, when the skb length underflow is detected.
+
+Fix this by dropping the packet if such length underflows are seen
+because of inconsistencies in the hardware descriptors.
+
+Fixes: 622c36f143fc ("amd-xgbe: Fix jumbo MTU processing on newer hardware")
+Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20220127092003.2812745-1-Shyam-sundar.S-k@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -2557,6 +2557,14 @@ read_again:
+ buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
+ len += buf2_len;
+
++ if (buf2_len > rdata->rx.buf.dma_len) {
++ /* Hardware inconsistency within the descriptors
++ * that has resulted in a length underflow.
++ */
++ error = 1;
++ goto skip_data;
++ }
++
+ if (!skb) {
+ skb = xgbe_create_skb(pdata, napi, rdata,
+ buf1_len);
+@@ -2586,8 +2594,10 @@ skip_data:
+ if (!last || context_next)
+ goto read_again;
+
+- if (!skb)
++ if (!skb || error) {
++ dev_kfree_skb(skb);
+ goto next_packet;
++ }
+
+ /* Be sure we don't exceed the configured MTU */
+ max_len = netdev->mtu + ETH_HLEN;
--- /dev/null
+From 350d9a823734b5a7e767cddc3bdde5f0bcbb7ff4 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Thu, 6 Jan 2022 18:45:26 +0200
+Subject: net/mlx5: Bridge, ensure dev_name is null-terminated
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit 350d9a823734b5a7e767cddc3bdde5f0bcbb7ff4 upstream.
+
+Even though net_device->name is guaranteed to be null-terminated string of
+size<=IFNAMSIZ, the test robot complains that return value of netdev_name()
+can be larger:
+
+In file included from include/trace/define_trace.h:102,
+ from drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:113,
+ from drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c:12:
+ drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h: In function 'trace_event_raw_event_mlx5_esw_bridge_fdb_template':
+>> drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:24:29: warning: 'strncpy' output may be truncated copying 16 bytes from a string of length 20 [-Wstringop-truncation]
+ 24 | strncpy(__entry->dev_name,
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~
+ 25 | netdev_name(fdb->dev),
+ | ~~~~~~~~~~~~~~~~~~~~~~
+ 26 | IFNAMSIZ);
+ | ~~~~~~~~~
+
+This is caused by the fact that default value of IFNAMSIZ is 16, while
+placeholder value that is returned by netdev_name() for unnamed net devices
+is larger than that.
+
+The offending code is in a tracing function that is only called for mlx5
+representors, so there is no straightforward way to reproduce the issue but
+let's fix it for correctness sake by replacing strncpy() with strscpy() to
+ensure that resulting string is always null-terminated.
+
+Fixes: 9724fd5d9c2a ("net/mlx5: Bridge, add tracepoints")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+@@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_
+ __field(unsigned int, used)
+ ),
+ TP_fast_assign(
+- strncpy(__entry->dev_name,
++ strscpy(__entry->dev_name,
+ netdev_name(fdb->dev),
+ IFNAMSIZ);
+ memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
--- /dev/null
+From 880b517691908fb753019b9b27cd082e7617debd Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Mon, 24 Jan 2022 13:56:26 +0200
+Subject: net/mlx5: Bridge, Fix devlink deadlock on net namespace deletion
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit 880b517691908fb753019b9b27cd082e7617debd upstream.
+
+When changing mode to switchdev, rep bridge init registered to netdevice
+notifier holds the devlink lock and then takes pernet_ops_rwsem.
+At that time deleting a netns holds pernet_ops_rwsem and then takes
+the devlink lock.
+
+Example sequence is:
+$ ip netns add foo
+$ devlink dev eswitch set pci/0000:00:08.0 mode switchdev &
+$ ip netns del foo
+
+deleting netns trace:
+
+[ 1185.365555] ? devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.368331] ? mutex_lock_io_nested+0x13f0/0x13f0
+[ 1185.370984] ? xt_find_table+0x40/0x100
+[ 1185.373244] ? __mutex_lock+0x24a/0x15a0
+[ 1185.375494] ? net_generic+0xa0/0x1c0
+[ 1185.376844] ? wait_for_completion_io+0x280/0x280
+[ 1185.377767] ? devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.378686] devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.379579] ? devlink_nl_cmd_get_dumpit+0x3a0/0x3a0
+[ 1185.380557] ? xt_find_table+0xda/0x100
+[ 1185.381367] cleanup_net+0x372/0x8e0
+
+changing mode to switchdev trace:
+
+[ 1185.411267] down_write+0x13a/0x150
+[ 1185.412029] ? down_write_killable+0x180/0x180
+[ 1185.413005] register_netdevice_notifier+0x1e/0x210
+[ 1185.414000] mlx5e_rep_bridge_init+0x181/0x360 [mlx5_core]
+[ 1185.415243] mlx5e_uplink_rep_enable+0x269/0x480 [mlx5_core]
+[ 1185.416464] ? mlx5e_uplink_rep_disable+0x210/0x210 [mlx5_core]
+[ 1185.417749] mlx5e_attach_netdev+0x232/0x400 [mlx5_core]
+[ 1185.418906] mlx5e_netdev_attach_profile+0x15b/0x1e0 [mlx5_core]
+[ 1185.420172] mlx5e_netdev_change_profile+0x15a/0x1d0 [mlx5_core]
+[ 1185.421459] mlx5e_vport_rep_load+0x557/0x780 [mlx5_core]
+[ 1185.422624] ? mlx5e_stats_grp_vport_rep_num_stats+0x10/0x10 [mlx5_core]
+[ 1185.424006] mlx5_esw_offloads_rep_load+0xdb/0x190 [mlx5_core]
+[ 1185.425277] esw_offloads_enable+0xd74/0x14a0 [mlx5_core]
+
+Fix this by registering rep bridges for per net netdev notifier
+instead of global one, which operates on the net namespace without holding
+the pernet_ops_rwsem.
+
+Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -491,7 +491,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_
+ }
+
+ br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+- err = register_netdevice_notifier(&br_offloads->netdev_nb);
++ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+ err);
+@@ -526,7 +526,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+- unregister_netdevice_notifier(&br_offloads->netdev_nb);
++ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ unregister_switchdev_notifier(&br_offloads->nb);
+ destroy_workqueue(br_offloads->wq);
--- /dev/null
+From 04f8c12f031fcd0ffa0c72822eb665ceb2c872e7 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Thu, 6 Jan 2022 16:40:18 +0200
+Subject: net/mlx5: Bridge, take rtnl lock in init error handler
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit 04f8c12f031fcd0ffa0c72822eb665ceb2c872e7 upstream.
+
+The mlx5_esw_bridge_cleanup() is expected to be called with rtnl lock
+taken, which is true for mlx5e_rep_bridge_cleanup() function but not for
+error handling code in mlx5e_rep_bridge_init(). Add missing rtnl
+lock/unlock calls and extend both mlx5_esw_bridge_cleanup() and its dual
+function mlx5_esw_bridge_init() with ASSERT_RTNL() to verify the invariant
+from now on.
+
+Fixes: 7cd6a54a8285 ("net/mlx5: Bridge, handle FDB events")
+Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 2 ++
+ drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 ++++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -509,7 +509,9 @@ err_register_swdev_blk:
+ err_register_swdev:
+ destroy_workqueue(br_offloads->wq);
+ err_alloc_wq:
++ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
++ rtnl_unlock();
+ }
+
+ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+@@ -1385,6 +1385,8 @@ struct mlx5_esw_bridge_offloads *mlx5_es
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
++ ASSERT_RTNL();
++
+ br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
+ if (!br_offloads)
+ return ERR_PTR(-ENOMEM);
+@@ -1401,6 +1403,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+
++ ASSERT_RTNL();
++
+ if (!br_offloads)
+ return;
+
--- /dev/null
+From d8e5883d694bb053b19c4142a2d1f43a34f6fe2c Mon Sep 17 00:00:00 2001
+From: Maor Dickman <maord@nvidia.com>
+Date: Sun, 30 Jan 2022 16:00:41 +0200
+Subject: net/mlx5: E-Switch, Fix uninitialized variable modact
+
+From: Maor Dickman <maord@nvidia.com>
+
+commit d8e5883d694bb053b19c4142a2d1f43a34f6fe2c upstream.
+
+The variable modact is not initialized before used in command
+modify header allocation which can cause command to fail.
+
+Fix by initializing modact with zeros.
+
+Addresses-Coverity: ("Uninitialized scalar variable")
+Fixes: 8f1e0b97cc70 ("net/mlx5: E-Switch, Mark miss packets with new chain id mapping")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -212,7 +212,7 @@ static int
+ create_chain_restore(struct fs_chain *chain)
+ {
+ struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+- char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
++ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_fs_chains *chains = chain->chains;
+ enum mlx5e_tc_attr_to_reg chain_to_reg;
+ struct mlx5_modify_hdr *mod_hdr;
--- /dev/null
+From 55b2ca702cfa744a9eb108915996a2294da47e71 Mon Sep 17 00:00:00 2001
+From: Dima Chumak <dchumak@nvidia.com>
+Date: Mon, 17 Jan 2022 15:32:16 +0200
+Subject: net/mlx5: Fix offloading with ESWITCH_IPV4_TTL_MODIFY_ENABLE
+
+From: Dima Chumak <dchumak@nvidia.com>
+
+commit 55b2ca702cfa744a9eb108915996a2294da47e71 upstream.
+
+Only prio 1 is supported for nic mode when there is no ignore flow level
+support in firmware. But for switchdev mode, which supports fixed number
+of statically pre-allocated prios, this restriction is not relevant so
+it can be relaxed.
+
+Fixes: d671e109bd85 ("net/mlx5: Fix tc max supported prio for nic mode")
+Signed-off-by: Dima Chumak <dchumak@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -121,12 +121,13 @@ u32 mlx5_chains_get_nf_ft_chain(struct m
+
+ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+ {
+- if (!mlx5_chains_prios_supported(chains))
+- return 1;
+-
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
++ if (!chains->dev->priv.eswitch ||
++ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
++ return 1;
++
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_PRIO;
+ }
--- /dev/null
+From 3c5193a87b0fea090aa3f769d020337662d87b5e Mon Sep 17 00:00:00 2001
+From: Maher Sanalla <msanalla@nvidia.com>
+Date: Thu, 13 Jan 2022 15:48:48 +0200
+Subject: net/mlx5: Use del_timer_sync in fw reset flow of halting poll
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+commit 3c5193a87b0fea090aa3f769d020337662d87b5e upstream.
+
+Substitute del_timer() with del_timer_sync() in fw reset polling
+deactivation flow, in order to prevent a race condition which occurs
+when del_timer() is called and timer is deactivated while another
+process is handling the timer interrupt. A situation that led to
+the following call trace:
+ RIP: 0010:run_timer_softirq+0x137/0x420
+ <IRQ>
+ recalibrate_cpu_khz+0x10/0x10
+ ktime_get+0x3e/0xa0
+ ? sched_clock_cpu+0xb/0xc0
+ __do_softirq+0xf5/0x2ea
+ irq_exit_rcu+0xc1/0xf0
+ sysvec_apic_timer_interrupt+0x9e/0xc0
+ asm_sysvec_apic_timer_interrupt+0x12/0x20
+ </IRQ>
+
+Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+@@ -131,7 +131,7 @@ static void mlx5_stop_sync_reset_poll(st
+ {
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+- del_timer(&fw_reset->timer);
++ del_timer_sync(&fw_reset->timer);
+ }
+
+ static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
--- /dev/null
+From 736dfe4e68b868829a1e89dfef4a44c1580d4478 Mon Sep 17 00:00:00 2001
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+Date: Tue, 18 Jan 2022 13:31:54 +0200
+Subject: net/mlx5e: Don't treat small ceil values as unlimited in HTB offload
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+commit 736dfe4e68b868829a1e89dfef4a44c1580d4478 upstream.
+
+The hardware spec defines max_average_bw == 0 as "unlimited bandwidth".
+max_average_bw is calculated as `ceil / BYTES_IN_MBIT`, which can become
+0 when ceil is small, leading to an undesired effect of having no
+bandwidth limit.
+
+This commit fixes it by rounding up small values of ceil to 1 Mbit/s.
+
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+@@ -553,7 +553,8 @@ static int mlx5e_htb_convert_rate(struct
+
+ static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
+ {
+- *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
++ /* Hardware treats 0 as "unlimited", set at least 1. */
++ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
--- /dev/null
+From ec41332e02bd0acf1f24206867bb6a02f5877a62 Mon Sep 17 00:00:00 2001
+From: Maor Dickman <maord@nvidia.com>
+Date: Thu, 13 Jan 2022 15:11:42 +0200
+Subject: net/mlx5e: Fix handling of wrong devices during bond netevent
+
+From: Maor Dickman <maord@nvidia.com>
+
+commit ec41332e02bd0acf1f24206867bb6a02f5877a62 upstream.
+
+The current implementation of the bond netevent handler only checks if
+the handled netdev is a VF representor, and it is missing a check if
+the VF representor is on the same phys device as the bond handling
+the netevent.
+
+Fix by adding the missing check and optimizing the check of whether
+the netdev is a VF representor so it will not access uninitialized
+private data and crash.
+
+BUG: kernel NULL pointer dereference, address: 000000000000036c
+PGD 0 P4D 0
+Oops: 0000 [#1] SMP NOPTI
+Workqueue: eth3bond0 bond_mii_monitor [bonding]
+RIP: 0010:mlx5e_is_uplink_rep+0xc/0x50 [mlx5_core]
+RSP: 0018:ffff88812d69fd60 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff8881cf800000 RCX: 0000000000000000
+RDX: ffff88812d69fe10 RSI: 000000000000001b RDI: ffff8881cf800880
+RBP: ffff8881cf800000 R08: 00000445cabccf2b R09: 0000000000000008
+R10: 0000000000000004 R11: 0000000000000008 R12: ffff88812d69fe10
+R13: 00000000fffffffe R14: ffff88820c0f9000 R15: 0000000000000000
+FS: 0000000000000000(0000) GS:ffff88846fb00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000000036c CR3: 0000000103d80006 CR4: 0000000000370ea0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ mlx5e_eswitch_uplink_rep+0x31/0x40 [mlx5_core]
+ mlx5e_rep_is_lag_netdev+0x94/0xc0 [mlx5_core]
+ mlx5e_rep_esw_bond_netevent+0xeb/0x3d0 [mlx5_core]
+ raw_notifier_call_chain+0x41/0x60
+ call_netdevice_notifiers_info+0x34/0x80
+ netdev_lower_state_changed+0x4e/0xa0
+ bond_mii_monitor+0x56b/0x640 [bonding]
+ process_one_work+0x1b9/0x390
+ worker_thread+0x4d/0x3d0
+ ? rescuer_thread+0x350/0x350
+ kthread+0x124/0x150
+ ? set_kthread_struct+0x40/0x40
+ ret_from_fork+0x1f/0x30
+
+Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 32 +++++++-----------
+ 1 file changed, 14 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_
+
+ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+ {
+- struct mlx5e_rep_priv *rpriv;
+- struct mlx5e_priv *priv;
+-
+- /* A given netdev is not a representor or not a slave of LAG configuration */
+- if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
+- return false;
+-
+- priv = netdev_priv(netdev);
+- rpriv = priv->ppriv;
+-
+- /* Egress acl forward to vport is supported only non-uplink representor */
+- return rpriv->rep->vport != MLX5_VPORT_UPLINK;
++ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+ }
+
+ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_e
+ u16 fwd_vport_num;
+ int err;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(s
+ unsigned long event, void *ptr)
+ {
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++ struct mlx5e_rep_priv *rpriv;
++ struct mlx5e_rep_bond *bond;
++ struct mlx5e_priv *priv;
++
++ if (!mlx5e_rep_is_lag_netdev(netdev))
++ return NOTIFY_DONE;
++
++ bond = container_of(nb, struct mlx5e_rep_bond, nb);
++ priv = netdev_priv(netdev);
++ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
++ /* Verify VF representor is on the same device of the bond handling the netevent. */
++ if (rpriv->uplink_priv.bond != bond)
++ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
--- /dev/null
+From 4a08a131351e375a2969b98e46df260ed04dcba7 Mon Sep 17 00:00:00 2001
+From: Gal Pressman <gal@nvidia.com>
+Date: Sun, 16 Jan 2022 09:07:22 +0200
+Subject: net/mlx5e: Fix module EEPROM query
+
+From: Gal Pressman <gal@nvidia.com>
+
+commit 4a08a131351e375a2969b98e46df260ed04dcba7 upstream.
+
+When querying the module EEPROM, there was a misuse of the 'offset'
+variable vs the 'query.offset' field.
+Fix that by always using 'offset' and assigning its value to
+'query.offset' right before the mcia register read call.
+
+While at it, the cross-pages read size adjustment was changed to be more
+intuitive.
+
+Fixes: e19b0a3474ab ("net/mlx5: Refactor module EEPROM query")
+Reported-by: Wang Yugui <wangyugui@e16-tech.com>
+Signed-off-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/port.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -406,23 +406,24 @@ int mlx5_query_module_eeprom(struct mlx5
+
+ switch (module_id) {
+ case MLX5_MODULE_ID_SFP:
+- mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ case MLX5_MODULE_ID_QSFP:
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+- mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ default:
+ mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+ return -EINVAL;
+ }
+
+- if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH)
++ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+- size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
++ size = MLX5_EEPROM_PAGE_LENGTH - offset;
+
+ query.size = size;
++ query.offset = offset;
+
+ return mlx5_query_mcia(dev, &query, data);
+ }
--- /dev/null
+From de47db0cf7f4a9c555ad204e06baa70b50a70d08 Mon Sep 17 00:00:00 2001
+From: Raed Salem <raeds@nvidia.com>
+Date: Thu, 2 Dec 2021 17:49:01 +0200
+Subject: net/mlx5e: IPsec: Fix tunnel mode crypto offload for non TCP/UDP traffic
+
+From: Raed Salem <raeds@nvidia.com>
+
+commit de47db0cf7f4a9c555ad204e06baa70b50a70d08 upstream.
+
+The IPsec tunnel mode crypto offload software parser (SWP) setting in
+the data path currently always sets the inner L4 offset regardless of
+the encapsulated L4 header type and whether it exists in the first
+place; as such, this breaks non-TCP/UDP traffic.
+
+Set the SWP inner L4 offset only when the IPsec tunnel encapsulated L4
+header protocol is TCP/UDP.
+
+While at it fix inner ip protocol read for setting MLX5_ETH_WQE_SWP_INNER_L4_UDP
+flag to address the case where the ip header protocol is IPv6.
+
+Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload")
+Signed-off-by: Raed Salem <raeds@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 13 ++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+@@ -157,11 +157,20 @@ static void mlx5e_ipsec_set_swp(struct s
+ /* Tunnel mode */
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+- if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
++
++ switch (xo->inner_ipproto) {
++ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
++ fallthrough;
++ case IPPROTO_TCP:
++ /* IP | ESP | IP | [TCP | UDP] */
++ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
++ break;
++ default:
++ break;
++ }
+ return;
+ }
+
--- /dev/null
+From 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 09:20:18 -0800
+Subject: net: sched: fix use-after-free in tc_new_tfilter()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 upstream.
+
+Whenever tc_new_tfilter() jumps back to replay: label,
+we need to make sure @q and @chain local variables are cleared again,
+or risk use-after-free as in [1]
+
+For consistency, apply the same fix in tc_ctl_chain()
+
+BUG: KASAN: use-after-free in mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581
+Write of size 8 at addr ffff8880985c4b08 by task syz-executor.4/1945
+
+CPU: 0 PID: 1945 Comm: syz-executor.4 Not tainted 5.17.0-rc1-syzkaller-00495-gff58831fa02d #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_address_description.constprop.0.cold+0x8d/0x336 mm/kasan/report.c:255
+ __kasan_report mm/kasan/report.c:442 [inline]
+ kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
+ mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581
+ tcf_chain_head_change_item net/sched/cls_api.c:372 [inline]
+ tcf_chain0_head_change.isra.0+0xb9/0x120 net/sched/cls_api.c:386
+ tcf_chain_tp_insert net/sched/cls_api.c:1657 [inline]
+ tcf_chain_tp_insert_unique net/sched/cls_api.c:1707 [inline]
+ tc_new_tfilter+0x1e67/0x2350 net/sched/cls_api.c:2086
+ rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+RIP: 0033:0x7f2647172059
+Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f2645aa5168 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
+RAX: ffffffffffffffda RBX: 00007f2647285100 RCX: 00007f2647172059
+RDX: 040000000000009f RSI: 00000000200002c0 RDI: 0000000000000006
+RBP: 00007f26471cc08d R08: 0000000000000000 R09: 0000000000000000
+R10: 9e00000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007fffb3f7f02f R14: 00007f2645aa5300 R15: 0000000000022000
+ </TASK>
+
+Allocated by task 1944:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track mm/kasan/common.c:45 [inline]
+ set_alloc_info mm/kasan/common.c:436 [inline]
+ ____kasan_kmalloc mm/kasan/common.c:515 [inline]
+ ____kasan_kmalloc mm/kasan/common.c:474 [inline]
+ __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:524
+ kmalloc_node include/linux/slab.h:604 [inline]
+ kzalloc_node include/linux/slab.h:726 [inline]
+ qdisc_alloc+0xac/0xa10 net/sched/sch_generic.c:941
+ qdisc_create.constprop.0+0xce/0x10f0 net/sched/sch_api.c:1211
+ tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660
+ rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5592
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Freed by task 3609:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track+0x21/0x30 mm/kasan/common.c:45
+ kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370
+ ____kasan_slab_free mm/kasan/common.c:366 [inline]
+ ____kasan_slab_free+0x130/0x160 mm/kasan/common.c:328
+ kasan_slab_free include/linux/kasan.h:236 [inline]
+ slab_free_hook mm/slub.c:1728 [inline]
+ slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1754
+ slab_free mm/slub.c:3509 [inline]
+ kfree+0xcb/0x280 mm/slub.c:4562
+ rcu_do_batch kernel/rcu/tree.c:2527 [inline]
+ rcu_core+0x7b8/0x1540 kernel/rcu/tree.c:2778
+ __do_softirq+0x29b/0x9c2 kernel/softirq.c:558
+
+Last potentially related work creation:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348
+ __call_rcu kernel/rcu/tree.c:3026 [inline]
+ call_rcu+0xb1/0x740 kernel/rcu/tree.c:3106
+ qdisc_put_unlocked+0x6f/0x90 net/sched/sch_generic.c:1109
+ tcf_block_release+0x86/0x90 net/sched/cls_api.c:1238
+ tc_new_tfilter+0xc0d/0x2350 net/sched/cls_api.c:2148
+ rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+The buggy address belongs to the object at ffff8880985c4800
+ which belongs to the cache kmalloc-1k of size 1024
+The buggy address is located 776 bytes inside of
+ 1024-byte region [ffff8880985c4800, ffff8880985c4c00)
+The buggy address belongs to the page:
+page:ffffea0002617000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x985c0
+head:ffffea0002617000 order:3 compound_mapcount:0 compound_pincount:0
+flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff)
+raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888010c41dc0
+raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+page_owner tracks the page as allocated
+page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 1941, ts 1038999441284, free_ts 1033444432829
+ prep_new_page mm/page_alloc.c:2434 [inline]
+ get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4165
+ __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5389
+ alloc_pages+0x1aa/0x310 mm/mempolicy.c:2271
+ alloc_slab_page mm/slub.c:1799 [inline]
+ allocate_slab mm/slub.c:1944 [inline]
+ new_slab+0x28a/0x3b0 mm/slub.c:2004
+ ___slab_alloc+0x87c/0xe90 mm/slub.c:3018
+ __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3105
+ slab_alloc_node mm/slub.c:3196 [inline]
+ slab_alloc mm/slub.c:3238 [inline]
+ __kmalloc+0x2fb/0x340 mm/slub.c:4420
+ kmalloc include/linux/slab.h:586 [inline]
+ kzalloc include/linux/slab.h:715 [inline]
+ __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1335
+ neigh_sysctl_register+0x2c8/0x5e0 net/core/neighbour.c:3787
+ devinet_sysctl_register+0xb1/0x230 net/ipv4/devinet.c:2618
+ inetdev_init+0x286/0x580 net/ipv4/devinet.c:278
+ inetdev_event+0xa8a/0x15d0 net/ipv4/devinet.c:1532
+ notifier_call_chain+0xb5/0x200 kernel/notifier.c:84
+ call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1919
+ call_netdevice_notifiers_extack net/core/dev.c:1931 [inline]
+ call_netdevice_notifiers net/core/dev.c:1945 [inline]
+ register_netdevice+0x1073/0x1500 net/core/dev.c:9698
+ veth_newlink+0x59c/0xa90 drivers/net/veth.c:1722
+page last free stack trace:
+ reset_page_owner include/linux/page_owner.h:24 [inline]
+ free_pages_prepare mm/page_alloc.c:1352 [inline]
+ free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1404
+ free_unref_page_prepare mm/page_alloc.c:3325 [inline]
+ free_unref_page+0x19/0x690 mm/page_alloc.c:3404
+ release_pages+0x748/0x1220 mm/swap.c:956
+ tlb_batch_pages_flush mm/mmu_gather.c:50 [inline]
+ tlb_flush_mmu_free mm/mmu_gather.c:243 [inline]
+ tlb_flush_mmu+0xe9/0x6b0 mm/mmu_gather.c:250
+ zap_pte_range mm/memory.c:1441 [inline]
+ zap_pmd_range mm/memory.c:1490 [inline]
+ zap_pud_range mm/memory.c:1519 [inline]
+ zap_p4d_range mm/memory.c:1540 [inline]
+ unmap_page_range+0x1d1d/0x2a30 mm/memory.c:1561
+ unmap_single_vma+0x198/0x310 mm/memory.c:1606
+ unmap_vmas+0x16b/0x2f0 mm/memory.c:1638
+ exit_mmap+0x201/0x670 mm/mmap.c:3178
+ __mmput+0x122/0x4b0 kernel/fork.c:1114
+ mmput+0x56/0x60 kernel/fork.c:1135
+ exit_mm kernel/exit.c:507 [inline]
+ do_exit+0xa3c/0x2a30 kernel/exit.c:793
+ do_group_exit+0xd2/0x2f0 kernel/exit.c:935
+ __do_sys_exit_group kernel/exit.c:946 [inline]
+ __se_sys_exit_group kernel/exit.c:944 [inline]
+ __x64_sys_exit_group+0x3a/0x50 kernel/exit.c:944
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Memory state around the buggy address:
+ ffff8880985c4a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880985c4a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff8880985c4b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff8880985c4b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880985c4c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+
+Fixes: 470502de5bdb ("net: sched: unlock rules update API")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Vlad Buslov <vladbu@mellanox.com>
+Cc: Jiri Pirko <jiri@mellanox.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220131172018.3704490-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_api.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff
+ bool prio_allocate;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
++ struct Qdisc *q;
+ struct tcf_chain_info chain_info;
+- struct tcf_chain *chain = NULL;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ struct tcf_proto *tp;
+ unsigned long cl;
+@@ -1976,6 +1976,8 @@ replay:
+ tp = NULL;
+ cl = 0;
+ block = NULL;
++ q = NULL;
++ chain = NULL;
+ flags = 0;
+
+ if (prio == 0) {
+@@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *
+ struct tcmsg *t;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
+- struct tcf_chain *chain = NULL;
++ struct Qdisc *q;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ unsigned long cl;
+ int err;
+@@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *
+ return -EPERM;
+
+ replay:
++ q = NULL;
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
--- /dev/null
+From 94fd19752b28aa66c98e7991734af91dfc529f8f Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Fri, 14 Jan 2022 16:57:56 +0100
+Subject: ovl: don't fail copy up if no fileattr support on upper
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 94fd19752b28aa66c98e7991734af91dfc529f8f upstream.
+
+Christoph Fritz is reporting that failure to copy up fileattr when upper
+doesn't support fileattr or xattr results in a regression.
+
+Return success in these failure cases; this reverts overlayfs to the old
+behavior.
+
+Add a pr_warn_once() in these cases to still let the user know about the
+copy up failures.
+
+Reported-by: Christoph Fritz <chf.fritz@googlemail.com>
+Fixes: 72db82115d2b ("ovl: copy up sync/noatime fileattr flags")
+Cc: <stable@vger.kernel.org> # v5.15
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/overlayfs/copy_up.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -157,7 +157,9 @@ static int ovl_copy_fileattr(struct inod
+ */
+ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+ err = ovl_set_protattr(inode, new->dentry, &oldfa);
+- if (err)
++ if (err == -EPERM)
++ pr_warn_once("copying fileattr: no xattr on upper\n");
++ else if (err)
+ return err;
+ }
+
+@@ -167,6 +169,14 @@ static int ovl_copy_fileattr(struct inod
+
+ err = ovl_real_fileattr_get(new, &newfa);
+ if (err) {
++ /*
++ * Returning an error if upper doesn't support fileattr will
++ * result in a regression, so revert to the old behavior.
++ */
++ if (err == -ENOTTY || err == -EINVAL) {
++ pr_warn_once("copying fileattr: no support on upper\n");
++ return 0;
++ }
+ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
+ new, err);
+ return err;
--- /dev/null
+From c36c04c2e132fc39f6b658bf607aed4425427fd7 Mon Sep 17 00:00:00 2001
+From: John Hubbard <jhubbard@nvidia.com>
+Date: Tue, 1 Feb 2022 19:23:17 -0800
+Subject: Revert "mm/gup: small refactoring: simplify try_grab_page()"
+
+From: John Hubbard <jhubbard@nvidia.com>
+
+commit c36c04c2e132fc39f6b658bf607aed4425427fd7 upstream.
+
+This reverts commit 54d516b1d62ff8f17cee2da06e5e4706a0d00b8a
+
+That commit did a refactoring that effectively combined fast and slow
+gup paths (again). And that was again incorrect, for two reasons:
+
+ a) Fast gup and slow gup get reference counts on pages in different
+ ways and with different goals: see Linus' writeup in commit
+ cd1adf1b63a1 ("Revert "mm/gup: remove try_get_page(), call
+ try_get_compound_head() directly""), and
+
+ b) try_grab_compound_head() also has a specific check for
+ "FOLL_LONGTERM && !is_pinned(page)", that assumes that the caller
+ can fall back to slow gup. This resulted in new failures, as
+ recently report by Will McVicker [1].
+
+But (a) has problems too, even though they may not have been reported
+yet. So just revert this.
+
+Link: https://lore.kernel.org/r/20220131203504.3458775-1-willmcvicker@google.com [1]
+Fixes: 54d516b1d62f ("mm/gup: small refactoring: simplify try_grab_page()")
+Reported-and-tested-by: Will McVicker <willmcvicker@google.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Minchan Kim <minchan@google.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: stable@vger.kernel.org # 5.15
+Signed-off-by: John Hubbard <jhubbard@nvidia.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c | 35 ++++++++++++++++++++++++++++++-----
+ 1 file changed, 30 insertions(+), 5 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -124,8 +124,8 @@ static inline struct page *try_get_compo
+ * considered failure, and furthermore, a likely bug in the caller, so a warning
+ * is also emitted.
+ */
+-struct page *try_grab_compound_head(struct page *page,
+- int refs, unsigned int flags)
++__maybe_unused struct page *try_grab_compound_head(struct page *page,
++ int refs, unsigned int flags)
+ {
+ if (flags & FOLL_GET)
+ return try_get_compound_head(page, refs);
+@@ -208,10 +208,35 @@ static void put_compound_head(struct pag
+ */
+ bool __must_check try_grab_page(struct page *page, unsigned int flags)
+ {
+- if (!(flags & (FOLL_GET | FOLL_PIN)))
+- return true;
++ WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
+
+- return try_grab_compound_head(page, 1, flags);
++ if (flags & FOLL_GET)
++ return try_get_page(page);
++ else if (flags & FOLL_PIN) {
++ int refs = 1;
++
++ page = compound_head(page);
++
++ if (WARN_ON_ONCE(page_ref_count(page) <= 0))
++ return false;
++
++ if (hpage_pincount_available(page))
++ hpage_pincount_add(page, 1);
++ else
++ refs = GUP_PIN_COUNTING_BIAS;
++
++ /*
++ * Similar to try_grab_compound_head(): even if using the
++ * hpage_pincount_add/_sub() routines, be sure to
++ * *also* increment the normal page refcount field at least
++ * once, so that the page really is pinned.
++ */
++ page_ref_add(page, refs);
++
++ mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1);
++ }
++
++ return true;
+ }
+
+ /**
--- /dev/null
+From c6f6f2444bdbe0079e41914a35081530d0409963 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 17:21:06 -0800
+Subject: rtnetlink: make sure to refresh master_dev/m_ops in __rtnl_newlink()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit c6f6f2444bdbe0079e41914a35081530d0409963 upstream.
+
+While looking at one unrelated syzbot bug, I found the replay logic
+in __rtnl_newlink() to potentially trigger use-after-free.
+
+It is better to clear master_dev and m_ops inside the loop,
+in case we have to replay it.
+
+Fixes: ba7d49b1f0f8 ("rtnetlink: provide api for getting and setting slave info")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Jiri Pirko <jiri@nvidia.com>
+Link: https://lore.kernel.org/r/20220201012106.216495-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3254,8 +3254,8 @@ static int __rtnl_newlink(struct sk_buff
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+ unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+- const struct rtnl_link_ops *m_ops = NULL;
+- struct net_device *master_dev = NULL;
++ const struct rtnl_link_ops *m_ops;
++ struct net_device *master_dev;
+ struct net *net = sock_net(skb->sk);
+ const struct rtnl_link_ops *ops;
+ struct nlattr *tb[IFLA_MAX + 1];
+@@ -3293,6 +3293,8 @@ replay:
+ else
+ dev = NULL;
+
++ master_dev = NULL;
++ m_ops = NULL;
+ if (dev) {
+ master_dev = netdev_master_upper_dev_get(dev);
+ if (master_dev)
net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch
net-ipa-prevent-concurrent-replenish.patch
drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch
+cgroup-v1-require-capabilities-to-set-release_agent.patch
+revert-mm-gup-small-refactoring-simplify-try_grab_page.patch
+ovl-don-t-fail-copy-up-if-no-fileattr-support-on-upper.patch
+lockd-fix-server-crash-on-reboot-of-client-holding-lock.patch
+lockd-fix-failure-to-cleanup-client-locks.patch
+net-mlx5e-ipsec-fix-tunnel-mode-crypto-offload-for-non-tcp-udp-traffic.patch
+net-mlx5-bridge-take-rtnl-lock-in-init-error-handler.patch
+net-mlx5-bridge-ensure-dev_name-is-null-terminated.patch
+net-mlx5e-fix-handling-of-wrong-devices-during-bond-netevent.patch
+net-mlx5-use-del_timer_sync-in-fw-reset-flow-of-halting-poll.patch
+net-mlx5e-fix-module-eeprom-query.patch
+net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch
+net-mlx5e-don-t-treat-small-ceil-values-as-unlimited-in-htb-offload.patch
+net-mlx5-bridge-fix-devlink-deadlock-on-net-namespace-deletion.patch
+net-mlx5-e-switch-fix-uninitialized-variable-modact.patch
+ipheth-fix-eoverflow-in-ipheth_rcvbulk_callback.patch
+i40e-fix-reset-bw-limit-when-dcb-enabled-with-1-tc.patch
+i40e-fix-reset-path-while-removing-the-driver.patch
+net-amd-xgbe-ensure-to-reset-the-tx_timer_active-flag.patch
+net-amd-xgbe-fix-skb-data-length-underflow.patch
+fanotify-fix-stale-file-descriptor-in-copy_event_to_user.patch
+net-sched-fix-use-after-free-in-tc_new_tfilter.patch
+rtnetlink-make-sure-to-refresh-master_dev-m_ops-in-__rtnl_newlink.patch
+cpuset-fix-the-bug-that-subpart_cpus-updated-wrongly-in-update_cpumask.patch
+e1000e-handshake-with-csme-starts-from-adl-platforms.patch
+af_packet-fix-data-race-in-packet_setsockopt-packet_setsockopt.patch
+tcp-add-missing-tcp_skb_can_collapse-test-in-tcp_shift_skb_data.patch
--- /dev/null
+From b67985be400969578d4d4b17299714c0e5d2c07b Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 1 Feb 2022 10:46:40 -0800
+Subject: tcp: add missing tcp_skb_can_collapse() test in tcp_shift_skb_data()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit b67985be400969578d4d4b17299714c0e5d2c07b upstream.
+
+tcp_shift_skb_data() might collapse three packets into a larger one.
+
+P_A, P_B, P_C -> P_ABC
+
+Historically, it used a single tcp_skb_can_collapse_to(P_A) call,
+because it was enough.
+
+In commit 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions"),
+this call was replaced by a call to tcp_skb_can_collapse(P_A, P_B)
+
+But the now needed test over P_C has been missed.
+
+This probably broke MPTCP.
+
+Then later, commit 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+added an extra condition to tcp_skb_can_collapse(), but the missing call
+from tcp_shift_skb_data() is also breaking TCP zerocopy, because P_A and P_C
+might have different skb_zcopy_pure() status.
+
+Fixes: 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions")
+Fixes: 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Cc: Talal Ahmad <talalahmad@google.com>
+Cc: Arjun Roy <arjunroy@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://lore.kernel.org/r/20220201184640.756716-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1652,6 +1652,8 @@ static struct sk_buff *tcp_shift_skb_dat
+ (mss != tcp_skb_seglen(skb)))
+ goto out;
+
++ if (!tcp_skb_can_collapse(prev, skb))
++ goto out;
+ len = skb->len;
+ pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, pcount, len))