--- /dev/null
+From e42e70ad6ae2ae511a6143d2e8da929366e58bd9 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 18:23:58 -0800
+Subject: af_packet: fix data-race in packet_setsockopt / packet_setsockopt
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit e42e70ad6ae2ae511a6143d2e8da929366e58bd9 upstream.
+
+When packet_setsockopt( PACKET_FANOUT_DATA ) reads po->fanout,
+no lock is held, meaning that another thread can change po->fanout.
+
+Given that po->fanout can only be set once during the socket lifetime
+(it is only cleared from fanout_release()), we can use
+READ_ONCE()/WRITE_ONCE() to document the race.
+
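+A minimal sketch of the pattern (matching the change below):
+
+ /* writer, in fanout_add() under the fanout mutex */
+ WRITE_ONCE(po->fanout, match);
+
+ /* lockless reader in packet_setsockopt(); a plain load here is the
+  * reported data race
+  */
+ if (!READ_ONCE(po->fanout))
+         return -EINVAL;
+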
+BUG: KCSAN: data-race in packet_setsockopt / packet_setsockopt
+
+write to 0xffff88813ae8e300 of 8 bytes by task 14653 on cpu 0:
+ fanout_add net/packet/af_packet.c:1791 [inline]
+ packet_setsockopt+0x22fe/0x24a0 net/packet/af_packet.c:3931
+ __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
+ __do_sys_setsockopt net/socket.c:2191 [inline]
+ __se_sys_setsockopt net/socket.c:2188 [inline]
+ __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+read to 0xffff88813ae8e300 of 8 bytes by task 14654 on cpu 1:
+ packet_setsockopt+0x691/0x24a0 net/packet/af_packet.c:3935
+ __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
+ __do_sys_setsockopt net/socket.c:2191 [inline]
+ __se_sys_setsockopt net/socket.c:2188 [inline]
+ __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+value changed: 0x0000000000000000 -> 0xffff888106f8c000
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 14654 Comm: syz-executor.3 Not tainted 5.16.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: 47dceb8ecdc1 ("packet: add classic BPF fanout mode")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220201022358.330621-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1788,7 +1788,10 @@ static int fanout_add(struct sock *sk, s
+ err = -ENOSPC;
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
+ __dev_remove_pack(&po->prot_hook);
+- po->fanout = match;
++
++ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
++ WRITE_ONCE(po->fanout, match);
++
+ po->rollover = rollover;
+ rollover = NULL;
+ refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
+@@ -3941,7 +3944,8 @@ packet_setsockopt(struct socket *sock, i
+ }
+ case PACKET_FANOUT_DATA:
+ {
+- if (!po->fanout)
++ /* Paired with the WRITE_ONCE() in fanout_add() */
++ if (!READ_ONCE(po->fanout))
+ return -EINVAL;
+
+ return fanout_set_data(po, optval, optlen);
--- /dev/null
+From 0e3135d3bfa5dfb658145238d2bc723a8e30c3a3 Mon Sep 17 00:00:00 2001
+From: He Fengqing <hefengqing@huawei.com>
+Date: Sat, 22 Jan 2022 10:29:36 +0000
+Subject: bpf: Fix possible race in inc_misses_counter
+
+From: He Fengqing <hefengqing@huawei.com>
+
+commit 0e3135d3bfa5dfb658145238d2bc723a8e30c3a3 upstream.
+
+It seems inc_misses_counter() suffers from the same issue fixed in
+commit d979617aa84d ("bpf: Fixes possible race in update_prog_stats()
+for 32bit arches"): as it can run while interrupts are enabled, it
+could be re-entered and the u64_stats syncp could be mangled.
+
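+For reference, a sketch of the lockless reader this pairs with (the
+real code sums the per-CPU counters):
+
+ unsigned int start;
+ u64 misses;
+
+ do {
+         start = u64_stats_fetch_begin(&stats->syncp);
+         misses = u64_stats_read(&stats->misses);
+ } while (u64_stats_fetch_retry(&stats->syncp, start));
+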
+Fixes: 9ed9e9ba2337 ("bpf: Count the number of times recursion was prevented")
+Signed-off-by: He Fengqing <hefengqing@huawei.com>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/r/20220122102936.1219518-1-hefengqing@huawei.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/trampoline.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/trampoline.c
++++ b/kernel/bpf/trampoline.c
+@@ -542,11 +542,12 @@ static __always_inline u64 notrace bpf_p
+ static void notrace inc_misses_counter(struct bpf_prog *prog)
+ {
+ struct bpf_prog_stats *stats;
++ unsigned int flags;
+
+ stats = this_cpu_ptr(prog->stats);
+- u64_stats_update_begin(&stats->syncp);
++ flags = u64_stats_update_begin_irqsave(&stats->syncp);
+ u64_stats_inc(&stats->misses);
+- u64_stats_update_end(&stats->syncp);
++ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ }
+
+ /* The logic is similar to bpf_prog_run(), but with an explicit
--- /dev/null
+From 24f6008564183aa120d07c03d9289519c2fe02af Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 20 Jan 2022 11:04:01 -0600
+Subject: cgroup-v1: Require capabilities to set release_agent
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 24f6008564183aa120d07c03d9289519c2fe02af upstream.
+
+The cgroup release_agent is called with call_usermodehelper. The function
+call_usermodehelper starts the release_agent with a full set of
+capabilities. Therefore require capabilities when setting the release_agent.
+
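+A hypothetical shell session illustrating the new behavior (path and
+agent name are illustrative): a write from a non-initial user namespace,
+or without CAP_SYS_ADMIN, is now rejected:
+
+ # unshare -Ur sh -c 'echo /sbin/my-agent > release_agent'
+ sh: write error: Operation not permitted
+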
+Reported-by: Tabitha Sable <tabitha.c.sable@gmail.com>
+Tested-by: Tabitha Sable <tabitha.c.sable@gmail.com>
+Fixes: 81a6a5cdd2c5 ("Task Control Groups: automatic userspace notification of idle cgroups")
+Cc: stable@vger.kernel.org # v2.6.24+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-v1.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -549,6 +549,14 @@ static ssize_t cgroup_release_agent_writ
+
+ BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+
++ /*
++ * Release agent gets called with all capabilities,
++ * require capabilities to set release agent.
++ */
++ if ((of->file->f_cred->user_ns != &init_user_ns) ||
++ !capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
+ cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!cgrp)
+ return -ENODEV;
+@@ -954,6 +962,12 @@ int cgroup1_parse_param(struct fs_contex
+ /* Specifying two release agents is forbidden */
+ if (ctx->release_agent)
+ return invalfc(fc, "release_agent respecified");
++ /*
++ * Release agent gets called with all capabilities,
++ * require capabilities to set release agent.
++ */
++ if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
++ return invalfc(fc, "Setting release_agent not allowed");
+ ctx->release_agent = param->string;
+ param->string = NULL;
+ break;
--- /dev/null
+From c80d401c52a2d1baf2a5afeb06f0ffe678e56d23 Mon Sep 17 00:00:00 2001
+From: Tianchen Ding <dtcccc@linux.alibaba.com>
+Date: Tue, 18 Jan 2022 18:05:18 +0800
+Subject: cpuset: Fix the bug that subparts_cpus updated wrongly in update_cpumask()
+
+From: Tianchen Ding <dtcccc@linux.alibaba.com>
+
+commit c80d401c52a2d1baf2a5afeb06f0ffe678e56d23 upstream.
+
+subparts_cpus should be limited to a subset of cpus_allowed, but it is
+updated wrongly by using cpumask_andnot(). Use cpumask_and() instead to
+fix it.
+
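+A worked example: with subparts_cpus = {1,2,3} and cpus_allowed = {2,3,4},
+cpumask_andnot() leaves {1}, i.e. exactly the CPUs that are not allowed,
+while cpumask_and() leaves the intended subset {2,3}.
+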
+Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag")
+Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1615,8 +1615,7 @@ static int update_cpumask(struct cpuset
+ * Make sure that subparts_cpus is a subset of cpus_allowed.
+ */
+ if (cs->nr_subparts_cpus) {
+- cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
+- cs->cpus_allowed);
++ cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
+ cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
+ }
+ spin_unlock_irq(&callback_lock);
--- /dev/null
+From cad014b7b5a6897d8c4fad13e2888978bfb7a53f Mon Sep 17 00:00:00 2001
+From: Sasha Neftin <sasha.neftin@intel.com>
+Date: Tue, 7 Dec 2021 13:23:42 +0200
+Subject: e1000e: Handshake with CSME starts from ADL platforms
+
+From: Sasha Neftin <sasha.neftin@intel.com>
+
+commit cad014b7b5a6897d8c4fad13e2888978bfb7a53f upstream.
+
+Handshake with CSME/AMT on non-provisioned platforms during the S0ix
+flow is not supported on the TGL platform and can cause a HW unit hang.
+Update the handshake-with-CSME flow to start from the ADL platform.
+
+Fixes: 3e55d231716e ("e1000e: Add handshake with the CSME to support S0ix")
+Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
+Tested-by: Nechama Kraus <nechamax.kraus@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/e1000e/netdev.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -6345,7 +6345,8 @@ static void e1000e_s0ix_entry_flow(struc
+ u32 mac_data;
+ u16 phy_data;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
+ /* Request ME configure the device for S0ix */
+ mac_data = er32(H2ME);
+ mac_data |= E1000_H2ME_START_DPG;
+@@ -6494,7 +6495,8 @@ static void e1000e_s0ix_exit_flow(struct
+ u16 phy_data;
+ u32 i = 0;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
+ /* Request ME unconfigure the device from S0ix */
+ mac_data = er32(H2ME);
+ mac_data &= ~E1000_H2ME_START_DPG;
--- /dev/null
+From ee12595147ac1fbfb5bcb23837e26dd58d94b15d Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 28 Jan 2022 22:57:01 +0300
+Subject: fanotify: Fix stale file descriptor in copy_event_to_user()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit ee12595147ac1fbfb5bcb23837e26dd58d94b15d upstream.
+
+This code calls fd_install() which gives the userspace access to the fd.
+Then if copy_info_records_to_user() fails it calls put_unused_fd(fd) but
+that will not release it and leads to a stale entry in the file
+descriptor table.
+
+Generally you can't trust the fd after a call to fd_install(). The fix
+is to delay the fd_install() until everything else has succeeded.
+
+Fortunately it requires CAP_SYS_ADMIN to reach this code, so the
+security impact is limited.
+
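+The resulting order in copy_event_to_user(), sketched:
+
+ /* everything that can still fail happens first ... */
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+                                 buf, count);
+ if (ret < 0)
+         goto out_close_fd;      /* fd not yet visible to userspace */
+
+ /* ... only then is the fd published to userspace */
+ if (f)
+         fd_install(fd, f);
+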
+Fixes: f644bc449b37 ("fanotify: fix copy_event_to_user() fid error clean up")
+Link: https://lore.kernel.org/r/20220128195656.GA26981@kili
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Mathias Krause <minipli@grsecurity.net>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/notify/fanotify/fanotify_user.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -656,9 +656,6 @@ static ssize_t copy_event_to_user(struct
+ if (fanotify_is_perm_event(event->mask))
+ FANOTIFY_PERM(event)->fd = fd;
+
+- if (f)
+- fd_install(fd, f);
+-
+ if (info_mode) {
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
+@@ -666,6 +663,9 @@ static ssize_t copy_event_to_user(struct
+ goto out_close_fd;
+ }
+
++ if (f)
++ fd_install(fd, f);
++
+ return metadata.event_len;
+
+ out_close_fd:
--- /dev/null
+From 3d2504663c41104b4359a15f35670cfa82de1bbf Mon Sep 17 00:00:00 2001
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Date: Tue, 14 Dec 2021 10:08:22 +0000
+Subject: i40e: Fix reset bw limit when DCB enabled with 1 TC
+
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+
+commit 3d2504663c41104b4359a15f35670cfa82de1bbf upstream.
+
+There was an AQ error I40E_AQ_RC_EINVAL when trying to reset the bw
+limit as part of bw allocation setup, caused by trying to reset the
+bw limit with DCB enabled. The bw limit should not be reset when DCB
+is enabled. The code was relying on pf->flags to check if DCB is
+enabled, but if only 1 TC is available this flag will not be set even
+though DCB is enabled. Add a check for the number of TCs, and if it
+is 1, don't try to reset the bw limit even if pf->flags shows DCB as
+disabled.
+
+Fixes: fa38e30ac73f ("i40e: Fix for Tx timeouts when interface is brought up if DCB is enabled")
+Suggested-by: Alexander Lobakin <alexandr.lobakin@intel.com> # Flatten the condition
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Tested-by: Imam Hassan Reza Biswas <imam.hassan.reza.biswas@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -5372,7 +5372,15 @@ static int i40e_vsi_configure_bw_alloc(s
+ /* There is no need to reset BW when mqprio mode is on. */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return 0;
+- if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
++
++ if (!vsi->mqprio_qopt.qopt.hw) {
++ if (pf->flags & I40E_FLAG_DCB_ENABLED)
++ goto skip_reset;
++
++ if (IS_ENABLED(CONFIG_I40E_DCB) &&
++ i40e_dcb_hw_get_num_tc(&pf->hw) == 1)
++ goto skip_reset;
++
+ ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+ if (ret)
+ dev_info(&pf->pdev->dev,
+@@ -5380,6 +5388,8 @@ static int i40e_vsi_configure_bw_alloc(s
+ vsi->seid);
+ return ret;
+ }
++
++skip_reset:
+ memset(&bw_data, 0, sizeof(bw_data));
+ bw_data.tc_valid_bits = enabled_tc;
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
--- /dev/null
+From 6533e558c6505e94c3e0ed4281ed5e31ec985f4d Mon Sep 17 00:00:00 2001
+From: Karen Sornek <karen.sornek@intel.com>
+Date: Wed, 12 Jan 2022 10:19:47 +0100
+Subject: i40e: Fix reset path while removing the driver
+
+From: Karen Sornek <karen.sornek@intel.com>
+
+commit 6533e558c6505e94c3e0ed4281ed5e31ec985f4d upstream.
+
+Fix a NULL pointer dereference crash that occurs when the driver is
+unloaded while the VSI rings are simultaneously being stopped.
+
+The hardware requires 50 msec to finish disabling the RX queues, so
+the driver spins in mdelay() waiting for the operation to complete.
+
+For example, changing the number of queues (which requires a reset)
+would fail in the following call stack:
+
+1) i40e_prep_for_reset
+2) i40e_pf_quiesce_all_vsi
+3) i40e_quiesce_vsi
+4) i40e_vsi_close
+5) i40e_down
+6) i40e_vsi_stop_rings
+7) i40e_vsi_control_rx -> disable requires the delay of 50msecs
+8) continue back in i40e_down function where
+ i40e_clean_tx_ring(vsi->tx_rings[i]) is going to crash
+
+While the driver was spinning, vsi_release() called
+i40e_vsi_free_arrays(), where the vsi->tx_rings resources were freed
+and the pointer was set to NULL.
+
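+The fix is a dedicated __I40E_IN_REMOVE state bit: the remove path sets
+it once, and the reset/rebuild paths bail out early while it is set,
+roughly:
+
+ /* i40e_remove() */
+ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+         usleep_range(1000, 2000);
+ set_bit(__I40E_IN_REMOVE, pf->state);
+
+ /* i40e_reset_and_rebuild() and friends */
+ if (test_bit(__I40E_IN_REMOVE, pf->state))
+         return;
+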
+Fixes: 5b6d4a7f20b0 ("i40e: Fix crash during removing i40e driver")
+Signed-off-by: Slawomir Laba <slawomirx.laba@intel.com>
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Karen Sornek <karen.sornek@intel.com>
+Tested-by: Gurucharan G <gurucharanx.g@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e.h | 1 +
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 19 ++++++++++++++++++-
+ 2 files changed, 19 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e.h
++++ b/drivers/net/ethernet/intel/i40e/i40e.h
+@@ -144,6 +144,7 @@ enum i40e_state_t {
+ __I40E_VIRTCHNL_OP_PENDING,
+ __I40E_RECOVERY_MODE,
+ __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */
++ __I40E_IN_REMOVE,
+ __I40E_VFS_RELEASING,
+ /* This must be last as it determines the size of the BITMAP */
+ __I40E_STATE_SIZE__,
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -10863,6 +10863,9 @@ static void i40e_reset_and_rebuild(struc
+ bool lock_acquired)
+ {
+ int ret;
++
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
+ /* Now we wait for GRST to settle out.
+ * We don't have to delete the VEBs or VSIs from the hw switch
+ * because the reset will make them disappear.
+@@ -12222,6 +12225,8 @@ int i40e_reconfig_rss_queues(struct i40e
+
+ vsi->req_queue_pairs = queue_count;
+ i40e_prep_for_reset(pf);
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return pf->alloc_rss_size;
+
+ pf->alloc_rss_size = new_rss_size;
+
+@@ -13048,6 +13053,10 @@ static int i40e_xdp_setup(struct i40e_vs
+ if (need_reset)
+ i40e_prep_for_reset(pf);
+
++ /* VSI shall be deleted in a moment, just return EINVAL */
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return -EINVAL;
++
+ old_prog = xchg(&vsi->xdp_prog, prog);
+
+ if (need_reset) {
+@@ -15938,8 +15947,13 @@ static void i40e_remove(struct pci_dev *
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
+
+- while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
++ /* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
++ * flags, once they are set, i40e_rebuild should not be called as
++ * i40e_prep_for_reset always returns early.
++ */
++ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+ usleep_range(1000, 2000);
++ set_bit(__I40E_IN_REMOVE, pf->state);
+
+ if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+ set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
+@@ -16138,6 +16152,9 @@ static void i40e_pci_error_reset_done(st
+ {
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
++
+ i40e_reset_and_rebuild(pf, false, false);
+ }
+
--- /dev/null
+From 63e4b45c82ed1bde979da7052229a4229ce9cabf Mon Sep 17 00:00:00 2001
+From: Georgi Valkov <gvalkov@abv.bg>
+Date: Tue, 1 Feb 2022 08:16:18 +0100
+Subject: ipheth: fix EOVERFLOW in ipheth_rcvbulk_callback
+
+From: Georgi Valkov <gvalkov@abv.bg>
+
+commit 63e4b45c82ed1bde979da7052229a4229ce9cabf upstream.
+
+When rx_buf is allocated we need to account for IPHETH_IP_ALIGN,
+which reduces the usable size by 2 bytes. Otherwise we have 1512
+bytes usable instead of 1514, and if we receive more than 1512
+bytes, ipheth_rcvbulk_callback is called with status -EOVERFLOW,
+after which the driver malfunctions and all communication stops.
+
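+Put concretely: with IPHETH_IP_ALIGN = 2, a plain IPHETH_BUF_SIZE
+allocation leaves IPHETH_BUF_SIZE - 2 usable bytes (1512 instead of
+1514), so allocating IPHETH_BUF_SIZE + IPHETH_IP_ALIGN restores room
+for a full-sized frame.
+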
+Resolves ipheth 2-1:4.2: ipheth_rcvbulk_callback: urb status: -75
+
+Fixes: f33d9e2b48a3 ("usbnet: ipheth: fix connectivity with iOS 14")
+Signed-off-by: Georgi Valkov <gvalkov@abv.bg>
+Tested-by: Jan Kiszka <jan.kiszka@siemens.com>
+Link: https://lore.kernel.org/all/B60B8A4B-92A0-49B3-805D-809A2433B46C@abv.bg/
+Link: https://lore.kernel.org/all/24851bd2769434a5fc24730dce8e8a984c5a4505.1643699778.git.jan.kiszka@siemens.com/
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/ipheth.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/usb/ipheth.c
++++ b/drivers/net/usb/ipheth.c
+@@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct iphe
+ if (tx_buf == NULL)
+ goto free_rx_urb;
+
+- rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE,
++ rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ GFP_KERNEL, &rx_urb->transfer_dma);
+ if (rx_buf == NULL)
+ goto free_tx_buf;
+@@ -146,7 +146,7 @@ error_nomem:
+
+ static void ipheth_free_urbs(struct ipheth_device *iphone)
+ {
+- usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf,
++ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf,
+ iphone->rx_urb->transfer_dma);
+ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf,
+ iphone->tx_urb->transfer_dma);
+@@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct iphet
+
+ usb_fill_bulk_urb(dev->rx_urb, udev,
+ usb_rcvbulkpipe(udev, dev->bulk_in),
+- dev->rx_buf, IPHETH_BUF_SIZE,
++ dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ ipheth_rcvbulk_callback,
+ dev);
+ dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
--- /dev/null
+From d19a7af73b5ecaac8168712d18be72b9db166768 Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 18 Jan 2022 17:00:51 -0500
+Subject: lockd: fix failure to cleanup client locks
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+commit d19a7af73b5ecaac8168712d18be72b9db166768 upstream.
+
+In my testing, we're sometimes hitting the request->fl_flags & FL_EXISTS
+case in posix_lock_inode, presumably just by random luck since we're not
+actually initializing fl_flags here.
+
+This probably didn't matter before commit 7f024fcd5c97 ("Keep read and
+write fds with each nlm_file") since we wouldn't previously unlock
+unless we knew there were locks.
+
+But now it causes lockd to give up on removing more locks.
+
+We could just initialize fl_flags, but really it seems dubious to be
+calling vfs_lock_file with random values in some of the fields.
+
+Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file")
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+[ cel: fixed checkpatch.pl nit ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/lockd/svcsubs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -180,6 +180,7 @@ static int nlm_unlock_files(struct nlm_f
+ {
+ struct file_lock lock;
+
++ locks_init_lock(&lock);
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
--- /dev/null
+From 6e7f90d163afa8fc2efd6ae318e7c20156a5621f Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 18 Jan 2022 17:00:16 -0500
+Subject: lockd: fix server crash on reboot of client holding lock
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+commit 6e7f90d163afa8fc2efd6ae318e7c20156a5621f upstream.
+
+I thought I was iterating over the array when actually the iteration is
+over the values contained in the array?
+
+Ugh, keep it simple.
+
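+Concretely, the buggy loop iterated over the pointer values stored in
+the array, not over its two slots:
+
+ /* walks from the address stored in f_file[0] up to the address
+  * stored in f_file[1], i.e. over arbitrary kernel pointers
+  */
+ for (f = file->f_file[0]; f <= file->f_file[1]; f++)
+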
+Symptoms were a null dereference in vfs_lock_file() when an NFSv3
+client that previously held a lock came back up and sent a notify.
+
+Reported-by: Jonathan Woithe <jwoithe@just42.net>
+Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file")
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/lockd/svcsubs.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -179,19 +179,20 @@ nlm_delete_file(struct nlm_file *file)
+ static int nlm_unlock_files(struct nlm_file *file)
+ {
+ struct file_lock lock;
+- struct file *f;
+
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
+- for (f = file->f_file[0]; f <= file->f_file[1]; f++) {
+- if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
+- pr_warn("lockd: unlock failure in %s:%d\n",
+- __FILE__, __LINE__);
+- return 1;
+- }
+- }
++ if (file->f_file[O_RDONLY] &&
++ vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL))
++ goto out_err;
++ if (file->f_file[O_WRONLY] &&
++ vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL))
++ goto out_err;
+ return 0;
++out_err:
++ pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__);
++ return 1;
+ }
+
+ /*
--- /dev/null
+From 7674b7b559b683478c3832527c59bceb169e701d Mon Sep 17 00:00:00 2001
+From: Raju Rangoju <Raju.Rangoju@amd.com>
+Date: Thu, 27 Jan 2022 11:32:22 +0530
+Subject: net: amd-xgbe: ensure to reset the tx_timer_active flag
+
+From: Raju Rangoju <Raju.Rangoju@amd.com>
+
+commit 7674b7b559b683478c3832527c59bceb169e701d upstream.
+
+Make sure to reset the tx_timer_active flag in xgbe_stop(); otherwise
+a port restart may result in a tx timeout due to the uncleared flag.
+
+Fixes: c635eaacbf77 ("amd-xgbe: Remove Tx coalescing")
+Co-developed-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20220127060222.453371-1-Raju.Rangoju@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe
+ if (!channel->tx_ring)
+ break;
+
++ /* Deactivate the Tx timer */
+ del_timer_sync(&channel->tx_timer);
++ channel->tx_timer_active = 0;
+ }
+ }
+
--- /dev/null
+From 5aac9108a180fc06e28d4e7fb00247ce603b72ee Mon Sep 17 00:00:00 2001
+From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+Date: Thu, 27 Jan 2022 14:50:03 +0530
+Subject: net: amd-xgbe: Fix skb data length underflow
+
+From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+
+commit 5aac9108a180fc06e28d4e7fb00247ce603b72ee upstream.
+
+A BUG_ON() in include/linux/skbuff.h triggers, leading to an
+intermittent kernel panic, when an skb length underflow is detected.
+
+Fix this by dropping the packet if such length underflows are seen
+because of inconsistencies in the hardware descriptors.
+
+Fixes: 622c36f143fc ("amd-xgbe: Fix jumbo MTU processing on newer hardware")
+Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20220127092003.2812745-1-Shyam-sundar.S-k@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -2555,6 +2555,14 @@ read_again:
+ buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
+ len += buf2_len;
+
++ if (buf2_len > rdata->rx.buf.dma_len) {
++ /* Hardware inconsistency within the descriptors
++ * that has resulted in a length underflow.
++ */
++ error = 1;
++ goto skip_data;
++ }
++
+ if (!skb) {
+ skb = xgbe_create_skb(pdata, napi, rdata,
+ buf1_len);
+@@ -2584,8 +2592,10 @@ skip_data:
+ if (!last || context_next)
+ goto read_again;
+
+- if (!skb)
++ if (!skb || error) {
++ dev_kfree_skb(skb);
+ goto next_packet;
++ }
+
+ /* Be sure we don't exceed the configured MTU */
+ max_len = netdev->mtu + ETH_HLEN;
--- /dev/null
+From 34a081761e4e3c35381cbfad609ebae2962fe2f8 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Tue, 1 Feb 2022 09:02:05 -0600
+Subject: net: ipa: request IPA register values be retained
+
+From: Alex Elder <elder@linaro.org>
+
+commit 34a081761e4e3c35381cbfad609ebae2962fe2f8 upstream.
+
+In some cases, the IPA hardware needs to request the always-on
+subsystem (AOSS) to coordinate with the IPA microcontroller to
+retain IPA register values at power collapse. This is done by
+issuing a QMP request to the AOSS microcontroller. A similar
+request undoes that request.
+
+We must get and hold the "QMP" handle early, because we might get
+back EPROBE_DEFER for that. But the actual request should be sent
+while we know the IPA clock is active, and when we know the
+microcontroller is operational.
+
+Fixes: 1aac309d3207 ("net: ipa: use autosuspend")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_power.c | 52 ++++++++++++++++++++++++++++++++++++++++++++
+ drivers/net/ipa/ipa_power.h | 7 +++++
+ drivers/net/ipa/ipa_uc.c | 5 ++++
+ 3 files changed, 64 insertions(+)
+
+--- a/drivers/net/ipa/ipa_power.c
++++ b/drivers/net/ipa/ipa_power.c
+@@ -11,6 +11,8 @@
+ #include <linux/pm_runtime.h>
+ #include <linux/bitops.h>
+
++#include "linux/soc/qcom/qcom_aoss.h"
++
+ #include "ipa.h"
+ #include "ipa_power.h"
+ #include "ipa_endpoint.h"
+@@ -64,6 +66,7 @@ enum ipa_power_flag {
+ * struct ipa_power - IPA power management information
+ * @dev: IPA device pointer
+ * @core: IPA core clock
++ * @qmp: QMP handle for AOSS communication
+ * @spinlock: Protects modem TX queue enable/disable
+ * @flags: Boolean state flags
+ * @interconnect_count: Number of elements in interconnect[]
+@@ -72,6 +75,7 @@ enum ipa_power_flag {
+ struct ipa_power {
+ struct device *dev;
+ struct clk *core;
++ struct qmp *qmp;
+ spinlock_t spinlock; /* used with STOPPED/STARTED power flags */
+ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
+ u32 interconnect_count;
+@@ -382,6 +386,47 @@ void ipa_power_modem_queue_active(struct
+ clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
+ }
+
++static int ipa_power_retention_init(struct ipa_power *power)
++{
++ struct qmp *qmp = qmp_get(power->dev);
++
++ if (IS_ERR(qmp)) {
++ if (PTR_ERR(qmp) == -EPROBE_DEFER)
++ return -EPROBE_DEFER;
++
++ /* We assume any other error means it's not defined/needed */
++ qmp = NULL;
++ }
++ power->qmp = qmp;
++
++ return 0;
++}
++
++static void ipa_power_retention_exit(struct ipa_power *power)
++{
++ qmp_put(power->qmp);
++ power->qmp = NULL;
++}
++
++/* Control register retention on power collapse */
++void ipa_power_retention(struct ipa *ipa, bool enable)
++{
++ static const char fmt[] = "{ class: bcm, res: ipa_pc, val: %c }";
++ struct ipa_power *power = ipa->power;
++ char buf[36]; /* Exactly enough for fmt[]; size a multiple of 4 */
++ int ret;
++
++ if (!power->qmp)
++ return; /* Not needed on this platform */
++
++ (void)snprintf(buf, sizeof(buf), fmt, enable ? '1' : '0');
++
++ ret = qmp_send(power->qmp, buf, sizeof(buf));
++ if (ret)
++ dev_err(power->dev, "error %d sending QMP %sable request\n",
++ ret, enable ? "en" : "dis");
++}
++
+ int ipa_power_setup(struct ipa *ipa)
+ {
+ int ret;
+@@ -438,12 +483,18 @@ ipa_power_init(struct device *dev, const
+ if (ret)
+ goto err_kfree;
+
++ ret = ipa_power_retention_init(power);
++ if (ret)
++ goto err_interconnect_exit;
++
+ pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_enable(dev);
+
+ return power;
+
++err_interconnect_exit:
++ ipa_interconnect_exit(power);
+ err_kfree:
+ kfree(power);
+ err_clk_put:
+@@ -460,6 +511,7 @@ void ipa_power_exit(struct ipa_power *po
+
+ pm_runtime_disable(dev);
+ pm_runtime_dont_use_autosuspend(dev);
++ ipa_power_retention_exit(power);
+ ipa_interconnect_exit(power);
+ kfree(power);
+ clk_put(clk);
+--- a/drivers/net/ipa/ipa_power.h
++++ b/drivers/net/ipa/ipa_power.h
+@@ -41,6 +41,13 @@ void ipa_power_modem_queue_wake(struct i
+ void ipa_power_modem_queue_active(struct ipa *ipa);
+
+ /**
++ * ipa_power_retention() - Control register retention on power collapse
++ * @ipa: IPA pointer
++ * @enable: Whether retention should be enabled or disabled
++ */
++void ipa_power_retention(struct ipa *ipa, bool enable);
++
++/**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa: IPA pointer
+ *
+--- a/drivers/net/ipa/ipa_uc.c
++++ b/drivers/net/ipa/ipa_uc.c
+@@ -11,6 +11,7 @@
+
+ #include "ipa.h"
+ #include "ipa_uc.h"
++#include "ipa_power.h"
+
+ /**
+ * DOC: The IPA embedded microcontroller
+@@ -154,6 +155,7 @@ static void ipa_uc_response_hdlr(struct
+ case IPA_UC_RESPONSE_INIT_COMPLETED:
+ if (ipa->uc_powered) {
+ ipa->uc_loaded = true;
++ ipa_power_retention(ipa, true);
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+ ipa->uc_powered = false;
+@@ -184,6 +186,9 @@ void ipa_uc_deconfig(struct ipa *ipa)
+
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1);
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0);
++ if (ipa->uc_loaded)
++ ipa_power_retention(ipa, false);
++
+ if (!ipa->uc_powered)
+ return;
+
--- /dev/null
+From 350d9a823734b5a7e767cddc3bdde5f0bcbb7ff4 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Thu, 6 Jan 2022 18:45:26 +0200
+Subject: net/mlx5: Bridge, ensure dev_name is null-terminated
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit 350d9a823734b5a7e767cddc3bdde5f0bcbb7ff4 upstream.
+
+Even though net_device->name is guaranteed to be a null-terminated
+string of size <= IFNAMSIZ, the test robot complains that the return
+value of netdev_name() can be larger:
+
+In file included from include/trace/define_trace.h:102,
+ from drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:113,
+ from drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c:12:
+ drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h: In function 'trace_event_raw_event_mlx5_esw_bridge_fdb_template':
+>> drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:24:29: warning: 'strncpy' output may be truncated copying 16 bytes from a string of length 20 [-Wstringop-truncation]
+ 24 | strncpy(__entry->dev_name,
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~
+ 25 | netdev_name(fdb->dev),
+ | ~~~~~~~~~~~~~~~~~~~~~~
+ 26 | IFNAMSIZ);
+ | ~~~~~~~~~
+
+This is caused by the fact that the default value of IFNAMSIZ is 16,
+while the placeholder value returned by netdev_name() for unnamed net
+devices is longer than that.
+
+The offending code is in a tracing function that is only called for mlx5
+representors, so there is no straightforward way to reproduce the issue,
+but let's fix it for correctness' sake by replacing strncpy() with
+strscpy() to ensure that the resulting string is always null-terminated.
+
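+Unlike strncpy(), strscpy() always NUL-terminates the destination
+(truncating the source if needed) and does not zero-pad the remainder;
+a sketch:
+
+ char dst[IFNAMSIZ];
+
+ strscpy(dst, netdev_name(fdb->dev), IFNAMSIZ); /* always terminated */
+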
+Fixes: 9724fd5d9c2a ("net/mlx5: Bridge, add tracepoints")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+@@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_
+ __field(unsigned int, used)
+ ),
+ TP_fast_assign(
+- strncpy(__entry->dev_name,
++ strscpy(__entry->dev_name,
+ netdev_name(fdb->dev),
+ IFNAMSIZ);
+ memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
--- /dev/null
+From 880b517691908fb753019b9b27cd082e7617debd Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Mon, 24 Jan 2022 13:56:26 +0200
+Subject: net/mlx5: Bridge, Fix devlink deadlock on net namespace deletion
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit 880b517691908fb753019b9b27cd082e7617debd upstream.
+
+When changing mode to switchdev, rep bridge init, registered to the
+netdevice notifier, holds the devlink lock and then takes
+pernet_ops_rwsem. At the same time, deleting a netns holds
+pernet_ops_rwsem and then takes the devlink lock: a classic AB-BA
+ordering inversion.
+
+Example sequence is:
+$ ip netns add foo
+$ devlink dev eswitch set pci/0000:00:08.0 mode switchdev &
+$ ip netns del foo
+
+deleting netns trace:
+
+[ 1185.365555] ? devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.368331] ? mutex_lock_io_nested+0x13f0/0x13f0
+[ 1185.370984] ? xt_find_table+0x40/0x100
+[ 1185.373244] ? __mutex_lock+0x24a/0x15a0
+[ 1185.375494] ? net_generic+0xa0/0x1c0
+[ 1185.376844] ? wait_for_completion_io+0x280/0x280
+[ 1185.377767] ? devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.378686] devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.379579] ? devlink_nl_cmd_get_dumpit+0x3a0/0x3a0
+[ 1185.380557] ? xt_find_table+0xda/0x100
+[ 1185.381367] cleanup_net+0x372/0x8e0
+
+changing mode to switchdev trace:
+
+[ 1185.411267] down_write+0x13a/0x150
+[ 1185.412029] ? down_write_killable+0x180/0x180
+[ 1185.413005] register_netdevice_notifier+0x1e/0x210
+[ 1185.414000] mlx5e_rep_bridge_init+0x181/0x360 [mlx5_core]
+[ 1185.415243] mlx5e_uplink_rep_enable+0x269/0x480 [mlx5_core]
+[ 1185.416464] ? mlx5e_uplink_rep_disable+0x210/0x210 [mlx5_core]
+[ 1185.417749] mlx5e_attach_netdev+0x232/0x400 [mlx5_core]
+[ 1185.418906] mlx5e_netdev_attach_profile+0x15b/0x1e0 [mlx5_core]
+[ 1185.420172] mlx5e_netdev_change_profile+0x15a/0x1d0 [mlx5_core]
+[ 1185.421459] mlx5e_vport_rep_load+0x557/0x780 [mlx5_core]
+[ 1185.422624] ? mlx5e_stats_grp_vport_rep_num_stats+0x10/0x10 [mlx5_core]
+[ 1185.424006] mlx5_esw_offloads_rep_load+0xdb/0x190 [mlx5_core]
+[ 1185.425277] esw_offloads_enable+0xd74/0x14a0 [mlx5_core]
+
+Fix this by registering the rep bridge with a per-net netdev notifier
+instead of the global one; the per-net notifier operates on the net
+namespace without holding pernet_ops_rwsem.
+
+Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -491,7 +491,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_
+ }
+
+ br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+- err = register_netdevice_notifier(&br_offloads->netdev_nb);
++ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+ err);
+@@ -526,7 +526,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+- unregister_netdevice_notifier(&br_offloads->netdev_nb);
++ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ unregister_switchdev_notifier(&br_offloads->nb);
+ destroy_workqueue(br_offloads->wq);
--- /dev/null
+From 04f8c12f031fcd0ffa0c72822eb665ceb2c872e7 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Thu, 6 Jan 2022 16:40:18 +0200
+Subject: net/mlx5: Bridge, take rtnl lock in init error handler
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit 04f8c12f031fcd0ffa0c72822eb665ceb2c872e7 upstream.
+
+The mlx5_esw_bridge_cleanup() is expected to be called with rtnl lock
+taken, which is true for mlx5e_rep_bridge_cleanup() function but not for
+error handling code in mlx5e_rep_bridge_init(). Add missing rtnl
+lock/unlock calls and extend both mlx5_esw_bridge_cleanup() and its dual
+function mlx5_esw_bridge_init() with ASSERT_RTNL() to verify the invariant
+from now on.
+
+Fixes: 7cd6a54a8285 ("net/mlx5: Bridge, handle FDB events")
+Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 2 ++
+ drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 ++++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -509,7 +509,9 @@ err_register_swdev_blk:
+ err_register_swdev:
+ destroy_workqueue(br_offloads->wq);
+ err_alloc_wq:
++ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
++ rtnl_unlock();
+ }
+
+ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+@@ -1574,6 +1574,8 @@ struct mlx5_esw_bridge_offloads *mlx5_es
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
++ ASSERT_RTNL();
++
+ br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
+ if (!br_offloads)
+ return ERR_PTR(-ENOMEM);
+@@ -1590,6 +1592,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+
++ ASSERT_RTNL();
++
+ if (!br_offloads)
+ return;
+
--- /dev/null
+From d8e5883d694bb053b19c4142a2d1f43a34f6fe2c Mon Sep 17 00:00:00 2001
+From: Maor Dickman <maord@nvidia.com>
+Date: Sun, 30 Jan 2022 16:00:41 +0200
+Subject: net/mlx5: E-Switch, Fix uninitialized variable modact
+
+From: Maor Dickman <maord@nvidia.com>
+
+commit d8e5883d694bb053b19c4142a2d1f43a34f6fe2c upstream.
+
+The variable modact is not initialized before being used in the
+modify-header allocation command, which can cause the command to fail.
+
+Fix by initializing modact with zeros; a local array with no
+initializer has indeterminate contents, while "= {}" zeroes every
+element.
+
+Addresses-Coverity: ("Uninitialized scalar variable")
+Fixes: 8f1e0b97cc70 ("net/mlx5: E-Switch, Mark miss packets with new chain id mapping")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -212,7 +212,7 @@ static int
+ create_chain_restore(struct fs_chain *chain)
+ {
+ struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+- char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
++ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_fs_chains *chains = chain->chains;
+ enum mlx5e_tc_attr_to_reg chain_to_reg;
+ struct mlx5_modify_hdr *mod_hdr;
--- /dev/null
+From 55b2ca702cfa744a9eb108915996a2294da47e71 Mon Sep 17 00:00:00 2001
+From: Dima Chumak <dchumak@nvidia.com>
+Date: Mon, 17 Jan 2022 15:32:16 +0200
+Subject: net/mlx5: Fix offloading with ESWITCH_IPV4_TTL_MODIFY_ENABLE
+
+From: Dima Chumak <dchumak@nvidia.com>
+
+commit 55b2ca702cfa744a9eb108915996a2294da47e71 upstream.
+
+Only prio 1 is supported for nic mode when there is no ignore-flow-level
+support in firmware. But for switchdev mode, which supports a fixed
+number of statically pre-allocated prios, this restriction is not
+relevant, so it can be relaxed.
+
+Fixes: d671e109bd85 ("net/mlx5: Fix tc max supported prio for nic mode")
+Signed-off-by: Dima Chumak <dchumak@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -121,12 +121,13 @@ u32 mlx5_chains_get_nf_ft_chain(struct m
+
+ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+ {
+- if (!mlx5_chains_prios_supported(chains))
+- return 1;
+-
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
++ if (!chains->dev->priv.eswitch ||
++ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
++ return 1;
++
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_PRIO;
+ }
--- /dev/null
+From 3c5193a87b0fea090aa3f769d020337662d87b5e Mon Sep 17 00:00:00 2001
+From: Maher Sanalla <msanalla@nvidia.com>
+Date: Thu, 13 Jan 2022 15:48:48 +0200
+Subject: net/mlx5: Use del_timer_sync in fw reset flow of halting poll
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+commit 3c5193a87b0fea090aa3f769d020337662d87b5e upstream.
+
+Substitute del_timer() with del_timer_sync() in the fw reset polling
+deactivation flow, in order to prevent a race condition that occurs
+when del_timer() is called and the timer is deactivated while another
+process is handling the timer interrupt. This situation led to the
+following call trace:
+ RIP: 0010:run_timer_softirq+0x137/0x420
+ <IRQ>
+ recalibrate_cpu_khz+0x10/0x10
+ ktime_get+0x3e/0xa0
+ ? sched_clock_cpu+0xb/0xc0
+ __do_softirq+0xf5/0x2ea
+ irq_exit_rcu+0xc1/0xf0
+ sysvec_apic_timer_interrupt+0x9e/0xc0
+ asm_sysvec_apic_timer_interrupt+0x12/0x20
+ </IRQ>
+
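+Unlike del_timer(), del_timer_sync() also waits for a handler already
+running on another CPU to finish, so on return:
+
+ del_timer_sync(&fw_reset->timer);
+ /* the timer callback is guaranteed not to be running anymore */
+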
+Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+@@ -132,7 +132,7 @@ static void mlx5_stop_sync_reset_poll(st
+ {
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+- del_timer(&fw_reset->timer);
++ del_timer_sync(&fw_reset->timer);
+ }
+
+ static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
--- /dev/null
+From ad5185735f7dab342fdd0dd41044da4c9ccfef67 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Mon, 24 Jan 2022 09:20:28 -0800
+Subject: net/mlx5e: Avoid field-overflowing memcpy()
+
+From: Kees Cook <keescook@chromium.org>
+
+commit ad5185735f7dab342fdd0dd41044da4c9ccfef67 upstream.
+
+In preparation for FORTIFY_SOURCE performing compile-time and run-time
+field bounds checking for memcpy(), memmove(), and memset(), avoid
+intentionally writing across neighboring fields.
+
+Use flexible arrays instead of zero-element arrays (which look like they
+are always overflowing) and split the cross-field memcpy() into two halves
+that can be appropriately bounds-checked by the compiler.
+
+We were doing:
+
+ #define ETH_HLEN 14
+ #define VLAN_HLEN 4
+ ...
+ #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+ ...
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ ...
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg = wqe->data;
+ ...
+ memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
+
+target is wqe->eth.inline_hdr.start (which the compiler sees as being
+2 bytes in size), but copying 18, intending to write across start
+(really vlan_tci, 2 bytes). The remaining 16 bytes get written into
+wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr
+(8 bytes).
+
+struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl; /* 0 16 */
+ struct mlx5_wqe_eth_seg eth; /* 16 16 */
+ struct mlx5_wqe_data_seg data[]; /* 32 0 */
+
+ /* size: 32, cachelines: 1, members: 3 */
+ /* last cacheline: 32 bytes */
+};
+
+struct mlx5_wqe_eth_seg {
+ u8 swp_outer_l4_offset; /* 0 1 */
+ u8 swp_outer_l3_offset; /* 1 1 */
+ u8 swp_inner_l4_offset; /* 2 1 */
+ u8 swp_inner_l3_offset; /* 3 1 */
+ u8 cs_flags; /* 4 1 */
+ u8 swp_flags; /* 5 1 */
+ __be16 mss; /* 6 2 */
+ __be32 flow_table_metadata; /* 8 4 */
+ union {
+ struct {
+ __be16 sz; /* 12 2 */
+ u8 start[2]; /* 14 2 */
+ } inline_hdr; /* 12 4 */
+ struct {
+ __be16 type; /* 12 2 */
+ __be16 vlan_tci; /* 14 2 */
+ } insert; /* 12 4 */
+ __be32 trailer; /* 12 4 */
+ }; /* 12 4 */
+
+ /* size: 16, cachelines: 1, members: 9 */
+ /* last cacheline: 16 bytes */
+};
+
+struct mlx5_wqe_data_seg {
+ __be32 byte_count; /* 0 4 */
+ __be32 lkey; /* 4 4 */
+ __be64 addr; /* 8 8 */
+
+ /* size: 16, cachelines: 1, members: 3 */
+ /* last cacheline: 16 bytes */
+};
+
+So, split the memcpy() so the compiler can reason about the buffer
+sizes.
+
+"pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe
+nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
+code changes (i.e. only source line number induced differences and
+optimizations).
+
+Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support")
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +++---
+ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++-
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -225,7 +225,7 @@ static inline int mlx5e_get_max_num_chan
+ struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_eth_seg eth;
+- struct mlx5_wqe_data_seg data[0];
++ struct mlx5_wqe_data_seg data[];
+ };
+
+ struct mlx5e_rx_wqe_ll {
+@@ -242,8 +242,8 @@ struct mlx5e_umr_wqe {
+ struct mlx5_wqe_umr_ctrl_seg uctrl;
+ struct mlx5_mkey_seg mkc;
+ union {
+- struct mlx5_mtt inline_mtts[0];
+- struct mlx5_klm inline_klms[0];
++ DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
++ DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
+ };
+ };
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+@@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq
+
+ /* copy the inline part if required */
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+- memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
++ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
+ eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
++ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
++ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
+ dma_len -= MLX5E_XDP_MIN_INLINE;
+ dma_addr += MLX5E_XDP_MIN_INLINE;
+ dseg++;
--- /dev/null
+From 5b209d1a22afabfb7d644abb10510c5713a3e569 Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Tue, 1 Feb 2022 15:27:48 +0200
+Subject: net/mlx5e: Avoid implicit modify hdr for decap drop rule
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit 5b209d1a22afabfb7d644abb10510c5713a3e569 upstream.
+
+Currently the driver adds an implicit modify header action for decap
+rules on tunnel devices if the port is an OVS port. This is also done
+when the action is drop, which makes the modify header redundant; the
+FW doesn't support it and will generate a syndrome.
+
+kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3)
+
+Fix it by adding the implicit modify hdr only for fwd actions.
+
+Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device")
+Fixes: 077cdda764c7 ("net/mlx5e: TC, Fix memory leak with rules with internal port")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Ariel Levkovich <lariel@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1425,7 +1425,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
+ if (err)
+ goto err_out;
+
+- if (!attr->chain && esw_attr->int_port) {
++ if (!attr->chain && esw_attr->int_port &&
++ attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ /* If decap route device is internal port, change the
+ * source vport value in reg_c0 back to uplink just in
+ * case the rule performs goto chain > 0. If we have a miss
--- /dev/null
+From 736dfe4e68b868829a1e89dfef4a44c1580d4478 Mon Sep 17 00:00:00 2001
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+Date: Tue, 18 Jan 2022 13:31:54 +0200
+Subject: net/mlx5e: Don't treat small ceil values as unlimited in HTB offload
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+commit 736dfe4e68b868829a1e89dfef4a44c1580d4478 upstream.
+
+The hardware spec defines max_average_bw == 0 as "unlimited bandwidth".
+max_average_bw is calculated as `ceil / BYTES_IN_MBIT`, which can become
+0 when ceil is small, leading to an undesired effect of having no
+bandwidth limit.
+
+This commit fixes it by rounding up small values of ceil to 1 Mbit/s.
+
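+Worked example: HTB rates are in bytes/s and BYTES_IN_MBIT is 125000,
+so a ceil of 100000 bytes/s (0.8 Mbit/s) gives div_u64(100000, 125000)
+== 0, i.e. "unlimited"; the max_t(u32, ..., 1) clamp turns that into
+1 Mbit/s.
+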
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+@@ -569,7 +569,8 @@ static int mlx5e_htb_convert_rate(struct
+
+ static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
+ {
+- *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
++ /* Hardware treats 0 as "unlimited", set at least 1. */
++ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
--- /dev/null
+From 7957837b816f11eecb9146235bb0715478f4c81f Mon Sep 17 00:00:00 2001
+From: Khalid Manaa <khalidm@nvidia.com>
+Date: Wed, 26 Jan 2022 14:25:55 +0200
+Subject: net/mlx5e: Fix broken SKB allocation in HW-GRO
+
+From: Khalid Manaa <khalidm@nvidia.com>
+
+commit 7957837b816f11eecb9146235bb0715478f4c81f upstream.
+
+In case the HW doesn't perform header-data split, it will write the
+whole packet into the data buffer in the WQ. In this case the SHAMPO
+CQE handler can't use the header entry to build the SKB; instead it
+should allocate new memory and build the SKB using the function
+mlx5e_skb_from_cqe_mpwrq_nonlinear().
+
+Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support")
+Signed-off-by: Khalid Manaa <khalidm@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 26 +++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1866,7 +1866,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
+ return skb;
+ }
+
+-static void
++static struct sk_buff *
+ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ struct mlx5_cqe64 *cqe, u16 header_index)
+ {
+@@ -1890,7 +1890,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_r
+ skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size);
+
+ if (unlikely(!skb))
+- return;
++ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(head->page);
+@@ -1902,7 +1902,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_r
+ ALIGN(head_size, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+- return;
++ return NULL;
+ }
+
+ prefetchw(skb->data);
+@@ -1913,9 +1913,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_r
+ skb->tail += head_size;
+ skb->len += head_size;
+ }
+- rq->hw_gro_data->skb = skb;
+- NAPI_GRO_CB(skb)->count = 1;
+- skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size;
++ return skb;
+ }
+
+ static void
+@@ -1975,6 +1973,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_sh
+ u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ u32 page_idx = wqe_offset >> PAGE_SHIFT;
++ u16 head_size = cqe->shampo.header_size;
+ struct sk_buff **skb = &rq->hw_gro_data->skb;
+ bool flush = cqe->shampo.flush;
+ bool match = cqe->shampo.match;
+@@ -2007,9 +2006,16 @@ static void mlx5e_handle_rx_cqe_mpwrq_sh
+ }
+
+ if (!*skb) {
+- mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
++ if (likely(head_size))
++ *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
++ else
++ *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
++ page_idx);
+ if (unlikely(!*skb))
+ goto free_hd_entry;
++
++ NAPI_GRO_CB(*skb)->count = 1;
++ skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
+ } else {
+ NAPI_GRO_CB(*skb)->count++;
+ if (NAPI_GRO_CB(*skb)->count == 2 &&
+@@ -2023,8 +2029,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_sh
+ }
+ }
+
+- di = &wi->umr.dma_info[page_idx];
+- mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
++ if (likely(head_size)) {
++ di = &wi->umr.dma_info[page_idx];
++ mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
++ }
+
+ mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
+ if (flush)
--- /dev/null
+From ec41332e02bd0acf1f24206867bb6a02f5877a62 Mon Sep 17 00:00:00 2001
+From: Maor Dickman <maord@nvidia.com>
+Date: Thu, 13 Jan 2022 15:11:42 +0200
+Subject: net/mlx5e: Fix handling of wrong devices during bond netevent
+
+From: Maor Dickman <maord@nvidia.com>
+
+commit ec41332e02bd0acf1f24206867bb6a02f5877a62 upstream.
+
+The current implementation of the bond netevent handler only checks
+whether the handled netdev is a VF representor; it is missing a check
+that the VF representor is on the same physical device as the bond
+handling the netevent.
+
+Fix by adding the missing check and by optimizing the VF representor
+check so that it does not access uninitialized private data and crash.
+
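+The ordering inside the rewritten check is what avoids the crash below
+(annotated sketch of the one-line helper from the hunk):
+
+	static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+	{
+		/* Check the cheap, generic LAG flag first; the old code
+		 * could dereference uninitialized mlx5e private data for
+		 * netdevs that were not full representors, which is the
+		 * NULL pointer dereference seen below.
+		 */
+		return netif_is_lag_port(netdev) &&
+		       mlx5e_eswitch_vf_rep(netdev);
+	}
+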
+BUG: kernel NULL pointer dereference, address: 000000000000036c
+PGD 0 P4D 0
+Oops: 0000 [#1] SMP NOPTI
+Workqueue: eth3bond0 bond_mii_monitor [bonding]
+RIP: 0010:mlx5e_is_uplink_rep+0xc/0x50 [mlx5_core]
+RSP: 0018:ffff88812d69fd60 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff8881cf800000 RCX: 0000000000000000
+RDX: ffff88812d69fe10 RSI: 000000000000001b RDI: ffff8881cf800880
+RBP: ffff8881cf800000 R08: 00000445cabccf2b R09: 0000000000000008
+R10: 0000000000000004 R11: 0000000000000008 R12: ffff88812d69fe10
+R13: 00000000fffffffe R14: ffff88820c0f9000 R15: 0000000000000000
+FS: 0000000000000000(0000) GS:ffff88846fb00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000000036c CR3: 0000000103d80006 CR4: 0000000000370ea0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ mlx5e_eswitch_uplink_rep+0x31/0x40 [mlx5_core]
+ mlx5e_rep_is_lag_netdev+0x94/0xc0 [mlx5_core]
+ mlx5e_rep_esw_bond_netevent+0xeb/0x3d0 [mlx5_core]
+ raw_notifier_call_chain+0x41/0x60
+ call_netdevice_notifiers_info+0x34/0x80
+ netdev_lower_state_changed+0x4e/0xa0
+ bond_mii_monitor+0x56b/0x640 [bonding]
+ process_one_work+0x1b9/0x390
+ worker_thread+0x4d/0x3d0
+ ? rescuer_thread+0x350/0x350
+ kthread+0x124/0x150
+ ? set_kthread_struct+0x40/0x40
+ ret_from_fork+0x1f/0x30
+
+Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 32 +++++++-----------
+ 1 file changed, 14 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_
+
+ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+ {
+- struct mlx5e_rep_priv *rpriv;
+- struct mlx5e_priv *priv;
+-
+- /* A given netdev is not a representor or not a slave of LAG configuration */
+- if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
+- return false;
+-
+- priv = netdev_priv(netdev);
+- rpriv = priv->ppriv;
+-
+- /* Egress acl forward to vport is supported only non-uplink representor */
+- return rpriv->rep->vport != MLX5_VPORT_UPLINK;
++ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+ }
+
+ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_e
+ u16 fwd_vport_num;
+ int err;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(s
+ unsigned long event, void *ptr)
+ {
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++ struct mlx5e_rep_priv *rpriv;
++ struct mlx5e_rep_bond *bond;
++ struct mlx5e_priv *priv;
++
++ if (!mlx5e_rep_is_lag_netdev(netdev))
++ return NOTIFY_DONE;
++
++ bond = container_of(nb, struct mlx5e_rep_bond, nb);
++ priv = netdev_priv(netdev);
++ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
++ /* Verify VF representor is on the same device of the bond handling the netevent. */
++ if (rpriv->uplink_priv.bond != bond)
++ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
--- /dev/null
+From 4a08a131351e375a2969b98e46df260ed04dcba7 Mon Sep 17 00:00:00 2001
+From: Gal Pressman <gal@nvidia.com>
+Date: Sun, 16 Jan 2022 09:07:22 +0200
+Subject: net/mlx5e: Fix module EEPROM query
+
+From: Gal Pressman <gal@nvidia.com>
+
+commit 4a08a131351e375a2969b98e46df260ed04dcba7 upstream.
+
+When querying the module EEPROM, the 'offset' variable was misused
+in place of the 'query.offset' field.
+Fix that by always using 'offset' and assigning its value to
+'query.offset' right before the mcia register read call.
+
+While at it, make the cross-pages read size adjustment more intuitive.
+
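+With numbers (illustration only; MLX5_EEPROM_PAGE_LENGTH == 256): for
+offset = 250 and size = 16, offset + size = 266 crosses the page end,
+so the read is trimmed to
+
+	size = MLX5_EEPROM_PAGE_LENGTH - offset;	/* 256 - 250 = 6 */
+
+reading exactly up to the page boundary; the remaining bytes are left
+for a follow-up read.
+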
+Fixes: e19b0a3474ab ("net/mlx5: Refactor module EEPROM query")
+Reported-by: Wang Yugui <wangyugui@e16-tech.com>
+Signed-off-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/port.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -406,23 +406,24 @@ int mlx5_query_module_eeprom(struct mlx5
+
+ switch (module_id) {
+ case MLX5_MODULE_ID_SFP:
+- mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ case MLX5_MODULE_ID_QSFP:
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+- mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ default:
+ mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+ return -EINVAL;
+ }
+
+- if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH)
++ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+- size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
++ size = MLX5_EEPROM_PAGE_LENGTH - offset;
+
+ query.size = size;
++ query.offset = offset;
+
+ return mlx5_query_mcia(dev, &query, data);
+ }
--- /dev/null
+From b8d91145ed7cfa046cc07bcfb277465b9d45da73 Mon Sep 17 00:00:00 2001
+From: Khalid Manaa <khalidm@nvidia.com>
+Date: Wed, 26 Jan 2022 14:14:58 +0200
+Subject: net/mlx5e: Fix wrong calculation of header index in HW_GRO
+
+From: Khalid Manaa <khalidm@nvidia.com>
+
+commit b8d91145ed7cfa046cc07bcfb277465b9d45da73 upstream.
+
+The HW doesn't wrap the CQE.shampo.header_index field according to the
+headers buffer size; instead it keeps increasing it until the u16 value
+overflows.
+
+Thus the mlx5e_handle_rx_cqe_mpwrq_shampo handler should mask the CQE
+header_index field to find the actual header index in the headers buffer.
+
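+Assuming hd_per_wq is a power of two (which the mask relies on), the
+new helper reduces the free-running hardware counter to a valid index;
+e.g. with hd_per_wq == 64, a raw header_entry_index of 70 maps to
+entry 6:
+
+	return be16_to_cpu(cqe->shampo.header_entry_index) &
+	       (rq->mpwqe.shampo->hd_per_wq - 1);
+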
+Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support")
+Signed-off-by: Khalid Manaa <khalidm@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 5 +++++
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++--
+ 2 files changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+@@ -167,6 +167,11 @@ static inline u16 mlx5e_txqsq_get_next_p
+ return pi;
+ }
+
++static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
++{
++ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
++}
++
+ struct mlx5e_shampo_umr {
+ u16 len;
+ };
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1116,7 +1116,7 @@ static void mlx5e_shampo_update_ipv6_udp
+ static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct tcphdr *skb_tcp_hd)
+ {
+- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
++ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ struct tcphdr *last_tcp_hd;
+ void *last_hd_addr;
+
+@@ -1968,7 +1968,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx
+ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+ {
+ u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
+- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
++ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
--- /dev/null
+From 5352859b3bfa0ca188b2f1d2c1436fddc781e3b6 Mon Sep 17 00:00:00 2001
+From: Raed Salem <raeds@nvidia.com>
+Date: Thu, 2 Dec 2021 17:43:50 +0200
+Subject: net/mlx5e: IPsec: Fix crypto offload for non TCP/UDP encapsulated traffic
+
+From: Raed Salem <raeds@nvidia.com>
+
+commit 5352859b3bfa0ca188b2f1d2c1436fddc781e3b6 upstream.
+
+IPsec crypto offload always sets the ethernet segment checksum flags
+with the inner L4 header checksum flag enabled for encapsulated IPsec
+offloaded packets, regardless of the encapsulated L4 header type and
+even if it doesn't exist in the first place. This breaks non-TCP/UDP
+traffic.
+
+Set the inner L4 checksum flag only when the encapsulated L4 header
+protocol is TCP/UDP, using the software parser swp_inner_l4_offset
+field as the indication.
+
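+In flag terms, the intended logic is (sketch; the real hunk is below):
+
+	eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+	if (inner_ipproto) {
+		eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+		/* only TCP and UDP actually carry an inner L4 checksum */
+		if (inner_ipproto == IPPROTO_TCP ||
+		    inner_ipproto == IPPROTO_UDP)
+			eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+	}
+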
+Fixes: 5cfb540ef27b ("net/mlx5e: Set IPsec WAs only in IP's non checksum partial case.")
+Signed-off-by: Raed Salem <raeds@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+@@ -131,14 +131,17 @@ static inline bool
+ mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+ {
+- struct xfrm_offload *xo = xfrm_offload(skb);
++ u8 inner_ipproto;
+
+ if (!mlx5e_ipsec_eseg_meta(eseg))
+ return false;
+
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+- if (xo->inner_ipproto) {
+- eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM;
++ inner_ipproto = xfrm_offload(skb)->inner_ipproto;
++ if (inner_ipproto) {
++ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
++ if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP)
++ eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+ } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial_inner++;
--- /dev/null
+From de47db0cf7f4a9c555ad204e06baa70b50a70d08 Mon Sep 17 00:00:00 2001
+From: Raed Salem <raeds@nvidia.com>
+Date: Thu, 2 Dec 2021 17:49:01 +0200
+Subject: net/mlx5e: IPsec: Fix tunnel mode crypto offload for non TCP/UDP traffic
+
+From: Raed Salem <raeds@nvidia.com>
+
+commit de47db0cf7f4a9c555ad204e06baa70b50a70d08 upstream.
+
+The IPsec tunnel mode crypto offload software parser (SWP) setting in
+the data path currently always sets the inner L4 offset, regardless of
+the encapsulated L4 header type and whether it exists in the first
+place. This breaks non-TCP/UDP traffic.
+
+Set the SWP inner L4 offset only when the IPsec tunnel encapsulated L4
+header protocol is TCP/UDP.
+
+While at it, fix the inner IP protocol read used for setting the
+MLX5_ETH_WQE_SWP_INNER_L4_UDP flag, to address the case where the IP
+header protocol is IPv6.
+
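+The IPv6 part of the fix, sketched (illustrative; the real hunk is
+below):
+
+	/* Wrong for an inner IPv6 header: struct ipv6hdr has no
+	 * 'protocol' member (the equivalent is 'nexthdr'), so this
+	 * read is only valid for inner IPv4 packets.
+	 */
+	if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
+
+	/* xo->inner_ipproto is already resolved by the xfrm layer
+	 * and is correct for both inner IPv4 and IPv6.
+	 */
+	switch (xo->inner_ipproto) {
+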
+Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload")
+Signed-off-by: Raed Salem <raeds@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 13 ++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+@@ -157,11 +157,20 @@ static void mlx5e_ipsec_set_swp(struct s
+ /* Tunnel mode */
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+- if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
++
++ switch (xo->inner_ipproto) {
++ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
++ fallthrough;
++ case IPPROTO_TCP:
++ /* IP | ESP | IP | [TCP | UDP] */
++ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
++ break;
++ default:
++ break;
++ }
+ return;
+ }
+
--- /dev/null
+From a2446bc77a16cefd27de712d28af2396d6287593 Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Tue, 4 Jan 2022 10:38:02 +0200
+Subject: net/mlx5e: TC, Reject rules with drop and modify hdr action
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit a2446bc77a16cefd27de712d28af2396d6287593 upstream.
+
+This kind of action is not supported by firmware and generates a
+syndrome.
+
+kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3)
+
+Fixes: d7e75a325cb2 ("net/mlx5e: Add offloading of E-Switch TC pedit (header re-write) actions")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -3421,6 +3421,12 @@ actions_match_supported(struct mlx5e_pri
+ }
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
++ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
++ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
++ return false;
++ }
++
++ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
+ actions, ct_flow, ct_clear, extack))
+ return false;
--- /dev/null
+From 5623ef8a118838aae65363750dfafcba734dc8cb Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Mon, 17 Jan 2022 15:00:30 +0200
+Subject: net/mlx5e: TC, Reject rules with forward and drop actions
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit 5623ef8a118838aae65363750dfafcba734dc8cb upstream.
+
+Such rules are redundant but allowed and passed to the driver.
+The driver does not support offloading such rules, so return an error.
+
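+The check uses the all-bits-set idiom: !(~actions & M) is true exactly
+when every bit of M is set in actions, i.e. it is equivalent to
+
+	(actions & (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+		    MLX5_FLOW_CONTEXT_ACTION_DROP)) ==
+	    (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+	     MLX5_FLOW_CONTEXT_ACTION_DROP)
+
+so only rules carrying both forward and drop are rejected.
+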
+Fixes: 03a9d11e6eeb ("net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -3420,6 +3420,12 @@ actions_match_supported(struct mlx5e_pri
+ return false;
+ }
+
++ if (!(~actions &
++ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
++ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
++ return false;
++ }
++
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
--- /dev/null
+From 881cc731df6af99a21622e9be25a23b81adcd10b Mon Sep 17 00:00:00 2001
+From: Jonathan McDowell <noodles@earth.li>
+Date: Mon, 31 Jan 2022 13:56:41 +0000
+Subject: net: phy: Fix qca8081 with speeds lower than 2.5Gb/s
+
+From: Jonathan McDowell <noodles@earth.li>
+
+commit 881cc731df6af99a21622e9be25a23b81adcd10b upstream.
+
+A typo in qca808x_read_status means we try to set SMII mode on the
+port rather than SGMII when the link speed is not 2.5Gb/s. This
+results in no traffic due to the configuration mismatch between the
+PHY and the MAC.
+
+v2:
+ Only change interface mode when the link is up
+
+Fixes: 79c7bc0521545 ("net: phy: add qca8081 read_status")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jonathan McDowell <noodles@earth.li>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/at803x.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/phy/at803x.c
++++ b/drivers/net/phy/at803x.c
+@@ -1688,19 +1688,19 @@ static int qca808x_read_status(struct ph
+ if (ret < 0)
+ return ret;
+
+- if (phydev->link && phydev->speed == SPEED_2500)
+- phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+- else
+- phydev->interface = PHY_INTERFACE_MODE_SMII;
+-
+- /* generate seed as a lower random value to make PHY linked as SLAVE easily,
+- * except for master/slave configuration fault detected.
+- * the reason for not putting this code into the function link_change_notify is
+- * the corner case where the link partner is also the qca8081 PHY and the seed
+- * value is configured as the same value, the link can't be up and no link change
+- * occurs.
+- */
+- if (!phydev->link) {
++ if (phydev->link) {
++ if (phydev->speed == SPEED_2500)
++ phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
++ else
++ phydev->interface = PHY_INTERFACE_MODE_SGMII;
++ } else {
++ /* generate seed as a lower random value to make PHY linked as SLAVE easily,
++ * except for master/slave configuration fault detected.
++ * the reason for not putting this code into the function link_change_notify is
++ * the corner case where the link partner is also the qca8081 PHY and the seed
++ * value is configured as the same value, the link can't be up and no link change
++ * occurs.
++ */
+ if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR) {
+ qca808x_phy_ms_seed_enable(phydev, false);
+ } else {
--- /dev/null
+From 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 09:20:18 -0800
+Subject: net: sched: fix use-after-free in tc_new_tfilter()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 upstream.
+
+Whenever tc_new_tfilter() jumps back to the replay: label,
+we need to make sure the @q and @chain local variables are cleared
+again, or we risk a use-after-free as in [1].
+
+For consistency, apply the same fix in tc_ctl_chain().
+
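+The pitfall in miniature (an illustrative sketch, not the kernel code):
+
+	struct Qdisc *q;	/* initializing only at declaration ... */
+
+replay:
+	q = NULL;		/* ... is not enough; reset on every pass */
+	...
+	if (err == -EAGAIN)
+		goto replay;	/* else a stale q points at freed memory */
+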
+BUG: KASAN: use-after-free in mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581
+Write of size 8 at addr ffff8880985c4b08 by task syz-executor.4/1945
+
+CPU: 0 PID: 1945 Comm: syz-executor.4 Not tainted 5.17.0-rc1-syzkaller-00495-gff58831fa02d #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_address_description.constprop.0.cold+0x8d/0x336 mm/kasan/report.c:255
+ __kasan_report mm/kasan/report.c:442 [inline]
+ kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
+ mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581
+ tcf_chain_head_change_item net/sched/cls_api.c:372 [inline]
+ tcf_chain0_head_change.isra.0+0xb9/0x120 net/sched/cls_api.c:386
+ tcf_chain_tp_insert net/sched/cls_api.c:1657 [inline]
+ tcf_chain_tp_insert_unique net/sched/cls_api.c:1707 [inline]
+ tc_new_tfilter+0x1e67/0x2350 net/sched/cls_api.c:2086
+ rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+RIP: 0033:0x7f2647172059
+Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f2645aa5168 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
+RAX: ffffffffffffffda RBX: 00007f2647285100 RCX: 00007f2647172059
+RDX: 040000000000009f RSI: 00000000200002c0 RDI: 0000000000000006
+RBP: 00007f26471cc08d R08: 0000000000000000 R09: 0000000000000000
+R10: 9e00000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007fffb3f7f02f R14: 00007f2645aa5300 R15: 0000000000022000
+ </TASK>
+
+Allocated by task 1944:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track mm/kasan/common.c:45 [inline]
+ set_alloc_info mm/kasan/common.c:436 [inline]
+ ____kasan_kmalloc mm/kasan/common.c:515 [inline]
+ ____kasan_kmalloc mm/kasan/common.c:474 [inline]
+ __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:524
+ kmalloc_node include/linux/slab.h:604 [inline]
+ kzalloc_node include/linux/slab.h:726 [inline]
+ qdisc_alloc+0xac/0xa10 net/sched/sch_generic.c:941
+ qdisc_create.constprop.0+0xce/0x10f0 net/sched/sch_api.c:1211
+ tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660
+ rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5592
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Freed by task 3609:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track+0x21/0x30 mm/kasan/common.c:45
+ kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370
+ ____kasan_slab_free mm/kasan/common.c:366 [inline]
+ ____kasan_slab_free+0x130/0x160 mm/kasan/common.c:328
+ kasan_slab_free include/linux/kasan.h:236 [inline]
+ slab_free_hook mm/slub.c:1728 [inline]
+ slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1754
+ slab_free mm/slub.c:3509 [inline]
+ kfree+0xcb/0x280 mm/slub.c:4562
+ rcu_do_batch kernel/rcu/tree.c:2527 [inline]
+ rcu_core+0x7b8/0x1540 kernel/rcu/tree.c:2778
+ __do_softirq+0x29b/0x9c2 kernel/softirq.c:558
+
+Last potentially related work creation:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348
+ __call_rcu kernel/rcu/tree.c:3026 [inline]
+ call_rcu+0xb1/0x740 kernel/rcu/tree.c:3106
+ qdisc_put_unlocked+0x6f/0x90 net/sched/sch_generic.c:1109
+ tcf_block_release+0x86/0x90 net/sched/cls_api.c:1238
+ tc_new_tfilter+0xc0d/0x2350 net/sched/cls_api.c:2148
+ rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+The buggy address belongs to the object at ffff8880985c4800
+ which belongs to the cache kmalloc-1k of size 1024
+The buggy address is located 776 bytes inside of
+ 1024-byte region [ffff8880985c4800, ffff8880985c4c00)
+The buggy address belongs to the page:
+page:ffffea0002617000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x985c0
+head:ffffea0002617000 order:3 compound_mapcount:0 compound_pincount:0
+flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff)
+raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888010c41dc0
+raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+page_owner tracks the page as allocated
+page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 1941, ts 1038999441284, free_ts 1033444432829
+ prep_new_page mm/page_alloc.c:2434 [inline]
+ get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4165
+ __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5389
+ alloc_pages+0x1aa/0x310 mm/mempolicy.c:2271
+ alloc_slab_page mm/slub.c:1799 [inline]
+ allocate_slab mm/slub.c:1944 [inline]
+ new_slab+0x28a/0x3b0 mm/slub.c:2004
+ ___slab_alloc+0x87c/0xe90 mm/slub.c:3018
+ __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3105
+ slab_alloc_node mm/slub.c:3196 [inline]
+ slab_alloc mm/slub.c:3238 [inline]
+ __kmalloc+0x2fb/0x340 mm/slub.c:4420
+ kmalloc include/linux/slab.h:586 [inline]
+ kzalloc include/linux/slab.h:715 [inline]
+ __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1335
+ neigh_sysctl_register+0x2c8/0x5e0 net/core/neighbour.c:3787
+ devinet_sysctl_register+0xb1/0x230 net/ipv4/devinet.c:2618
+ inetdev_init+0x286/0x580 net/ipv4/devinet.c:278
+ inetdev_event+0xa8a/0x15d0 net/ipv4/devinet.c:1532
+ notifier_call_chain+0xb5/0x200 kernel/notifier.c:84
+ call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1919
+ call_netdevice_notifiers_extack net/core/dev.c:1931 [inline]
+ call_netdevice_notifiers net/core/dev.c:1945 [inline]
+ register_netdevice+0x1073/0x1500 net/core/dev.c:9698
+ veth_newlink+0x59c/0xa90 drivers/net/veth.c:1722
+page last free stack trace:
+ reset_page_owner include/linux/page_owner.h:24 [inline]
+ free_pages_prepare mm/page_alloc.c:1352 [inline]
+ free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1404
+ free_unref_page_prepare mm/page_alloc.c:3325 [inline]
+ free_unref_page+0x19/0x690 mm/page_alloc.c:3404
+ release_pages+0x748/0x1220 mm/swap.c:956
+ tlb_batch_pages_flush mm/mmu_gather.c:50 [inline]
+ tlb_flush_mmu_free mm/mmu_gather.c:243 [inline]
+ tlb_flush_mmu+0xe9/0x6b0 mm/mmu_gather.c:250
+ zap_pte_range mm/memory.c:1441 [inline]
+ zap_pmd_range mm/memory.c:1490 [inline]
+ zap_pud_range mm/memory.c:1519 [inline]
+ zap_p4d_range mm/memory.c:1540 [inline]
+ unmap_page_range+0x1d1d/0x2a30 mm/memory.c:1561
+ unmap_single_vma+0x198/0x310 mm/memory.c:1606
+ unmap_vmas+0x16b/0x2f0 mm/memory.c:1638
+ exit_mmap+0x201/0x670 mm/mmap.c:3178
+ __mmput+0x122/0x4b0 kernel/fork.c:1114
+ mmput+0x56/0x60 kernel/fork.c:1135
+ exit_mm kernel/exit.c:507 [inline]
+ do_exit+0xa3c/0x2a30 kernel/exit.c:793
+ do_group_exit+0xd2/0x2f0 kernel/exit.c:935
+ __do_sys_exit_group kernel/exit.c:946 [inline]
+ __se_sys_exit_group kernel/exit.c:944 [inline]
+ __x64_sys_exit_group+0x3a/0x50 kernel/exit.c:944
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Memory state around the buggy address:
+ ffff8880985c4a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880985c4a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff8880985c4b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff8880985c4b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880985c4c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+
+Fixes: 470502de5bdb ("net: sched: unlock rules update API")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Vlad Buslov <vladbu@mellanox.com>
+Cc: Jiri Pirko <jiri@mellanox.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220131172018.3704490-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_api.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff
+ bool prio_allocate;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
++ struct Qdisc *q;
+ struct tcf_chain_info chain_info;
+- struct tcf_chain *chain = NULL;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ struct tcf_proto *tp;
+ unsigned long cl;
+@@ -1976,6 +1976,8 @@ replay:
+ tp = NULL;
+ cl = 0;
+ block = NULL;
++ q = NULL;
++ chain = NULL;
+ flags = 0;
+
+ if (prio == 0) {
+@@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *
+ struct tcmsg *t;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
+- struct tcf_chain *chain = NULL;
++ struct Qdisc *q;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ unsigned long cl;
+ int err;
+@@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *
+ return -EPERM;
+
+ replay:
++ q = NULL;
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
--- /dev/null
+From 94fd19752b28aa66c98e7991734af91dfc529f8f Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Fri, 14 Jan 2022 16:57:56 +0100
+Subject: ovl: don't fail copy up if no fileattr support on upper
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 94fd19752b28aa66c98e7991734af91dfc529f8f upstream.
+
+Christoph Fritz reports that failing to copy up fileattr when the upper
+filesystem doesn't support fileattr or xattr results in a regression.
+
+Return success in these failure cases; this reverts overlayfs to the old
+behavior.
+
+Add a pr_warn_once() in these cases to still let the user know about the
+copy up failures.
+
+Reported-by: Christoph Fritz <chf.fritz@googlemail.com>
+Fixes: 72db82115d2b ("ovl: copy up sync/noatime fileattr flags")
+Cc: <stable@vger.kernel.org> # v5.15
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/overlayfs/copy_up.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -157,7 +157,9 @@ static int ovl_copy_fileattr(struct inod
+ */
+ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+ err = ovl_set_protattr(inode, new->dentry, &oldfa);
+- if (err)
++ if (err == -EPERM)
++ pr_warn_once("copying fileattr: no xattr on upper\n");
++ else if (err)
+ return err;
+ }
+
+@@ -167,6 +169,14 @@ static int ovl_copy_fileattr(struct inod
+
+ err = ovl_real_fileattr_get(new, &newfa);
+ if (err) {
++ /*
++ * Returning an error if upper doesn't support fileattr will
++ * result in a regression, so revert to the old behavior.
++ */
++ if (err == -ENOTTY || err == -EINVAL) {
++ pr_warn_once("copying fileattr: no support on upper\n");
++ return 0;
++ }
+ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
+ new, err);
+ return err;
--- /dev/null
+From c36c04c2e132fc39f6b658bf607aed4425427fd7 Mon Sep 17 00:00:00 2001
+From: John Hubbard <jhubbard@nvidia.com>
+Date: Tue, 1 Feb 2022 19:23:17 -0800
+Subject: Revert "mm/gup: small refactoring: simplify try_grab_page()"
+
+From: John Hubbard <jhubbard@nvidia.com>
+
+commit c36c04c2e132fc39f6b658bf607aed4425427fd7 upstream.
+
+This reverts commit 54d516b1d62ff8f17cee2da06e5e4706a0d00b8a
+
+That commit did a refactoring that effectively combined fast and slow
+gup paths (again). And that was again incorrect, for two reasons:
+
+ a) Fast gup and slow gup get reference counts on pages in different
+ ways and with different goals: see Linus' writeup in commit
+ cd1adf1b63a1 ("Revert "mm/gup: remove try_get_page(), call
+ try_get_compound_head() directly""), and
+
+ b) try_grab_compound_head() also has a specific check for
+ "FOLL_LONGTERM && !is_pinned(page)", that assumes that the caller
+ can fall back to slow gup. This resulted in new failures, as
+    recently reported by Will McVicker [1].
+
+But (a) has problems too, even though they may not have been reported
+yet. So just revert this.
+
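+For reference, the restored slow-path pin accounting: when a page has no
+separate pincount field, FOLL_PIN is recorded in the refcount itself by
+adding GUP_PIN_COUNTING_BIAS (1024) rather than 1, which is how pins are
+later distinguished from plain references when unpinning.
+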
+Link: https://lore.kernel.org/r/20220131203504.3458775-1-willmcvicker@google.com [1]
+Fixes: 54d516b1d62f ("mm/gup: small refactoring: simplify try_grab_page()")
+Reported-and-tested-by: Will McVicker <willmcvicker@google.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Minchan Kim <minchan@google.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: stable@vger.kernel.org # 5.15
+Signed-off-by: John Hubbard <jhubbard@nvidia.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c | 35 ++++++++++++++++++++++++++++++-----
+ 1 file changed, 30 insertions(+), 5 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -124,8 +124,8 @@ static inline struct page *try_get_compo
+ * considered failure, and furthermore, a likely bug in the caller, so a warning
+ * is also emitted.
+ */
+-struct page *try_grab_compound_head(struct page *page,
+- int refs, unsigned int flags)
++__maybe_unused struct page *try_grab_compound_head(struct page *page,
++ int refs, unsigned int flags)
+ {
+ if (flags & FOLL_GET)
+ return try_get_compound_head(page, refs);
+@@ -208,10 +208,35 @@ static void put_compound_head(struct pag
+ */
+ bool __must_check try_grab_page(struct page *page, unsigned int flags)
+ {
+- if (!(flags & (FOLL_GET | FOLL_PIN)))
+- return true;
++ WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
+
+- return try_grab_compound_head(page, 1, flags);
++ if (flags & FOLL_GET)
++ return try_get_page(page);
++ else if (flags & FOLL_PIN) {
++ int refs = 1;
++
++ page = compound_head(page);
++
++ if (WARN_ON_ONCE(page_ref_count(page) <= 0))
++ return false;
++
++ if (hpage_pincount_available(page))
++ hpage_pincount_add(page, 1);
++ else
++ refs = GUP_PIN_COUNTING_BIAS;
++
++ /*
++ * Similar to try_grab_compound_head(): even if using the
++ * hpage_pincount_add/_sub() routines, be sure to
++ * *also* increment the normal page refcount field at least
++ * once, so that the page really is pinned.
++ */
++ page_ref_add(page, refs);
++
++ mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1);
++ }
++
++ return true;
+ }
+
+ /**
--- /dev/null
+From c6f6f2444bdbe0079e41914a35081530d0409963 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 17:21:06 -0800
+Subject: rtnetlink: make sure to refresh master_dev/m_ops in __rtnl_newlink()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit c6f6f2444bdbe0079e41914a35081530d0409963 upstream.
+
+While looking at an unrelated syzbot bug, I found that the replay logic
+in __rtnl_newlink() can trigger a use-after-free.
+
+It is better to clear master_dev and m_ops inside the loop,
+in case we have to replay it.
+
+Fixes: ba7d49b1f0f8 ("rtnetlink: provide api for getting and setting slave info")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Jiri Pirko <jiri@nvidia.com>
+Link: https://lore.kernel.org/r/20220201012106.216495-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3254,8 +3254,8 @@ static int __rtnl_newlink(struct sk_buff
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+ unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+- const struct rtnl_link_ops *m_ops = NULL;
+- struct net_device *master_dev = NULL;
++ const struct rtnl_link_ops *m_ops;
++ struct net_device *master_dev;
+ struct net *net = sock_net(skb->sk);
+ const struct rtnl_link_ops *ops;
+ struct nlattr *tb[IFLA_MAX + 1];
+@@ -3293,6 +3293,8 @@ replay:
+ else
+ dev = NULL;
+
++ master_dev = NULL;
++ m_ops = NULL;
+ if (dev) {
+ master_dev = netdev_master_upper_dev_get(dev);
+ if (master_dev)
net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch
net-ipa-prevent-concurrent-replenish.patch
drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch
+cgroup-v1-require-capabilities-to-set-release_agent.patch
+revert-mm-gup-small-refactoring-simplify-try_grab_page.patch
+net-phy-fix-qca8081-with-speeds-lower-than-2.5gb-s.patch
+ovl-don-t-fail-copy-up-if-no-fileattr-support-on-upper.patch
+lockd-fix-server-crash-on-reboot-of-client-holding-lock.patch
+lockd-fix-failure-to-cleanup-client-locks.patch
+net-mlx5e-ipsec-fix-crypto-offload-for-non-tcp-udp-encapsulated-traffic.patch
+net-mlx5e-ipsec-fix-tunnel-mode-crypto-offload-for-non-tcp-udp-traffic.patch
+net-mlx5e-tc-reject-rules-with-drop-and-modify-hdr-action.patch
+net-mlx5-bridge-take-rtnl-lock-in-init-error-handler.patch
+net-mlx5-bridge-ensure-dev_name-is-null-terminated.patch
+net-mlx5e-fix-handling-of-wrong-devices-during-bond-netevent.patch
+net-mlx5-use-del_timer_sync-in-fw-reset-flow-of-halting-poll.patch
+net-mlx5e-fix-module-eeprom-query.patch
+net-mlx5e-tc-reject-rules-with-forward-and-drop-actions.patch
+net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch
+net-mlx5e-don-t-treat-small-ceil-values-as-unlimited-in-htb-offload.patch
+net-mlx5-bridge-fix-devlink-deadlock-on-net-namespace-deletion.patch
+net-mlx5e-avoid-field-overflowing-memcpy.patch
+net-mlx5e-fix-wrong-calculation-of-header-index-in-hw_gro.patch
+net-mlx5e-fix-broken-skb-allocation-in-hw-gro.patch
+net-mlx5-e-switch-fix-uninitialized-variable-modact.patch
+net-mlx5e-avoid-implicit-modify-hdr-for-decap-drop-rule.patch
+ipheth-fix-eoverflow-in-ipheth_rcvbulk_callback.patch
+i40e-fix-reset-bw-limit-when-dcb-enabled-with-1-tc.patch
+i40e-fix-reset-path-while-removing-the-driver.patch
+net-amd-xgbe-ensure-to-reset-the-tx_timer_active-flag.patch
+net-amd-xgbe-fix-skb-data-length-underflow.patch
+fanotify-fix-stale-file-descriptor-in-copy_event_to_user.patch
+net-sched-fix-use-after-free-in-tc_new_tfilter.patch
+rtnetlink-make-sure-to-refresh-master_dev-m_ops-in-__rtnl_newlink.patch
+net-ipa-request-ipa-register-values-be-retained.patch
+bpf-fix-possible-race-in-inc_misses_counter.patch
+cpuset-fix-the-bug-that-subpart_cpus-updated-wrongly-in-update_cpumask.patch
+e1000e-handshake-with-csme-starts-from-adl-platforms.patch
+af_packet-fix-data-race-in-packet_setsockopt-packet_setsockopt.patch
+tcp-fix-mem-under-charging-with-zerocopy-sendmsg.patch
+tcp-add-missing-tcp_skb_can_collapse-test-in-tcp_shift_skb_data.patch
--- /dev/null
+From b67985be400969578d4d4b17299714c0e5d2c07b Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 1 Feb 2022 10:46:40 -0800
+Subject: tcp: add missing tcp_skb_can_collapse() test in tcp_shift_skb_data()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit b67985be400969578d4d4b17299714c0e5d2c07b upstream.
+
+tcp_shift_skb_data() might collapse three packets into a larger one.
+
+P_A, P_B, P_C -> P_ABC
+
+Historically, it used a single tcp_skb_can_collapse_to(P_A) call,
+because it was enough.
+
+In commit 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions"),
+this call was replaced by a call to tcp_skb_can_collapse(P_A, P_B).
+
+But the now-needed test on P_C was missed.
+
+This probably broke MPTCP.
+
+Then later, commit 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+added an extra condition to tcp_skb_can_collapse(), but the missing call
+from tcp_shift_skb_data() is also breaking TCP zerocopy, because P_A and P_C
+might have different skb_zcopy_pure() status.
+
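+Schematically, the fix adds the missing gate (sketch; P_A is prev and,
+after P_B has been merged, skb is P_C):
+
+	if (!tcp_skb_can_collapse(prev, skb))	/* P_A vs P_C */
+		goto out;
+
+since P_A and P_C may disagree on MPTCP extensions or on
+skb_zcopy_pure() status.
+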
+Fixes: 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions")
+Fixes: 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Cc: Talal Ahmad <talalahmad@google.com>
+Cc: Arjun Roy <arjunroy@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://lore.kernel.org/r/20220201184640.756716-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_dat
+ (mss != tcp_skb_seglen(skb)))
+ goto out;
+
++ if (!tcp_skb_can_collapse(prev, skb))
++ goto out;
+ len = skb->len;
+ pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, pcount, len))
--- /dev/null
+From 479f5547239d970d3833f15f54a6481fffdb91ec Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 22:52:54 -0800
+Subject: tcp: fix mem under-charging with zerocopy sendmsg()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 479f5547239d970d3833f15f54a6481fffdb91ec upstream.
+
+We got reports of the following warning in inet_sock_destruct():
+
+ WARN_ON(sk_forward_alloc_get(sk));
+
+Whenever we add a non-zerocopy fragment to a pure zerocopy skb,
+we have to anticipate that the whole skb->truesize will be uncharged
+when the skb is finally freed.
+
+skb->data_len is the payload length, but the memory truesize
+estimated by __zerocopy_sg_from_iter() is page-aligned.
+
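+A worked example (illustrative numbers, 4K pages): appending 100 bytes
+of non-zerocopy payload to a pure zerocopy skb grows skb->data_len by
+100, while the page fragment backing it grows skb->truesize by 4096.
+Charging only data_len under-charges by roughly 4000 bytes, which is
+what the forward_alloc warning catches; hence the fix charges
+
+	extra = skb->truesize - SKB_TRUESIZE(skb_end_offset(skb));
+
+i.e. everything beyond the linear area's truesize.
+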
+Fixes: 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Talal Ahmad <talalahmad@google.com>
+Cc: Arjun Roy <arjunroy@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Link: https://lore.kernel.org/r/20220201065254.680532-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1321,10 +1321,13 @@ new_segment:
+
+ /* skb changing from pure zc to mixed, must charge zc */
+ if (unlikely(skb_zcopy_pure(skb))) {
+- if (!sk_wmem_schedule(sk, skb->data_len))
++ u32 extra = skb->truesize -
++ SKB_TRUESIZE(skb_end_offset(skb));
++
++ if (!sk_wmem_schedule(sk, extra))
+ goto wait_for_space;
+
+- sk_mem_charge(sk, skb->data_len);
++ sk_mem_charge(sk, extra);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+