--- /dev/null
+From e42e70ad6ae2ae511a6143d2e8da929366e58bd9 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 18:23:58 -0800
+Subject: af_packet: fix data-race in packet_setsockopt / packet_setsockopt
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit e42e70ad6ae2ae511a6143d2e8da929366e58bd9 upstream.
+
+When packet_setsockopt( PACKET_FANOUT_DATA ) reads po->fanout,
+no lock is held, meaning that another thread can change po->fanout.
+
+Given that po->fanout can only be set once during the socket lifetime
+(it is only cleared from fanout_release()), we can use
+READ_ONCE()/WRITE_ONCE() to document the race.
+
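+A minimal sketch of the pattern (matching the change below):
+
+ /* writer, in fanout_add() under the fanout mutex */
+ WRITE_ONCE(po->fanout, match);
+
+ /* lockless reader in packet_setsockopt(); a plain load here is the
+  * reported data race
+  */
+ if (!READ_ONCE(po->fanout))
+         return -EINVAL;
+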
+BUG: KCSAN: data-race in packet_setsockopt / packet_setsockopt
+
+write to 0xffff88813ae8e300 of 8 bytes by task 14653 on cpu 0:
+ fanout_add net/packet/af_packet.c:1791 [inline]
+ packet_setsockopt+0x22fe/0x24a0 net/packet/af_packet.c:3931
+ __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
+ __do_sys_setsockopt net/socket.c:2191 [inline]
+ __se_sys_setsockopt net/socket.c:2188 [inline]
+ __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+read to 0xffff88813ae8e300 of 8 bytes by task 14654 on cpu 1:
+ packet_setsockopt+0x691/0x24a0 net/packet/af_packet.c:3935
+ __sys_setsockopt+0x209/0x2a0 net/socket.c:2180
+ __do_sys_setsockopt net/socket.c:2191 [inline]
+ __se_sys_setsockopt net/socket.c:2188 [inline]
+ __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+value changed: 0x0000000000000000 -> 0xffff888106f8c000
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 14654 Comm: syz-executor.3 Not tainted 5.16.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: 47dceb8ecdc1 ("packet: add classic BPF fanout mode")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220201022358.330621-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1788,7 +1788,10 @@ static int fanout_add(struct sock *sk, s
+ err = -ENOSPC;
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
+ __dev_remove_pack(&po->prot_hook);
+- po->fanout = match;
++
++ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
++ WRITE_ONCE(po->fanout, match);
++
+ po->rollover = rollover;
+ rollover = NULL;
+ refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
+@@ -3941,7 +3944,8 @@ packet_setsockopt(struct socket *sock, i
+ }
+ case PACKET_FANOUT_DATA:
+ {
+- if (!po->fanout)
++ /* Paired with the WRITE_ONCE() in fanout_add() */
++ if (!READ_ONCE(po->fanout))
+ return -EINVAL;
+
+ return fanout_set_data(po, optval, optlen);
--- /dev/null
+From 0e3135d3bfa5dfb658145238d2bc723a8e30c3a3 Mon Sep 17 00:00:00 2001
+From: He Fengqing <hefengqing@huawei.com>
+Date: Sat, 22 Jan 2022 10:29:36 +0000
+Subject: bpf: Fix possible race in inc_misses_counter
+
+From: He Fengqing <hefengqing@huawei.com>
+
+commit 0e3135d3bfa5dfb658145238d2bc723a8e30c3a3 upstream.
+
+It seems inc_misses_counter() suffers from the same issue fixed in
+commit d979617aa84d ("bpf: Fixes possible race in update_prog_stats()
+for 32bit arches"): as it can run while interrupts are enabled, it
+could be re-entered and the u64_stats syncp could be mangled.
+
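+For reference, a sketch of the lockless reader this pairs with (the
+real code sums the per-CPU counters):
+
+ unsigned int start;
+ u64 misses;
+
+ do {
+         start = u64_stats_fetch_begin(&stats->syncp);
+         misses = u64_stats_read(&stats->misses);
+ } while (u64_stats_fetch_retry(&stats->syncp, start));
+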
+Fixes: 9ed9e9ba2337 ("bpf: Count the number of times recursion was prevented")
+Signed-off-by: He Fengqing <hefengqing@huawei.com>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/r/20220122102936.1219518-1-hefengqing@huawei.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/trampoline.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/trampoline.c
++++ b/kernel/bpf/trampoline.c
+@@ -542,11 +542,12 @@ static __always_inline u64 notrace bpf_p
+ static void notrace inc_misses_counter(struct bpf_prog *prog)
+ {
+ struct bpf_prog_stats *stats;
++ unsigned int flags;
+
+ stats = this_cpu_ptr(prog->stats);
+- u64_stats_update_begin(&stats->syncp);
++ flags = u64_stats_update_begin_irqsave(&stats->syncp);
+ u64_stats_inc(&stats->misses);
+- u64_stats_update_end(&stats->syncp);
++ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ }
+
+ /* The logic is similar to bpf_prog_run(), but with an explicit
--- /dev/null
+From 24f6008564183aa120d07c03d9289519c2fe02af Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 20 Jan 2022 11:04:01 -0600
+Subject: cgroup-v1: Require capabilities to set release_agent
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 24f6008564183aa120d07c03d9289519c2fe02af upstream.
+
+The cgroup release_agent is called with call_usermodehelper. The function
+call_usermodehelper starts the release_agent with a full set of
+capabilities. Therefore require capabilities when setting the release_agent.
+
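+A hypothetical shell session illustrating the new behavior (path and
+agent name are illustrative): a write from a non-initial user namespace,
+or without CAP_SYS_ADMIN, is now rejected:
+
+ # unshare -Ur sh -c 'echo /sbin/my-agent > release_agent'
+ sh: write error: Operation not permitted
+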
+Reported-by: Tabitha Sable <tabitha.c.sable@gmail.com>
+Tested-by: Tabitha Sable <tabitha.c.sable@gmail.com>
+Fixes: 81a6a5cdd2c5 ("Task Control Groups: automatic userspace notification of idle cgroups")
+Cc: stable@vger.kernel.org # v2.6.24+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-v1.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -549,6 +549,14 @@ static ssize_t cgroup_release_agent_writ
+
+ BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+
++ /*
++ * Release agent gets called with all capabilities,
++ * require capabilities to set release agent.
++ */
++ if ((of->file->f_cred->user_ns != &init_user_ns) ||
++ !capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
+ cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!cgrp)
+ return -ENODEV;
+@@ -954,6 +962,12 @@ int cgroup1_parse_param(struct fs_contex
+ /* Specifying two release agents is forbidden */
+ if (ctx->release_agent)
+ return invalfc(fc, "release_agent respecified");
++ /*
++ * Release agent gets called with all capabilities,
++ * require capabilities to set release agent.
++ */
++ if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
++ return invalfc(fc, "Setting release_agent not allowed");
+ ctx->release_agent = param->string;
+ param->string = NULL;
+ break;
--- /dev/null
+From c80d401c52a2d1baf2a5afeb06f0ffe678e56d23 Mon Sep 17 00:00:00 2001
+From: Tianchen Ding <dtcccc@linux.alibaba.com>
+Date: Tue, 18 Jan 2022 18:05:18 +0800
+Subject: cpuset: Fix the bug that subparts_cpus updated wrongly in update_cpumask()
+
+From: Tianchen Ding <dtcccc@linux.alibaba.com>
+
+commit c80d401c52a2d1baf2a5afeb06f0ffe678e56d23 upstream.
+
+subparts_cpus should be limited to a subset of cpus_allowed, but it is
+updated wrongly by using cpumask_andnot(). Use cpumask_and() instead to
+fix it.
+
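+A worked example: with subparts_cpus = {1,2,3} and cpus_allowed = {2,3,4},
+cpumask_andnot() leaves {1}, i.e. exactly the CPUs that are not allowed,
+while cpumask_and() leaves the intended subset {2,3}.
+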
+Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag")
+Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1615,8 +1615,7 @@ static int update_cpumask(struct cpuset
+ * Make sure that subparts_cpus is a subset of cpus_allowed.
+ */
+ if (cs->nr_subparts_cpus) {
+- cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus,
+- cs->cpus_allowed);
++ cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
+ cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
+ }
+ spin_unlock_irq(&callback_lock);
--- /dev/null
+From cad014b7b5a6897d8c4fad13e2888978bfb7a53f Mon Sep 17 00:00:00 2001
+From: Sasha Neftin <sasha.neftin@intel.com>
+Date: Tue, 7 Dec 2021 13:23:42 +0200
+Subject: e1000e: Handshake with CSME starts from ADL platforms
+
+From: Sasha Neftin <sasha.neftin@intel.com>
+
+commit cad014b7b5a6897d8c4fad13e2888978bfb7a53f upstream.
+
+Handshake with CSME/AMT on non-provisioned platforms during the S0ix
+flow is not supported on the TGL platform and can cause a HW unit hang.
+Update the handshake-with-CSME flow to start from the ADL platform.
+
+Fixes: 3e55d231716e ("e1000e: Add handshake with the CSME to support S0ix")
+Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
+Tested-by: Nechama Kraus <nechamax.kraus@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/e1000e/netdev.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -6345,7 +6345,8 @@ static void e1000e_s0ix_entry_flow(struc
+ u32 mac_data;
+ u16 phy_data;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
+ /* Request ME configure the device for S0ix */
+ mac_data = er32(H2ME);
+ mac_data |= E1000_H2ME_START_DPG;
+@@ -6494,7 +6495,8 @@ static void e1000e_s0ix_exit_flow(struct
+ u16 phy_data;
+ u32 i = 0;
+
+- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
++ if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
++ hw->mac.type >= e1000_pch_adp) {
+ /* Request ME unconfigure the device from S0ix */
+ mac_data = er32(H2ME);
+ mac_data &= ~E1000_H2ME_START_DPG;
--- /dev/null
+From ee12595147ac1fbfb5bcb23837e26dd58d94b15d Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Fri, 28 Jan 2022 22:57:01 +0300
+Subject: fanotify: Fix stale file descriptor in copy_event_to_user()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit ee12595147ac1fbfb5bcb23837e26dd58d94b15d upstream.
+
+This code calls fd_install() which gives the userspace access to the fd.
+Then if copy_info_records_to_user() fails it calls put_unused_fd(fd) but
+that will not release it and leads to a stale entry in the file
+descriptor table.
+
+Generally you can't trust the fd after a call to fd_install(). The fix
+is to delay the fd_install() until everything else has succeeded.
+
+Fortunately it requires CAP_SYS_ADMIN to reach this code, so the
+security impact is limited.
+
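+The resulting order in copy_event_to_user(), sketched:
+
+ /* everything that can still fail happens first ... */
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+                                 buf, count);
+ if (ret < 0)
+         goto out_close_fd;      /* fd not yet visible to userspace */
+
+ /* ... only then is the fd published to userspace */
+ if (f)
+         fd_install(fd, f);
+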
+Fixes: f644bc449b37 ("fanotify: fix copy_event_to_user() fid error clean up")
+Link: https://lore.kernel.org/r/20220128195656.GA26981@kili
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Mathias Krause <minipli@grsecurity.net>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/notify/fanotify/fanotify_user.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -656,9 +656,6 @@ static ssize_t copy_event_to_user(struct
+ if (fanotify_is_perm_event(event->mask))
+ FANOTIFY_PERM(event)->fd = fd;
+
+- if (f)
+- fd_install(fd, f);
+-
+ if (info_mode) {
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
+@@ -666,6 +663,9 @@ static ssize_t copy_event_to_user(struct
+ goto out_close_fd;
+ }
+
++ if (f)
++ fd_install(fd, f);
++
+ return metadata.event_len;
+
+ out_close_fd:
--- /dev/null
+From 3d2504663c41104b4359a15f35670cfa82de1bbf Mon Sep 17 00:00:00 2001
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Date: Tue, 14 Dec 2021 10:08:22 +0000
+Subject: i40e: Fix reset bw limit when DCB enabled with 1 TC
+
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+
+commit 3d2504663c41104b4359a15f35670cfa82de1bbf upstream.
+
+There was an AQ error I40E_AQ_RC_EINVAL when trying to reset the bw
+limit as part of bw allocation setup, caused by trying to reset the
+bw limit with DCB enabled. The bw limit should not be reset when DCB
+is enabled. The code was relying on pf->flags to check if DCB is
+enabled, but if only 1 TC is available this flag will not be set even
+though DCB is enabled. Add a check for the number of TCs, and if it
+is 1, don't try to reset the bw limit even if pf->flags shows DCB as
+disabled.
+
+Fixes: fa38e30ac73f ("i40e: Fix for Tx timeouts when interface is brought up if DCB is enabled")
+Suggested-by: Alexander Lobakin <alexandr.lobakin@intel.com> # Flatten the condition
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Tested-by: Imam Hassan Reza Biswas <imam.hassan.reza.biswas@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -5372,7 +5372,15 @@ static int i40e_vsi_configure_bw_alloc(s
+ /* There is no need to reset BW when mqprio mode is on. */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return 0;
+- if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
++
++ if (!vsi->mqprio_qopt.qopt.hw) {
++ if (pf->flags & I40E_FLAG_DCB_ENABLED)
++ goto skip_reset;
++
++ if (IS_ENABLED(CONFIG_I40E_DCB) &&
++ i40e_dcb_hw_get_num_tc(&pf->hw) == 1)
++ goto skip_reset;
++
+ ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+ if (ret)
+ dev_info(&pf->pdev->dev,
+@@ -5380,6 +5388,8 @@ static int i40e_vsi_configure_bw_alloc(s
+ vsi->seid);
+ return ret;
+ }
++
++skip_reset:
+ memset(&bw_data, 0, sizeof(bw_data));
+ bw_data.tc_valid_bits = enabled_tc;
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
--- /dev/null
+From 6533e558c6505e94c3e0ed4281ed5e31ec985f4d Mon Sep 17 00:00:00 2001
+From: Karen Sornek <karen.sornek@intel.com>
+Date: Wed, 12 Jan 2022 10:19:47 +0100
+Subject: i40e: Fix reset path while removing the driver
+
+From: Karen Sornek <karen.sornek@intel.com>
+
+commit 6533e558c6505e94c3e0ed4281ed5e31ec985f4d upstream.
+
+Fix a NULL pointer dereference crash that occurs when the driver is
+unloaded while the VSI rings are simultaneously being stopped.
+
+The hardware requires 50 msec to finish disabling the RX queues, so
+the driver spins in mdelay() waiting for the operation to complete.
+
+For example, changing the number of queues (which requires a reset)
+would fail in the following call stack:
+
+1) i40e_prep_for_reset
+2) i40e_pf_quiesce_all_vsi
+3) i40e_quiesce_vsi
+4) i40e_vsi_close
+5) i40e_down
+6) i40e_vsi_stop_rings
+7) i40e_vsi_control_rx -> disable requires the delay of 50msecs
+8) continue back in i40e_down function where
+ i40e_clean_tx_ring(vsi->tx_rings[i]) is going to crash
+
+While the driver was spinning, vsi_release() called
+i40e_vsi_free_arrays(), where the vsi->tx_rings resources were freed
+and the pointer was set to NULL.
+
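+The fix is a dedicated __I40E_IN_REMOVE state bit: the remove path sets
+it once, and the reset/rebuild paths bail out early while it is set,
+roughly:
+
+ /* i40e_remove() */
+ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+         usleep_range(1000, 2000);
+ set_bit(__I40E_IN_REMOVE, pf->state);
+
+ /* i40e_reset_and_rebuild() and friends */
+ if (test_bit(__I40E_IN_REMOVE, pf->state))
+         return;
+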
+Fixes: 5b6d4a7f20b0 ("i40e: Fix crash during removing i40e driver")
+Signed-off-by: Slawomir Laba <slawomirx.laba@intel.com>
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Karen Sornek <karen.sornek@intel.com>
+Tested-by: Gurucharan G <gurucharanx.g@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e.h | 1 +
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 19 ++++++++++++++++++-
+ 2 files changed, 19 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e.h
++++ b/drivers/net/ethernet/intel/i40e/i40e.h
+@@ -144,6 +144,7 @@ enum i40e_state_t {
+ __I40E_VIRTCHNL_OP_PENDING,
+ __I40E_RECOVERY_MODE,
+ __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */
++ __I40E_IN_REMOVE,
+ __I40E_VFS_RELEASING,
+ /* This must be last as it determines the size of the BITMAP */
+ __I40E_STATE_SIZE__,
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -10863,6 +10863,9 @@ static void i40e_reset_and_rebuild(struc
+ bool lock_acquired)
+ {
+ int ret;
++
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
+ /* Now we wait for GRST to settle out.
+ * We don't have to delete the VEBs or VSIs from the hw switch
+ * because the reset will make them disappear.
+@@ -12222,6 +12225,8 @@ int i40e_reconfig_rss_queues(struct i40e
+
+ vsi->req_queue_pairs = queue_count;
+ i40e_prep_for_reset(pf);
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return pf->alloc_rss_size;
+
+ pf->alloc_rss_size = new_rss_size;
+
+@@ -13048,6 +13053,10 @@ static int i40e_xdp_setup(struct i40e_vs
+ if (need_reset)
+ i40e_prep_for_reset(pf);
+
++ /* VSI shall be deleted in a moment, just return EINVAL */
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return -EINVAL;
++
+ old_prog = xchg(&vsi->xdp_prog, prog);
+
+ if (need_reset) {
+@@ -15938,8 +15947,13 @@ static void i40e_remove(struct pci_dev *
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
+
+- while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
++ /* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
++ * flags, once they are set, i40e_rebuild should not be called as
++ * i40e_prep_for_reset always returns early.
++ */
++ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+ usleep_range(1000, 2000);
++ set_bit(__I40E_IN_REMOVE, pf->state);
+
+ if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+ set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
+@@ -16138,6 +16152,9 @@ static void i40e_pci_error_reset_done(st
+ {
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
++ if (test_bit(__I40E_IN_REMOVE, pf->state))
++ return;
++
+ i40e_reset_and_rebuild(pf, false, false);
+ }
+
--- /dev/null
+From 63e4b45c82ed1bde979da7052229a4229ce9cabf Mon Sep 17 00:00:00 2001
+From: Georgi Valkov <gvalkov@abv.bg>
+Date: Tue, 1 Feb 2022 08:16:18 +0100
+Subject: ipheth: fix EOVERFLOW in ipheth_rcvbulk_callback
+
+From: Georgi Valkov <gvalkov@abv.bg>
+
+commit 63e4b45c82ed1bde979da7052229a4229ce9cabf upstream.
+
+When rx_buf is allocated we need to account for IPHETH_IP_ALIGN,
+which reduces the usable size by 2 bytes. Otherwise we have 1512
+bytes usable instead of 1514, and if we receive more than 1512
+bytes, ipheth_rcvbulk_callback is called with status -EOVERFLOW,
+after which the driver malfunctions and all communication stops.
+
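+Put concretely: with IPHETH_IP_ALIGN = 2, a plain IPHETH_BUF_SIZE
+allocation leaves IPHETH_BUF_SIZE - 2 usable bytes (1512 instead of
+1514), so allocating IPHETH_BUF_SIZE + IPHETH_IP_ALIGN restores room
+for a full-sized frame.
+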
+Resolves ipheth 2-1:4.2: ipheth_rcvbulk_callback: urb status: -75
+
+Fixes: f33d9e2b48a3 ("usbnet: ipheth: fix connectivity with iOS 14")
+Signed-off-by: Georgi Valkov <gvalkov@abv.bg>
+Tested-by: Jan Kiszka <jan.kiszka@siemens.com>
+Link: https://lore.kernel.org/all/B60B8A4B-92A0-49B3-805D-809A2433B46C@abv.bg/
+Link: https://lore.kernel.org/all/24851bd2769434a5fc24730dce8e8a984c5a4505.1643699778.git.jan.kiszka@siemens.com/
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/ipheth.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/usb/ipheth.c
++++ b/drivers/net/usb/ipheth.c
+@@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct iphe
+ if (tx_buf == NULL)
+ goto free_rx_urb;
+
+- rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE,
++ rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ GFP_KERNEL, &rx_urb->transfer_dma);
+ if (rx_buf == NULL)
+ goto free_tx_buf;
+@@ -146,7 +146,7 @@ error_nomem:
+
+ static void ipheth_free_urbs(struct ipheth_device *iphone)
+ {
+- usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf,
++ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf,
+ iphone->rx_urb->transfer_dma);
+ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf,
+ iphone->tx_urb->transfer_dma);
+@@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct iphet
+
+ usb_fill_bulk_urb(dev->rx_urb, udev,
+ usb_rcvbulkpipe(udev, dev->bulk_in),
+- dev->rx_buf, IPHETH_BUF_SIZE,
++ dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
+ ipheth_rcvbulk_callback,
+ dev);
+ dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
--- /dev/null
+From d19a7af73b5ecaac8168712d18be72b9db166768 Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 18 Jan 2022 17:00:51 -0500
+Subject: lockd: fix failure to cleanup client locks
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+commit d19a7af73b5ecaac8168712d18be72b9db166768 upstream.
+
+In my testing, we're sometimes hitting the request->fl_flags & FL_EXISTS
+case in posix_lock_inode, presumably just by random luck since we're not
+actually initializing fl_flags here.
+
+This probably didn't matter before commit 7f024fcd5c97 ("Keep read and
+write fds with each nlm_file") since we wouldn't previously unlock
+unless we knew there were locks.
+
+But now it causes lockd to give up on removing more locks.
+
+We could just initialize fl_flags, but really it seems dubious to be
+calling vfs_lock_file with random values in some of the fields.
+
+Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file")
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+[ cel: fixed checkpatch.pl nit ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/lockd/svcsubs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -180,6 +180,7 @@ static int nlm_unlock_files(struct nlm_f
+ {
+ struct file_lock lock;
+
++ locks_init_lock(&lock);
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
--- /dev/null
+From 6e7f90d163afa8fc2efd6ae318e7c20156a5621f Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 18 Jan 2022 17:00:16 -0500
+Subject: lockd: fix server crash on reboot of client holding lock
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+commit 6e7f90d163afa8fc2efd6ae318e7c20156a5621f upstream.
+
+I thought I was iterating over the array when actually the iteration is
+over the values contained in the array?
+
+Ugh, keep it simple.
+
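+Concretely, the buggy loop iterated over the pointer values stored in
+the array, not over its two slots:
+
+ /* walks from the address stored in f_file[0] up to the address
+  * stored in f_file[1], i.e. over arbitrary kernel pointers
+  */
+ for (f = file->f_file[0]; f <= file->f_file[1]; f++)
+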
+Symptoms were a null dereference in vfs_lock_file() when an NFSv3
+client that previously held a lock came back up and sent a notify.
+
+Reported-by: Jonathan Woithe <jwoithe@just42.net>
+Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file")
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/lockd/svcsubs.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -179,19 +179,20 @@ nlm_delete_file(struct nlm_file *file)
+ static int nlm_unlock_files(struct nlm_file *file)
+ {
+ struct file_lock lock;
+- struct file *f;
+
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
+- for (f = file->f_file[0]; f <= file->f_file[1]; f++) {
+- if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
+- pr_warn("lockd: unlock failure in %s:%d\n",
+- __FILE__, __LINE__);
+- return 1;
+- }
+- }
++ if (file->f_file[O_RDONLY] &&
++ vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL))
++ goto out_err;
++ if (file->f_file[O_WRONLY] &&
++ vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL))
++ goto out_err;
+ return 0;
++out_err:
++ pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__);
++ return 1;
+ }
+
+ /*
--- /dev/null
+From 7674b7b559b683478c3832527c59bceb169e701d Mon Sep 17 00:00:00 2001
+From: Raju Rangoju <Raju.Rangoju@amd.com>
+Date: Thu, 27 Jan 2022 11:32:22 +0530
+Subject: net: amd-xgbe: ensure to reset the tx_timer_active flag
+
+From: Raju Rangoju <Raju.Rangoju@amd.com>
+
+commit 7674b7b559b683478c3832527c59bceb169e701d upstream.
+
+Make sure to reset the tx_timer_active flag in xgbe_stop(); otherwise
+a port restart may result in a tx timeout due to the uncleared flag.
+
+Fixes: c635eaacbf77 ("amd-xgbe: Remove Tx coalescing")
+Co-developed-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
+Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20220127060222.453371-1-Raju.Rangoju@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe
+ if (!channel->tx_ring)
+ break;
+
++ /* Deactivate the Tx timer */
+ del_timer_sync(&channel->tx_timer);
++ channel->tx_timer_active = 0;
+ }
+ }
+
--- /dev/null
+From 5aac9108a180fc06e28d4e7fb00247ce603b72ee Mon Sep 17 00:00:00 2001
+From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+Date: Thu, 27 Jan 2022 14:50:03 +0530
+Subject: net: amd-xgbe: Fix skb data length underflow
+
+From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+
+commit 5aac9108a180fc06e28d4e7fb00247ce603b72ee upstream.
+
+A BUG_ON() in include/linux/skbuff.h triggers, leading to an
+intermittent kernel panic, when an skb length underflow is detected.
+
+Fix this by dropping the packet if such length underflows are seen
+because of inconsistencies in the hardware descriptors.
+
+Fixes: 622c36f143fc ("amd-xgbe: Fix jumbo MTU processing on newer hardware")
+Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20220127092003.2812745-1-Shyam-sundar.S-k@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -2555,6 +2555,14 @@ read_again:
+ buf2_len = xgbe_rx_buf2_len(rdata, packet, len);
+ len += buf2_len;
+
++ if (buf2_len > rdata->rx.buf.dma_len) {
++ /* Hardware inconsistency within the descriptors
++ * that has resulted in a length underflow.
++ */
++ error = 1;
++ goto skip_data;
++ }
++
+ if (!skb) {
+ skb = xgbe_create_skb(pdata, napi, rdata,
+ buf1_len);
+@@ -2584,8 +2592,10 @@ skip_data:
+ if (!last || context_next)
+ goto read_again;
+
+- if (!skb)
++ if (!skb || error) {
++ dev_kfree_skb(skb);
+ goto next_packet;
++ }
+
+ /* Be sure we don't exceed the configured MTU */
+ max_len = netdev->mtu + ETH_HLEN;
--- /dev/null
+From 34a081761e4e3c35381cbfad609ebae2962fe2f8 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Tue, 1 Feb 2022 09:02:05 -0600
+Subject: net: ipa: request IPA register values be retained
+
+From: Alex Elder <elder@linaro.org>
+
+commit 34a081761e4e3c35381cbfad609ebae2962fe2f8 upstream.
+
+In some cases, the IPA hardware needs to request the always-on
+subsystem (AOSS) to coordinate with the IPA microcontroller to
+retain IPA register values at power collapse. This is done by
+issuing a QMP request to the AOSS microcontroller. A similar
+request undoes that request.
+
+We must get and hold the "QMP" handle early, because we might get
+back EPROBE_DEFER for that. But the actual request should be sent
+while we know the IPA clock is active, and when we know the
+microcontroller is operational.
+
+Fixes: 1aac309d3207 ("net: ipa: use autosuspend")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_power.c | 52 ++++++++++++++++++++++++++++++++++++++++++++
+ drivers/net/ipa/ipa_power.h | 7 +++++
+ drivers/net/ipa/ipa_uc.c | 5 ++++
+ 3 files changed, 64 insertions(+)
+
+--- a/drivers/net/ipa/ipa_power.c
++++ b/drivers/net/ipa/ipa_power.c
+@@ -11,6 +11,8 @@
+ #include <linux/pm_runtime.h>
+ #include <linux/bitops.h>
+
++#include "linux/soc/qcom/qcom_aoss.h"
++
+ #include "ipa.h"
+ #include "ipa_power.h"
+ #include "ipa_endpoint.h"
+@@ -64,6 +66,7 @@ enum ipa_power_flag {
+ * struct ipa_power - IPA power management information
+ * @dev: IPA device pointer
+ * @core: IPA core clock
++ * @qmp: QMP handle for AOSS communication
+ * @spinlock: Protects modem TX queue enable/disable
+ * @flags: Boolean state flags
+ * @interconnect_count: Number of elements in interconnect[]
+@@ -72,6 +75,7 @@ enum ipa_power_flag {
+ struct ipa_power {
+ struct device *dev;
+ struct clk *core;
++ struct qmp *qmp;
+ spinlock_t spinlock; /* used with STOPPED/STARTED power flags */
+ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
+ u32 interconnect_count;
+@@ -382,6 +386,47 @@ void ipa_power_modem_queue_active(struct
+ clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
+ }
+
++static int ipa_power_retention_init(struct ipa_power *power)
++{
++ struct qmp *qmp = qmp_get(power->dev);
++
++ if (IS_ERR(qmp)) {
++ if (PTR_ERR(qmp) == -EPROBE_DEFER)
++ return -EPROBE_DEFER;
++
++ /* We assume any other error means it's not defined/needed */
++ qmp = NULL;
++ }
++ power->qmp = qmp;
++
++ return 0;
++}
++
++static void ipa_power_retention_exit(struct ipa_power *power)
++{
++ qmp_put(power->qmp);
++ power->qmp = NULL;
++}
++
++/* Control register retention on power collapse */
++void ipa_power_retention(struct ipa *ipa, bool enable)
++{
++ static const char fmt[] = "{ class: bcm, res: ipa_pc, val: %c }";
++ struct ipa_power *power = ipa->power;
++ char buf[36]; /* Exactly enough for fmt[]; size a multiple of 4 */
++ int ret;
++
++ if (!power->qmp)
++ return; /* Not needed on this platform */
++
++ (void)snprintf(buf, sizeof(buf), fmt, enable ? '1' : '0');
++
++ ret = qmp_send(power->qmp, buf, sizeof(buf));
++ if (ret)
++ dev_err(power->dev, "error %d sending QMP %sable request\n",
++ ret, enable ? "en" : "dis");
++}
++
+ int ipa_power_setup(struct ipa *ipa)
+ {
+ int ret;
+@@ -438,12 +483,18 @@ ipa_power_init(struct device *dev, const
+ if (ret)
+ goto err_kfree;
+
++ ret = ipa_power_retention_init(power);
++ if (ret)
++ goto err_interconnect_exit;
++
+ pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_enable(dev);
+
+ return power;
+
++err_interconnect_exit:
++ ipa_interconnect_exit(power);
+ err_kfree:
+ kfree(power);
+ err_clk_put:
+@@ -460,6 +511,7 @@ void ipa_power_exit(struct ipa_power *po
+
+ pm_runtime_disable(dev);
+ pm_runtime_dont_use_autosuspend(dev);
++ ipa_power_retention_exit(power);
+ ipa_interconnect_exit(power);
+ kfree(power);
+ clk_put(clk);
+--- a/drivers/net/ipa/ipa_power.h
++++ b/drivers/net/ipa/ipa_power.h
+@@ -41,6 +41,13 @@ void ipa_power_modem_queue_wake(struct i
+ void ipa_power_modem_queue_active(struct ipa *ipa);
+
+ /**
++ * ipa_power_retention() - Control register retention on power collapse
++ * @ipa: IPA pointer
++ * @enable: Whether retention should be enabled or disabled
++ */
++void ipa_power_retention(struct ipa *ipa, bool enable);
++
++/**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa: IPA pointer
+ *
+--- a/drivers/net/ipa/ipa_uc.c
++++ b/drivers/net/ipa/ipa_uc.c
+@@ -11,6 +11,7 @@
+
+ #include "ipa.h"
+ #include "ipa_uc.h"
++#include "ipa_power.h"
+
+ /**
+ * DOC: The IPA embedded microcontroller
+@@ -154,6 +155,7 @@ static void ipa_uc_response_hdlr(struct
+ case IPA_UC_RESPONSE_INIT_COMPLETED:
+ if (ipa->uc_powered) {
+ ipa->uc_loaded = true;
++ ipa_power_retention(ipa, true);
+ pm_runtime_mark_last_busy(dev);
+ (void)pm_runtime_put_autosuspend(dev);
+ ipa->uc_powered = false;
+@@ -184,6 +186,9 @@ void ipa_uc_deconfig(struct ipa *ipa)
+
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1);
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0);
++ if (ipa->uc_loaded)
++ ipa_power_retention(ipa, false);
++
+ if (!ipa->uc_powered)
+ return;
+
--- /dev/null
+From 350d9a823734b5a7e767cddc3bdde5f0bcbb7ff4 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Thu, 6 Jan 2022 18:45:26 +0200
+Subject: net/mlx5: Bridge, ensure dev_name is null-terminated
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit 350d9a823734b5a7e767cddc3bdde5f0bcbb7ff4 upstream.
+
+Even though net_device->name is guaranteed to be a null-terminated
+string of size <= IFNAMSIZ, the test robot complains that the return
+value of netdev_name() can be larger:
+
+In file included from include/trace/define_trace.h:102,
+ from drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:113,
+ from drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c:12:
+ drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h: In function 'trace_event_raw_event_mlx5_esw_bridge_fdb_template':
+>> drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h:24:29: warning: 'strncpy' output may be truncated copying 16 bytes from a string of length 20 [-Wstringop-truncation]
+ 24 | strncpy(__entry->dev_name,
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~
+ 25 | netdev_name(fdb->dev),
+ | ~~~~~~~~~~~~~~~~~~~~~~
+ 26 | IFNAMSIZ);
+ | ~~~~~~~~~
+
+This is caused by the fact that the default value of IFNAMSIZ is 16,
+while the placeholder value returned by netdev_name() for unnamed net
+devices is longer than that.
+
+The offending code is in a tracing function that is only called for mlx5
+representors, so there is no straightforward way to reproduce the issue,
+but let's fix it for correctness' sake by replacing strncpy() with
+strscpy() to ensure that the resulting string is always null-terminated.
+
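+Unlike strncpy(), strscpy() always NUL-terminates the destination
+(truncating the source if needed) and does not zero-pad the remainder;
+a sketch:
+
+ char dst[IFNAMSIZ];
+
+ strscpy(dst, netdev_name(fdb->dev), IFNAMSIZ); /* always terminated */
+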
+Fixes: 9724fd5d9c2a ("net/mlx5: Bridge, add tracepoints")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
+@@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_
+ __field(unsigned int, used)
+ ),
+ TP_fast_assign(
+- strncpy(__entry->dev_name,
++ strscpy(__entry->dev_name,
+ netdev_name(fdb->dev),
+ IFNAMSIZ);
+ memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
--- /dev/null
+From 880b517691908fb753019b9b27cd082e7617debd Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Mon, 24 Jan 2022 13:56:26 +0200
+Subject: net/mlx5: Bridge, Fix devlink deadlock on net namespace deletion
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit 880b517691908fb753019b9b27cd082e7617debd upstream.
+
+When changing mode to switchdev, rep bridge init, registered to the
+netdevice notifier, holds the devlink lock and then takes
+pernet_ops_rwsem. At the same time, deleting a netns holds
+pernet_ops_rwsem and then takes the devlink lock: a classic AB-BA
+ordering inversion.
+
+Example sequence is:
+$ ip netns add foo
+$ devlink dev eswitch set pci/0000:00:08.0 mode switchdev &
+$ ip netns del foo
+
+deleting netns trace:
+
+[ 1185.365555] ? devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.368331] ? mutex_lock_io_nested+0x13f0/0x13f0
+[ 1185.370984] ? xt_find_table+0x40/0x100
+[ 1185.373244] ? __mutex_lock+0x24a/0x15a0
+[ 1185.375494] ? net_generic+0xa0/0x1c0
+[ 1185.376844] ? wait_for_completion_io+0x280/0x280
+[ 1185.377767] ? devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.378686] devlink_pernet_pre_exit+0x74/0x1c0
+[ 1185.379579] ? devlink_nl_cmd_get_dumpit+0x3a0/0x3a0
+[ 1185.380557] ? xt_find_table+0xda/0x100
+[ 1185.381367] cleanup_net+0x372/0x8e0
+
+changing mode to switchdev trace:
+
+[ 1185.411267] down_write+0x13a/0x150
+[ 1185.412029] ? down_write_killable+0x180/0x180
+[ 1185.413005] register_netdevice_notifier+0x1e/0x210
+[ 1185.414000] mlx5e_rep_bridge_init+0x181/0x360 [mlx5_core]
+[ 1185.415243] mlx5e_uplink_rep_enable+0x269/0x480 [mlx5_core]
+[ 1185.416464] ? mlx5e_uplink_rep_disable+0x210/0x210 [mlx5_core]
+[ 1185.417749] mlx5e_attach_netdev+0x232/0x400 [mlx5_core]
+[ 1185.418906] mlx5e_netdev_attach_profile+0x15b/0x1e0 [mlx5_core]
+[ 1185.420172] mlx5e_netdev_change_profile+0x15a/0x1d0 [mlx5_core]
+[ 1185.421459] mlx5e_vport_rep_load+0x557/0x780 [mlx5_core]
+[ 1185.422624] ? mlx5e_stats_grp_vport_rep_num_stats+0x10/0x10 [mlx5_core]
+[ 1185.424006] mlx5_esw_offloads_rep_load+0xdb/0x190 [mlx5_core]
+[ 1185.425277] esw_offloads_enable+0xd74/0x14a0 [mlx5_core]
+
+Fix this by registering the rep bridge with a per-net netdev notifier
+instead of the global one; the per-net notifier operates on the net
+namespace without holding pernet_ops_rwsem.
+
+Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -491,7 +491,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_
+ }
+
+ br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+- err = register_netdevice_notifier(&br_offloads->netdev_nb);
++ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ if (err) {
+ esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+ err);
+@@ -526,7 +526,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx
+ return;
+
+ cancel_delayed_work_sync(&br_offloads->update_work);
+- unregister_netdevice_notifier(&br_offloads->netdev_nb);
++ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
+ unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+ unregister_switchdev_notifier(&br_offloads->nb);
+ destroy_workqueue(br_offloads->wq);
--- /dev/null
+From 04f8c12f031fcd0ffa0c72822eb665ceb2c872e7 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Thu, 6 Jan 2022 16:40:18 +0200
+Subject: net/mlx5: Bridge, take rtnl lock in init error handler
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit 04f8c12f031fcd0ffa0c72822eb665ceb2c872e7 upstream.
+
+The mlx5_esw_bridge_cleanup() is expected to be called with rtnl lock
+taken, which is true for mlx5e_rep_bridge_cleanup() function but not for
+error handling code in mlx5e_rep_bridge_init(). Add missing rtnl
+lock/unlock calls and extend both mlx5_esw_bridge_cleanup() and its dual
+function mlx5_esw_bridge_init() with ASSERT_RTNL() to verify the invariant
+from now on.
+
+Fixes: 7cd6a54a8285 ("net/mlx5: Bridge, handle FDB events")
+Fixes: 19e9bfa044f3 ("net/mlx5: Bridge, add offload infrastructure")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 2 ++
+ drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 ++++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -509,7 +509,9 @@ err_register_swdev_blk:
+ err_register_swdev:
+ destroy_workqueue(br_offloads->wq);
+ err_alloc_wq:
++ rtnl_lock();
+ mlx5_esw_bridge_cleanup(esw);
++ rtnl_unlock();
+ }
+
+ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+@@ -1574,6 +1574,8 @@ struct mlx5_esw_bridge_offloads *mlx5_es
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads;
+
++ ASSERT_RTNL();
++
+ br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
+ if (!br_offloads)
+ return ERR_PTR(-ENOMEM);
+@@ -1590,6 +1592,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5
+ {
+ struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+
++ ASSERT_RTNL();
++
+ if (!br_offloads)
+ return;
+
--- /dev/null
+From d8e5883d694bb053b19c4142a2d1f43a34f6fe2c Mon Sep 17 00:00:00 2001
+From: Maor Dickman <maord@nvidia.com>
+Date: Sun, 30 Jan 2022 16:00:41 +0200
+Subject: net/mlx5: E-Switch, Fix uninitialized variable modact
+
+From: Maor Dickman <maord@nvidia.com>
+
+commit d8e5883d694bb053b19c4142a2d1f43a34f6fe2c upstream.
+
+The variable modact is not initialized before being used in the
+modify-header allocation command, which can cause the command to fail.
+
+Fix by initializing modact with zeros; a local array with no
+initializer has indeterminate contents, while "= {}" zeroes every
+element.
+
+Addresses-Coverity: ("Uninitialized scalar variable")
+Fixes: 8f1e0b97cc70 ("net/mlx5: E-Switch, Mark miss packets with new chain id mapping")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -212,7 +212,7 @@ static int
+ create_chain_restore(struct fs_chain *chain)
+ {
+ struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
+- char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
++ u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_fs_chains *chains = chain->chains;
+ enum mlx5e_tc_attr_to_reg chain_to_reg;
+ struct mlx5_modify_hdr *mod_hdr;
--- /dev/null
+From 55b2ca702cfa744a9eb108915996a2294da47e71 Mon Sep 17 00:00:00 2001
+From: Dima Chumak <dchumak@nvidia.com>
+Date: Mon, 17 Jan 2022 15:32:16 +0200
+Subject: net/mlx5: Fix offloading with ESWITCH_IPV4_TTL_MODIFY_ENABLE
+
+From: Dima Chumak <dchumak@nvidia.com>
+
+commit 55b2ca702cfa744a9eb108915996a2294da47e71 upstream.
+
+Only prio 1 is supported for nic mode when there is no ignore-flow-level
+support in firmware. But for switchdev mode, which supports a fixed
+number of statically pre-allocated prios, this restriction is not
+relevant, so it can be relaxed.
+
+Fixes: d671e109bd85 ("net/mlx5: Fix tc max supported prio for nic mode")
+Signed-off-by: Dima Chumak <dchumak@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -121,12 +121,13 @@ u32 mlx5_chains_get_nf_ft_chain(struct m
+
+ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+ {
+- if (!mlx5_chains_prios_supported(chains))
+- return 1;
+-
+ if (mlx5_chains_ignore_flow_level_supported(chains))
+ return UINT_MAX;
+
++ if (!chains->dev->priv.eswitch ||
++ chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
++ return 1;
++
+ /* We should get here only for eswitch case */
+ return FDB_TC_MAX_PRIO;
+ }
--- /dev/null
+From 3c5193a87b0fea090aa3f769d020337662d87b5e Mon Sep 17 00:00:00 2001
+From: Maher Sanalla <msanalla@nvidia.com>
+Date: Thu, 13 Jan 2022 15:48:48 +0200
+Subject: net/mlx5: Use del_timer_sync in fw reset flow of halting poll
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+commit 3c5193a87b0fea090aa3f769d020337662d87b5e upstream.
+
+Substitute del_timer() with del_timer_sync() in the fw reset polling
+deactivation flow, in order to prevent a race condition that occurs
+when del_timer() is called and the timer is deactivated while another
+process is handling the timer interrupt. This situation led to the
+following call trace:
+ RIP: 0010:run_timer_softirq+0x137/0x420
+ <IRQ>
+ recalibrate_cpu_khz+0x10/0x10
+ ktime_get+0x3e/0xa0
+ ? sched_clock_cpu+0xb/0xc0
+ __do_softirq+0xf5/0x2ea
+ irq_exit_rcu+0xc1/0xf0
+ sysvec_apic_timer_interrupt+0x9e/0xc0
+ asm_sysvec_apic_timer_interrupt+0x12/0x20
+ </IRQ>
+
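+Unlike del_timer(), del_timer_sync() also waits for a handler already
+running on another CPU to finish, so on return:
+
+ del_timer_sync(&fw_reset->timer);
+ /* the timer callback is guaranteed not to be running anymore */
+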
+Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+@@ -132,7 +132,7 @@ static void mlx5_stop_sync_reset_poll(st
+ {
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+- del_timer(&fw_reset->timer);
++ del_timer_sync(&fw_reset->timer);
+ }
+
+ static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
--- /dev/null
+From ad5185735f7dab342fdd0dd41044da4c9ccfef67 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Mon, 24 Jan 2022 09:20:28 -0800
+Subject: net/mlx5e: Avoid field-overflowing memcpy()
+
+From: Kees Cook <keescook@chromium.org>
+
+commit ad5185735f7dab342fdd0dd41044da4c9ccfef67 upstream.
+
+In preparation for FORTIFY_SOURCE performing compile-time and run-time
+field bounds checking for memcpy(), memmove(), and memset(), avoid
+intentionally writing across neighboring fields.
+
+Use flexible arrays instead of zero-element arrays (which look like they
+are always overflowing) and split the cross-field memcpy() into two halves
+that can be appropriately bounds-checked by the compiler.
+
+We were doing:
+
+ #define ETH_HLEN 14
+ #define VLAN_HLEN 4
+ ...
+ #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+ ...
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ ...
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg = wqe->data;
+ ...
+ memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
+
+target is wqe->eth.inline_hdr.start (which the compiler sees as being
+2 bytes in size), but copying 18, intending to write across start
+(really vlan_tci, 2 bytes). The remaining 16 bytes get written into
+wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr
+(8 bytes).
+
+struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl; /* 0 16 */
+ struct mlx5_wqe_eth_seg eth; /* 16 16 */
+ struct mlx5_wqe_data_seg data[]; /* 32 0 */
+
+ /* size: 32, cachelines: 1, members: 3 */
+ /* last cacheline: 32 bytes */
+};
+
+struct mlx5_wqe_eth_seg {
+ u8 swp_outer_l4_offset; /* 0 1 */
+ u8 swp_outer_l3_offset; /* 1 1 */
+ u8 swp_inner_l4_offset; /* 2 1 */
+ u8 swp_inner_l3_offset; /* 3 1 */
+ u8 cs_flags; /* 4 1 */
+ u8 swp_flags; /* 5 1 */
+ __be16 mss; /* 6 2 */
+ __be32 flow_table_metadata; /* 8 4 */
+ union {
+ struct {
+ __be16 sz; /* 12 2 */
+ u8 start[2]; /* 14 2 */
+ } inline_hdr; /* 12 4 */
+ struct {
+ __be16 type; /* 12 2 */
+ __be16 vlan_tci; /* 14 2 */
+ } insert; /* 12 4 */
+ __be32 trailer; /* 12 4 */
+ }; /* 12 4 */
+
+ /* size: 16, cachelines: 1, members: 9 */
+ /* last cacheline: 16 bytes */
+};
+
+struct mlx5_wqe_data_seg {
+ __be32 byte_count; /* 0 4 */
+ __be32 lkey; /* 4 4 */
+ __be64 addr; /* 8 8 */
+
+ /* size: 16, cachelines: 1, members: 3 */
+ /* last cacheline: 16 bytes */
+};
+
+So, split the memcpy() so the compiler can reason about the buffer
+sizes.
+
+"pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe
+nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object
+code changes (i.e. only source line number induced differences and
+optimizations).
+
+Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support")
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +++---
+ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++-
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -225,7 +225,7 @@ static inline int mlx5e_get_max_num_chan
+ struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_eth_seg eth;
+- struct mlx5_wqe_data_seg data[0];
++ struct mlx5_wqe_data_seg data[];
+ };
+
+ struct mlx5e_rx_wqe_ll {
+@@ -242,8 +242,8 @@ struct mlx5e_umr_wqe {
+ struct mlx5_wqe_umr_ctrl_seg uctrl;
+ struct mlx5_mkey_seg mkc;
+ union {
+- struct mlx5_mtt inline_mtts[0];
+- struct mlx5_klm inline_klms[0];
++ DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
++ DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
+ };
+ };
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+@@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq
+
+ /* copy the inline part if required */
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+- memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
++ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
+ eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
++ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
++ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
+ dma_len -= MLX5E_XDP_MIN_INLINE;
+ dma_addr += MLX5E_XDP_MIN_INLINE;
+ dseg++;
--- /dev/null
+From 5b209d1a22afabfb7d644abb10510c5713a3e569 Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Tue, 1 Feb 2022 15:27:48 +0200
+Subject: net/mlx5e: Avoid implicit modify hdr for decap drop rule
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit 5b209d1a22afabfb7d644abb10510c5713a3e569 upstream.
+
+Currently the driver adds an implicit modify header action for decap
+rules on tunnel devices if the port is an OVS port. This is also done
+when the action is drop, which makes the modify header redundant; the
+FW doesn't support it and will generate a syndrome.
+
+kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3)
+
+Fix it by adding the implicit modify hdr only for fwd actions.
+
+Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device")
+Fixes: 077cdda764c7 ("net/mlx5e: TC, Fix memory leak with rules with internal port")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Ariel Levkovich <lariel@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1425,7 +1425,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
+ if (err)
+ goto err_out;
+
+- if (!attr->chain && esw_attr->int_port) {
++ if (!attr->chain && esw_attr->int_port &&
++ attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ /* If decap route device is internal port, change the
+ * source vport value in reg_c0 back to uplink just in
+ * case the rule performs goto chain > 0. If we have a miss
--- /dev/null
+From 736dfe4e68b868829a1e89dfef4a44c1580d4478 Mon Sep 17 00:00:00 2001
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+Date: Tue, 18 Jan 2022 13:31:54 +0200
+Subject: net/mlx5e: Don't treat small ceil values as unlimited in HTB offload
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+commit 736dfe4e68b868829a1e89dfef4a44c1580d4478 upstream.
+
+The hardware spec defines max_average_bw == 0 as "unlimited bandwidth".
+max_average_bw is calculated as `ceil / BYTES_IN_MBIT`, which can become
+0 when ceil is small, leading to an undesired effect of having no
+bandwidth limit.
+
+This commit fixes it by rounding up small values of ceil to 1 Mbit/s.
+
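+Worked example: HTB rates are in bytes/s and BYTES_IN_MBIT is 125000,
+so a ceil of 100000 bytes/s (0.8 Mbit/s) gives div_u64(100000, 125000)
+== 0, i.e. "unlimited"; the max_t(u32, ..., 1) clamp turns that into
+1 Mbit/s.
+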
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+@@ -569,7 +569,8 @@ static int mlx5e_htb_convert_rate(struct
+
+ static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
+ {
+- *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
++ /* Hardware treats 0 as "unlimited", set at least 1. */
++ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
--- /dev/null
+From 7957837b816f11eecb9146235bb0715478f4c81f Mon Sep 17 00:00:00 2001
+From: Khalid Manaa <khalidm@nvidia.com>
+Date: Wed, 26 Jan 2022 14:25:55 +0200
+Subject: net/mlx5e: Fix broken SKB allocation in HW-GRO
+
+From: Khalid Manaa <khalidm@nvidia.com>
+
+commit 7957837b816f11eecb9146235bb0715478f4c81f upstream.
+
+In case the HW doesn't perform header-data split, it will write the
+whole packet into the data buffer in the WQ. In this case the SHAMPO
+CQE handler can't use the header entry to build the SKB; instead it
+should allocate new memory and build the SKB using the function
+mlx5e_skb_from_cqe_mpwrq_nonlinear().
+
+Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support")
+Signed-off-by: Khalid Manaa <khalidm@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 26 +++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1866,7 +1866,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct m
+ return skb;
+ }
+
+-static void
++static struct sk_buff *
+ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ struct mlx5_cqe64 *cqe, u16 header_index)
+ {
+@@ -1890,7 +1890,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_r
+ skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size);
+
+ if (unlikely(!skb))
+- return;
++ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(head->page);
+@@ -1902,7 +1902,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_r
+ ALIGN(head_size, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+- return;
++ return NULL;
+ }
+
+ prefetchw(skb->data);
+@@ -1913,9 +1913,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_r
+ skb->tail += head_size;
+ skb->len += head_size;
+ }
+- rq->hw_gro_data->skb = skb;
+- NAPI_GRO_CB(skb)->count = 1;
+- skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size;
++ return skb;
+ }
+
+ static void
+@@ -1975,6 +1973,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_sh
+ u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ u32 page_idx = wqe_offset >> PAGE_SHIFT;
++ u16 head_size = cqe->shampo.header_size;
+ struct sk_buff **skb = &rq->hw_gro_data->skb;
+ bool flush = cqe->shampo.flush;
+ bool match = cqe->shampo.match;
+@@ -2007,9 +2006,16 @@ static void mlx5e_handle_rx_cqe_mpwrq_sh
+ }
+
+ if (!*skb) {
+- mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
++ if (likely(head_size))
++ *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
++ else
++ *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
++ page_idx);
+ if (unlikely(!*skb))
+ goto free_hd_entry;
++
++ NAPI_GRO_CB(*skb)->count = 1;
++ skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
+ } else {
+ NAPI_GRO_CB(*skb)->count++;
+ if (NAPI_GRO_CB(*skb)->count == 2 &&
+@@ -2023,8 +2029,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_sh
+ }
+ }
+
+- di = &wi->umr.dma_info[page_idx];
+- mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
++ if (likely(head_size)) {
++ di = &wi->umr.dma_info[page_idx];
++ mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
++ }
+
+ mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
+ if (flush)
--- /dev/null
+From ec41332e02bd0acf1f24206867bb6a02f5877a62 Mon Sep 17 00:00:00 2001
+From: Maor Dickman <maord@nvidia.com>
+Date: Thu, 13 Jan 2022 15:11:42 +0200
+Subject: net/mlx5e: Fix handling of wrong devices during bond netevent
+
+From: Maor Dickman <maord@nvidia.com>
+
+commit ec41332e02bd0acf1f24206867bb6a02f5877a62 upstream.
+
+The current implementation of the bond netevent handler only checks
+whether the handled netdev is a VF representor; it is missing a check
+that the VF representor is on the same physical device as the bond
+handling the netevent.
+
+Fix by adding the missing check and by optimizing the VF representor
+check so that it does not access uninitialized private data and crash.
+
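+The ordering inside the rewritten check is what avoids the crash below
+(annotated sketch of the one-line helper from the hunk):
+
+	static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+	{
+		/* Check the cheap, generic LAG flag first; the old code
+		 * could dereference uninitialized mlx5e private data for
+		 * netdevs that were not full representors, which is the
+		 * NULL pointer dereference seen below.
+		 */
+		return netif_is_lag_port(netdev) &&
+		       mlx5e_eswitch_vf_rep(netdev);
+	}
+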
+BUG: kernel NULL pointer dereference, address: 000000000000036c
+PGD 0 P4D 0
+Oops: 0000 [#1] SMP NOPTI
+Workqueue: eth3bond0 bond_mii_monitor [bonding]
+RIP: 0010:mlx5e_is_uplink_rep+0xc/0x50 [mlx5_core]
+RSP: 0018:ffff88812d69fd60 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff8881cf800000 RCX: 0000000000000000
+RDX: ffff88812d69fe10 RSI: 000000000000001b RDI: ffff8881cf800880
+RBP: ffff8881cf800000 R08: 00000445cabccf2b R09: 0000000000000008
+R10: 0000000000000004 R11: 0000000000000008 R12: ffff88812d69fe10
+R13: 00000000fffffffe R14: ffff88820c0f9000 R15: 0000000000000000
+FS: 0000000000000000(0000) GS:ffff88846fb00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000000036c CR3: 0000000103d80006 CR4: 0000000000370ea0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ mlx5e_eswitch_uplink_rep+0x31/0x40 [mlx5_core]
+ mlx5e_rep_is_lag_netdev+0x94/0xc0 [mlx5_core]
+ mlx5e_rep_esw_bond_netevent+0xeb/0x3d0 [mlx5_core]
+ raw_notifier_call_chain+0x41/0x60
+ call_netdevice_notifiers_info+0x34/0x80
+ netdev_lower_state_changed+0x4e/0xa0
+ bond_mii_monitor+0x56b/0x640 [bonding]
+ process_one_work+0x1b9/0x390
+ worker_thread+0x4d/0x3d0
+ ? rescuer_thread+0x350/0x350
+ kthread+0x124/0x150
+ ? set_kthread_struct+0x40/0x40
+ ret_from_fork+0x1f/0x30
+
+Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule")
+Signed-off-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 32 +++++++-----------
+ 1 file changed, 14 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_
+
+ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+ {
+- struct mlx5e_rep_priv *rpriv;
+- struct mlx5e_priv *priv;
+-
+- /* A given netdev is not a representor or not a slave of LAG configuration */
+- if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
+- return false;
+-
+- priv = netdev_priv(netdev);
+- rpriv = priv->ppriv;
+-
+- /* Egress acl forward to vport is supported only non-uplink representor */
+- return rpriv->rep->vport != MLX5_VPORT_UPLINK;
++ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
+ }
+
+ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_e
+ u16 fwd_vport_num;
+ int err;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+- if (!mlx5e_rep_is_lag_netdev(netdev))
+- return;
+-
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(s
+ unsigned long event, void *ptr)
+ {
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++ struct mlx5e_rep_priv *rpriv;
++ struct mlx5e_rep_bond *bond;
++ struct mlx5e_priv *priv;
++
++ if (!mlx5e_rep_is_lag_netdev(netdev))
++ return NOTIFY_DONE;
++
++ bond = container_of(nb, struct mlx5e_rep_bond, nb);
++ priv = netdev_priv(netdev);
++ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
++ /* Verify VF representor is on the same device of the bond handling the netevent. */
++ if (rpriv->uplink_priv.bond != bond)
++ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
--- /dev/null
+From 4a08a131351e375a2969b98e46df260ed04dcba7 Mon Sep 17 00:00:00 2001
+From: Gal Pressman <gal@nvidia.com>
+Date: Sun, 16 Jan 2022 09:07:22 +0200
+Subject: net/mlx5e: Fix module EEPROM query
+
+From: Gal Pressman <gal@nvidia.com>
+
+commit 4a08a131351e375a2969b98e46df260ed04dcba7 upstream.
+
+When querying the module EEPROM, the 'offset' variable was misused
+in place of the 'query.offset' field.
+Fix that by always using 'offset' and assigning its value to
+'query.offset' right before the mcia register read call.
+
+While at it, make the cross-pages read size adjustment more intuitive.
+
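+With numbers (illustration only; MLX5_EEPROM_PAGE_LENGTH == 256): for
+offset = 250 and size = 16, offset + size = 266 crosses the page end,
+so the read is trimmed to
+
+	size = MLX5_EEPROM_PAGE_LENGTH - offset;	/* 256 - 250 = 6 */
+
+reading exactly up to the page boundary; the remaining bytes are left
+for a follow-up read.
+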
+Fixes: e19b0a3474ab ("net/mlx5: Refactor module EEPROM query")
+Reported-by: Wang Yugui <wangyugui@e16-tech.com>
+Signed-off-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/port.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
+@@ -406,23 +406,24 @@ int mlx5_query_module_eeprom(struct mlx5
+
+ switch (module_id) {
+ case MLX5_MODULE_ID_SFP:
+- mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ case MLX5_MODULE_ID_QSFP:
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+- mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
++ mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset);
+ break;
+ default:
+ mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+ return -EINVAL;
+ }
+
+- if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH)
++ if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+- size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
++ size = MLX5_EEPROM_PAGE_LENGTH - offset;
+
+ query.size = size;
++ query.offset = offset;
+
+ return mlx5_query_mcia(dev, &query, data);
+ }
--- /dev/null
+From b8d91145ed7cfa046cc07bcfb277465b9d45da73 Mon Sep 17 00:00:00 2001
+From: Khalid Manaa <khalidm@nvidia.com>
+Date: Wed, 26 Jan 2022 14:14:58 +0200
+Subject: net/mlx5e: Fix wrong calculation of header index in HW_GRO
+
+From: Khalid Manaa <khalidm@nvidia.com>
+
+commit b8d91145ed7cfa046cc07bcfb277465b9d45da73 upstream.
+
+The HW doesn't wrap the CQE.shampo.header_index field according to the
+headers buffer size; instead it keeps increasing it until the u16 value
+overflows.
+
+Thus the mlx5e_handle_rx_cqe_mpwrq_shampo handler should mask the CQE
+header_index field to find the actual header index in the headers buffer.
+
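+Assuming hd_per_wq is a power of two (which the mask relies on), the
+new helper reduces the free-running hardware counter to a valid index;
+e.g. with hd_per_wq == 64, a raw header_entry_index of 70 maps to
+entry 6:
+
+	return be16_to_cpu(cqe->shampo.header_entry_index) &
+	       (rq->mpwqe.shampo->hd_per_wq - 1);
+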
+Fixes: f97d5c2a453e ("net/mlx5e: Add handle SHAMPO cqe support")
+Signed-off-by: Khalid Manaa <khalidm@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 5 +++++
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++--
+ 2 files changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+@@ -167,6 +167,11 @@ static inline u16 mlx5e_txqsq_get_next_p
+ return pi;
+ }
+
++static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
++{
++ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
++}
++
+ struct mlx5e_shampo_umr {
+ u16 len;
+ };
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1116,7 +1116,7 @@ static void mlx5e_shampo_update_ipv6_udp
+ static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct tcphdr *skb_tcp_hd)
+ {
+- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
++ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ struct tcphdr *last_tcp_hd;
+ void *last_hd_addr;
+
+@@ -1968,7 +1968,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx
+ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+ {
+ u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
+- u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index);
++ u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
+ u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
--- /dev/null
+From 5352859b3bfa0ca188b2f1d2c1436fddc781e3b6 Mon Sep 17 00:00:00 2001
+From: Raed Salem <raeds@nvidia.com>
+Date: Thu, 2 Dec 2021 17:43:50 +0200
+Subject: net/mlx5e: IPsec: Fix crypto offload for non TCP/UDP encapsulated traffic
+
+From: Raed Salem <raeds@nvidia.com>
+
+commit 5352859b3bfa0ca188b2f1d2c1436fddc781e3b6 upstream.
+
+IPsec crypto offload always sets the ethernet segment checksum flags
+with the inner L4 header checksum flag enabled for encapsulated IPsec
+offloaded packets, regardless of the encapsulated L4 header type and
+even if it doesn't exist in the first place. This breaks non-TCP/UDP
+traffic.
+
+Set the inner L4 checksum flag only when the encapsulated L4 header
+protocol is TCP/UDP, using the software parser swp_inner_l4_offset
+field as the indication.
+
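+In flag terms, the intended logic is (sketch; the real hunk is below):
+
+	eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+	if (inner_ipproto) {
+		eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+		/* only TCP and UDP actually carry an inner L4 checksum */
+		if (inner_ipproto == IPPROTO_TCP ||
+		    inner_ipproto == IPPROTO_UDP)
+			eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+	}
+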
+Fixes: 5cfb540ef27b ("net/mlx5e: Set IPsec WAs only in IP's non checksum partial case.")
+Signed-off-by: Raed Salem <raeds@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+@@ -131,14 +131,17 @@ static inline bool
+ mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+ {
+- struct xfrm_offload *xo = xfrm_offload(skb);
++ u8 inner_ipproto;
+
+ if (!mlx5e_ipsec_eseg_meta(eseg))
+ return false;
+
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+- if (xo->inner_ipproto) {
+- eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM;
++ inner_ipproto = xfrm_offload(skb)->inner_ipproto;
++ if (inner_ipproto) {
++ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
++ if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP)
++ eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
+ } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial_inner++;
--- /dev/null
+From de47db0cf7f4a9c555ad204e06baa70b50a70d08 Mon Sep 17 00:00:00 2001
+From: Raed Salem <raeds@nvidia.com>
+Date: Thu, 2 Dec 2021 17:49:01 +0200
+Subject: net/mlx5e: IPsec: Fix tunnel mode crypto offload for non TCP/UDP traffic
+
+From: Raed Salem <raeds@nvidia.com>
+
+commit de47db0cf7f4a9c555ad204e06baa70b50a70d08 upstream.
+
+The IPsec tunnel mode crypto offload software parser (SWP) setting in
+the data path currently always sets the inner L4 offset, regardless of
+the encapsulated L4 header type and whether it exists in the first
+place. This breaks non-TCP/UDP traffic.
+
+Set the SWP inner L4 offset only when the IPsec tunnel encapsulated L4
+header protocol is TCP/UDP.
+
+While at it, fix the inner IP protocol read used for setting the
+MLX5_ETH_WQE_SWP_INNER_L4_UDP flag, to address the case where the IP
+header protocol is IPv6.
+
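+The IPv6 part of the fix, sketched (illustrative; the real hunk is
+below):
+
+	/* Wrong for an inner IPv6 header: struct ipv6hdr has no
+	 * 'protocol' member (the equivalent is 'nexthdr'), so this
+	 * read is only valid for inner IPv4 packets.
+	 */
+	if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
+
+	/* xo->inner_ipproto is already resolved by the xfrm layer
+	 * and is correct for both inner IPv4 and IPv6.
+	 */
+	switch (xo->inner_ipproto) {
+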
+Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload")
+Signed-off-by: Raed Salem <raeds@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 13 ++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+@@ -157,11 +157,20 @@ static void mlx5e_ipsec_set_swp(struct s
+ /* Tunnel mode */
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+- if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
++
++ switch (xo->inner_ipproto) {
++ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
++ fallthrough;
++ case IPPROTO_TCP:
++ /* IP | ESP | IP | [TCP | UDP] */
++ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
++ break;
++ default:
++ break;
++ }
+ return;
+ }
+
--- /dev/null
+From a2446bc77a16cefd27de712d28af2396d6287593 Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Tue, 4 Jan 2022 10:38:02 +0200
+Subject: net/mlx5e: TC, Reject rules with drop and modify hdr action
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit a2446bc77a16cefd27de712d28af2396d6287593 upstream.
+
+This kind of action is not supported by firmware and generates a
+syndrome.
+
+kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3)
+
+Fixes: d7e75a325cb2 ("net/mlx5e: Add offloading of E-Switch TC pedit (header re-write) actions")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -3421,6 +3421,12 @@ actions_match_supported(struct mlx5e_pri
+ }
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
++ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
++ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
++ return false;
++ }
++
++ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
+ actions, ct_flow, ct_clear, extack))
+ return false;
--- /dev/null
+From 5623ef8a118838aae65363750dfafcba734dc8cb Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Mon, 17 Jan 2022 15:00:30 +0200
+Subject: net/mlx5e: TC, Reject rules with forward and drop actions
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit 5623ef8a118838aae65363750dfafcba734dc8cb upstream.
+
+Such rules are redundant but allowed and passed to the driver.
+The driver does not support offloading such rules, so return an error.
+
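+The check uses the all-bits-set idiom: !(~actions & M) is true exactly
+when every bit of M is set in actions, i.e. it is equivalent to
+
+	(actions & (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+		    MLX5_FLOW_CONTEXT_ACTION_DROP)) ==
+	    (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+	     MLX5_FLOW_CONTEXT_ACTION_DROP)
+
+so only rules carrying both forward and drop are rejected.
+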
+Fixes: 03a9d11e6eeb ("net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -3420,6 +3420,12 @@ actions_match_supported(struct mlx5e_pri
+ return false;
+ }
+
++ if (!(~actions &
++ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
++ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
++ return false;
++ }
++
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
--- /dev/null
+From 881cc731df6af99a21622e9be25a23b81adcd10b Mon Sep 17 00:00:00 2001
+From: Jonathan McDowell <noodles@earth.li>
+Date: Mon, 31 Jan 2022 13:56:41 +0000
+Subject: net: phy: Fix qca8081 with speeds lower than 2.5Gb/s
+
+From: Jonathan McDowell <noodles@earth.li>
+
+commit 881cc731df6af99a21622e9be25a23b81adcd10b upstream.
+
+A typo in qca808x_read_status means we try to set SMII mode on the
+port rather than SGMII when the link speed is not 2.5Gb/s. This
+results in no traffic due to the configuration mismatch between the
+PHY and the MAC.
+
+v2:
+ Only change interface mode when the link is up
+
+Fixes: 79c7bc0521545 ("net: phy: add qca8081 read_status")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jonathan McDowell <noodles@earth.li>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/at803x.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/phy/at803x.c
++++ b/drivers/net/phy/at803x.c
+@@ -1688,19 +1688,19 @@ static int qca808x_read_status(struct ph
+ if (ret < 0)
+ return ret;
+
+- if (phydev->link && phydev->speed == SPEED_2500)
+- phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+- else
+- phydev->interface = PHY_INTERFACE_MODE_SMII;
+-
+- /* generate seed as a lower random value to make PHY linked as SLAVE easily,
+- * except for master/slave configuration fault detected.
+- * the reason for not putting this code into the function link_change_notify is
+- * the corner case where the link partner is also the qca8081 PHY and the seed
+- * value is configured as the same value, the link can't be up and no link change
+- * occurs.
+- */
+- if (!phydev->link) {
++ if (phydev->link) {
++ if (phydev->speed == SPEED_2500)
++ phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
++ else
++ phydev->interface = PHY_INTERFACE_MODE_SGMII;
++ } else {
++ /* generate seed as a lower random value to make PHY linked as SLAVE easily,
++ * except for master/slave configuration fault detected.
++ * the reason for not putting this code into the function link_change_notify is
++ * the corner case where the link partner is also the qca8081 PHY and the seed
++ * value is configured as the same value, the link can't be up and no link change
++ * occurs.
++ */
+ if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR) {
+ qca808x_phy_ms_seed_enable(phydev, false);
+ } else {
--- /dev/null
+From 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 09:20:18 -0800
+Subject: net: sched: fix use-after-free in tc_new_tfilter()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 04c2a47ffb13c29778e2a14e414ad4cb5a5db4b5 upstream.
+
+Whenever tc_new_tfilter() jumps back to the replay: label,
+we need to make sure the @q and @chain local variables are cleared
+again, or we risk a use-after-free as in [1].
+
+For consistency, apply the same fix in tc_ctl_chain().
+
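+The pitfall in miniature (an illustrative sketch, not the kernel code):
+
+	struct Qdisc *q;	/* initializing only at declaration ... */
+
+replay:
+	q = NULL;		/* ... is not enough; reset on every pass */
+	...
+	if (err == -EAGAIN)
+		goto replay;	/* else a stale q points at freed memory */
+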
+BUG: KASAN: use-after-free in mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581
+Write of size 8 at addr ffff8880985c4b08 by task syz-executor.4/1945
+
+CPU: 0 PID: 1945 Comm: syz-executor.4 Not tainted 5.17.0-rc1-syzkaller-00495-gff58831fa02d #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_address_description.constprop.0.cold+0x8d/0x336 mm/kasan/report.c:255
+ __kasan_report mm/kasan/report.c:442 [inline]
+ kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
+ mini_qdisc_pair_swap+0x1b9/0x1f0 net/sched/sch_generic.c:1581
+ tcf_chain_head_change_item net/sched/cls_api.c:372 [inline]
+ tcf_chain0_head_change.isra.0+0xb9/0x120 net/sched/cls_api.c:386
+ tcf_chain_tp_insert net/sched/cls_api.c:1657 [inline]
+ tcf_chain_tp_insert_unique net/sched/cls_api.c:1707 [inline]
+ tc_new_tfilter+0x1e67/0x2350 net/sched/cls_api.c:2086
+ rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+RIP: 0033:0x7f2647172059
+Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f2645aa5168 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
+RAX: ffffffffffffffda RBX: 00007f2647285100 RCX: 00007f2647172059
+RDX: 040000000000009f RSI: 00000000200002c0 RDI: 0000000000000006
+RBP: 00007f26471cc08d R08: 0000000000000000 R09: 0000000000000000
+R10: 9e00000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007fffb3f7f02f R14: 00007f2645aa5300 R15: 0000000000022000
+ </TASK>
+
+Allocated by task 1944:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track mm/kasan/common.c:45 [inline]
+ set_alloc_info mm/kasan/common.c:436 [inline]
+ ____kasan_kmalloc mm/kasan/common.c:515 [inline]
+ ____kasan_kmalloc mm/kasan/common.c:474 [inline]
+ __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:524
+ kmalloc_node include/linux/slab.h:604 [inline]
+ kzalloc_node include/linux/slab.h:726 [inline]
+ qdisc_alloc+0xac/0xa10 net/sched/sch_generic.c:941
+ qdisc_create.constprop.0+0xce/0x10f0 net/sched/sch_api.c:1211
+ tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660
+ rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5592
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Freed by task 3609:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ kasan_set_track+0x21/0x30 mm/kasan/common.c:45
+ kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370
+ ____kasan_slab_free mm/kasan/common.c:366 [inline]
+ ____kasan_slab_free+0x130/0x160 mm/kasan/common.c:328
+ kasan_slab_free include/linux/kasan.h:236 [inline]
+ slab_free_hook mm/slub.c:1728 [inline]
+ slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1754
+ slab_free mm/slub.c:3509 [inline]
+ kfree+0xcb/0x280 mm/slub.c:4562
+ rcu_do_batch kernel/rcu/tree.c:2527 [inline]
+ rcu_core+0x7b8/0x1540 kernel/rcu/tree.c:2778
+ __do_softirq+0x29b/0x9c2 kernel/softirq.c:558
+
+Last potentially related work creation:
+ kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38
+ __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348
+ __call_rcu kernel/rcu/tree.c:3026 [inline]
+ call_rcu+0xb1/0x740 kernel/rcu/tree.c:3106
+ qdisc_put_unlocked+0x6f/0x90 net/sched/sch_generic.c:1109
+ tcf_block_release+0x86/0x90 net/sched/cls_api.c:1238
+ tc_new_tfilter+0xc0d/0x2350 net/sched/cls_api.c:2148
+ rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:5583
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
+ netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
+ netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x331/0x810 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmmsg+0x195/0x470 net/socket.c:2553
+ __do_sys_sendmmsg net/socket.c:2582 [inline]
+ __se_sys_sendmmsg net/socket.c:2579 [inline]
+ __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2579
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+The buggy address belongs to the object at ffff8880985c4800
+ which belongs to the cache kmalloc-1k of size 1024
+The buggy address is located 776 bytes inside of
+ 1024-byte region [ffff8880985c4800, ffff8880985c4c00)
+The buggy address belongs to the page:
+page:ffffea0002617000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x985c0
+head:ffffea0002617000 order:3 compound_mapcount:0 compound_pincount:0
+flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff)
+raw: 00fff00000010200 0000000000000000 dead000000000122 ffff888010c41dc0
+raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+page_owner tracks the page as allocated
+page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 1941, ts 1038999441284, free_ts 1033444432829
+ prep_new_page mm/page_alloc.c:2434 [inline]
+ get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4165
+ __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5389
+ alloc_pages+0x1aa/0x310 mm/mempolicy.c:2271
+ alloc_slab_page mm/slub.c:1799 [inline]
+ allocate_slab mm/slub.c:1944 [inline]
+ new_slab+0x28a/0x3b0 mm/slub.c:2004
+ ___slab_alloc+0x87c/0xe90 mm/slub.c:3018
+ __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3105
+ slab_alloc_node mm/slub.c:3196 [inline]
+ slab_alloc mm/slub.c:3238 [inline]
+ __kmalloc+0x2fb/0x340 mm/slub.c:4420
+ kmalloc include/linux/slab.h:586 [inline]
+ kzalloc include/linux/slab.h:715 [inline]
+ __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1335
+ neigh_sysctl_register+0x2c8/0x5e0 net/core/neighbour.c:3787
+ devinet_sysctl_register+0xb1/0x230 net/ipv4/devinet.c:2618
+ inetdev_init+0x286/0x580 net/ipv4/devinet.c:278
+ inetdev_event+0xa8a/0x15d0 net/ipv4/devinet.c:1532
+ notifier_call_chain+0xb5/0x200 kernel/notifier.c:84
+ call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1919
+ call_netdevice_notifiers_extack net/core/dev.c:1931 [inline]
+ call_netdevice_notifiers net/core/dev.c:1945 [inline]
+ register_netdevice+0x1073/0x1500 net/core/dev.c:9698
+ veth_newlink+0x59c/0xa90 drivers/net/veth.c:1722
+page last free stack trace:
+ reset_page_owner include/linux/page_owner.h:24 [inline]
+ free_pages_prepare mm/page_alloc.c:1352 [inline]
+ free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1404
+ free_unref_page_prepare mm/page_alloc.c:3325 [inline]
+ free_unref_page+0x19/0x690 mm/page_alloc.c:3404
+ release_pages+0x748/0x1220 mm/swap.c:956
+ tlb_batch_pages_flush mm/mmu_gather.c:50 [inline]
+ tlb_flush_mmu_free mm/mmu_gather.c:243 [inline]
+ tlb_flush_mmu+0xe9/0x6b0 mm/mmu_gather.c:250
+ zap_pte_range mm/memory.c:1441 [inline]
+ zap_pmd_range mm/memory.c:1490 [inline]
+ zap_pud_range mm/memory.c:1519 [inline]
+ zap_p4d_range mm/memory.c:1540 [inline]
+ unmap_page_range+0x1d1d/0x2a30 mm/memory.c:1561
+ unmap_single_vma+0x198/0x310 mm/memory.c:1606
+ unmap_vmas+0x16b/0x2f0 mm/memory.c:1638
+ exit_mmap+0x201/0x670 mm/mmap.c:3178
+ __mmput+0x122/0x4b0 kernel/fork.c:1114
+ mmput+0x56/0x60 kernel/fork.c:1135
+ exit_mm kernel/exit.c:507 [inline]
+ do_exit+0xa3c/0x2a30 kernel/exit.c:793
+ do_group_exit+0xd2/0x2f0 kernel/exit.c:935
+ __do_sys_exit_group kernel/exit.c:946 [inline]
+ __se_sys_exit_group kernel/exit.c:944 [inline]
+ __x64_sys_exit_group+0x3a/0x50 kernel/exit.c:944
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Memory state around the buggy address:
+ ffff8880985c4a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880985c4a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff8880985c4b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff8880985c4b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880985c4c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+
+Fixes: 470502de5bdb ("net: sched: unlock rules update API")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Vlad Buslov <vladbu@mellanox.com>
+Cc: Jiri Pirko <jiri@mellanox.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220131172018.3704490-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_api.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff
+ bool prio_allocate;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
++ struct Qdisc *q;
+ struct tcf_chain_info chain_info;
+- struct tcf_chain *chain = NULL;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ struct tcf_proto *tp;
+ unsigned long cl;
+@@ -1976,6 +1976,8 @@ replay:
+ tp = NULL;
+ cl = 0;
+ block = NULL;
++ q = NULL;
++ chain = NULL;
+ flags = 0;
+
+ if (prio == 0) {
+@@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *
+ struct tcmsg *t;
+ u32 parent;
+ u32 chain_index;
+- struct Qdisc *q = NULL;
+- struct tcf_chain *chain = NULL;
++ struct Qdisc *q;
++ struct tcf_chain *chain;
+ struct tcf_block *block;
+ unsigned long cl;
+ int err;
+@@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *
+ return -EPERM;
+
+ replay:
++ q = NULL;
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
--- /dev/null
+From 94fd19752b28aa66c98e7991734af91dfc529f8f Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Fri, 14 Jan 2022 16:57:56 +0100
+Subject: ovl: don't fail copy up if no fileattr support on upper
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 94fd19752b28aa66c98e7991734af91dfc529f8f upstream.
+
+Christoph Fritz reports that failing to copy up fileattr when the upper
+filesystem doesn't support fileattr or xattr results in a regression.
+
+Return success in these failure cases; this reverts overlayfs to the old
+behavior.
+
+Add a pr_warn_once() in these cases to still let the user know about the
+copy up failures.
+
+Reported-by: Christoph Fritz <chf.fritz@googlemail.com>
+Fixes: 72db82115d2b ("ovl: copy up sync/noatime fileattr flags")
+Cc: <stable@vger.kernel.org> # v5.15
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/overlayfs/copy_up.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -157,7 +157,9 @@ static int ovl_copy_fileattr(struct inod
+ */
+ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) {
+ err = ovl_set_protattr(inode, new->dentry, &oldfa);
+- if (err)
++ if (err == -EPERM)
++ pr_warn_once("copying fileattr: no xattr on upper\n");
++ else if (err)
+ return err;
+ }
+
+@@ -167,6 +169,14 @@ static int ovl_copy_fileattr(struct inod
+
+ err = ovl_real_fileattr_get(new, &newfa);
+ if (err) {
++ /*
++ * Returning an error if upper doesn't support fileattr will
++ * result in a regression, so revert to the old behavior.
++ */
++ if (err == -ENOTTY || err == -EINVAL) {
++ pr_warn_once("copying fileattr: no support on upper\n");
++ return 0;
++ }
+ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n",
+ new, err);
+ return err;
--- /dev/null
+From c36c04c2e132fc39f6b658bf607aed4425427fd7 Mon Sep 17 00:00:00 2001
+From: John Hubbard <jhubbard@nvidia.com>
+Date: Tue, 1 Feb 2022 19:23:17 -0800
+Subject: Revert "mm/gup: small refactoring: simplify try_grab_page()"
+
+From: John Hubbard <jhubbard@nvidia.com>
+
+commit c36c04c2e132fc39f6b658bf607aed4425427fd7 upstream.
+
+This reverts commit 54d516b1d62ff8f17cee2da06e5e4706a0d00b8a
+
+That commit did a refactoring that effectively combined fast and slow
+gup paths (again). And that was again incorrect, for two reasons:
+
+ a) Fast gup and slow gup get reference counts on pages in different
+ ways and with different goals: see Linus' writeup in commit
+ cd1adf1b63a1 ("Revert "mm/gup: remove try_get_page(), call
+ try_get_compound_head() directly""), and
+
+ b) try_grab_compound_head() also has a specific check for
+ "FOLL_LONGTERM && !is_pinned(page)", that assumes that the caller
+ can fall back to slow gup. This resulted in new failures, as
+    recently reported by Will McVicker [1].
+
+But (a) has problems too, even though they may not have been reported
+yet. So just revert this.
+
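+For reference, the restored slow-path pin accounting: when a page has no
+separate pincount field, FOLL_PIN is recorded in the refcount itself by
+adding GUP_PIN_COUNTING_BIAS (1024) rather than 1, which is how pins are
+later distinguished from plain references when unpinning.
+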
+Link: https://lore.kernel.org/r/20220131203504.3458775-1-willmcvicker@google.com [1]
+Fixes: 54d516b1d62f ("mm/gup: small refactoring: simplify try_grab_page()")
+Reported-and-tested-by: Will McVicker <willmcvicker@google.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Minchan Kim <minchan@google.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: stable@vger.kernel.org # 5.15
+Signed-off-by: John Hubbard <jhubbard@nvidia.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c | 35 ++++++++++++++++++++++++++++++-----
+ 1 file changed, 30 insertions(+), 5 deletions(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -124,8 +124,8 @@ static inline struct page *try_get_compo
+ * considered failure, and furthermore, a likely bug in the caller, so a warning
+ * is also emitted.
+ */
+-struct page *try_grab_compound_head(struct page *page,
+- int refs, unsigned int flags)
++__maybe_unused struct page *try_grab_compound_head(struct page *page,
++ int refs, unsigned int flags)
+ {
+ if (flags & FOLL_GET)
+ return try_get_compound_head(page, refs);
+@@ -208,10 +208,35 @@ static void put_compound_head(struct pag
+ */
+ bool __must_check try_grab_page(struct page *page, unsigned int flags)
+ {
+- if (!(flags & (FOLL_GET | FOLL_PIN)))
+- return true;
++ WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
+
+- return try_grab_compound_head(page, 1, flags);
++ if (flags & FOLL_GET)
++ return try_get_page(page);
++ else if (flags & FOLL_PIN) {
++ int refs = 1;
++
++ page = compound_head(page);
++
++ if (WARN_ON_ONCE(page_ref_count(page) <= 0))
++ return false;
++
++ if (hpage_pincount_available(page))
++ hpage_pincount_add(page, 1);
++ else
++ refs = GUP_PIN_COUNTING_BIAS;
++
++ /*
++ * Similar to try_grab_compound_head(): even if using the
++ * hpage_pincount_add/_sub() routines, be sure to
++ * *also* increment the normal page refcount field at least
++ * once, so that the page really is pinned.
++ */
++ page_ref_add(page, refs);
++
++ mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1);
++ }
++
++ return true;
+ }
+
+ /**
--- /dev/null
+From c6f6f2444bdbe0079e41914a35081530d0409963 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 17:21:06 -0800
+Subject: rtnetlink: make sure to refresh master_dev/m_ops in __rtnl_newlink()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit c6f6f2444bdbe0079e41914a35081530d0409963 upstream.
+
+While looking at an unrelated syzbot bug, I found that the replay logic
+in __rtnl_newlink() can trigger a use-after-free.
+
+It is better to clear master_dev and m_ops inside the loop,
+in case we have to replay it.
+
+Fixes: ba7d49b1f0f8 ("rtnetlink: provide api for getting and setting slave info")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Jiri Pirko <jiri@nvidia.com>
+Link: https://lore.kernel.org/r/20220201012106.216495-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3254,8 +3254,8 @@ static int __rtnl_newlink(struct sk_buff
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+ unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+- const struct rtnl_link_ops *m_ops = NULL;
+- struct net_device *master_dev = NULL;
++ const struct rtnl_link_ops *m_ops;
++ struct net_device *master_dev;
+ struct net *net = sock_net(skb->sk);
+ const struct rtnl_link_ops *ops;
+ struct nlattr *tb[IFLA_MAX + 1];
+@@ -3293,6 +3293,8 @@ replay:
+ else
+ dev = NULL;
+
++ master_dev = NULL;
++ m_ops = NULL;
+ if (dev) {
+ master_dev = netdev_master_upper_dev_get(dev);
+ if (master_dev)
net-ipa-use-a-bitmap-for-endpoint-replenish_enabled.patch
net-ipa-prevent-concurrent-replenish.patch
drm-vc4-hdmi-make-sure-the-device-is-powered-with-cec.patch
+cgroup-v1-require-capabilities-to-set-release_agent.patch
+revert-mm-gup-small-refactoring-simplify-try_grab_page.patch
+net-phy-fix-qca8081-with-speeds-lower-than-2.5gb-s.patch
+ovl-don-t-fail-copy-up-if-no-fileattr-support-on-upper.patch
+lockd-fix-server-crash-on-reboot-of-client-holding-lock.patch
+lockd-fix-failure-to-cleanup-client-locks.patch
+net-mlx5e-ipsec-fix-crypto-offload-for-non-tcp-udp-encapsulated-traffic.patch
+net-mlx5e-ipsec-fix-tunnel-mode-crypto-offload-for-non-tcp-udp-traffic.patch
+net-mlx5e-tc-reject-rules-with-drop-and-modify-hdr-action.patch
+net-mlx5-bridge-take-rtnl-lock-in-init-error-handler.patch
+net-mlx5-bridge-ensure-dev_name-is-null-terminated.patch
+net-mlx5e-fix-handling-of-wrong-devices-during-bond-netevent.patch
+net-mlx5-use-del_timer_sync-in-fw-reset-flow-of-halting-poll.patch
+net-mlx5e-fix-module-eeprom-query.patch
+net-mlx5e-tc-reject-rules-with-forward-and-drop-actions.patch
+net-mlx5-fix-offloading-with-eswitch_ipv4_ttl_modify_enable.patch
+net-mlx5e-don-t-treat-small-ceil-values-as-unlimited-in-htb-offload.patch
+net-mlx5-bridge-fix-devlink-deadlock-on-net-namespace-deletion.patch
+net-mlx5e-avoid-field-overflowing-memcpy.patch
+net-mlx5e-fix-wrong-calculation-of-header-index-in-hw_gro.patch
+net-mlx5e-fix-broken-skb-allocation-in-hw-gro.patch
+net-mlx5-e-switch-fix-uninitialized-variable-modact.patch
+net-mlx5e-avoid-implicit-modify-hdr-for-decap-drop-rule.patch
+ipheth-fix-eoverflow-in-ipheth_rcvbulk_callback.patch
+i40e-fix-reset-bw-limit-when-dcb-enabled-with-1-tc.patch
+i40e-fix-reset-path-while-removing-the-driver.patch
+net-amd-xgbe-ensure-to-reset-the-tx_timer_active-flag.patch
+net-amd-xgbe-fix-skb-data-length-underflow.patch
+fanotify-fix-stale-file-descriptor-in-copy_event_to_user.patch
+net-sched-fix-use-after-free-in-tc_new_tfilter.patch
+rtnetlink-make-sure-to-refresh-master_dev-m_ops-in-__rtnl_newlink.patch
+net-ipa-request-ipa-register-values-be-retained.patch
+bpf-fix-possible-race-in-inc_misses_counter.patch
+cpuset-fix-the-bug-that-subpart_cpus-updated-wrongly-in-update_cpumask.patch
+e1000e-handshake-with-csme-starts-from-adl-platforms.patch
+af_packet-fix-data-race-in-packet_setsockopt-packet_setsockopt.patch
+tcp-fix-mem-under-charging-with-zerocopy-sendmsg.patch
+tcp-add-missing-tcp_skb_can_collapse-test-in-tcp_shift_skb_data.patch
--- /dev/null
+From b67985be400969578d4d4b17299714c0e5d2c07b Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 1 Feb 2022 10:46:40 -0800
+Subject: tcp: add missing tcp_skb_can_collapse() test in tcp_shift_skb_data()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit b67985be400969578d4d4b17299714c0e5d2c07b upstream.
+
+tcp_shift_skb_data() might collapse three packets into a larger one.
+
+P_A, P_B, P_C -> P_ABC
+
+Historically, it used a single tcp_skb_can_collapse_to(P_A) call,
+because it was enough.
+
+In commit 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions"),
+this call was replaced by a call to tcp_skb_can_collapse(P_A, P_B).
+
+But the now-needed test on P_C was missed.
+
+This probably broke MPTCP.
+
+Then later, commit 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+added an extra condition to tcp_skb_can_collapse(), but the missing call
+from tcp_shift_skb_data() is also breaking TCP zerocopy, because P_A and P_C
+might have different skb_zcopy_pure() status.
+
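+Schematically, the fix adds the missing gate (sketch; P_A is prev and,
+after P_B has been merged, skb is P_C):
+
+	if (!tcp_skb_can_collapse(prev, skb))	/* P_A vs P_C */
+		goto out;
+
+since P_A and P_C may disagree on MPTCP extensions or on
+skb_zcopy_pure() status.
+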
+Fixes: 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions")
+Fixes: 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Cc: Talal Ahmad <talalahmad@google.com>
+Cc: Arjun Roy <arjunroy@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://lore.kernel.org/r/20220201184640.756716-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_dat
+ (mss != tcp_skb_seglen(skb)))
+ goto out;
+
++ if (!tcp_skb_can_collapse(prev, skb))
++ goto out;
+ len = skb->len;
+ pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, pcount, len))
--- /dev/null
+From 479f5547239d970d3833f15f54a6481fffdb91ec Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Jan 2022 22:52:54 -0800
+Subject: tcp: fix mem under-charging with zerocopy sendmsg()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 479f5547239d970d3833f15f54a6481fffdb91ec upstream.
+
+We got reports of the following warning in inet_sock_destruct():
+
+ WARN_ON(sk_forward_alloc_get(sk));
+
+Whenever we add a non-zerocopy fragment to a pure zerocopy skb,
+we have to anticipate that the whole skb->truesize will be uncharged
+when the skb is finally freed.
+
+skb->data_len is the payload length, but the memory truesize
+estimated by __zerocopy_sg_from_iter() is page-aligned.
+
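+A worked example (illustrative numbers, 4K pages): appending 100 bytes
+of non-zerocopy payload to a pure zerocopy skb grows skb->data_len by
+100, while the page fragment backing it grows skb->truesize by 4096.
+Charging only data_len under-charges by roughly 4000 bytes, which is
+what the forward_alloc warning catches; hence the fix charges
+
+	extra = skb->truesize - SKB_TRUESIZE(skb_end_offset(skb));
+
+i.e. everything beyond the linear area's truesize.
+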
+Fixes: 9b65b17db723 ("net: avoid double accounting for pure zerocopy skbs")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Talal Ahmad <talalahmad@google.com>
+Cc: Arjun Roy <arjunroy@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Link: https://lore.kernel.org/r/20220201065254.680532-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1321,10 +1321,13 @@ new_segment:
+
+ /* skb changing from pure zc to mixed, must charge zc */
+ if (unlikely(skb_zcopy_pure(skb))) {
+- if (!sk_wmem_schedule(sk, skb->data_len))
++ u32 extra = skb->truesize -
++ SKB_TRUESIZE(skb_end_offset(skb));
++
++ if (!sk_wmem_schedule(sk, extra))
+ goto wait_for_space;
+
+- sk_mem_charge(sk, skb->data_len);
++ sk_mem_charge(sk, extra);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+