Fixes for 5.10
author    Sasha Levin <sashal@kernel.org>
          Mon, 18 Jan 2021 00:19:11 +0000 (19:19 -0500)
committer Sasha Levin <sashal@kernel.org>
          Mon, 18 Jan 2021 00:19:11 +0000 (19:19 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
16 files changed:
queue-5.10/blk-mq-debugfs-add-decode-for-blk_mq_f_tag_hctx_shar.patch [new file with mode: 0644]
queue-5.10/bpf-save-correct-stopping-point-in-file-seq-iteratio.patch [new file with mode: 0644]
queue-5.10/bpf-simplify-task_file_seq_get_next.patch [new file with mode: 0644]
queue-5.10/cfg80211-select-config_crc32.patch [new file with mode: 0644]
queue-5.10/iommu-vt-d-update-domain-geometry-in-iommu_ops.at-de.patch [new file with mode: 0644]
queue-5.10/mm-don-t-play-games-with-pinned-pages-in-clear_page_.patch [new file with mode: 0644]
queue-5.10/mm-don-t-put-pinned-pages-into-the-swap-cache.patch [new file with mode: 0644]
queue-5.10/mm-fix-clear_refs_write-locking.patch [new file with mode: 0644]
queue-5.10/net-dcb-validate-netlink-message-in-dcb-handler.patch [new file with mode: 0644]
queue-5.10/net-mlx5-e-switch-fix-changing-vf-vlanid.patch [new file with mode: 0644]
queue-5.10/net-mlx5-fix-passing-zero-to-ptr_err.patch [new file with mode: 0644]
queue-5.10/net-mlx5e-ct-use-per-flow-counter-when-ct-flow-accou.patch [new file with mode: 0644]
queue-5.10/nvme-fc-avoid-calling-_nvme_fc_abort_outstanding_ios.patch [new file with mode: 0644]
queue-5.10/rcu-tasks-move-rcu-tasks-initialization-to-before-ea.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/x86-sev-es-fix-sev-es-out-in-immediate-opcode-vc-han.patch [new file with mode: 0644]

diff --git a/queue-5.10/blk-mq-debugfs-add-decode-for-blk_mq_f_tag_hctx_shar.patch b/queue-5.10/blk-mq-debugfs-add-decode-for-blk_mq_f_tag_hctx_shar.patch
new file mode 100644 (file)
index 0000000..5ecfcde
--- /dev/null
@@ -0,0 +1,40 @@
+From b140d6ef141eff2f1cef417589750bc7a20dd0fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jan 2021 16:55:37 +0800
+Subject: blk-mq-debugfs: Add decode for BLK_MQ_F_TAG_HCTX_SHARED
+
+From: John Garry <john.garry@huawei.com>
+
+[ Upstream commit 02f938e9fed1681791605ca8b96c2d9da9355f6a ]
+
+Showing the hctx flags for when BLK_MQ_F_TAG_HCTX_SHARED is set gives
+something like:
+
+root@debian:/home/john# more /sys/kernel/debug/block/sda/hctx0/flags
+alloc_policy=FIFO SHOULD_MERGE|TAG_QUEUE_SHARED|3
+
+Add the decoding for that flag.
+
+Fixes: 32bc15afed04b ("blk-mq: Facilitate a shared sbitmap per tagset")
+Signed-off-by: John Garry <john.garry@huawei.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-mq-debugfs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
+index 4d6e83e5b4429..4de03da9a624b 100644
+--- a/block/blk-mq-debugfs.c
++++ b/block/blk-mq-debugfs.c
+@@ -246,6 +246,7 @@ static const char *const hctx_flag_name[] = {
+       HCTX_FLAG_NAME(BLOCKING),
+       HCTX_FLAG_NAME(NO_SCHED),
+       HCTX_FLAG_NAME(STACKING),
++      HCTX_FLAG_NAME(TAG_HCTX_SHARED),
+ };
+ #undef HCTX_FLAG_NAME
+-- 
+2.27.0
+
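
The decode works by printing a name for each flag bit it recognizes and
falling back to the bare bit number for the rest; that is why the
unhandled BLK_MQ_F_TAG_HCTX_SHARED bit rendered as the trailing "3" in
the output above. A minimal user-space model of that scheme (the bit
assignments are illustrative, not lifted from blk-mq.h):

#include <stdio.h>

static const char *const flag_name[] = {
        [0] = "SHOULD_MERGE",
        [1] = "TAG_QUEUE_SHARED",
        /* bit 3 (TAG_HCTX_SHARED) deliberately left undecoded */
};

static void show_flags(unsigned long flags)
{
        const char *sep = "";

        for (int i = 0; i < 8 * (int)sizeof(flags); i++) {
                if (!(flags & (1UL << i)))
                        continue;
                if (i < (int)(sizeof(flag_name) / sizeof(flag_name[0])) &&
                    flag_name[i])
                        printf("%s%s", sep, flag_name[i]);
                else
                        printf("%s%d", sep, i); /* unknown bit -> number */
                sep = "|";
        }
        printf("\n");
}

int main(void)
{
        /* prints: SHOULD_MERGE|TAG_QUEUE_SHARED|3 */
        show_flags((1UL << 0) | (1UL << 1) | (1UL << 3));
        return 0;
}
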
diff --git a/queue-5.10/bpf-save-correct-stopping-point-in-file-seq-iteratio.patch b/queue-5.10/bpf-save-correct-stopping-point-in-file-seq-iteratio.patch
new file mode 100644 (file)
index 0000000..35abd51
--- /dev/null
@@ -0,0 +1,103 @@
+From bb6fdb272eb3431f27eab07c5e6c2b603639c4e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Dec 2020 10:50:30 -0800
+Subject: bpf: Save correct stopping point in file seq iteration
+
+From: Jonathan Lemon <bsd@fb.com>
+
+[ Upstream commit 69ca310f34168eae0ada434796bfc22fb4a0fa26 ]
+
+On some systems, some variant of the following splat is
+repeatedly seen.  The common factor in all traces seems
+to be the entry point to task_file_seq_next().  With the
+patch, all warnings go away.
+
+    rcu: INFO: rcu_sched self-detected stall on CPU
+    rcu:     26-....: (20992 ticks this GP) idle=d7e/1/0x4000000000000002 softirq=81556231/81556231 fqs=4876
+        (t=21033 jiffies g=159148529 q=223125)
+    NMI backtrace for cpu 26
+    CPU: 26 PID: 2015853 Comm: bpftool Kdump: loaded Not tainted 5.6.13-0_fbk4_3876_gd8d1f9bf80bb #1
+    Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A12 10/08/2018
+    Call Trace:
+     <IRQ>
+     dump_stack+0x50/0x70
+     nmi_cpu_backtrace.cold.6+0x13/0x50
+     ? lapic_can_unplug_cpu.cold.30+0x40/0x40
+     nmi_trigger_cpumask_backtrace+0xba/0xca
+     rcu_dump_cpu_stacks+0x99/0xc7
+     rcu_sched_clock_irq.cold.90+0x1b4/0x3aa
+     ? tick_sched_do_timer+0x60/0x60
+     update_process_times+0x24/0x50
+     tick_sched_timer+0x37/0x70
+     __hrtimer_run_queues+0xfe/0x270
+     hrtimer_interrupt+0xf4/0x210
+     smp_apic_timer_interrupt+0x5e/0x120
+     apic_timer_interrupt+0xf/0x20
+     </IRQ>
+    RIP: 0010:get_pid_task+0x38/0x80
+    Code: 89 f6 48 8d 44 f7 08 48 8b 00 48 85 c0 74 2b 48 83 c6 55 48 c1 e6 04 48 29 f0 74 19 48 8d 78 20 ba 01 00 00 00 f0 0f c1 50 20 <85> d2 74 27 78 11 83 c2 01 78 0c 48 83 c4 08 c3 31 c0 48 83 c4 08
+    RSP: 0018:ffffc9000d293dc8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13
+    RAX: ffff888637c05600 RBX: ffffc9000d293e0c RCX: 0000000000000000
+    RDX: 0000000000000001 RSI: 0000000000000550 RDI: ffff888637c05620
+    RBP: ffffffff8284eb80 R08: ffff88831341d300 R09: ffff88822ffd8248
+    R10: ffff88822ffd82d0 R11: 00000000003a93c0 R12: 0000000000000001
+    R13: 00000000ffffffff R14: ffff88831341d300 R15: 0000000000000000
+     ? find_ge_pid+0x1b/0x20
+     task_seq_get_next+0x52/0xc0
+     task_file_seq_get_next+0x159/0x220
+     task_file_seq_next+0x4f/0xa0
+     bpf_seq_read+0x159/0x390
+     vfs_read+0x8a/0x140
+     ksys_read+0x59/0xd0
+     do_syscall_64+0x42/0x110
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+    RIP: 0033:0x7f95ae73e76e
+    Code: Bad RIP value.
+    RSP: 002b:00007ffc02c1dbf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
+    RAX: ffffffffffffffda RBX: 000000000170faa0 RCX: 00007f95ae73e76e
+    RDX: 0000000000001000 RSI: 00007ffc02c1dc30 RDI: 0000000000000007
+    RBP: 00007ffc02c1ec70 R08: 0000000000000005 R09: 0000000000000006
+    R10: fffffffffffff20b R11: 0000000000000246 R12: 00000000019112a0
+    R13: 0000000000000000 R14: 0000000000000007 R15: 00000000004283c0
+
+If unable to obtain the file structure for the current task,
+proceed to the next task number after the one returned from
+task_seq_get_next(), instead of the next task number from the
+original iterator.
+
+Also, save the stopping task number from task_seq_get_next()
+on failure in case of restarts.
+
+Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets")
+Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20201218185032.2464558-2-jonathan.lemon@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/task_iter.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
+index 767c93d38bf55..f3d3a562a802a 100644
+--- a/kernel/bpf/task_iter.c
++++ b/kernel/bpf/task_iter.c
+@@ -158,13 +158,14 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
+               if (!curr_task) {
+                       info->task = NULL;
+                       info->files = NULL;
++                      info->tid = curr_tid;
+                       return NULL;
+               }
+               curr_files = get_files_struct(curr_task);
+               if (!curr_files) {
+                       put_task_struct(curr_task);
+-                      curr_tid = ++(info->tid);
++                      curr_tid = curr_tid + 1;
+                       info->fd = 0;
+                       goto again;
+               }
+-- 
+2.27.0
+
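
The stall is a resume-point bug: task_seq_get_next() can jump across a
large tid gap, but on the get_files_struct() failure path the old code
advanced from the iterator's stale tid, re-scanning the same gap one
tid at a time. A toy model of the corrected logic (the tid table and
helpers are invented for illustration):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Existing tids in the "namespace"; note they start far from 0. */
static const int tids[] = { 1000000, 1000001 };

/* Like task_seq_get_next(): smallest existing tid >= tid, or -1. */
static int seq_get_next(int tid)
{
        for (size_t i = 0; i < sizeof(tids) / sizeof(tids[0]); i++)
                if (tids[i] >= tid)
                        return tids[i];
        return -1;
}

/* Pretend only the second task still has a files struct. */
static bool get_files(int tid) { return tid == 1000001; }

int main(void)
{
        int tid = 0, lookups = 0;

        for (;;) {
                int curr = seq_get_next(tid);

                lookups++;
                if (curr < 0)
                        break;
                if (!get_files(curr)) {
                        /* Fixed: resume past the tid actually examined.
                         * The buggy "tid = tid + 1" advanced the stale
                         * value instead, re-finding tid 1000000 about a
                         * million times: the busy loop behind the RCU
                         * stall quoted above. */
                        tid = curr + 1;
                        continue;
                }
                printf("tid %d after %d lookups\n", curr, lookups);
                break;
        }
        return 0;
}
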
diff --git a/queue-5.10/bpf-simplify-task_file_seq_get_next.patch b/queue-5.10/bpf-simplify-task_file_seq_get_next.patch
new file mode 100644 (file)
index 0000000..ba50d6a
--- /dev/null
@@ -0,0 +1,135 @@
+From 13806bf8b0dc8d2d3de7d970e3ae96dc4e72f141 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 Nov 2020 16:28:33 -0800
+Subject: bpf: Simplify task_file_seq_get_next()
+
+From: Song Liu <songliubraving@fb.com>
+
+[ Upstream commit 91b2db27d3ff9ad29e8b3108dfbf1e2f49fe9bd3 ]
+
+Simplify task_file_seq_get_next() by removing two in/out arguments: task
+and fstruct. Use info->task and info->files instead.
+
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/bpf/20201120002833.2481110-1-songliubraving@fb.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/task_iter.c | 54 +++++++++++++-----------------------------
+ 1 file changed, 17 insertions(+), 37 deletions(-)
+
+diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
+index 5b6af30bfbcd8..767c93d38bf55 100644
+--- a/kernel/bpf/task_iter.c
++++ b/kernel/bpf/task_iter.c
+@@ -136,8 +136,7 @@ struct bpf_iter_seq_task_file_info {
+ };
+ static struct file *
+-task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+-                     struct task_struct **task, struct files_struct **fstruct)
++task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
+ {
+       struct pid_namespace *ns = info->common.ns;
+       u32 curr_tid = info->tid, max_fds;
+@@ -150,14 +149,17 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+        * Otherwise, it does not hold any reference.
+        */
+ again:
+-      if (*task) {
+-              curr_task = *task;
+-              curr_files = *fstruct;
++      if (info->task) {
++              curr_task = info->task;
++              curr_files = info->files;
+               curr_fd = info->fd;
+       } else {
+               curr_task = task_seq_get_next(ns, &curr_tid, true);
+-              if (!curr_task)
++              if (!curr_task) {
++                      info->task = NULL;
++                      info->files = NULL;
+                       return NULL;
++              }
+               curr_files = get_files_struct(curr_task);
+               if (!curr_files) {
+@@ -167,9 +169,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+                       goto again;
+               }
+-              /* set *fstruct, *task and info->tid */
+-              *fstruct = curr_files;
+-              *task = curr_task;
++              info->files = curr_files;
++              info->task = curr_task;
+               if (curr_tid == info->tid) {
+                       curr_fd = info->fd;
+               } else {
+@@ -199,8 +200,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+       rcu_read_unlock();
+       put_files_struct(curr_files);
+       put_task_struct(curr_task);
+-      *task = NULL;
+-      *fstruct = NULL;
++      info->task = NULL;
++      info->files = NULL;
+       info->fd = 0;
+       curr_tid = ++(info->tid);
+       goto again;
+@@ -209,21 +210,13 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+       struct bpf_iter_seq_task_file_info *info = seq->private;
+-      struct files_struct *files = NULL;
+-      struct task_struct *task = NULL;
+       struct file *file;
+-      file = task_file_seq_get_next(info, &task, &files);
+-      if (!file) {
+-              info->files = NULL;
+-              info->task = NULL;
+-              return NULL;
+-      }
+-
+-      if (*pos == 0)
++      info->task = NULL;
++      info->files = NULL;
++      file = task_file_seq_get_next(info);
++      if (file && *pos == 0)
+               ++*pos;
+-      info->task = task;
+-      info->files = files;
+       return file;
+ }
+@@ -231,24 +224,11 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
+ static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+       struct bpf_iter_seq_task_file_info *info = seq->private;
+-      struct files_struct *files = info->files;
+-      struct task_struct *task = info->task;
+-      struct file *file;
+       ++*pos;
+       ++info->fd;
+       fput((struct file *)v);
+-      file = task_file_seq_get_next(info, &task, &files);
+-      if (!file) {
+-              info->files = NULL;
+-              info->task = NULL;
+-              return NULL;
+-      }
+-
+-      info->task = task;
+-      info->files = files;
+-
+-      return file;
++      return task_file_seq_get_next(info);
+ }
+ struct bpf_iter__task_file {
+-- 
+2.27.0
+
diff --git a/queue-5.10/cfg80211-select-config_crc32.patch b/queue-5.10/cfg80211-select-config_crc32.patch
new file mode 100644 (file)
index 0000000..37450b1
--- /dev/null
@@ -0,0 +1,37 @@
+From b36c5d49968611865638e1f12cf39db7e9d4704e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 3 Jan 2021 22:36:21 +0100
+Subject: cfg80211: select CONFIG_CRC32
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 152a8a6c017bfdeda7f6d052fbc6e151891bd9b6 ]
+
+Without crc32 support, this fails to link:
+
+arm-linux-gnueabi-ld: net/wireless/scan.o: in function `cfg80211_scan_6ghz':
+scan.c:(.text+0x928): undefined reference to `crc32_le'
+
+Fixes: c8cb5b854b40 ("nl80211/cfg80211: support 6 GHz scanning")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
+index 27026f587fa61..f620acd2a0f5e 100644
+--- a/net/wireless/Kconfig
++++ b/net/wireless/Kconfig
+@@ -21,6 +21,7 @@ config CFG80211
+       tristate "cfg80211 - wireless configuration API"
+       depends on RFKILL || !RFKILL
+       select FW_LOADER
++      select CRC32
+       # may need to update this when certificates are changed and are
+       # using a different algorithm, though right now they shouldn't
+       # (this is here rather than below to allow it to be a module)
+-- 
+2.27.0
+
diff --git a/queue-5.10/iommu-vt-d-update-domain-geometry-in-iommu_ops.at-de.patch b/queue-5.10/iommu-vt-d-update-domain-geometry-in-iommu_ops.at-de.patch
new file mode 100644 (file)
index 0000000..47834ae
--- /dev/null
@@ -0,0 +1,59 @@
+From d5a01fb8d21f07661bce406b9a8a88e31530bc44 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Nov 2020 16:20:55 +0800
+Subject: iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev
+
+From: Lu Baolu <baolu.lu@linux.intel.com>
+
+[ Upstream commit c062db039f40e868c371c36afe8d0fac64305b5d ]
+
+The iommu-dma layer constrains IOVA allocation based on the domain
+geometry that the driver reports. Update the domain geometry every time
+a domain is attached to or detached from a device.
+
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Tested-by: Logan Gunthorpe <logang@deltatee.com>
+Link: https://lore.kernel.org/r/20201124082057.2614359-6-baolu.lu@linux.intel.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/iommu.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index c9da9e93f545c..151243fa01ba5 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -67,8 +67,8 @@
+ #define MAX_AGAW_WIDTH 64
+ #define MAX_AGAW_PFN_WIDTH    (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
+-#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
+-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
++#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
++#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
+ /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
+    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
+@@ -739,6 +739,18 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
+        */
+       if (domain->nid == NUMA_NO_NODE)
+               domain->nid = domain_update_device_node(domain);
++
++      /*
++       * First-level translation restricts the input-address to a
++       * canonical address (i.e., address bits 63:N have the same
++       * value as address bit [N-1], where N is 48-bits with 4-level
++       * paging and 57-bits with 5-level paging). Hence, skip bit
++       * [N-1].
++       */
++      if (domain_use_first_level(domain))
++              domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
++      else
++              domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
+ }
+ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+-- 
+2.27.0
+
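
To make the geometry change concrete: with 4-level paging (gaw = 48),
skipping bit N-1 halves the usable aperture rather than merely trimming
it, because the canonical rule forces bits 63:47 to mirror bit 47. A
small sketch using the patch's (now fully parenthesized) macro:

#include <inttypes.h>
#include <stdio.h>

#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)

int main(void)
{
        int gaw = 48; /* 4-level paging */

        /* second-level: the full input-address width is usable */
        printf("aperture_end      = 0x%016" PRIx64 "\n",
               __DOMAIN_MAX_ADDR(gaw));
        /* first-level: canonical addressing drops the top usable bit */
        printf("aperture_end (FL) = 0x%016" PRIx64 "\n",
               __DOMAIN_MAX_ADDR(gaw - 1));
        return 0;
}

It prints 0x0000ffffffffffff and then 0x00007fffffffffff; the added
parentheses around (gaw) also keep the macro safe for expression
arguments like domain->gaw - 1.
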
diff --git a/queue-5.10/mm-don-t-play-games-with-pinned-pages-in-clear_page_.patch b/queue-5.10/mm-don-t-play-games-with-pinned-pages-in-clear_page_.patch
new file mode 100644 (file)
index 0000000..4b514ef
--- /dev/null
@@ -0,0 +1,63 @@
+From d0ee377ade13732e8ec098939c15d82a002a8b62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jan 2021 17:09:10 -0800
+Subject: mm: don't play games with pinned pages in clear_page_refs
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit 9348b73c2e1bfea74ccd4a44fb4ccc7276ab9623 ]
+
+Turning a pinned page read-only breaks the pinning after COW.  Don't do it.
+
+The whole "track page soft dirty" state doesn't work with pinned pages
+anyway, since the page might be dirtied by the pinning entity without
+ever being noticed in the page tables.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/task_mmu.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index ab7d700b2caa4..602e3a52884d8 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -1035,6 +1035,25 @@ struct clear_refs_private {
+ };
+ #ifdef CONFIG_MEM_SOFT_DIRTY
++
++#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
++
++static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
++{
++      struct page *page;
++
++      if (!pte_write(pte))
++              return false;
++      if (!is_cow_mapping(vma->vm_flags))
++              return false;
++      if (likely(!atomic_read(&vma->vm_mm->has_pinned)))
++              return false;
++      page = vm_normal_page(vma, addr, pte);
++      if (!page)
++              return false;
++      return page_maybe_dma_pinned(page);
++}
++
+ static inline void clear_soft_dirty(struct vm_area_struct *vma,
+               unsigned long addr, pte_t *pte)
+ {
+@@ -1049,6 +1068,8 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
+       if (pte_present(ptent)) {
+               pte_t old_pte;
++              if (pte_is_pinned(vma, addr, ptent))
++                      return;
+               old_pte = ptep_modify_prot_start(vma, addr, pte);
+               ptent = pte_wrprotect(old_pte);
+               ptent = pte_clear_soft_dirty(ptent);
+-- 
+2.27.0
+
diff --git a/queue-5.10/mm-don-t-put-pinned-pages-into-the-swap-cache.patch b/queue-5.10/mm-don-t-put-pinned-pages-into-the-swap-cache.patch
new file mode 100644 (file)
index 0000000..c370431
--- /dev/null
@@ -0,0 +1,78 @@
+From a6608f8acf909e793c929a20084ffadc5d8b559b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Jan 2021 15:34:57 -0800
+Subject: mm: don't put pinned pages into the swap cache
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit feb889fb40fafc6933339cf1cca8f770126819fb ]
+
+So technically there is nothing wrong with adding a pinned page to the
+swap cache, but the pinning obviously means that the page can't actually
+be free'd right now anyway, so it's a bit pointless.
+
+However, the real problem is not with it being a bit pointless: the real
+issue is that after we've added it to the swap cache, we'll try to unmap
+the page.  That will succeed, because the code in mm/rmap.c doesn't know
+or care about pinned pages.
+
+Even the unmapping isn't fatal per se, since the page will stay around
+in memory due to the pinning, and we do hold the connection to it using
+the swap cache.  But when we then touch it next and take a page fault,
+the logic in do_swap_page() will map it back into the process as a
+possibly read-only page, and we'll then break the page association on
+the next COW fault.
+
+Honestly, this issue could have been fixed in any of those other places:
+(a) we could refuse to unmap a pinned page (which makes conceptual
+sense), or (b) we could make sure to re-map a pinned page writably in
+do_swap_page(), or (c) we could just make do_wp_page() not COW the
+pinned page (which was what we historically did before that "mm:
+do_wp_page() simplification" commit).
+
+But while all of them are equally valid models for breaking this chain,
+not putting pinned pages into the swap cache in the first place is the
+simplest one by far.
+
+It's also the safest one: the reason why do_wp_page() was changed in the
+first place was that getting the "can I re-use this page" wrong is so
+fraught with errors.  If you do it wrong, you end up with an incorrectly
+shared page.
+
+As a result, using "page_maybe_dma_pinned()" in either do_wp_page() or
+do_swap_page() would be a serious bug since it is only a (very good)
+heuristic.  Re-using the page requires a hard black-and-white rule with
+no room for ambiguity.
+
+In contrast, saying "this page is very likely dma pinned, so let's not
+add it to the swap cache and try to unmap it" is an obviously safe thing
+to do, and if the heuristic might very rarely be a false positive, no
+harm is done.
+
+Fixes: 09854ba94c6a ("mm: do_wp_page() simplification")
+Reported-and-tested-by: Martin Raiber <martin@urbackup.org>
+Cc: Pavel Begunkov <asml.silence@gmail.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/vmscan.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 0ec6321e98878..4c5a9b2286bf5 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1240,6 +1240,8 @@ static unsigned int shrink_page_list(struct list_head *page_list,
+                       if (!PageSwapCache(page)) {
+                               if (!(sc->gfp_mask & __GFP_IO))
+                                       goto keep_locked;
++                              if (page_maybe_dma_pinned(page))
++                                      goto keep_locked;
+                               if (PageTransHuge(page)) {
+                                       /* cannot split THP, skip it */
+                                       if (!can_split_huge_page(page, NULL))
+-- 
+2.27.0
+
diff --git a/queue-5.10/mm-fix-clear_refs_write-locking.patch b/queue-5.10/mm-fix-clear_refs_write-locking.patch
new file mode 100644 (file)
index 0000000..27f6773
--- /dev/null
@@ -0,0 +1,87 @@
+From 989c05cdbc9010dbb0ee82bd8b6bf76265c0a4e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jan 2021 13:13:41 -0800
+Subject: mm: fix clear_refs_write locking
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit 29a951dfb3c3263c3a0f3bd9f7f2c2cfde4baedb ]
+
+Turning page table entries read-only requires the mmap_sem held for
+writing.
+
+So stop doing the odd games with turning things from read locks to write
+locks and back.  Just get the write lock.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/task_mmu.c | 32 +++++++++-----------------------
+ 1 file changed, 9 insertions(+), 23 deletions(-)
+
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index ee5a235b30562..ab7d700b2caa4 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -1215,41 +1215,26 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+                       .type = type,
+               };
++              if (mmap_write_lock_killable(mm)) {
++                      count = -EINTR;
++                      goto out_mm;
++              }
+               if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+-                      if (mmap_write_lock_killable(mm)) {
+-                              count = -EINTR;
+-                              goto out_mm;
+-                      }
+-
+                       /*
+                        * Writing 5 to /proc/pid/clear_refs resets the peak
+                        * resident set size to this mm's current rss value.
+                        */
+                       reset_mm_hiwater_rss(mm);
+-                      mmap_write_unlock(mm);
+-                      goto out_mm;
++                      goto out_unlock;
+               }
+-              if (mmap_read_lock_killable(mm)) {
+-                      count = -EINTR;
+-                      goto out_mm;
+-              }
+               tlb_gather_mmu(&tlb, mm, 0, -1);
+               if (type == CLEAR_REFS_SOFT_DIRTY) {
+                       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                               if (!(vma->vm_flags & VM_SOFTDIRTY))
+                                       continue;
+-                              mmap_read_unlock(mm);
+-                              if (mmap_write_lock_killable(mm)) {
+-                                      count = -EINTR;
+-                                      goto out_mm;
+-                              }
+-                              for (vma = mm->mmap; vma; vma = vma->vm_next) {
+-                                      vma->vm_flags &= ~VM_SOFTDIRTY;
+-                                      vma_set_page_prot(vma);
+-                              }
+-                              mmap_write_downgrade(mm);
+-                              break;
++                              vma->vm_flags &= ~VM_SOFTDIRTY;
++                              vma_set_page_prot(vma);
+                       }
+                       mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+@@ -1261,7 +1246,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+               if (type == CLEAR_REFS_SOFT_DIRTY)
+                       mmu_notifier_invalidate_range_end(&range);
+               tlb_finish_mmu(&tlb, 0, -1);
+-              mmap_read_unlock(mm);
++out_unlock:
++              mmap_write_unlock(mm);
+ out_mm:
+               mmput(mm);
+       }
+-- 
+2.27.0
+
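
A sleeping rwlock has no atomic read-to-write upgrade, so the old
drop-read/take-write dance left a window in which another thread could
change the VMA list under the walker. A user-space pthreads analogue of
the simplified discipline (structures reduced to the bare minimum):

#include <pthread.h>
#include <stdio.h>

#define VM_SOFTDIRTY 0x1

struct vma {
        int vm_flags;
        struct vma *next;
};

static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;

static void clear_refs(struct vma *mmap)
{
        /* one write lock for the whole walk; no unlock/relock window */
        pthread_rwlock_wrlock(&mmap_lock);
        for (struct vma *v = mmap; v; v = v->next)
                v->vm_flags &= ~VM_SOFTDIRTY;
        pthread_rwlock_unlock(&mmap_lock);
}

int main(void)
{
        struct vma b = { VM_SOFTDIRTY, NULL };
        struct vma a = { 0, &b };

        clear_refs(&a);
        printf("b.vm_flags = %#x\n", b.vm_flags);
        return 0;
}
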
diff --git a/queue-5.10/net-dcb-validate-netlink-message-in-dcb-handler.patch b/queue-5.10/net-dcb-validate-netlink-message-in-dcb-handler.patch
new file mode 100644 (file)
index 0000000..6be6ed2
--- /dev/null
@@ -0,0 +1,52 @@
+From 00f3a482d485f1b701edbe927a102147b5a7492a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Dec 2020 22:49:44 +0100
+Subject: net: dcb: Validate netlink message in DCB handler
+
+From: Petr Machata <me@pmachata.org>
+
+[ Upstream commit 826f328e2b7e8854dd42ea44e6519cd75018e7b1 ]
+
+DCB uses the same handler function for both RTM_GETDCB and RTM_SETDCB
+messages. dcb_doit() bounces RTM_SETDCB messages if the user does not have
+the CAP_NET_ADMIN capability.
+
+However, the operation to be performed is not decided from the DCB message
+type, but from the DCB command. Thus DCB_CMD_*_GET commands are used for
+reading DCB objects, the corresponding SET and DEL commands are used for
+manipulation.
+
+The assumption is that set-like commands will be sent via an RTM_SETDCB
+message, and get-like ones via RTM_GETDCB. However, this assumption is not
+enforced.
+
+It is therefore possible to manipulate DCB objects without CAP_NET_ADMIN
+capability by sending the corresponding command in an RTM_GETDCB message.
+That is a bug. Fix it by validating the type of the request message against
+the type used for the response.
+
+Fixes: 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver")
+Signed-off-by: Petr Machata <me@pmachata.org>
+Link: https://lore.kernel.org/r/a2a9b88418f3a58ef211b718f2970128ef9e3793.1608673640.git.me@pmachata.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/dcb/dcbnl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
+index 16014ad194066..0983564064940 100644
+--- a/net/dcb/dcbnl.c
++++ b/net/dcb/dcbnl.c
+@@ -1765,6 +1765,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+       fn = &reply_funcs[dcb->cmd];
+       if (!fn->cb)
+               return -EOPNOTSUPP;
++      if (fn->type != nlh->nlmsg_type)
++              return -EPERM;
+       if (!tb[DCB_ATTR_IFNAME])
+               return -EINVAL;
+-- 
+2.27.0
+
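
The fix amounts to recording, per command, which netlink message type
that command may arrive in, and failing the dispatch on a mismatch. A
compact user-space model (message-type values and the command table are
illustrative):

#include <errno.h>
#include <stdio.h>

#define RTM_GETDCB 78 /* illustrative values */
#define RTM_SETDCB 77

struct reply_func {
        int type;        /* required nlmsg_type for this command */
        int (*cb)(void);
};

static int gstate(void) { return 0; } /* read-only command */
static int sstate(void) { return 0; } /* set-like command */

static const struct reply_func reply_funcs[] = {
        { RTM_GETDCB, gstate }, /* DCB_CMD_GSTATE */
        { RTM_SETDCB, sstate }, /* DCB_CMD_SSTATE */
};

static int dcb_doit(int nlmsg_type, unsigned int cmd)
{
        const struct reply_func *fn = &reply_funcs[cmd];

        if (!fn->cb)
                return -EOPNOTSUPP;
        /* the fix: a set command smuggled into an RTM_GETDCB message
         * (which skips the CAP_NET_ADMIN check) is now rejected */
        if (fn->type != nlmsg_type)
                return -EPERM;
        return fn->cb();
}

int main(void)
{
        printf("SSTATE via RTM_SETDCB: %d\n", dcb_doit(RTM_SETDCB, 1));
        printf("SSTATE via RTM_GETDCB: %d\n", dcb_doit(RTM_GETDCB, 1));
        return 0;
}
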
diff --git a/queue-5.10/net-mlx5-e-switch-fix-changing-vf-vlanid.patch b/queue-5.10/net-mlx5-e-switch-fix-changing-vf-vlanid.patch
new file mode 100644 (file)
index 0000000..d1e9327
--- /dev/null
@@ -0,0 +1,68 @@
+From 67d44bd59f6bfc9dfe2cbb74fc2724e19424c6c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jan 2021 12:54:40 +0200
+Subject: net/mlx5: E-Switch, fix changing vf VLANID
+
+From: Alaa Hleihel <alaa@nvidia.com>
+
+[ Upstream commit 25c904b59aaf4816337acd415514b0c47715f604 ]
+
+Adding a vf VLANID for the first time, or after having cleared a
+previously defined VLANID, works fine. However, attempting to change an
+existing vf VLANID clears the rules on the firmware but does not add
+new rules for the new vf VLANID.
+
+Fix this by changing the logic in function esw_acl_egress_lgcy_setup()
+so that it will always configure egress rules.
+
+Fixes: ea651a86d468 ("net/mlx5: E-Switch, Refactor eswitch egress acl codes")
+Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/esw/acl/egress_lgcy.c  | 27 +++++++++----------
+ 1 file changed, 13 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+index 2b85d4777303a..3e19b1721303f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+@@ -95,22 +95,21 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+               return 0;
+       }
+-      if (!IS_ERR_OR_NULL(vport->egress.acl))
+-              return 0;
+-
+-      vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+-                                               MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+-                                               table_size);
+-      if (IS_ERR(vport->egress.acl)) {
+-              err = PTR_ERR(vport->egress.acl);
+-              vport->egress.acl = NULL;
+-              goto out;
++      if (!vport->egress.acl) {
++              vport->egress.acl = esw_acl_table_create(esw, vport->vport,
++                                                       MLX5_FLOW_NAMESPACE_ESW_EGRESS,
++                                                       table_size);
++              if (IS_ERR(vport->egress.acl)) {
++                      err = PTR_ERR(vport->egress.acl);
++                      vport->egress.acl = NULL;
++                      goto out;
++              }
++
++              err = esw_acl_egress_lgcy_groups_create(esw, vport);
++              if (err)
++                      goto out;
+       }
+-      err = esw_acl_egress_lgcy_groups_create(esw, vport);
+-      if (err)
+-              goto out;
+-
+       esw_debug(esw->dev,
+                 "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
+                 vport->vport, vport->info.vlan, vport->info.qos);
+-- 
+2.27.0
+
diff --git a/queue-5.10/net-mlx5-fix-passing-zero-to-ptr_err.patch b/queue-5.10/net-mlx5-fix-passing-zero-to-ptr_err.patch
new file mode 100644 (file)
index 0000000..a708b5f
--- /dev/null
@@ -0,0 +1,84 @@
+From 3ed3079dc81658dc1624bc39df6c5bd92694382b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Dec 2020 14:42:06 -0800
+Subject: net/mlx5: Fix passing zero to 'PTR_ERR'
+
+From: YueHaibing <yuehaibing@huawei.com>
+
+[ Upstream commit 0c4accc41cb56e527c8c049f5495af9f3d6bef7e ]
+
+Fix smatch warnings:
+
+drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c:105 esw_acl_egress_lgcy_setup() warn: passing zero to 'PTR_ERR'
+drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c:177 esw_acl_egress_ofld_setup() warn: passing zero to 'PTR_ERR'
+drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c:184 esw_acl_ingress_lgcy_setup() warn: passing zero to 'PTR_ERR'
+drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c:262 esw_acl_ingress_ofld_setup() warn: passing zero to 'PTR_ERR'
+
+esw_acl_table_create() never returns NULL, so the
+NULL test should be removed.
+
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c  | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c  | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c | 2 +-
+ 4 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+index d46f8b225ebe3..2b85d4777303a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+@@ -101,7 +101,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+       vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+                                                MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+                                                table_size);
+-      if (IS_ERR_OR_NULL(vport->egress.acl)) {
++      if (IS_ERR(vport->egress.acl)) {
+               err = PTR_ERR(vport->egress.acl);
+               vport->egress.acl = NULL;
+               goto out;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+index c3faae67e4d6e..4c74e2690d57b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+@@ -173,7 +173,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
+               table_size++;
+       vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+                                                MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
+-      if (IS_ERR_OR_NULL(vport->egress.acl)) {
++      if (IS_ERR(vport->egress.acl)) {
+               err = PTR_ERR(vport->egress.acl);
+               vport->egress.acl = NULL;
+               return err;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+index b68976b378b81..d64fad2823e73 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+@@ -180,7 +180,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+               vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+                                                         MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                         table_size);
+-              if (IS_ERR_OR_NULL(vport->ingress.acl)) {
++              if (IS_ERR(vport->ingress.acl)) {
+                       err = PTR_ERR(vport->ingress.acl);
+                       vport->ingress.acl = NULL;
+                       return err;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+index 4e55d7225a265..548c005ea6335 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+@@ -258,7 +258,7 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
+       vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+                                                 MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                 num_ftes);
+-      if (IS_ERR_OR_NULL(vport->ingress.acl)) {
++      if (IS_ERR(vport->ingress.acl)) {
+               err = PTR_ERR(vport->ingress.acl);
+               vport->ingress.acl = NULL;
+               return err;
+-- 
+2.27.0
+
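
The warning matters because PTR_ERR(NULL) is 0: guarding a never-NULL
return with IS_ERR_OR_NULL() creates a path that reports success while
leaving the pointer NULL. A user-space model of the err.h helpers that
shows the trap (MAX_ERRNO as in the kernel; the scenario is contrived):

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}
static inline int IS_ERR_OR_NULL(const void *ptr)
{
        return !ptr || IS_ERR(ptr);
}

int main(void)
{
        void *acl = NULL; /* cannot happen for esw_acl_table_create() */

        if (IS_ERR_OR_NULL(acl))
                printf("err = %ld  (0 looks like success!)\n", PTR_ERR(acl));
        if (!IS_ERR(acl))
                printf("IS_ERR(NULL) is false: NULL is not an ERR_PTR\n");
        return 0;
}
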
diff --git a/queue-5.10/net-mlx5e-ct-use-per-flow-counter-when-ct-flow-accou.patch b/queue-5.10/net-mlx5e-ct-use-per-flow-counter-when-ct-flow-accou.patch
new file mode 100644 (file)
index 0000000..c1fadd7
--- /dev/null
@@ -0,0 +1,199 @@
+From 0651aebf4c549580dcf9bed3fe4efd51e5e7f09e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Dec 2020 08:15:18 +0000
+Subject: net/mlx5e: CT: Use per flow counter when CT flow accounting is
+ enabled
+
+From: Oz Shlomo <ozsh@nvidia.com>
+
+[ Upstream commit eed38eeee734756596e2cc163bdc7dac3be501b1 ]
+
+Connection counters may be shared for both directions when the counter
+is used for connection aging purposes. However, if TC flow
+accounting is enabled then a unique counter is required per direction.
+
+Instantiate a unique counter per direction if the conntrack accounting
+extension is enabled. Use a shared counter when the connection accounting
+extension is disabled.
+
+Fixes: 1edae2335adf ("net/mlx5e: CT: Use the same counter for both directions")
+Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
+Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Paul Blakey <paulb@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlx5/core/en/tc_ct.c    | 77 ++++++++++++-------
+ 1 file changed, 49 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+index e521254d886ef..072363e73f1ce 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+@@ -118,16 +118,17 @@ struct mlx5_ct_tuple {
+       u16 zone;
+ };
+-struct mlx5_ct_shared_counter {
++struct mlx5_ct_counter {
+       struct mlx5_fc *counter;
+       refcount_t refcount;
++      bool is_shared;
+ };
+ struct mlx5_ct_entry {
+       struct rhash_head node;
+       struct rhash_head tuple_node;
+       struct rhash_head tuple_nat_node;
+-      struct mlx5_ct_shared_counter *shared_counter;
++      struct mlx5_ct_counter *counter;
+       unsigned long cookie;
+       unsigned long restore_cookie;
+       struct mlx5_ct_tuple tuple;
+@@ -394,13 +395,14 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
+ }
+ static void
+-mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
++mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+ {
+-      if (!refcount_dec_and_test(&entry->shared_counter->refcount))
++      if (entry->counter->is_shared &&
++          !refcount_dec_and_test(&entry->counter->refcount))
+               return;
+-      mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter);
+-      kfree(entry->shared_counter);
++      mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
++      kfree(entry->counter);
+ }
+ static void
+@@ -699,7 +701,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+       attr->dest_ft = ct_priv->post_ct;
+       attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+       attr->outer_match_level = MLX5_MATCH_L4;
+-      attr->counter = entry->shared_counter->counter;
++      attr->counter = entry->counter->counter;
+       attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+       mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
+@@ -732,13 +734,34 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+       return err;
+ }
+-static struct mlx5_ct_shared_counter *
++static struct mlx5_ct_counter *
++mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
++{
++      struct mlx5_ct_counter *counter;
++      int ret;
++
++      counter = kzalloc(sizeof(*counter), GFP_KERNEL);
++      if (!counter)
++              return ERR_PTR(-ENOMEM);
++
++      counter->is_shared = false;
++      counter->counter = mlx5_fc_create(ct_priv->dev, true);
++      if (IS_ERR(counter->counter)) {
++              ct_dbg("Failed to create counter for ct entry");
++              ret = PTR_ERR(counter->counter);
++              kfree(counter);
++              return ERR_PTR(ret);
++      }
++
++      return counter;
++}
++
++static struct mlx5_ct_counter *
+ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+                             struct mlx5_ct_entry *entry)
+ {
+       struct mlx5_ct_tuple rev_tuple = entry->tuple;
+-      struct mlx5_ct_shared_counter *shared_counter;
+-      struct mlx5_core_dev *dev = ct_priv->dev;
++      struct mlx5_ct_counter *shared_counter;
+       struct mlx5_ct_entry *rev_entry;
+       __be16 tmp_port;
+       int ret;
+@@ -767,25 +790,20 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+       rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
+                                          tuples_ht_params);
+       if (rev_entry) {
+-              if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) {
++              if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
+                       mutex_unlock(&ct_priv->shared_counter_lock);
+-                      return rev_entry->shared_counter;
++                      return rev_entry->counter;
+               }
+       }
+       mutex_unlock(&ct_priv->shared_counter_lock);
+-      shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL);
+-      if (!shared_counter)
+-              return ERR_PTR(-ENOMEM);
+-
+-      shared_counter->counter = mlx5_fc_create(dev, true);
+-      if (IS_ERR(shared_counter->counter)) {
+-              ct_dbg("Failed to create counter for ct entry");
+-              ret = PTR_ERR(shared_counter->counter);
+-              kfree(shared_counter);
++      shared_counter = mlx5_tc_ct_counter_create(ct_priv);
++      if (IS_ERR(shared_counter)) {
++              ret = PTR_ERR(shared_counter);
+               return ERR_PTR(ret);
+       }
++      shared_counter->is_shared = true;
+       refcount_set(&shared_counter->refcount, 1);
+       return shared_counter;
+ }
+@@ -798,10 +816,13 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
+ {
+       int err;
+-      entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+-      if (IS_ERR(entry->shared_counter)) {
+-              err = PTR_ERR(entry->shared_counter);
+-              ct_dbg("Failed to create counter for ct entry");
++      if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
++              entry->counter = mlx5_tc_ct_counter_create(ct_priv);
++      else
++              entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
++
++      if (IS_ERR(entry->counter)) {
++              err = PTR_ERR(entry->counter);
+               return err;
+       }
+@@ -820,7 +841,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
+ err_nat:
+       mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+ err_orig:
+-      mlx5_tc_ct_shared_counter_put(ct_priv, entry);
++      mlx5_tc_ct_counter_put(ct_priv, entry);
+       return err;
+ }
+@@ -918,7 +939,7 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
+       rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+                              tuples_ht_params);
+       mutex_unlock(&ct_priv->shared_counter_lock);
+-      mlx5_tc_ct_shared_counter_put(ct_priv, entry);
++      mlx5_tc_ct_counter_put(ct_priv, entry);
+ }
+@@ -956,7 +977,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
+       if (!entry)
+               return -ENOENT;
+-      mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse);
++      mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
+       flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+                         FLOW_ACTION_HW_STATS_DELAYED);
+-- 
+2.27.0
+
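
The put side is where the two modes differ: a per-flow counter is freed
on the spot, while a shared one must wait for the reverse direction to
drop its reference. A reduced user-space model of that refcounting
(types and helpers simplified from the patch):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct ct_counter {
        int refcount;
        bool is_shared;
};

static struct ct_counter *counter_create(bool shared)
{
        struct ct_counter *c = calloc(1, sizeof(*c));

        c->is_shared = shared;
        c->refcount = 1;
        return c;
}

static void counter_put(struct ct_counter *c)
{
        /* shared: free only when the last direction lets go */
        if (c->is_shared && --c->refcount > 0)
                return;
        printf("freeing %s counter\n", c->is_shared ? "shared" : "per-flow");
        free(c);
}

int main(void)
{
        bool accounting = true; /* nf_ct_acct_enabled() in the kernel */

        if (accounting) {
                /* TC flow accounting: one unique counter per direction */
                counter_put(counter_create(false));
                counter_put(counter_create(false));
        } else {
                struct ct_counter *c = counter_create(true);

                c->refcount++;  /* reverse direction takes a reference */
                counter_put(c); /* first put: still held */
                counter_put(c); /* second put: freed */
        }
        return 0;
}
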
diff --git a/queue-5.10/nvme-fc-avoid-calling-_nvme_fc_abort_outstanding_ios.patch b/queue-5.10/nvme-fc-avoid-calling-_nvme_fc_abort_outstanding_ios.patch
new file mode 100644 (file)
index 0000000..4ce6ed1
--- /dev/null
@@ -0,0 +1,94 @@
+From 162b78cd04f991e8c434f0ef59c35d8a0ae70eb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Dec 2020 17:52:43 -0800
+Subject: nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios from interrupt
+ context
+
+From: James Smart <james.smart@broadcom.com>
+
+[ Upstream commit 19fce0470f05031e6af36e49ce222d0f0050d432 ]
+
+Recent patches changed calling sequences. nvme_fc_abort_outstanding_ios
+used to be called from a timeout or work context. Now it is being called
+in an io completion context, which can be an interrupt handler.
+Unfortunately, the abort-outstanding-ios routine attempts to stop nvme
+queues and calls nested routines that may try to sleep, which conflicts
+with interrupt context.
+
+Correct this by replacing the direct call with scheduling of a work
+element; the abort-outstanding-ios routine is then called from the work
+element.
+
+Fixes: 95ced8a2c72d ("nvme-fc: eliminate terminate_io use by nvme_fc_error_recovery")
+Signed-off-by: James Smart <james.smart@broadcom.com>
+Reported-by: Daniel Wagner <dwagner@suse.de>
+Tested-by: Daniel Wagner <dwagner@suse.de>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/fc.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
+index f4c246462658f..5ead217ac2bc8 100644
+--- a/drivers/nvme/host/fc.c
++++ b/drivers/nvme/host/fc.c
+@@ -166,6 +166,7 @@ struct nvme_fc_ctrl {
+       struct blk_mq_tag_set   admin_tag_set;
+       struct blk_mq_tag_set   tag_set;
++      struct work_struct      ioerr_work;
+       struct delayed_work     connect_work;
+       struct kref             ref;
+@@ -1888,6 +1889,15 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
+       }
+ }
++static void
++nvme_fc_ctrl_ioerr_work(struct work_struct *work)
++{
++      struct nvme_fc_ctrl *ctrl =
++                      container_of(work, struct nvme_fc_ctrl, ioerr_work);
++
++      nvme_fc_error_recovery(ctrl, "transport detected io error");
++}
++
+ static void
+ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
+ {
+@@ -2046,7 +2056,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
+ check_error:
+       if (terminate_assoc)
+-              nvme_fc_error_recovery(ctrl, "transport detected io error");
++              queue_work(nvme_reset_wq, &ctrl->ioerr_work);
+ }
+ static int
+@@ -3233,6 +3243,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
+ {
+       struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
++      cancel_work_sync(&ctrl->ioerr_work);
+       cancel_delayed_work_sync(&ctrl->connect_work);
+       /*
+        * kill the association on the link side.  this will block
+@@ -3449,6 +3460,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+       INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
+       INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
++      INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
+       spin_lock_init(&ctrl->lock);
+       /* io queue count */
+@@ -3540,6 +3552,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+ fail_ctrl:
+       nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
++      cancel_work_sync(&ctrl->ioerr_work);
+       cancel_work_sync(&ctrl->ctrl.reset_work);
+       cancel_delayed_work_sync(&ctrl->connect_work);
+-- 
+2.27.0
+
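
The pattern is the standard one for work that may sleep but is
triggered from atomic context: the completion path only queues a work
item, and the workqueue runs the sleepy recovery in process context. A
user-space analogue, with a worker thread standing in for nvme_reset_wq
(illustrative only; a real interrupt handler could not take this mutex
either):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static bool ioerr_pending;

static void *ioerr_work(void *arg) /* process context: may sleep */
{
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!ioerr_pending)
                pthread_cond_wait(&kick, &lock);
        ioerr_pending = false;
        pthread_mutex_unlock(&lock);

        sleep(1); /* blocking work: "transport detected io error" recovery */
        printf("recovery ran in work context\n");
        return NULL;
}

static void fcpio_done(void) /* completion path: must not sleep */
{
        /* like queue_work(nvme_reset_wq, &ctrl->ioerr_work) */
        pthread_mutex_lock(&lock);
        ioerr_pending = true;
        pthread_cond_signal(&kick);
        pthread_mutex_unlock(&lock);
        /* returns immediately */
}

int main(void)
{
        pthread_t worker;

        pthread_create(&worker, NULL, ioerr_work, NULL);
        fcpio_done();
        pthread_join(worker, NULL);
        return 0;
}
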
diff --git a/queue-5.10/rcu-tasks-move-rcu-tasks-initialization-to-before-ea.patch b/queue-5.10/rcu-tasks-move-rcu-tasks-initialization-to-before-ea.patch
new file mode 100644 (file)
index 0000000..2a1b43e
--- /dev/null
@@ -0,0 +1,130 @@
+From fe81fd65a52f8becdddde2348d543492e340f14f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Dec 2020 21:27:31 +0100
+Subject: rcu-tasks: Move RCU-tasks initialization to before early_initcall()
+
+From: Uladzislau Rezki (Sony) <urezki@gmail.com>
+
+[ Upstream commit 1b04fa9900263b4e217ca2509fd778b32c2b4eb2 ]
+
+PowerPC testing encountered boot failures due to RCU Tasks not being
+fully initialized until core_initcall() time.  This commit therefore
+initializes RCU Tasks (along with Rude RCU and RCU Tasks Trace) just
+before early_initcall() time, thus allowing waiting on RCU Tasks grace
+periods from early_initcall() handlers.
+
+Link: https://lore.kernel.org/rcu/87eekfh80a.fsf@dja-thinkpad.axtens.net/
+Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall")
+Tested-by: Daniel Axtens <dja@axtens.net>
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/rcupdate.h |  6 ++++++
+ init/main.c              |  1 +
+ kernel/rcu/tasks.h       | 25 +++++++++++++++++++++----
+ 3 files changed, 28 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index 6cdd0152c253a..5c119d6cecf14 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -86,6 +86,12 @@ void rcu_sched_clock_irq(int user);
+ void rcu_report_dead(unsigned int cpu);
+ void rcutree_migrate_callbacks(int cpu);
++#ifdef CONFIG_TASKS_RCU_GENERIC
++void rcu_init_tasks_generic(void);
++#else
++static inline void rcu_init_tasks_generic(void) { }
++#endif
++
+ #ifdef CONFIG_RCU_STALL_COMMON
+ void rcu_sysrq_start(void);
+ void rcu_sysrq_end(void);
+diff --git a/init/main.c b/init/main.c
+index 32b2a8affafd1..9d964511fe0c2 100644
+--- a/init/main.c
++++ b/init/main.c
+@@ -1512,6 +1512,7 @@ static noinline void __init kernel_init_freeable(void)
+       init_mm_internals();
++      rcu_init_tasks_generic();
+       do_pre_smp_initcalls();
+       lockup_detector_init();
+diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
+index d5d9f2d03e8a0..73bbe792fe1e8 100644
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -241,7 +241,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
+       }
+ }
+-/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
++/* Spawn RCU-tasks grace-period kthread. */
+ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
+ {
+       struct task_struct *t;
+@@ -569,7 +569,6 @@ static int __init rcu_spawn_tasks_kthread(void)
+       rcu_spawn_tasks_kthread_generic(&rcu_tasks);
+       return 0;
+ }
+-core_initcall(rcu_spawn_tasks_kthread);
+ #ifndef CONFIG_TINY_RCU
+ static void show_rcu_tasks_classic_gp_kthread(void)
+@@ -697,7 +696,6 @@ static int __init rcu_spawn_tasks_rude_kthread(void)
+       rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
+       return 0;
+ }
+-core_initcall(rcu_spawn_tasks_rude_kthread);
+ #ifndef CONFIG_TINY_RCU
+ static void show_rcu_tasks_rude_gp_kthread(void)
+@@ -975,6 +973,11 @@ static void rcu_tasks_trace_pregp_step(void)
+ static void rcu_tasks_trace_pertask(struct task_struct *t,
+                                   struct list_head *hop)
+ {
++      // During early boot when there is only the one boot CPU, there
++      // is no idle task for the other CPUs. Just return.
++      if (unlikely(t == NULL))
++              return;
++
+       WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
+       WRITE_ONCE(t->trc_reader_checked, false);
+       t->trc_ipi_to_cpu = -1;
+@@ -1200,7 +1203,6 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
+       rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
+       return 0;
+ }
+-core_initcall(rcu_spawn_tasks_trace_kthread);
+ #ifndef CONFIG_TINY_RCU
+ static void show_rcu_tasks_trace_gp_kthread(void)
+@@ -1229,6 +1231,21 @@ void show_rcu_tasks_gp_kthreads(void)
+ }
+ #endif /* #ifndef CONFIG_TINY_RCU */
++void __init rcu_init_tasks_generic(void)
++{
++#ifdef CONFIG_TASKS_RCU
++      rcu_spawn_tasks_kthread();
++#endif
++
++#ifdef CONFIG_TASKS_RUDE_RCU
++      rcu_spawn_tasks_rude_kthread();
++#endif
++
++#ifdef CONFIG_TASKS_TRACE_RCU
++      rcu_spawn_tasks_trace_kthread();
++#endif
++}
++
+ #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+ static inline void rcu_tasks_bootup_oddness(void) {}
+ void show_rcu_tasks_gp_kthreads(void) {}
+-- 
+2.27.0
+
diff --git a/queue-5.10/series b/queue-5.10/series
index 55c431973bafad0c32820bfc4ca04287c863ec52..d0e8f4caec91e383851d4bbbd2a545740a1f6602 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -95,3 +95,17 @@ acpi-scan-add-stub-acpi_create_platform_device-for-c.patch
 drm-msm-call-msm_init_vram-before-binding-the-gpu.patch
 arm-picoxcell-fix-missing-interrupt-parent-propertie.patch
 poll-fix-performance-regression-due-to-out-of-line-_.patch
+rcu-tasks-move-rcu-tasks-initialization-to-before-ea.patch
+bpf-simplify-task_file_seq_get_next.patch
+bpf-save-correct-stopping-point-in-file-seq-iteratio.patch
+x86-sev-es-fix-sev-es-out-in-immediate-opcode-vc-han.patch
+cfg80211-select-config_crc32.patch
+nvme-fc-avoid-calling-_nvme_fc_abort_outstanding_ios.patch
+iommu-vt-d-update-domain-geometry-in-iommu_ops.at-de.patch
+net-mlx5e-ct-use-per-flow-counter-when-ct-flow-accou.patch
+net-mlx5-fix-passing-zero-to-ptr_err.patch
+net-mlx5-e-switch-fix-changing-vf-vlanid.patch
+blk-mq-debugfs-add-decode-for-blk_mq_f_tag_hctx_shar.patch
+mm-fix-clear_refs_write-locking.patch
+mm-don-t-play-games-with-pinned-pages-in-clear_page_.patch
+mm-don-t-put-pinned-pages-into-the-swap-cache.patch
diff --git a/queue-5.10/x86-sev-es-fix-sev-es-out-in-immediate-opcode-vc-han.patch b/queue-5.10/x86-sev-es-fix-sev-es-out-in-immediate-opcode-vc-han.patch
new file mode 100644 (file)
index 0000000..d6956b7
--- /dev/null
@@ -0,0 +1,52 @@
+From a9e69327d1fb7a9077254599e4c5374769d3060f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jan 2021 08:33:11 -0800
+Subject: x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling
+
+From: Peter Gonda <pgonda@google.com>
+
+[ Upstream commit a8f7e08a81708920a928664a865208fdf451c49f ]
+
+The IN and OUT instructions with the port address as an immediate
+operand only use an 8-bit immediate (imm8). The current VC handler uses
+the entire 32-bit immediate value, but these instructions only define
+the first byte.
+
+Cast the operand to a u8 so only that byte is used.
+
+ [ bp: Massage commit message. ]
+
+Fixes: 25189d08e5168 ("x86/sev-es: Add support for handling IOIO exceptions")
+Signed-off-by: Peter Gonda <pgonda@google.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: David Rientjes <rientjes@google.com>
+Link: https://lkml.kernel.org/r/20210105163311.221490-1-pgonda@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/sev-es-shared.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
+index 7d04b356d44d3..cdc04d0912423 100644
+--- a/arch/x86/kernel/sev-es-shared.c
++++ b/arch/x86/kernel/sev-es-shared.c
+@@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
+       case 0xe4:
+       case 0xe5:
+               *exitinfo |= IOIO_TYPE_IN;
+-              *exitinfo |= (u64)insn->immediate.value << 16;
++              *exitinfo |= (u8)insn->immediate.value << 16;
+               break;
+       /* OUT immediate opcodes */
+       case 0xe6:
+       case 0xe7:
+               *exitinfo |= IOIO_TYPE_OUT;
+-              *exitinfo |= (u64)insn->immediate.value << 16;
++              *exitinfo |= (u8)insn->immediate.value << 16;
+               break;
+       /* IN register opcodes */
+-- 
+2.27.0
+
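
A quick demonstration of why the cast matters. The sketch assumes the
instruction decoder sign-extends imm8 into immediate.value (an
assumption, not verified here); either way, only the low byte of the
immediate is architecturally meaningful for IN/OUT:

#include <inttypes.h>
#include <stdio.h>

typedef uint64_t u64;
typedef uint8_t  u8;

int main(void)
{
        /* e.g. "out %al, $0x8f" decoded with sign extension */
        int imm = (signed char)0x8f; /* -113, i.e. 0xffffff8f */
        u64 bad = 0, good = 0;

        bad  |= (u64)imm << 16; /* before the fix */
        good |= (u8)imm  << 16; /* after the fix  */

        printf("bad : %#018" PRIx64 "\n", bad);  /* 0xffffffffff8f0000 */
        printf("good: %#018" PRIx64 "\n", good); /* 0x00000000008f0000 */
        return 0;
}
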