]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 22 Nov 2021 12:42:17 +0000 (13:42 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 22 Nov 2021 12:42:17 +0000 (13:42 +0100)
added patches:
ata-libata-add-missing-ata_identify_page_supported-calls.patch
ata-libata-improve-ata_read_log_page-error-message.patch
block-check-admin-before-nice-for-ioprio_class_rt.patch
fbdev-prevent-probing-generic-drivers-if-a-fb-is-already-registered.patch
hexagon-clean-up-timer-regs.h.patch
hexagon-export-raw-i-o-routines-for-modules.patch
hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error.patch
ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch
kmap_local-don-t-assume-kmap-ptes-are-linear-arrays-in-memory.patch
kvm-x86-assume-a-64-bit-hypercall-for-guests-with-protected-state.patch
kvm-x86-fix-uninitialized-eoi_exit_bitmap-usage-in-vcpu_load_eoi_exitmap.patch
kvm-x86-mmu-include-efer.lma-in-extended-mmu-role.patch
kvm-x86-xen-fix-get_attr-of-kvm_xen_attr_type_shared_info.patch
mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock.patch
mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation.patch
mm-kmemleak-slob-respect-slab_noleaktrace-flag.patch
pinctrl-ralink-include-ralink_regs.h-in-pinctrl-mt7620.c.patch
powerpc-8xx-fix-pinned-tlbs-with-config_strict_kernel_rwx.patch
powerpc-signal32-fix-sigset_t-copy.patch
powerpc-xive-change-irq-domain-to-a-tree-domain.patch
pstore-blk-use-lu-to-format-unsigned-long.patch
revert-drm-i915-tgl-dsi-gate-the-ddi-clocks-after-pll-mapping.patch
revert-mark-pstore-blk-as-broken.patch
revert-parisc-reduce-sigreturn-trampoline-to-3-instructions.patch
s390-boot-simplify-and-fix-kernel-memory-layout-setup.patch
s390-dump-fix-copying-to-user-space-of-swapped-kdump-oldmem.patch
s390-kexec-fix-memory-leak-of-ipl-report-buffer.patch
s390-setup-avoid-reserving-memory-above-identity-mapping.patch
s390-vdso-filter-out-mstack-guard-and-mstack-size.patch
scsi-qla2xxx-fix-mailbox-direction-flags-in-qla2xxx_get_adapter_id.patch
shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch
tipc-check-for-null-after-calling-kmemdup.patch
x86-boot-pull-up-cmdline-preparation-and-early-param-parsing.patch
x86-hyperv-fix-null-deref-in-set_hv_tscchange_cb-if-hyper-v-setup-fails.patch
x86-sgx-fix-free-page-accounting.patch

36 files changed:
queue-5.15/ata-libata-add-missing-ata_identify_page_supported-calls.patch [new file with mode: 0644]
queue-5.15/ata-libata-improve-ata_read_log_page-error-message.patch [new file with mode: 0644]
queue-5.15/block-check-admin-before-nice-for-ioprio_class_rt.patch [new file with mode: 0644]
queue-5.15/fbdev-prevent-probing-generic-drivers-if-a-fb-is-already-registered.patch [new file with mode: 0644]
queue-5.15/hexagon-clean-up-timer-regs.h.patch [new file with mode: 0644]
queue-5.15/hexagon-export-raw-i-o-routines-for-modules.patch [new file with mode: 0644]
queue-5.15/hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error.patch [new file with mode: 0644]
queue-5.15/ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch [new file with mode: 0644]
queue-5.15/kmap_local-don-t-assume-kmap-ptes-are-linear-arrays-in-memory.patch [new file with mode: 0644]
queue-5.15/kvm-x86-assume-a-64-bit-hypercall-for-guests-with-protected-state.patch [new file with mode: 0644]
queue-5.15/kvm-x86-fix-uninitialized-eoi_exit_bitmap-usage-in-vcpu_load_eoi_exitmap.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mmu-include-efer.lma-in-extended-mmu-role.patch [new file with mode: 0644]
queue-5.15/kvm-x86-xen-fix-get_attr-of-kvm_xen_attr_type_shared_info.patch [new file with mode: 0644]
queue-5.15/mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock.patch [new file with mode: 0644]
queue-5.15/mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation.patch [new file with mode: 0644]
queue-5.15/mm-kmemleak-slob-respect-slab_noleaktrace-flag.patch [new file with mode: 0644]
queue-5.15/pinctrl-ralink-include-ralink_regs.h-in-pinctrl-mt7620.c.patch [new file with mode: 0644]
queue-5.15/powerpc-8xx-fix-pinned-tlbs-with-config_strict_kernel_rwx.patch [new file with mode: 0644]
queue-5.15/powerpc-signal32-fix-sigset_t-copy.patch [new file with mode: 0644]
queue-5.15/powerpc-xive-change-irq-domain-to-a-tree-domain.patch [new file with mode: 0644]
queue-5.15/pstore-blk-use-lu-to-format-unsigned-long.patch [new file with mode: 0644]
queue-5.15/revert-drm-i915-tgl-dsi-gate-the-ddi-clocks-after-pll-mapping.patch [new file with mode: 0644]
queue-5.15/revert-mark-pstore-blk-as-broken.patch [new file with mode: 0644]
queue-5.15/revert-parisc-reduce-sigreturn-trampoline-to-3-instructions.patch [new file with mode: 0644]
queue-5.15/s390-boot-simplify-and-fix-kernel-memory-layout-setup.patch [new file with mode: 0644]
queue-5.15/s390-dump-fix-copying-to-user-space-of-swapped-kdump-oldmem.patch [new file with mode: 0644]
queue-5.15/s390-kexec-fix-memory-leak-of-ipl-report-buffer.patch [new file with mode: 0644]
queue-5.15/s390-setup-avoid-reserving-memory-above-identity-mapping.patch [new file with mode: 0644]
queue-5.15/s390-vdso-filter-out-mstack-guard-and-mstack-size.patch [new file with mode: 0644]
queue-5.15/scsi-qla2xxx-fix-mailbox-direction-flags-in-qla2xxx_get_adapter_id.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch [new file with mode: 0644]
queue-5.15/tipc-check-for-null-after-calling-kmemdup.patch [new file with mode: 0644]
queue-5.15/x86-boot-pull-up-cmdline-preparation-and-early-param-parsing.patch [new file with mode: 0644]
queue-5.15/x86-hyperv-fix-null-deref-in-set_hv_tscchange_cb-if-hyper-v-setup-fails.patch [new file with mode: 0644]
queue-5.15/x86-sgx-fix-free-page-accounting.patch [new file with mode: 0644]

diff --git a/queue-5.15/ata-libata-add-missing-ata_identify_page_supported-calls.patch b/queue-5.15/ata-libata-add-missing-ata_identify_page_supported-calls.patch
new file mode 100644 (file)
index 0000000..56b4dfe
--- /dev/null
@@ -0,0 +1,47 @@
+From 06f6c4c6c3e8354dceddd77bd58f9a7a84c67246 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Mon, 15 Nov 2021 12:47:26 +0900
+Subject: ata: libata: add missing ata_identify_page_supported() calls
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 06f6c4c6c3e8354dceddd77bd58f9a7a84c67246 upstream.
+
+ata_dev_config_ncq_prio() and ata_dev_config_devslp() both access pages
+of the IDENTIFY DEVICE data log. Before calling ata_read_log_page(),
+make sure to check for the existence of the IDENTIFY DEVICE data log and
+of the log page accessed using ata_identify_page_supported(). This
+avoids useless error messages from ata_read_log_page() and failures with
+some LLDD scsi drivers using libsas.
+
+Reported-by: Nikolay <knv418@gmail.com>
+Cc: stable@kernel.org # 5.15
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Tested-by: Matthew Perkowski <mgperkow@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -2167,6 +2167,9 @@ static void ata_dev_config_ncq_prio(stru
+       struct ata_port *ap = dev->link->ap;
+       unsigned int err_mask;
++      if (!ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
++              return;
++
+       err_mask = ata_read_log_page(dev,
+                                    ATA_LOG_IDENTIFY_DEVICE,
+                                    ATA_LOG_SATA_SETTINGS,
+@@ -2443,7 +2446,8 @@ static void ata_dev_config_devslp(struct
+        * Check device sleep capability. Get DevSlp timing variables
+        * from SATA Settings page of Identify Device Data Log.
+        */
+-      if (!ata_id_has_devslp(dev->id))
++      if (!ata_id_has_devslp(dev->id) ||
++          !ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
+               return;
+       err_mask = ata_read_log_page(dev,
diff --git a/queue-5.15/ata-libata-improve-ata_read_log_page-error-message.patch b/queue-5.15/ata-libata-improve-ata_read_log_page-error-message.patch
new file mode 100644 (file)
index 0000000..6854c0a
--- /dev/null
@@ -0,0 +1,35 @@
+From 23ef63d5e14f916c5bba39128ebef395859d7c0f Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Mon, 15 Nov 2021 12:37:46 +0900
+Subject: ata: libata: improve ata_read_log_page() error message
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 23ef63d5e14f916c5bba39128ebef395859d7c0f upstream.
+
+If ata_read_log_page() fails to read a log page, the ata_dev_err() error
+message only print the page number, omitting the log number. In case of
+error, facilitate debugging by also printing the log number.
+
+Cc: stable@kernel.org # 5.15
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Tested-by: Matthew Perkowski <mgperkow@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -2031,8 +2031,9 @@ retry:
+                       dev->horkage |= ATA_HORKAGE_NO_DMA_LOG;
+                       goto retry;
+               }
+-              ata_dev_err(dev, "Read log page 0x%02x failed, Emask 0x%x\n",
+-                          (unsigned int)page, err_mask);
++              ata_dev_err(dev,
++                          "Read log 0x%02x page 0x%02x failed, Emask 0x%x\n",
++                          (unsigned int)log, (unsigned int)page, err_mask);
+       }
+       return err_mask;
diff --git a/queue-5.15/block-check-admin-before-nice-for-ioprio_class_rt.patch b/queue-5.15/block-check-admin-before-nice-for-ioprio_class_rt.patch
new file mode 100644 (file)
index 0000000..646af93
--- /dev/null
@@ -0,0 +1,58 @@
+From 94c4b4fd25e6c3763941bdec3ad54f2204afa992 Mon Sep 17 00:00:00 2001
+From: Alistair Delva <adelva@google.com>
+Date: Mon, 15 Nov 2021 18:16:55 +0000
+Subject: block: Check ADMIN before NICE for IOPRIO_CLASS_RT
+
+From: Alistair Delva <adelva@google.com>
+
+commit 94c4b4fd25e6c3763941bdec3ad54f2204afa992 upstream.
+
+Booting to Android userspace on 5.14 or newer triggers the following
+SELinux denial:
+
+avc: denied { sys_nice } for comm="init" capability=23
+     scontext=u:r:init:s0 tcontext=u:r:init:s0 tclass=capability
+     permissive=0
+
+Init is PID 0 running as root, so it already has CAP_SYS_ADMIN. For
+better compatibility with older SEPolicy, check ADMIN before NICE.
+
+Fixes: 9d3a39a5f1e4 ("block: grant IOPRIO_CLASS_RT to CAP_SYS_NICE")
+Signed-off-by: Alistair Delva <adelva@google.com>
+Cc: Khazhismel Kumykov <khazhy@google.com>
+Cc: Bart Van Assche <bvanassche@acm.org>
+Cc: Serge Hallyn <serge@hallyn.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Paul Moore <paul@paul-moore.com>
+Cc: selinux@vger.kernel.org
+Cc: linux-security-module@vger.kernel.org
+Cc: kernel-team@android.com
+Cc: stable@vger.kernel.org # v5.14+
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Acked-by: Serge Hallyn <serge@hallyn.com>
+Link: https://lore.kernel.org/r/20211115181655.3608659-1-adelva@google.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/ioprio.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/block/ioprio.c
++++ b/block/ioprio.c
+@@ -69,7 +69,14 @@ int ioprio_check_cap(int ioprio)
+       switch (class) {
+               case IOPRIO_CLASS_RT:
+-                      if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
++                      /*
++                       * Originally this only checked for CAP_SYS_ADMIN,
++                       * which was implicitly allowed for pid 0 by security
++                       * modules such as SELinux. Make sure we check
++                       * CAP_SYS_ADMIN first to avoid a denial/avc for
++                       * possibly missing CAP_SYS_NICE permission.
++                       */
++                      if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
+                               return -EPERM;
+                       fallthrough;
+                       /* rt has prio field too */
diff --git a/queue-5.15/fbdev-prevent-probing-generic-drivers-if-a-fb-is-already-registered.patch b/queue-5.15/fbdev-prevent-probing-generic-drivers-if-a-fb-is-already-registered.patch
new file mode 100644 (file)
index 0000000..806d80c
--- /dev/null
@@ -0,0 +1,74 @@
+From fb561bf9abde49f7e00fdbf9ed2ccf2d86cac8ee Mon Sep 17 00:00:00 2001
+From: Javier Martinez Canillas <javierm@redhat.com>
+Date: Thu, 11 Nov 2021 12:57:57 +0100
+Subject: fbdev: Prevent probing generic drivers if a FB is already registered
+
+From: Javier Martinez Canillas <javierm@redhat.com>
+
+commit fb561bf9abde49f7e00fdbf9ed2ccf2d86cac8ee upstream.
+
+The efifb and simplefb drivers just render to a pre-allocated frame buffer
+and rely on the display hardware being initialized before the kernel boots.
+
+But if another driver already probed correctly and registered a fbdev, the
+generic drivers shouldn't be probed since an actual driver for the display
+hardware is already present.
+
+This is more likely to occur after commit d391c5827107 ("drivers/firmware:
+move x86 Generic System Framebuffers support") since the "efi-framebuffer"
+and "simple-framebuffer" platform devices are registered at a later time.
+
+Link: https://lore.kernel.org/r/20211110200253.rfudkt3edbd3nsyj@lahvuun/
+Fixes: d391c5827107 ("drivers/firmware: move x86 Generic System Framebuffers support")
+Reported-by: Ilya Trukhanov <lahvuun@gmail.com>
+Cc: <stable@vger.kernel.org> # 5.15.x
+Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Tested-by: Ilya Trukhanov <lahvuun@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20211111115757.1351045-1-javierm@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/video/fbdev/efifb.c    |   11 +++++++++++
+ drivers/video/fbdev/simplefb.c |   11 +++++++++++
+ 2 files changed, 22 insertions(+)
+
+--- a/drivers/video/fbdev/efifb.c
++++ b/drivers/video/fbdev/efifb.c
+@@ -351,6 +351,17 @@ static int efifb_probe(struct platform_d
+       char *option = NULL;
+       efi_memory_desc_t md;
++      /*
++       * Generic drivers must not be registered if a framebuffer exists.
++       * If a native driver was probed, the display hardware was already
++       * taken and attempting to use the system framebuffer is dangerous.
++       */
++      if (num_registered_fb > 0) {
++              dev_err(&dev->dev,
++                      "efifb: a framebuffer is already registered\n");
++              return -EINVAL;
++      }
++
+       if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
+               return -ENODEV;
+--- a/drivers/video/fbdev/simplefb.c
++++ b/drivers/video/fbdev/simplefb.c
+@@ -407,6 +407,17 @@ static int simplefb_probe(struct platfor
+       struct simplefb_par *par;
+       struct resource *mem;
++      /*
++       * Generic drivers must not be registered if a framebuffer exists.
++       * If a native driver was probed, the display hardware was already
++       * taken and attempting to use the system framebuffer is dangerous.
++       */
++      if (num_registered_fb > 0) {
++              dev_err(&pdev->dev,
++                      "simplefb: a framebuffer is already registered\n");
++              return -EINVAL;
++      }
++
+       if (fb_get_options("simplefb", NULL))
+               return -ENODEV;
diff --git a/queue-5.15/hexagon-clean-up-timer-regs.h.patch b/queue-5.15/hexagon-clean-up-timer-regs.h.patch
new file mode 100644 (file)
index 0000000..567bb46
--- /dev/null
@@ -0,0 +1,132 @@
+From 51f2ec593441d3d1ebc0d478fac3ea329c7c93ac Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:31 -0800
+Subject: hexagon: clean up timer-regs.h
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit 51f2ec593441d3d1ebc0d478fac3ea329c7c93ac upstream.
+
+When building allmodconfig, there is a warning about TIMER_ENABLE being
+redefined:
+
+  drivers/clocksource/timer-oxnas-rps.c:39:9: error: 'TIMER_ENABLE' macro redefined [-Werror,-Wmacro-redefined]
+  #define TIMER_ENABLE            BIT(7)
+          ^
+  arch/hexagon/include/asm/timer-regs.h:13:9: note: previous definition is here
+  #define TIMER_ENABLE            0
+           ^
+  1 error generated.
+
+The values in this header are only used in one file each, if they are
+used at all.  Remove the header and sink all of the constants into their
+respective files.
+
+TCX0_CLK_RATE is only used in arch/hexagon/include/asm/timex.h
+
+TIMER_ENABLE, RTOS_TIMER_INT, RTOS_TIMER_REGS_ADDR are only used in
+arch/hexagon/kernel/time.c.
+
+SLEEP_CLK_RATE and TIMER_CLR_ON_MATCH have both been unused since the
+file's introduction in commit 71e4a47f32f4 ("Hexagon: Add time and timer
+functions").
+
+TIMER_ENABLE is redefined as BIT(0) so the shift is moved into the
+definition, rather than its use.
+
+Link: https://lkml.kernel.org/r/20211115174250.1994179-3-nathan@kernel.org
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Acked-by: Brian Cain <bcain@codeaurora.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/hexagon/include/asm/timer-regs.h |   26 --------------------------
+ arch/hexagon/include/asm/timex.h      |    3 +--
+ arch/hexagon/kernel/time.c            |   12 ++++++++++--
+ 3 files changed, 11 insertions(+), 30 deletions(-)
+ delete mode 100644 arch/hexagon/include/asm/timer-regs.h
+
+--- a/arch/hexagon/include/asm/timer-regs.h
++++ /dev/null
+@@ -1,26 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-only */
+-/*
+- * Timer support for Hexagon
+- *
+- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+- */
+-
+-#ifndef _ASM_TIMER_REGS_H
+-#define _ASM_TIMER_REGS_H
+-
+-/*  This stuff should go into a platform specific file  */
+-#define TCX0_CLK_RATE         19200
+-#define TIMER_ENABLE          0
+-#define TIMER_CLR_ON_MATCH    1
+-
+-/*
+- * 8x50 HDD Specs 5-8.  Simulator co-sim not fixed until
+- * release 1.1, and then it's "adjustable" and probably not defaulted.
+- */
+-#define RTOS_TIMER_INT                3
+-#ifdef CONFIG_HEXAGON_COMET
+-#define RTOS_TIMER_REGS_ADDR  0xAB000000UL
+-#endif
+-#define SLEEP_CLK_RATE                32000
+-
+-#endif
+--- a/arch/hexagon/include/asm/timex.h
++++ b/arch/hexagon/include/asm/timex.h
+@@ -7,11 +7,10 @@
+ #define _ASM_TIMEX_H
+ #include <asm-generic/timex.h>
+-#include <asm/timer-regs.h>
+ #include <asm/hexagon_vm.h>
+ /* Using TCX0 as our clock.  CLOCK_TICK_RATE scheduled to be removed. */
+-#define CLOCK_TICK_RATE              TCX0_CLK_RATE
++#define CLOCK_TICK_RATE              19200
+ #define ARCH_HAS_READ_CURRENT_TIMER
+--- a/arch/hexagon/kernel/time.c
++++ b/arch/hexagon/kernel/time.c
+@@ -17,9 +17,10 @@
+ #include <linux/of_irq.h>
+ #include <linux/module.h>
+-#include <asm/timer-regs.h>
+ #include <asm/hexagon_vm.h>
++#define TIMER_ENABLE          BIT(0)
++
+ /*
+  * For the clocksource we need:
+  *    pcycle frequency (600MHz)
+@@ -33,6 +34,13 @@ cycles_t    pcycle_freq_mhz;
+ cycles_t      thread_freq_mhz;
+ cycles_t      sleep_clk_freq;
++/*
++ * 8x50 HDD Specs 5-8.  Simulator co-sim not fixed until
++ * release 1.1, and then it's "adjustable" and probably not defaulted.
++ */
++#define RTOS_TIMER_INT                3
++#define RTOS_TIMER_REGS_ADDR  0xAB000000UL
++
+ static struct resource rtos_timer_resources[] = {
+       {
+               .start  = RTOS_TIMER_REGS_ADDR,
+@@ -80,7 +88,7 @@ static int set_next_event(unsigned long
+       iowrite32(0, &rtos_timer->clear);
+       iowrite32(delta, &rtos_timer->match);
+-      iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable);
++      iowrite32(TIMER_ENABLE, &rtos_timer->enable);
+       return 0;
+ }
diff --git a/queue-5.15/hexagon-export-raw-i-o-routines-for-modules.patch b/queue-5.15/hexagon-export-raw-i-o-routines-for-modules.patch
new file mode 100644 (file)
index 0000000..b4c6b01
--- /dev/null
@@ -0,0 +1,70 @@
+From ffb92ce826fd801acb0f4e15b75e4ddf0d189bde Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:28 -0800
+Subject: hexagon: export raw I/O routines for modules
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit ffb92ce826fd801acb0f4e15b75e4ddf0d189bde upstream.
+
+Patch series "Fixes for ARCH=hexagon allmodconfig", v2.
+
+This series fixes some issues noticed with ARCH=hexagon allmodconfig.
+
+This patch (of 3):
+
+When building ARCH=hexagon allmodconfig, the following errors occur:
+
+  ERROR: modpost: "__raw_readsl" [drivers/i3c/master/svc-i3c-master.ko] undefined!
+  ERROR: modpost: "__raw_writesl" [drivers/i3c/master/dw-i3c-master.ko] undefined!
+  ERROR: modpost: "__raw_readsl" [drivers/i3c/master/dw-i3c-master.ko] undefined!
+  ERROR: modpost: "__raw_writesl" [drivers/i3c/master/i3c-master-cdns.ko] undefined!
+  ERROR: modpost: "__raw_readsl" [drivers/i3c/master/i3c-master-cdns.ko] undefined!
+
+Export these symbols so that modules can use them without any errors.
+
+Link: https://lkml.kernel.org/r/20211115174250.1994179-1-nathan@kernel.org
+Link: https://lkml.kernel.org/r/20211115174250.1994179-2-nathan@kernel.org
+Fixes: 013bf24c3829 ("Hexagon: Provide basic implementation and/or stubs for I/O routines.")
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Acked-by: Brian Cain <bcain@codeaurora.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/hexagon/lib/io.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/hexagon/lib/io.c
++++ b/arch/hexagon/lib/io.c
+@@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *ad
+               *dst++ = *src;
+ }
++EXPORT_SYMBOL(__raw_readsw);
+ /*
+  * __raw_writesw - read words a short at a time
+@@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, c
+ }
++EXPORT_SYMBOL(__raw_writesw);
+ /*  Pretty sure len is pre-adjusted for the length of the access already */
+ void __raw_readsl(const void __iomem *addr, void *data, int len)
+@@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *ad
+ }
++EXPORT_SYMBOL(__raw_readsl);
+ void __raw_writesl(void __iomem *addr, const void *data, int len)
+ {
+@@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, c
+ }
++EXPORT_SYMBOL(__raw_writesl);
diff --git a/queue-5.15/hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error.patch b/queue-5.15/hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error.patch
new file mode 100644 (file)
index 0000000..31cfd91
--- /dev/null
@@ -0,0 +1,73 @@
+From cc30042df6fcc82ea18acf0dace831503e60a0b7 Mon Sep 17 00:00:00 2001
+From: Mina Almasry <almasrymina@google.com>
+Date: Fri, 19 Nov 2021 16:43:43 -0800
+Subject: hugetlb, userfaultfd: fix reservation restore on userfaultfd error
+
+From: Mina Almasry <almasrymina@google.com>
+
+commit cc30042df6fcc82ea18acf0dace831503e60a0b7 upstream.
+
+Currently in the is_continue case in hugetlb_mcopy_atomic_pte(), if we
+bail out using "goto out_release_unlock;" in the cases where idx >=
+size, or !huge_pte_none(), the code will detect that new_pagecache_page
+== false, and so call restore_reserve_on_error().  In this case I see
+restore_reserve_on_error() delete the reservation, and the following
+call to remove_inode_hugepages() will increment h->resv_hugepages
+causing a 100% reproducible leak.
+
+We should treat the is_continue case similar to adding a page into the
+pagecache and set new_pagecache_page to true, to indicate that there is
+no reservation to restore on the error path, and we need not call
+restore_reserve_on_error().  Rename new_pagecache_page to
+page_in_pagecache to make that clear.
+
+Link: https://lkml.kernel.org/r/20211117193825.378528-1-almasrymina@google.com
+Fixes: c7b1850dfb41 ("hugetlb: don't pass page cache pages to restore_reserve_on_error")
+Signed-off-by: Mina Almasry <almasrymina@google.com>
+Reported-by: James Houghton <jthoughton@google.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Wei Xu <weixugc@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5236,13 +5236,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+       int ret = -ENOMEM;
+       struct page *page;
+       int writable;
+-      bool new_pagecache_page = false;
++      bool page_in_pagecache = false;
+       if (is_continue) {
+               ret = -EFAULT;
+               page = find_lock_page(mapping, idx);
+               if (!page)
+                       goto out;
++              page_in_pagecache = true;
+       } else if (!*pagep) {
+               /* If a page already exists, then it's UFFDIO_COPY for
+                * a non-missing case. Return -EEXIST.
+@@ -5330,7 +5331,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+               ret = huge_add_to_page_cache(page, mapping, idx);
+               if (ret)
+                       goto out_release_nounlock;
+-              new_pagecache_page = true;
++              page_in_pagecache = true;
+       }
+       ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
+@@ -5394,7 +5395,7 @@ out_release_unlock:
+       if (vm_shared || is_continue)
+               unlock_page(page);
+ out_release_nounlock:
+-      if (!new_pagecache_page)
++      if (!page_in_pagecache)
+               restore_reserve_on_error(h, dst_vma, dst_addr, page);
+       put_page(page);
+       goto out;
diff --git a/queue-5.15/ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch b/queue-5.15/ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch
new file mode 100644 (file)
index 0000000..7d2f991
--- /dev/null
@@ -0,0 +1,115 @@
+From 126e8bee943e9926238c891e2df5b5573aee76bc Mon Sep 17 00:00:00 2001
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Date: Fri, 19 Nov 2021 16:43:18 -0800
+Subject: ipc: WARN if trying to remove ipc object which is absent
+
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+
+commit 126e8bee943e9926238c891e2df5b5573aee76bc upstream.
+
+Patch series "shm: shm_rmid_forced feature fixes".
+
+Some time ago I met kernel crash after CRIU restore procedure,
+fortunately, it was CRIU restore, so, I had dump files and could do
+restore many times and crash reproduced easily.  After some
+investigation I've constructed the minimal reproducer.  It was found
+that it's use-after-free and it happens only if sysctl
+kernel.shm_rmid_forced = 1.
+
+The key of the problem is that the exit_shm() function not handles shp's
+object destroy when task->sysvshm.shm_clist contains items from
+different IPC namespaces.  In most cases this list will contain only
+items from one IPC namespace.
+
+How can this list contain object from different namespaces? The
+exit_shm() function is designed to clean up this list always when
+process leaves IPC namespace.  But we made a mistake a long time ago and
+did not add a exit_shm() call into the setns() syscall procedures.
+
+The first idea was just to add this call to setns() syscall but it
+obviously changes semantics of setns() syscall and that's
+userspace-visible change.  So, I gave up on this idea.
+
+The first real attempt to address the issue was just to omit forced
+destroy if we meet shp object not from current task IPC namespace [1].
+But that was not the best idea because task->sysvshm.shm_clist was
+protected by rwsem which belongs to current task IPC namespace.  It
+means that list corruption may occur.
+
+Second approach is just extend exit_shm() to properly handle shp's from
+different IPC namespaces [2].  This is really non-trivial thing, I've
+put a lot of effort into that but not believed that it's possible to
+make it fully safe, clean and clear.
+
+Thanks to the efforts of Manfred Spraul working an elegant solution was
+designed.  Thanks a lot, Manfred!
+
+Eric also suggested the way to address the issue in ("[RFC][PATCH] shm:
+In shm_exit destroy all created and never attached segments") Eric's
+idea was to maintain a list of shm_clists one per IPC namespace, use
+lock-less lists.  But there is some extra memory consumption-related
+concerns.
+
+An alternative solution which was suggested by me was implemented in
+("shm: reset shm_clist on setns but omit forced shm destroy").  The idea
+is pretty simple, we add exit_shm() syscall to setns() but DO NOT
+destroy shm segments even if sysctl kernel.shm_rmid_forced = 1, we just
+clean up the task->sysvshm.shm_clist list.
+
+This chages semantics of setns() syscall a little bit but in comparision
+to the "naive" solution when we just add exit_shm() without any special
+exclusions this looks like a safer option.
+
+[1] https://lkml.org/lkml/2021/7/6/1108
+[2] https://lkml.org/lkml/2021/7/14/736
+
+This patch (of 2):
+
+Let's produce a warning if we trying to remove non-existing IPC object
+from IPC namespace kht/idr structures.
+
+This allows us to catch possible bugs when the ipc_rmid() function was
+called with inconsistent struct ipc_ids*, struct kern_ipc_perm*
+arguments.
+
+Link: https://lkml.kernel.org/r/20211027224348.611025-1-alexander.mikhalitsyn@virtuozzo.com
+Link: https://lkml.kernel.org/r/20211027224348.611025-2-alexander.mikhalitsyn@virtuozzo.com
+Co-developed-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Cc: "Eric W. Biederman" <ebiederm@xmission.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Cc: Vasily Averin <vvs@virtuozzo.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ ipc/util.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/ipc/util.c
++++ b/ipc/util.c
+@@ -447,8 +447,8 @@ static int ipcget_public(struct ipc_name
+ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+ {
+       if (ipcp->key != IPC_PRIVATE)
+-              rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
+-                                     ipc_kht_params);
++              WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
++                                     ipc_kht_params));
+ }
+ /**
+@@ -498,7 +498,7 @@ void ipc_rmid(struct ipc_ids *ids, struc
+ {
+       int idx = ipcid_to_idx(ipcp->id);
+-      idr_remove(&ids->ipcs_idr, idx);
++      WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp);
+       ipc_kht_remove(ids, ipcp);
+       ids->in_use--;
+       ipcp->deleted = true;
diff --git a/queue-5.15/kmap_local-don-t-assume-kmap-ptes-are-linear-arrays-in-memory.patch b/queue-5.15/kmap_local-don-t-assume-kmap-ptes-are-linear-arrays-in-memory.patch
new file mode 100644 (file)
index 0000000..29bd2b3
--- /dev/null
@@ -0,0 +1,167 @@
+From 825c43f50e3aa811a291ffcb40e02fbf6d91ba86 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:55 -0800
+Subject: kmap_local: don't assume kmap PTEs are linear arrays in memory
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 825c43f50e3aa811a291ffcb40e02fbf6d91ba86 upstream.
+
+The kmap_local conversion broke the ARM architecture, because the new
+code assumes that all PTEs used for creating kmaps form a linear array
+in memory, and uses array indexing to look up the kmap PTE belonging to
+a certain kmap index.
+
+On ARM, this cannot work, not only because the PTE pages may be
+non-adjacent in memory, but also because ARM/!LPAE interleaves hardware
+entries and extended entries (carrying software-only bits) in a way that
+is not compatible with array indexing.
+
+Fortunately, this only seems to affect configurations with more than 8
+CPUs, due to the way the per-CPU kmap slots are organized in memory.
+
+Work around this by permitting an architecture to set a Kconfig symbol
+that signifies that the kmap PTEs do not form a linear array in memory,
+and so the only way to locate the appropriate one is to walk the page
+tables.
+
+Link: https://lore.kernel.org/linux-arm-kernel/20211026131249.3731275-1-ardb@kernel.org/
+Link: https://lkml.kernel.org/r/20211116094737.7391-1-ardb@kernel.org
+Fixes: 2a15ba82fa6c ("ARM: highmem: Switch to generic kmap atomic")
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Reported-by: Quanyang Wang <quanyang.wang@windriver.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/Kconfig |    1 +
+ mm/Kconfig       |    3 +++
+ mm/highmem.c     |   32 +++++++++++++++++++++-----------
+ 3 files changed, 25 insertions(+), 11 deletions(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -1455,6 +1455,7 @@ config HIGHMEM
+       bool "High Memory Support"
+       depends on MMU
+       select KMAP_LOCAL
++      select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
+       help
+         The address space of ARM processors is only 4 Gigabytes large
+         and it has to accommodate user address space, kernel address
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -887,6 +887,9 @@ config MAPPING_DIRTY_HELPERS
+ config KMAP_LOCAL
+       bool
++config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
++      bool
++
+ # struct io_mapping based helper.  Selected by drivers that need them
+ config IO_MAPPING
+       bool
+--- a/mm/highmem.c
++++ b/mm/highmem.c
+@@ -504,16 +504,22 @@ static inline int kmap_local_calc_idx(in
+ static pte_t *__kmap_pte;
+-static pte_t *kmap_get_pte(void)
++static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
+ {
++      if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY))
++              /*
++               * Set by the arch if __kmap_pte[-idx] does not produce
++               * the correct entry.
++               */
++              return virt_to_kpte(vaddr);
+       if (!__kmap_pte)
+               __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
+-      return __kmap_pte;
++      return &__kmap_pte[-idx];
+ }
+ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
+ {
+-      pte_t pteval, *kmap_pte = kmap_get_pte();
++      pte_t pteval, *kmap_pte;
+       unsigned long vaddr;
+       int idx;
+@@ -525,9 +531,10 @@ void *__kmap_local_pfn_prot(unsigned lon
+       preempt_disable();
+       idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
+       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+-      BUG_ON(!pte_none(*(kmap_pte - idx)));
++      kmap_pte = kmap_get_pte(vaddr, idx);
++      BUG_ON(!pte_none(*kmap_pte));
+       pteval = pfn_pte(pfn, prot);
+-      arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval);
++      arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval);
+       arch_kmap_local_post_map(vaddr, pteval);
+       current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
+       preempt_enable();
+@@ -560,7 +567,7 @@ EXPORT_SYMBOL(__kmap_local_page_prot);
+ void kunmap_local_indexed(void *vaddr)
+ {
+       unsigned long addr = (unsigned long) vaddr & PAGE_MASK;
+-      pte_t *kmap_pte = kmap_get_pte();
++      pte_t *kmap_pte;
+       int idx;
+       if (addr < __fix_to_virt(FIX_KMAP_END) ||
+@@ -585,8 +592,9 @@ void kunmap_local_indexed(void *vaddr)
+       idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr);
+       WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
++      kmap_pte = kmap_get_pte(addr, idx);
+       arch_kmap_local_pre_unmap(addr);
+-      pte_clear(&init_mm, addr, kmap_pte - idx);
++      pte_clear(&init_mm, addr, kmap_pte);
+       arch_kmap_local_post_unmap(addr);
+       current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
+       kmap_local_idx_pop();
+@@ -608,7 +616,7 @@ EXPORT_SYMBOL(kunmap_local_indexed);
+ void __kmap_local_sched_out(void)
+ {
+       struct task_struct *tsk = current;
+-      pte_t *kmap_pte = kmap_get_pte();
++      pte_t *kmap_pte;
+       int i;
+       /* Clear kmaps */
+@@ -635,8 +643,9 @@ void __kmap_local_sched_out(void)
+               idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+               addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++              kmap_pte = kmap_get_pte(addr, idx);
+               arch_kmap_local_pre_unmap(addr);
+-              pte_clear(&init_mm, addr, kmap_pte - idx);
++              pte_clear(&init_mm, addr, kmap_pte);
+               arch_kmap_local_post_unmap(addr);
+       }
+ }
+@@ -644,7 +653,7 @@ void __kmap_local_sched_out(void)
+ void __kmap_local_sched_in(void)
+ {
+       struct task_struct *tsk = current;
+-      pte_t *kmap_pte = kmap_get_pte();
++      pte_t *kmap_pte;
+       int i;
+       /* Restore kmaps */
+@@ -664,7 +673,8 @@ void __kmap_local_sched_in(void)
+               /* See comment in __kmap_local_sched_out() */
+               idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+               addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+-              set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
++              kmap_pte = kmap_get_pte(addr, idx);
++              set_pte_at(&init_mm, addr, kmap_pte, pteval);
+               arch_kmap_local_post_map(addr, pteval);
+       }
+ }
diff --git a/queue-5.15/kvm-x86-assume-a-64-bit-hypercall-for-guests-with-protected-state.patch b/queue-5.15/kvm-x86-assume-a-64-bit-hypercall-for-guests-with-protected-state.patch
new file mode 100644 (file)
index 0000000..6a9b69a
--- /dev/null
@@ -0,0 +1,110 @@
+From b5aead0064f33ae5e693a364e3204fe1c0ac9af2 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 24 May 2021 12:48:57 -0500
+Subject: KVM: x86: Assume a 64-bit hypercall for guests with protected state
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit b5aead0064f33ae5e693a364e3204fe1c0ac9af2 upstream.
+
+When processing a hypercall for a guest with protected state, currently
+SEV-ES guests, the guest CS segment register can't be checked to
+determine if the guest is in 64-bit mode. For an SEV-ES guest, it is
+expected that communication between the guest and the hypervisor is
+performed to shared memory using the GHCB. In order to use the GHCB, the
+guest must have been in long mode, otherwise writes by the guest to the
+GHCB would be encrypted and not be able to be comprehended by the
+hypervisor.
+
+Create a new helper function, is_64_bit_hypercall(), that assumes the
+guest is in 64-bit mode when the guest has protected state, and returns
+true, otherwise invoking is_64_bit_mode() to determine the mode. Update
+the hypercall related routines to use is_64_bit_hypercall() instead of
+is_64_bit_mode().
+
+Add a WARN_ON_ONCE() to is_64_bit_mode() to catch occurrences of calls to
+this helper function for a guest running with protected state.
+
+Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES")
+Reported-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Message-Id: <e0b20c770c9d0d1403f23d83e785385104211f74.1621878537.git.thomas.lendacky@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.c |    4 ++--
+ arch/x86/kvm/x86.c    |    2 +-
+ arch/x86/kvm/x86.h    |   12 ++++++++++++
+ arch/x86/kvm/xen.c    |    2 +-
+ 4 files changed, 16 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.c
++++ b/arch/x86/kvm/hyperv.c
+@@ -2022,7 +2022,7 @@ static void kvm_hv_hypercall_set_result(
+ {
+       bool longmode;
+-      longmode = is_64_bit_mode(vcpu);
++      longmode = is_64_bit_hypercall(vcpu);
+       if (longmode)
+               kvm_rax_write(vcpu, result);
+       else {
+@@ -2171,7 +2171,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vc
+       }
+ #ifdef CONFIG_X86_64
+-      if (is_64_bit_mode(vcpu)) {
++      if (is_64_bit_hypercall(vcpu)) {
+               hc.param = kvm_rcx_read(vcpu);
+               hc.ingpa = kvm_rdx_read(vcpu);
+               hc.outgpa = kvm_r8_read(vcpu);
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8737,7 +8737,7 @@ int kvm_emulate_hypercall(struct kvm_vcp
+       trace_kvm_hypercall(nr, a0, a1, a2, a3);
+-      op_64_bit = is_64_bit_mode(vcpu);
++      op_64_bit = is_64_bit_hypercall(vcpu);
+       if (!op_64_bit) {
+               nr &= 0xFFFFFFFF;
+               a0 &= 0xFFFFFFFF;
+--- a/arch/x86/kvm/x86.h
++++ b/arch/x86/kvm/x86.h
+@@ -153,12 +153,24 @@ static inline bool is_64_bit_mode(struct
+ {
+       int cs_db, cs_l;
++      WARN_ON_ONCE(vcpu->arch.guest_state_protected);
++
+       if (!is_long_mode(vcpu))
+               return false;
+       static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
+       return cs_l;
+ }
++static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
++{
++      /*
++       * If running with protected guest state, the CS register is not
++       * accessible. The hypercall register values will have had to been
++       * provided in 64-bit mode, so assume the guest is in 64-bit.
++       */
++      return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
++}
++
+ static inline bool x86_exception_has_error_code(unsigned int vector)
+ {
+       static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+--- a/arch/x86/kvm/xen.c
++++ b/arch/x86/kvm/xen.c
+@@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *v
+           kvm_hv_hypercall_enabled(vcpu))
+               return kvm_hv_hypercall(vcpu);
+-      longmode = is_64_bit_mode(vcpu);
++      longmode = is_64_bit_hypercall(vcpu);
+       if (!longmode) {
+               params[0] = (u32)kvm_rbx_read(vcpu);
+               params[1] = (u32)kvm_rcx_read(vcpu);
diff --git a/queue-5.15/kvm-x86-fix-uninitialized-eoi_exit_bitmap-usage-in-vcpu_load_eoi_exitmap.patch b/queue-5.15/kvm-x86-fix-uninitialized-eoi_exit_bitmap-usage-in-vcpu_load_eoi_exitmap.patch
new file mode 100644 (file)
index 0000000..8aff5fe
--- /dev/null
@@ -0,0 +1,51 @@
+From c5adbb3af051079f35abfa26551107e2c653087f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=BB=84=E4=B9=90?= <huangle1@jd.com>
+Date: Mon, 15 Nov 2021 14:08:29 +0000
+Subject: KVM: x86: Fix uninitialized eoi_exit_bitmap usage in vcpu_load_eoi_exitmap()
+
+From: 黄乐 <huangle1@jd.com>
+
+commit c5adbb3af051079f35abfa26551107e2c653087f upstream.
+
+In vcpu_load_eoi_exitmap(), currently the eoi_exit_bitmap[4] array is
+initialized only when Hyper-V context is available, in other path it is
+just passed to kvm_x86_ops.load_eoi_exitmap() directly from on the stack,
+which would cause unexpected interrupt delivery/handling issues, e.g. an
+*old* linux kernel that relies on PIT to do clock calibration on KVM might
+randomly fail to boot.
+
+Fix it by passing ioapic_handled_vectors to load_eoi_exitmap() when Hyper-V
+context is not available.
+
+Fixes: f2bc14b69c38 ("KVM: x86: hyper-v: Prepare to meet unallocated Hyper-V context")
+Cc: stable@vger.kernel.org
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Huang Le <huangle1@jd.com>
+Message-Id: <62115b277dab49ea97da5633f8522daf@jd.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9429,12 +9429,16 @@ static void vcpu_load_eoi_exitmap(struct
+       if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+               return;
+-      if (to_hv_vcpu(vcpu))
++      if (to_hv_vcpu(vcpu)) {
+               bitmap_or((ulong *)eoi_exit_bitmap,
+                         vcpu->arch.ioapic_handled_vectors,
+                         to_hv_synic(vcpu)->vec_bitmap, 256);
++              static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
++              return;
++      }
+-      static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
++      static_call(kvm_x86_load_eoi_exitmap)(
++              vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
+ }
+ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
diff --git a/queue-5.15/kvm-x86-mmu-include-efer.lma-in-extended-mmu-role.patch b/queue-5.15/kvm-x86-mmu-include-efer.lma-in-extended-mmu-role.patch
new file mode 100644 (file)
index 0000000..5ac2015
--- /dev/null
@@ -0,0 +1,62 @@
+From b8453cdcf26020030da182f0156d7bf59ae5719f Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Mon, 15 Nov 2021 15:18:37 +0200
+Subject: KVM: x86/mmu: include EFER.LMA in extended mmu role
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit b8453cdcf26020030da182f0156d7bf59ae5719f upstream.
+
+Incorporate EFER.LMA into kvm_mmu_extended_role, as it used to compute the
+guest root level and is not reflected in kvm_mmu_page_role.level when TDP
+is in use.  When simply running the guest, it is impossible for EFER.LMA
+and kvm_mmu.root_level to get out of sync, as the guest cannot transition
+from PAE paging to 64-bit paging without toggling CR0.PG, i.e. without
+first bouncing through a different MMU context.  And stuffing guest state
+via KVM_SET_SREGS{,2} also ensures a full MMU context reset.
+
+However, if KVM_SET_SREGS{,2} is followed by KVM_SET_NESTED_STATE, e.g. to
+set guest state when migrating the VM while L2 is active, the vCPU state
+will reflect L2, not L1.  If L1 is using TDP for L2, then root_mmu will
+have been configured using L2's state, despite not being used for L2.  If
+L2.EFER.LMA != L1.EFER.LMA, and L2 is using PAE paging, then root_mmu will
+be configured for guest PAE paging, but will match the mmu_role for 64-bit
+paging and cause KVM to not reconfigure root_mmu on the next nested VM-Exit.
+
+Alternatively, the root_mmu's role could be invalidated after a successful
+KVM_SET_NESTED_STATE that yields vcpu->arch.mmu != vcpu->arch.root_mmu,
+i.e. that switches the active mmu to guest_mmu, but doing so is unnecessarily
+tricky, and not even needed if L1 and L2 do have the same role (e.g., they
+are both 64-bit guests and run with the same CR4).
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20211115131837.195527-3-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/mmu/mmu.c          |    1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -364,6 +364,7 @@ union kvm_mmu_extended_role {
+               unsigned int cr4_smap:1;
+               unsigned int cr4_smep:1;
+               unsigned int cr4_la57:1;
++              unsigned int efer_lma:1;
+       };
+ };
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -4679,6 +4679,7 @@ static union kvm_mmu_extended_role kvm_c
+               /* PKEY and LA57 are active iff long mode is active. */
+               ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
+               ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
++              ext.efer_lma = ____is_efer_lma(regs);
+       }
+       ext.valid = 1;
diff --git a/queue-5.15/kvm-x86-xen-fix-get_attr-of-kvm_xen_attr_type_shared_info.patch b/queue-5.15/kvm-x86-xen-fix-get_attr-of-kvm_xen_attr_type_shared_info.patch
new file mode 100644 (file)
index 0000000..f7d05b3
--- /dev/null
@@ -0,0 +1,35 @@
+From 4e8436479ad3be76a3823e6ce466ae464ce71300 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Mon, 15 Nov 2021 16:50:21 +0000
+Subject: KVM: x86/xen: Fix get_attr of KVM_XEN_ATTR_TYPE_SHARED_INFO
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 4e8436479ad3be76a3823e6ce466ae464ce71300 upstream.
+
+In commit 319afe68567b ("KVM: xen: do not use struct gfn_to_hva_cache") we
+stopped storing this in-kernel as a GPA, and started storing it as a GFN.
+Which means we probably should have stopped calling gpa_to_gfn() on it
+when userspace asks for it back.
+
+Cc: stable@vger.kernel.org
+Fixes: 319afe68567b ("KVM: xen: do not use struct gfn_to_hva_cache")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Message-Id: <20211115165030.7422-2-dwmw2@infradead.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/xen.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/xen.c
++++ b/arch/x86/kvm/xen.c
+@@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm
+               break;
+       case KVM_XEN_ATTR_TYPE_SHARED_INFO:
+-              data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
++              data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
+               r = 0;
+               break;
diff --git a/queue-5.15/mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock.patch b/queue-5.15/mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock.patch
new file mode 100644 (file)
index 0000000..6dbeb0b
--- /dev/null
@@ -0,0 +1,71 @@
+From d78f3853f831eee46c6dbe726debf3be9e9c0d05 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:52 -0800
+Subject: mm/damon/dbgfs: fix missed use of damon_dbgfs_lock
+
+From: SeongJae Park <sj@kernel.org>
+
+commit d78f3853f831eee46c6dbe726debf3be9e9c0d05 upstream.
+
+DAMON debugfs is supposed to protect dbgfs_ctxs, dbgfs_nr_ctxs, and
+dbgfs_dirs using damon_dbgfs_lock.  However, some of the code is
+accessing the variables without the protection.  This fixes it by
+protecting all such accesses.
+
+Link: https://lkml.kernel.org/r/20211110145758.16558-3-sj@kernel.org
+Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/dbgfs.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -538,12 +538,14 @@ static ssize_t dbgfs_monitor_on_write(st
+               return -EINVAL;
+       }
++      mutex_lock(&damon_dbgfs_lock);
+       if (!strncmp(kbuf, "on", count))
+               err = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
+       else if (!strncmp(kbuf, "off", count))
+               err = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
+       else
+               err = -EINVAL;
++      mutex_unlock(&damon_dbgfs_lock);
+       if (err)
+               ret = err;
+@@ -596,15 +598,16 @@ static int __init __damon_dbgfs_init(voi
+ static int __init damon_dbgfs_init(void)
+ {
+-      int rc;
++      int rc = -ENOMEM;
++      mutex_lock(&damon_dbgfs_lock);
+       dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL);
+       if (!dbgfs_ctxs)
+-              return -ENOMEM;
++              goto out;
+       dbgfs_ctxs[0] = dbgfs_new_ctx();
+       if (!dbgfs_ctxs[0]) {
+               kfree(dbgfs_ctxs);
+-              return -ENOMEM;
++              goto out;
+       }
+       dbgfs_nr_ctxs = 1;
+@@ -615,6 +618,8 @@ static int __init damon_dbgfs_init(void)
+               pr_err("%s: dbgfs init failed\n", __func__);
+       }
++out:
++      mutex_unlock(&damon_dbgfs_lock);
+       return rc;
+ }
diff --git a/queue-5.15/mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation.patch b/queue-5.15/mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation.patch
new file mode 100644 (file)
index 0000000..87cf356
--- /dev/null
@@ -0,0 +1,60 @@
+From db7a347b26fe05d2e8c115bb24dfd908d0252bc3 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:49 -0800
+Subject: mm/damon/dbgfs: use '__GFP_NOWARN' for user-specified size buffer allocation
+
+From: SeongJae Park <sj@kernel.org>
+
+commit db7a347b26fe05d2e8c115bb24dfd908d0252bc3 upstream.
+
+Patch series "DAMON fixes".
+
+This patch (of 2):
+
+DAMON users can trigger below warning in '__alloc_pages()' by invoking
+write() to some DAMON debugfs files with arbitrarily high count
+argument, because DAMON debugfs interface allocates some buffers based
+on the user-specified 'count'.
+
+        if (unlikely(order >= MAX_ORDER)) {
+                WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
+                return NULL;
+        }
+
+Because the DAMON debugfs interface code checks failure of the
+'kmalloc()', this commit simply suppresses the warnings by adding
+'__GFP_NOWARN' flag.
+
+Link: https://lkml.kernel.org/r/20211110145758.16558-1-sj@kernel.org
+Link: https://lkml.kernel.org/r/20211110145758.16558-2-sj@kernel.org
+Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/damon/dbgfs.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -32,7 +32,7 @@ static char *user_input_str(const char _
+       if (*ppos)
+               return ERR_PTR(-EINVAL);
+-      kbuf = kmalloc(count + 1, GFP_KERNEL);
++      kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN);
+       if (!kbuf)
+               return ERR_PTR(-ENOMEM);
+@@ -247,7 +247,7 @@ static ssize_t dbgfs_kdamond_pid_read(st
+       char *kbuf;
+       ssize_t len;
+-      kbuf = kmalloc(count, GFP_KERNEL);
++      kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+       if (!kbuf)
+               return -ENOMEM;
diff --git a/queue-5.15/mm-kmemleak-slob-respect-slab_noleaktrace-flag.patch b/queue-5.15/mm-kmemleak-slob-respect-slab_noleaktrace-flag.patch
new file mode 100644 (file)
index 0000000..ce71540
--- /dev/null
@@ -0,0 +1,51 @@
+From 34dbc3aaf5d9e89ba6cc5e24add9458c21ab1950 Mon Sep 17 00:00:00 2001
+From: Rustam Kovhaev <rkovhaev@gmail.com>
+Date: Fri, 19 Nov 2021 16:43:37 -0800
+Subject: mm: kmemleak: slob: respect SLAB_NOLEAKTRACE flag
+
+From: Rustam Kovhaev <rkovhaev@gmail.com>
+
+commit 34dbc3aaf5d9e89ba6cc5e24add9458c21ab1950 upstream.
+
+When kmemleak is enabled for SLOB, system does not boot and does not
+print anything to the console.  At the very early stage in the boot
+process we hit infinite recursion from kmemleak_init() and eventually
+kernel crashes.
+
+kmemleak_init() specifies SLAB_NOLEAKTRACE for KMEM_CACHE(), but
+kmem_cache_create_usercopy() removes it because CACHE_CREATE_MASK is not
+valid for SLOB.
+
+Let's fix CACHE_CREATE_MASK and make kmemleak work with SLOB
+
+Link: https://lkml.kernel.org/r/20211115020850.3154366-1-rkovhaev@gmail.com
+Fixes: d8843922fba4 ("slab: Ignore internal flags in cache creation")
+Signed-off-by: Rustam Kovhaev <rkovhaev@gmail.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Glauber Costa <glommer@parallels.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slab.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -147,7 +147,7 @@ static inline slab_flags_t kmem_cache_fl
+ #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
+                         SLAB_TEMPORARY | SLAB_ACCOUNT)
+ #else
+-#define SLAB_CACHE_FLAGS (0)
++#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
+ #endif
+ /* Common flags available with current configuration */
diff --git a/queue-5.15/pinctrl-ralink-include-ralink_regs.h-in-pinctrl-mt7620.c.patch b/queue-5.15/pinctrl-ralink-include-ralink_regs.h-in-pinctrl-mt7620.c.patch
new file mode 100644 (file)
index 0000000..6b55578
--- /dev/null
@@ -0,0 +1,32 @@
+From a5b9703fe11cd1d6d7a60102aa2abe686dc1867f Mon Sep 17 00:00:00 2001
+From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
+Date: Sun, 31 Oct 2021 07:40:46 +0100
+Subject: pinctrl: ralink: include 'ralink_regs.h' in 'pinctrl-mt7620.c'
+
+From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
+
+commit a5b9703fe11cd1d6d7a60102aa2abe686dc1867f upstream.
+
+mt7620.h, included by pinctrl-mt7620.c, mentions MT762X_SOC_MT7628AN
+declared in ralink_regs.h.
+
+Fixes: 745ec436de72 ("pinctrl: ralink: move MT7620 SoC pinmux config into a new 'pinctrl-mt7620.c' file")
+Cc: stable@vger.kernel.org
+Signed-off-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
+Signed-off-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
+Link: https://lore.kernel.org/r/20211031064046.13533-1-sergio.paracuellos@gmail.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pinctrl/ralink/pinctrl-mt7620.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/pinctrl/ralink/pinctrl-mt7620.c
++++ b/drivers/pinctrl/ralink/pinctrl-mt7620.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
++#include <asm/mach-ralink/ralink_regs.h>
+ #include <asm/mach-ralink/mt7620.h>
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
diff --git a/queue-5.15/powerpc-8xx-fix-pinned-tlbs-with-config_strict_kernel_rwx.patch b/queue-5.15/powerpc-8xx-fix-pinned-tlbs-with-config_strict_kernel_rwx.patch
new file mode 100644 (file)
index 0000000..c5cbec2
--- /dev/null
@@ -0,0 +1,90 @@
+From 1e35eba4055149c578baf0318d2f2f89ea3c44a0 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Mon, 15 Nov 2021 09:08:36 +0100
+Subject: powerpc/8xx: Fix pinned TLBs with CONFIG_STRICT_KERNEL_RWX
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 1e35eba4055149c578baf0318d2f2f89ea3c44a0 upstream.
+
+As spotted and explained in commit c12ab8dbc492 ("powerpc/8xx: Fix
+Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST"), the selection
+of STRICT_KERNEL_RWX without selecting DEBUG_RODATA_TEST has spotted
+the lack of the DIRTY bit in the pinned kernel data TLBs.
+
+This problem should have been detected a lot earlier if things had
+been working as expected. But due to an incredible level of chance or
+mishap, this went undetected because of a set of bugs: In fact the
+DTLBs were not pinned, because instead of setting the reserve bit
+in MD_CTR, it was set in MI_CTR that is the register for ITLBs.
+
+But then, another huge bug was there: the physical address was
+reset to 0 at the boundary between RO and RW areas, leading to the
+same physical space being mapped at both 0xc0000000 and 0xc8000000.
+This had by miracle no consequence until now because the entry was
+not really pinned so it was overwritten soon enough to go undetected.
+
+Of course, now that we really pin the DTLBs, it must be fixed as well.
+
+Fixes: f76c8f6d257c ("powerpc/8xx: Add function to set pinned TLBs")
+Cc: stable@vger.kernel.org # v5.8+
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Depends-on: c12ab8dbc492 ("powerpc/8xx: Fix Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST")
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/a21e9a057fe2d247a535aff0d157a54eefee017a.1636963688.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/head_8xx.S |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/arch/powerpc/kernel/head_8xx.S
++++ b/arch/powerpc/kernel/head_8xx.S
+@@ -733,6 +733,7 @@ _GLOBAL(mmu_pin_tlb)
+ #ifdef CONFIG_PIN_TLB_DATA
+       LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+       LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
++      li      r8, 0
+ #ifdef CONFIG_PIN_TLB_IMMR
+       li      r0, 3
+ #else
+@@ -741,26 +742,26 @@ _GLOBAL(mmu_pin_tlb)
+       mtctr   r0
+       cmpwi   r4, 0
+       beq     4f
+-      LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+       LOAD_REG_ADDR(r9, _sinittext)
+ 2:    ori     r0, r6, MD_EVALID
++      ori     r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+       mtspr   SPRN_MD_CTR, r5
+       mtspr   SPRN_MD_EPN, r0
+       mtspr   SPRN_MD_TWC, r7
+-      mtspr   SPRN_MD_RPN, r8
++      mtspr   SPRN_MD_RPN, r12
+       addi    r5, r5, 0x100
+       addis   r6, r6, SZ_8M@h
+       addis   r8, r8, SZ_8M@h
+       cmplw   r6, r9
+       bdnzt   lt, 2b
+-
+-4:    LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
++4:
+ 2:    ori     r0, r6, MD_EVALID
++      ori     r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+       mtspr   SPRN_MD_CTR, r5
+       mtspr   SPRN_MD_EPN, r0
+       mtspr   SPRN_MD_TWC, r7
+-      mtspr   SPRN_MD_RPN, r8
++      mtspr   SPRN_MD_RPN, r12
+       addi    r5, r5, 0x100
+       addis   r6, r6, SZ_8M@h
+       addis   r8, r8, SZ_8M@h
+@@ -781,7 +782,7 @@ _GLOBAL(mmu_pin_tlb)
+ #endif
+ #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+       lis     r0, (MD_RSV4I | MD_TWAM)@h
+-      mtspr   SPRN_MI_CTR, r0
++      mtspr   SPRN_MD_CTR, r0
+ #endif
+       mtspr   SPRN_SRR1, r10
+       mtspr   SPRN_SRR0, r11
diff --git a/queue-5.15/powerpc-signal32-fix-sigset_t-copy.patch b/queue-5.15/powerpc-signal32-fix-sigset_t-copy.patch
new file mode 100644 (file)
index 0000000..e03bbef
--- /dev/null
@@ -0,0 +1,64 @@
+From 5499802b2284331788a440585869590f1bd63f7f Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Mon, 15 Nov 2021 09:52:55 +0100
+Subject: powerpc/signal32: Fix sigset_t copy
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 5499802b2284331788a440585869590f1bd63f7f upstream.
+
+The conversion from __copy_from_user() to __get_user() by
+commit d3ccc9781560 ("powerpc/signal: Use __get_user() to copy
+sigset_t") introduced a regression in __get_user_sigset() for
+powerpc/32. The bug was subsequently moved into
+unsafe_get_user_sigset().
+
+The bug is due to the copied 64 bit value being truncated to
+32 bits while being assigned to dst->sig[0]
+
+The regression was reported by users of the Xorg packages distributed in
+Debian/powerpc --
+
+    "The symptoms are that the fb screen goes blank, with the backlight
+    remaining on and no errors logged in /var/log; wdm (or startx) run
+    with no effect (I tried logging in in the blind, with no effect).
+    And they are hard to kill, requiring 'kill -KILL ...'"
+
+Fix the regression by copying each word of the sigset, not only the
+first one.
+
+__get_user_sigset() was tentatively optimised to copy 64 bits at once
+in order to minimise KUAP unlock/lock impact, but the unsafe variant
+doesn't suffer that, so it can just copy words.
+
+Fixes: 887f3ceb51cd ("powerpc/signal32: Convert do_setcontext[_tm]() to user access block")
+Cc: stable@vger.kernel.org # v5.13+
+Reported-by: Finn Thain <fthain@linux-m68k.org>
+Reported-and-tested-by: Stan Johnson <userm57@yahoo.com>
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/99ef38d61c0eb3f79c68942deb0c35995a93a777.1636966353.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/signal.h |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/signal.h
++++ b/arch/powerpc/kernel/signal.h
+@@ -25,8 +25,14 @@ static inline int __get_user_sigset(sigs
+       return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+ }
+-#define unsafe_get_user_sigset(dst, src, label) \
+-      unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label)
++#define unsafe_get_user_sigset(dst, src, label) do {                  \
++      sigset_t *__dst = dst;                                          \
++      const sigset_t __user *__src = src;                             \
++      int i;                                                          \
++                                                                      \
++      for (i = 0; i < _NSIG_WORDS; i++)                               \
++              unsafe_get_user(__dst->sig[i], &__src->sig[i], label);  \
++} while (0)
+ #ifdef CONFIG_VSX
+ extern unsigned long copy_vsx_to_user(void __user *to,
diff --git a/queue-5.15/powerpc-xive-change-irq-domain-to-a-tree-domain.patch b/queue-5.15/powerpc-xive-change-irq-domain-to-a-tree-domain.patch
new file mode 100644 (file)
index 0000000..46161aa
--- /dev/null
@@ -0,0 +1,63 @@
+From 8e80a73fa9a7747e3e8255cb149c543aabf65a24 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
+Date: Tue, 16 Nov 2021 14:40:22 +0100
+Subject: powerpc/xive: Change IRQ domain to a tree domain
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater <clg@kaod.org>
+
+commit 8e80a73fa9a7747e3e8255cb149c543aabf65a24 upstream.
+
+Commit 4f86a06e2d6e ("irqdomain: Make normal and nomap irqdomains
+exclusive") introduced an IRQ_DOMAIN_FLAG_NO_MAP flag to isolate the
+'nomap' domains still in use under the powerpc arch. With this new
+flag, the revmap_tree of the IRQ domain is not used anymore. This
+change broke the support of shared LSIs [1] in the XIVE driver because
+it was relying on a lookup in the revmap_tree to query previously
+mapped interrupts. Linux now creates two distinct IRQ mappings on the
+same HW IRQ which can lead to unexpected behavior in the drivers.
+
+The XIVE IRQ domain is not a direct mapping domain and its HW IRQ
+interrupt number space is rather large : 1M/socket on POWER9 and
+POWER10, change the XIVE driver to use a 'tree' domain type instead.
+
+[1] For instance, a linux KVM guest with virtio-rng and virtio-balloon
+    devices.
+
+Fixes: 4f86a06e2d6e ("irqdomain: Make normal and nomap irqdomains exclusive")
+Cc: stable@vger.kernel.org # v5.14+
+Signed-off-by: Cédric Le Goater <clg@kaod.org>
+Tested-by: Greg Kurz <groug@kaod.org>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20211116134022.420412-1-clg@kaod.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/sysdev/xive/Kconfig  |    1 -
+ arch/powerpc/sysdev/xive/common.c |    3 +--
+ 2 files changed, 1 insertion(+), 3 deletions(-)
+
+--- a/arch/powerpc/sysdev/xive/Kconfig
++++ b/arch/powerpc/sysdev/xive/Kconfig
+@@ -3,7 +3,6 @@ config PPC_XIVE
+       bool
+       select PPC_SMP_MUXED_IPI
+       select HARDIRQS_SW_RESEND
+-      select IRQ_DOMAIN_NOMAP
+ config PPC_XIVE_NATIVE
+       bool
+--- a/arch/powerpc/sysdev/xive/common.c
++++ b/arch/powerpc/sysdev/xive/common.c
+@@ -1443,8 +1443,7 @@ static const struct irq_domain_ops xive_
+ static void __init xive_init_host(struct device_node *np)
+ {
+-      xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ,
+-                                             &xive_irq_domain_ops, NULL);
++      xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
+       if (WARN_ON(xive_irq_domain == NULL))
+               return;
+       irq_set_default_host(xive_irq_domain);
diff --git a/queue-5.15/pstore-blk-use-lu-to-format-unsigned-long.patch b/queue-5.15/pstore-blk-use-lu-to-format-unsigned-long.patch
new file mode 100644 (file)
index 0000000..a38bc3d
--- /dev/null
@@ -0,0 +1,52 @@
+From 61eb495c83bf6ebde490992bf888ca15b9babc39 Mon Sep 17 00:00:00 2001
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+Date: Thu, 18 Nov 2021 10:26:21 -0800
+Subject: pstore/blk: Use "%lu" to format unsigned long
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+
+commit 61eb495c83bf6ebde490992bf888ca15b9babc39 upstream.
+
+On 32-bit:
+
+    fs/pstore/blk.c: In function ‘__best_effort_init’:
+    include/linux/kern_levels.h:5:18: warning: format ‘%zu’ expects argument of type ‘size_t’, but argument 3 has type ‘long unsigned int’ [-Wformat=]
+       5 | #define KERN_SOH "\001"  /* ASCII Start Of Header */
+         |                  ^~~~~~
+    include/linux/kern_levels.h:14:19: note: in expansion of macro ‘KERN_SOH’
+       14 | #define KERN_INFO KERN_SOH "6" /* informational */
+         |                   ^~~~~~~~
+    include/linux/printk.h:373:9: note: in expansion of macro ‘KERN_INFO’
+      373 |  printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+         |         ^~~~~~~~~
+    fs/pstore/blk.c:314:3: note: in expansion of macro ‘pr_info’
+      314 |   pr_info("attached %s (%zu) (no dedicated panic_write!)\n",
+         |   ^~~~~~~
+
+Cc: stable@vger.kernel.org
+Fixes: 7bb9557b48fcabaa ("pstore/blk: Use the normal block device I/O path")
+Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210629103700.1935012-1-geert@linux-m68k.org
+Cc: Jens Axboe <axboe@kernel.dk>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/pstore/blk.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/pstore/blk.c
++++ b/fs/pstore/blk.c
+@@ -311,7 +311,7 @@ static int __init __best_effort_init(voi
+       if (ret)
+               kfree(best_effort_dev);
+       else
+-              pr_info("attached %s (%zu) (no dedicated panic_write!)\n",
++              pr_info("attached %s (%lu) (no dedicated panic_write!)\n",
+                       blkdev, best_effort_dev->zone.total_size);
+       return ret;
diff --git a/queue-5.15/revert-drm-i915-tgl-dsi-gate-the-ddi-clocks-after-pll-mapping.patch b/queue-5.15/revert-drm-i915-tgl-dsi-gate-the-ddi-clocks-after-pll-mapping.patch
new file mode 100644 (file)
index 0000000..40973c4
--- /dev/null
@@ -0,0 +1,59 @@
+From f15863b27752682bb700c21de5f83f613a0fb77e Mon Sep 17 00:00:00 2001
+From: Vandita Kulkarni <vandita.kulkarni@intel.com>
+Date: Tue, 9 Nov 2021 17:34:28 +0530
+Subject: Revert "drm/i915/tgl/dsi: Gate the ddi clocks after pll mapping"
+
+From: Vandita Kulkarni <vandita.kulkarni@intel.com>
+
+commit f15863b27752682bb700c21de5f83f613a0fb77e upstream.
+
+This reverts commit 991d9557b0c4 ("drm/i915/tgl/dsi: Gate the ddi clocks
+after pll mapping"). The Bspec was updated recently with the pll ungate
+sequence similar to that of icl dsi enable sequence. Hence reverting.
+
+Bspec: 49187
+Fixes: 991d9557b0c4 ("drm/i915/tgl/dsi: Gate the ddi clocks after pll mapping")
+Cc: <stable@vger.kernel.org> # v5.4+
+Signed-off-by: Vandita Kulkarni <vandita.kulkarni@intel.com>
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20211109120428.15211-1-vandita.kulkarni@intel.com
+(cherry picked from commit 4579509ef181480f4e4510d436c691519167c5c2)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/icl_dsi.c |   10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/icl_dsi.c
++++ b/drivers/gpu/drm/i915/display/icl_dsi.c
+@@ -711,10 +711,7 @@ static void gen11_dsi_map_pll(struct int
+       intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
+       for_each_dsi_phy(phy, intel_dsi->phys) {
+-              if (DISPLAY_VER(dev_priv) >= 12)
+-                      val |= ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
+-              else
+-                      val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
++              val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
+       }
+       intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
+@@ -1150,8 +1147,6 @@ static void
+ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
+                             const struct intel_crtc_state *crtc_state)
+ {
+-      struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+-
+       /* step 4a: power up all lanes of the DDI used by DSI */
+       gen11_dsi_power_up_lanes(encoder);
+@@ -1177,8 +1172,7 @@ gen11_dsi_enable_port_and_phy(struct int
+       gen11_dsi_configure_transcoder(encoder, crtc_state);
+       /* Step 4l: Gate DDI clocks */
+-      if (DISPLAY_VER(dev_priv) == 11)
+-              gen11_dsi_gate_clocks(encoder);
++      gen11_dsi_gate_clocks(encoder);
+ }
+ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
diff --git a/queue-5.15/revert-mark-pstore-blk-as-broken.patch b/queue-5.15/revert-mark-pstore-blk-as-broken.patch
new file mode 100644 (file)
index 0000000..63d9c22
--- /dev/null
@@ -0,0 +1,36 @@
+From d1faacbf67b1944f0e0c618dc581d929263f6fe9 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 16 Nov 2021 10:15:59 -0800
+Subject: Revert "mark pstore-blk as broken"
+
+From: Kees Cook <keescook@chromium.org>
+
+commit d1faacbf67b1944f0e0c618dc581d929263f6fe9 upstream.
+
+This reverts commit d07f3b081ee632268786601f55e1334d1f68b997.
+
+pstore-blk was fixed to avoid the unwanted APIs in commit 7bb9557b48fc
+("pstore/blk: Use the normal block device I/O path"), which landed in
+the same release as the commit adding BROKEN.
+
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20211116181559.3975566-1-keescook@chromium.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/pstore/Kconfig |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/pstore/Kconfig
++++ b/fs/pstore/Kconfig
+@@ -173,7 +173,6 @@ config PSTORE_BLK
+       tristate "Log panic/oops to a block device"
+       depends on PSTORE
+       depends on BLOCK
+-      depends on BROKEN
+       select PSTORE_ZONE
+       default n
+       help
diff --git a/queue-5.15/revert-parisc-reduce-sigreturn-trampoline-to-3-instructions.patch b/queue-5.15/revert-parisc-reduce-sigreturn-trampoline-to-3-instructions.patch
new file mode 100644 (file)
index 0000000..8f32b06
--- /dev/null
@@ -0,0 +1,75 @@
+From 79df39d535c7a3770856fe9f5aba8c0ad1eebdb6 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Wed, 17 Nov 2021 11:05:07 +0100
+Subject: Revert "parisc: Reduce sigreturn trampoline to 3 instructions"
+
+From: Helge Deller <deller@gmx.de>
+
+commit 79df39d535c7a3770856fe9f5aba8c0ad1eebdb6 upstream.
+
+This reverts commit e4f2006f1287e7ea17660490569cff323772dac4.
+
+This patch shows problems with signal handling. Revert it for now.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Cc: <stable@vger.kernel.org> # v5.15
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/parisc/include/asm/rt_sigframe.h |    2 +-
+ arch/parisc/kernel/signal.c           |   13 +++++++------
+ arch/parisc/kernel/signal32.h         |    2 +-
+ 3 files changed, 9 insertions(+), 8 deletions(-)
+
+--- a/arch/parisc/include/asm/rt_sigframe.h
++++ b/arch/parisc/include/asm/rt_sigframe.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_PARISC_RT_SIGFRAME_H
+ #define _ASM_PARISC_RT_SIGFRAME_H
+-#define SIGRETURN_TRAMP 3
++#define SIGRETURN_TRAMP 4
+ #define SIGRESTARTBLOCK_TRAMP 5 
+ #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
+--- a/arch/parisc/kernel/signal.c
++++ b/arch/parisc/kernel/signal.c
+@@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sig
+          already in userspace. The first words of tramp are used to
+          save the previous sigrestartblock trampoline that might be
+          on the stack. We start the sigreturn trampoline at 
+-         SIGRESTARTBLOCK_TRAMP. */
++         SIGRESTARTBLOCK_TRAMP+X. */
+       err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
+                       &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
+-      err |= __put_user(INSN_BLE_SR2_R0, 
++      err |= __put_user(INSN_LDI_R20,
+                       &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
+-      err |= __put_user(INSN_LDI_R20,
++      err |= __put_user(INSN_BLE_SR2_R0,
+                       &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
++      err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
+-      start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
+-      end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
++      start = (unsigned long) &frame->tramp[0];
++      end = (unsigned long) &frame->tramp[TRAMP_SIZE];
+       flush_user_dcache_range_asm(start, end);
+       flush_user_icache_range_asm(start, end);
+       /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
+-       * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
++       * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
+        * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
+        */
+       rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
+--- a/arch/parisc/kernel/signal32.h
++++ b/arch/parisc/kernel/signal32.h
+@@ -36,7 +36,7 @@ struct compat_regfile {
+         compat_int_t rf_sar;
+ };
+-#define COMPAT_SIGRETURN_TRAMP 3
++#define COMPAT_SIGRETURN_TRAMP 4
+ #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
+ #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
+                               COMPAT_SIGRESTARTBLOCK_TRAMP)
diff --git a/queue-5.15/s390-boot-simplify-and-fix-kernel-memory-layout-setup.patch b/queue-5.15/s390-boot-simplify-and-fix-kernel-memory-layout-setup.patch
new file mode 100644 (file)
index 0000000..8058bf5
--- /dev/null
@@ -0,0 +1,165 @@
+From 9a39abb7c9aab50eec4ac4421e9ee7f3de013d24 Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Thu, 14 Oct 2021 13:53:54 +0200
+Subject: s390/boot: simplify and fix kernel memory layout setup
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit 9a39abb7c9aab50eec4ac4421e9ee7f3de013d24 upstream.
+
+Initial KASAN shadow memory range was picked to preserve original kernel
+modules area position. With protected execution support, which might
+impose addressing limitation on vmalloc area and hence affect modules
+area position, current fixed KASAN shadow memory range is only making
+kernel memory layout setup more complex. So move it to the very end of
+available virtual space and simplify calculations.
+
+At the same time return to previous kernel address space split. In
+particular commit 0c4f2623b957 ("s390: setup kernel memory layout
+early") introduced precise identity map size calculation and keeping
+vmemmap left most starting from a fresh region table entry. This didn't
+take into account additional mapping region requirement for potential
+DCSS mapping above available physical memory. So go back to virtual
+space split between 1:1 mapping & vmemmap array once vmalloc area size
+is subtracted.
+
+Cc: stable@vger.kernel.org
+Fixes: 0c4f2623b957 ("s390: setup kernel memory layout early")
+Reported-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/Kconfig        |    2 -
+ arch/s390/boot/startup.c |   88 ++++++++++++++++-------------------------------
+ 2 files changed, 32 insertions(+), 58 deletions(-)
+
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -47,7 +47,7 @@ config ARCH_SUPPORTS_UPROBES
+ config KASAN_SHADOW_OFFSET
+       hex
+       depends on KASAN
+-      default 0x18000000000000
++      default 0x1C000000000000
+ config S390
+       def_bool y
+--- a/arch/s390/boot/startup.c
++++ b/arch/s390/boot/startup.c
+@@ -148,82 +148,56 @@ static void setup_ident_map_size(unsigne
+ static void setup_kernel_memory_layout(void)
+ {
+-      bool vmalloc_size_verified = false;
+-      unsigned long vmemmap_off;
+-      unsigned long vspace_left;
++      unsigned long vmemmap_start;
+       unsigned long rte_size;
+       unsigned long pages;
+-      unsigned long vmax;
+       pages = ident_map_size / PAGE_SIZE;
+       /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+       vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
+       /* choose kernel address space layout: 4 or 3 levels. */
+-      vmemmap_off = round_up(ident_map_size, _REGION3_SIZE);
++      vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
+       if (IS_ENABLED(CONFIG_KASAN) ||
+           vmalloc_size > _REGION2_SIZE ||
+-          vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE)
+-              vmax = _REGION1_SIZE;
+-      else
+-              vmax = _REGION2_SIZE;
+-
+-      /* keep vmemmap_off aligned to a top level region table entry */
+-      rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE;
+-      MODULES_END = vmax;
+-      if (is_prot_virt_host()) {
+-              /*
+-               * forcing modules and vmalloc area under the ultravisor
+-               * secure storage limit, so that any vmalloc allocation
+-               * we do could be used to back secure guest storage.
+-               */
+-              adjust_to_uv_max(&MODULES_END);
+-      }
+-
+-#ifdef CONFIG_KASAN
+-      if (MODULES_END < vmax) {
+-              /* force vmalloc and modules below kasan shadow */
+-              MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
++          vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
++                  _REGION2_SIZE) {
++              MODULES_END = _REGION1_SIZE;
++              rte_size = _REGION2_SIZE;
+       } else {
+-              /*
+-               * leave vmalloc and modules above kasan shadow but make
+-               * sure they don't overlap with it
+-               */
+-              vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN);
+-              vmalloc_size_verified = true;
+-              vspace_left = KASAN_SHADOW_START;
++              MODULES_END = _REGION2_SIZE;
++              rte_size = _REGION3_SIZE;
+       }
++      /*
++       * forcing modules and vmalloc area under the ultravisor
++       * secure storage limit, so that any vmalloc allocation
++       * we do could be used to back secure guest storage.
++       */
++      adjust_to_uv_max(&MODULES_END);
++#ifdef CONFIG_KASAN
++      /* force vmalloc and modules below kasan shadow */
++      MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+ #endif
+       MODULES_VADDR = MODULES_END - MODULES_LEN;
+       VMALLOC_END = MODULES_VADDR;
+-      if (vmalloc_size_verified) {
+-              VMALLOC_START = VMALLOC_END - vmalloc_size;
+-      } else {
+-              vmemmap_off = round_up(ident_map_size, rte_size);
+-
+-              if (vmemmap_off + vmemmap_size > VMALLOC_END ||
+-                  vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) {
+-                      /*
+-                       * allow vmalloc area to occupy up to 1/2 of
+-                       * the rest virtual space left.
+-                       */
+-                      vmalloc_size = min(vmalloc_size, VMALLOC_END / 2);
+-              }
+-              VMALLOC_START = VMALLOC_END - vmalloc_size;
+-              vspace_left = VMALLOC_START;
+-      }
++      /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
++      vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
++      VMALLOC_START = VMALLOC_END - vmalloc_size;
+-      pages = vspace_left / (PAGE_SIZE + sizeof(struct page));
++      /* split remaining virtual space between 1:1 mapping & vmemmap array */
++      pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+       pages = SECTION_ALIGN_UP(pages);
+-      vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size);
+-      /* keep vmemmap left most starting from a fresh region table entry */
+-      vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size));
+-      /* take care that identity map is lower then vmemmap */
+-      ident_map_size = min(ident_map_size, vmemmap_off);
++      /* keep vmemmap_start aligned to a top level region table entry */
++      vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
++      /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
++      vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
++      /* make sure identity map doesn't overlay with vmemmap */
++      ident_map_size = min(ident_map_size, vmemmap_start);
+       vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
+-      VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START);
+-      vmemmap = (struct page *)vmemmap_off;
++      /* make sure vmemmap doesn't overlay with vmalloc area */
++      VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
++      vmemmap = (struct page *)vmemmap_start;
+ }
+ /*
diff --git a/queue-5.15/s390-dump-fix-copying-to-user-space-of-swapped-kdump-oldmem.patch b/queue-5.15/s390-dump-fix-copying-to-user-space-of-swapped-kdump-oldmem.patch
new file mode 100644 (file)
index 0000000..fc173af
--- /dev/null
@@ -0,0 +1,41 @@
+From 3b90954419d4c05651de9cce6d7632bcf6977678 Mon Sep 17 00:00:00 2001
+From: Alexander Egorenkov <egorenar@linux.ibm.com>
+Date: Mon, 15 Nov 2021 07:40:25 +0100
+Subject: s390/dump: fix copying to user-space of swapped kdump oldmem
+
+From: Alexander Egorenkov <egorenar@linux.ibm.com>
+
+commit 3b90954419d4c05651de9cce6d7632bcf6977678 upstream.
+
+This commit fixes a bug introduced by commit e9e7870f90e3 ("s390/dump:
+introduce boot data 'oldmem_data'").
+OLDMEM_BASE was mistakenly replaced by oldmem_data.size instead of
+oldmem_data.start.
+
+This bug caused the following error during kdump:
+kdump.sh[878]: No program header covering vaddr 0x3434f5245found kexec bug?
+
+Fixes: e9e7870f90e3 ("s390/dump: introduce boot data 'oldmem_data'")
+Cc: stable@vger.kernel.org # 5.15+
+Signed-off-by: Alexander Egorenkov <egorenar@linux.ibm.com>
+Reviewed-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/crash_dump.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/kernel/crash_dump.c
++++ b/arch/s390/kernel/crash_dump.c
+@@ -191,8 +191,8 @@ static int copy_oldmem_user(void __user
+                               return rc;
+               } else {
+                       /* Check for swapped kdump oldmem areas */
+-                      if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) {
+-                              from -= oldmem_data.size;
++                      if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
++                              from -= oldmem_data.start;
+                               len = min(count, oldmem_data.size - from);
+                       } else if (oldmem_data.start && from < oldmem_data.size) {
+                               len = min(count, oldmem_data.size - from);
diff --git a/queue-5.15/s390-kexec-fix-memory-leak-of-ipl-report-buffer.patch b/queue-5.15/s390-kexec-fix-memory-leak-of-ipl-report-buffer.patch
new file mode 100644 (file)
index 0000000..e14e44f
--- /dev/null
@@ -0,0 +1,85 @@
+From 4aa9340584e37debef06fa99b56d064beb723891 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Tue, 16 Nov 2021 11:31:01 +0800
+Subject: s390/kexec: fix memory leak of ipl report buffer
+
+From: Baoquan He <bhe@redhat.com>
+
+commit 4aa9340584e37debef06fa99b56d064beb723891 upstream.
+
+unreferenced object 0x38000195000 (size 4096):
+  comm "kexec", pid 8548, jiffies 4294953647 (age 32443.270s)
+  hex dump (first 32 bytes):
+    00 00 00 c8 20 00 00 00 00 00 00 c0 02 80 00 00  .... ...........
+    40 40 40 40 40 40 40 40 00 00 00 00 00 00 00 00  @@@@@@@@........
+  backtrace:
+    [<0000000011a2f199>] __vmalloc_node_range+0xc0/0x140
+    [<0000000081fa2752>] vzalloc+0x5a/0x70
+    [<0000000063a4c92d>] ipl_report_finish+0x2c/0x180
+    [<00000000553304da>] kexec_file_add_ipl_report+0xf4/0x150
+    [<00000000862d033f>] kexec_file_add_components+0x124/0x160
+    [<000000000d2717bb>] arch_kexec_kernel_image_load+0x62/0x90
+    [<000000002e0373b6>] kimage_file_alloc_init+0x1aa/0x2e0
+    [<0000000060f2d14f>] __do_sys_kexec_file_load+0x17c/0x2c0
+    [<000000008c86fe5a>] __s390x_sys_kexec_file_load+0x40/0x50
+    [<000000001fdb9dac>] __do_syscall+0x1bc/0x1f0
+    [<000000003ee4258d>] system_call+0x78/0xa0
+
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Reviewed-by: Philipp Rudo <prudo@redhat.com>
+Fixes: 99feaa717e55 ("s390/kexec_file: Create ipl report and pass to next kernel")
+Cc: <stable@vger.kernel.org> # v5.2: 20c76e242e70: s390/kexec: fix return code handling
+Cc: <stable@vger.kernel.org> # v5.2
+Link: https://lore.kernel.org/r/20211116033101.GD21646@MiWiFi-R3L-srv
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/kexec.h         |    6 ++++++
+ arch/s390/kernel/machine_kexec_file.c |   10 ++++++++++
+ 2 files changed, 16 insertions(+)
+
+--- a/arch/s390/include/asm/kexec.h
++++ b/arch/s390/include/asm/kexec.h
+@@ -74,6 +74,12 @@ void *kexec_file_add_components(struct k
+ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+                        unsigned long addr);
++#define ARCH_HAS_KIMAGE_ARCH
++
++struct kimage_arch {
++      void *ipl_buf;
++};
++
+ extern const struct kexec_file_ops s390_kexec_image_ops;
+ extern const struct kexec_file_ops s390_kexec_elf_ops;
+--- a/arch/s390/kernel/machine_kexec_file.c
++++ b/arch/s390/kernel/machine_kexec_file.c
+@@ -12,6 +12,7 @@
+ #include <linux/kexec.h>
+ #include <linux/module_signature.h>
+ #include <linux/verification.h>
++#include <linux/vmalloc.h>
+ #include <asm/boot_data.h>
+ #include <asm/ipl.h>
+ #include <asm/setup.h>
+@@ -206,6 +207,7 @@ static int kexec_file_add_ipl_report(str
+               goto out;
+       buf.bufsz = data->report->size;
+       buf.memsz = buf.bufsz;
++      image->arch.ipl_buf = buf.buffer;
+       data->memsz += buf.memsz;
+@@ -327,3 +329,11 @@ int arch_kexec_kernel_image_probe(struct
+       return kexec_image_probe_default(image, buf, buf_len);
+ }
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++      vfree(image->arch.ipl_buf);
++      image->arch.ipl_buf = NULL;
++
++      return kexec_image_post_load_cleanup_default(image);
++}
diff --git a/queue-5.15/s390-setup-avoid-reserving-memory-above-identity-mapping.patch b/queue-5.15/s390-setup-avoid-reserving-memory-above-identity-mapping.patch
new file mode 100644 (file)
index 0000000..9ac06b9
--- /dev/null
@@ -0,0 +1,59 @@
+From 420f48f636b98fd685f44a3acc4c0a7c0840910d Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Thu, 14 Oct 2021 13:33:45 +0200
+Subject: s390/setup: avoid reserving memory above identity mapping
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit 420f48f636b98fd685f44a3acc4c0a7c0840910d upstream.
+
+Such reserved memory region, if not cleaned up later causes problems when
+memblock_free_all() is called to release free pages to the buddy allocator
+and those reserved regions are carried over to reserve_bootmem_region()
+which marks the pages as PageReserved.
+
+Instead use memblock_set_current_limit() to make sure memblock allocations
+do not go over identity mapping (which could happen when "mem=" option
+is used or during kdump).
+
+Cc: stable@vger.kernel.org
+Fixes: 73045a08cf55 ("s390: unify identity mapping limits handling")
+Reported-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/setup.c |   10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -634,14 +634,6 @@ static struct notifier_block kdump_mem_n
+ #endif
+ /*
+- * Make sure that the area above identity mapping is protected
+- */
+-static void __init reserve_above_ident_map(void)
+-{
+-      memblock_reserve(ident_map_size, ULONG_MAX);
+-}
+-
+-/*
+  * Reserve memory for kdump kernel to be loaded with kexec
+  */
+ static void __init reserve_crashkernel(void)
+@@ -1005,11 +997,11 @@ void __init setup_arch(char **cmdline_p)
+       setup_control_program_code();
+       /* Do some memory reservations *before* memory is added to memblock */
+-      reserve_above_ident_map();
+       reserve_kernel();
+       reserve_initrd();
+       reserve_certificate_list();
+       reserve_mem_detect_info();
++      memblock_set_current_limit(ident_map_size);
+       memblock_allow_resize();
+       /* Get information about *all* installed memory */
diff --git a/queue-5.15/s390-vdso-filter-out-mstack-guard-and-mstack-size.patch b/queue-5.15/s390-vdso-filter-out-mstack-guard-and-mstack-size.patch
new file mode 100644 (file)
index 0000000..28c1291
--- /dev/null
@@ -0,0 +1,62 @@
+From 00b55eaf45549ce26424224d069a091c7e5d8bac Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Thu, 11 Nov 2021 10:58:26 +0100
+Subject: s390/vdso: filter out -mstack-guard and -mstack-size
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit 00b55eaf45549ce26424224d069a091c7e5d8bac upstream.
+
+When CONFIG_VMAP_STACK is disabled, the user can enable CONFIG_STACK_CHECK,
+which adds a stack overflow check to each C function in the kernel. This is
+also done for functions in the vdso page. These functions are run in user
+context and user stack sizes are usually different to what the kernel uses.
+This might trigger the stack check although the stack size is valid.
+Therefore filter the -mstack-guard and -mstack-size flags when compiling
+vdso C files.
+
+Cc: stable@kernel.org # 5.10+
+Fixes: 4bff8cb54502 ("s390: convert to GENERIC_VDSO")
+Reported-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/Makefile               |   10 ++++++----
+ arch/s390/kernel/vdso64/Makefile |    5 +++--
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/Makefile
++++ b/arch/s390/Makefile
+@@ -79,10 +79,12 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y
+ KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
+ ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
+-cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
+-ifeq ($(call cc-option,-mstack-size=8192),)
+-cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
+-endif
++  CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
++  ifeq ($(call cc-option,-mstack-size=8192),)
++    CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
++  endif
++  export CC_FLAGS_CHECK_STACK
++  cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
+ endif
+ ifdef CONFIG_EXPOLINE
+--- a/arch/s390/kernel/vdso64/Makefile
++++ b/arch/s390/kernel/vdso64/Makefile
+@@ -8,8 +8,9 @@ ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
+ include $(srctree)/lib/vdso/Makefile
+ obj-vdso64 = vdso_user_wrapper.o note.o
+ obj-cvdso64 = vdso64_generic.o getcpu.o
+-CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+-CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
++VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
++CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
++CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
+ # Build rules
diff --git a/queue-5.15/scsi-qla2xxx-fix-mailbox-direction-flags-in-qla2xxx_get_adapter_id.patch b/queue-5.15/scsi-qla2xxx-fix-mailbox-direction-flags-in-qla2xxx_get_adapter_id.patch
new file mode 100644 (file)
index 0000000..2804d0a
--- /dev/null
@@ -0,0 +1,39 @@
+From 392006871bb26166bcfafa56faf49431c2cfaaa8 Mon Sep 17 00:00:00 2001
+From: "Ewan D. Milne" <emilne@redhat.com>
+Date: Mon, 8 Nov 2021 13:30:12 -0500
+Subject: scsi: qla2xxx: Fix mailbox direction flags in qla2xxx_get_adapter_id()
+
+From: Ewan D. Milne <emilne@redhat.com>
+
+commit 392006871bb26166bcfafa56faf49431c2cfaaa8 upstream.
+
+The SCM changes set the flags in mcp->out_mb instead of mcp->in_mb so the
+data was not actually being read into the mcp->mb[] array from the adapter.
+
+Link: https://lore.kernel.org/r/20211108183012.13895-1-emilne@redhat.com
+Fixes: 9f2475fe7406 ("scsi: qla2xxx: SAN congestion management implementation")
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Reviewed-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Ewan D. Milne <emilne@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/qla2xxx/qla_mbx.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -1695,10 +1695,8 @@ qla2x00_get_adapter_id(scsi_qla_host_t *
+               mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10;
+       if (IS_FWI2_CAPABLE(vha->hw))
+               mcp->in_mb |= MBX_19|MBX_18|MBX_17|MBX_16;
+-      if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw)) {
+-              mcp->in_mb |= MBX_15;
+-              mcp->out_mb |= MBX_7|MBX_21|MBX_22|MBX_23;
+-      }
++      if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw))
++              mcp->in_mb |= MBX_15|MBX_21|MBX_22|MBX_23;
+       mcp->tov = MBX_TOV_SECONDS;
+       mcp->flags = 0;
index 1d51243d00ceb1e24d28e1c215c336cd01487b62..d080d9d1978edec38d3249aedbaa87ece2a229af 100644 (file)
@@ -188,3 +188,38 @@ s390-kexec-fix-return-code-handling.patch
 blk-cgroup-fix-missing-put-device-in-error-path-from.patch
 dmaengine-remove-debugfs-ifdef.patch
 tun-fix-bonding-active-backup-with-arp-monitoring.patch
+revert-mark-pstore-blk-as-broken.patch
+pstore-blk-use-lu-to-format-unsigned-long.patch
+hexagon-export-raw-i-o-routines-for-modules.patch
+hexagon-clean-up-timer-regs.h.patch
+tipc-check-for-null-after-calling-kmemdup.patch
+ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch
+shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch
+mm-kmemleak-slob-respect-slab_noleaktrace-flag.patch
+hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error.patch
+kmap_local-don-t-assume-kmap-ptes-are-linear-arrays-in-memory.patch
+mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation.patch
+mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock.patch
+x86-boot-pull-up-cmdline-preparation-and-early-param-parsing.patch
+x86-sgx-fix-free-page-accounting.patch
+x86-hyperv-fix-null-deref-in-set_hv_tscchange_cb-if-hyper-v-setup-fails.patch
+kvm-x86-assume-a-64-bit-hypercall-for-guests-with-protected-state.patch
+kvm-x86-fix-uninitialized-eoi_exit_bitmap-usage-in-vcpu_load_eoi_exitmap.patch
+kvm-x86-mmu-include-efer.lma-in-extended-mmu-role.patch
+kvm-x86-xen-fix-get_attr-of-kvm_xen_attr_type_shared_info.patch
+powerpc-signal32-fix-sigset_t-copy.patch
+powerpc-xive-change-irq-domain-to-a-tree-domain.patch
+powerpc-8xx-fix-pinned-tlbs-with-config_strict_kernel_rwx.patch
+revert-drm-i915-tgl-dsi-gate-the-ddi-clocks-after-pll-mapping.patch
+revert-parisc-reduce-sigreturn-trampoline-to-3-instructions.patch
+ata-libata-improve-ata_read_log_page-error-message.patch
+ata-libata-add-missing-ata_identify_page_supported-calls.patch
+scsi-qla2xxx-fix-mailbox-direction-flags-in-qla2xxx_get_adapter_id.patch
+pinctrl-ralink-include-ralink_regs.h-in-pinctrl-mt7620.c.patch
+s390-setup-avoid-reserving-memory-above-identity-mapping.patch
+s390-boot-simplify-and-fix-kernel-memory-layout-setup.patch
+s390-vdso-filter-out-mstack-guard-and-mstack-size.patch
+s390-kexec-fix-memory-leak-of-ipl-report-buffer.patch
+s390-dump-fix-copying-to-user-space-of-swapped-kdump-oldmem.patch
+block-check-admin-before-nice-for-ioprio_class_rt.patch
+fbdev-prevent-probing-generic-drivers-if-a-fb-is-already-registered.patch
diff --git a/queue-5.15/shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch b/queue-5.15/shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch
new file mode 100644 (file)
index 0000000..c0625ea
--- /dev/null
@@ -0,0 +1,386 @@
+From 85b6d24646e4125c591639841169baa98a2da503 Mon Sep 17 00:00:00 2001
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Date: Fri, 19 Nov 2021 16:43:21 -0800
+Subject: shm: extend forced shm destroy to support objects from several IPC nses
+
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+
+commit 85b6d24646e4125c591639841169baa98a2da503 upstream.
+
+Currently, the exit_shm() function not designed to work properly when
+task->sysvshm.shm_clist holds shm objects from different IPC namespaces.
+
+This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it
+leads to use-after-free (reproducer exists).
+
+This is an attempt to fix the problem by extending exit_shm mechanism to
+handle shm's destroy from several IPC ns'es.
+
+To achieve that we do several things:
+
+1. add a namespace (non-refcounted) pointer to the struct shmid_kernel
+
+2. during new shm object creation (newseg()/shmget syscall) we
+   initialize this pointer by current task IPC ns
+
+3. exit_shm() fully reworked such that it traverses over all shp's in
+   task->sysvshm.shm_clist and gets IPC namespace not from current task
+   as it was before but from shp's object itself, then call
+   shm_destroy(shp, ns).
+
+Note: We need to be really careful here, because as it was said before
+(1), our pointer to IPC ns non-refcnt'ed.  To be on the safe side we
+using special helper get_ipc_ns_not_zero() which allows to get IPC ns
+refcounter only if IPC ns not in the "state of destruction".
+
+Q/A
+
+Q: Why can we access shp->ns memory using non-refcounted pointer?
+A: Because shp object lifetime is always shorther than IPC namespace
+   lifetime, so, if we get shp object from the task->sysvshm.shm_clist
+   while holding task_lock(task) nobody can steal our namespace.
+
+Q: Does this patch change semantics of unshare/setns/clone syscalls?
+A: No. It's just fixes non-covered case when process may leave IPC
+   namespace without getting task->sysvshm.shm_clist list cleaned up.
+
+Link: https://lkml.kernel.org/r/67bb03e5-f79c-1815-e2bf-949c67047418@colorfullife.com
+Link: https://lkml.kernel.org/r/20211109151501.4921-1-manfred@colorfullife.com
+Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity")
+Co-developed-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Cc: "Eric W. Biederman" <ebiederm@xmission.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Cc: Vasily Averin <vvs@virtuozzo.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ipc_namespace.h |   15 +++
+ include/linux/sched/task.h    |    2 
+ ipc/shm.c                     |  189 +++++++++++++++++++++++++++++++-----------
+ 3 files changed, 159 insertions(+), 47 deletions(-)
+
+--- a/include/linux/ipc_namespace.h
++++ b/include/linux/ipc_namespace.h
+@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_
+       return ns;
+ }
++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
++{
++      if (ns) {
++              if (refcount_inc_not_zero(&ns->ns.count))
++                      return ns;
++      }
++
++      return NULL;
++}
++
+ extern void put_ipc_ns(struct ipc_namespace *ns);
+ #else
+ static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
+@@ -146,6 +156,11 @@ static inline struct ipc_namespace *get_
+ {
+       return ns;
+ }
++
++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
++{
++      return ns;
++}
+ static inline void put_ipc_ns(struct ipc_namespace *ns)
+ {
+--- a/include/linux/sched/task.h
++++ b/include/linux/sched/task.h
+@@ -158,7 +158,7 @@ static inline struct vm_struct *task_sta
+  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
+  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
+  * pins the final release of task.io_context.  Also protects ->cpuset and
+- * ->cgroup.subsys[]. And ->vfork_done.
++ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
+  *
+  * Nests both inside and outside of read_lock(&tasklist_lock).
+  * It must not be nested with write_lock_irq(&tasklist_lock),
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the ke
+       struct pid              *shm_lprid;
+       struct ucounts          *mlock_ucounts;
+-      /* The task created the shm object.  NULL if the task is dead. */
++      /*
++       * The task created the shm object, for
++       * task_lock(shp->shm_creator)
++       */
+       struct task_struct      *shm_creator;
+-      struct list_head        shm_clist;      /* list by creator */
++
++      /*
++       * List by creator. task_lock(->shm_creator) required for read/write.
++       * If list_empty(), then the creator is dead already.
++       */
++      struct list_head        shm_clist;
++      struct ipc_namespace    *ns;
+ } __randomize_layout;
+ /* shm_mode upper byte flags */
+@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_names
+       struct shmid_kernel *shp;
+       shp = container_of(ipcp, struct shmid_kernel, shm_perm);
++      WARN_ON(ns != shp->ns);
+       if (shp->shm_nattch) {
+               shp->shm_perm.mode |= SHM_DEST;
+@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head
+       kfree(shp);
+ }
+-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
++/*
++ * It has to be called with shp locked.
++ * It must be called before ipc_rmid()
++ */
++static inline void shm_clist_rm(struct shmid_kernel *shp)
++{
++      struct task_struct *creator;
++
++      /* ensure that shm_creator does not disappear */
++      rcu_read_lock();
++
++      /*
++       * A concurrent exit_shm may do a list_del_init() as well.
++       * Just do nothing if exit_shm already did the work
++       */
++      if (!list_empty(&shp->shm_clist)) {
++              /*
++               * shp->shm_creator is guaranteed to be valid *only*
++               * if shp->shm_clist is not empty.
++               */
++              creator = shp->shm_creator;
++
++              task_lock(creator);
++              /*
++               * list_del_init() is a nop if the entry was already removed
++               * from the list.
++               */
++              list_del_init(&shp->shm_clist);
++              task_unlock(creator);
++      }
++      rcu_read_unlock();
++}
++
++static inline void shm_rmid(struct shmid_kernel *s)
+ {
+-      list_del(&s->shm_clist);
+-      ipc_rmid(&shm_ids(ns), &s->shm_perm);
++      shm_clist_rm(s);
++      ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
+ }
+@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_names
+       shm_file = shp->shm_file;
+       shp->shm_file = NULL;
+       ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+-      shm_rmid(ns, shp);
++      shm_rmid(shp);
+       shm_unlock(shp);
+       if (!is_file_hugepages(shm_file))
+               shmem_lock(shm_file, 0, shp->mlock_ucounts);
+@@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_names
+  *
+  * 2) sysctl kernel.shm_rmid_forced is set to 1.
+  */
+-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
++static bool shm_may_destroy(struct shmid_kernel *shp)
+ {
+       return (shp->shm_nattch == 0) &&
+-             (ns->shm_rmid_forced ||
++             (shp->ns->shm_rmid_forced ||
+               (shp->shm_perm.mode & SHM_DEST));
+ }
+@@ -340,7 +383,7 @@ static void shm_close(struct vm_area_str
+       ipc_update_pid(&shp->shm_lprid, task_tgid(current));
+       shp->shm_dtim = ktime_get_real_seconds();
+       shp->shm_nattch--;
+-      if (shm_may_destroy(ns, shp))
++      if (shm_may_destroy(shp))
+               shm_destroy(ns, shp);
+       else
+               shm_unlock(shp);
+@@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int
+        *
+        * As shp->* are changed under rwsem, it's safe to skip shp locking.
+        */
+-      if (shp->shm_creator != NULL)
++      if (!list_empty(&shp->shm_clist))
+               return 0;
+-      if (shm_may_destroy(ns, shp)) {
++      if (shm_may_destroy(shp)) {
+               shm_lock_by_ptr(shp);
+               shm_destroy(ns, shp);
+       }
+@@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_nam
+ /* Locking assumes this will only be called with task == current */
+ void exit_shm(struct task_struct *task)
+ {
+-      struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+-      struct shmid_kernel *shp, *n;
++      for (;;) {
++              struct shmid_kernel *shp;
++              struct ipc_namespace *ns;
+-      if (list_empty(&task->sysvshm.shm_clist))
+-              return;
++              task_lock(task);
++
++              if (list_empty(&task->sysvshm.shm_clist)) {
++                      task_unlock(task);
++                      break;
++              }
++
++              shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
++                              shm_clist);
+-      /*
+-       * If kernel.shm_rmid_forced is not set then only keep track of
+-       * which shmids are orphaned, so that a later set of the sysctl
+-       * can clean them up.
+-       */
+-      if (!ns->shm_rmid_forced) {
+-              down_read(&shm_ids(ns).rwsem);
+-              list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+-                      shp->shm_creator = NULL;
+               /*
+-               * Only under read lock but we are only called on current
+-               * so no entry on the list will be shared.
++               * 1) Get pointer to the ipc namespace. It is worth to say
++               * that this pointer is guaranteed to be valid because
++               * shp lifetime is always shorter than namespace lifetime
++               * in which shp lives.
++               * We taken task_lock it means that shp won't be freed.
+                */
+-              list_del(&task->sysvshm.shm_clist);
+-              up_read(&shm_ids(ns).rwsem);
+-              return;
+-      }
++              ns = shp->ns;
+-      /*
+-       * Destroy all already created segments, that were not yet mapped,
+-       * and mark any mapped as orphan to cover the sysctl toggling.
+-       * Destroy is skipped if shm_may_destroy() returns false.
+-       */
+-      down_write(&shm_ids(ns).rwsem);
+-      list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+-              shp->shm_creator = NULL;
++              /*
++               * 2) If kernel.shm_rmid_forced is not set then only keep track of
++               * which shmids are orphaned, so that a later set of the sysctl
++               * can clean them up.
++               */
++              if (!ns->shm_rmid_forced)
++                      goto unlink_continue;
+-              if (shm_may_destroy(ns, shp)) {
+-                      shm_lock_by_ptr(shp);
+-                      shm_destroy(ns, shp);
++              /*
++               * 3) get a reference to the namespace.
++               *    The refcount could be already 0. If it is 0, then
++               *    the shm objects will be free by free_ipc_work().
++               */
++              ns = get_ipc_ns_not_zero(ns);
++              if (!ns) {
++unlink_continue:
++                      list_del_init(&shp->shm_clist);
++                      task_unlock(task);
++                      continue;
+               }
+-      }
+-      /* Remove the list head from any segments still attached. */
+-      list_del(&task->sysvshm.shm_clist);
+-      up_write(&shm_ids(ns).rwsem);
++              /*
++               * 4) get a reference to shp.
++               *   This cannot fail: shm_clist_rm() is called before
++               *   ipc_rmid(), thus the refcount cannot be 0.
++               */
++              WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
++
++              /*
++               * 5) unlink the shm segment from the list of segments
++               *    created by current.
++               *    This must be done last. After unlinking,
++               *    only the refcounts obtained above prevent IPC_RMID
++               *    from destroying the segment or the namespace.
++               */
++              list_del_init(&shp->shm_clist);
++
++              task_unlock(task);
++
++              /*
++               * 6) we have all references
++               *    Thus lock & if needed destroy shp.
++               */
++              down_write(&shm_ids(ns).rwsem);
++              shm_lock_by_ptr(shp);
++              /*
++               * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
++               * safe to call ipc_rcu_putref here
++               */
++              ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
++
++              if (ipc_valid_object(&shp->shm_perm)) {
++                      if (shm_may_destroy(shp))
++                              shm_destroy(ns, shp);
++                      else
++                              shm_unlock(shp);
++              } else {
++                      /*
++                       * Someone else deleted the shp from namespace
++                       * idr/kht while we have waited.
++                       * Just unlock and continue.
++                       */
++                      shm_unlock(shp);
++              }
++
++              up_write(&shm_ids(ns).rwsem);
++              put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
++      }
+ }
+ static vm_fault_t shm_fault(struct vm_fault *vmf)
+@@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *
+       if (error < 0)
+               goto no_id;
++      shp->ns = ns;
++
++      task_lock(current);
+       list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
++      task_unlock(current);
+       /*
+        * shmid gets reported as "inode#" in /proc/pid/maps.
+@@ -1573,7 +1669,8 @@ out_nattch:
+       down_write(&shm_ids(ns).rwsem);
+       shp = shm_lock(ns, shmid);
+       shp->shm_nattch--;
+-      if (shm_may_destroy(ns, shp))
++
++      if (shm_may_destroy(shp))
+               shm_destroy(ns, shp);
+       else
+               shm_unlock(shp);
diff --git a/queue-5.15/tipc-check-for-null-after-calling-kmemdup.patch b/queue-5.15/tipc-check-for-null-after-calling-kmemdup.patch
new file mode 100644 (file)
index 0000000..5d04eec
--- /dev/null
@@ -0,0 +1,42 @@
+From 3e6db079751afd527bf3db32314ae938dc571916 Mon Sep 17 00:00:00 2001
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+Date: Mon, 15 Nov 2021 08:01:43 -0800
+Subject: tipc: check for null after calling kmemdup
+
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+
+commit 3e6db079751afd527bf3db32314ae938dc571916 upstream.
+
+kmemdup can return a null pointer so need to check for it, otherwise
+the null key will be dereferenced later in tipc_crypto_key_xmit as
+can be seen in the trace [1].
+
+Cc: tipc-discussion@lists.sourceforge.net
+Cc: stable@vger.kernel.org # 5.15, 5.14, 5.10
+
+[1] https://syzkaller.appspot.com/bug?id=bca180abb29567b189efdbdb34cbf7ba851c2a58
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Link: https://lore.kernel.org/r/20211115160143.5099-1-tadeusz.struk@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/crypto.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/tipc/crypto.c
++++ b/net/tipc/crypto.c
+@@ -597,6 +597,10 @@ static int tipc_aead_init(struct tipc_ae
+       tmp->cloned = NULL;
+       tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
+       tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
++      if (!tmp->key) {
++              tipc_aead_free(&tmp->rcu);
++              return -ENOMEM;
++      }
+       memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
+       atomic_set(&tmp->users, 0);
+       atomic64_set(&tmp->seqno, 0);
diff --git a/queue-5.15/x86-boot-pull-up-cmdline-preparation-and-early-param-parsing.patch b/queue-5.15/x86-boot-pull-up-cmdline-preparation-and-early-param-parsing.patch
new file mode 100644 (file)
index 0000000..cbeff6d
--- /dev/null
@@ -0,0 +1,137 @@
+From 8d48bf8206f77aa8687f0e241e901e5197e52423 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 5 Nov 2021 10:41:51 +0100
+Subject: x86/boot: Pull up cmdline preparation and early param parsing
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 8d48bf8206f77aa8687f0e241e901e5197e52423 upstream.
+
+Dan reports that Anjaneya Chagam can no longer use the efi=nosoftreserve
+kernel command line parameter to suppress "soft reservation" behavior.
+
+This is due to the fact that the following call-chain happens at boot:
+
+early_reserve_memory
+|-> efi_memblock_x86_reserve_range
+    |-> efi_fake_memmap_early
+
+which does
+
+        if (!efi_soft_reserve_enabled())
+                return;
+
+and that would have set EFI_MEM_NO_SOFT_RESERVE after having parsed
+"nosoftreserve".
+
+However, parse_early_param() gets called *after* it, leading to the boot
+cmdline not being taken into account.
+
+Therefore, carve out the command line preparation into a separate
+function which does the early param parsing too. So that it all goes
+together.
+
+And then call that function before early_reserve_memory() so that the
+params would have been parsed by then.
+
+Fixes: 8aa83e6395ce ("x86/setup: Call early_reserve_memory() earlier")
+Reported-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Tested-by: Anjaneya Chagam <anjaneya.chagam@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/e8dd8993c38702ee6dd73b3c11f158617e665607.camel@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/setup.c |   66 ++++++++++++++++++++++++++++--------------------
+ 1 file changed, 39 insertions(+), 27 deletions(-)
+
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -742,6 +742,28 @@ dump_kernel_offset(struct notifier_block
+       return 0;
+ }
++static char *prepare_command_line(void)
++{
++#ifdef CONFIG_CMDLINE_BOOL
++#ifdef CONFIG_CMDLINE_OVERRIDE
++      strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
++#else
++      if (builtin_cmdline[0]) {
++              /* append boot loader cmdline to builtin */
++              strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
++              strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
++              strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
++      }
++#endif
++#endif
++
++      strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
++
++      parse_early_param();
++
++      return command_line;
++}
++
+ /*
+  * Determine if we were loaded by an EFI loader.  If so, then we have also been
+  * passed the efi memmap, systab, etc., so we should use these data structures
+@@ -831,6 +853,23 @@ void __init setup_arch(char **cmdline_p)
+       x86_init.oem.arch_setup();
+       /*
++       * x86_configure_nx() is called before parse_early_param() (called by
++       * prepare_command_line()) to detect whether hardware doesn't support
++       * NX (so that the early EHCI debug console setup can safely call
++       * set_fixmap()). It may then be called again from within noexec_setup()
++       * during parsing early parameters to honor the respective command line
++       * option.
++       */
++      x86_configure_nx();
++
++      /*
++       * This parses early params and it needs to run before
++       * early_reserve_memory() because latter relies on such settings
++       * supplied as early params.
++       */
++      *cmdline_p = prepare_command_line();
++
++      /*
+        * Do some memory reservations *before* memory is added to memblock, so
+        * memblock allocations won't overwrite it.
+        *
+@@ -863,33 +902,6 @@ void __init setup_arch(char **cmdline_p)
+       bss_resource.start = __pa_symbol(__bss_start);
+       bss_resource.end = __pa_symbol(__bss_stop)-1;
+-#ifdef CONFIG_CMDLINE_BOOL
+-#ifdef CONFIG_CMDLINE_OVERRIDE
+-      strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+-#else
+-      if (builtin_cmdline[0]) {
+-              /* append boot loader cmdline to builtin */
+-              strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+-              strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+-              strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+-      }
+-#endif
+-#endif
+-
+-      strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+-      *cmdline_p = command_line;
+-
+-      /*
+-       * x86_configure_nx() is called before parse_early_param() to detect
+-       * whether hardware doesn't support NX (so that the early EHCI debug
+-       * console setup can safely call set_fixmap()). It may then be called
+-       * again from within noexec_setup() during parsing early parameters
+-       * to honor the respective command line option.
+-       */
+-      x86_configure_nx();
+-
+-      parse_early_param();
+-
+ #ifdef CONFIG_MEMORY_HOTPLUG
+       /*
+        * Memory used by the kernel cannot be hot-removed because Linux
diff --git a/queue-5.15/x86-hyperv-fix-null-deref-in-set_hv_tscchange_cb-if-hyper-v-setup-fails.patch b/queue-5.15/x86-hyperv-fix-null-deref-in-set_hv_tscchange_cb-if-hyper-v-setup-fails.patch
new file mode 100644 (file)
index 0000000..c7bfdd1
--- /dev/null
@@ -0,0 +1,57 @@
+From daf972118c517b91f74ff1731417feb4270625a4 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 4 Nov 2021 18:22:38 +0000
+Subject: x86/hyperv: Fix NULL deref in set_hv_tscchange_cb() if Hyper-V setup fails
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit daf972118c517b91f74ff1731417feb4270625a4 upstream.
+
+Check for a valid hv_vp_index array prior to derefencing hv_vp_index when
+setting Hyper-V's TSC change callback.  If Hyper-V setup failed in
+hyperv_init(), the kernel will still report that it's running under
+Hyper-V, but will have silently disabled nearly all functionality.
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000010
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 0 P4D 0
+  Oops: 0000 [#1] SMP
+  CPU: 4 PID: 1 Comm: swapper/0 Not tainted 5.15.0-rc2+ #75
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+  RIP: 0010:set_hv_tscchange_cb+0x15/0xa0
+  Code: <8b> 04 82 8b 15 12 17 85 01 48 c1 e0 20 48 0d ee 00 01 00 f6 c6 08
+  ...
+  Call Trace:
+   kvm_arch_init+0x17c/0x280
+   kvm_init+0x31/0x330
+   vmx_init+0xba/0x13a
+   do_one_initcall+0x41/0x1c0
+   kernel_init_freeable+0x1f2/0x23b
+   kernel_init+0x16/0x120
+   ret_from_fork+0x22/0x30
+
+Fixes: 93286261de1b ("x86/hyperv: Reenlightenment notifications support")
+Cc: stable@vger.kernel.org
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://lore.kernel.org/r/20211104182239.1302956-2-seanjc@google.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/hyperv/hv_init.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/hyperv/hv_init.c
++++ b/arch/x86/hyperv/hv_init.c
+@@ -147,6 +147,9 @@ void set_hv_tscchange_cb(void (*cb)(void
+               return;
+       }
++      if (!hv_vp_index)
++              return;
++
+       hv_reenlightenment_cb = cb;
+       /* Make sure callback is registered before we write to MSRs */
diff --git a/queue-5.15/x86-sgx-fix-free-page-accounting.patch b/queue-5.15/x86-sgx-fix-free-page-accounting.patch
new file mode 100644 (file)
index 0000000..a1d6a8e
--- /dev/null
@@ -0,0 +1,167 @@
+From ac5d272a0ad0419f52e08c91953356e32b075af7 Mon Sep 17 00:00:00 2001
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 15 Nov 2021 11:29:04 -0800
+Subject: x86/sgx: Fix free page accounting
+
+From: Reinette Chatre <reinette.chatre@intel.com>
+
+commit ac5d272a0ad0419f52e08c91953356e32b075af7 upstream.
+
+The SGX driver maintains a single global free page counter,
+sgx_nr_free_pages, that reflects the number of free pages available
+across all NUMA nodes. Correspondingly, a list of free pages is
+associated with each NUMA node and sgx_nr_free_pages is updated
+every time a page is added or removed from any of the free page
+lists. The main usage of sgx_nr_free_pages is by the reclaimer
+that runs when it (sgx_nr_free_pages) goes below a watermark
+to ensure that there are always some free pages available to, for
+example, support efficient page faults.
+
+With sgx_nr_free_pages accessed and modified from a few places
+it is essential to ensure that these accesses are done safely but
+this is not the case. sgx_nr_free_pages is read without any
+protection and updated with inconsistent protection by any one
+of the spin locks associated with the individual NUMA nodes.
+For example:
+
+      CPU_A                                 CPU_B
+      -----                                 -----
+ spin_lock(&nodeA->lock);              spin_lock(&nodeB->lock);
+ ...                                   ...
+ sgx_nr_free_pages--;  /* NOT SAFE */  sgx_nr_free_pages--;
+
+ spin_unlock(&nodeA->lock);            spin_unlock(&nodeB->lock);
+
+Since sgx_nr_free_pages may be protected by different spin locks
+while being modified from different CPUs, the following scenario
+is possible:
+
+      CPU_A                                CPU_B
+      -----                                -----
+{sgx_nr_free_pages = 100}
+ spin_lock(&nodeA->lock);              spin_lock(&nodeB->lock);
+ sgx_nr_free_pages--;                  sgx_nr_free_pages--;
+ /* LOAD sgx_nr_free_pages = 100 */    /* LOAD sgx_nr_free_pages = 100 */
+ /* sgx_nr_free_pages--          */    /* sgx_nr_free_pages--          */
+ /* STORE sgx_nr_free_pages = 99 */    /* STORE sgx_nr_free_pages = 99 */
+ spin_unlock(&nodeA->lock);            spin_unlock(&nodeB->lock);
+
+In the above scenario, sgx_nr_free_pages is decremented from two CPUs
+but instead of sgx_nr_free_pages ending with a value that is two less
+than it started with, it was only decremented by one while the number
+of free pages was actually reduced by two. The consequence of
+sgx_nr_free_pages not being protected is that its value may not
+accurately reflect the actual number of free pages on the system,
+impacting the availability of free pages in support of many flows.
+
+The problematic scenario is when the reclaimer does not run because it
+believes there to be sufficient free pages while any attempt to allocate
+a page fails because there are no free pages available. In the SGX driver
+the reclaimer's watermark is only 32 pages so after encountering the
+above example scenario 32 times a user space hang is possible when there
+are no more free pages because of repeated page faults caused by no
+free pages made available.
+
+The following flow was encountered:
+asm_exc_page_fault
+ ...
+   sgx_vma_fault()
+     sgx_encl_load_page()
+       sgx_encl_eldu() // Encrypted page needs to be loaded from backing
+                       // storage into newly allocated SGX memory page
+         sgx_alloc_epc_page() // Allocate a page of SGX memory
+           __sgx_alloc_epc_page() // Fails, no free SGX memory
+           ...
+           if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) // Wake reclaimer
+             wake_up(&ksgxd_waitq);
+           return -EBUSY; // Return -EBUSY giving reclaimer time to run
+       return -EBUSY;
+     return -EBUSY;
+   return VM_FAULT_NOPAGE;
+
+The reclaimer is triggered in above flow with the following code:
+
+static bool sgx_should_reclaim(unsigned long watermark)
+{
+        return sgx_nr_free_pages < watermark &&
+               !list_empty(&sgx_active_page_list);
+}
+
+In the problematic scenario there were no free pages available yet the
+value of sgx_nr_free_pages was above the watermark. The allocation of
+SGX memory thus always failed because of a lack of free pages while no
+free pages were made available because the reclaimer is never started
+because of sgx_nr_free_pages' incorrect value. The consequence was that
+user space kept encountering VM_FAULT_NOPAGE that caused the same
+address to be accessed repeatedly with the same result.
+
+Change the global free page counter to an atomic type that
+ensures simultaneous updates are done safely. While doing so, move
+the updating of the variable outside of the spin lock critical
+section to which it does not belong.
+
+Cc: stable@vger.kernel.org
+Fixes: 901ddbb9ecf5 ("x86/sgx: Add a basic NUMA allocation scheme to sgx_alloc_epc_page()")
+Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
+Link: https://lkml.kernel.org/r/a95a40743bbd3f795b465f30922dde7f1ea9e0eb.1637004094.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/sgx/main.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/cpu/sgx/main.c
++++ b/arch/x86/kernel/cpu/sgx/main.c
+@@ -28,8 +28,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_wai
+ static LIST_HEAD(sgx_active_page_list);
+ static DEFINE_SPINLOCK(sgx_reclaimer_lock);
+-/* The free page list lock protected variables prepend the lock. */
+-static unsigned long sgx_nr_free_pages;
++static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
+ /* Nodes with one or more EPC sections. */
+ static nodemask_t sgx_numa_mask;
+@@ -403,14 +402,15 @@ skip:
+               spin_lock(&node->lock);
+               list_add_tail(&epc_page->list, &node->free_page_list);
+-              sgx_nr_free_pages++;
+               spin_unlock(&node->lock);
++              atomic_long_inc(&sgx_nr_free_pages);
+       }
+ }
+ static bool sgx_should_reclaim(unsigned long watermark)
+ {
+-      return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
++      return atomic_long_read(&sgx_nr_free_pages) < watermark &&
++             !list_empty(&sgx_active_page_list);
+ }
+ static int ksgxd(void *p)
+@@ -471,9 +471,9 @@ static struct sgx_epc_page *__sgx_alloc_
+       page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
+       list_del_init(&page->list);
+-      sgx_nr_free_pages--;
+       spin_unlock(&node->lock);
++      atomic_long_dec(&sgx_nr_free_pages);
+       return page;
+ }
+@@ -625,9 +625,9 @@ void sgx_free_epc_page(struct sgx_epc_pa
+       spin_lock(&node->lock);
+       list_add_tail(&page->list, &node->free_page_list);
+-      sgx_nr_free_pages++;
+       spin_unlock(&node->lock);
++      atomic_long_inc(&sgx_nr_free_pages);
+ }
+ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,