--- /dev/null
+From 06f6c4c6c3e8354dceddd77bd58f9a7a84c67246 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Mon, 15 Nov 2021 12:47:26 +0900
+Subject: ata: libata: add missing ata_identify_page_supported() calls
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 06f6c4c6c3e8354dceddd77bd58f9a7a84c67246 upstream.
+
+ata_dev_config_ncq_prio() and ata_dev_config_devslp() both access pages
+of the IDENTIFY DEVICE data log. Before calling ata_read_log_page(),
+make sure to check for the existence of the IDENTIFY DEVICE data log and
+of the log page accessed using ata_identify_page_supported(). This
+avoids useless error messages from ata_read_log_page() and failures with
+some LLDD SCSI drivers using libsas.
+
+Reported-by: Nikolay <knv418@gmail.com>
+Cc: stable@kernel.org # 5.15
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Tested-by: Matthew Perkowski <mgperkow@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -2167,6 +2167,9 @@ static void ata_dev_config_ncq_prio(stru
+ struct ata_port *ap = dev->link->ap;
+ unsigned int err_mask;
+
++ if (!ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
++ return;
++
+ err_mask = ata_read_log_page(dev,
+ ATA_LOG_IDENTIFY_DEVICE,
+ ATA_LOG_SATA_SETTINGS,
+@@ -2443,7 +2446,8 @@ static void ata_dev_config_devslp(struct
+ * Check device sleep capability. Get DevSlp timing variables
+ * from SATA Settings page of Identify Device Data Log.
+ */
+- if (!ata_id_has_devslp(dev->id))
++ if (!ata_id_has_devslp(dev->id) ||
++ !ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
+ return;
+
+ err_mask = ata_read_log_page(dev,
--- /dev/null
+From 23ef63d5e14f916c5bba39128ebef395859d7c0f Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Date: Mon, 15 Nov 2021 12:37:46 +0900
+Subject: ata: libata: improve ata_read_log_page() error message
+
+From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+
+commit 23ef63d5e14f916c5bba39128ebef395859d7c0f upstream.
+
+If ata_read_log_page() fails to read a log page, the ata_dev_err() error
+message only prints the page number, omitting the log number. Facilitate
+debugging by also printing the log number in the error message.
+
+Cc: stable@kernel.org # 5.15
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Tested-by: Matthew Perkowski <mgperkow@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -2031,8 +2031,9 @@ retry:
+ dev->horkage |= ATA_HORKAGE_NO_DMA_LOG;
+ goto retry;
+ }
+- ata_dev_err(dev, "Read log page 0x%02x failed, Emask 0x%x\n",
+- (unsigned int)page, err_mask);
++ ata_dev_err(dev,
++ "Read log 0x%02x page 0x%02x failed, Emask 0x%x\n",
++ (unsigned int)log, (unsigned int)page, err_mask);
+ }
+
+ return err_mask;
--- /dev/null
+From 94c4b4fd25e6c3763941bdec3ad54f2204afa992 Mon Sep 17 00:00:00 2001
+From: Alistair Delva <adelva@google.com>
+Date: Mon, 15 Nov 2021 18:16:55 +0000
+Subject: block: Check ADMIN before NICE for IOPRIO_CLASS_RT
+
+From: Alistair Delva <adelva@google.com>
+
+commit 94c4b4fd25e6c3763941bdec3ad54f2204afa992 upstream.
+
+Booting to Android userspace on 5.14 or newer triggers the following
+SELinux denial:
+
+avc: denied { sys_nice } for comm="init" capability=23
+ scontext=u:r:init:s0 tcontext=u:r:init:s0 tclass=capability
+ permissive=0
+
+Init is PID 1 running as root, so it already has CAP_SYS_ADMIN. For
+better compatibility with older SEPolicy, check ADMIN before NICE.
+
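+As a minimal illustration (a hypothetical userspace sketch, not part of
+this patch), the check in ioprio_check_cap() is reached when a process
+requests an RT I/O priority via the ioprio_set() syscall:
+
+  #include <sys/syscall.h>
+  #include <unistd.h>
+
+  #define IOPRIO_CLASS_SHIFT 13
+  #define IOPRIO_PRIO_VALUE(class, data) \
+          (((class) << IOPRIO_CLASS_SHIFT) | (data))
+
+  int main(void)
+  {
+          /* IOPRIO_WHO_PROCESS = 1, IOPRIO_CLASS_RT = 1; pid 0 = self */
+          return syscall(SYS_ioprio_set, 1, 0, IOPRIO_PRIO_VALUE(1, 4));
+  }
+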
+Fixes: 9d3a39a5f1e4 ("block: grant IOPRIO_CLASS_RT to CAP_SYS_NICE")
+Signed-off-by: Alistair Delva <adelva@google.com>
+Cc: Khazhismel Kumykov <khazhy@google.com>
+Cc: Bart Van Assche <bvanassche@acm.org>
+Cc: Serge Hallyn <serge@hallyn.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Paul Moore <paul@paul-moore.com>
+Cc: selinux@vger.kernel.org
+Cc: linux-security-module@vger.kernel.org
+Cc: kernel-team@android.com
+Cc: stable@vger.kernel.org # v5.14+
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Acked-by: Serge Hallyn <serge@hallyn.com>
+Link: https://lore.kernel.org/r/20211115181655.3608659-1-adelva@google.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/ioprio.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/block/ioprio.c
++++ b/block/ioprio.c
+@@ -69,7 +69,14 @@ int ioprio_check_cap(int ioprio)
+
+ switch (class) {
+ case IOPRIO_CLASS_RT:
+- if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
++ /*
++ * Originally this only checked for CAP_SYS_ADMIN,
++ * which was implicitly allowed for pid 0 by security
++ * modules such as SELinux. Make sure we check
++ * CAP_SYS_ADMIN first to avoid a denial/avc for
++ * possibly missing CAP_SYS_NICE permission.
++ */
++ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
+ return -EPERM;
+ fallthrough;
+ /* rt has prio field too */
--- /dev/null
+From fb561bf9abde49f7e00fdbf9ed2ccf2d86cac8ee Mon Sep 17 00:00:00 2001
+From: Javier Martinez Canillas <javierm@redhat.com>
+Date: Thu, 11 Nov 2021 12:57:57 +0100
+Subject: fbdev: Prevent probing generic drivers if a FB is already registered
+
+From: Javier Martinez Canillas <javierm@redhat.com>
+
+commit fb561bf9abde49f7e00fdbf9ed2ccf2d86cac8ee upstream.
+
+The efifb and simplefb drivers just render to a pre-allocated frame buffer
+and rely on the display hardware being initialized before the kernel boots.
+
+But if another driver has already probed correctly and registered an
+fbdev, the generic drivers shouldn't be probed, since an actual driver
+for the display hardware is already present.
+
+This is more likely to occur after commit d391c5827107 ("drivers/firmware:
+move x86 Generic System Framebuffers support") since the "efi-framebuffer"
+and "simple-framebuffer" platform devices are registered at a later time.
+
+Link: https://lore.kernel.org/r/20211110200253.rfudkt3edbd3nsyj@lahvuun/
+Fixes: d391c5827107 ("drivers/firmware: move x86 Generic System Framebuffers support")
+Reported-by: Ilya Trukhanov <lahvuun@gmail.com>
+Cc: <stable@vger.kernel.org> # 5.15.x
+Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Tested-by: Ilya Trukhanov <lahvuun@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20211111115757.1351045-1-javierm@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/video/fbdev/efifb.c | 11 +++++++++++
+ drivers/video/fbdev/simplefb.c | 11 +++++++++++
+ 2 files changed, 22 insertions(+)
+
+--- a/drivers/video/fbdev/efifb.c
++++ b/drivers/video/fbdev/efifb.c
+@@ -351,6 +351,17 @@ static int efifb_probe(struct platform_d
+ char *option = NULL;
+ efi_memory_desc_t md;
+
++ /*
++ * Generic drivers must not be registered if a framebuffer exists.
++ * If a native driver was probed, the display hardware was already
++ * taken and attempting to use the system framebuffer is dangerous.
++ */
++ if (num_registered_fb > 0) {
++ dev_err(&dev->dev,
++ "efifb: a framebuffer is already registered\n");
++ return -EINVAL;
++ }
++
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
+ return -ENODEV;
+
+--- a/drivers/video/fbdev/simplefb.c
++++ b/drivers/video/fbdev/simplefb.c
+@@ -407,6 +407,17 @@ static int simplefb_probe(struct platfor
+ struct simplefb_par *par;
+ struct resource *mem;
+
++ /*
++ * Generic drivers must not be registered if a framebuffer exists.
++ * If a native driver was probed, the display hardware was already
++ * taken and attempting to use the system framebuffer is dangerous.
++ */
++ if (num_registered_fb > 0) {
++ dev_err(&pdev->dev,
++ "simplefb: a framebuffer is already registered\n");
++ return -EINVAL;
++ }
++
+ if (fb_get_options("simplefb", NULL))
+ return -ENODEV;
+
--- /dev/null
+From 51f2ec593441d3d1ebc0d478fac3ea329c7c93ac Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:31 -0800
+Subject: hexagon: clean up timer-regs.h
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit 51f2ec593441d3d1ebc0d478fac3ea329c7c93ac upstream.
+
+When building allmodconfig, there is a warning about TIMER_ENABLE being
+redefined:
+
+ drivers/clocksource/timer-oxnas-rps.c:39:9: error: 'TIMER_ENABLE' macro redefined [-Werror,-Wmacro-redefined]
+ #define TIMER_ENABLE BIT(7)
+ ^
+ arch/hexagon/include/asm/timer-regs.h:13:9: note: previous definition is here
+ #define TIMER_ENABLE 0
+ ^
+ 1 error generated.
+
+The values in this header are only used in one file each, if they are
+used at all. Remove the header and sink all of the constants into their
+respective files.
+
+TCX0_CLK_RATE is only used in arch/hexagon/include/asm/timex.h.
+
+TIMER_ENABLE, RTOS_TIMER_INT, RTOS_TIMER_REGS_ADDR are only used in
+arch/hexagon/kernel/time.c.
+
+SLEEP_CLK_RATE and TIMER_CLR_ON_MATCH have both been unused since the
+file's introduction in commit 71e4a47f32f4 ("Hexagon: Add time and timer
+functions").
+
+TIMER_ENABLE is redefined as BIT(0) so the shift is moved into the
+definition, rather than its use.
+
+Link: https://lkml.kernel.org/r/20211115174250.1994179-3-nathan@kernel.org
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Acked-by: Brian Cain <bcain@codeaurora.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/hexagon/include/asm/timer-regs.h | 26 --------------------------
+ arch/hexagon/include/asm/timex.h | 3 +--
+ arch/hexagon/kernel/time.c | 12 ++++++++++--
+ 3 files changed, 11 insertions(+), 30 deletions(-)
+ delete mode 100644 arch/hexagon/include/asm/timer-regs.h
+
+--- a/arch/hexagon/include/asm/timer-regs.h
++++ /dev/null
+@@ -1,26 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-only */
+-/*
+- * Timer support for Hexagon
+- *
+- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+- */
+-
+-#ifndef _ASM_TIMER_REGS_H
+-#define _ASM_TIMER_REGS_H
+-
+-/* This stuff should go into a platform specific file */
+-#define TCX0_CLK_RATE 19200
+-#define TIMER_ENABLE 0
+-#define TIMER_CLR_ON_MATCH 1
+-
+-/*
+- * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
+- * release 1.1, and then it's "adjustable" and probably not defaulted.
+- */
+-#define RTOS_TIMER_INT 3
+-#ifdef CONFIG_HEXAGON_COMET
+-#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
+-#endif
+-#define SLEEP_CLK_RATE 32000
+-
+-#endif
+--- a/arch/hexagon/include/asm/timex.h
++++ b/arch/hexagon/include/asm/timex.h
+@@ -7,11 +7,10 @@
+ #define _ASM_TIMEX_H
+
+ #include <asm-generic/timex.h>
+-#include <asm/timer-regs.h>
+ #include <asm/hexagon_vm.h>
+
+ /* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */
+-#define CLOCK_TICK_RATE TCX0_CLK_RATE
++#define CLOCK_TICK_RATE 19200
+
+ #define ARCH_HAS_READ_CURRENT_TIMER
+
+--- a/arch/hexagon/kernel/time.c
++++ b/arch/hexagon/kernel/time.c
+@@ -17,9 +17,10 @@
+ #include <linux/of_irq.h>
+ #include <linux/module.h>
+
+-#include <asm/timer-regs.h>
+ #include <asm/hexagon_vm.h>
+
++#define TIMER_ENABLE BIT(0)
++
+ /*
+ * For the clocksource we need:
+ * pcycle frequency (600MHz)
+@@ -33,6 +34,13 @@ cycles_t pcycle_freq_mhz;
+ cycles_t thread_freq_mhz;
+ cycles_t sleep_clk_freq;
+
++/*
++ * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until
++ * release 1.1, and then it's "adjustable" and probably not defaulted.
++ */
++#define RTOS_TIMER_INT 3
++#define RTOS_TIMER_REGS_ADDR 0xAB000000UL
++
+ static struct resource rtos_timer_resources[] = {
+ {
+ .start = RTOS_TIMER_REGS_ADDR,
+@@ -80,7 +88,7 @@ static int set_next_event(unsigned long
+ iowrite32(0, &rtos_timer->clear);
+
+ iowrite32(delta, &rtos_timer->match);
+- iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable);
++ iowrite32(TIMER_ENABLE, &rtos_timer->enable);
+ return 0;
+ }
+
--- /dev/null
+From ffb92ce826fd801acb0f4e15b75e4ddf0d189bde Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:28 -0800
+Subject: hexagon: export raw I/O routines for modules
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit ffb92ce826fd801acb0f4e15b75e4ddf0d189bde upstream.
+
+Patch series "Fixes for ARCH=hexagon allmodconfig", v2.
+
+This series fixes some issues noticed with ARCH=hexagon allmodconfig.
+
+This patch (of 3):
+
+When building ARCH=hexagon allmodconfig, the following errors occur:
+
+ ERROR: modpost: "__raw_readsl" [drivers/i3c/master/svc-i3c-master.ko] undefined!
+ ERROR: modpost: "__raw_writesl" [drivers/i3c/master/dw-i3c-master.ko] undefined!
+ ERROR: modpost: "__raw_readsl" [drivers/i3c/master/dw-i3c-master.ko] undefined!
+ ERROR: modpost: "__raw_writesl" [drivers/i3c/master/i3c-master-cdns.ko] undefined!
+ ERROR: modpost: "__raw_readsl" [drivers/i3c/master/i3c-master-cdns.ko] undefined!
+
+Export these symbols so that modules can use them without any errors.
+
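+The fix follows the usual pattern for symbols that modules need (an
+abbreviated sketch; the register offset below is hypothetical):
+
+  /* arch/hexagon/lib/io.c: export right after the definition */
+  void __raw_readsl(const void __iomem *addr, void *data, int len)
+  {
+          ...
+  }
+  EXPORT_SYMBOL(__raw_readsl);
+
+  /* a loadable module (e.g. an i3c master driver) can then link;
+   * the reference below only passes modpost once the export exists */
+  __raw_readsl(master->regs + RX_FIFO_REG, buf, nwords);
+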
+Link: https://lkml.kernel.org/r/20211115174250.1994179-1-nathan@kernel.org
+Link: https://lkml.kernel.org/r/20211115174250.1994179-2-nathan@kernel.org
+Fixes: 013bf24c3829 ("Hexagon: Provide basic implementation and/or stubs for I/O routines.")
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Acked-by: Brian Cain <bcain@codeaurora.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/hexagon/lib/io.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/hexagon/lib/io.c
++++ b/arch/hexagon/lib/io.c
+@@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *ad
+ *dst++ = *src;
+
+ }
++EXPORT_SYMBOL(__raw_readsw);
+
+ /*
+ * __raw_writesw - read words a short at a time
+@@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, c
+
+
+ }
++EXPORT_SYMBOL(__raw_writesw);
+
+ /* Pretty sure len is pre-adjusted for the length of the access already */
+ void __raw_readsl(const void __iomem *addr, void *data, int len)
+@@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *ad
+
+
+ }
++EXPORT_SYMBOL(__raw_readsl);
+
+ void __raw_writesl(void __iomem *addr, const void *data, int len)
+ {
+@@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, c
+
+
+ }
++EXPORT_SYMBOL(__raw_writesl);
--- /dev/null
+From cc30042df6fcc82ea18acf0dace831503e60a0b7 Mon Sep 17 00:00:00 2001
+From: Mina Almasry <almasrymina@google.com>
+Date: Fri, 19 Nov 2021 16:43:43 -0800
+Subject: hugetlb, userfaultfd: fix reservation restore on userfaultfd error
+
+From: Mina Almasry <almasrymina@google.com>
+
+commit cc30042df6fcc82ea18acf0dace831503e60a0b7 upstream.
+
+Currently in the is_continue case in hugetlb_mcopy_atomic_pte(), if we
+bail out using "goto out_release_unlock;" in the cases where idx >=
+size, or !huge_pte_none(), the code will detect that new_pagecache_page
+== false, and so call restore_reserve_on_error(). In this case I see
+restore_reserve_on_error() delete the reservation, and the following
+call to remove_inode_hugepages() will increment h->resv_hugepages
+causing a 100% reproducible leak.
+
+We should treat the is_continue case similar to adding a page into the
+pagecache and set new_pagecache_page to true, to indicate that there is
+no reservation to restore on the error path, and we need not call
+restore_reserve_on_error(). Rename new_pagecache_page to
+page_in_pagecache to make that clear.
+
+Link: https://lkml.kernel.org/r/20211117193825.378528-1-almasrymina@google.com
+Fixes: c7b1850dfb41 ("hugetlb: don't pass page cache pages to restore_reserve_on_error")
+Signed-off-by: Mina Almasry <almasrymina@google.com>
+Reported-by: James Houghton <jthoughton@google.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Wei Xu <weixugc@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5236,13 +5236,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+ int ret = -ENOMEM;
+ struct page *page;
+ int writable;
+- bool new_pagecache_page = false;
++ bool page_in_pagecache = false;
+
+ if (is_continue) {
+ ret = -EFAULT;
+ page = find_lock_page(mapping, idx);
+ if (!page)
+ goto out;
++ page_in_pagecache = true;
+ } else if (!*pagep) {
+ /* If a page already exists, then it's UFFDIO_COPY for
+ * a non-missing case. Return -EEXIST.
+@@ -5330,7 +5331,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+ ret = huge_add_to_page_cache(page, mapping, idx);
+ if (ret)
+ goto out_release_nounlock;
+- new_pagecache_page = true;
++ page_in_pagecache = true;
+ }
+
+ ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
+@@ -5394,7 +5395,7 @@ out_release_unlock:
+ if (vm_shared || is_continue)
+ unlock_page(page);
+ out_release_nounlock:
+- if (!new_pagecache_page)
++ if (!page_in_pagecache)
+ restore_reserve_on_error(h, dst_vma, dst_addr, page);
+ put_page(page);
+ goto out;
--- /dev/null
+From 126e8bee943e9926238c891e2df5b5573aee76bc Mon Sep 17 00:00:00 2001
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Date: Fri, 19 Nov 2021 16:43:18 -0800
+Subject: ipc: WARN if trying to remove ipc object which is absent
+
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+
+commit 126e8bee943e9926238c891e2df5b5573aee76bc upstream.
+
+Patch series "shm: shm_rmid_forced feature fixes".
+
+Some time ago I hit a kernel crash after a CRIU restore procedure.
+Fortunately, since it was a CRIU restore, I had dump files and could
+repeat the restore many times, so the crash reproduced easily. After
+some investigation I constructed a minimal reproducer. It turned out to
+be a use-after-free that happens only if sysctl
+kernel.shm_rmid_forced = 1.
+
+The crux of the problem is that exit_shm() does not handle destruction
+of shp objects when task->sysvshm.shm_clist contains items from
+different IPC namespaces. In most cases this list will contain only
+items from one IPC namespace.
+
+How can this list contain objects from different namespaces? exit_shm()
+is designed to clean up this list whenever a process leaves its IPC
+namespace. But a mistake was made long ago: an exit_shm() call was
+never added to the setns() syscall path.
+
+The first idea was simply to add this call to the setns() syscall, but
+that obviously changes setns() semantics and is a userspace-visible
+change, so I gave up on that idea.
+
+The first real attempt to address the issue was simply to skip the
+forced destroy when encountering an shp object that does not belong to
+the current task's IPC namespace [1]. But that was not the best idea,
+because task->sysvshm.shm_clist was protected by an rwsem belonging to
+the current task's IPC namespace, which means list corruption may
+occur.
+
+The second approach was to extend exit_shm() to properly handle shp
+objects from different IPC namespaces [2]. This is a really non-trivial
+thing; I put a lot of effort into it but was never convinced it could
+be made fully safe, clean, and clear.
+
+Thanks to the efforts of Manfred Spraul, an elegant solution was
+designed. Thanks a lot, Manfred!
+
+Eric also suggested a way to address the issue in ("[RFC][PATCH] shm:
+In shm_exit destroy all created and never attached segments"). Eric's
+idea was to maintain one shm_clist per IPC namespace, using lock-less
+lists. But there are some concerns about the extra memory consumption.
+
+An alternative solution, which I suggested, was implemented in ("shm:
+reset shm_clist on setns but omit forced shm destroy"). The idea is
+pretty simple: add an exit_shm() call to setns() but DO NOT destroy shm
+segments even if sysctl kernel.shm_rmid_forced = 1; just clean up the
+task->sysvshm.shm_clist list.
+
+This changes setns() semantics a little, but compared to the "naive"
+solution of adding exit_shm() without any special exclusions, it looks
+like the safer option.
+
+[1] https://lkml.org/lkml/2021/7/6/1108
+[2] https://lkml.org/lkml/2021/7/14/736
+
+This patch (of 2):
+
+Let's produce a warning if we are trying to remove a non-existent IPC
+object from the IPC namespace kht/idr structures.
+
+This allows us to catch possible bugs where ipc_rmid() is called with
+inconsistent struct ipc_ids * and struct kern_ipc_perm * arguments.
+
+Link: https://lkml.kernel.org/r/20211027224348.611025-1-alexander.mikhalitsyn@virtuozzo.com
+Link: https://lkml.kernel.org/r/20211027224348.611025-2-alexander.mikhalitsyn@virtuozzo.com
+Co-developed-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Cc: "Eric W. Biederman" <ebiederm@xmission.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Cc: Vasily Averin <vvs@virtuozzo.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ ipc/util.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/ipc/util.c
++++ b/ipc/util.c
+@@ -447,8 +447,8 @@ static int ipcget_public(struct ipc_name
+ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+ {
+ if (ipcp->key != IPC_PRIVATE)
+- rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
+- ipc_kht_params);
++ WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
++ ipc_kht_params));
+ }
+
+ /**
+@@ -498,7 +498,7 @@ void ipc_rmid(struct ipc_ids *ids, struc
+ {
+ int idx = ipcid_to_idx(ipcp->id);
+
+- idr_remove(&ids->ipcs_idr, idx);
++ WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp);
+ ipc_kht_remove(ids, ipcp);
+ ids->in_use--;
+ ipcp->deleted = true;
--- /dev/null
+From 825c43f50e3aa811a291ffcb40e02fbf6d91ba86 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:55 -0800
+Subject: kmap_local: don't assume kmap PTEs are linear arrays in memory
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 825c43f50e3aa811a291ffcb40e02fbf6d91ba86 upstream.
+
+The kmap_local conversion broke the ARM architecture, because the new
+code assumes that all PTEs used for creating kmaps form a linear array
+in memory, and uses array indexing to look up the kmap PTE belonging to
+a certain kmap index.
+
+On ARM, this cannot work, not only because the PTE pages may be
+non-adjacent in memory, but also because ARM/!LPAE interleaves hardware
+entries and extended entries (carrying software-only bits) in a way that
+is not compatible with array indexing.
+
+Fortunately, this only seems to affect configurations with more than 8
+CPUs, due to the way the per-CPU kmap slots are organized in memory.
+
+Work around this by permitting an architecture to set a Kconfig symbol
+that signifies that the kmap PTEs do not form a linear array in memory,
+and so the only way to locate the appropriate one is to walk the page
+tables.
+
+Link: https://lore.kernel.org/linux-arm-kernel/20211026131249.3731275-1-ardb@kernel.org/
+Link: https://lkml.kernel.org/r/20211116094737.7391-1-ardb@kernel.org
+Fixes: 2a15ba82fa6c ("ARM: highmem: Switch to generic kmap atomic")
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Reported-by: Quanyang Wang <quanyang.wang@windriver.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/Kconfig | 1 +
+ mm/Kconfig | 3 +++
+ mm/highmem.c | 32 +++++++++++++++++++++-----------
+ 3 files changed, 25 insertions(+), 11 deletions(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -1455,6 +1455,7 @@ config HIGHMEM
+ bool "High Memory Support"
+ depends on MMU
+ select KMAP_LOCAL
++ select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
+ help
+ The address space of ARM processors is only 4 Gigabytes large
+ and it has to accommodate user address space, kernel address
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -887,6 +887,9 @@ config MAPPING_DIRTY_HELPERS
+ config KMAP_LOCAL
+ bool
+
++config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
++ bool
++
+ # struct io_mapping based helper. Selected by drivers that need them
+ config IO_MAPPING
+ bool
+--- a/mm/highmem.c
++++ b/mm/highmem.c
+@@ -504,16 +504,22 @@ static inline int kmap_local_calc_idx(in
+
+ static pte_t *__kmap_pte;
+
+-static pte_t *kmap_get_pte(void)
++static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
+ {
++ if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY))
++ /*
++ * Set by the arch if __kmap_pte[-idx] does not produce
++ * the correct entry.
++ */
++ return virt_to_kpte(vaddr);
+ if (!__kmap_pte)
+ __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
+- return __kmap_pte;
++ return &__kmap_pte[-idx];
+ }
+
+ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
+ {
+- pte_t pteval, *kmap_pte = kmap_get_pte();
++ pte_t pteval, *kmap_pte;
+ unsigned long vaddr;
+ int idx;
+
+@@ -525,9 +531,10 @@ void *__kmap_local_pfn_prot(unsigned lon
+ preempt_disable();
+ idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+- BUG_ON(!pte_none(*(kmap_pte - idx)));
++ kmap_pte = kmap_get_pte(vaddr, idx);
++ BUG_ON(!pte_none(*kmap_pte));
+ pteval = pfn_pte(pfn, prot);
+- arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval);
++ arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval);
+ arch_kmap_local_post_map(vaddr, pteval);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
+ preempt_enable();
+@@ -560,7 +567,7 @@ EXPORT_SYMBOL(__kmap_local_page_prot);
+ void kunmap_local_indexed(void *vaddr)
+ {
+ unsigned long addr = (unsigned long) vaddr & PAGE_MASK;
+- pte_t *kmap_pte = kmap_get_pte();
++ pte_t *kmap_pte;
+ int idx;
+
+ if (addr < __fix_to_virt(FIX_KMAP_END) ||
+@@ -585,8 +592,9 @@ void kunmap_local_indexed(void *vaddr)
+ idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr);
+ WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+
++ kmap_pte = kmap_get_pte(addr, idx);
+ arch_kmap_local_pre_unmap(addr);
+- pte_clear(&init_mm, addr, kmap_pte - idx);
++ pte_clear(&init_mm, addr, kmap_pte);
+ arch_kmap_local_post_unmap(addr);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
+ kmap_local_idx_pop();
+@@ -608,7 +616,7 @@ EXPORT_SYMBOL(kunmap_local_indexed);
+ void __kmap_local_sched_out(void)
+ {
+ struct task_struct *tsk = current;
+- pte_t *kmap_pte = kmap_get_pte();
++ pte_t *kmap_pte;
+ int i;
+
+ /* Clear kmaps */
+@@ -635,8 +643,9 @@ void __kmap_local_sched_out(void)
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++ kmap_pte = kmap_get_pte(addr, idx);
+ arch_kmap_local_pre_unmap(addr);
+- pte_clear(&init_mm, addr, kmap_pte - idx);
++ pte_clear(&init_mm, addr, kmap_pte);
+ arch_kmap_local_post_unmap(addr);
+ }
+ }
+@@ -644,7 +653,7 @@ void __kmap_local_sched_out(void)
+ void __kmap_local_sched_in(void)
+ {
+ struct task_struct *tsk = current;
+- pte_t *kmap_pte = kmap_get_pte();
++ pte_t *kmap_pte;
+ int i;
+
+ /* Restore kmaps */
+@@ -664,7 +673,8 @@ void __kmap_local_sched_in(void)
+ /* See comment in __kmap_local_sched_out() */
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+- set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
++ kmap_pte = kmap_get_pte(addr, idx);
++ set_pte_at(&init_mm, addr, kmap_pte, pteval);
+ arch_kmap_local_post_map(addr, pteval);
+ }
+ }
--- /dev/null
+From b5aead0064f33ae5e693a364e3204fe1c0ac9af2 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 24 May 2021 12:48:57 -0500
+Subject: KVM: x86: Assume a 64-bit hypercall for guests with protected state
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit b5aead0064f33ae5e693a364e3204fe1c0ac9af2 upstream.
+
+When processing a hypercall for a guest with protected state, currently
+SEV-ES guests, the guest CS segment register can't be checked to
+determine if the guest is in 64-bit mode. For an SEV-ES guest, it is
+expected that communication between the guest and the hypervisor is
+performed through shared memory using the GHCB. In order to use the
+GHCB, the guest must have been in long mode, otherwise writes by the
+guest to the GHCB would be encrypted and could not be comprehended by
+the hypervisor.
+
+Create a new helper function, is_64_bit_hypercall(), that assumes the
+guest is in 64-bit mode when the guest has protected state, and returns
+true, otherwise invoking is_64_bit_mode() to determine the mode. Update
+the hypercall related routines to use is_64_bit_hypercall() instead of
+is_64_bit_mode().
+
+Add a WARN_ON_ONCE() to is_64_bit_mode() to catch occurrences of calls to
+this helper function for a guest running with protected state.
+
+Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES")
+Reported-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Message-Id: <e0b20c770c9d0d1403f23d83e785385104211f74.1621878537.git.thomas.lendacky@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.c | 4 ++--
+ arch/x86/kvm/x86.c | 2 +-
+ arch/x86/kvm/x86.h | 12 ++++++++++++
+ arch/x86/kvm/xen.c | 2 +-
+ 4 files changed, 16 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.c
++++ b/arch/x86/kvm/hyperv.c
+@@ -2022,7 +2022,7 @@ static void kvm_hv_hypercall_set_result(
+ {
+ bool longmode;
+
+- longmode = is_64_bit_mode(vcpu);
++ longmode = is_64_bit_hypercall(vcpu);
+ if (longmode)
+ kvm_rax_write(vcpu, result);
+ else {
+@@ -2171,7 +2171,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vc
+ }
+
+ #ifdef CONFIG_X86_64
+- if (is_64_bit_mode(vcpu)) {
++ if (is_64_bit_hypercall(vcpu)) {
+ hc.param = kvm_rcx_read(vcpu);
+ hc.ingpa = kvm_rdx_read(vcpu);
+ hc.outgpa = kvm_r8_read(vcpu);
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8737,7 +8737,7 @@ int kvm_emulate_hypercall(struct kvm_vcp
+
+ trace_kvm_hypercall(nr, a0, a1, a2, a3);
+
+- op_64_bit = is_64_bit_mode(vcpu);
++ op_64_bit = is_64_bit_hypercall(vcpu);
+ if (!op_64_bit) {
+ nr &= 0xFFFFFFFF;
+ a0 &= 0xFFFFFFFF;
+--- a/arch/x86/kvm/x86.h
++++ b/arch/x86/kvm/x86.h
+@@ -153,12 +153,24 @@ static inline bool is_64_bit_mode(struct
+ {
+ int cs_db, cs_l;
+
++ WARN_ON_ONCE(vcpu->arch.guest_state_protected);
++
+ if (!is_long_mode(vcpu))
+ return false;
+ static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
+ return cs_l;
+ }
+
++static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
++{
++ /*
++ * If running with protected guest state, the CS register is not
++ * accessible. The hypercall register values will have had to been
++ * provided in 64-bit mode, so assume the guest is in 64-bit.
++ */
++ return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
++}
++
+ static inline bool x86_exception_has_error_code(unsigned int vector)
+ {
+ static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+--- a/arch/x86/kvm/xen.c
++++ b/arch/x86/kvm/xen.c
+@@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *v
+ kvm_hv_hypercall_enabled(vcpu))
+ return kvm_hv_hypercall(vcpu);
+
+- longmode = is_64_bit_mode(vcpu);
++ longmode = is_64_bit_hypercall(vcpu);
+ if (!longmode) {
+ params[0] = (u32)kvm_rbx_read(vcpu);
+ params[1] = (u32)kvm_rcx_read(vcpu);
--- /dev/null
+From c5adbb3af051079f35abfa26551107e2c653087f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=BB=84=E4=B9=90?= <huangle1@jd.com>
+Date: Mon, 15 Nov 2021 14:08:29 +0000
+Subject: KVM: x86: Fix uninitialized eoi_exit_bitmap usage in vcpu_load_eoi_exitmap()
+
+From: 黄乐 <huangle1@jd.com>
+
+commit c5adbb3af051079f35abfa26551107e2c653087f upstream.
+
+In vcpu_load_eoi_exitmap(), the eoi_exit_bitmap[4] array is currently
+initialized only when a Hyper-V context is available; in the other path
+it is passed to kvm_x86_ops.load_eoi_exitmap() directly from the stack,
+uninitialized. This can cause unexpected interrupt delivery/handling
+issues, e.g. an *old* Linux kernel that relies on the PIT to do clock
+calibration on KVM might randomly fail to boot.
+
+Fix it by passing ioapic_handled_vectors to load_eoi_exitmap() when Hyper-V
+context is not available.
+
+Fixes: f2bc14b69c38 ("KVM: x86: hyper-v: Prepare to meet unallocated Hyper-V context")
+Cc: stable@vger.kernel.org
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Huang Le <huangle1@jd.com>
+Message-Id: <62115b277dab49ea97da5633f8522daf@jd.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9429,12 +9429,16 @@ static void vcpu_load_eoi_exitmap(struct
+ if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+ return;
+
+- if (to_hv_vcpu(vcpu))
++ if (to_hv_vcpu(vcpu)) {
+ bitmap_or((ulong *)eoi_exit_bitmap,
+ vcpu->arch.ioapic_handled_vectors,
+ to_hv_synic(vcpu)->vec_bitmap, 256);
++ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
++ return;
++ }
+
+- static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
++ static_call(kvm_x86_load_eoi_exitmap)(
++ vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
+ }
+
+ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
--- /dev/null
+From b8453cdcf26020030da182f0156d7bf59ae5719f Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Mon, 15 Nov 2021 15:18:37 +0200
+Subject: KVM: x86/mmu: include EFER.LMA in extended mmu role
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit b8453cdcf26020030da182f0156d7bf59ae5719f upstream.
+
+Incorporate EFER.LMA into kvm_mmu_extended_role, as it is used to compute the
+guest root level and is not reflected in kvm_mmu_page_role.level when TDP
+is in use. When simply running the guest, it is impossible for EFER.LMA
+and kvm_mmu.root_level to get out of sync, as the guest cannot transition
+from PAE paging to 64-bit paging without toggling CR0.PG, i.e. without
+first bouncing through a different MMU context. And stuffing guest state
+via KVM_SET_SREGS{,2} also ensures a full MMU context reset.
+
+However, if KVM_SET_SREGS{,2} is followed by KVM_SET_NESTED_STATE, e.g. to
+set guest state when migrating the VM while L2 is active, the vCPU state
+will reflect L2, not L1. If L1 is using TDP for L2, then root_mmu will
+have been configured using L2's state, despite not being used for L2. If
+L2.EFER.LMA != L1.EFER.LMA, and L2 is using PAE paging, then root_mmu will
+be configured for guest PAE paging, but will match the mmu_role for 64-bit
+paging and cause KVM to not reconfigure root_mmu on the next nested VM-Exit.
+
+Alternatively, the root_mmu's role could be invalidated after a successful
+KVM_SET_NESTED_STATE that yields vcpu->arch.mmu != vcpu->arch.root_mmu,
+i.e. that switches the active mmu to guest_mmu, but doing so is unnecessarily
+tricky, and not even needed if L1 and L2 do have the same role (e.g., they
+are both 64-bit guests and run with the same CR4).
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20211115131837.195527-3-mlevitsk@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/mmu/mmu.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -364,6 +364,7 @@ union kvm_mmu_extended_role {
+ unsigned int cr4_smap:1;
+ unsigned int cr4_smep:1;
+ unsigned int cr4_la57:1;
++ unsigned int efer_lma:1;
+ };
+ };
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -4679,6 +4679,7 @@ static union kvm_mmu_extended_role kvm_c
+ /* PKEY and LA57 are active iff long mode is active. */
+ ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
+ ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
++ ext.efer_lma = ____is_efer_lma(regs);
+ }
+
+ ext.valid = 1;
--- /dev/null
+From 4e8436479ad3be76a3823e6ce466ae464ce71300 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Mon, 15 Nov 2021 16:50:21 +0000
+Subject: KVM: x86/xen: Fix get_attr of KVM_XEN_ATTR_TYPE_SHARED_INFO
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 4e8436479ad3be76a3823e6ce466ae464ce71300 upstream.
+
+In commit 319afe68567b ("KVM: xen: do not use struct gfn_to_hva_cache") we
+stopped storing this in-kernel as a GPA and started storing it as a GFN,
+which means we probably should also have stopped calling gpa_to_gfn()
+on it when userspace asks for it back.
+
+Cc: stable@vger.kernel.org
+Fixes: 319afe68567b ("KVM: xen: do not use struct gfn_to_hva_cache")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Message-Id: <20211115165030.7422-2-dwmw2@infradead.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/xen.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/xen.c
++++ b/arch/x86/kvm/xen.c
+@@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm
+ break;
+
+ case KVM_XEN_ATTR_TYPE_SHARED_INFO:
+- data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
++ data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
+ r = 0;
+ break;
+
--- /dev/null
+From d78f3853f831eee46c6dbe726debf3be9e9c0d05 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:52 -0800
+Subject: mm/damon/dbgfs: fix missed use of damon_dbgfs_lock
+
+From: SeongJae Park <sj@kernel.org>
+
+commit d78f3853f831eee46c6dbe726debf3be9e9c0d05 upstream.
+
+DAMON debugfs is supposed to protect dbgfs_ctxs, dbgfs_nr_ctxs, and
+dbgfs_dirs using damon_dbgfs_lock. However, some of the code is
+accessing the variables without the protection. This fixes it by
+protecting all such accesses.
+
+Link: https://lkml.kernel.org/r/20211110145758.16558-3-sj@kernel.org
+Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/dbgfs.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -538,12 +538,14 @@ static ssize_t dbgfs_monitor_on_write(st
+ return -EINVAL;
+ }
+
++ mutex_lock(&damon_dbgfs_lock);
+ if (!strncmp(kbuf, "on", count))
+ err = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
+ else if (!strncmp(kbuf, "off", count))
+ err = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
+ else
+ err = -EINVAL;
++ mutex_unlock(&damon_dbgfs_lock);
+
+ if (err)
+ ret = err;
+@@ -596,15 +598,16 @@ static int __init __damon_dbgfs_init(voi
+
+ static int __init damon_dbgfs_init(void)
+ {
+- int rc;
++ int rc = -ENOMEM;
+
++ mutex_lock(&damon_dbgfs_lock);
+ dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL);
+ if (!dbgfs_ctxs)
+- return -ENOMEM;
++ goto out;
+ dbgfs_ctxs[0] = dbgfs_new_ctx();
+ if (!dbgfs_ctxs[0]) {
+ kfree(dbgfs_ctxs);
+- return -ENOMEM;
++ goto out;
+ }
+ dbgfs_nr_ctxs = 1;
+
+@@ -615,6 +618,8 @@ static int __init damon_dbgfs_init(void)
+ pr_err("%s: dbgfs init failed\n", __func__);
+ }
+
++out:
++ mutex_unlock(&damon_dbgfs_lock);
+ return rc;
+ }
+
--- /dev/null
+From db7a347b26fe05d2e8c115bb24dfd908d0252bc3 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Fri, 19 Nov 2021 16:43:49 -0800
+Subject: mm/damon/dbgfs: use '__GFP_NOWARN' for user-specified size buffer allocation
+
+From: SeongJae Park <sj@kernel.org>
+
+commit db7a347b26fe05d2e8c115bb24dfd908d0252bc3 upstream.
+
+Patch series "DAMON fixes".
+
+This patch (of 2):
+
+DAMON users can trigger the warning below in __alloc_pages() by invoking
+write() on some DAMON debugfs files with an arbitrarily high count
+argument, because the DAMON debugfs interface allocates buffers based
+on the user-specified 'count'.
+
+ if (unlikely(order >= MAX_ORDER)) {
+ WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
+ return NULL;
+ }
+
+Because the DAMON debugfs interface code already checks for 'kmalloc()'
+failure, this commit simply suppresses the warnings by adding the
+'__GFP_NOWARN' flag.
+
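+A hypothetical userspace trigger, as a sketch (the debugfs path assumes
+the default debugfs mount point):
+
+  int fd = open("/sys/kernel/debug/damon/target_ids", O_WRONLY);
+
+  /* a 1 GiB count makes kmalloc(count + 1, ...) request order >= MAX_ORDER */
+  write(fd, "42", 1UL << 30);
+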
+Link: https://lkml.kernel.org/r/20211110145758.16558-1-sj@kernel.org
+Link: https://lkml.kernel.org/r/20211110145758.16558-2-sj@kernel.org
+Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/damon/dbgfs.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -32,7 +32,7 @@ static char *user_input_str(const char _
+ if (*ppos)
+ return ERR_PTR(-EINVAL);
+
+- kbuf = kmalloc(count + 1, GFP_KERNEL);
++ kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return ERR_PTR(-ENOMEM);
+
+@@ -247,7 +247,7 @@ static ssize_t dbgfs_kdamond_pid_read(st
+ char *kbuf;
+ ssize_t len;
+
+- kbuf = kmalloc(count, GFP_KERNEL);
++ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return -ENOMEM;
+
--- /dev/null
+From 34dbc3aaf5d9e89ba6cc5e24add9458c21ab1950 Mon Sep 17 00:00:00 2001
+From: Rustam Kovhaev <rkovhaev@gmail.com>
+Date: Fri, 19 Nov 2021 16:43:37 -0800
+Subject: mm: kmemleak: slob: respect SLAB_NOLEAKTRACE flag
+
+From: Rustam Kovhaev <rkovhaev@gmail.com>
+
+commit 34dbc3aaf5d9e89ba6cc5e24add9458c21ab1950 upstream.
+
+When kmemleak is enabled for SLOB, the system does not boot and does not
+print anything to the console. At a very early stage in the boot
+process we hit infinite recursion from kmemleak_init() and eventually
+the kernel crashes.
+
+kmemleak_init() specifies SLAB_NOLEAKTRACE for KMEM_CACHE(), but
+kmem_cache_create_usercopy() removes it because CACHE_CREATE_MASK is not
+valid for SLOB.
+
+Let's fix CACHE_CREATE_MASK and make kmemleak work with SLOB.
+
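+For reference, the masking happens in kmem_cache_create_usercopy() (an
+abbreviated sketch of the existing logic):
+
+  /* flags outside CACHE_CREATE_MASK are silently dropped; for SLOB
+   * the old mask therefore discarded SLAB_NOLEAKTRACE */
+  flags &= CACHE_CREATE_MASK;
+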
+Link: https://lkml.kernel.org/r/20211115020850.3154366-1-rkovhaev@gmail.com
+Fixes: d8843922fba4 ("slab: Ignore internal flags in cache creation")
+Signed-off-by: Rustam Kovhaev <rkovhaev@gmail.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Glauber Costa <glommer@parallels.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slab.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -147,7 +147,7 @@ static inline slab_flags_t kmem_cache_fl
+ #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
+ SLAB_TEMPORARY | SLAB_ACCOUNT)
+ #else
+-#define SLAB_CACHE_FLAGS (0)
++#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
+ #endif
+
+ /* Common flags available with current configuration */
--- /dev/null
+From a5b9703fe11cd1d6d7a60102aa2abe686dc1867f Mon Sep 17 00:00:00 2001
+From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
+Date: Sun, 31 Oct 2021 07:40:46 +0100
+Subject: pinctrl: ralink: include 'ralink_regs.h' in 'pinctrl-mt7620.c'
+
+From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
+
+commit a5b9703fe11cd1d6d7a60102aa2abe686dc1867f upstream.
+
+mt7620.h, included by pinctrl-mt7620.c, references MT762X_SOC_MT7628AN,
+which is declared in ralink_regs.h.
+
+Fixes: 745ec436de72 ("pinctrl: ralink: move MT7620 SoC pinmux config into a new 'pinctrl-mt7620.c' file")
+Cc: stable@vger.kernel.org
+Signed-off-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
+Signed-off-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
+Link: https://lore.kernel.org/r/20211031064046.13533-1-sergio.paracuellos@gmail.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pinctrl/ralink/pinctrl-mt7620.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/pinctrl/ralink/pinctrl-mt7620.c
++++ b/drivers/pinctrl/ralink/pinctrl-mt7620.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+
++#include <asm/mach-ralink/ralink_regs.h>
+ #include <asm/mach-ralink/mt7620.h>
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
--- /dev/null
+From 1e35eba4055149c578baf0318d2f2f89ea3c44a0 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Mon, 15 Nov 2021 09:08:36 +0100
+Subject: powerpc/8xx: Fix pinned TLBs with CONFIG_STRICT_KERNEL_RWX
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 1e35eba4055149c578baf0318d2f2f89ea3c44a0 upstream.
+
+As spotted and explained in commit c12ab8dbc492 ("powerpc/8xx: Fix
+Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST"), selecting
+STRICT_KERNEL_RWX without DEBUG_RODATA_TEST exposed the lack of the
+DIRTY bit in the pinned kernel data TLBs.
+
+This problem should have been detected a lot earlier if things had
+been working as expected. But due to an incredible level of chance or
+mishap, it went undetected because of a set of bugs: in fact the
+DTLBs were not pinned, because instead of setting the reserve bit
+in MD_CTR, it was set in MI_CTR, the register for ITLBs.
+
+But then there was another huge bug: the physical address was
+reset to 0 at the boundary between the RO and RW areas, leading to the
+same physical space being mapped at both 0xc0000000 and 0xc8000000.
+By some miracle this had no consequence until now, because the entry
+was not really pinned and so was overwritten soon enough to go
+undetected.
+
+Of course, now that we really pin the DTLBs, it must be fixed as well.
+
+Fixes: f76c8f6d257c ("powerpc/8xx: Add function to set pinned TLBs")
+Cc: stable@vger.kernel.org # v5.8+
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Depends-on: c12ab8dbc492 ("powerpc/8xx: Fix Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST")
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/a21e9a057fe2d247a535aff0d157a54eefee017a.1636963688.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/head_8xx.S | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/arch/powerpc/kernel/head_8xx.S
++++ b/arch/powerpc/kernel/head_8xx.S
+@@ -733,6 +733,7 @@ _GLOBAL(mmu_pin_tlb)
+ #ifdef CONFIG_PIN_TLB_DATA
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
++ li r8, 0
+ #ifdef CONFIG_PIN_TLB_IMMR
+ li r0, 3
+ #else
+@@ -741,26 +742,26 @@ _GLOBAL(mmu_pin_tlb)
+ mtctr r0
+ cmpwi r4, 0
+ beq 4f
+- LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+ LOAD_REG_ADDR(r9, _sinittext)
+
+ 2: ori r0, r6, MD_EVALID
++ ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+- mtspr SPRN_MD_RPN, r8
++ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+-
+-4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
++4:
+ 2: ori r0, r6, MD_EVALID
++ ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+- mtspr SPRN_MD_RPN, r8
++ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+@@ -781,7 +782,7 @@ _GLOBAL(mmu_pin_tlb)
+ #endif
+ #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+ lis r0, (MD_RSV4I | MD_TWAM)@h
+- mtspr SPRN_MI_CTR, r0
++ mtspr SPRN_MD_CTR, r0
+ #endif
+ mtspr SPRN_SRR1, r10
+ mtspr SPRN_SRR0, r11
--- /dev/null
+From 5499802b2284331788a440585869590f1bd63f7f Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Mon, 15 Nov 2021 09:52:55 +0100
+Subject: powerpc/signal32: Fix sigset_t copy
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit 5499802b2284331788a440585869590f1bd63f7f upstream.
+
+The conversion from __copy_from_user() to __get_user() by
+commit d3ccc9781560 ("powerpc/signal: Use __get_user() to copy
+sigset_t") introduced a regression in __get_user_sigset() for
+powerpc/32. The bug was subsequently moved into
+unsafe_get_user_sigset().
+
+The bug is due to the copied 64-bit value being truncated to
+32 bits while being assigned to dst->sig[0].
+
+The regression was reported by users of the Xorg packages distributed in
+Debian/powerpc --
+
+ "The symptoms are that the fb screen goes blank, with the backlight
+ remaining on and no errors logged in /var/log; wdm (or startx) run
+ with no effect (I tried logging in in the blind, with no effect).
+ And they are hard to kill, requiring 'kill -KILL ...'"
+
+Fix the regression by copying each word of the sigset, not only the
+first one.
+
+__get_user_sigset() was tentatively optimised to copy 64 bits at once
+in order to minimise KUAP unlock/lock impact, but the unsafe variant
+doesn't suffer from that overhead, so it can just copy words.
+
+Fixes: 887f3ceb51cd ("powerpc/signal32: Convert do_setcontext[_tm]() to user access block")
+Cc: stable@vger.kernel.org # v5.13+
+Reported-by: Finn Thain <fthain@linux-m68k.org>
+Reported-and-tested-by: Stan Johnson <userm57@yahoo.com>
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/99ef38d61c0eb3f79c68942deb0c35995a93a777.1636966353.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/signal.h | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/signal.h
++++ b/arch/powerpc/kernel/signal.h
+@@ -25,8 +25,14 @@ static inline int __get_user_sigset(sigs
+
+ return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+ }
+-#define unsafe_get_user_sigset(dst, src, label) \
+- unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label)
++#define unsafe_get_user_sigset(dst, src, label) do { \
++ sigset_t *__dst = dst; \
++ const sigset_t __user *__src = src; \
++ int i; \
++ \
++ for (i = 0; i < _NSIG_WORDS; i++) \
++ unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \
++} while (0)
+
+ #ifdef CONFIG_VSX
+ extern unsigned long copy_vsx_to_user(void __user *to,
--- /dev/null
+From 8e80a73fa9a7747e3e8255cb149c543aabf65a24 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
+Date: Tue, 16 Nov 2021 14:40:22 +0100
+Subject: powerpc/xive: Change IRQ domain to a tree domain
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater <clg@kaod.org>
+
+commit 8e80a73fa9a7747e3e8255cb149c543aabf65a24 upstream.
+
+Commit 4f86a06e2d6e ("irqdomain: Make normal and nomap irqdomains
+exclusive") introduced an IRQ_DOMAIN_FLAG_NO_MAP flag to isolate the
+'nomap' domains still in use under the powerpc arch. With this new
+flag, the revmap_tree of the IRQ domain is not used anymore. This
+change broke the support of shared LSIs [1] in the XIVE driver because
+it was relying on a lookup in the revmap_tree to query previously
+mapped interrupts. Linux now creates two distinct IRQ mappings on the
+same HW IRQ which can lead to unexpected behavior in the drivers.
+
+The XIVE IRQ domain is not a direct mapping domain, and its HW IRQ
+number space is rather large: 1M per socket on POWER9 and POWER10.
+Change the XIVE driver to use a 'tree' domain type instead.
+
+[1] For instance, a linux KVM guest with virtio-rng and virtio-balloon
+ devices.
+
+Fixes: 4f86a06e2d6e ("irqdomain: Make normal and nomap irqdomains exclusive")
+Cc: stable@vger.kernel.org # v5.14+
+Signed-off-by: Cédric Le Goater <clg@kaod.org>
+Tested-by: Greg Kurz <groug@kaod.org>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20211116134022.420412-1-clg@kaod.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/sysdev/xive/Kconfig | 1 -
+ arch/powerpc/sysdev/xive/common.c | 3 +--
+ 2 files changed, 1 insertion(+), 3 deletions(-)
+
+--- a/arch/powerpc/sysdev/xive/Kconfig
++++ b/arch/powerpc/sysdev/xive/Kconfig
+@@ -3,7 +3,6 @@ config PPC_XIVE
+ bool
+ select PPC_SMP_MUXED_IPI
+ select HARDIRQS_SW_RESEND
+- select IRQ_DOMAIN_NOMAP
+
+ config PPC_XIVE_NATIVE
+ bool
+--- a/arch/powerpc/sysdev/xive/common.c
++++ b/arch/powerpc/sysdev/xive/common.c
+@@ -1443,8 +1443,7 @@ static const struct irq_domain_ops xive_
+
+ static void __init xive_init_host(struct device_node *np)
+ {
+- xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ,
+- &xive_irq_domain_ops, NULL);
++ xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
+ if (WARN_ON(xive_irq_domain == NULL))
+ return;
+ irq_set_default_host(xive_irq_domain);
--- /dev/null
+From 61eb495c83bf6ebde490992bf888ca15b9babc39 Mon Sep 17 00:00:00 2001
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+Date: Thu, 18 Nov 2021 10:26:21 -0800
+Subject: pstore/blk: Use "%lu" to format unsigned long
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+
+commit 61eb495c83bf6ebde490992bf888ca15b9babc39 upstream.
+
+On 32-bit:
+
+ fs/pstore/blk.c: In function ‘__best_effort_init’:
+ include/linux/kern_levels.h:5:18: warning: format ‘%zu’ expects argument of type ‘size_t’, but argument 3 has type ‘long unsigned int’ [-Wformat=]
+ 5 | #define KERN_SOH "\001" /* ASCII Start Of Header */
+ | ^~~~~~
+ include/linux/kern_levels.h:14:19: note: in expansion of macro ‘KERN_SOH’
+ 14 | #define KERN_INFO KERN_SOH "6" /* informational */
+ | ^~~~~~~~
+ include/linux/printk.h:373:9: note: in expansion of macro ‘KERN_INFO’
+ 373 | printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+ | ^~~~~~~~~
+ fs/pstore/blk.c:314:3: note: in expansion of macro ‘pr_info’
+ 314 | pr_info("attached %s (%zu) (no dedicated panic_write!)\n",
+ | ^~~~~~~
+
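+The underlying mismatch, as a minimal sketch: on 32-bit targets size_t
+is typically 'unsigned int', a distinct type from 'unsigned long' even
+though both are 32 bits wide:
+
+  unsigned long total_size = 1024;
+
+  pr_info("%zu\n", total_size);   /* -Wformat: %zu expects size_t */
+  pr_info("%lu\n", total_size);   /* matches unsigned long */
+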
+Cc: stable@vger.kernel.org
+Fixes: 7bb9557b48fcabaa ("pstore/blk: Use the normal block device I/O path")
+Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210629103700.1935012-1-geert@linux-m68k.org
+Cc: Jens Axboe <axboe@kernel.dk>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/pstore/blk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/pstore/blk.c
++++ b/fs/pstore/blk.c
+@@ -311,7 +311,7 @@ static int __init __best_effort_init(voi
+ if (ret)
+ kfree(best_effort_dev);
+ else
+- pr_info("attached %s (%zu) (no dedicated panic_write!)\n",
++ pr_info("attached %s (%lu) (no dedicated panic_write!)\n",
+ blkdev, best_effort_dev->zone.total_size);
+
+ return ret;
--- /dev/null
+From f15863b27752682bb700c21de5f83f613a0fb77e Mon Sep 17 00:00:00 2001
+From: Vandita Kulkarni <vandita.kulkarni@intel.com>
+Date: Tue, 9 Nov 2021 17:34:28 +0530
+Subject: Revert "drm/i915/tgl/dsi: Gate the ddi clocks after pll mapping"
+
+From: Vandita Kulkarni <vandita.kulkarni@intel.com>
+
+commit f15863b27752682bb700c21de5f83f613a0fb77e upstream.
+
+This reverts commit 991d9557b0c4 ("drm/i915/tgl/dsi: Gate the ddi clocks
+after pll mapping"). The Bspec was recently updated with a PLL ungate
+sequence similar to that of the ICL DSI enable sequence. Hence the revert.
+
+Bspec: 49187
+Fixes: 991d9557b0c4 ("drm/i915/tgl/dsi: Gate the ddi clocks after pll mapping")
+Cc: <stable@vger.kernel.org> # v5.4+
+Signed-off-by: Vandita Kulkarni <vandita.kulkarni@intel.com>
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20211109120428.15211-1-vandita.kulkarni@intel.com
+(cherry picked from commit 4579509ef181480f4e4510d436c691519167c5c2)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/icl_dsi.c | 10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/icl_dsi.c
++++ b/drivers/gpu/drm/i915/display/icl_dsi.c
+@@ -711,10 +711,7 @@ static void gen11_dsi_map_pll(struct int
+ intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
+
+ for_each_dsi_phy(phy, intel_dsi->phys) {
+- if (DISPLAY_VER(dev_priv) >= 12)
+- val |= ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
+- else
+- val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
++ val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
+ }
+ intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
+
+@@ -1150,8 +1147,6 @@ static void
+ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
+ const struct intel_crtc_state *crtc_state)
+ {
+- struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+-
+ /* step 4a: power up all lanes of the DDI used by DSI */
+ gen11_dsi_power_up_lanes(encoder);
+
+@@ -1177,8 +1172,7 @@ gen11_dsi_enable_port_and_phy(struct int
+ gen11_dsi_configure_transcoder(encoder, crtc_state);
+
+ /* Step 4l: Gate DDI clocks */
+- if (DISPLAY_VER(dev_priv) == 11)
+- gen11_dsi_gate_clocks(encoder);
++ gen11_dsi_gate_clocks(encoder);
+ }
+
+ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
--- /dev/null
+From d1faacbf67b1944f0e0c618dc581d929263f6fe9 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 16 Nov 2021 10:15:59 -0800
+Subject: Revert "mark pstore-blk as broken"
+
+From: Kees Cook <keescook@chromium.org>
+
+commit d1faacbf67b1944f0e0c618dc581d929263f6fe9 upstream.
+
+This reverts commit d07f3b081ee632268786601f55e1334d1f68b997.
+
+pstore-blk was fixed to avoid the unwanted APIs in commit 7bb9557b48fc
+("pstore/blk: Use the normal block device I/O path"), which landed in
+the same release as the commit adding BROKEN.
+
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20211116181559.3975566-1-keescook@chromium.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/pstore/Kconfig | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/pstore/Kconfig
++++ b/fs/pstore/Kconfig
+@@ -173,7 +173,6 @@ config PSTORE_BLK
+ tristate "Log panic/oops to a block device"
+ depends on PSTORE
+ depends on BLOCK
+- depends on BROKEN
+ select PSTORE_ZONE
+ default n
+ help
--- /dev/null
+From 79df39d535c7a3770856fe9f5aba8c0ad1eebdb6 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Wed, 17 Nov 2021 11:05:07 +0100
+Subject: Revert "parisc: Reduce sigreturn trampoline to 3 instructions"
+
+From: Helge Deller <deller@gmx.de>
+
+commit 79df39d535c7a3770856fe9f5aba8c0ad1eebdb6 upstream.
+
+This reverts commit e4f2006f1287e7ea17660490569cff323772dac4.
+
+The reverted commit causes problems with signal handling. Revert it for now.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Cc: <stable@vger.kernel.org> # v5.15
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/parisc/include/asm/rt_sigframe.h | 2 +-
+ arch/parisc/kernel/signal.c | 13 +++++++------
+ arch/parisc/kernel/signal32.h | 2 +-
+ 3 files changed, 9 insertions(+), 8 deletions(-)
+
+--- a/arch/parisc/include/asm/rt_sigframe.h
++++ b/arch/parisc/include/asm/rt_sigframe.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_PARISC_RT_SIGFRAME_H
+ #define _ASM_PARISC_RT_SIGFRAME_H
+
+-#define SIGRETURN_TRAMP 3
++#define SIGRETURN_TRAMP 4
+ #define SIGRESTARTBLOCK_TRAMP 5
+ #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
+
+--- a/arch/parisc/kernel/signal.c
++++ b/arch/parisc/kernel/signal.c
+@@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sig
+ already in userspace. The first words of tramp are used to
+ save the previous sigrestartblock trampoline that might be
+ on the stack. We start the sigreturn trampoline at
+- SIGRESTARTBLOCK_TRAMP. */
++ SIGRESTARTBLOCK_TRAMP+X. */
+ err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
+ &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
+- err |= __put_user(INSN_BLE_SR2_R0,
++ err |= __put_user(INSN_LDI_R20,
+ &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
+- err |= __put_user(INSN_LDI_R20,
++ err |= __put_user(INSN_BLE_SR2_R0,
+ &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
++ err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
+
+- start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
+- end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
++ start = (unsigned long) &frame->tramp[0];
++ end = (unsigned long) &frame->tramp[TRAMP_SIZE];
+ flush_user_dcache_range_asm(start, end);
+ flush_user_icache_range_asm(start, end);
+
+ /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
+- * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
++ * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
+ * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
+ */
+ rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
+--- a/arch/parisc/kernel/signal32.h
++++ b/arch/parisc/kernel/signal32.h
+@@ -36,7 +36,7 @@ struct compat_regfile {
+ compat_int_t rf_sar;
+ };
+
+-#define COMPAT_SIGRETURN_TRAMP 3
++#define COMPAT_SIGRETURN_TRAMP 4
+ #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
+ #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
+ COMPAT_SIGRESTARTBLOCK_TRAMP)
--- /dev/null
+From 9a39abb7c9aab50eec4ac4421e9ee7f3de013d24 Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Thu, 14 Oct 2021 13:53:54 +0200
+Subject: s390/boot: simplify and fix kernel memory layout setup
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit 9a39abb7c9aab50eec4ac4421e9ee7f3de013d24 upstream.
+
+The initial KASAN shadow memory range was picked to preserve the
+original kernel modules area position. With protected execution
+support, which might impose addressing limitations on the vmalloc area
+and hence affect the modules area position, the current fixed KASAN
+shadow memory range only makes kernel memory layout setup more complex.
+So move it to the very end of the available virtual space and simplify
+the calculations.
+
+At the same time, return to the previous kernel address space split. In
+particular, commit 0c4f2623b957 ("s390: setup kernel memory layout
+early") introduced a precise identity map size calculation and kept
+vmemmap leftmost, starting at a fresh region table entry. This didn't
+take into account the additional mapping region required for potential
+DCSS mappings above the available physical memory. So go back to
+splitting the virtual space between the 1:1 mapping and the vmemmap
+array once the vmalloc area size is subtracted.
+
+Cc: stable@vger.kernel.org
+Fixes: 0c4f2623b957 ("s390: setup kernel memory layout early")
+Reported-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/Kconfig | 2 -
+ arch/s390/boot/startup.c | 88 ++++++++++++++++-------------------------------
+ 2 files changed, 32 insertions(+), 58 deletions(-)
+
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -47,7 +47,7 @@ config ARCH_SUPPORTS_UPROBES
+ config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN
+- default 0x18000000000000
++ default 0x1C000000000000
+
+ config S390
+ def_bool y
+--- a/arch/s390/boot/startup.c
++++ b/arch/s390/boot/startup.c
+@@ -148,82 +148,56 @@ static void setup_ident_map_size(unsigne
+
+ static void setup_kernel_memory_layout(void)
+ {
+- bool vmalloc_size_verified = false;
+- unsigned long vmemmap_off;
+- unsigned long vspace_left;
++ unsigned long vmemmap_start;
+ unsigned long rte_size;
+ unsigned long pages;
+- unsigned long vmax;
+
+ pages = ident_map_size / PAGE_SIZE;
+ /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+ vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
+
+ /* choose kernel address space layout: 4 or 3 levels. */
+- vmemmap_off = round_up(ident_map_size, _REGION3_SIZE);
++ vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
+ if (IS_ENABLED(CONFIG_KASAN) ||
+ vmalloc_size > _REGION2_SIZE ||
+- vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE)
+- vmax = _REGION1_SIZE;
+- else
+- vmax = _REGION2_SIZE;
+-
+- /* keep vmemmap_off aligned to a top level region table entry */
+- rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE;
+- MODULES_END = vmax;
+- if (is_prot_virt_host()) {
+- /*
+- * forcing modules and vmalloc area under the ultravisor
+- * secure storage limit, so that any vmalloc allocation
+- * we do could be used to back secure guest storage.
+- */
+- adjust_to_uv_max(&MODULES_END);
+- }
+-
+-#ifdef CONFIG_KASAN
+- if (MODULES_END < vmax) {
+- /* force vmalloc and modules below kasan shadow */
+- MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
++ vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
++ _REGION2_SIZE) {
++ MODULES_END = _REGION1_SIZE;
++ rte_size = _REGION2_SIZE;
+ } else {
+- /*
+- * leave vmalloc and modules above kasan shadow but make
+- * sure they don't overlap with it
+- */
+- vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN);
+- vmalloc_size_verified = true;
+- vspace_left = KASAN_SHADOW_START;
++ MODULES_END = _REGION2_SIZE;
++ rte_size = _REGION3_SIZE;
+ }
++ /*
++ * forcing modules and vmalloc area under the ultravisor
++ * secure storage limit, so that any vmalloc allocation
++ * we do could be used to back secure guest storage.
++ */
++ adjust_to_uv_max(&MODULES_END);
++#ifdef CONFIG_KASAN
++ /* force vmalloc and modules below kasan shadow */
++ MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+ #endif
+ MODULES_VADDR = MODULES_END - MODULES_LEN;
+ VMALLOC_END = MODULES_VADDR;
+
+- if (vmalloc_size_verified) {
+- VMALLOC_START = VMALLOC_END - vmalloc_size;
+- } else {
+- vmemmap_off = round_up(ident_map_size, rte_size);
+-
+- if (vmemmap_off + vmemmap_size > VMALLOC_END ||
+- vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) {
+- /*
+- * allow vmalloc area to occupy up to 1/2 of
+- * the rest virtual space left.
+- */
+- vmalloc_size = min(vmalloc_size, VMALLOC_END / 2);
+- }
+- VMALLOC_START = VMALLOC_END - vmalloc_size;
+- vspace_left = VMALLOC_START;
+- }
++ /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
++ vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
++ VMALLOC_START = VMALLOC_END - vmalloc_size;
+
+- pages = vspace_left / (PAGE_SIZE + sizeof(struct page));
++ /* split remaining virtual space between 1:1 mapping & vmemmap array */
++ pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ pages = SECTION_ALIGN_UP(pages);
+- vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size);
+- /* keep vmemmap left most starting from a fresh region table entry */
+- vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size));
+- /* take care that identity map is lower then vmemmap */
+- ident_map_size = min(ident_map_size, vmemmap_off);
++ /* keep vmemmap_start aligned to a top level region table entry */
++ vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
++ /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
++ vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
++ /* make sure identity map doesn't overlay with vmemmap */
++ ident_map_size = min(ident_map_size, vmemmap_start);
+ vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
+- VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START);
+- vmemmap = (struct page *)vmemmap_off;
++ /* make sure vmemmap doesn't overlay with vmalloc area */
++ VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
++ vmemmap = (struct page *)vmemmap_start;
+ }
+
+ /*
--- /dev/null
+From 3b90954419d4c05651de9cce6d7632bcf6977678 Mon Sep 17 00:00:00 2001
+From: Alexander Egorenkov <egorenar@linux.ibm.com>
+Date: Mon, 15 Nov 2021 07:40:25 +0100
+Subject: s390/dump: fix copying to user-space of swapped kdump oldmem
+
+From: Alexander Egorenkov <egorenar@linux.ibm.com>
+
+commit 3b90954419d4c05651de9cce6d7632bcf6977678 upstream.
+
+This commit fixes a bug introduced by commit e9e7870f90e3 ("s390/dump:
+introduce boot data 'oldmem_data'").
+OLDMEM_BASE was mistakenly replaced by oldmem_data.size instead of
+oldmem_data.start.
+
+This bug caused the following error during kdump:
+kdump.sh[878]: No program header covering vaddr 0x3434f5245found kexec bug?
+
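+A worked example of the broken check with hypothetical values: take
+oldmem_data.start = 0x10000000, oldmem_data.size = 0x8000000, and a
+copy at from = 0x12000000, which lies inside the swapped area:
+
+	from - oldmem_data.size  == 0x0a000000  /* not < size: area missed   */
+	from - oldmem_data.start == 0x02000000  /* < size: correctly matched */
+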
+Fixes: e9e7870f90e3 ("s390/dump: introduce boot data 'oldmem_data'")
+Cc: stable@vger.kernel.org # 5.15+
+Signed-off-by: Alexander Egorenkov <egorenar@linux.ibm.com>
+Reviewed-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/crash_dump.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/kernel/crash_dump.c
++++ b/arch/s390/kernel/crash_dump.c
+@@ -191,8 +191,8 @@ static int copy_oldmem_user(void __user
+ return rc;
+ } else {
+ /* Check for swapped kdump oldmem areas */
+- if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) {
+- from -= oldmem_data.size;
++ if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
++ from -= oldmem_data.start;
+ len = min(count, oldmem_data.size - from);
+ } else if (oldmem_data.start && from < oldmem_data.size) {
+ len = min(count, oldmem_data.size - from);
--- /dev/null
+From 4aa9340584e37debef06fa99b56d064beb723891 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Tue, 16 Nov 2021 11:31:01 +0800
+Subject: s390/kexec: fix memory leak of ipl report buffer
+
+From: Baoquan He <bhe@redhat.com>
+
+commit 4aa9340584e37debef06fa99b56d064beb723891 upstream.
+
+unreferenced object 0x38000195000 (size 4096):
+ comm "kexec", pid 8548, jiffies 4294953647 (age 32443.270s)
+ hex dump (first 32 bytes):
+ 00 00 00 c8 20 00 00 00 00 00 00 c0 02 80 00 00 .... ...........
+ 40 40 40 40 40 40 40 40 00 00 00 00 00 00 00 00 @@@@@@@@........
+ backtrace:
+ [<0000000011a2f199>] __vmalloc_node_range+0xc0/0x140
+ [<0000000081fa2752>] vzalloc+0x5a/0x70
+ [<0000000063a4c92d>] ipl_report_finish+0x2c/0x180
+ [<00000000553304da>] kexec_file_add_ipl_report+0xf4/0x150
+ [<00000000862d033f>] kexec_file_add_components+0x124/0x160
+ [<000000000d2717bb>] arch_kexec_kernel_image_load+0x62/0x90
+ [<000000002e0373b6>] kimage_file_alloc_init+0x1aa/0x2e0
+ [<0000000060f2d14f>] __do_sys_kexec_file_load+0x17c/0x2c0
+ [<000000008c86fe5a>] __s390x_sys_kexec_file_load+0x40/0x50
+ [<000000001fdb9dac>] __do_syscall+0x1bc/0x1f0
+ [<000000003ee4258d>] system_call+0x78/0xa0
+
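+The leaked object is the ipl report buffer vzalloc()ed in
+ipl_report_finish() on behalf of kexec_file_load() and never freed. A
+minimal sketch of the fix applied below: remember the allocation in the
+newly added struct kimage_arch and release it from the arch post-load
+cleanup hook:
+
+	image->arch.ipl_buf = buf.buffer;  /* in kexec_file_add_ipl_report() */
+
+	int arch_kimage_file_post_load_cleanup(struct kimage *image)
+	{
+		vfree(image->arch.ipl_buf);
+		image->arch.ipl_buf = NULL;
+		return kexec_image_post_load_cleanup_default(image);
+	}
+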
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Reviewed-by: Philipp Rudo <prudo@redhat.com>
+Fixes: 99feaa717e55 ("s390/kexec_file: Create ipl report and pass to next kernel")
+Cc: <stable@vger.kernel.org> # v5.2: 20c76e242e70: s390/kexec: fix return code handling
+Cc: <stable@vger.kernel.org> # v5.2
+Link: https://lore.kernel.org/r/20211116033101.GD21646@MiWiFi-R3L-srv
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/kexec.h | 6 ++++++
+ arch/s390/kernel/machine_kexec_file.c | 10 ++++++++++
+ 2 files changed, 16 insertions(+)
+
+--- a/arch/s390/include/asm/kexec.h
++++ b/arch/s390/include/asm/kexec.h
+@@ -74,6 +74,12 @@ void *kexec_file_add_components(struct k
+ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+ unsigned long addr);
+
++#define ARCH_HAS_KIMAGE_ARCH
++
++struct kimage_arch {
++ void *ipl_buf;
++};
++
+ extern const struct kexec_file_ops s390_kexec_image_ops;
+ extern const struct kexec_file_ops s390_kexec_elf_ops;
+
+--- a/arch/s390/kernel/machine_kexec_file.c
++++ b/arch/s390/kernel/machine_kexec_file.c
+@@ -12,6 +12,7 @@
+ #include <linux/kexec.h>
+ #include <linux/module_signature.h>
+ #include <linux/verification.h>
++#include <linux/vmalloc.h>
+ #include <asm/boot_data.h>
+ #include <asm/ipl.h>
+ #include <asm/setup.h>
+@@ -206,6 +207,7 @@ static int kexec_file_add_ipl_report(str
+ goto out;
+ buf.bufsz = data->report->size;
+ buf.memsz = buf.bufsz;
++ image->arch.ipl_buf = buf.buffer;
+
+ data->memsz += buf.memsz;
+
+@@ -327,3 +329,11 @@ int arch_kexec_kernel_image_probe(struct
+
+ return kexec_image_probe_default(image, buf, buf_len);
+ }
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++ vfree(image->arch.ipl_buf);
++ image->arch.ipl_buf = NULL;
++
++ return kexec_image_post_load_cleanup_default(image);
++}
--- /dev/null
+From 420f48f636b98fd685f44a3acc4c0a7c0840910d Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Thu, 14 Oct 2021 13:33:45 +0200
+Subject: s390/setup: avoid reserving memory above identity mapping
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit 420f48f636b98fd685f44a3acc4c0a7c0840910d upstream.
+
+Such a reserved memory region, if not cleaned up later, causes problems
+when memblock_free_all() is called to release free pages to the buddy
+allocator: the reserved regions are carried over to
+reserve_bootmem_region(), which marks the pages as PageReserved.
+
+Instead, use memblock_set_current_limit() to make sure memblock
+allocations do not go above the identity mapping (which could happen
+when the "mem=" option is used or during kdump).
+
+Cc: stable@vger.kernel.org
+Fixes: 73045a08cf55 ("s390: unify identity mapping limits handling")
+Reported-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/setup.c | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -634,14 +634,6 @@ static struct notifier_block kdump_mem_n
+ #endif
+
+ /*
+- * Make sure that the area above identity mapping is protected
+- */
+-static void __init reserve_above_ident_map(void)
+-{
+- memblock_reserve(ident_map_size, ULONG_MAX);
+-}
+-
+-/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+ static void __init reserve_crashkernel(void)
+@@ -1005,11 +997,11 @@ void __init setup_arch(char **cmdline_p)
+ setup_control_program_code();
+
+ /* Do some memory reservations *before* memory is added to memblock */
+- reserve_above_ident_map();
+ reserve_kernel();
+ reserve_initrd();
+ reserve_certificate_list();
+ reserve_mem_detect_info();
++ memblock_set_current_limit(ident_map_size);
+ memblock_allow_resize();
+
+ /* Get information about *all* installed memory */
--- /dev/null
+From 00b55eaf45549ce26424224d069a091c7e5d8bac Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Thu, 11 Nov 2021 10:58:26 +0100
+Subject: s390/vdso: filter out -mstack-guard and -mstack-size
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit 00b55eaf45549ce26424224d069a091c7e5d8bac upstream.
+
+When CONFIG_VMAP_STACK is disabled, the user can enable CONFIG_CHECK_STACK,
+which adds a stack overflow check to each C function in the kernel. This is
+also done for functions in the vdso page. These functions run in user
+context, and user stack sizes usually differ from what the kernel uses.
+This might trigger the stack check even though the stack size is valid.
+Therefore, filter out the -mstack-guard and -mstack-size flags when
+compiling vdso C files.
+
+Cc: stable@kernel.org # 5.10+
+Fixes: 4bff8cb54502 ("s390: convert to GENERIC_VDSO")
+Reported-by: Janosch Frank <frankja@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/Makefile | 10 ++++++----
+ arch/s390/kernel/vdso64/Makefile | 5 +++--
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+--- a/arch/s390/Makefile
++++ b/arch/s390/Makefile
+@@ -79,10 +79,12 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y
+ KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
+
+ ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
+-cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
+-ifeq ($(call cc-option,-mstack-size=8192),)
+-cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
+-endif
++ CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
++ ifeq ($(call cc-option,-mstack-size=8192),)
++ CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
++ endif
++ export CC_FLAGS_CHECK_STACK
++ cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
+ endif
+
+ ifdef CONFIG_EXPOLINE
+--- a/arch/s390/kernel/vdso64/Makefile
++++ b/arch/s390/kernel/vdso64/Makefile
+@@ -8,8 +8,9 @@ ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
+ include $(srctree)/lib/vdso/Makefile
+ obj-vdso64 = vdso_user_wrapper.o note.o
+ obj-cvdso64 = vdso64_generic.o getcpu.o
+-CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+-CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
++VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
++CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
++CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
+
+ # Build rules
+
--- /dev/null
+From 392006871bb26166bcfafa56faf49431c2cfaaa8 Mon Sep 17 00:00:00 2001
+From: "Ewan D. Milne" <emilne@redhat.com>
+Date: Mon, 8 Nov 2021 13:30:12 -0500
+Subject: scsi: qla2xxx: Fix mailbox direction flags in qla2xxx_get_adapter_id()
+
+From: Ewan D. Milne <emilne@redhat.com>
+
+commit 392006871bb26166bcfafa56faf49431c2cfaaa8 upstream.
+
+The SCM changes set the flags in mcp->out_mb instead of mcp->in_mb, so
+the data was not actually being read back from the adapter into the
+mcp->mb[] array.
+
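+For reference, in the qla2xxx mailbox convention mcp->out_mb flags the
+mcp->mb[] registers written to the adapter, while mcp->in_mb flags the
+registers copied back into mcp->mb[] on completion; a simplified sketch
+(not the full function):
+
+	mcp->out_mb = MBX_0;                       /* command goes out    */
+	mcp->in_mb |= MBX_23|MBX_22|MBX_21|MBX_15; /* SCM data comes back */
+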
+Link: https://lore.kernel.org/r/20211108183012.13895-1-emilne@redhat.com
+Fixes: 9f2475fe7406 ("scsi: qla2xxx: SAN congestion management implementation")
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Reviewed-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Ewan D. Milne <emilne@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/qla2xxx/qla_mbx.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -1695,10 +1695,8 @@ qla2x00_get_adapter_id(scsi_qla_host_t *
+ mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10;
+ if (IS_FWI2_CAPABLE(vha->hw))
+ mcp->in_mb |= MBX_19|MBX_18|MBX_17|MBX_16;
+- if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw)) {
+- mcp->in_mb |= MBX_15;
+- mcp->out_mb |= MBX_7|MBX_21|MBX_22|MBX_23;
+- }
++ if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw))
++ mcp->in_mb |= MBX_15|MBX_21|MBX_22|MBX_23;
+
+ mcp->tov = MBX_TOV_SECONDS;
+ mcp->flags = 0;
blk-cgroup-fix-missing-put-device-in-error-path-from.patch
dmaengine-remove-debugfs-ifdef.patch
tun-fix-bonding-active-backup-with-arp-monitoring.patch
+revert-mark-pstore-blk-as-broken.patch
+pstore-blk-use-lu-to-format-unsigned-long.patch
+hexagon-export-raw-i-o-routines-for-modules.patch
+hexagon-clean-up-timer-regs.h.patch
+tipc-check-for-null-after-calling-kmemdup.patch
+ipc-warn-if-trying-to-remove-ipc-object-which-is-absent.patch
+shm-extend-forced-shm-destroy-to-support-objects-from-several-ipc-nses.patch
+mm-kmemleak-slob-respect-slab_noleaktrace-flag.patch
+hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error.patch
+kmap_local-don-t-assume-kmap-ptes-are-linear-arrays-in-memory.patch
+mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation.patch
+mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock.patch
+x86-boot-pull-up-cmdline-preparation-and-early-param-parsing.patch
+x86-sgx-fix-free-page-accounting.patch
+x86-hyperv-fix-null-deref-in-set_hv_tscchange_cb-if-hyper-v-setup-fails.patch
+kvm-x86-assume-a-64-bit-hypercall-for-guests-with-protected-state.patch
+kvm-x86-fix-uninitialized-eoi_exit_bitmap-usage-in-vcpu_load_eoi_exitmap.patch
+kvm-x86-mmu-include-efer.lma-in-extended-mmu-role.patch
+kvm-x86-xen-fix-get_attr-of-kvm_xen_attr_type_shared_info.patch
+powerpc-signal32-fix-sigset_t-copy.patch
+powerpc-xive-change-irq-domain-to-a-tree-domain.patch
+powerpc-8xx-fix-pinned-tlbs-with-config_strict_kernel_rwx.patch
+revert-drm-i915-tgl-dsi-gate-the-ddi-clocks-after-pll-mapping.patch
+revert-parisc-reduce-sigreturn-trampoline-to-3-instructions.patch
+ata-libata-improve-ata_read_log_page-error-message.patch
+ata-libata-add-missing-ata_identify_page_supported-calls.patch
+scsi-qla2xxx-fix-mailbox-direction-flags-in-qla2xxx_get_adapter_id.patch
+pinctrl-ralink-include-ralink_regs.h-in-pinctrl-mt7620.c.patch
+s390-setup-avoid-reserving-memory-above-identity-mapping.patch
+s390-boot-simplify-and-fix-kernel-memory-layout-setup.patch
+s390-vdso-filter-out-mstack-guard-and-mstack-size.patch
+s390-kexec-fix-memory-leak-of-ipl-report-buffer.patch
+s390-dump-fix-copying-to-user-space-of-swapped-kdump-oldmem.patch
+block-check-admin-before-nice-for-ioprio_class_rt.patch
+fbdev-prevent-probing-generic-drivers-if-a-fb-is-already-registered.patch
--- /dev/null
+From 85b6d24646e4125c591639841169baa98a2da503 Mon Sep 17 00:00:00 2001
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Date: Fri, 19 Nov 2021 16:43:21 -0800
+Subject: shm: extend forced shm destroy to support objects from several IPC nses
+
+From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+
+commit 85b6d24646e4125c591639841169baa98a2da503 upstream.
+
+Currently, the exit_shm() function is not designed to work properly
+when task->sysvshm.shm_clist holds shm objects from different IPC
+namespaces.
+
+This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it
+leads to a use-after-free (a reproducer exists).
+
+Fix the problem by extending the exit_shm mechanism to handle shm
+destruction across several IPC namespaces.
+
+To achieve that, do several things:
+
+1. add a (non-refcounted) namespace pointer to struct shmid_kernel
+
+2. during new shm object creation (newseg()/shmget syscall), initialize
+   this pointer with the current task's IPC namespace
+
+3. fully rework exit_shm() so that it traverses all shp's in
+   task->sysvshm.shm_clist and gets the IPC namespace not from the
+   current task, as before, but from the shp object itself, then calls
+   shm_destroy(shp, ns).
+
+Note: We need to be really careful here because, as noted in (1), our
+pointer to the IPC namespace is not refcounted. To be on the safe side,
+use the special helper get_ipc_ns_not_zero(), which takes a reference
+on the IPC namespace only if it is not already being destroyed.
+
+Q/A
+
+Q: Why can we access shp->ns through a non-refcounted pointer?
+A: Because the shp object lifetime is always shorter than the IPC
+   namespace lifetime, so if we get the shp object from
+   task->sysvshm.shm_clist while holding task_lock(task), nobody can
+   steal our namespace.
+
+Q: Does this patch change the semantics of the unshare/setns/clone
+   syscalls?
+A: No. It just fixes a previously uncovered case where a process may
+   leave an IPC namespace without getting its task->sysvshm.shm_clist
+   list cleaned up.
+
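+A minimal user-space sketch of that sequence (illustrative only;
+assumes kernel.shm_rmid_forced = 1 and that old_ipc_ns_fd was opened on
+the original namespace beforehand):
+
+	unshare(CLONE_NEWIPC);                       /* enter a fresh IPC ns */
+	shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600); /* shp on shm_clist     */
+	setns(old_ipc_ns_fd, CLONE_NEWIPC);          /* leave; entry remains */
+	exit(0); /* old exit_shm() resolved the entry against the wrong ns   */
+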
+Link: https://lkml.kernel.org/r/67bb03e5-f79c-1815-e2bf-949c67047418@colorfullife.com
+Link: https://lkml.kernel.org/r/20211109151501.4921-1-manfred@colorfullife.com
+Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity")
+Co-developed-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Cc: "Eric W. Biederman" <ebiederm@xmission.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Cc: Vasily Averin <vvs@virtuozzo.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ipc_namespace.h | 15 +++
+ include/linux/sched/task.h | 2
+ ipc/shm.c | 189 +++++++++++++++++++++++++++++++-----------
+ 3 files changed, 159 insertions(+), 47 deletions(-)
+
+--- a/include/linux/ipc_namespace.h
++++ b/include/linux/ipc_namespace.h
+@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_
+ return ns;
+ }
+
++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
++{
++ if (ns) {
++ if (refcount_inc_not_zero(&ns->ns.count))
++ return ns;
++ }
++
++ return NULL;
++}
++
+ extern void put_ipc_ns(struct ipc_namespace *ns);
+ #else
+ static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
+@@ -146,6 +156,11 @@ static inline struct ipc_namespace *get_
+ {
+ return ns;
+ }
++
++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
++{
++ return ns;
++}
+
+ static inline void put_ipc_ns(struct ipc_namespace *ns)
+ {
+--- a/include/linux/sched/task.h
++++ b/include/linux/sched/task.h
+@@ -158,7 +158,7 @@ static inline struct vm_struct *task_sta
+ * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
+ * subscriptions and synchronises with wait4(). Also used in procfs. Also
+ * pins the final release of task.io_context. Also protects ->cpuset and
+- * ->cgroup.subsys[]. And ->vfork_done.
++ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
+ *
+ * Nests both inside and outside of read_lock(&tasklist_lock).
+ * It must not be nested with write_lock_irq(&tasklist_lock),
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the ke
+ struct pid *shm_lprid;
+ struct ucounts *mlock_ucounts;
+
+- /* The task created the shm object. NULL if the task is dead. */
++ /*
++ * The task created the shm object, for
++ * task_lock(shp->shm_creator)
++ */
+ struct task_struct *shm_creator;
+- struct list_head shm_clist; /* list by creator */
++
++ /*
++ * List by creator. task_lock(->shm_creator) required for read/write.
++ * If list_empty(), then the creator is dead already.
++ */
++ struct list_head shm_clist;
++ struct ipc_namespace *ns;
+ } __randomize_layout;
+
+ /* shm_mode upper byte flags */
+@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_names
+ struct shmid_kernel *shp;
+
+ shp = container_of(ipcp, struct shmid_kernel, shm_perm);
++ WARN_ON(ns != shp->ns);
+
+ if (shp->shm_nattch) {
+ shp->shm_perm.mode |= SHM_DEST;
+@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head
+ kfree(shp);
+ }
+
+-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
++/*
++ * It has to be called with shp locked.
++ * It must be called before ipc_rmid()
++ */
++static inline void shm_clist_rm(struct shmid_kernel *shp)
++{
++ struct task_struct *creator;
++
++ /* ensure that shm_creator does not disappear */
++ rcu_read_lock();
++
++ /*
++ * A concurrent exit_shm may do a list_del_init() as well.
++ * Just do nothing if exit_shm already did the work
++ */
++ if (!list_empty(&shp->shm_clist)) {
++ /*
++ * shp->shm_creator is guaranteed to be valid *only*
++ * if shp->shm_clist is not empty.
++ */
++ creator = shp->shm_creator;
++
++ task_lock(creator);
++ /*
++ * list_del_init() is a nop if the entry was already removed
++ * from the list.
++ */
++ list_del_init(&shp->shm_clist);
++ task_unlock(creator);
++ }
++ rcu_read_unlock();
++}
++
++static inline void shm_rmid(struct shmid_kernel *s)
+ {
+- list_del(&s->shm_clist);
+- ipc_rmid(&shm_ids(ns), &s->shm_perm);
++ shm_clist_rm(s);
++ ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
+ }
+
+
+@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_names
+ shm_file = shp->shm_file;
+ shp->shm_file = NULL;
+ ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- shm_rmid(ns, shp);
++ shm_rmid(shp);
+ shm_unlock(shp);
+ if (!is_file_hugepages(shm_file))
+ shmem_lock(shm_file, 0, shp->mlock_ucounts);
+@@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_names
+ *
+ * 2) sysctl kernel.shm_rmid_forced is set to 1.
+ */
+-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
++static bool shm_may_destroy(struct shmid_kernel *shp)
+ {
+ return (shp->shm_nattch == 0) &&
+- (ns->shm_rmid_forced ||
++ (shp->ns->shm_rmid_forced ||
+ (shp->shm_perm.mode & SHM_DEST));
+ }
+
+@@ -340,7 +383,7 @@ static void shm_close(struct vm_area_str
+ ipc_update_pid(&shp->shm_lprid, task_tgid(current));
+ shp->shm_dtim = ktime_get_real_seconds();
+ shp->shm_nattch--;
+- if (shm_may_destroy(ns, shp))
++ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
+@@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int
+ *
+ * As shp->* are changed under rwsem, it's safe to skip shp locking.
+ */
+- if (shp->shm_creator != NULL)
++ if (!list_empty(&shp->shm_clist))
+ return 0;
+
+- if (shm_may_destroy(ns, shp)) {
++ if (shm_may_destroy(shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+@@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_nam
+ /* Locking assumes this will only be called with task == current */
+ void exit_shm(struct task_struct *task)
+ {
+- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+- struct shmid_kernel *shp, *n;
++ for (;;) {
++ struct shmid_kernel *shp;
++ struct ipc_namespace *ns;
+
+- if (list_empty(&task->sysvshm.shm_clist))
+- return;
++ task_lock(task);
++
++ if (list_empty(&task->sysvshm.shm_clist)) {
++ task_unlock(task);
++ break;
++ }
++
++ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
++ shm_clist);
+
+- /*
+- * If kernel.shm_rmid_forced is not set then only keep track of
+- * which shmids are orphaned, so that a later set of the sysctl
+- * can clean them up.
+- */
+- if (!ns->shm_rmid_forced) {
+- down_read(&shm_ids(ns).rwsem);
+- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+- shp->shm_creator = NULL;
+ /*
+- * Only under read lock but we are only called on current
+- * so no entry on the list will be shared.
++ * 1) Get pointer to the ipc namespace. It is worth to say
++ * that this pointer is guaranteed to be valid because
++ * shp lifetime is always shorter than namespace lifetime
++ * in which shp lives.
++ * We taken task_lock it means that shp won't be freed.
+ */
+- list_del(&task->sysvshm.shm_clist);
+- up_read(&shm_ids(ns).rwsem);
+- return;
+- }
++ ns = shp->ns;
+
+- /*
+- * Destroy all already created segments, that were not yet mapped,
+- * and mark any mapped as orphan to cover the sysctl toggling.
+- * Destroy is skipped if shm_may_destroy() returns false.
+- */
+- down_write(&shm_ids(ns).rwsem);
+- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+- shp->shm_creator = NULL;
++ /*
++ * 2) If kernel.shm_rmid_forced is not set then only keep track of
++ * which shmids are orphaned, so that a later set of the sysctl
++ * can clean them up.
++ */
++ if (!ns->shm_rmid_forced)
++ goto unlink_continue;
+
+- if (shm_may_destroy(ns, shp)) {
+- shm_lock_by_ptr(shp);
+- shm_destroy(ns, shp);
++ /*
++ * 3) get a reference to the namespace.
++ * The refcount could be already 0. If it is 0, then
++ * the shm objects will be free by free_ipc_work().
++ */
++ ns = get_ipc_ns_not_zero(ns);
++ if (!ns) {
++unlink_continue:
++ list_del_init(&shp->shm_clist);
++ task_unlock(task);
++ continue;
+ }
+- }
+
+- /* Remove the list head from any segments still attached. */
+- list_del(&task->sysvshm.shm_clist);
+- up_write(&shm_ids(ns).rwsem);
++ /*
++ * 4) get a reference to shp.
++ * This cannot fail: shm_clist_rm() is called before
++ * ipc_rmid(), thus the refcount cannot be 0.
++ */
++ WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
++
++ /*
++ * 5) unlink the shm segment from the list of segments
++ * created by current.
++ * This must be done last. After unlinking,
++ * only the refcounts obtained above prevent IPC_RMID
++ * from destroying the segment or the namespace.
++ */
++ list_del_init(&shp->shm_clist);
++
++ task_unlock(task);
++
++ /*
++ * 6) we have all references
++ * Thus lock & if needed destroy shp.
++ */
++ down_write(&shm_ids(ns).rwsem);
++ shm_lock_by_ptr(shp);
++ /*
++ * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
++ * safe to call ipc_rcu_putref here
++ */
++ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
++
++ if (ipc_valid_object(&shp->shm_perm)) {
++ if (shm_may_destroy(shp))
++ shm_destroy(ns, shp);
++ else
++ shm_unlock(shp);
++ } else {
++ /*
++ * Someone else deleted the shp from namespace
++ * idr/kht while we have waited.
++ * Just unlock and continue.
++ */
++ shm_unlock(shp);
++ }
++
++ up_write(&shm_ids(ns).rwsem);
++ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
++ }
+ }
+
+ static vm_fault_t shm_fault(struct vm_fault *vmf)
+@@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *
+ if (error < 0)
+ goto no_id;
+
++ shp->ns = ns;
++
++ task_lock(current);
+ list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist);
++ task_unlock(current);
+
+ /*
+ * shmid gets reported as "inode#" in /proc/pid/maps.
+@@ -1573,7 +1669,8 @@ out_nattch:
+ down_write(&shm_ids(ns).rwsem);
+ shp = shm_lock(ns, shmid);
+ shp->shm_nattch--;
+- if (shm_may_destroy(ns, shp))
++
++ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
--- /dev/null
+From 3e6db079751afd527bf3db32314ae938dc571916 Mon Sep 17 00:00:00 2001
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+Date: Mon, 15 Nov 2021 08:01:43 -0800
+Subject: tipc: check for null after calling kmemdup
+
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+
+commit 3e6db079751afd527bf3db32314ae938dc571916 upstream.
+
+kmemdup() can return a NULL pointer, so we need to check for it;
+otherwise the NULL key will be dereferenced later in
+tipc_crypto_key_xmit(), as can be seen in the trace [1].
+
+Cc: tipc-discussion@lists.sourceforge.net
+Cc: stable@vger.kernel.org # 5.15, 5.14, 5.10
+
+[1] https://syzkaller.appspot.com/bug?id=bca180abb29567b189efdbdb34cbf7ba851c2a58
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Link: https://lore.kernel.org/r/20211115160143.5099-1-tadeusz.struk@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/crypto.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/tipc/crypto.c
++++ b/net/tipc/crypto.c
+@@ -597,6 +597,10 @@ static int tipc_aead_init(struct tipc_ae
+ tmp->cloned = NULL;
+ tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
+ tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
++ if (!tmp->key) {
++ tipc_aead_free(&tmp->rcu);
++ return -ENOMEM;
++ }
+ memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
+ atomic_set(&tmp->users, 0);
+ atomic64_set(&tmp->seqno, 0);
--- /dev/null
+From 8d48bf8206f77aa8687f0e241e901e5197e52423 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 5 Nov 2021 10:41:51 +0100
+Subject: x86/boot: Pull up cmdline preparation and early param parsing
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 8d48bf8206f77aa8687f0e241e901e5197e52423 upstream.
+
+Dan reports that Anjaneya Chagam can no longer use the efi=nosoftreserve
+kernel command line parameter to suppress "soft reservation" behavior.
+
+This is due to the fact that the following call-chain happens at boot:
+
+early_reserve_memory
+|-> efi_memblock_x86_reserve_range
+ |-> efi_fake_memmap_early
+
+which does
+
+ if (!efi_soft_reserve_enabled())
+ return;
+
+and that would have set EFI_MEM_NO_SOFT_RESERVE after having parsed
+"nosoftreserve".
+
+However, parse_early_param() gets called *after* it, leading to the boot
+cmdline not being taken into account.
+
+Therefore, carve out the command line preparation into a separate
+function which does the early param parsing too, so that it all goes
+together.
+
+And then call that function before early_reserve_memory() so that the
+params would have been parsed by then.
+
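+With the change, the relevant ordering in setup_arch() becomes,
+schematically:
+
+	x86_configure_nx();
+	*cmdline_p = prepare_command_line();  /* runs parse_early_param() */
+	early_reserve_memory();               /* sees efi=nosoftreserve   */
+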
+Fixes: 8aa83e6395ce ("x86/setup: Call early_reserve_memory() earlier")
+Reported-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Tested-by: Anjaneya Chagam <anjaneya.chagam@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/e8dd8993c38702ee6dd73b3c11f158617e665607.camel@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/setup.c | 66 ++++++++++++++++++++++++++++--------------------
+ 1 file changed, 39 insertions(+), 27 deletions(-)
+
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -742,6 +742,28 @@ dump_kernel_offset(struct notifier_block
+ return 0;
+ }
+
++static char *prepare_command_line(void)
++{
++#ifdef CONFIG_CMDLINE_BOOL
++#ifdef CONFIG_CMDLINE_OVERRIDE
++ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
++#else
++ if (builtin_cmdline[0]) {
++ /* append boot loader cmdline to builtin */
++ strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
++ strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
++ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
++ }
++#endif
++#endif
++
++ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
++
++ parse_early_param();
++
++ return command_line;
++}
++
+ /*
+ * Determine if we were loaded by an EFI loader. If so, then we have also been
+ * passed the efi memmap, systab, etc., so we should use these data structures
+@@ -831,6 +853,23 @@ void __init setup_arch(char **cmdline_p)
+ x86_init.oem.arch_setup();
+
+ /*
++ * x86_configure_nx() is called before parse_early_param() (called by
++ * prepare_command_line()) to detect whether hardware doesn't support
++ * NX (so that the early EHCI debug console setup can safely call
++ * set_fixmap()). It may then be called again from within noexec_setup()
++ * during parsing early parameters to honor the respective command line
++ * option.
++ */
++ x86_configure_nx();
++
++ /*
++ * This parses early params and it needs to run before
++ * early_reserve_memory() because latter relies on such settings
++ * supplied as early params.
++ */
++ *cmdline_p = prepare_command_line();
++
++ /*
+ * Do some memory reservations *before* memory is added to memblock, so
+ * memblock allocations won't overwrite it.
+ *
+@@ -863,33 +902,6 @@ void __init setup_arch(char **cmdline_p)
+ bss_resource.start = __pa_symbol(__bss_start);
+ bss_resource.end = __pa_symbol(__bss_stop)-1;
+
+-#ifdef CONFIG_CMDLINE_BOOL
+-#ifdef CONFIG_CMDLINE_OVERRIDE
+- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+-#else
+- if (builtin_cmdline[0]) {
+- /* append boot loader cmdline to builtin */
+- strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+- strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+- }
+-#endif
+-#endif
+-
+- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+- *cmdline_p = command_line;
+-
+- /*
+- * x86_configure_nx() is called before parse_early_param() to detect
+- * whether hardware doesn't support NX (so that the early EHCI debug
+- * console setup can safely call set_fixmap()). It may then be called
+- * again from within noexec_setup() during parsing early parameters
+- * to honor the respective command line option.
+- */
+- x86_configure_nx();
+-
+- parse_early_param();
+-
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Memory used by the kernel cannot be hot-removed because Linux
--- /dev/null
+From daf972118c517b91f74ff1731417feb4270625a4 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 4 Nov 2021 18:22:38 +0000
+Subject: x86/hyperv: Fix NULL deref in set_hv_tscchange_cb() if Hyper-V setup fails
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit daf972118c517b91f74ff1731417feb4270625a4 upstream.
+
+Check for a valid hv_vp_index array prior to dereferencing hv_vp_index when
+setting Hyper-V's TSC change callback. If Hyper-V setup failed in
+hyperv_init(), the kernel will still report that it's running under
+Hyper-V, but will have silently disabled nearly all functionality.
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000010
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP
+ CPU: 4 PID: 1 Comm: swapper/0 Not tainted 5.15.0-rc2+ #75
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+ RIP: 0010:set_hv_tscchange_cb+0x15/0xa0
+ Code: <8b> 04 82 8b 15 12 17 85 01 48 c1 e0 20 48 0d ee 00 01 00 f6 c6 08
+ ...
+ Call Trace:
+ kvm_arch_init+0x17c/0x280
+ kvm_init+0x31/0x330
+ vmx_init+0xba/0x13a
+ do_one_initcall+0x41/0x1c0
+ kernel_init_freeable+0x1f2/0x23b
+ kernel_init+0x16/0x120
+ ret_from_fork+0x22/0x30
+
+Fixes: 93286261de1b ("x86/hyperv: Reenlightenment notifications support")
+Cc: stable@vger.kernel.org
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://lore.kernel.org/r/20211104182239.1302956-2-seanjc@google.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/hyperv/hv_init.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/hyperv/hv_init.c
++++ b/arch/x86/hyperv/hv_init.c
+@@ -147,6 +147,9 @@ void set_hv_tscchange_cb(void (*cb)(void
+ return;
+ }
+
++ if (!hv_vp_index)
++ return;
++
+ hv_reenlightenment_cb = cb;
+
+ /* Make sure callback is registered before we write to MSRs */
--- /dev/null
+From ac5d272a0ad0419f52e08c91953356e32b075af7 Mon Sep 17 00:00:00 2001
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 15 Nov 2021 11:29:04 -0800
+Subject: x86/sgx: Fix free page accounting
+
+From: Reinette Chatre <reinette.chatre@intel.com>
+
+commit ac5d272a0ad0419f52e08c91953356e32b075af7 upstream.
+
+The SGX driver maintains a single global free page counter,
+sgx_nr_free_pages, that reflects the number of free pages available
+across all NUMA nodes. Correspondingly, a list of free pages is
+associated with each NUMA node, and sgx_nr_free_pages is updated
+every time a page is added to or removed from any of the free page
+lists. The main user of sgx_nr_free_pages is the reclaimer, which
+runs when sgx_nr_free_pages goes below a watermark to ensure that
+there are always some free pages available to, for
+example, support efficient page faults.
+
+With sgx_nr_free_pages accessed and modified from a few places, it is
+essential to ensure that these accesses are done safely, but this is
+not the case. sgx_nr_free_pages is read without any
+protection and updated with inconsistent protection by any one
+of the spin locks associated with the individual NUMA nodes.
+For example:
+
+ CPU_A CPU_B
+ ----- -----
+ spin_lock(&nodeA->lock); spin_lock(&nodeB->lock);
+ ... ...
+ sgx_nr_free_pages--; /* NOT SAFE */ sgx_nr_free_pages--;
+
+ spin_unlock(&nodeA->lock); spin_unlock(&nodeB->lock);
+
+Since sgx_nr_free_pages may be protected by different spin locks
+while being modified from different CPUs, the following scenario
+is possible:
+
+ CPU_A CPU_B
+ ----- -----
+{sgx_nr_free_pages = 100}
+ spin_lock(&nodeA->lock); spin_lock(&nodeB->lock);
+ sgx_nr_free_pages--; sgx_nr_free_pages--;
+ /* LOAD sgx_nr_free_pages = 100 */ /* LOAD sgx_nr_free_pages = 100 */
+ /* sgx_nr_free_pages-- */ /* sgx_nr_free_pages-- */
+ /* STORE sgx_nr_free_pages = 99 */ /* STORE sgx_nr_free_pages = 99 */
+ spin_unlock(&nodeA->lock); spin_unlock(&nodeB->lock);
+
+In the above scenario, sgx_nr_free_pages is decremented from two CPUs
+but instead of sgx_nr_free_pages ending with a value that is two less
+than it started with, it was only decremented by one while the number
+of free pages were actually reduced by two. The consequence of
+sgx_nr_free_pages not being protected is that its value may not
+accurately reflect the actual number of free pages on the system,
+impacting the availability of free pages in support of many flows.
+
+The problematic scenario is when the reclaimer does not run because it
+believes there are sufficient free pages while any attempt to allocate
+a page fails because no free pages are actually available. In the SGX
+driver the reclaimer's watermark is only 32 pages, so after the above
+example scenario has occurred 32 times, a user space hang is possible:
+no more free pages are ever made available, and the result is repeated
+page faults.
+
+The following flow was encountered:
+asm_exc_page_fault
+ ...
+ sgx_vma_fault()
+ sgx_encl_load_page()
+ sgx_encl_eldu() // Encrypted page needs to be loaded from backing
+ // storage into newly allocated SGX memory page
+ sgx_alloc_epc_page() // Allocate a page of SGX memory
+ __sgx_alloc_epc_page() // Fails, no free SGX memory
+ ...
+ if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) // Wake reclaimer
+ wake_up(&ksgxd_waitq);
+ return -EBUSY; // Return -EBUSY giving reclaimer time to run
+ return -EBUSY;
+ return -EBUSY;
+ return VM_FAULT_NOPAGE;
+
+The reclaimer is triggered in the above flow by the following code:
+
+static bool sgx_should_reclaim(unsigned long watermark)
+{
+ return sgx_nr_free_pages < watermark &&
+ !list_empty(&sgx_active_page_list);
+}
+
+In the problematic scenario there were no free pages available, yet the
+value of sgx_nr_free_pages was above the watermark. The allocation of
+SGX memory thus always failed for lack of free pages, while no free
+pages were made available because the reclaimer was never started due
+to sgx_nr_free_pages' incorrect value. The consequence was that user
+space kept encountering VM_FAULT_NOPAGE, causing the same address to be
+accessed repeatedly with the same result.
+
+Change the global free page counter to an atomic type that
+ensures simultaneous updates are done safely. While doing so, move
+the updating of the variable outside of the spin lock critical
+section to which it does not belong.
+
+Cc: stable@vger.kernel.org
+Fixes: 901ddbb9ecf5 ("x86/sgx: Add a basic NUMA allocation scheme to sgx_alloc_epc_page()")
+Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
+Link: https://lkml.kernel.org/r/a95a40743bbd3f795b465f30922dde7f1ea9e0eb.1637004094.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/sgx/main.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/cpu/sgx/main.c
++++ b/arch/x86/kernel/cpu/sgx/main.c
+@@ -28,8 +28,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_wai
+ static LIST_HEAD(sgx_active_page_list);
+ static DEFINE_SPINLOCK(sgx_reclaimer_lock);
+
+-/* The free page list lock protected variables prepend the lock. */
+-static unsigned long sgx_nr_free_pages;
++static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
+
+ /* Nodes with one or more EPC sections. */
+ static nodemask_t sgx_numa_mask;
+@@ -403,14 +402,15 @@ skip:
+
+ spin_lock(&node->lock);
+ list_add_tail(&epc_page->list, &node->free_page_list);
+- sgx_nr_free_pages++;
+ spin_unlock(&node->lock);
++ atomic_long_inc(&sgx_nr_free_pages);
+ }
+ }
+
+ static bool sgx_should_reclaim(unsigned long watermark)
+ {
+- return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
++ return atomic_long_read(&sgx_nr_free_pages) < watermark &&
++ !list_empty(&sgx_active_page_list);
+ }
+
+ static int ksgxd(void *p)
+@@ -471,9 +471,9 @@ static struct sgx_epc_page *__sgx_alloc_
+
+ page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
+ list_del_init(&page->list);
+- sgx_nr_free_pages--;
+
+ spin_unlock(&node->lock);
++ atomic_long_dec(&sgx_nr_free_pages);
+
+ return page;
+ }
+@@ -625,9 +625,9 @@ void sgx_free_epc_page(struct sgx_epc_pa
+ spin_lock(&node->lock);
+
+ list_add_tail(&page->list, &node->free_page_list);
+- sgx_nr_free_pages++;
+
+ spin_unlock(&node->lock);
++ atomic_long_inc(&sgx_nr_free_pages);
+ }
+
+ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,