--- /dev/null
+From c8b9aff419303e4d4219b5ff64b1c7e062dee48e Mon Sep 17 00:00:00 2001
+From: Baolin Wang <baolin.wang@linux.alibaba.com>
+Date: Thu, 18 Aug 2022 15:37:43 +0800
+Subject: mm/damon: validate if the pmd entry is present before accessing
+
+From: Baolin Wang <baolin.wang@linux.alibaba.com>
+
+commit c8b9aff419303e4d4219b5ff64b1c7e062dee48e upstream.
+
+pmd_huge() is used to validate if the pmd entry is mapped by a huge page,
+also including the case of non-present (migration or hwpoisoned) pmd entry
+on arm64 or x86 architectures. This means that pmd_pfn() can not get the
+correct pfn number for a non-present pmd entry, which will cause
+damon_get_page() to get an incorrect page struct (also may be NULL by
+pfn_to_online_page()), making the access statistics incorrect.
+
+This means that DAMON may make incorrect decisions based on the
+incorrect statistics; for example, DAMON may be unable to reclaim a cold
+page in time because the cold page was mistakenly regarded as accessed,
+if the DAMOS_PAGEOUT operation is specified.
+
+Moreover it does not make sense that we still waste time to get the page
+of the non-present entry. Just treat it as not-accessed and skip it,
+which maintains consistency with non-present pte level entries.
+
+So add pmd entry present validation to fix the above issues.
+
+Link: https://lkml.kernel.org/r/58b1d1f5fbda7db49ca886d9ef6783e3dcbbbc98.1660805030.git.baolin.wang@linux.alibaba.com
+Fixes: 3f49584b262c ("mm/damon: implement primitives for the virtual memory address spaces")
+Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/vaddr.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/mm/damon/vaddr.c
++++ b/mm/damon/vaddr.c
+@@ -456,6 +456,11 @@ static int damon_mkold_pmd_entry(pmd_t *
+
+ if (pmd_huge(*pmd)) {
+ ptl = pmd_lock(walk->mm, pmd);
++ if (!pmd_present(*pmd)) {
++ spin_unlock(ptl);
++ return 0;
++ }
++
+ if (pmd_huge(*pmd)) {
+ damon_pmdp_mkold(pmd, walk->mm, addr);
+ spin_unlock(ptl);
+@@ -530,6 +535,11 @@ static int damon_young_pmd_entry(pmd_t *
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_huge(*pmd)) {
+ ptl = pmd_lock(walk->mm, pmd);
++ if (!pmd_present(*pmd)) {
++ spin_unlock(ptl);
++ return 0;
++ }
++
+ if (!pmd_huge(*pmd)) {
+ spin_unlock(ptl);
+ goto regular_page;
--- /dev/null
+From deb0f6562884b5b4beb883d73e66a7d3a1b96d99 Mon Sep 17 00:00:00 2001
+From: Carlos Llamas <cmllamas@google.com>
+Date: Fri, 30 Sep 2022 00:38:43 +0000
+Subject: mm/mmap: undo ->mmap() when arch_validate_flags() fails
+
+From: Carlos Llamas <cmllamas@google.com>
+
+commit deb0f6562884b5b4beb883d73e66a7d3a1b96d99 upstream.
+
+Commit c462ac288f2c ("mm: Introduce arch_validate_flags()") added a late
+check in mmap_region() to let architectures validate vm_flags. The check
+needs to happen after calling ->mmap() as the flags can potentially be
+modified during this callback.
+
+If arch_validate_flags() check fails we unmap and free the vma. However,
+the error path fails to undo the ->mmap() call that previously succeeded
+and depending on the specific ->mmap() implementation this translates to
+reference increments, memory allocations and other operations that will
+not be cleaned up.
+
+There are several places (mainly device drivers) where this is an issue.
+However, one specific example is bpf_map_mmap() which keeps count of the
+mappings in map->writecnt. The count is incremented on ->mmap() and then
+decremented on vm_ops->close(). When arch_validate_flags() fails this
+count is off since bpf_map_mmap_close() is never called.
+
+One can reproduce this issue in arm64 devices with MTE support. Here the
+vm_flags are checked to only allow VM_MTE if VM_MTE_ALLOWED has been set
+previously. From userspace it is then enough to pass the PROT_MTE flag to
+mmap() syscall to trigger the arch_validate_flags() failure.
+
+The following program reproduces this issue:
+
+ #include <stdio.h>
+ #include <unistd.h>
+ #include <linux/unistd.h>
+ #include <linux/bpf.h>
+ #include <sys/mman.h>
+
+ int main(void)
+ {
+ union bpf_attr attr = {
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(long long),
+ .max_entries = 256,
+ .map_flags = BPF_F_MMAPABLE,
+ };
+ int fd;
+
+ fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+ mmap(NULL, 4096, PROT_WRITE | PROT_MTE, MAP_SHARED, fd, 0);
+
+ return 0;
+ }
+
+By manually adding some log statements to the vm_ops callbacks we can
+confirm that when passing PROT_MTE to mmap() the map->writecnt is off upon
+->release():
+
+With PROT_MTE flag:
+ root@debian:~# ./bpf-test
+ [ 111.263874] bpf_map_write_active_inc: map=9 writecnt=1
+ [ 111.288763] bpf_map_release: map=9 writecnt=1
+
+Without PROT_MTE flag:
+ root@debian:~# ./bpf-test
+ [ 157.816912] bpf_map_write_active_inc: map=10 writecnt=1
+ [ 157.830442] bpf_map_write_active_dec: map=10 writecnt=0
+ [ 157.832396] bpf_map_release: map=10 writecnt=0
+
+This patch fixes the above issue by calling vm_ops->close() when the
+arch_validate_flags() check fails, after this we can proceed to unmap and
+free the vma on the error path.
+
+Link: https://lkml.kernel.org/r/20220930003844.1210987-1-cmllamas@google.com
+Fixes: c462ac288f2c ("mm: Introduce arch_validate_flags()")
+Signed-off-by: Carlos Llamas <cmllamas@google.com>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Reviewed-by: Liam Howlett <liam.howlett@oracle.com>
+Cc: Christian Brauner (Microsoft) <brauner@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: <stable@vger.kernel.org> [5.10+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmap.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1836,7 +1836,7 @@ unsigned long mmap_region(struct file *f
+ if (!arch_validate_flags(vma->vm_flags)) {
+ error = -EINVAL;
+ if (file)
+- goto unmap_and_free_vma;
++ goto close_and_free_vma;
+ else
+ goto free_vma;
+ }
+@@ -1876,6 +1876,9 @@ out:
+
+ return addr;
+
++close_and_free_vma:
++ if (vma->vm_ops && vma->vm_ops->close)
++ vma->vm_ops->close(vma);
+ unmap_and_free_vma:
+ fput(vma->vm_file);
+ vma->vm_file = NULL;
--- /dev/null
+From 401bc1f90874280a80b93f23be33a0e7e2d1f912 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Thu, 1 Sep 2022 15:10:18 -0400
+Subject: NFSD: Protect against send buffer overflow in NFSv2 READ
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 401bc1f90874280a80b93f23be33a0e7e2d1f912 upstream.
+
+Since before the git era, NFSD has conserved the number of pages
+held by each nfsd thread by combining the RPC receive and send
+buffers into a single array of pages. This works because there are
+no cases where an operation needs a large RPC Call message and a
+large RPC Reply at the same time.
+
+Once an RPC Call has been received, svc_process() updates
+svc_rqst::rq_res to describe the part of rq_pages that can be
+used for constructing the Reply. This means that the send buffer
+(rq_res) shrinks when the received RPC record containing the RPC
+Call is large.
+
+A client can force this shrinkage on TCP by sending a correctly-
+formed RPC Call header contained in an RPC record that is
+excessively large. The full maximum payload size cannot be
+constructed in that case.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfsproc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -182,6 +182,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
+ argp->count, argp->offset);
+
+ argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
+
+ v = 0;
+ len = argp->count;
--- /dev/null
+From fa6be9cc6e80ec79892ddf08a8c10cabab9baf38 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Thu, 1 Sep 2022 15:10:24 -0400
+Subject: NFSD: Protect against send buffer overflow in NFSv3 READ
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit fa6be9cc6e80ec79892ddf08a8c10cabab9baf38 upstream.
+
+Since before the git era, NFSD has conserved the number of pages
+held by each nfsd thread by combining the RPC receive and send
+buffers into a single array of pages. This works because there are
+no cases where an operation needs a large RPC Call message and a
+large RPC Reply at the same time.
+
+Once an RPC Call has been received, svc_process() updates
+svc_rqst::rq_res to describe the part of rq_pages that can be
+used for constructing the Reply. This means that the send buffer
+(rq_res) shrinks when the received RPC record containing the RPC
+Call is large.
+
+A client can force this shrinkage on TCP by sending a correctly-
+formed RPC Call header contained in an RPC record that is
+excessively large. The full maximum payload size cannot be
+constructed in that case.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs3proc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -146,7 +146,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_readargs *argp = rqstp->rq_argp;
+ struct nfsd3_readres *resp = rqstp->rq_resp;
+- u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned int len;
+ int v;
+
+@@ -155,7 +154,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
+ (unsigned long) argp->count,
+ (unsigned long long) argp->offset);
+
+- argp->count = min_t(u32, argp->count, max_blocksize);
++ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp));
++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
--- /dev/null
+From 640f87c190e0d1b2a0fcb2ecf6d2cd53b1c41991 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Thu, 1 Sep 2022 15:10:12 -0400
+Subject: NFSD: Protect against send buffer overflow in NFSv3 READDIR
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 640f87c190e0d1b2a0fcb2ecf6d2cd53b1c41991 upstream.
+
+Since before the git era, NFSD has conserved the number of pages
+held by each nfsd thread by combining the RPC receive and send
+buffers into a single array of pages. This works because there are
+no cases where an operation needs a large RPC Call message and a
+large RPC Reply message at the same time.
+
+Once an RPC Call has been received, svc_process() updates
+svc_rqst::rq_res to describe the part of rq_pages that can be
+used for constructing the Reply. This means that the send buffer
+(rq_res) shrinks when the received RPC record containing the RPC
+Call is large.
+
+A client can force this shrinkage on TCP by sending a correctly-
+formed RPC Call header contained in an RPC record that is
+excessively large. The full maximum payload size cannot be
+constructed in that case.
+
+Thanks to Aleksi Illikainen and Kari Hulkko for uncovering this
+issue.
+
+Reported-by: Ben Ronallo <Benjamin.Ronallo@synopsys.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs3proc.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -447,13 +447,14 @@ static void nfsd3_init_dirlist_pages(str
+ {
+ struct xdr_buf *buf = &resp->dirlist;
+ struct xdr_stream *xdr = &resp->xdr;
+-
+- count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp));
++ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen,
++ svc_max_payload(rqstp));
+
+ memset(buf, 0, sizeof(*buf));
+
+ /* Reserve room for the NULL ptr & eof flag (-2 words) */
+- buf->buflen = count - XDR_UNIT * 2;
++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf);
++ buf->buflen -= XDR_UNIT * 2;
+ buf->pages = rqstp->rq_next_page;
+ rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
--- /dev/null
+From 0e32818397426a688f598f35d3bc762eca6d7592 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Wed, 21 Sep 2022 20:49:16 +0100
+Subject: PCI: Sanitise firmware BAR assignments behind a PCI-PCI bridge
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 0e32818397426a688f598f35d3bc762eca6d7592 upstream.
+
+When pci_assign_resource() is unable to assign resources to a BAR, it uses
+pci_revert_fw_address() to fall back to a firmware assignment (if any).
+Previously pci_revert_fw_address() assumed all addresses could reach the
+device, but this is not true if the device is below a bridge that only
+forwards addresses within its windows.
+
+This problem was observed on a Tyan Tomcat IV S1564D system where the BIOS
+did not assign valid addresses to several bridges and USB devices:
+
+ pci 0000:00:11.0: PCI-to-PCIe bridge to [bus 01-ff]
+ pci 0000:00:11.0: bridge window [io 0xe000-0xefff]
+ pci 0000:01:00.0: PCIe Upstream Port to [bus 02-ff]
+ pci 0000:01:00.0: bridge window [io 0x0000-0x0fff] # unreachable
+ pci 0000:02:02.0: PCIe Downstream Port to [bus 05-ff]
+ pci 0000:02:02.0: bridge window [io 0x0000-0x0fff] # unreachable
+ pci 0000:05:00.0: PCIe-to-PCI bridge to [bus 06-ff]
+ pci 0000:05:00.0: bridge window [io 0x0000-0x0fff] # unreachable
+ pci 0000:06:08.0: USB UHCI 1.1
+ pci 0000:06:08.0: BAR 4: [io 0xfce0-0xfcff] # unreachable
+ pci 0000:06:08.1: USB UHCI 1.1
+ pci 0000:06:08.1: BAR 4: [io 0xfce0-0xfcff] # unreachable
+ pci 0000:06:08.0: can't claim BAR 4 [io 0xfce0-0xfcff]: no compatible bridge window
+ pci 0000:06:08.1: can't claim BAR 4 [io 0xfce0-0xfcff]: no compatible bridge window
+
+During the first pass of assigning unassigned resources, there was not
+enough I/O space available, so we couldn't assign the 06:08.0 BAR and
+reverted to the firmware assignment (still unreachable). Reverting the
+06:08.1 assignment failed because it conflicted with 06:08.0:
+
+ pci 0000:00:11.0: bridge window [io 0xe000-0xefff]
+ pci 0000:01:00.0: no space for bridge window [io size 0x2000]
+ pci 0000:02:02.0: no space for bridge window [io size 0x1000]
+ pci 0000:05:00.0: no space for bridge window [io size 0x1000]
+ pci 0000:06:08.0: BAR 4: no space for [io size 0x0020]
+ pci 0000:06:08.0: BAR 4: trying firmware assignment [io 0xfce0-0xfcff]
+ pci 0000:06:08.1: BAR 4: no space for [io size 0x0020]
+ pci 0000:06:08.1: BAR 4: trying firmware assignment [io 0xfce0-0xfcff]
+ pci 0000:06:08.1: BAR 4: [io 0xfce0-0xfcff] conflicts with 0000:06:08.0 [io 0xfce0-0xfcff]
+
+A subsequent pass assigned valid bridge windows and a valid 06:08.1 BAR,
+but left the 06:08.0 BAR alone, so the UHCI device was still unusable:
+
+ pci 0000:00:11.0: bridge window [io 0xe000-0xefff] released
+ pci 0000:00:11.0: bridge window [io 0x1000-0x2fff] # reassigned
+ pci 0000:01:00.0: bridge window [io 0x1000-0x2fff] # reassigned
+ pci 0000:02:02.0: bridge window [io 0x2000-0x2fff] # reassigned
+ pci 0000:05:00.0: bridge window [io 0x2000-0x2fff] # reassigned
+ pci 0000:06:08.0: BAR 4: assigned [io 0xfce0-0xfcff] # left alone
+ pci 0000:06:08.1: BAR 4: assigned [io 0x2000-0x201f]
+ ...
+ uhci_hcd 0000:06:08.0: host system error, PCI problems?
+ uhci_hcd 0000:06:08.0: host controller process error, something bad happened!
+ uhci_hcd 0000:06:08.0: host controller halted, very bad!
+ uhci_hcd 0000:06:08.0: HCRESET not completed yet!
+ uhci_hcd 0000:06:08.0: HC died; cleaning up
+
+If the address assigned by firmware is not reachable because it's not
+within upstream bridge windows, fail instead of assigning the unusable
+address from firmware.
+
+[bhelgaas: commit log, use pci_upstream_bridge()]
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=16263
+Link: https://lore.kernel.org/r/alpine.DEB.2.21.2203012338460.46819@angie.orcam.me.uk
+Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209211921250.29493@angie.orcam.me.uk
+Fixes: 58c84eda0756 ("PCI: fall back to original BIOS BAR addresses")
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org # v2.6.35+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/setup-res.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/drivers/pci/setup-res.c
++++ b/drivers/pci/setup-res.c
+@@ -210,6 +210,17 @@ static int pci_revert_fw_address(struct
+
+ root = pci_find_parent_resource(dev, res);
+ if (!root) {
++ /*
++ * If dev is behind a bridge, accesses will only reach it
++ * if res is inside the relevant bridge window.
++ */
++ if (pci_upstream_bridge(dev))
++ return -ENXIO;
++
++ /*
++ * On the root bus, assume the host bridge will forward
++ * everything.
++ */
+ if (res->flags & IORESOURCE_IO)
+ root = &ioport_resource;
+ else
--- /dev/null
+From 4c081324df5608b73428662ca54d5221ea03a6bd Mon Sep 17 00:00:00 2001
+From: Zhang Rui <rui.zhang@intel.com>
+Date: Sat, 24 Sep 2022 13:47:36 +0800
+Subject: powercap: intel_rapl: Use standard Energy Unit for SPR Dram RAPL domain
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+commit 4c081324df5608b73428662ca54d5221ea03a6bd upstream.
+
+Intel Xeon servers used to use a fixed energy resolution (15.3uj) for
+Dram RAPL domain. But on SPR, Dram RAPL domain follows the standard
+energy resolution as described in MSR_RAPL_POWER_UNIT.
+
+Remove the SPR dram_domain_energy_unit quirk.
+
+Fixes: 2d798d9f5967 ("powercap: intel_rapl: add support for Sapphire Rapids")
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Tested-by: Wang Wendy <wendy.wang@intel.com>
+Cc: 5.9+ <stable@vger.kernel.org> # 5.9+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/powercap/intel_rapl_common.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/powercap/intel_rapl_common.c
++++ b/drivers/powercap/intel_rapl_common.c
+@@ -979,7 +979,6 @@ static const struct rapl_defaults rapl_d
+ .check_unit = rapl_check_unit_core,
+ .set_floor_freq = set_floor_freq_default,
+ .compute_time_window = rapl_compute_time_window_core,
+- .dram_domain_energy_unit = 15300,
+ .psys_domain_energy_unit = 1000000000,
+ };
+
--- /dev/null
+From 110a58b9f91c66f743c01a2c217243d94c899c23 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
+Date: Sat, 27 Aug 2022 15:44:54 +0200
+Subject: powerpc/boot: Explicitly disable usage of SPE instructions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pali Rohár <pali@kernel.org>
+
+commit 110a58b9f91c66f743c01a2c217243d94c899c23 upstream.
+
+uImage boot wrapper should not use SPE instructions, like kernel itself.
+Boot wrapper has already disabled Altivec and VSX instructions but not SPE.
+Options -mno-spe and -mspe=no are already set when compiling the kernel,
+but not yet when compiling the uImage wrapper. Fix it.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pali Rohár <pali@kernel.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220827134454.17365-1-pali@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/boot/Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/powerpc/boot/Makefile
++++ b/arch/powerpc/boot/Makefile
+@@ -34,6 +34,7 @@ endif
+
+ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
++ $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \
+ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
+ $(LINUXINCLUDE)
+
--- /dev/null
+From 592642e6b11e620e4b43189f8072752429fc8dc3 Mon Sep 17 00:00:00 2001
+From: Saurav Kashyap <skashyap@marvell.com>
+Date: Mon, 19 Sep 2022 06:44:34 -0700
+Subject: scsi: qedf: Populate sysfs attributes for vport
+
+From: Saurav Kashyap <skashyap@marvell.com>
+
+commit 592642e6b11e620e4b43189f8072752429fc8dc3 upstream.
+
+A few vport parameters were displayed by systool as 'Unknown' or 'NULL'.
+Copy speed, supported_speed, frame_size and update port_type for NPIV port.
+
+Link: https://lore.kernel.org/r/20220919134434.3513-1-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Tested-by: Guangwu Zhang <guazhang@redhat.com>
+Reviewed-by: John Meneghini <jmeneghi@redhat.com>
+Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/qedf/qedf_main.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/drivers/scsi/qedf/qedf_main.c
++++ b/drivers/scsi/qedf/qedf_main.c
+@@ -1921,6 +1921,27 @@ static int qedf_vport_create(struct fc_v
+ fc_vport_setlink(vn_port);
+ }
+
++ /* Set symbolic node name */
++ if (base_qedf->pdev->device == QL45xxx)
++ snprintf(fc_host_symbolic_name(vn_port->host), 256,
++ "Marvell FastLinQ 45xxx FCoE v%s", QEDF_VERSION);
++
++ if (base_qedf->pdev->device == QL41xxx)
++ snprintf(fc_host_symbolic_name(vn_port->host), 256,
++ "Marvell FastLinQ 41xxx FCoE v%s", QEDF_VERSION);
++
++ /* Set supported speed */
++ fc_host_supported_speeds(vn_port->host) = n_port->link_supported_speeds;
++
++ /* Set speed */
++ vn_port->link_speed = n_port->link_speed;
++
++ /* Set port type */
++ fc_host_port_type(vn_port->host) = FC_PORTTYPE_NPIV;
++
++ /* Set maxframe size */
++ fc_host_maxframe_size(vn_port->host) = n_port->mfs;
++
+ QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_NPIV, "vn_port=%p.\n",
+ vn_port);
+
--- /dev/null
+From 9906890c89e4dbd900ed87ad3040080339a7f411 Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Wed, 21 Sep 2022 00:35:32 +0100
+Subject: serial: 8250: Let drivers request full 16550A feature probing
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 9906890c89e4dbd900ed87ad3040080339a7f411 upstream.
+
+A SERIAL_8250_16550A_VARIANTS configuration option has been recently
+defined that lets one request the 8250 driver not to probe for 16550A
+device features so as to reduce the driver's device startup time in
+virtual machines.
+
+Some actual hardware devices require these features to have been fully
+determined however for their driver to work correctly, so define a flag
+to let drivers request full 16550A feature probing on a device-by-device
+basis if required regardless of the SERIAL_8250_16550A_VARIANTS option
+setting chosen.
+
+Fixes: dc56ecb81a0a ("serial: 8250: Support disabling mdelay-filled probes of 16550A variants")
+Cc: stable@vger.kernel.org # v5.6+
+Reported-by: Anders Blomdell <anders.blomdell@control.lth.se>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209202357520.41633@angie.orcam.me.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/serial/8250/8250_port.c | 3 ++-
+ include/linux/serial_core.h | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/tty/serial/8250/8250_port.c
++++ b/drivers/tty/serial/8250/8250_port.c
+@@ -1029,7 +1029,8 @@ static void autoconfig_16550a(struct uar
+ up->port.type = PORT_16550A;
+ up->capabilities |= UART_CAP_FIFO;
+
+- if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS))
++ if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) &&
++ !(up->port.flags & UPF_FULL_PROBE))
+ return;
+
+ /*
+--- a/include/linux/serial_core.h
++++ b/include/linux/serial_core.h
+@@ -100,7 +100,7 @@ struct uart_icount {
+ __u32 buf_overrun;
+ };
+
+-typedef unsigned int __bitwise upf_t;
++typedef u64 __bitwise upf_t;
+ typedef unsigned int __bitwise upstat_t;
+
+ struct uart_port {
+@@ -207,6 +207,7 @@ struct uart_port {
+ #define UPF_FIXED_PORT ((__force upf_t) (1 << 29))
+ #define UPF_DEAD ((__force upf_t) (1 << 30))
+ #define UPF_IOREMAP ((__force upf_t) (1 << 31))
++#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32))
+
+ #define __UPF_CHANGE_MASK 0x17fff
+ #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK)
--- /dev/null
+From 00b7a4d4ee42be1c515e56cb1e8ba0f25e271d8e Mon Sep 17 00:00:00 2001
+From: "Maciej W. Rozycki" <macro@orcam.me.uk>
+Date: Wed, 21 Sep 2022 00:35:37 +0100
+Subject: serial: 8250: Request full 16550A feature probing for OxSemi PCIe devices
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+commit 00b7a4d4ee42be1c515e56cb1e8ba0f25e271d8e upstream.
+
+Oxford Semiconductor PCIe (Tornado) 950 serial port devices need to
+operate in the enhanced mode via the EFR register for the Divide-by-M
+N/8 baud rate generator prescaler to be used in their native UART mode.
+Otherwise the prescaler is fixed at 1 causing grossly incorrect baud
+rates to be programmed.
+
+Accessing the EFR register requires 16550A features to have been probed
+for, so request this to happen regardless of SERIAL_8250_16550A_VARIANTS
+by setting UPF_FULL_PROBE in port flags.
+
+Fixes: 366f6c955d4d ("serial: 8250: Add proper clock handling for OxSemi PCIe devices")
+Cc: stable@vger.kernel.org # v5.19+
+Reported-by: Anders Blomdell <anders.blomdell@control.lth.se>
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209210005040.41633@angie.orcam.me.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/serial/8250/8250_pci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -1232,6 +1232,10 @@ static void pci_oxsemi_tornado_set_mctrl
+ serial8250_do_set_mctrl(port, mctrl);
+ }
+
++/*
++ * We require EFR features for clock programming, so set UPF_FULL_PROBE
++ * for full probing regardless of CONFIG_SERIAL_8250_16550A_VARIANTS setting.
++ */
+ static int pci_oxsemi_tornado_setup(struct serial_private *priv,
+ const struct pciserial_board *board,
+ struct uart_8250_port *up, int idx)
+@@ -1239,6 +1243,7 @@ static int pci_oxsemi_tornado_setup(stru
+ struct pci_dev *dev = priv->dev;
+
+ if (pci_oxsemi_tornado_p(dev)) {
++ up->port.flags |= UPF_FULL_PROBE;
+ up->port.get_divisor = pci_oxsemi_tornado_get_divisor;
+ up->port.set_divisor = pci_oxsemi_tornado_set_divisor;
+ up->port.set_mctrl = pci_oxsemi_tornado_set_mctrl;
drm-virtio-use-appropriate-atomic-state-in-virtio_gpu_plane_cleanup_fb.patch
drm-udl-restore-display-mode-on-resume.patch
arm64-errata-add-cortex-a55-to-the-repeat-tlbi-list.patch
+mm-damon-validate-if-the-pmd-entry-is-present-before-accessing.patch
+mm-mmap-undo-mmap-when-arch_validate_flags-fails.patch
+xen-gntdev-prevent-leaking-grants.patch
+xen-gntdev-accommodate-vma-splitting.patch
+pci-sanitise-firmware-bar-assignments-behind-a-pci-pci-bridge.patch
+serial-8250-let-drivers-request-full-16550a-feature-probing.patch
+serial-8250-request-full-16550a-feature-probing-for-oxsemi-pcie-devices.patch
+nfsd-protect-against-send-buffer-overflow-in-nfsv3-readdir.patch
+nfsd-protect-against-send-buffer-overflow-in-nfsv2-read.patch
+nfsd-protect-against-send-buffer-overflow-in-nfsv3-read.patch
+powercap-intel_rapl-use-standard-energy-unit-for-spr-dram-rapl-domain.patch
+powerpc-boot-explicitly-disable-usage-of-spe-instructions.patch
+slimbus-qcom-ngd-use-correct-error-in-message-of-pdr_add_lookup-failure.patch
+slimbus-qcom-ngd-cleanup-in-probe-error-path.patch
+scsi-qedf-populate-sysfs-attributes-for-vport.patch
--- /dev/null
+From 16f14551d0df9e7cd283545d7d748829594d912f Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Date: Fri, 16 Sep 2022 13:29:08 +0100
+Subject: slimbus: qcom-ngd: cleanup in probe error path
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+commit 16f14551d0df9e7cd283545d7d748829594d912f upstream.
+
+Add proper error path in probe() to cleanup resources previously
+acquired/allocated to fix warnings visible during probe deferral:
+
+ notifier callback qcom_slim_ngd_ssr_notify already registered
+ WARNING: CPU: 6 PID: 70 at kernel/notifier.c:28 notifier_chain_register+0x5c/0x90
+ Modules linked in:
+ CPU: 6 PID: 70 Comm: kworker/u16:1 Not tainted 6.0.0-rc3-next-20220830 #380
+ Call trace:
+ notifier_chain_register+0x5c/0x90
+ srcu_notifier_chain_register+0x44/0x90
+ qcom_register_ssr_notifier+0x38/0x4c
+ qcom_slim_ngd_ctrl_probe+0xd8/0x400
+ platform_probe+0x6c/0xe0
+ really_probe+0xbc/0x2d4
+ __driver_probe_device+0x78/0xe0
+ driver_probe_device+0x3c/0x12c
+ __device_attach_driver+0xb8/0x120
+ bus_for_each_drv+0x78/0xd0
+ __device_attach+0xa8/0x1c0
+ device_initial_probe+0x18/0x24
+ bus_probe_device+0xa0/0xac
+ deferred_probe_work_func+0x88/0xc0
+ process_one_work+0x1d4/0x320
+ worker_thread+0x2cc/0x44c
+ kthread+0x110/0x114
+ ret_from_fork+0x10/0x20
+
+Fixes: e1ae85e1830e ("slimbus: qcom-ngd-ctrl: add Protection Domain Restart Support")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20220916122910.170730-3-srinivas.kandagatla@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/slimbus/qcom-ngd-ctrl.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/slimbus/qcom-ngd-ctrl.c
++++ b/drivers/slimbus/qcom-ngd-ctrl.c
+@@ -1567,18 +1567,27 @@ static int qcom_slim_ngd_ctrl_probe(stru
+ ctrl->pdr = pdr_handle_alloc(slim_pd_status, ctrl);
+ if (IS_ERR(ctrl->pdr)) {
+ dev_err(dev, "Failed to init PDR handle\n");
+- return PTR_ERR(ctrl->pdr);
++ ret = PTR_ERR(ctrl->pdr);
++ goto err_pdr_alloc;
+ }
+
+ pds = pdr_add_lookup(ctrl->pdr, "avs/audio", "msm/adsp/audio_pd");
+ if (IS_ERR(pds) && PTR_ERR(pds) != -EALREADY) {
+ ret = PTR_ERR(pds);
+ dev_err(dev, "pdr add lookup failed: %d\n", ret);
+- return ret;
++ goto err_pdr_lookup;
+ }
+
+ platform_driver_register(&qcom_slim_ngd_driver);
+ return of_qcom_slim_ngd_register(dev, ctrl);
++
++err_pdr_alloc:
++ qcom_unregister_ssr_notifier(ctrl->notifier, &ctrl->nb);
++
++err_pdr_lookup:
++ pdr_handle_release(ctrl->pdr);
++
++ return ret;
+ }
+
+ static int qcom_slim_ngd_ctrl_remove(struct platform_device *pdev)
--- /dev/null
+From 5038d21dde818fe74ba1fcb6f2cee35b8c2ebbf2 Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Date: Fri, 16 Sep 2022 13:29:07 +0100
+Subject: slimbus: qcom-ngd: use correct error in message of pdr_add_lookup() failure
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+commit 5038d21dde818fe74ba1fcb6f2cee35b8c2ebbf2 upstream.
+
+Use correct error code, instead of previous 'ret' value, when printing
+error from pdr_add_lookup() failure.
+
+Fixes: e1ae85e1830e ("slimbus: qcom-ngd-ctrl: add Protection Domain Restart Support")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20220916122910.170730-2-srinivas.kandagatla@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/slimbus/qcom-ngd-ctrl.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/slimbus/qcom-ngd-ctrl.c
++++ b/drivers/slimbus/qcom-ngd-ctrl.c
+@@ -1572,8 +1572,9 @@ static int qcom_slim_ngd_ctrl_probe(stru
+
+ pds = pdr_add_lookup(ctrl->pdr, "avs/audio", "msm/adsp/audio_pd");
+ if (IS_ERR(pds) && PTR_ERR(pds) != -EALREADY) {
++ ret = PTR_ERR(pds);
+ dev_err(dev, "pdr add lookup failed: %d\n", ret);
+- return PTR_ERR(pds);
++ return ret;
+ }
+
+ platform_driver_register(&qcom_slim_ngd_driver);
--- /dev/null
+From 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c Mon Sep 17 00:00:00 2001
+From: "M. Vefa Bicakci" <m.v.b@runbox.com>
+Date: Sun, 2 Oct 2022 18:20:06 -0400
+Subject: xen/gntdev: Accommodate VMA splitting
+
+From: M. Vefa Bicakci <m.v.b@runbox.com>
+
+commit 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c upstream.
+
+Prior to this commit, the gntdev driver code did not handle the
+following scenario correctly with paravirtualized (PV) Xen domains:
+
+* User process sets up a gntdev mapping composed of two grant mappings
+ (i.e., two pages shared by another Xen domain).
+* User process munmap()s one of the pages.
+* User process munmap()s the remaining page.
+* User process exits.
+
+In the scenario above, the user process would cause the kernel to log
+the following messages in dmesg for the first munmap(), and the second
+munmap() call would result in similar log messages:
+
+ BUG: Bad page map in process doublemap.test pte:... pmd:...
+ page:0000000057c97bff refcount:1 mapcount:-1 \
+ mapping:0000000000000000 index:0x0 pfn:...
+ ...
+ page dumped because: bad pte
+ ...
+ file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0
+ ...
+ Call Trace:
+ <TASK>
+ dump_stack_lvl+0x46/0x5e
+ print_bad_pte.cold+0x66/0xb6
+ unmap_page_range+0x7e5/0xdc0
+ unmap_vmas+0x78/0xf0
+ unmap_region+0xa8/0x110
+ __do_munmap+0x1ea/0x4e0
+ __vm_munmap+0x75/0x120
+ __x64_sys_munmap+0x28/0x40
+ do_syscall_64+0x38/0x90
+ entry_SYSCALL_64_after_hwframe+0x61/0xcb
+ ...
+
+For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG)
+would print out the following and trigger a general protection fault in
+the affected Xen PV domain:
+
+ (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
+ (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
+
+As of this writing, gntdev_grant_map structure's vma field (referred to
+as map->vma below) is mainly used for checking the start and end
+addresses of mappings. However, with split VMAs, these may change, and
+there could be more than one VMA associated with a gntdev mapping.
+Hence, remove the use of map->vma and rely on map->pages_vm_start for
+the original start address and on (map->count << PAGE_SHIFT) for the
+original mapping size. Let the invalidate() and find_special_page()
+hooks use these.
+
+Also, given that there can be multiple VMAs associated with a gntdev
+mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to
+the end of gntdev_put_map, so that the MMU notifier is only removed
+after the closing of the last remaining VMA.
+
+Finally, use an atomic to prevent inadvertent gntdev mapping re-use,
+instead of using the map->live_grants atomic counter and/or the map->vma
+pointer (the latter of which is now removed). This prevents the
+userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the
+same address range as a previously set up gntdev mapping. This scenario
+can be summarized with the following call-trace, which was valid prior
+to this commit:
+
+ mmap
+ gntdev_mmap
+ mmap (repeat mmap with MAP_FIXED over the same address range)
+ gntdev_invalidate
+ unmap_grant_pages (sets 'being_removed' entries to true)
+ gnttab_unmap_refs_async
+ unmap_single_vma
+ gntdev_mmap (maps the shared pages again)
+ munmap
+ gntdev_invalidate
+ unmap_grant_pages
+ (no-op because 'being_removed' entries are true)
+ unmap_single_vma (For PV domains, Xen reports that a granted page
+ is being unmapped and triggers a general protection fault in the
+ affected domain, if Xen was built with CONFIG_DEBUG)
+
+The fix for this last scenario could be worth its own commit, but we
+opted for a single commit, because removing the gntdev_grant_map
+structure's vma field requires guarding the entry to gntdev_mmap(), and
+the live_grants atomic counter is not sufficient on its own to prevent
+the mmap() over a pre-existing mapping.
+
+Link: https://github.com/QubesOS/qubes-issues/issues/7631
+Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages")
+Cc: stable@vger.kernel.org
+Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/gntdev-common.h | 3 +-
+ drivers/xen/gntdev.c | 58 ++++++++++++++++++--------------------------
+ 2 files changed, 27 insertions(+), 34 deletions(-)
+
+--- a/drivers/xen/gntdev-common.h
++++ b/drivers/xen/gntdev-common.h
+@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
+ };
+
+ struct gntdev_grant_map {
++ atomic_t in_use;
+ struct mmu_interval_notifier notifier;
++ bool notifier_init;
+ struct list_head next;
+- struct vm_area_struct *vma;
+ int index;
+ int count;
+ int flags;
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *
+ */
+ }
+
++ if (use_ptemod && map->notifier_init)
++ mmu_interval_notifier_remove(&map->notifier);
++
+ if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+ notify_remote_via_evtchn(map->notify.event);
+ evtchn_put(map->notify.event);
+@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *
+ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+ {
+ struct gntdev_grant_map *map = data;
+- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
++ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
+ int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
+ (1 << _GNTMAP_guest_avail0);
+ u64 pte_maddr;
+@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_a
+ struct gntdev_priv *priv = file->private_data;
+
+ pr_debug("gntdev_vma_close %p\n", vma);
+- if (use_ptemod) {
+- WARN_ON(map->vma != vma);
+- mmu_interval_notifier_remove(&map->notifier);
+- map->vma = NULL;
+- }
++
+ vma->vm_private_data = NULL;
+ gntdev_put_map(priv, map);
+ }
+@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu
+ struct gntdev_grant_map *map =
+ container_of(mn, struct gntdev_grant_map, notifier);
+ unsigned long mstart, mend;
++ unsigned long map_start, map_end;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
++ map_start = map->pages_vm_start;
++ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
++
+ /*
+ * If the VMA is split or otherwise changed the notifier is not
+ * updated, but we don't want to process VA's outside the modified
+ * VMA. FIXME: It would be much more understandable to just prevent
+ * modifying the VMA in the first place.
+ */
+- if (map->vma->vm_start >= range->end ||
+- map->vma->vm_end <= range->start)
++ if (map_start >= range->end || map_end <= range->start)
+ return true;
+
+- mstart = max(range->start, map->vma->vm_start);
+- mend = min(range->end, map->vma->vm_end);
++ mstart = max(range->start, map_start);
++ mend = min(range->end, map_end);
+ pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
+- map->index, map->count,
+- map->vma->vm_start, map->vma->vm_end,
+- range->start, range->end, mstart, mend);
+- unmap_grant_pages(map,
+- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
+- (mend - mstart) >> PAGE_SHIFT);
++ map->index, map->count, map_start, map_end,
++ range->start, range->end, mstart, mend);
++ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
++ (mend - mstart) >> PAGE_SHIFT);
+
+ return true;
+ }
+@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip
+ return -EINVAL;
+
+ pr_debug("map %d+%d at %lx (pgoff %lx)\n",
+- index, count, vma->vm_start, vma->vm_pgoff);
++ index, count, vma->vm_start, vma->vm_pgoff);
+
+ mutex_lock(&priv->lock);
+ map = gntdev_find_map_index(priv, index, count);
+ if (!map)
+ goto unlock_out;
+- if (use_ptemod && map->vma)
+- goto unlock_out;
+- if (atomic_read(&map->live_grants)) {
+- err = -EAGAIN;
++ if (!atomic_add_unless(&map->in_use, 1, 1))
+ goto unlock_out;
+- }
++
+ refcount_inc(&map->users);
+
+ vma->vm_ops = &gntdev_vmops;
+@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip
+ map->flags |= GNTMAP_readonly;
+ }
+
++ map->pages_vm_start = vma->vm_start;
++
+ if (use_ptemod) {
+- map->vma = vma;
+ err = mmu_interval_notifier_insert_locked(
+ &map->notifier, vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
+- if (err) {
+- map->vma = NULL;
++ if (err)
+ goto out_unlock_put;
+- }
++
++ map->notifier_init = true;
+ }
+ mutex_unlock(&priv->lock);
+
+@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip
+ */
+ mmu_interval_read_begin(&map->notifier);
+
+- map->pages_vm_start = vma->vm_start;
+ err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ find_grant_ptes, map);
+@@ -1128,13 +1125,8 @@ unlock_out:
+ out_unlock_put:
+ mutex_unlock(&priv->lock);
+ out_put_map:
+- if (use_ptemod) {
++ if (use_ptemod)
+ unmap_grant_pages(map, 0, map->count);
+- if (map->vma) {
+- mmu_interval_notifier_remove(&map->notifier);
+- map->vma = NULL;
+- }
+- }
+ gntdev_put_map(priv, map);
+ return err;
+ }
--- /dev/null
+From 0991028cd49567d7016d1b224fe0117c35059f86 Mon Sep 17 00:00:00 2001
+From: "M. Vefa Bicakci" <m.v.b@runbox.com>
+Date: Sun, 2 Oct 2022 18:20:05 -0400
+Subject: xen/gntdev: Prevent leaking grants
+
+From: M. Vefa Bicakci <m.v.b@runbox.com>
+
+commit 0991028cd49567d7016d1b224fe0117c35059f86 upstream.
+
+Prior to this commit, if a grant mapping operation failed partially,
+some of the entries in the map_ops array would be invalid, whereas all
+of the entries in the kmap_ops array would be valid. This in turn would
+cause the following logic in gntdev_map_grant_pages to become invalid:
+
+ for (i = 0; i < map->count; i++) {
+ if (map->map_ops[i].status == GNTST_okay) {
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+ if (!use_ptemod)
+ alloced++;
+ }
+ if (use_ptemod) {
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
+ map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+ }
+ }
+ }
+ ...
+ atomic_add(alloced, &map->live_grants);
+
+Assume that use_ptemod is true (i.e., the domain mapping the granted
+pages is a paravirtualized domain). In the code excerpt above, note that
+the "alloced" variable is only incremented when both kmap_ops[i].status
+and map_ops[i].status are set to GNTST_okay (i.e., both mapping
+operations are successful). However, as also noted above, there are
+cases where a grant mapping operation fails partially, breaking the
+assumption of the code excerpt above.
+
+The aforementioned causes map->live_grants to be incorrectly set. In
+some cases, all of the map_ops mappings fail, but all of the kmap_ops
+mappings succeed, meaning that live_grants may remain zero. This in turn
+makes it impossible to unmap the successfully grant-mapped pages pointed
+to by kmap_ops, because unmap_grant_pages has the following snippet of
+code at its beginning:
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
+
+In other cases where only some of the map_ops mappings fail but all
+kmap_ops mappings succeed, live_grants is made positive, but when the
+user requests unmapping the grant-mapped pages, __unmap_grant_pages_done
+will then make map->live_grants negative, because the latter function
+does not check if all of the pages that were requested to be unmapped
+were actually unmapped, and the same function unconditionally subtracts
+"data->count" (i.e., a value that can be greater than map->live_grants)
+from map->live_grants. The side effects of a negative live_grants value
+have not been studied.
+
+The net effect of all of this is that grant references are leaked in one
+of the above conditions. In Qubes OS v4.1 (which uses Xen's grant
+mechanism extensively for X11 GUI isolation), this issue manifests
+itself with warning messages like the following to be printed out by the
+Linux kernel in the VM that had granted pages (that contain X11 GUI
+window data) to dom0: "g.e. 0x1234 still pending", especially after the
+user rapidly resizes GUI VM windows (causing some grant-mapping
+operations to partially or completely fail, due to the fact that the VM
+unshares some of the pages as part of the window resizing, making the
+pages impossible to grant-map from dom0).
+
+The fix for this issue involves counting all successful map_ops and
+kmap_ops mappings separately, and then adding the sum to live_grants.
+During unmapping, only the number of successfully unmapped grants is
+subtracted from live_grants. The code is also modified to check for
+negative live_grants values after the subtraction and warn the user.
+
+Link: https://github.com/QubesOS/qubes-issues/issues/7631
+Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
+Cc: stable@vger.kernel.org
+Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
+Acked-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20221002222006.2077-2-m.v.b@runbox.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/gntdev.c | 22 +++++++++++++++++-----
+ 1 file changed, 17 insertions(+), 5 deletions(-)
+
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -367,8 +367,7 @@ int gntdev_map_grant_pages(struct gntdev
+ for (i = 0; i < map->count; i++) {
+ if (map->map_ops[i].status == GNTST_okay) {
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+- if (!use_ptemod)
+- alloced++;
++ alloced++;
+ } else if (!err)
+ err = -EINVAL;
+
+@@ -377,8 +376,7 @@ int gntdev_map_grant_pages(struct gntdev
+
+ if (use_ptemod) {
+ if (map->kmap_ops[i].status == GNTST_okay) {
+- if (map->map_ops[i].status == GNTST_okay)
+- alloced++;
++ alloced++;
+ map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+ } else if (!err)
+ err = -EINVAL;
+@@ -394,8 +392,14 @@ static void __unmap_grant_pages_done(int
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
++ int successful_unmaps = 0;
++ int live_grants;
+
+ for (i = 0; i < data->count; i++) {
++ if (map->unmap_ops[offset + i].status == GNTST_okay &&
++ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
++ successful_unmaps++;
++
+ WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
+ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
+ pr_debug("unmap handle=%d st=%d\n",
+@@ -403,6 +407,10 @@ static void __unmap_grant_pages_done(int
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+ if (use_ptemod) {
++ if (map->kunmap_ops[offset + i].status == GNTST_okay &&
++ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
++ successful_unmaps++;
++
+ WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
+ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
+ pr_debug("kunmap handle=%u st=%d\n",
+@@ -411,11 +419,15 @@ static void __unmap_grant_pages_done(int
+ map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+ }
+ }
++
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gnttab_mmap().
+ */
+- atomic_sub(data->count, &map->live_grants);
++ live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
++ if (WARN_ON(live_grants < 0))
++ pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
++ __func__, live_grants, successful_unmaps);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);