git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 1 Oct 2024 08:20:45 +0000 (10:20 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 1 Oct 2024 08:20:45 +0000 (10:20 +0200)
added patches:
drm-vmwgfx-prevent-unmapping-active-read-buffers.patch
io_uring-io-wq-do-not-allow-pinning-outside-of-cpuset.patch
io_uring-io-wq-inherit-cpuset-of-cgroup-in-io-worker.patch
selinux-smack-don-t-bypass-permissions-check-in-inode_setsecctx-hook.patch
vfio-pci-fix-potential-memory-leak-in-vfio_intx_enable.patch

queue-6.1/drm-vmwgfx-prevent-unmapping-active-read-buffers.patch [new file with mode: 0644]
queue-6.1/io_uring-io-wq-do-not-allow-pinning-outside-of-cpuset.patch [new file with mode: 0644]
queue-6.1/io_uring-io-wq-inherit-cpuset-of-cgroup-in-io-worker.patch [new file with mode: 0644]
queue-6.1/selinux-smack-don-t-bypass-permissions-check-in-inode_setsecctx-hook.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/vfio-pci-fix-potential-memory-leak-in-vfio_intx_enable.patch [new file with mode: 0644]
queue-6.1/xen-tolerate-acpi-nvs-memory-overlapping-with-xen-al.patch [deleted file]

diff --git a/queue-6.1/drm-vmwgfx-prevent-unmapping-active-read-buffers.patch b/queue-6.1/drm-vmwgfx-prevent-unmapping-active-read-buffers.patch
new file mode 100644 (file)
index 0000000..4594336
--- /dev/null
@@ -0,0 +1,96 @@
+From aba07b9a0587f50e5d3346eaa19019cf3f86c0ea Mon Sep 17 00:00:00 2001
+From: Zack Rusin <zack.rusin@broadcom.com>
+Date: Fri, 16 Aug 2024 14:32:05 -0400
+Subject: drm/vmwgfx: Prevent unmapping active read buffers
+
+From: Zack Rusin <zack.rusin@broadcom.com>
+
+commit aba07b9a0587f50e5d3346eaa19019cf3f86c0ea upstream.
+
+The KMS paths keep a persistent map active to read and compare the cursor
+buffer. These maps can race with each other in a simple scenario where:
+a) buffer "a" mapped for update
+b) buffer "a" mapped for compare
+c) do the compare
+d) unmap "a" for compare
+e) update the cursor
+f) unmap "a" for update
+At step "e" the buffer has been unmapped and the read contents is bogus.
+
+Prevent unmapping of active read buffers by simply keeping a count of
+how many paths currently have active maps, and unmapping only when the
+count reaches 0.
+
+Fixes: 485d98d472d5 ("drm/vmwgfx: Add support for CursorMob and CursorBypass 4")
+Cc: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
+Cc: dri-devel@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v5.19+
+Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240816183332.31961-2-zack.rusin@broadcom.com
+Reviewed-by: Martin Krastev <martin.krastev@broadcom.com>
+Reviewed-by: Maaz Mombasawala <maaz.mombasawala@broadcom.com>
+[Shivani: Modified to apply on v6.1.y]
+Signed-off-by: Shivani Agarwal <shivani.agarwal@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vmwgfx/vmwgfx_bo.c  |   12 +++++++++++-
+ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |    3 +++
+ 2 files changed, 14 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+@@ -348,6 +348,8 @@ void *vmw_bo_map_and_cache(struct vmw_bu
+       void *virtual;
+       int ret;
++      atomic_inc(&vbo->map_count);
++
+       virtual = ttm_kmap_obj_virtual(&vbo->map, &not_used);
+       if (virtual)
+               return virtual;
+@@ -370,10 +372,17 @@ void *vmw_bo_map_and_cache(struct vmw_bu
+  */
+ void vmw_bo_unmap(struct vmw_buffer_object *vbo)
+ {
++      int map_count;
++
+       if (vbo->map.bo == NULL)
+               return;
+-      ttm_bo_kunmap(&vbo->map);
++      map_count = atomic_dec_return(&vbo->map_count);
++
++      if (!map_count) {
++              ttm_bo_kunmap(&vbo->map);
++              vbo->map.bo = NULL;
++      }
+ }
+@@ -510,6 +519,7 @@ int vmw_bo_init(struct vmw_private *dev_
+       BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
+       vmw_bo->base.priority = 3;
+       vmw_bo->res_tree = RB_ROOT;
++      atomic_set(&vmw_bo->map_count, 0);
+       size = ALIGN(size, PAGE_SIZE);
+       drm_gem_private_object_init(vdev, &vmw_bo->base.base, size);
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+@@ -116,6 +116,8 @@ struct vmwgfx_hash_item {
+  * @base: The TTM buffer object
+  * @res_tree: RB tree of resources using this buffer object as a backing MOB
+  * @base_mapped_count: ttm BO mapping count; used by KMS atomic helpers.
++ * @map_count: The number of currently active maps. Will differ from the
++ * cpu_writers because it includes kernel maps.
+  * @cpu_writers: Number of synccpu write grabs. Protected by reservation when
+  * increased. May be decreased without reservation.
+  * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
+@@ -129,6 +131,7 @@ struct vmw_buffer_object {
+       /* For KMS atomic helpers: ttm bo mapping count */
+       atomic_t base_mapped_count;
++      atomic_t map_count;
+       atomic_t cpu_writers;
+       /* Not ref-counted.  Protected by binding_mutex */
+       struct vmw_resource *dx_query_ctx;
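The fix is a straightforward reference count on the cached mapping: vmw_bo_map_and_cache() bumps map_count, and vmw_bo_unmap() calls ttm_bo_kunmap() only once the count drops to zero, so step d) in the race above can no longer pull the map out from under step e). A minimal user-space C11 sketch of the same pattern (all names hypothetical; malloc()/free() stand in for the kmap backend, and callers are assumed to serialize map_get() against each other, as the BO reservation lock does in vmwgfx):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct cached_map {
            void *virtual;          /* cached mapping, NULL while unmapped */
            atomic_int map_count;   /* paths currently holding the mapping */
    };

    /* Every user bumps the count before reusing the cached mapping. */
    static void *map_get(struct cached_map *m)
    {
            atomic_fetch_add(&m->map_count, 1);
            if (!m->virtual)
                    m->virtual = malloc(4096);  /* stand-in for ttm_bo_kmap() */
            return m->virtual;
    }

    /* Only the last user tears the mapping down. */
    static void map_put(struct cached_map *m)
    {
            if (atomic_fetch_sub(&m->map_count, 1) == 1) {
                    free(m->virtual);           /* stand-in for ttm_bo_kunmap() */
                    m->virtual = NULL;
            }
    }
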
diff --git a/queue-6.1/io_uring-io-wq-do-not-allow-pinning-outside-of-cpuset.patch b/queue-6.1/io_uring-io-wq-do-not-allow-pinning-outside-of-cpuset.patch
new file mode 100644 (file)
index 0000000..440b48d
--- /dev/null
@@ -0,0 +1,77 @@
+From 0997aa5497c714edbb349ca366d28bd550ba3408 Mon Sep 17 00:00:00 2001
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+Date: Tue, 10 Sep 2024 19:11:56 +0200
+Subject: io_uring/io-wq: do not allow pinning outside of cpuset
+
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+
+commit 0997aa5497c714edbb349ca366d28bd550ba3408 upstream.
+
+The io worker threads are userland threads that just never exit to
+userland. As such, they are also assigned to a cgroup (the cgroup of the
+creating task).
+
+When changing the affinity of the io_wq thread via syscall, we must only
+allow cpumasks within the limits defined by the cpuset controller of the
+cgroup (if enabled).
+
+Fixes: da64d6db3bd3 ("io_uring: One wqe per wq")
+Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
+Link: https://lore.kernel.org/r/20240910171157.166423-2-felix.moessbauer@siemens.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io-wq.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/io_uring/io-wq.c
++++ b/io_uring/io-wq.c
+@@ -13,6 +13,7 @@
+ #include <linux/slab.h>
+ #include <linux/rculist_nulls.h>
+ #include <linux/cpu.h>
++#include <linux/cpuset.h>
+ #include <linux/task_work.h>
+ #include <linux/audit.h>
+ #include <uapi/linux/io_uring.h>
+@@ -1362,22 +1363,34 @@ static int io_wq_cpu_offline(unsigned in
+ int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask)
+ {
++      cpumask_var_t allowed_mask;
++      int ret = 0;
+       int i;
+       if (!tctx || !tctx->io_wq)
+               return -EINVAL;
++      if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL))
++              return -ENOMEM;
++      cpuset_cpus_allowed(tctx->io_wq->task, allowed_mask);
++
+       rcu_read_lock();
+       for_each_node(i) {
+               struct io_wqe *wqe = tctx->io_wq->wqes[i];
+-
+-              if (mask)
+-                      cpumask_copy(wqe->cpu_mask, mask);
+-              else
+-                      cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
++              if (mask) {
++                      if (cpumask_subset(mask, allowed_mask))
++                              cpumask_copy(wqe->cpu_mask, mask);
++                      else
++                              ret = -EINVAL;
++              } else {
++                      if (!cpumask_and(wqe->cpu_mask, cpumask_of_node(i), allowed_mask))
++                              cpumask_copy(wqe->cpu_mask, allowed_mask);
++              }
+       }
+       rcu_read_unlock();
+-      return 0;
++
++      free_cpumask_var(allowed_mask);
++      return ret;
+ }
+ /*
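The heart of the change is a containment test: a caller-supplied mask is honored only if it is a subset of what the task's cpuset allows, and the per-node default mask is intersected with the same limit, falling back to the full allowed set when the intersection is empty. A compact sketch of that decision, with 64-bit integers standing in for cpumask_var_t (hypothetical names, not the io-wq code itself):

    #include <errno.h>
    #include <stdint.h>

    typedef uint64_t cpu_mask_t;    /* one bit per CPU, up to 64 CPUs */

    static int apply_affinity(cpu_mask_t *dst, const cpu_mask_t *requested,
                              cpu_mask_t node_mask, cpu_mask_t allowed)
    {
            if (requested) {
                    if (*requested & ~allowed)  /* not a subset: reject */
                            return -EINVAL;
                    *dst = *requested;
                    return 0;
            }
            *dst = node_mask & allowed;         /* node mask limited by cpuset */
            if (!*dst)                          /* node has no allowed CPUs */
                    *dst = allowed;
            return 0;
    }
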
diff --git a/queue-6.1/io_uring-io-wq-inherit-cpuset-of-cgroup-in-io-worker.patch b/queue-6.1/io_uring-io-wq-inherit-cpuset-of-cgroup-in-io-worker.patch
new file mode 100644 (file)
index 0000000..0adac81
--- /dev/null
@@ -0,0 +1,63 @@
+From 84eacf177faa605853c58e5b1c0d9544b88c16fd Mon Sep 17 00:00:00 2001
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+Date: Tue, 10 Sep 2024 19:11:57 +0200
+Subject: io_uring/io-wq: inherit cpuset of cgroup in io worker
+
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+
+commit 84eacf177faa605853c58e5b1c0d9544b88c16fd upstream.
+
+The io worker threads are userland threads that just never exit to
+userland. As such, they are also assigned to a cgroup (the cgroup of the
+creating task).
+
+When creating a new io worker, this worker should inherit the cpuset
+of the cgroup.
+
+Fixes: da64d6db3bd3 ("io_uring: One wqe per wq")
+Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
+Link: https://lore.kernel.org/r/20240910171157.166423-3-felix.moessbauer@siemens.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io-wq.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io-wq.c
++++ b/io_uring/io-wq.c
+@@ -1157,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned boun
+ {
+       int ret, node, i;
+       struct io_wq *wq;
++      cpumask_var_t allowed_mask;
+       if (WARN_ON_ONCE(!data->free_work || !data->do_work))
+               return ERR_PTR(-EINVAL);
+@@ -1176,6 +1177,9 @@ struct io_wq *io_wq_create(unsigned boun
+       wq->do_work = data->do_work;
+       ret = -ENOMEM;
++      if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL))
++              goto err;
++      cpuset_cpus_allowed(current, allowed_mask);
+       for_each_node(node) {
+               struct io_wqe *wqe;
+               int alloc_node = node;
+@@ -1188,7 +1192,8 @@ struct io_wq *io_wq_create(unsigned boun
+               wq->wqes[node] = wqe;
+               if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL))
+                       goto err;
+-              cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
++              if (!cpumask_and(wqe->cpu_mask, cpumask_of_node(node), allowed_mask))
++                      cpumask_copy(wqe->cpu_mask, allowed_mask);
+               wqe->node = alloc_node;
+               wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
+               wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+@@ -1222,6 +1227,7 @@ err:
+               free_cpumask_var(wq->wqes[node]->cpu_mask);
+               kfree(wq->wqes[node]);
+       }
++      free_cpumask_var(allowed_mask);
+ err_wq:
+       kfree(wq);
+       return ERR_PTR(ret);
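Here cpuset_cpus_allowed(current, allowed_mask) fills allowed_mask with the CPUs the creating task's cpuset permits, and each per-node worker mask is then clamped to it. A rough user-space analogue queries the calling thread's own allowed set via sched_getaffinity(2) (an approximation only: the kernel-side helper reads the cpuset directly, while the syscall reports effective affinity):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            cpu_set_t allowed;

            /* Ask which CPUs this task may run on (pid 0 == self). */
            if (sched_getaffinity(0, sizeof(allowed), &allowed))
                    return 1;

            /* A worker spawned here would start from this set, not all CPUs. */
            for (int cpu = 0; cpu < CPU_SETSIZE; cpu++)
                    if (CPU_ISSET(cpu, &allowed))
                            printf("worker may run on CPU %d\n", cpu);
            return 0;
    }
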
diff --git a/queue-6.1/selinux-smack-don-t-bypass-permissions-check-in-inode_setsecctx-hook.patch b/queue-6.1/selinux-smack-don-t-bypass-permissions-check-in-inode_setsecctx-hook.patch
new file mode 100644 (file)
index 0000000..acc6cb9
--- /dev/null
@@ -0,0 +1,74 @@
+From 76a0e79bc84f466999fa501fce5bf7a07641b8a7 Mon Sep 17 00:00:00 2001
+From: Scott Mayhew <smayhew@redhat.com>
+Date: Wed, 28 Aug 2024 15:51:29 -0400
+Subject: selinux,smack: don't bypass permissions check in inode_setsecctx hook
+
+From: Scott Mayhew <smayhew@redhat.com>
+
+commit 76a0e79bc84f466999fa501fce5bf7a07641b8a7 upstream.
+
+Marek Gresko reports that the root user on an NFS client is able to
+change the security labels on files on an NFS filesystem that is
+exported with root squashing enabled.
+
+The end of the kerneldoc comment for __vfs_setxattr_noperm() states:
+
+ *  This function requires the caller to lock the inode's i_mutex before it
+ *  is executed. It also assumes that the caller will make the appropriate
+ *  permission checks.
+
+nfsd_setattr() does do permissions checking via fh_verify() and
+nfsd_permission(), but those don't do all the same permissions checks
+that are done by security_inode_setxattr() and its related LSM hooks.
+
+Since nfsd_setattr() is the only consumer of security_inode_setsecctx(),
+the simplest solution appears to be to replace the call to
+__vfs_setxattr_noperm() with a call to __vfs_setxattr_locked().  This
+fixes the above issue and has the added benefit of causing nfsd to
+recall conflicting delegations on a file when a client tries to change
+its security label.
+
+Cc: stable@kernel.org
+Reported-by: Marek Gresko <marek.gresko@protonmail.com>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=218809
+Signed-off-by: Scott Mayhew <smayhew@redhat.com>
+Tested-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com>
+Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Casey Schaufler <casey@schaufler-ca.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+[Shivani: Modified to apply on v5.15.y-v6.1.y]
+Signed-off-by: Shivani Agarwal <shivani.agarwal@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/selinux/hooks.c   |    4 ++--
+ security/smack/smack_lsm.c |    4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -6631,8 +6631,8 @@ static int selinux_inode_notifysecctx(st
+  */
+ static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+ {
+-      return __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_SELINUX,
+-                                   ctx, ctxlen, 0);
++      return __vfs_setxattr_locked(&init_user_ns, dentry, XATTR_NAME_SELINUX,
++                                     ctx, ctxlen, 0, NULL);
+ }
+ static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+--- a/security/smack/smack_lsm.c
++++ b/security/smack/smack_lsm.c
+@@ -4714,8 +4714,8 @@ static int smack_inode_notifysecctx(stru
+ static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+ {
+-      return __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_SMACK,
+-                                   ctx, ctxlen, 0);
++      return __vfs_setxattr_locked(&init_user_ns, dentry, XATTR_NAME_SMACK,
++                                   ctx, ctxlen, 0, NULL);
+ }
+ static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
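The underlying bug class: __vfs_setxattr_noperm() deliberately skips permission checks and, per its kerneldoc quoted above, trusts the caller to have made them, yet the setsecctx hooks were reachable from nfsd without those checks ever running. Switching to __vfs_setxattr_locked() puts the checking variant back in front of the write. A toy illustration of the pattern, not the VFS API (all names hypothetical):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* 'noperm' variant: performs the write, trusts the caller completely. */
    static int raw_setxattr(const char *name, const char *value)
    {
            printf("set %s=%s\n", name, value);
            return 0;
    }

    /* Stand-in for the security_inode_setxattr()-style policy check. */
    static bool caller_may_set(const char *name)
    {
            (void)name;
            return false;   /* e.g. root squashed on an NFS export */
    }

    /* 'checked' variant: the policy check cannot be bypassed by any caller. */
    static int checked_setxattr(const char *name, const char *value)
    {
            if (!caller_may_set(name))
                    return -EPERM;
            return raw_setxattr(name, value);
    }

    int main(void)
    {
            return checked_setxattr("security.selinux", "label") ? 1 : 0;
    }
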
diff --git a/queue-6.1/series b/queue-6.1/series
index 6262bd4ae6f399e575c42416ea79030d4f87b39b..8620902d6086d211e54b36de3db2580c1849286f 100644 (file)
@@ -139,7 +139,6 @@ minmax-avoid-overly-complex-min-max-macro-arguments-.patch
 xen-introduce-generic-helper-checking-for-memory-map.patch
 xen-move-max_pfn-in-xen_memory_setup-out-of-function.patch
 xen-add-capability-to-remap-non-ram-pages-to-differe.patch
-xen-tolerate-acpi-nvs-memory-overlapping-with-xen-al.patch
 powerpc-vdso-fix-vdso-data-access-when-running-in-a-.patch
 selftests-vdso-fix-elf-hash-table-entry-size-for-s39.patch
 selftests-vdso-fix-vdso_config-for-s390.patch
@@ -291,3 +290,8 @@ netfilter-nf_tables-keep-deleted-flowtable-hooks-unt.patch
 netfilter-ctnetlink-compile-ctnetlink_label_size-wit.patch
 io_uring-sqpoll-do-not-allow-pinning-outside-of-cpuset.patch
 drm-amd-display-fix-synaptics-cascaded-panamera-dsc-determination.patch
+io_uring-io-wq-do-not-allow-pinning-outside-of-cpuset.patch
+io_uring-io-wq-inherit-cpuset-of-cgroup-in-io-worker.patch
+vfio-pci-fix-potential-memory-leak-in-vfio_intx_enable.patch
+selinux-smack-don-t-bypass-permissions-check-in-inode_setsecctx-hook.patch
+drm-vmwgfx-prevent-unmapping-active-read-buffers.patch
diff --git a/queue-6.1/vfio-pci-fix-potential-memory-leak-in-vfio_intx_enable.patch b/queue-6.1/vfio-pci-fix-potential-memory-leak-in-vfio_intx_enable.patch
new file mode 100644 (file)
index 0000000..cdf048e
--- /dev/null
@@ -0,0 +1,37 @@
+From 82b951e6fbd31d85ae7f4feb5f00ddd4c5d256e2 Mon Sep 17 00:00:00 2001
+From: Ye Bin <yebin10@huawei.com>
+Date: Mon, 15 Apr 2024 09:50:29 +0800
+Subject: vfio/pci: fix potential memory leak in vfio_intx_enable()
+
+From: Ye Bin <yebin10@huawei.com>
+
+commit 82b951e6fbd31d85ae7f4feb5f00ddd4c5d256e2 upstream.
+
+If vfio_irq_ctx_alloc() fails, the previously allocated 'name' is leaked.
+
+Fixes: 18c198c96a81 ("vfio/pci: Create persistent INTx handler")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Acked-by: Reinette Chatre <reinette.chatre@intel.com>
+Link: https://lore.kernel.org/r/20240415015029.3699844-1-yebin10@huawei.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Oleksandr Tymoshenko <ovt@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/pci/vfio_pci_intrs.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/vfio/pci/vfio_pci_intrs.c
++++ b/drivers/vfio/pci/vfio_pci_intrs.c
+@@ -215,8 +215,10 @@ static int vfio_intx_enable(struct vfio_
+               return -ENOMEM;
+       vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
+-      if (!vdev->ctx)
++      if (!vdev->ctx) {
++              kfree(name);
+               return -ENOMEM;
++      }
+       vdev->num_ctx = 1;
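The three added lines close a classic two-allocation error path: vfio_intx_enable() allocates 'name' first, then the context, and the early return on the second failure used to leak 'name'. A minimal sketch of the corrected shape (hypothetical structure and names, plain libc allocations standing in for kzalloc/kfree):

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>

    struct intx_ctx {
            char *name;
            void *ctx;
    };

    static int ctx_init(struct intx_ctx *c, const char *dev)
    {
            c->name = strdup(dev);          /* first allocation ('name') */
            if (!c->name)
                    return -ENOMEM;

            c->ctx = calloc(1, 64);         /* second allocation ('ctx') */
            if (!c->ctx) {
                    free(c->name);          /* the kfree(name) added above */
                    c->name = NULL;
                    return -ENOMEM;
            }
            return 0;
    }
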
diff --git a/queue-6.1/xen-tolerate-acpi-nvs-memory-overlapping-with-xen-al.patch b/queue-6.1/xen-tolerate-acpi-nvs-memory-overlapping-with-xen-al.patch
deleted file mode 100644 (file)
index 379951f..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-From e16e625df04ccb81431a921d954b174cd73b9388 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 2 Aug 2024 20:14:22 +0200
-Subject: xen: tolerate ACPI NVS memory overlapping with Xen allocated memory
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Juergen Gross <jgross@suse.com>
-
-[ Upstream commit be35d91c8880650404f3bf813573222dfb106935 ]
-
-In order to minimize required special handling for running as Xen PV
-dom0, the memory layout is modified to match that of the host. This
-requires to have only RAM at the locations where Xen allocated memory
-is living. Unfortunately there seem to be some machines, where ACPI
-NVS is located at 64 MB, resulting in a conflict with the loaded
-kernel or the initial page tables built by Xen.
-
-Avoid this conflict by swapping the ACPI NVS area in the memory map
-with unused RAM. This is possible via modification of the dom0 P2M map.
-Accesses to the ACPI NVS area are done either for saving and restoring
-it across suspend operations (this will work the same way as before),
-or by ACPI code when NVS memory is referenced from other ACPI tables.
-The latter case is handled by a Xen specific indirection of
-acpi_os_ioremap().
-
-While the E820 map can (and should) be modified right away, the P2M
-map can be updated only after memory allocation is working, as the P2M
-map might need to be extended.
-
-Fixes: 808fdb71936c ("xen: check for kernel memory conflicting with memory layout")
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- arch/x86/xen/setup.c | 92 +++++++++++++++++++++++++++++++++++++++++++-
- 1 file changed, 91 insertions(+), 1 deletion(-)
-
-diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index af18bd7e0700d..69803e3b1a231 100644
---- a/arch/x86/xen/setup.c
-+++ b/arch/x86/xen/setup.c
-@@ -536,6 +536,8 @@ void __init xen_remap_memory(void)
-       set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
-       pr_info("Remapped %ld page(s)\n", remapped);
-+
-+      xen_do_remap_nonram();
- }
- static unsigned long __init xen_get_pages_limit(void)
-@@ -666,14 +668,102 @@ phys_addr_t __init xen_find_free_area(phys_addr_t size)
-       return 0;
- }
-+/*
-+ * Swap a non-RAM E820 map entry with RAM above ini_nr_pages.
-+ * Note that the E820 map is modified accordingly, but the P2M map isn't yet.
-+ * The adaption of the P2M must be deferred until page allocation is possible.
-+ */
-+static void __init xen_e820_swap_entry_with_ram(struct e820_entry *swap_entry)
-+{
-+      struct e820_entry *entry;
-+      unsigned int mapcnt;
-+      phys_addr_t mem_end = PFN_PHYS(ini_nr_pages);
-+      phys_addr_t swap_addr, swap_size, entry_end;
-+
-+      swap_addr = PAGE_ALIGN_DOWN(swap_entry->addr);
-+      swap_size = PAGE_ALIGN(swap_entry->addr - swap_addr + swap_entry->size);
-+      entry = xen_e820_table.entries;
-+
-+      for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
-+              entry_end = entry->addr + entry->size;
-+              if (entry->type == E820_TYPE_RAM && entry->size >= swap_size &&
-+                  entry_end - swap_size >= mem_end) {
-+                      /* Reduce RAM entry by needed space (whole pages). */
-+                      entry->size -= swap_size;
-+
-+                      /* Add new entry at the end of E820 map. */
-+                      entry = xen_e820_table.entries +
-+                              xen_e820_table.nr_entries;
-+                      xen_e820_table.nr_entries++;
-+
-+                      /* Fill new entry (keep size and page offset). */
-+                      entry->type = swap_entry->type;
-+                      entry->addr = entry_end - swap_size +
-+                                    swap_addr - swap_entry->addr;
-+                      entry->size = swap_entry->size;
-+
-+                      /* Convert old entry to RAM, align to pages. */
-+                      swap_entry->type = E820_TYPE_RAM;
-+                      swap_entry->addr = swap_addr;
-+                      swap_entry->size = swap_size;
-+
-+                      /* Remember PFN<->MFN relation for P2M update. */
-+                      xen_add_remap_nonram(swap_addr, entry_end - swap_size,
-+                                           swap_size);
-+
-+                      /* Order E820 table and merge entries. */
-+                      e820__update_table(&xen_e820_table);
-+
-+                      return;
-+              }
-+
-+              entry++;
-+      }
-+
-+      xen_raw_console_write("No suitable area found for required E820 entry remapping action\n");
-+      BUG();
-+}
-+
-+/*
-+ * Look for non-RAM memory types in a specific guest physical area and move
-+ * those away if possible (ACPI NVS only for now).
-+ */
-+static void __init xen_e820_resolve_conflicts(phys_addr_t start,
-+                                            phys_addr_t size)
-+{
-+      struct e820_entry *entry;
-+      unsigned int mapcnt;
-+      phys_addr_t end;
-+
-+      if (!size)
-+              return;
-+
-+      end = start + size;
-+      entry = xen_e820_table.entries;
-+
-+      for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
-+              if (entry->addr >= end)
-+                      return;
-+
-+              if (entry->addr + entry->size > start &&
-+                  entry->type == E820_TYPE_NVS)
-+                      xen_e820_swap_entry_with_ram(entry);
-+
-+              entry++;
-+      }
-+}
-+
- /*
-  * Check for an area in physical memory to be usable for non-movable purposes.
-- * An area is considered to usable if the used E820 map lists it to be RAM.
-+ * An area is considered to usable if the used E820 map lists it to be RAM or
-+ * some other type which can be moved to higher PFNs while keeping the MFNs.
-  * In case the area is not usable, crash the system with an error message.
-  */
- void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size,
-                                  const char *component)
- {
-+      xen_e820_resolve_conflicts(start, size);
-+
-       if (!xen_is_e820_reserved(start, size))
-               return;
--- 
-2.43.0
-