--- /dev/null
+From 20d98716f2ddccaf0c08ef3edeb921836b32d8d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Feb 2024 16:04:24 -0500
+Subject: drm: add drm_gem_object_is_shared_for_memory_stats() helper
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit b31f5eba32ae8cc28e7cfa5a55ec8670d8c718e2 ]
+
+Add a helper so that drm drivers can consistently report
+shared status via the fdinfo shared memory stats interface.
+
+In addition to handle count, show buffers as shared if they
+are shared via dma-buf as well (e.g., shared with v4l or some
+other subsystem).
+
+v2: switch to inline function
+
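+As a rough usage sketch (not part of the upstream change; the foo_*
+names are made up for illustration), a driver's fdinfo memory
+accounting could bucket objects with the helper like this:
+
+	/* sketch: classify a GEM object as shared vs. private for fdinfo */
+	static void foo_account_bo(struct drm_gem_object *obj,
+				   struct foo_mem_stats *stats)
+	{
+		if (drm_gem_object_is_shared_for_memory_stats(obj))
+			stats->shared += obj->size;  /* >1 handle or exported via dma-buf */
+		else
+			stats->private += obj->size;
+	}
+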
+Link: https://lore.kernel.org/all/20231207180225.439482-1-alexander.deucher@amd.com/
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> (v1)
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Stable-dep-of: a6ff969fe9cb ("drm/amdgpu: fix visible VRAM handling during faults")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/drm/drm_gem.h | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
+index bc9f6aa2f3fec..7c2ec139c464a 100644
+--- a/include/drm/drm_gem.h
++++ b/include/drm/drm_gem.h
+@@ -544,6 +544,19 @@ unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru,
+
+ int drm_gem_evict(struct drm_gem_object *obj);
+
++/**
++ * drm_gem_object_is_shared_for_memory_stats - helper for shared memory stats
++ *
++ * This helper should only be used for fdinfo shared memory stats to determine
++ * if a GEM object is shared.
++ *
++ * @obj: obj in question
++ */
++static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_object *obj)
++{
++ return (obj->handle_count > 1) || obj->dma_buf;
++}
++
+ #ifdef CONFIG_LOCKDEP
+ /**
+ * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
+--
+2.43.0
+
--- /dev/null
+From 414f410b10b6323ad6499db3bafb65b241dfa7c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Feb 2024 16:04:26 -0500
+Subject: drm/amdgpu: add shared fdinfo stats
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit ba1a58d5b907bdf1814f8f57434aebc86233430f ]
+
+Add shared stats. Useful for seeing shared memory.
+
+v2: take dma-buf into account as well
+v3: use the new gem helper
+
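+With this applied, the per-client fdinfo output gains three new keys next
+to the existing amd-* stats; the values below are purely illustrative:
+
+	drm-shared-vram:	2048 KiB
+	drm-shared-gtt:		512 KiB
+	drm-shared-cpu:		0 KiB
+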
+Link: https://lore.kernel.org/all/20231207180225.439482-1-alexander.deucher@amd.com/
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: Rob Clark <robdclark@gmail.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Stable-dep-of: a6ff969fe9cb ("drm/amdgpu: fix visible VRAM handling during faults")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 4 ++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 +++++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 ++++++
+ 3 files changed, 21 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+index 6038b5021b27b..792c059ff7b35 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+@@ -105,6 +105,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+ stats.requested_visible_vram/1024UL);
+ drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
+ stats.requested_gtt/1024UL);
++ drm_printf(p, "drm-shared-vram:\t%llu KiB\n", stats.vram_shared/1024UL);
++ drm_printf(p, "drm-shared-gtt:\t%llu KiB\n", stats.gtt_shared/1024UL);
++ drm_printf(p, "drm-shared-cpu:\t%llu KiB\n", stats.cpu_shared/1024UL);
++
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ if (!usage[hw_ip])
+ continue;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index 173b43a5aa13b..394f475877e3b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -1281,25 +1281,36 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
+ struct amdgpu_mem_stats *stats)
+ {
+ uint64_t size = amdgpu_bo_size(bo);
++ struct drm_gem_object *obj;
+ unsigned int domain;
++ bool shared;
+
+ /* Abort if the BO doesn't currently have a backing store */
+ if (!bo->tbo.resource)
+ return;
+
++ obj = &bo->tbo.base;
++ shared = drm_gem_object_is_shared_for_memory_stats(obj);
++
+ domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ switch (domain) {
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ stats->vram += size;
+ if (amdgpu_bo_in_cpu_visible_vram(bo))
+ stats->visible_vram += size;
++ if (shared)
++ stats->vram_shared += size;
+ break;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ stats->gtt += size;
++ if (shared)
++ stats->gtt_shared += size;
+ break;
+ case AMDGPU_GEM_DOMAIN_CPU:
+ default:
+ stats->cpu += size;
++ if (shared)
++ stats->cpu_shared += size;
+ break;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+index a3ea8a82db23a..be679c42b0b8c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+@@ -138,12 +138,18 @@ struct amdgpu_bo_vm {
+ struct amdgpu_mem_stats {
+ /* current VRAM usage, includes visible VRAM */
+ uint64_t vram;
++ /* current shared VRAM usage, includes visible VRAM */
++ uint64_t vram_shared;
+ /* current visible VRAM usage */
+ uint64_t visible_vram;
+ /* current GTT usage */
+ uint64_t gtt;
++ /* current shared GTT usage */
++ uint64_t gtt_shared;
+ /* current system memory usage */
+ uint64_t cpu;
++ /* current shared system memory usage */
++ uint64_t cpu_shared;
+ /* sum of evicted buffers, includes visible VRAM */
+ uint64_t evicted_vram;
+ /* sum of evicted buffers due to CPU access */
+--
+2.43.0
+
--- /dev/null
+From 18ea34ca16ae5fcde5f879e412836c1a81d28914 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Apr 2024 16:25:40 +0200
+Subject: drm/amdgpu: fix visible VRAM handling during faults
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <christian.koenig@amd.com>
+
+[ Upstream commit a6ff969fe9cbf369e3cd0ac54261fec1122682ec ]
+
+When we removed the hacky start code check we actually didn't take into
+account that *all* VRAM pages need to be CPU accessible.
+
+Clean up the code and unify the handling into a single helper which
+checks if the whole resource is CPU accessible.
+
+The only place where a partial check would make sense is during
+eviction, but that is negligible.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Fixes: aed01a68047b ("drm/amdgpu: Remove TTM resource->start visible VRAM condition v2")
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+CC: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 22 ++++----
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 22 --------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 ++++++++++++++--------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 ++
+ 5 files changed, 53 insertions(+), 57 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index c0a3afe81bb1a..4294f5e7bff9a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -819,7 +819,7 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
+
+ p->bytes_moved += ctx.bytes_moved;
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+- amdgpu_bo_in_cpu_visible_vram(bo))
++ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ p->bytes_moved_vis += ctx.bytes_moved;
+
+ if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index 394f475877e3b..361f2cc94e8e5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -625,8 +625,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
+ return r;
+
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+- bo->tbo.resource->mem_type == TTM_PL_VRAM &&
+- amdgpu_bo_in_cpu_visible_vram(bo))
++ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
+ ctx.bytes_moved);
+ else
+@@ -1280,23 +1279,25 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
+ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
+ struct amdgpu_mem_stats *stats)
+ {
++ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
++ struct ttm_resource *res = bo->tbo.resource;
+ uint64_t size = amdgpu_bo_size(bo);
+ struct drm_gem_object *obj;
+ unsigned int domain;
+ bool shared;
+
+ /* Abort if the BO doesn't currently have a backing store */
+- if (!bo->tbo.resource)
++ if (!res)
+ return;
+
+ obj = &bo->tbo.base;
+ shared = drm_gem_object_is_shared_for_memory_stats(obj);
+
+- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
++ domain = amdgpu_mem_type_to_domain(res->mem_type);
+ switch (domain) {
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ stats->vram += size;
+- if (amdgpu_bo_in_cpu_visible_vram(bo))
++ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ stats->visible_vram += size;
+ if (shared)
+ stats->vram_shared += size;
+@@ -1395,10 +1396,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+ /* Remember that this BO was accessed by the CPU */
+ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+- if (bo->resource->mem_type != TTM_PL_VRAM)
+- return 0;
+-
+- if (amdgpu_bo_in_cpu_visible_vram(abo))
++ if (amdgpu_res_cpu_visible(adev, bo->resource))
+ return 0;
+
+ /* Can't move a pinned BO to visible VRAM */
+@@ -1422,7 +1420,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+
+ /* this should never happen */
+ if (bo->resource->mem_type == TTM_PL_VRAM &&
+- !amdgpu_bo_in_cpu_visible_vram(abo))
++ !amdgpu_res_cpu_visible(adev, bo->resource))
+ return VM_FAULT_SIGBUS;
+
+ ttm_bo_move_to_lru_tail_unlocked(bo);
+@@ -1582,6 +1580,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
+ */
+ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
+ {
++ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct dma_buf_attachment *attachment;
+ struct dma_buf *dma_buf;
+ const char *placement;
+@@ -1590,10 +1589,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
+
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ unsigned int domain;
++
+ domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ switch (domain) {
+ case AMDGPU_GEM_DOMAIN_VRAM:
+- if (amdgpu_bo_in_cpu_visible_vram(bo))
++ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ placement = "VRAM VISIBLE";
+ else
+ placement = "VRAM";
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+index be679c42b0b8c..fa03d9e4874cc 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+@@ -250,28 +250,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
+ return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
+ }
+
+-/**
+- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
+- */
+-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
+-{
+- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+- struct amdgpu_res_cursor cursor;
+-
+- if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
+- return false;
+-
+- amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+- while (cursor.remaining) {
+- if (cursor.start < adev->gmc.visible_vram_size)
+- return true;
+-
+- amdgpu_res_next(&cursor, cursor.size);
+- }
+-
+- return false;
+-}
+-
+ /**
+ * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
+ */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 1124e2d4f8530..d1687b5725693 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -137,7 +137,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+- amdgpu_bo_in_cpu_visible_vram(abo)) {
++ amdgpu_res_cpu_visible(adev, bo->resource)) {
+
+ /* Try evicting to the CPU inaccessible part of VRAM
+ * first, but only set GTT as busy placement, so this
+@@ -408,40 +408,55 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+ return r;
+ }
+
+-/*
+- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
++/**
++ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
++ * @adev: amdgpu device
++ * @res: the resource to check
+ *
+- * Called by amdgpu_bo_move()
++ * Returns: true if the full resource is CPU visible, false otherwise.
+ */
+-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+- struct ttm_resource *mem)
++bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
++ struct ttm_resource *res)
+ {
+- u64 mem_size = (u64)mem->size;
+ struct amdgpu_res_cursor cursor;
+- u64 end;
+
+- if (mem->mem_type == TTM_PL_SYSTEM ||
+- mem->mem_type == TTM_PL_TT)
++ if (!res)
++ return false;
++
++ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
++ res->mem_type == AMDGPU_PL_PREEMPT)
+ return true;
+- if (mem->mem_type != TTM_PL_VRAM)
++
++ if (res->mem_type != TTM_PL_VRAM)
+ return false;
+
+- amdgpu_res_first(mem, 0, mem_size, &cursor);
+- end = cursor.start + cursor.size;
++ amdgpu_res_first(res, 0, res->size, &cursor);
+ while (cursor.remaining) {
++ if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
++ return false;
+ amdgpu_res_next(&cursor, cursor.size);
++ }
+
+- if (!cursor.remaining)
+- break;
++ return true;
++}
+
+- /* ttm_resource_ioremap only supports contiguous memory */
+- if (end != cursor.start)
+- return false;
++/*
++ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
++ *
++ * Called by amdgpu_bo_move()
++ */
++static bool amdgpu_res_copyable(struct amdgpu_device *adev,
++ struct ttm_resource *mem)
++{
++ if (!amdgpu_res_cpu_visible(adev, mem))
++ return false;
+
+- end = cursor.start + cursor.size;
+- }
++ /* ttm_resource_ioremap only supports contiguous memory */
++ if (mem->mem_type == TTM_PL_VRAM &&
++ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
++ return false;
+
+- return end <= adev->gmc.visible_vram_size;
++ return true;
+ }
+
+ /*
+@@ -534,8 +549,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
+
+ if (r) {
+ /* Check that all memory is CPU accessible */
+- if (!amdgpu_mem_visible(adev, old_mem) ||
+- !amdgpu_mem_visible(adev, new_mem)) {
++ if (!amdgpu_res_copyable(adev, old_mem) ||
++ !amdgpu_res_copyable(adev, new_mem)) {
+ pr_err("Move buffer fallback to memcpy unavailable\n");
+ return r;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+index 65ec82141a8e0..32cf6b6f6efd9 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+@@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
+ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
+ uint64_t start);
+
++bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
++ struct ttm_resource *res);
++
+ int amdgpu_ttm_init(struct amdgpu_device *adev);
+ void amdgpu_ttm_fini(struct amdgpu_device *adev);
+ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+--
+2.43.0
+
--- /dev/null
+From 1c9adb22902b02073346490edd1c27feb4ff0eb3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Apr 2024 15:48:21 +0200
+Subject: drm/ttm: stop pooling cached NUMA pages v2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <ckoenig.leichtzumerken@gmail.com>
+
+[ Upstream commit b6976f323a8687cc0d55bc92c2086fd934324ed5 ]
+
+We only pool write combined and uncached allocations because they
+require extra overhead on allocation and release.
+
+If we also pool cached NUMA allocations it not only means some extra
+unnecessary overhead, but also that under memory pressure it can happen
+that pages from the wrong NUMA node enter the pool and are re-used
+over and over again.
+
+This can lead to performance reduction after running into memory
+pressure.
+
+v2: restructure and cleanup the code a bit from the internal hack to
+ test this.
+
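+For reference, an informal summary (not part of the patch itself) of
+which pool ttm_pool_select_type() ends up using after this change:
+
+	/* use_dma_alloc:          per-pool type, as before
+	 * wc/uncached, no NUMA:   global (dma32_)write_combined/uncached pools
+	 * wc/uncached, NUMA node: per-pool type for that node
+	 * cached:                 no pool - pages go straight back to the page
+	 *                         allocator, so stale pages can no longer be
+	 *                         re-used on the wrong NUMA node
+	 */
+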
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
+CC: stable@vger.kernel.org
+Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240415134821.1919-1-christian.koenig@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/ttm/ttm_pool.c | 38 +++++++++++++++++++++++++---------
+ 1 file changed, 28 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
+index c8ec6a2cac5d4..37c08fac7e7d0 100644
+--- a/drivers/gpu/drm/ttm/ttm_pool.c
++++ b/drivers/gpu/drm/ttm/ttm_pool.c
+@@ -287,17 +287,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
+ enum ttm_caching caching,
+ unsigned int order)
+ {
+- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
++ if (pool->use_dma_alloc)
+ return &pool->caching[caching].orders[order];
+
+ #ifdef CONFIG_X86
+ switch (caching) {
+ case ttm_write_combined:
++ if (pool->nid != NUMA_NO_NODE)
++ return &pool->caching[caching].orders[order];
++
+ if (pool->use_dma32)
+ return &global_dma32_write_combined[order];
+
+ return &global_write_combined[order];
+ case ttm_uncached:
++ if (pool->nid != NUMA_NO_NODE)
++ return &pool->caching[caching].orders[order];
++
+ if (pool->use_dma32)
+ return &global_dma32_uncached[order];
+
+@@ -563,11 +569,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
+ pool->use_dma_alloc = use_dma_alloc;
+ pool->use_dma32 = use_dma32;
+
+- if (use_dma_alloc || nid != NUMA_NO_NODE) {
+- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+- for (j = 0; j < NR_PAGE_ORDERS; ++j)
+- ttm_pool_type_init(&pool->caching[i].orders[j],
+- pool, i, j);
++ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
++ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
++ struct ttm_pool_type *pt;
++
++ /* Initialize only pool types which are actually used */
++ pt = ttm_pool_select_type(pool, i, j);
++ if (pt != &pool->caching[i].orders[j])
++ continue;
++
++ ttm_pool_type_init(pt, pool, i, j);
++ }
+ }
+ }
+ EXPORT_SYMBOL(ttm_pool_init);
+@@ -584,10 +596,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
+ {
+ unsigned int i, j;
+
+- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
+- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+- for (j = 0; j < NR_PAGE_ORDERS; ++j)
+- ttm_pool_type_fini(&pool->caching[i].orders[j]);
++ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
++ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
++ struct ttm_pool_type *pt;
++
++ pt = ttm_pool_select_type(pool, i, j);
++ if (pt != &pool->caching[i].orders[j])
++ continue;
++
++ ttm_pool_type_fini(pt);
++ }
+ }
+
+ /* We removed the pool types from the LRU, but we need to also make sure
+--
+2.43.0
+
--- /dev/null
+From e53f791282b6567256f6c19d43cf78cd053ff7bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Mar 2024 17:36:40 -0800
+Subject: KVM: x86/pmu: Set enable bits for GP counters in PERF_GLOBAL_CTRL at
+ "RESET"
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit de120e1d692d73c7eefa3278837b1eb68f90728a ]
+
+Set the enable bits for general purpose counters in IA32_PERF_GLOBAL_CTRL
+when refreshing the PMU to emulate the MSR's architecturally defined
+post-RESET behavior. Per Intel's SDM:
+
+ IA32_PERF_GLOBAL_CTRL: Sets bits n-1:0 and clears the upper bits.
+
+and
+
+ Where "n" is the number of general-purpose counters available in the processor.
+
+AMD also documents this behavior for PerfMonV2 CPUs in one of AMD's many
+PPRs.
+
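+As a concrete example (illustrative only): with six general purpose
+counters, the architectural post-RESET value has bits 5:0 set, i.e.
+0x3f, which is what the new code computes:
+
+	/* pmu->nr_arch_gp_counters == 6 */
+	pmu->global_ctrl = GENMASK_ULL(6 - 1, 0);	/* == 0x3f */
+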
+Do not set any PERF_GLOBAL_CTRL bits if there are no general purpose
+counters, although a literal reading of the SDM would require the CPU to
+set either bits 63:0 or 31:0. The intent of the behavior is to globally
+enable all GP counters; honor the intent, if not the letter of the law.
+
+Leaving PERF_GLOBAL_CTRL '0' effectively breaks PMU usage in guests that
+haven't been updated to work with PMUs that support PERF_GLOBAL_CTRL.
+This bug was recently exposed when KVM added support for AMD's
+PerfMonV2, i.e. when KVM started exposing a vPMU with PERF_GLOBAL_CTRL to
+guest software that only knew how to program v1 PMUs (that don't support
+PERF_GLOBAL_CTRL).
+
+Failure to emulate the post-RESET behavior results in such guests
+unknowingly leaving all general purpose counters globally disabled (the
+entire reason the post-RESET value sets the GP counter enable bits is to
+maintain backwards compatibility).
+
+The bug has likely gone unnoticed because PERF_GLOBAL_CTRL has been
+supported on Intel CPUs for as long as KVM has existed, i.e. hardly anyone
+is running guest software that isn't aware of PERF_GLOBAL_CTRL on Intel
+PMUs. And because up until v6.0, KVM _did_ emulate the behavior for Intel
+CPUs, although the old behavior was likely dumb luck: (a) that old code
+was also broken in its own way (the history of this code is a comedy of
+errors), and (b) PERF_GLOBAL_CTRL was documented as having a value of
+'0' post-RESET in all SDMs before March 2023.
+
+Initial vPMU support in commit f5132b01386b ("KVM: Expose a version 2
+architectural PMU to a guests") *almost* got it right (again likely by
+dumb luck), but for some reason only set the bits if the guest PMU was
+advertised as v1:
+
+ if (pmu->version == 1) {
+ pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
+ return;
+ }
+
+Commit f19a0c2c2e6a ("KVM: PMU emulation: GLOBAL_CTRL MSR should be
+enabled on reset") then tried to remedy that goof, presumably because
+guest PMUs were leaving PERF_GLOBAL_CTRL '0', i.e. weren't enabling
+counters.
+
+ pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
+ (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED);
+ pmu->global_ctrl_mask = ~pmu->global_ctrl;
+
+That was KVM's behavior up until commit c49467a45fe0 ("KVM: x86/pmu:
+Don't overwrite the pmu->global_ctrl when refreshing") removed
+*everything*. However, it did so based on the behavior defined by the
+SDM, which at the time stated that "Global Perf Counter Controls" is
+'0' at Power-Up and RESET.
+
+But then the March 2023 SDM (325462-079US), stealthily changed its
+"IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT"
+table to say:
+
+ IA32_PERF_GLOBAL_CTRL: Sets bits n-1:0 and clears the upper bits.
+
+Note, kvm_pmu_refresh() can be invoked multiple times, i.e. it's not a
+"pure" RESET flow. But it can only be called prior to the first KVM_RUN,
+i.e. the guest will only ever observe the final value.
+
+Note #2, KVM has always cleared global_ctrl during refresh (see commit
+f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")),
+i.e. there is no danger of breaking existing setups by clobbering a value
+set by userspace.
+
+Reported-by: Babu Moger <babu.moger@amd.com>
+Cc: Sandipan Das <sandipan.das@amd.com>
+Cc: Like Xu <like.xu.linux@gmail.com>
+Cc: Mingwei Zhang <mizhang@google.com>
+Cc: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Tested-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Link: https://lore.kernel.org/r/20240309013641.1413400-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/pmu.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
+index fa6f5cd70d4c8..da2d82e3a8735 100644
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -716,8 +716,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
+ pmu->pebs_data_cfg_mask = ~0ull;
+ bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+
+- if (vcpu->kvm->arch.enable_pmu)
+- static_call(kvm_x86_pmu_refresh)(vcpu);
++ if (!vcpu->kvm->arch.enable_pmu)
++ return;
++
++ static_call(kvm_x86_pmu_refresh)(vcpu);
++
++ /*
++ * At RESET, both Intel and AMD CPUs set all enable bits for general
++ * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
++ * was written for v1 PMUs don't unknowingly leave GP counters disabled
++ * in the global controls). Emulate that behavior when refreshing the
++ * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
++ */
++ if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
++ pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
+ }
+
+ void kvm_pmu_init(struct kvm_vcpu *vcpu)
+--
+2.43.0
+
--- /dev/null
+From ca1df8e3df9e6041412863834209ce5cdef70af9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Nov 2023 18:28:48 -0800
+Subject: KVM: x86/pmu: Zero out PMU metadata on AMD if PMU is disabled
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit f933b88e20150f15787390e2a1754a7e412754ed ]
+
+Move the purging of common PMU metadata from intel_pmu_refresh() to
+kvm_pmu_refresh(), and invoke the vendor refresh() hook if and only if
+the VM is supposed to have a vPMU.
+
+KVM already denies access to the PMU based on kvm->arch.enable_pmu, as
+get_gp_pmc_amd() returns NULL for all PMCs in that case, i.e. KVM already
+violates AMD's architecture by not virtualizing a PMU (kernels have long
+since learned to not panic when the PMU is unavailable). But configuring
+the PMU as if it were enabled causes unwanted side effects, e.g. calls to
+kvm_pmu_trigger_event() waste an absurd number of cycles due to the
+all_valid_pmc_idx bitmap being non-zero.
+
+Fixes: b1d66dad65dc ("KVM: x86/svm: Add module param to control PMU virtualization")
+Reported-by: Konstantin Khorenko <khorenko@virtuozzo.com>
+Closes: https://lore.kernel.org/all/20231109180646.2963718-2-khorenko@virtuozzo.com
+Link: https://lore.kernel.org/r/20231110022857.1273836-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Stable-dep-of: de120e1d692d ("KVM: x86/pmu: Set enable bits for GP counters in PERF_GLOBAL_CTRL at "RESET"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/pmu.c | 20 ++++++++++++++++++--
+ arch/x86/kvm/vmx/pmu_intel.c | 16 ++--------------
+ 2 files changed, 20 insertions(+), 16 deletions(-)
+
+diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
+index dc8e8e907cfbf..fa6f5cd70d4c8 100644
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -691,6 +691,8 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+ */
+ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
+ {
++ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
++
+ if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
+ return;
+
+@@ -700,8 +702,22 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
+ */
+ kvm_pmu_reset(vcpu);
+
+- bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+- static_call(kvm_x86_pmu_refresh)(vcpu);
++ pmu->version = 0;
++ pmu->nr_arch_gp_counters = 0;
++ pmu->nr_arch_fixed_counters = 0;
++ pmu->counter_bitmask[KVM_PMC_GP] = 0;
++ pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
++ pmu->reserved_bits = 0xffffffff00200000ull;
++ pmu->raw_event_mask = X86_RAW_EVENT_MASK;
++ pmu->global_ctrl_mask = ~0ull;
++ pmu->global_status_mask = ~0ull;
++ pmu->fixed_ctr_ctrl_mask = ~0ull;
++ pmu->pebs_enable_mask = ~0ull;
++ pmu->pebs_data_cfg_mask = ~0ull;
++ bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
++
++ if (vcpu->kvm->arch.enable_pmu)
++ static_call(kvm_x86_pmu_refresh)(vcpu);
+ }
+
+ void kvm_pmu_init(struct kvm_vcpu *vcpu)
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 1549461fa42b7..48a2f77f62ef3 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -493,19 +493,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+ u64 counter_mask;
+ int i;
+
+- pmu->nr_arch_gp_counters = 0;
+- pmu->nr_arch_fixed_counters = 0;
+- pmu->counter_bitmask[KVM_PMC_GP] = 0;
+- pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+- pmu->version = 0;
+- pmu->reserved_bits = 0xffffffff00200000ull;
+- pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+- pmu->global_ctrl_mask = ~0ull;
+- pmu->global_status_mask = ~0ull;
+- pmu->fixed_ctr_ctrl_mask = ~0ull;
+- pmu->pebs_enable_mask = ~0ull;
+- pmu->pebs_data_cfg_mask = ~0ull;
+-
+ memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
+
+ /*
+@@ -517,8 +504,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+ return;
+
+ entry = kvm_find_cpuid_entry(vcpu, 0xa);
+- if (!entry || !vcpu->kvm->arch.enable_pmu)
++ if (!entry)
+ return;
++
+ eax.full = entry->eax;
+ edx.full = entry->edx;
+
+--
+2.43.0
+
--- /dev/null
+From d64d15556b61253f6109226a4c039ace736a218f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Oct 2023 00:14:52 +0100
+Subject: mm/gup: explicitly define and check internal GUP flags, disallow
+ FOLL_TOUCH
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit 0f20bba1688bdf3b32df0162511a67d4eda15790 ]
+
+Rather than open-coding a list of internal GUP flags in
+is_valid_gup_args(), define which ones are internal.
+
+In addition, explicitly check to see if the user passed in FOLL_TOUCH
+somehow, as this appears to have been accidentally excluded.
+
+Link: https://lkml.kernel.org/r/971e013dfe20915612ea8b704e801d7aef9a66b6.1696288092.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Reviewed-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Richard Cochran <richardcochran@gmail.com>
+Cc: Will Deacon <will@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 631426ba1d45 ("mm/madvise: make MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/gup.c | 5 ++---
+ mm/internal.h | 3 +++
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/mm/gup.c b/mm/gup.c
+index 2f8a2d89fde19..b21b33d1787e1 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -2227,12 +2227,11 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
+ /*
+ * These flags not allowed to be specified externally to the gup
+ * interfaces:
+- * - FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY are internal only
++ * - FOLL_TOUCH/FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY are internal only
+ * - FOLL_REMOTE is internal only and used on follow_page()
+ * - FOLL_UNLOCKABLE is internal only and used if locked is !NULL
+ */
+- if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED | FOLL_UNLOCKABLE |
+- FOLL_REMOTE | FOLL_FAST_ONLY)))
++ if (WARN_ON_ONCE(gup_flags & INTERNAL_GUP_FLAGS))
+ return false;
+
+ gup_flags |= to_set;
+diff --git a/mm/internal.h b/mm/internal.h
+index 30cf724ddbce3..50cf76d30a88f 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -964,6 +964,9 @@ enum {
+ FOLL_UNLOCKABLE = 1 << 21,
+ };
+
++#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
++ FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
++
+ /*
+ * Indicates for which pages that are write-protected in the page table,
+ * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the
+--
+2.43.0
+
--- /dev/null
+From 7d3472c1c4d4a10f6720d5894d2efd29400d4ea4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Mar 2024 17:12:59 +0100
+Subject: mm/madvise: make MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY
+ properly
+
+From: David Hildenbrand <david@redhat.com>
+
+[ Upstream commit 631426ba1d45a8672b177ee85ad4cabe760dd131 ]
+
+Darrick reports that in some cases where pread() would fail with -EIO and
+mmap()+access would generate a SIGBUS signal, MADV_POPULATE_READ /
+MADV_POPULATE_WRITE will keep retrying forever and not fail with -EFAULT.
+
+While the madvise() call can be interrupted by a signal, this is not the
+desired behavior. MADV_POPULATE_READ / MADV_POPULATE_WRITE should behave
+like page faults in that case: fail and not retry forever.
+
+A reproducer can be found at [1].
+
+The reason is that __get_user_pages(), as called by
+faultin_vma_page_range(), will not handle VM_FAULT_RETRY in a proper way:
+it will simply return 0 when VM_FAULT_RETRY happened, making
+madvise_populate()->faultin_vma_page_range() retry again and again, never
+setting FOLL_TRIED->FAULT_FLAG_TRIED for __get_user_pages().
+
+__get_user_pages_locked() does what we want, but duplicating that logic in
+faultin_vma_page_range() feels wrong.
+
+So let's use __get_user_pages_locked() instead, that will detect
+VM_FAULT_RETRY and set FOLL_TRIED when retrying, making the fault handler
+return VM_FAULT_SIGBUS (VM_FAULT_ERROR) at some point, propagating -EFAULT
+from faultin_page() to __get_user_pages(), all the way to
+madvise_populate().
+
+But, there is an issue: __get_user_pages_locked() will end up re-taking
+the MM lock and then __get_user_pages() will do another VMA lookup. In
+the meantime, the VMA layout could have changed and we'd fail with
+different error codes than we'd want to.
+
+As __get_user_pages() will currently do a new VMA lookup either way, let
+it do the VMA handling in a different way, controlled by a new
+FOLL_MADV_POPULATE flag, effectively moving these checks from
+madvise_populate() + faultin_page_range() in there.
+
+With this change, Darrick's reproducer properly fails with -EFAULT, as
+documented for MADV_POPULATE_READ / MADV_POPULATE_WRITE.
+
+[1] https://lore.kernel.org/all/20240313171936.GN1927156@frogsfrogsfrogs/
+
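+For illustration (a minimal userspace sketch, not the reproducer from
+[1]; handle_populate_failure() is a placeholder), the documented
+behaviour this restores looks like:
+
+	/* prefault a mapping; an unreadable page now fails with EFAULT
+	 * instead of retrying forever
+	 */
+	if (madvise(addr, len, MADV_POPULATE_READ) == -1 && errno == EFAULT)
+		handle_populate_failure();
+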
+Link: https://lkml.kernel.org/r/20240314161300.382526-1-david@redhat.com
+Link: https://lkml.kernel.org/r/20240314161300.382526-2-david@redhat.com
+Fixes: 4ca9b3859dac ("mm/madvise: introduce MADV_POPULATE_(READ|WRITE) to prefault page tables")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: Darrick J. Wong <djwong@kernel.org>
+Closes: https://lore.kernel.org/all/20240311223815.GW1927156@frogsfrogsfrogs/
+Cc: Darrick J. Wong <djwong@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/gup.c | 54 ++++++++++++++++++++++++++++++---------------------
+ mm/internal.h | 10 ++++++----
+ mm/madvise.c | 17 ++--------------
+ 3 files changed, 40 insertions(+), 41 deletions(-)
+
+diff --git a/mm/gup.c b/mm/gup.c
+index b21b33d1787e1..cfc0a66d951b9 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -1204,6 +1204,22 @@ static long __get_user_pages(struct mm_struct *mm,
+
+ /* first iteration or cross vma bound */
+ if (!vma || start >= vma->vm_end) {
++ /*
++ * MADV_POPULATE_(READ|WRITE) wants to handle VMA
++ * lookups+error reporting differently.
++ */
++ if (gup_flags & FOLL_MADV_POPULATE) {
++ vma = vma_lookup(mm, start);
++ if (!vma) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ if (check_vma_flags(vma, gup_flags)) {
++ ret = -EINVAL;
++ goto out;
++ }
++ goto retry;
++ }
+ vma = gup_vma_lookup(mm, start);
+ if (!vma && in_gate_area(mm, start)) {
+ ret = get_gate_page(mm, start & PAGE_MASK,
+@@ -1670,35 +1686,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
+ }
+
+ /*
+- * faultin_vma_page_range() - populate (prefault) page tables inside the
+- * given VMA range readable/writable
++ * faultin_page_range() - populate (prefault) page tables inside the
++ * given range readable/writable
+ *
+ * This takes care of mlocking the pages, too, if VM_LOCKED is set.
+ *
+- * @vma: target vma
++ * @mm: the mm to populate page tables in
+ * @start: start address
+ * @end: end address
+ * @write: whether to prefault readable or writable
+ * @locked: whether the mmap_lock is still held
+ *
+- * Returns either number of processed pages in the vma, or a negative error
+- * code on error (see __get_user_pages()).
++ * Returns either number of processed pages in the MM, or a negative error
++ * code on error (see __get_user_pages()). Note that this function reports
++ * errors related to VMAs, such as incompatible mappings, as expected by
++ * MADV_POPULATE_(READ|WRITE).
+ *
+- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
+- * covered by the VMA. If it's released, *@locked will be set to 0.
++ * The range must be page-aligned.
++ *
++ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
+ */
+-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
+- unsigned long end, bool write, int *locked)
++long faultin_page_range(struct mm_struct *mm, unsigned long start,
++ unsigned long end, bool write, int *locked)
+ {
+- struct mm_struct *mm = vma->vm_mm;
+ unsigned long nr_pages = (end - start) / PAGE_SIZE;
+ int gup_flags;
+ long ret;
+
+ VM_BUG_ON(!PAGE_ALIGNED(start));
+ VM_BUG_ON(!PAGE_ALIGNED(end));
+- VM_BUG_ON_VMA(start < vma->vm_start, vma);
+- VM_BUG_ON_VMA(end > vma->vm_end, vma);
+ mmap_assert_locked(mm);
+
+ /*
+@@ -1710,19 +1726,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
+ * a poisoned page.
+ * !FOLL_FORCE: Require proper access permissions.
+ */
+- gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
++ gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
++ FOLL_MADV_POPULATE;
+ if (write)
+ gup_flags |= FOLL_WRITE;
+
+- /*
+- * We want to report -EINVAL instead of -EFAULT for any permission
+- * problems or incompatible mappings.
+- */
+- if (check_vma_flags(vma, gup_flags))
+- return -EINVAL;
+-
+- ret = __get_user_pages(mm, start, nr_pages, gup_flags,
+- NULL, locked);
++ ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
++ gup_flags);
+ lru_add_drain();
+ return ret;
+ }
+diff --git a/mm/internal.h b/mm/internal.h
+index 50cf76d30a88f..abed947f784b7 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -581,9 +581,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
+ void unmap_mapping_folio(struct folio *folio);
+ extern long populate_vma_page_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, int *locked);
+-extern long faultin_vma_page_range(struct vm_area_struct *vma,
+- unsigned long start, unsigned long end,
+- bool write, int *locked);
++extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
++ unsigned long end, bool write, int *locked);
+ extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
+ unsigned long bytes);
+ /*
+@@ -962,10 +961,13 @@ enum {
+ FOLL_FAST_ONLY = 1 << 20,
+ /* allow unlocking the mmap lock */
+ FOLL_UNLOCKABLE = 1 << 21,
++ /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
++ FOLL_MADV_POPULATE = 1 << 22,
+ };
+
+ #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
+- FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
++ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
++ FOLL_MADV_POPULATE)
+
+ /*
+ * Indicates for which pages that are write-protected in the page table,
+diff --git a/mm/madvise.c b/mm/madvise.c
+index 4dded5d27e7ea..98fdb9288a68a 100644
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -917,27 +917,14 @@ static long madvise_populate(struct vm_area_struct *vma,
+ {
+ const bool write = behavior == MADV_POPULATE_WRITE;
+ struct mm_struct *mm = vma->vm_mm;
+- unsigned long tmp_end;
+ int locked = 1;
+ long pages;
+
+ *prev = vma;
+
+ while (start < end) {
+- /*
+- * We might have temporarily dropped the lock. For example,
+- * our VMA might have been split.
+- */
+- if (!vma || start >= vma->vm_end) {
+- vma = vma_lookup(mm, start);
+- if (!vma)
+- return -ENOMEM;
+- }
+-
+- tmp_end = min_t(unsigned long, end, vma->vm_end);
+ /* Populate (prefault) page tables readable/writable. */
+- pages = faultin_vma_page_range(vma, start, tmp_end, write,
+- &locked);
++ pages = faultin_page_range(mm, start, end, write, &locked);
+ if (!locked) {
+ mmap_read_lock(mm);
+ locked = 1;
+@@ -958,7 +945,7 @@ static long madvise_populate(struct vm_area_struct *vma,
+ pr_warn_once("%s: unhandled return value: %ld\n",
+ __func__, pages);
+ fallthrough;
+- case -ENOMEM:
++ case -ENOMEM: /* No VMA or out of memory. */
+ return -ENOMEM;
+ }
+ }
+--
+2.43.0
+
--- /dev/null
+From c781c72f054614cc0c84bd00d0e44ed387a86b8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Dec 2023 17:47:03 +0300
+Subject: mm, treewide: introduce NR_PAGE_ORDERS
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+[ Upstream commit fd37721803c6e73619108f76ad2e12a9aa5fafaf ]
+
+NR_PAGE_ORDERS defines the number of page orders supported by the page
+allocator, ranging from 0 to MAX_ORDER, i.e. MAX_ORDER + 1 orders in total.
+
+NR_PAGE_ORDERS assists in defining arrays of page orders and allows for
+more natural iteration over them.
+
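+For example (illustrative, assuming the default MAX_ORDER of 10 on
+current kernels): NR_PAGE_ORDERS is 11, arrays such as
+free_area[NR_PAGE_ORDERS] cover orders 0..10, and loops become
+
+	for (order = 0; order < NR_PAGE_ORDERS; order++)
+
+instead of "order <= MAX_ORDER".
+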
+[kirill.shutemov@linux.intel.com: fixup for kerneldoc warning]
+ Link: https://lkml.kernel.org/r/20240101111512.7empzyifq7kxtzk3@box
+Link: https://lkml.kernel.org/r/20231228144704.14033-1-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Zi Yan <ziy@nvidia.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: b6976f323a86 ("drm/ttm: stop pooling cached NUMA pages v2")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../admin-guide/kdump/vmcoreinfo.rst | 6 +++---
+ arch/arm64/kvm/hyp/include/nvhe/gfp.h | 2 +-
+ arch/sparc/kernel/traps_64.c | 2 +-
+ drivers/gpu/drm/ttm/tests/ttm_device_test.c | 2 +-
+ drivers/gpu/drm/ttm/ttm_pool.c | 20 +++++++++----------
+ include/drm/ttm/ttm_pool.h | 2 +-
+ include/linux/mmzone.h | 6 ++++--
+ kernel/crash_core.c | 2 +-
+ lib/test_meminit.c | 2 +-
+ mm/compaction.c | 2 +-
+ mm/kmsan/init.c | 2 +-
+ mm/page_alloc.c | 13 ++++++------
+ mm/page_reporting.c | 2 +-
+ mm/show_mem.c | 8 ++++----
+ mm/vmstat.c | 12 +++++------
+ 15 files changed, 42 insertions(+), 41 deletions(-)
+
+diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
+index 599e8d3bcbc31..9235cf4fbabff 100644
+--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
++++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
+@@ -172,7 +172,7 @@ variables.
+ Offset of the free_list's member. This value is used to compute the number
+ of free pages.
+
+-Each zone has a free_area structure array called free_area[MAX_ORDER + 1].
++Each zone has a free_area structure array called free_area[NR_PAGE_ORDERS].
+ The free_list represents a linked list of free page blocks.
+
+ (list_head, next|prev)
+@@ -189,8 +189,8 @@ Offsets of the vmap_area's members. They carry vmalloc-specific
+ information. Makedumpfile gets the start address of the vmalloc region
+ from this.
+
+-(zone.free_area, MAX_ORDER + 1)
+--------------------------------
++(zone.free_area, NR_PAGE_ORDERS)
++--------------------------------
+
+ Free areas descriptor. User-space tools use this value to iterate the
+ free_area ranges. MAX_ORDER is used by the zone buddy allocator.
+diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+index fe5472a184a37..97c527ef53c2a 100644
+--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
++++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+@@ -16,7 +16,7 @@ struct hyp_pool {
+ * API at EL2.
+ */
+ hyp_spinlock_t lock;
+- struct list_head free_area[MAX_ORDER + 1];
++ struct list_head free_area[NR_PAGE_ORDERS];
+ phys_addr_t range_start;
+ phys_addr_t range_end;
+ unsigned short max_order;
+diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
+index 08ffd17d5ec34..523a6e5ee9251 100644
+--- a/arch/sparc/kernel/traps_64.c
++++ b/arch/sparc/kernel/traps_64.c
+@@ -897,7 +897,7 @@ void __init cheetah_ecache_flush_init(void)
+
+ /* Now allocate error trap reporting scoreboard. */
+ sz = NR_CPUS * (2 * sizeof(struct cheetah_err_info));
+- for (order = 0; order <= MAX_ORDER; order++) {
++ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ if ((PAGE_SIZE << order) >= sz)
+ break;
+ }
+diff --git a/drivers/gpu/drm/ttm/tests/ttm_device_test.c b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
+index b1b423b68cdf1..19eaff22e6ae0 100644
+--- a/drivers/gpu/drm/ttm/tests/ttm_device_test.c
++++ b/drivers/gpu/drm/ttm/tests/ttm_device_test.c
+@@ -175,7 +175,7 @@ static void ttm_device_init_pools(struct kunit *test)
+
+ if (params->pools_init_expected) {
+ for (int i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+- for (int j = 0; j <= MAX_ORDER; ++j) {
++ for (int j = 0; j < NR_PAGE_ORDERS; ++j) {
+ pt = pool->caching[i].orders[j];
+ KUNIT_EXPECT_PTR_EQ(test, pt.pool, pool);
+ KUNIT_EXPECT_EQ(test, pt.caching, i);
+diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
+index 9b60222511d65..c8ec6a2cac5d4 100644
+--- a/drivers/gpu/drm/ttm/ttm_pool.c
++++ b/drivers/gpu/drm/ttm/ttm_pool.c
+@@ -65,11 +65,11 @@ module_param(page_pool_size, ulong, 0644);
+
+ static atomic_long_t allocated_pages;
+
+-static struct ttm_pool_type global_write_combined[MAX_ORDER + 1];
+-static struct ttm_pool_type global_uncached[MAX_ORDER + 1];
++static struct ttm_pool_type global_write_combined[NR_PAGE_ORDERS];
++static struct ttm_pool_type global_uncached[NR_PAGE_ORDERS];
+
+-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1];
+-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1];
++static struct ttm_pool_type global_dma32_write_combined[NR_PAGE_ORDERS];
++static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS];
+
+ static spinlock_t shrinker_lock;
+ static struct list_head shrinker_list;
+@@ -565,7 +565,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
+
+ if (use_dma_alloc || nid != NUMA_NO_NODE) {
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+- for (j = 0; j <= MAX_ORDER; ++j)
++ for (j = 0; j < NR_PAGE_ORDERS; ++j)
+ ttm_pool_type_init(&pool->caching[i].orders[j],
+ pool, i, j);
+ }
+@@ -586,7 +586,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
+
+ if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+- for (j = 0; j <= MAX_ORDER; ++j)
++ for (j = 0; j < NR_PAGE_ORDERS; ++j)
+ ttm_pool_type_fini(&pool->caching[i].orders[j]);
+ }
+
+@@ -641,7 +641,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
+ unsigned int i;
+
+ seq_puts(m, "\t ");
+- for (i = 0; i <= MAX_ORDER; ++i)
++ for (i = 0; i < NR_PAGE_ORDERS; ++i)
+ seq_printf(m, " ---%2u---", i);
+ seq_puts(m, "\n");
+ }
+@@ -652,7 +652,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
+ {
+ unsigned int i;
+
+- for (i = 0; i <= MAX_ORDER; ++i)
++ for (i = 0; i < NR_PAGE_ORDERS; ++i)
+ seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
+ seq_puts(m, "\n");
+ }
+@@ -761,7 +761,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
+ spin_lock_init(&shrinker_lock);
+ INIT_LIST_HEAD(&shrinker_list);
+
+- for (i = 0; i <= MAX_ORDER; ++i) {
++ for (i = 0; i < NR_PAGE_ORDERS; ++i) {
+ ttm_pool_type_init(&global_write_combined[i], NULL,
+ ttm_write_combined, i);
+ ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
+@@ -794,7 +794,7 @@ void ttm_pool_mgr_fini(void)
+ {
+ unsigned int i;
+
+- for (i = 0; i <= MAX_ORDER; ++i) {
++ for (i = 0; i < NR_PAGE_ORDERS; ++i) {
+ ttm_pool_type_fini(&global_write_combined[i]);
+ ttm_pool_type_fini(&global_uncached[i]);
+
+diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
+index 30a347e5aa114..4490d43c63e33 100644
+--- a/include/drm/ttm/ttm_pool.h
++++ b/include/drm/ttm/ttm_pool.h
+@@ -74,7 +74,7 @@ struct ttm_pool {
+ bool use_dma32;
+
+ struct {
+- struct ttm_pool_type orders[MAX_ORDER + 1];
++ struct ttm_pool_type orders[NR_PAGE_ORDERS];
+ } caching[TTM_NUM_CACHING_TYPES];
+ };
+
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 0f62786269d0c..1acbc6ce1fe43 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -34,6 +34,8 @@
+
+ #define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES)
+
++#define NR_PAGE_ORDERS (MAX_ORDER + 1)
++
+ /*
+ * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
+ * costly to service. That is between allocation orders which should
+@@ -95,7 +97,7 @@ static inline bool migratetype_is_mergeable(int mt)
+ }
+
+ #define for_each_migratetype_order(order, type) \
+- for (order = 0; order <= MAX_ORDER; order++) \
++ for (order = 0; order < NR_PAGE_ORDERS; order++) \
+ for (type = 0; type < MIGRATE_TYPES; type++)
+
+ extern int page_group_by_mobility_disabled;
+@@ -929,7 +931,7 @@ struct zone {
+ CACHELINE_PADDING(_pad1_);
+
+ /* free areas of different sizes */
+- struct free_area free_area[MAX_ORDER + 1];
++ struct free_area free_area[NR_PAGE_ORDERS];
+
+ #ifdef CONFIG_UNACCEPTED_MEMORY
+ /* Pages to be accepted. All pages on the list are MAX_ORDER */
+diff --git a/kernel/crash_core.c b/kernel/crash_core.c
+index 2f675ef045d40..b685e94605841 100644
+--- a/kernel/crash_core.c
++++ b/kernel/crash_core.c
+@@ -660,7 +660,7 @@ static int __init crash_save_vmcoreinfo_init(void)
+ VMCOREINFO_OFFSET(list_head, prev);
+ VMCOREINFO_OFFSET(vmap_area, va_start);
+ VMCOREINFO_OFFSET(vmap_area, list);
+- VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1);
++ VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS);
+ log_buf_vmcoreinfo_setup();
+ VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
+ VMCOREINFO_NUMBER(NR_FREE_PAGES);
+diff --git a/lib/test_meminit.c b/lib/test_meminit.c
+index 0ae35223d7733..0dc173849a542 100644
+--- a/lib/test_meminit.c
++++ b/lib/test_meminit.c
+@@ -93,7 +93,7 @@ static int __init test_pages(int *total_failures)
+ int failures = 0, num_tests = 0;
+ int i;
+
+- for (i = 0; i <= MAX_ORDER; i++)
++ for (i = 0; i < NR_PAGE_ORDERS; i++)
+ num_tests += do_alloc_pages_order(i, &failures);
+
+ REPORT_FAILURES_IN_FN();
+diff --git a/mm/compaction.c b/mm/compaction.c
+index 5a3c644c978e2..61c741f11e9bb 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -2225,7 +2225,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
+
+ /* Direct compactor: Is a suitable page free? */
+ ret = COMPACT_NO_SUITABLE_PAGE;
+- for (order = cc->order; order <= MAX_ORDER; order++) {
++ for (order = cc->order; order < NR_PAGE_ORDERS; order++) {
+ struct free_area *area = &cc->zone->free_area[order];
+ bool can_steal;
+
+diff --git a/mm/kmsan/init.c b/mm/kmsan/init.c
+index ffedf4dbc49d7..103e2e88ea033 100644
+--- a/mm/kmsan/init.c
++++ b/mm/kmsan/init.c
+@@ -96,7 +96,7 @@ void __init kmsan_init_shadow(void)
+ struct metadata_page_pair {
+ struct page *shadow, *origin;
+ };
+-static struct metadata_page_pair held_back[MAX_ORDER + 1] __initdata;
++static struct metadata_page_pair held_back[NR_PAGE_ORDERS] __initdata;
+
+ /*
+ * Eager metadata allocation. When the memblock allocator is freeing pages to
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index ab71417350127..6b4c30fcae1c9 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1570,7 +1570,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
+ struct page *page;
+
+ /* Find a page of the appropriate size in the preferred list */
+- for (current_order = order; current_order <= MAX_ORDER; ++current_order) {
++ for (current_order = order; current_order < NR_PAGE_ORDERS; ++current_order) {
+ area = &(zone->free_area[current_order]);
+ page = get_page_from_free_area(area, migratetype);
+ if (!page)
+@@ -1940,7 +1940,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
+ continue;
+
+ spin_lock_irqsave(&zone->lock, flags);
+- for (order = 0; order <= MAX_ORDER; order++) {
++ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ struct free_area *area = &(zone->free_area[order]);
+
+ page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
+@@ -2050,8 +2050,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
+ return false;
+
+ find_smallest:
+- for (current_order = order; current_order <= MAX_ORDER;
+- current_order++) {
++ for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
+ area = &(zone->free_area[current_order]);
+ fallback_mt = find_suitable_fallback(area, current_order,
+ start_migratetype, false, &can_steal);
+@@ -2884,7 +2883,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ return true;
+
+ /* For a high-order request, check at least one suitable page is free */
+- for (o = order; o <= MAX_ORDER; o++) {
++ for (o = order; o < NR_PAGE_ORDERS; o++) {
+ struct free_area *area = &z->free_area[o];
+ int mt;
+
+@@ -6442,7 +6441,7 @@ bool is_free_buddy_page(struct page *page)
+ unsigned long pfn = page_to_pfn(page);
+ unsigned int order;
+
+- for (order = 0; order <= MAX_ORDER; order++) {
++ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+ if (PageBuddy(page_head) &&
+@@ -6501,7 +6500,7 @@ bool take_page_off_buddy(struct page *page)
+ bool ret = false;
+
+ spin_lock_irqsave(&zone->lock, flags);
+- for (order = 0; order <= MAX_ORDER; order++) {
++ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ struct page *page_head = page - (pfn & ((1 << order) - 1));
+ int page_order = buddy_order(page_head);
+
+diff --git a/mm/page_reporting.c b/mm/page_reporting.c
+index b021f482a4cb3..66369cc5279bf 100644
+--- a/mm/page_reporting.c
++++ b/mm/page_reporting.c
+@@ -276,7 +276,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev,
+ return err;
+
+ /* Process each free list starting from lowest order/mt */
+- for (order = page_reporting_order; order <= MAX_ORDER; order++) {
++ for (order = page_reporting_order; order < NR_PAGE_ORDERS; order++) {
+ for (mt = 0; mt < MIGRATE_TYPES; mt++) {
+ /* We do not pull pages from the isolate free list */
+ if (is_migrate_isolate(mt))
+diff --git a/mm/show_mem.c b/mm/show_mem.c
+index 4b888b18bddea..b896e54e3a26c 100644
+--- a/mm/show_mem.c
++++ b/mm/show_mem.c
+@@ -355,8 +355,8 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
+
+ for_each_populated_zone(zone) {
+ unsigned int order;
+- unsigned long nr[MAX_ORDER + 1], flags, total = 0;
+- unsigned char types[MAX_ORDER + 1];
++ unsigned long nr[NR_PAGE_ORDERS], flags, total = 0;
++ unsigned char types[NR_PAGE_ORDERS];
+
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
+@@ -366,7 +366,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
+ printk(KERN_CONT "%s: ", zone->name);
+
+ spin_lock_irqsave(&zone->lock, flags);
+- for (order = 0; order <= MAX_ORDER; order++) {
++ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ struct free_area *area = &zone->free_area[order];
+ int type;
+
+@@ -380,7 +380,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+- for (order = 0; order <= MAX_ORDER; order++) {
++ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ printk(KERN_CONT "%lu*%lukB ",
+ nr[order], K(1UL) << order);
+ if (nr[order])
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index 00e81e99c6ee2..e9616c4ca12db 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -1055,7 +1055,7 @@ static void fill_contig_page_info(struct zone *zone,
+ info->free_blocks_total = 0;
+ info->free_blocks_suitable = 0;
+
+- for (order = 0; order <= MAX_ORDER; order++) {
++ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ unsigned long blocks;
+
+ /*
+@@ -1471,7 +1471,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
+ int order;
+
+ seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+- for (order = 0; order <= MAX_ORDER; ++order)
++ for (order = 0; order < NR_PAGE_ORDERS; ++order)
+ /*
+ * Access to nr_free is lockless as nr_free is used only for
+ * printing purposes. Use data_race to avoid KCSAN warning.
+@@ -1500,7 +1500,7 @@ static void pagetypeinfo_showfree_print(struct seq_file *m,
+ pgdat->node_id,
+ zone->name,
+ migratetype_names[mtype]);
+- for (order = 0; order <= MAX_ORDER; ++order) {
++ for (order = 0; order < NR_PAGE_ORDERS; ++order) {
+ unsigned long freecount = 0;
+ struct free_area *area;
+ struct list_head *curr;
+@@ -1540,7 +1540,7 @@ static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
+
+ /* Print header */
+ seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
+- for (order = 0; order <= MAX_ORDER; ++order)
++ for (order = 0; order < NR_PAGE_ORDERS; ++order)
+ seq_printf(m, "%6d ", order);
+ seq_putc(m, '\n');
+
+@@ -2176,7 +2176,7 @@ static void unusable_show_print(struct seq_file *m,
+ seq_printf(m, "Node %d, zone %8s ",
+ pgdat->node_id,
+ zone->name);
+- for (order = 0; order <= MAX_ORDER; ++order) {
++ for (order = 0; order < NR_PAGE_ORDERS; ++order) {
+ fill_contig_page_info(zone, order, &info);
+ index = unusable_free_index(order, &info);
+ seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
+@@ -2228,7 +2228,7 @@ static void extfrag_show_print(struct seq_file *m,
+ seq_printf(m, "Node %d, zone %8s ",
+ pgdat->node_id,
+ zone->name);
+- for (order = 0; order <= MAX_ORDER; ++order) {
++ for (order = 0; order < NR_PAGE_ORDERS; ++order) {
+ fill_contig_page_info(zone, order, &info);
+ index = __fragmentation_index(order, &info);
+ seq_printf(m, "%2d.%03d ", index / 1000, index % 1000);
+--
+2.43.0
+
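All of the hunks above apply the same mechanical conversion: upstream defines
NR_PAGE_ORDERS as MAX_ORDER + 1 in include/linux/mmzone.h, so inclusive loops
over 0..MAX_ORDER become exclusive loops bounded by NR_PAGE_ORDERS, and arrays
sized MAX_ORDER + 1 are resized to NR_PAGE_ORDERS. A minimal standalone sketch
of that equivalence (the MAX_ORDER value here is illustrative only; in the
kernel it can be changed via CONFIG_ARCH_FORCE_MAX_ORDER):

    #include <stdio.h>

    #define MAX_ORDER      10                 /* illustrative value only */
    #define NR_PAGE_ORDERS (MAX_ORDER + 1)    /* as introduced by the treewide patch */

    int main(void)
    {
        unsigned long nr[NR_PAGE_ORDERS] = { 0 };   /* was: nr[MAX_ORDER + 1] */
        unsigned int order;

        /* Old form: for (order = 0; order <= MAX_ORDER; order++) */
        for (order = 0; order < NR_PAGE_ORDERS; order++)
            nr[order] = 1UL << order;   /* both bounds visit orders 0..MAX_ORDER */

        for (order = 0; order < NR_PAGE_ORDERS; order++)
            printf("order %2u: %lu pages per block\n", order, nr[order]);

        return 0;
    }

Both loop forms visit exactly the orders 0 through MAX_ORDER; the new spelling
avoids the error-prone "<=" bound and the repeated "MAX_ORDER + 1" array size.
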
net-ethernet-ti-am65-cpts-fix-ptpv1-message-type-on-.patch
tls-fix-lockless-read-of-strp-msg_ready-in-poll.patch
af_unix-suppress-false-positive-lockdep-splat-for-sp.patch
+kvm-x86-pmu-zero-out-pmu-metadata-on-amd-if-pmu-is-d.patch
+kvm-x86-pmu-set-enable-bits-for-gp-counters-in-perf_.patch
+mm-gup-explicitly-define-and-check-internal-gup-flag.patch
+mm-madvise-make-madv_populate_-read-write-handle-vm_.patch
+drm-add-drm_gem_object_is_shared_for_memory_stats-he.patch
+drm-amdgpu-add-shared-fdinfo-stats.patch
+drm-amdgpu-fix-visible-vram-handling-during-faults.patch
+mm-treewide-introduce-nr_page_orders.patch
+drm-ttm-stop-pooling-cached-numa-pages-v2.patch
+squashfs-convert-to-new-timestamp-accessors.patch
+squashfs-check-the-inode-number-is-not-the-invalid-v.patch
--- /dev/null
+From 2c44ece9d52e9e48b8d9813cb44beb104cf8e60b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Apr 2024 23:02:06 +0100
+Subject: Squashfs: check the inode number is not the invalid value of zero
+
+From: Phillip Lougher <phillip@squashfs.org.uk>
+
+[ Upstream commit 9253c54e01b6505d348afbc02abaa4d9f8a01395 ]
+
+Syskiller has produced an out of bounds access in fill_meta_index().
+
+That out of bounds access is ultimately caused by the inode having an
+inode number with the invalid value of zero, which was not checked.
+
+The reason this causes the out of bounds access is the following
+sequence of events:
+
+1. Fill_meta_index() is called to allocate (via empty_meta_index())
+ and fill a metadata index. It however suffers a data read error
+ and aborts, invalidating the newly returned empty metadata index.
+ It does this by setting the inode number of the index to zero,
+ which means unused (zero is not a valid inode number).
+
+2. When fill_meta_index() is subsequently called again on another
+ read operation, locate_meta_index() returns the previous index
+ because it matches the inode number of 0. Because this index
+ has been returned it is expected to have been filled, and because
+ it hasn't been, an out of bounds access is performed.
+
+This patch adds a sanity check which checks that the inode number
+is not zero when the inode is created and returns -EINVAL if it is.
+
+[phillip@squashfs.org.uk: whitespace fix]
+ Link: https://lkml.kernel.org/r/20240409204723.446925-1-phillip@squashfs.org.uk
+Link: https://lkml.kernel.org/r/20240408220206.435788-1-phillip@squashfs.org.uk
+Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
+Reported-by: "Ubisectech Sirius" <bugreport@ubisectech.com>
+Closes: https://lore.kernel.org/lkml/87f5c007-b8a5-41ae-8b57-431e924c5915.bugreport@ubisectech.com/
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/squashfs/inode.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
+index aa3411354e66d..16bd693d0b3aa 100644
+--- a/fs/squashfs/inode.c
++++ b/fs/squashfs/inode.c
+@@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
+ gid_t i_gid;
+ int err;
+
++ inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
++ if (inode->i_ino == 0)
++ return -EINVAL;
++
+ err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
+ if (err)
+ return err;
+@@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
+
+ i_uid_write(inode, i_uid);
+ i_gid_write(inode, i_gid);
+- inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+ inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
+ inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
+ inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
+--
+2.43.0
+
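The two-step failure described in the commit message above is a stale-sentinel
collision: the metadata index cache marks an entry as unused by storing 0 in
its inode-number field, so any lookup keyed by a real inode number of 0 matches
the invalidated, never-filled entry. A simplified, hypothetical model of that
pattern (invented names; not the actual squashfs code):

    #include <stdio.h>

    /* Hypothetical, simplified model of the stale-index bug described above. */
    struct meta_index {
        unsigned int inode_number;   /* 0 is (ab)used to mean "unused/invalid" */
        int filled;                  /* set once the index has been populated  */
    };

    static struct meta_index cache = { 0, 0 };

    /* Invalidate on error by writing the "unused" sentinel. */
    static void invalidate(struct meta_index *idx)
    {
        idx->inode_number = 0;
        idx->filled = 0;
    }

    /* Lookup matches purely on inode number. */
    static struct meta_index *locate(unsigned int ino)
    {
        return cache.inode_number == ino ? &cache : NULL;
    }

    int main(void)
    {
        invalidate(&cache);                 /* step 1: read error, entry marked unused */

        /* step 2: an unchecked inode carries inode number 0 ...                       */
        struct meta_index *idx = locate(0); /* ... and matches the invalidated entry   */

        if (idx && !idx->filled)
            printf("stale, unfilled index returned for inode 0 -> out of bounds later\n");

        return 0;
    }

The patch above closes the hole at its source: an inode whose on-disk number is
the sentinel value 0 is rejected with -EINVAL before it can reach the index cache.
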
--- /dev/null
+From 87c95dbeff05ea29654e73ee7059cbf94bc5a227 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Oct 2023 14:52:55 -0400
+Subject: squashfs: convert to new timestamp accessors
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit a1f13ed8c74893ed31d41c5bca156a623b0e9a86 ]
+
+Convert to using the new inode timestamp accessor functions.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Link: https://lore.kernel.org/r/20231004185347.80880-68-jlayton@kernel.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 9253c54e01b6 ("Squashfs: check the inode number is not the invalid value of zero")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/squashfs/inode.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
+index c6e626b00546b..aa3411354e66d 100644
+--- a/fs/squashfs/inode.c
++++ b/fs/squashfs/inode.c
+@@ -59,9 +59,9 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
+ i_uid_write(inode, i_uid);
+ i_gid_write(inode, i_gid);
+ inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+- inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime);
+- inode->i_atime.tv_sec = inode->i_mtime.tv_sec;
+- inode_set_ctime(inode, inode->i_mtime.tv_sec, 0);
++ inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
++ inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
++ inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
+ inode->i_mode = le16_to_cpu(sqsh_ino->mode);
+ inode->i_size = 0;
+
+--
+2.43.0
+
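The hunk above is purely mechanical: direct stores to inode->i_mtime.tv_sec and
friends are replaced by the VFS accessor helpers, so the in-memory timestamp
representation can change without touching every filesystem. A rough standalone
model of the accessor pattern (simplified stand-ins for illustration; the real
helpers are declared in include/linux/fs.h):

    #include <stdio.h>

    /* Simplified stand-ins for the VFS timestamp accessors used in the hunk
     * above; for illustration only, not the kernel definitions. */
    struct timespec64 { long long tv_sec; long tv_nsec; };
    struct inode { struct timespec64 i_mtime, i_atime, i_ctime; };

    static struct timespec64 inode_set_mtime(struct inode *inode, long long sec, long nsec)
    {
        inode->i_mtime = (struct timespec64){ sec, nsec };
        return inode->i_mtime;
    }

    static long long inode_get_mtime_sec(const struct inode *inode)
    {
        return inode->i_mtime.tv_sec;
    }

    static struct timespec64 inode_set_atime(struct inode *inode, long long sec, long nsec)
    {
        inode->i_atime = (struct timespec64){ sec, nsec };
        return inode->i_atime;
    }

    static struct timespec64 inode_set_ctime(struct inode *inode, long long sec, long nsec)
    {
        inode->i_ctime = (struct timespec64){ sec, nsec };
        return inode->i_ctime;
    }

    int main(void)
    {
        struct inode inode = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
        long long mtime = 1700000000;  /* stands in for le32_to_cpu(sqsh_ino->mtime) */

        /* Old form:  inode.i_mtime.tv_sec = mtime;
         *            inode.i_atime.tv_sec = inode.i_mtime.tv_sec;  (ctime likewise) */
        inode_set_mtime(&inode, mtime, 0);
        inode_set_atime(&inode, inode_get_mtime_sec(&inode), 0);
        inode_set_ctime(&inode, inode_get_mtime_sec(&inode), 0);

        printf("mtime=%lld atime=%lld ctime=%lld\n",
               inode.i_mtime.tv_sec, inode.i_atime.tv_sec, inode.i_ctime.tv_sec);
        return 0;
    }

Usage mirrors the new lines in the diff: atime and ctime are derived from the
just-set mtime via inode_get_mtime_sec() rather than by reading the field directly.
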