git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.2-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sat, 22 Apr 2023 16:58:29 +0000 (18:58 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sat, 22 Apr 2023 16:58:29 +0000 (18:58 +0200)
added patches:
drm-amd-display-set-dcn315-lb-bpp-to-48.patch
drm-amdgpu-fix-desktop-freezed-after-gpu-reset.patch
drm-i915-fix-fast-wake-aux-sync-len.patch
drm-rockchip-vop2-fix-suspend-resume.patch
drm-rockchip-vop2-use-regcache_sync-to-fix-suspend-resume.patch
mm-fix-memory-leak-on-mm_init-error-handling.patch
mm-huge_memory.c-warn-with-pr_warn_ratelimited-instead-of-vm_warn_on_once_folio.patch
mm-khugepaged-check-again-on-anon-uffd-wp-during-isolation.patch
mm-kmsan-handle-alloc-failures-in-kmsan_ioremap_page_range.patch
mm-kmsan-handle-alloc-failures-in-kmsan_vmap_pages_range_noflush.patch
mm-mmap-regression-fix-for-unmapped_area-_topdown.patch
mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch
mm-userfaultfd-fix-uffd-wp-handling-for-thp-migration-entries.patch
mmc-sdhci_am654-set-high_speed_ena-for-sdr12-and-sdr25.patch

15 files changed:
queue-6.2/drm-amd-display-set-dcn315-lb-bpp-to-48.patch [new file with mode: 0644]
queue-6.2/drm-amdgpu-fix-desktop-freezed-after-gpu-reset.patch [new file with mode: 0644]
queue-6.2/drm-i915-fix-fast-wake-aux-sync-len.patch [new file with mode: 0644]
queue-6.2/drm-rockchip-vop2-fix-suspend-resume.patch [new file with mode: 0644]
queue-6.2/drm-rockchip-vop2-use-regcache_sync-to-fix-suspend-resume.patch [new file with mode: 0644]
queue-6.2/mm-fix-memory-leak-on-mm_init-error-handling.patch [new file with mode: 0644]
queue-6.2/mm-huge_memory.c-warn-with-pr_warn_ratelimited-instead-of-vm_warn_on_once_folio.patch [new file with mode: 0644]
queue-6.2/mm-khugepaged-check-again-on-anon-uffd-wp-during-isolation.patch [new file with mode: 0644]
queue-6.2/mm-kmsan-handle-alloc-failures-in-kmsan_ioremap_page_range.patch [new file with mode: 0644]
queue-6.2/mm-kmsan-handle-alloc-failures-in-kmsan_vmap_pages_range_noflush.patch [new file with mode: 0644]
queue-6.2/mm-mmap-regression-fix-for-unmapped_area-_topdown.patch [new file with mode: 0644]
queue-6.2/mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch [new file with mode: 0644]
queue-6.2/mm-userfaultfd-fix-uffd-wp-handling-for-thp-migration-entries.patch [new file with mode: 0644]
queue-6.2/mmc-sdhci_am654-set-high_speed_ena-for-sdr12-and-sdr25.patch [new file with mode: 0644]
queue-6.2/series

diff --git a/queue-6.2/drm-amd-display-set-dcn315-lb-bpp-to-48.patch b/queue-6.2/drm-amd-display-set-dcn315-lb-bpp-to-48.patch
new file mode 100644 (file)
index 0000000..4e99b69
--- /dev/null
@@ -0,0 +1,34 @@
+From 6d9240c46f7419aa3210353b5f52cc63da5a6440 Mon Sep 17 00:00:00 2001
+From: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com>
+Date: Mon, 3 Apr 2023 10:13:12 -0400
+Subject: drm/amd/display: set dcn315 lb bpp to 48
+
+From: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com>
+
+commit 6d9240c46f7419aa3210353b5f52cc63da5a6440 upstream.
+
+[Why & How]
+Fix a typo for dcn315 line buffer bpp.
+
+Reviewed-by: Jun Lei <Jun.Lei@amd.com>
+Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
+Signed-off-by: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+@@ -222,7 +222,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip
+       .maximum_dsc_bits_per_component = 10,
+       .dsc422_native_support = false,
+       .is_line_buffer_bpp_fixed = true,
+-      .line_buffer_fixed_bpp = 49,
++      .line_buffer_fixed_bpp = 48,
+       .line_buffer_size_bits = 789504,
+       .max_line_buffer_lines = 12,
+       .writeback_interface_buffer_size_kbytes = 90,
diff --git a/queue-6.2/drm-amdgpu-fix-desktop-freezed-after-gpu-reset.patch b/queue-6.2/drm-amdgpu-fix-desktop-freezed-after-gpu-reset.patch
new file mode 100644 (file)
index 0000000..123a289
--- /dev/null
@@ -0,0 +1,101 @@
+From c8b5a95b570949536a2b75cd8fc4f1de0bc60629 Mon Sep 17 00:00:00 2001
+From: Alan Liu <HaoPing.Liu@amd.com>
+Date: Fri, 14 Apr 2023 18:39:52 +0800
+Subject: drm/amdgpu: Fix desktop freezed after gpu-reset
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alan Liu <HaoPing.Liu@amd.com>
+
+commit c8b5a95b570949536a2b75cd8fc4f1de0bc60629 upstream.
+
+[Why]
+After gpu-reset, sometimes the driver fails to enable the vblank irq,
+causing flip_done to time out and the desktop to freeze.
+
+During gpu-reset, we disable and enable the vblank irq in dm_suspend()
+and dm_resume(). Later on, in amdgpu_irq_gpu_reset_resume_helper(), we
+check the irqs' refcounts and decide to enable or disable them again.
+
+However, we have 2 sets of APIs for controlling the vblank irq: one is
+dm_vblank_get/put() and the other is amdgpu_irq_get/put(). Each API has
+its own refcount and flag to store the state of the vblank irq, and
+they are not synchronized.
+
+In drm we use the first API to control the vblank irq, but in
+amdgpu_irq_gpu_reset_resume_helper() we use the second set of APIs.
+
+The failure happens when the vblank irq was enabled by dm_vblank_get()
+before the gpu-reset, so vblank->enabled is true. However, during
+gpu-reset, the vblank irq's state checked from amdgpu_irq_update() in
+amdgpu_irq_gpu_reset_resume_helper() is DISABLED, so it finally disables
+the vblank irq again. After gpu-reset, if there is a cursor plane
+commit, the driver will try to enable the vblank irq by calling
+drm_vblank_enable(), but vblank->enabled is still true, so it fails to
+turn on the vblank irq, flip_done cannot be completed in the vblank irq
+handler, and the desktop freezes.
+
+[How]
+Combine the 2 vblank control APIs by letting drm's API ultimately call
+amdgpu_irq's API, so the irq refcount and state of both APIs stay
+synchronized. Also add a check to prevent the refcount from dropping
+below 0 in amdgpu_irq_put().
+
+v2:
+- Add warning in amdgpu_irq_enable() if the irq is already disabled.
+- Call dc_interrupt_set() in dm_set_vblank() to avoid refcount change
+  if it is in gpu-reset.
+
+v3:
+- Improve commit message and code comments.
+
+Signed-off-by: Alan Liu <HaoPing.Liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c                |    3 +++
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c |   17 ++++++++++++++---
+ 2 files changed, 17 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+@@ -597,6 +597,9 @@ int amdgpu_irq_put(struct amdgpu_device
+       if (!src->enabled_types || !src->funcs->set)
+               return -EINVAL;
++      if (WARN_ON(!amdgpu_irq_enabled(adev, src, type)))
++              return -EINVAL;
++
+       if (atomic_dec_and_test(&src->enabled_types[type]))
+               return amdgpu_irq_update(adev, src, type);
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+@@ -170,10 +170,21 @@ static inline int dm_set_vblank(struct d
+       if (rc)
+               return rc;
+-      irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
++      if (amdgpu_in_reset(adev)) {
++              irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
++              /* During gpu-reset we disable and then enable vblank irq, so
++               * don't use amdgpu_irq_get/put() to avoid refcount change.
++               */
++              if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
++                      rc = -EBUSY;
++      } else {
++              rc = (enable)
++                      ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id)
++                      : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id);
++      }
+-      if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
+-              return -EBUSY;
++      if (rc)
++              return rc;
+ skip:
+       if (amdgpu_in_reset(adev))
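To make the refcount mismatch described in the commit message above more concrete, the toy model below (ordinary userspace C with invented variable names, not driver code) shows what goes wrong when two independent enable counts guard the same interrupt line and the reset path consults only one of them.

#include <stdbool.h>
#include <stdio.h>

static int drm_refcount;	/* akin to the DRM-side vblank refcount    */
static int hw_refcount;		/* akin to the amdgpu_irq_get/put refcount */
static bool line_enabled;	/* akin to the actual hardware irq state   */

/* Models the reset/resume helper: re-derive the hardware state from the
 * hw-side refcount only, ignoring the DRM-side bookkeeping.
 */
static void reset_resume(void)
{
	line_enabled = hw_refcount > 0;
}

int main(void)
{
	/* The vblank irq was enabled through the DRM path before the reset. */
	drm_refcount++;
	line_enabled = true;

	/* The reset path sees hw_refcount == 0 and switches the line off,
	 * while the DRM side still believes it is enabled and therefore
	 * never re-enables it - the flip_done timeout described above.
	 */
	reset_resume();
	printf("enabled=%d drm_refcount=%d hw_refcount=%d\n",
	       line_enabled, drm_refcount, hw_refcount);
	return 0;
}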
diff --git a/queue-6.2/drm-i915-fix-fast-wake-aux-sync-len.patch b/queue-6.2/drm-i915-fix-fast-wake-aux-sync-len.patch
new file mode 100644 (file)
index 0000000..1d223aa
--- /dev/null
@@ -0,0 +1,41 @@
+From e1c71f8f918047ce822dc19b42ab1261ed259fd1 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
+Date: Wed, 29 Mar 2023 20:24:33 +0300
+Subject: drm/i915: Fix fast wake AUX sync len
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+commit e1c71f8f918047ce822dc19b42ab1261ed259fd1 upstream.
+
+Fast wake should use 8 SYNC pulses for the preamble
+and 10-16 SYNC pulses for the precharge. Reduce our
+fast wake SYNC count to match the maximum value.
+We also use the maximum precharge length for normal
+AUX transactions.
+
+Cc: stable@vger.kernel.org
+Cc: Jouni Högander <jouni.hogander@intel.com>
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230329172434.18744-1-ville.syrjala@linux.intel.com
+Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
+(cherry picked from commit 605f7c73133341d4b762cbd9a22174cc22d4c38b)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_dp_aux.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
++++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
+@@ -166,7 +166,7 @@ static u32 skl_get_aux_send_ctl(struct i
+             DP_AUX_CH_CTL_TIME_OUT_MAX |
+             DP_AUX_CH_CTL_RECEIVE_ERROR |
+             (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) |
+-            DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) |
++            DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(24) |
+             DP_AUX_CH_CTL_SYNC_PULSE_SKL(32);
+       if (intel_tc_port_in_tbt_alt_mode(dig_port))
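For context on the value in the hunk above: going by the figures quoted in the commit message, the new fast-wake count presumably corresponds to the 8 preamble pulses plus the 16-pulse maximum precharge (8 + 16 = 24), hence DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(24), while normal AUX transactions keep the maximum 32-pulse value via DP_AUX_CH_CTL_SYNC_PULSE_SKL(32).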
diff --git a/queue-6.2/drm-rockchip-vop2-fix-suspend-resume.patch b/queue-6.2/drm-rockchip-vop2-fix-suspend-resume.patch
new file mode 100644 (file)
index 0000000..bbf1d36
--- /dev/null
@@ -0,0 +1,52 @@
+From afa965a45e01e541cdbe5c8018226eff117610f0 Mon Sep 17 00:00:00 2001
+From: Sascha Hauer <s.hauer@pengutronix.de>
+Date: Thu, 13 Apr 2023 16:43:47 +0200
+Subject: drm/rockchip: vop2: fix suspend/resume
+
+From: Sascha Hauer <s.hauer@pengutronix.de>
+
+commit afa965a45e01e541cdbe5c8018226eff117610f0 upstream.
+
+During a suspend/resume cycle the VO power domain will be disabled and
+the VOP2 registers will reset to their default values. After that the
+cached register values will be out of sync and the read/modify/write
+operations we do on the window registers will result in bogus values
+written. Fix this by re-initializing the register cache each time we
+enable the VOP2. With this the VOP2 will show a picture after a
+suspend/resume cycle whereas without this the screen stays dark.
+
+Fixes: 604be85547ce4 ("drm/rockchip: Add VOP2 driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
+Tested-by: Chris Morgan <macromorgan@hotmail.com>
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230413144347.3506023-1-s.hauer@pengutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/rockchip/rockchip_drm_vop2.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+@@ -216,6 +216,8 @@ struct vop2 {
+       struct vop2_win win[];
+ };
++static const struct regmap_config vop2_regmap_config;
++
+ static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
+ {
+       return container_of(crtc, struct vop2_video_port, crtc);
+@@ -840,6 +842,12 @@ static void vop2_enable(struct vop2 *vop
+               return;
+       }
++      ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config);
++      if (ret) {
++              drm_err(vop2->drm, "failed to reinit cache: %d\n", ret);
++              return;
++      }
++
+       if (vop2->data->soc_id == 3566)
+               vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
diff --git a/queue-6.2/drm-rockchip-vop2-use-regcache_sync-to-fix-suspend-resume.patch b/queue-6.2/drm-rockchip-vop2-use-regcache_sync-to-fix-suspend-resume.patch
new file mode 100644 (file)
index 0000000..e06c4af
--- /dev/null
@@ -0,0 +1,58 @@
+From b63a553e8f5aa6574eeb535a551817a93c426d8c Mon Sep 17 00:00:00 2001
+From: Sascha Hauer <s.hauer@pengutronix.de>
+Date: Mon, 17 Apr 2023 14:37:47 +0200
+Subject: drm/rockchip: vop2: Use regcache_sync() to fix suspend/resume
+
+From: Sascha Hauer <s.hauer@pengutronix.de>
+
+commit b63a553e8f5aa6574eeb535a551817a93c426d8c upstream.
+
+afa965a45e01 ("drm/rockchip: vop2: fix suspend/resume") uses
+regmap_reinit_cache() to fix the suspend/resume issue with the VOP2
+driver. During discussion it came up that we should use
+regcache_sync() instead. As the original patch is already applied,
+fix this up in this follow-up patch.
+
+Fixes: afa965a45e01 ("drm/rockchip: vop2: fix suspend/resume")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230417123747.2179695-1-s.hauer@pengutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/rockchip/rockchip_drm_vop2.c |   10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+@@ -216,8 +216,6 @@ struct vop2 {
+       struct vop2_win win[];
+ };
+-static const struct regmap_config vop2_regmap_config;
+-
+ static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
+ {
+       return container_of(crtc, struct vop2_video_port, crtc);
+@@ -842,11 +840,7 @@ static void vop2_enable(struct vop2 *vop
+               return;
+       }
+-      ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config);
+-      if (ret) {
+-              drm_err(vop2->drm, "failed to reinit cache: %d\n", ret);
+-              return;
+-      }
++      regcache_sync(vop2->map);
+       if (vop2->data->soc_id == 3566)
+               vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
+@@ -876,6 +870,8 @@ static void vop2_disable(struct vop2 *vo
+       pm_runtime_put_sync(vop2->dev);
++      regcache_mark_dirty(vop2->map);
++
+       clk_disable_unprepare(vop2->aclk);
+       clk_disable_unprepare(vop2->hclk);
+ }
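The two Rockchip patches above boil down to the standard regmap cache pattern for blocks that lose power across suspend/resume: mark the cache dirty when the power domain goes down, then replay it with regcache_sync() once power is back. The sketch below is a minimal, hypothetical illustration of that pattern, assuming a regmap created with caching enabled; struct foo, foo_power_off() and foo_power_on() are invented names, not VOP2 code.

#include <linux/device.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>

struct foo {
	struct device *dev;
	struct regmap *map;	/* created with e.g. REGCACHE_RBTREE caching */
};

static void foo_power_off(struct foo *foo)
{
	/*
	 * The power domain is about to drop and the registers will revert
	 * to their reset defaults, so flag the cached values as stale.
	 */
	regcache_mark_dirty(foo->map);
	pm_runtime_put_sync(foo->dev);
}

static int foo_power_on(struct foo *foo)
{
	int ret = pm_runtime_resume_and_get(foo->dev);

	if (ret < 0)
		return ret;

	/*
	 * Replay the cached register values into the freshly reset block
	 * so later read/modify/write sequences operate on real state.
	 */
	return regcache_sync(foo->map);
}

Compared with regmap_reinit_cache(), which throws away the cached values and starts again from the defaults, regcache_sync() restores the pre-suspend register contents, which appears to be why the follow-up patch prefers it.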
diff --git a/queue-6.2/mm-fix-memory-leak-on-mm_init-error-handling.patch b/queue-6.2/mm-fix-memory-leak-on-mm_init-error-handling.patch
new file mode 100644 (file)
index 0000000..21ada29
--- /dev/null
@@ -0,0 +1,42 @@
+From b20b0368c614c609badfe16fbd113dfb4780acd9 Mon Sep 17 00:00:00 2001
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Date: Thu, 30 Mar 2023 09:38:22 -0400
+Subject: mm: fix memory leak on mm_init error handling
+
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+commit b20b0368c614c609badfe16fbd113dfb4780acd9 upstream.
+
+commit f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter")
+introduces a memory leak by missing a call to destroy_context() when a
+percpu_counter fails to allocate.
+
+Before introducing the per-cpu counter allocations, init_new_context() was
+the last call that could fail in mm_init(), and thus there was no need to
+ever invoke destroy_context() in the error paths.  Adding the following
+percpu counter allocations adds error paths after init_new_context(),
+which means its associated destroy_context() needs to be called when
+percpu counters fail to allocate.
+
+Link: https://lkml.kernel.org/r/20230330133822.66271-1-mathieu.desnoyers@efficios.com
+Fixes: f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter")
+Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Acked-by: Shakeel Butt <shakeelb@google.com>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/fork.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1177,6 +1177,7 @@ static struct mm_struct *mm_init(struct
+ fail_pcpu:
+       while (i > 0)
+               percpu_counter_destroy(&mm->rss_stat[--i]);
++      destroy_context(mm);
+ fail_nocontext:
+       mm_free_pgd(mm);
+ fail_nopgd:
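The hunk above follows the usual goto-ladder unwind idiom: every allocation that can fail must undo, in reverse order, everything that succeeded before it, and destroy_context() was exactly the rung that was missing. A minimal stand-alone illustration of that idiom, in plain userspace C with made-up resource names rather than kernel objects, is sketched below.

#include <stdlib.h>

struct ctx {
	void *pgd;	/* stands in for the page-table allocation     */
	void *arch;	/* stands in for init_new_context() state      */
	void *counters;	/* stands in for the percpu counter allocation */
};

static int ctx_init(struct ctx *c)
{
	c->pgd = malloc(64);
	if (!c->pgd)
		goto fail_pgd;
	c->arch = malloc(64);
	if (!c->arch)
		goto fail_arch;
	c->counters = malloc(64);
	if (!c->counters)
		goto fail_counters;
	return 0;

fail_counters:
	free(c->arch);	/* the rung that mm_init() was missing */
fail_arch:
	free(c->pgd);
fail_pgd:
	return -1;
}

int main(void)
{
	struct ctx c;

	if (ctx_init(&c))
		return 1;
	free(c.counters);
	free(c.arch);
	free(c.pgd);
	return 0;
}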
diff --git a/queue-6.2/mm-huge_memory.c-warn-with-pr_warn_ratelimited-instead-of-vm_warn_on_once_folio.patch b/queue-6.2/mm-huge_memory.c-warn-with-pr_warn_ratelimited-instead-of-vm_warn_on_once_folio.patch
new file mode 100644 (file)
index 0000000..adb59e6
--- /dev/null
@@ -0,0 +1,48 @@
+From 4737edbbdd4958ae29ca6a310a6a2fa4e0684b01 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Date: Thu, 6 Apr 2023 17:20:04 +0900
+Subject: mm/huge_memory.c: warn with pr_warn_ratelimited instead of VM_WARN_ON_ONCE_FOLIO
+
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+
+commit 4737edbbdd4958ae29ca6a310a6a2fa4e0684b01 upstream.
+
+split_huge_page_to_list() WARNs when called for huge zero pages, which
+sounds too harsh to me because it does not imply a kernel bug, but just
+notifies admins of the event.  On the other hand, this is considered
+critical by syzkaller and makes its testing less efficient, which seems
+harmful to me.
+
+So replace the VM_WARN_ON_ONCE_FOLIO with pr_warn_ratelimited.
+
+Link: https://lkml.kernel.org/r/20230406082004.2185420-1-naoya.horiguchi@linux.dev
+Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page")
+Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Reported-by: syzbot+07a218429c8d19b1fb25@syzkaller.appspotmail.com
+  Link: https://lore.kernel.org/lkml/000000000000a6f34a05e6efcd01@google.com/
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Xu Yu <xuyu@linux.alibaba.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2674,9 +2674,10 @@ int split_huge_page_to_list(struct page
+       VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+       is_hzp = is_huge_zero_page(&folio->page);
+-      VM_WARN_ON_ONCE_FOLIO(is_hzp, folio);
+-      if (is_hzp)
++      if (is_hzp) {
++              pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
+               return -EBUSY;
++      }
+       if (folio_test_writeback(folio))
+               return -EBUSY;
diff --git a/queue-6.2/mm-khugepaged-check-again-on-anon-uffd-wp-during-isolation.patch b/queue-6.2/mm-khugepaged-check-again-on-anon-uffd-wp-during-isolation.patch
new file mode 100644 (file)
index 0000000..66d0a21
--- /dev/null
@@ -0,0 +1,60 @@
+From dd47ac428c3f5f3bcabe845f36be870fe6c20784 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Wed, 5 Apr 2023 11:51:20 -0400
+Subject: mm/khugepaged: check again on anon uffd-wp during isolation
+
+From: Peter Xu <peterx@redhat.com>
+
+commit dd47ac428c3f5f3bcabe845f36be870fe6c20784 upstream.
+
+Khugepaged collapses an anonymous thp in two rounds of scans.  The 2nd
+round is done in __collapse_huge_page_isolate() after
+hpage_collapse_scan_pmd(), during which all the locks will be released
+temporarily.  It means the pgtable can change during this phase before
+the 2nd round starts.
+
+It's logically possible that some ptes got wr-protected during this
+phase, and we can erroneously collapse a thp without noticing that some
+ptes are wr-protected by userfault.  e1e267c7928f wanted to avoid this,
+but it only did that for the 1st phase, not the 2nd phase.
+
+Since __collapse_huge_page_isolate() happens after a round of small page
+swapins, we don't need to worry about any !present ptes - if one existed
+khugepaged would already have bailed out.  So we only need to check
+present ptes with the uffd-wp bit set there.
+
+This is something I found by inspection and never had a reproducer for.
+I thought it was what caused a bug in Muhammad's recent pagemap ioctl
+work, but that turned out to be a userspace bug.  However, this still
+seems to be a real bug even with a very small race window, still worth
+fixing and copying to stable.
+
+Link: https://lkml.kernel.org/r/20230405155120.3608140-1-peterx@redhat.com
+Fixes: e1e267c7928f ("khugepaged: skip collapse if uffd-wp detected")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/khugepaged.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -561,6 +561,10 @@ static int __collapse_huge_page_isolate(
+                       result = SCAN_PTE_NON_PRESENT;
+                       goto out;
+               }
++              if (pte_uffd_wp(pteval)) {
++                      result = SCAN_PTE_UFFD_WP;
++                      goto out;
++              }
+               page = vm_normal_page(vma, address, pteval);
+               if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
+                       result = SCAN_PAGE_NULL;
diff --git a/queue-6.2/mm-kmsan-handle-alloc-failures-in-kmsan_ioremap_page_range.patch b/queue-6.2/mm-kmsan-handle-alloc-failures-in-kmsan_ioremap_page_range.patch
new file mode 100644 (file)
index 0000000..5e4d846
--- /dev/null
@@ -0,0 +1,170 @@
+From fdea03e12aa2a44a7bb34144208be97fc25dfd90 Mon Sep 17 00:00:00 2001
+From: Alexander Potapenko <glider@google.com>
+Date: Thu, 13 Apr 2023 15:12:21 +0200
+Subject: mm: kmsan: handle alloc failures in kmsan_ioremap_page_range()
+
+From: Alexander Potapenko <glider@google.com>
+
+commit fdea03e12aa2a44a7bb34144208be97fc25dfd90 upstream.
+
+Similarly to kmsan_vmap_pages_range_noflush(), kmsan_ioremap_page_range()
+must also properly handle allocation/mapping failures.  When one occurs,
+it must clean up the already created metadata mappings and return an
+error code, so that the error can be propagated to ioremap_page_range().
+Without doing so, KMSAN may silently fail to bring the metadata for the
+page range into a consistent state, which will result in user-visible
+crashes when trying to access them.
+
+Link: https://lkml.kernel.org/r/20230413131223.4135168-2-glider@google.com
+Fixes: b073d7f8aee4 ("mm: kmsan: maintain KMSAN metadata for page operations")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Reported-by: Dipanjan Das <mail.dipanjan.das@gmail.com>
+  Link: https://lore.kernel.org/linux-mm/CANX2M5ZRrRA64k0hOif02TjmY9kbbO2aCBPyq79es34RXZ=cAw@mail.gmail.com/
+Reviewed-by: Marco Elver <elver@google.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kmsan.h |   19 +++++++++--------
+ mm/kmsan/hooks.c      |   55 ++++++++++++++++++++++++++++++++++++++++++--------
+ mm/vmalloc.c          |    4 +--
+ 3 files changed, 59 insertions(+), 19 deletions(-)
+
+--- a/include/linux/kmsan.h
++++ b/include/linux/kmsan.h
+@@ -159,11 +159,12 @@ void kmsan_vunmap_range_noflush(unsigned
+  * @page_shift:       page_shift argument passed to vmap_range_noflush().
+  *
+  * KMSAN creates new metadata pages for the physical pages mapped into the
+- * virtual memory.
++ * virtual memory. Returns 0 on success, callers must check for non-zero return
++ * value.
+  */
+-void kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
+-                            phys_addr_t phys_addr, pgprot_t prot,
+-                            unsigned int page_shift);
++int kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
++                           phys_addr_t phys_addr, pgprot_t prot,
++                           unsigned int page_shift);
+ /**
+  * kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call.
+@@ -294,12 +295,12 @@ static inline void kmsan_vunmap_range_no
+ {
+ }
+-static inline void kmsan_ioremap_page_range(unsigned long start,
+-                                          unsigned long end,
+-                                          phys_addr_t phys_addr,
+-                                          pgprot_t prot,
+-                                          unsigned int page_shift)
++static inline int kmsan_ioremap_page_range(unsigned long start,
++                                         unsigned long end,
++                                         phys_addr_t phys_addr, pgprot_t prot,
++                                         unsigned int page_shift)
+ {
++      return 0;
+ }
+ static inline void kmsan_iounmap_page_range(unsigned long start,
+--- a/mm/kmsan/hooks.c
++++ b/mm/kmsan/hooks.c
+@@ -148,35 +148,74 @@ void kmsan_vunmap_range_noflush(unsigned
+  * into the virtual memory. If those physical pages already had shadow/origin,
+  * those are ignored.
+  */
+-void kmsan_ioremap_page_range(unsigned long start, unsigned long end,
+-                            phys_addr_t phys_addr, pgprot_t prot,
+-                            unsigned int page_shift)
++int kmsan_ioremap_page_range(unsigned long start, unsigned long end,
++                           phys_addr_t phys_addr, pgprot_t prot,
++                           unsigned int page_shift)
+ {
+       gfp_t gfp_mask = GFP_KERNEL | __GFP_ZERO;
+       struct page *shadow, *origin;
+       unsigned long off = 0;
+-      int nr;
++      int nr, err = 0, clean = 0, mapped;
+       if (!kmsan_enabled || kmsan_in_runtime())
+-              return;
++              return 0;
+       nr = (end - start) / PAGE_SIZE;
+       kmsan_enter_runtime();
+-      for (int i = 0; i < nr; i++, off += PAGE_SIZE) {
++      for (int i = 0; i < nr; i++, off += PAGE_SIZE, clean = i) {
+               shadow = alloc_pages(gfp_mask, 1);
+               origin = alloc_pages(gfp_mask, 1);
+-              __vmap_pages_range_noflush(
++              if (!shadow || !origin) {
++                      err = -ENOMEM;
++                      goto ret;
++              }
++              mapped = __vmap_pages_range_noflush(
+                       vmalloc_shadow(start + off),
+                       vmalloc_shadow(start + off + PAGE_SIZE), prot, &shadow,
+                       PAGE_SHIFT);
+-              __vmap_pages_range_noflush(
++              if (mapped) {
++                      err = mapped;
++                      goto ret;
++              }
++              shadow = NULL;
++              mapped = __vmap_pages_range_noflush(
+                       vmalloc_origin(start + off),
+                       vmalloc_origin(start + off + PAGE_SIZE), prot, &origin,
+                       PAGE_SHIFT);
++              if (mapped) {
++                      __vunmap_range_noflush(
++                              vmalloc_shadow(start + off),
++                              vmalloc_shadow(start + off + PAGE_SIZE));
++                      err = mapped;
++                      goto ret;
++              }
++              origin = NULL;
++      }
++      /* Page mapping loop finished normally, nothing to clean up. */
++      clean = 0;
++
++ret:
++      if (clean > 0) {
++              /*
++               * Something went wrong. Clean up shadow/origin pages allocated
++               * on the last loop iteration, then delete mappings created
++               * during the previous iterations.
++               */
++              if (shadow)
++                      __free_pages(shadow, 1);
++              if (origin)
++                      __free_pages(origin, 1);
++              __vunmap_range_noflush(
++                      vmalloc_shadow(start),
++                      vmalloc_shadow(start + clean * PAGE_SIZE));
++              __vunmap_range_noflush(
++                      vmalloc_origin(start),
++                      vmalloc_origin(start + clean * PAGE_SIZE));
+       }
+       flush_cache_vmap(vmalloc_shadow(start), vmalloc_shadow(end));
+       flush_cache_vmap(vmalloc_origin(start), vmalloc_origin(end));
+       kmsan_leave_runtime();
++      return err;
+ }
+ void kmsan_iounmap_page_range(unsigned long start, unsigned long end)
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -324,8 +324,8 @@ int ioremap_page_range(unsigned long add
+                                ioremap_max_page_shift);
+       flush_cache_vmap(addr, end);
+       if (!err)
+-              kmsan_ioremap_page_range(addr, end, phys_addr, prot,
+-                                       ioremap_max_page_shift);
++              err = kmsan_ioremap_page_range(addr, end, phys_addr, prot,
++                                             ioremap_max_page_shift);
+       return err;
+ }
diff --git a/queue-6.2/mm-kmsan-handle-alloc-failures-in-kmsan_vmap_pages_range_noflush.patch b/queue-6.2/mm-kmsan-handle-alloc-failures-in-kmsan_vmap_pages_range_noflush.patch
new file mode 100644 (file)
index 0000000..acde6c5
--- /dev/null
@@ -0,0 +1,160 @@
+From 47ebd0310e89c087f56e58c103c44b72a2f6b216 Mon Sep 17 00:00:00 2001
+From: Alexander Potapenko <glider@google.com>
+Date: Thu, 13 Apr 2023 15:12:20 +0200
+Subject: mm: kmsan: handle alloc failures in kmsan_vmap_pages_range_noflush()
+
+From: Alexander Potapenko <glider@google.com>
+
+commit 47ebd0310e89c087f56e58c103c44b72a2f6b216 upstream.
+
+As reported by Dipanjan Das, when KMSAN is used together with kernel fault
+injection (or, generally, even without the latter), calls to kcalloc() or
+__vmap_pages_range_noflush() may fail, leaving the metadata mappings for
+the virtual mapping in an inconsistent state.  When these metadata
+mappings are accessed later, the kernel crashes.
+
+To address the problem, we return a non-zero error code from
+kmsan_vmap_pages_range_noflush() in the case of any allocation/mapping
+failure inside it, and make vmap_pages_range_noflush() return an error if
+KMSAN fails to allocate the metadata.
+
+This patch also removes KMSAN_WARN_ON() from kmsan_vmap_pages_range_noflush(),
+as these allocation failures are not fatal anymore.
+
+Link: https://lkml.kernel.org/r/20230413131223.4135168-1-glider@google.com
+Fixes: b073d7f8aee4 ("mm: kmsan: maintain KMSAN metadata for page operations")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Reported-by: Dipanjan Das <mail.dipanjan.das@gmail.com>
+  Link: https://lore.kernel.org/linux-mm/CANX2M5ZRrRA64k0hOif02TjmY9kbbO2aCBPyq79es34RXZ=cAw@mail.gmail.com/
+Reviewed-by: Marco Elver <elver@google.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kmsan.h |   20 +++++++++++---------
+ mm/kmsan/shadow.c     |   27 ++++++++++++++++++---------
+ mm/vmalloc.c          |    6 +++++-
+ 3 files changed, 34 insertions(+), 19 deletions(-)
+
+--- a/include/linux/kmsan.h
++++ b/include/linux/kmsan.h
+@@ -134,11 +134,12 @@ void kmsan_kfree_large(const void *ptr);
+  * @page_shift:       page_shift passed to vmap_range_noflush().
+  *
+  * KMSAN maps shadow and origin pages of @pages into contiguous ranges in
+- * vmalloc metadata address range.
++ * vmalloc metadata address range. Returns 0 on success, callers must check
++ * for non-zero return value.
+  */
+-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+-                                  pgprot_t prot, struct page **pages,
+-                                  unsigned int page_shift);
++int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
++                                 pgprot_t prot, struct page **pages,
++                                 unsigned int page_shift);
+ /**
+  * kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
+@@ -282,12 +283,13 @@ static inline void kmsan_kfree_large(con
+ {
+ }
+-static inline void kmsan_vmap_pages_range_noflush(unsigned long start,
+-                                                unsigned long end,
+-                                                pgprot_t prot,
+-                                                struct page **pages,
+-                                                unsigned int page_shift)
++static inline int kmsan_vmap_pages_range_noflush(unsigned long start,
++                                               unsigned long end,
++                                               pgprot_t prot,
++                                               struct page **pages,
++                                               unsigned int page_shift)
+ {
++      return 0;
+ }
+ static inline void kmsan_vunmap_range_noflush(unsigned long start,
+--- a/mm/kmsan/shadow.c
++++ b/mm/kmsan/shadow.c
+@@ -216,27 +216,29 @@ void kmsan_free_page(struct page *page,
+       kmsan_leave_runtime();
+ }
+-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+-                                  pgprot_t prot, struct page **pages,
+-                                  unsigned int page_shift)
++int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
++                                 pgprot_t prot, struct page **pages,
++                                 unsigned int page_shift)
+ {
+       unsigned long shadow_start, origin_start, shadow_end, origin_end;
+       struct page **s_pages, **o_pages;
+-      int nr, mapped;
++      int nr, mapped, err = 0;
+       if (!kmsan_enabled)
+-              return;
++              return 0;
+       shadow_start = vmalloc_meta((void *)start, KMSAN_META_SHADOW);
+       shadow_end = vmalloc_meta((void *)end, KMSAN_META_SHADOW);
+       if (!shadow_start)
+-              return;
++              return 0;
+       nr = (end - start) / PAGE_SIZE;
+       s_pages = kcalloc(nr, sizeof(*s_pages), GFP_KERNEL);
+       o_pages = kcalloc(nr, sizeof(*o_pages), GFP_KERNEL);
+-      if (!s_pages || !o_pages)
++      if (!s_pages || !o_pages) {
++              err = -ENOMEM;
+               goto ret;
++      }
+       for (int i = 0; i < nr; i++) {
+               s_pages[i] = shadow_page_for(pages[i]);
+               o_pages[i] = origin_page_for(pages[i]);
+@@ -249,10 +251,16 @@ void kmsan_vmap_pages_range_noflush(unsi
+       kmsan_enter_runtime();
+       mapped = __vmap_pages_range_noflush(shadow_start, shadow_end, prot,
+                                           s_pages, page_shift);
+-      KMSAN_WARN_ON(mapped);
++      if (mapped) {
++              err = mapped;
++              goto ret;
++      }
+       mapped = __vmap_pages_range_noflush(origin_start, origin_end, prot,
+                                           o_pages, page_shift);
+-      KMSAN_WARN_ON(mapped);
++      if (mapped) {
++              err = mapped;
++              goto ret;
++      }
+       kmsan_leave_runtime();
+       flush_tlb_kernel_range(shadow_start, shadow_end);
+       flush_tlb_kernel_range(origin_start, origin_end);
+@@ -262,6 +270,7 @@ void kmsan_vmap_pages_range_noflush(unsi
+ ret:
+       kfree(s_pages);
+       kfree(o_pages);
++      return err;
+ }
+ /* Allocate metadata for pages allocated at boot time. */
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -616,7 +616,11 @@ int __vmap_pages_range_noflush(unsigned
+ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
+               pgprot_t prot, struct page **pages, unsigned int page_shift)
+ {
+-      kmsan_vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
++      int ret = kmsan_vmap_pages_range_noflush(addr, end, prot, pages,
++                                               page_shift);
++
++      if (ret)
++              return ret;
+       return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+ }
diff --git a/queue-6.2/mm-mmap-regression-fix-for-unmapped_area-_topdown.patch b/queue-6.2/mm-mmap-regression-fix-for-unmapped_area-_topdown.patch
new file mode 100644 (file)
index 0000000..15b49e3
--- /dev/null
@@ -0,0 +1,113 @@
+From 58c5d0d6d522112577c7eeb71d382ea642ed7be4 Mon Sep 17 00:00:00 2001
+From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
+Date: Fri, 14 Apr 2023 14:59:19 -0400
+Subject: mm/mmap: regression fix for unmapped_area{_topdown}
+
+From: Liam R. Howlett <Liam.Howlett@oracle.com>
+
+commit 58c5d0d6d522112577c7eeb71d382ea642ed7be4 upstream.
+
+The maple tree limits the gap returned to a window that specifically fits
+what was asked.  This may not be optimal in the case of switching search
+directions or a gap that does not satisfy the requested space for other
+reasons.  Fix the search by retrying the operation and limiting the search
+window on the rare occasion that a conflict occurs.
+
+Link: https://lkml.kernel.org/r/20230414185919.4175572-1-Liam.Howlett@oracle.com
+Fixes: 3499a13168da ("mm/mmap: use maple tree for unmapped_area{_topdown}")
+Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reported-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmap.c |   48 +++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 43 insertions(+), 5 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1565,7 +1565,8 @@ static inline int accountable_mapping(st
+  */
+ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+ {
+-      unsigned long length, gap;
++      unsigned long length, gap, low_limit;
++      struct vm_area_struct *tmp;
+       MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+@@ -1574,12 +1575,29 @@ static unsigned long unmapped_area(struc
+       if (length < info->length)
+               return -ENOMEM;
+-      if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
+-                                length))
++      low_limit = info->low_limit;
++retry:
++      if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
+               return -ENOMEM;
+       gap = mas.index;
+       gap += (info->align_offset - gap) & info->align_mask;
++      tmp = mas_next(&mas, ULONG_MAX);
++      if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
++              if (vm_start_gap(tmp) < gap + length - 1) {
++                      low_limit = tmp->vm_end;
++                      mas_reset(&mas);
++                      goto retry;
++              }
++      } else {
++              tmp = mas_prev(&mas, 0);
++              if (tmp && vm_end_gap(tmp) > gap) {
++                      low_limit = vm_end_gap(tmp);
++                      mas_reset(&mas);
++                      goto retry;
++              }
++      }
++
+       return gap;
+ }
+@@ -1595,7 +1613,8 @@ static unsigned long unmapped_area(struc
+  */
+ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+ {
+-      unsigned long length, gap;
++      unsigned long length, gap, high_limit, gap_end;
++      struct vm_area_struct *tmp;
+       MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+       /* Adjust search length to account for worst case alignment overhead */
+@@ -1603,12 +1622,31 @@ static unsigned long unmapped_area_topdo
+       if (length < info->length)
+               return -ENOMEM;
+-      if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
++      high_limit = info->high_limit;
++retry:
++      if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1,
+                               length))
+               return -ENOMEM;
+       gap = mas.last + 1 - info->length;
+       gap -= (gap - info->align_offset) & info->align_mask;
++      gap_end = mas.last;
++      tmp = mas_next(&mas, ULONG_MAX);
++      if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
++              if (vm_start_gap(tmp) <= gap_end) {
++                      high_limit = vm_start_gap(tmp);
++                      mas_reset(&mas);
++                      goto retry;
++              }
++      } else {
++              tmp = mas_prev(&mas, 0);
++              if (tmp && vm_end_gap(tmp) > gap) {
++                      high_limit = tmp->vm_start;
++                      mas_reset(&mas);
++                      goto retry;
++              }
++      }
++
+       return gap;
+ }
diff --git a/queue-6.2/mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch b/queue-6.2/mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch
new file mode 100644 (file)
index 0000000..01eec60
--- /dev/null
@@ -0,0 +1,92 @@
+From 4d73ba5fa710fe7d432e0b271e6fecd252aef66e Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@techsingularity.net>
+Date: Fri, 14 Apr 2023 15:14:29 +0100
+Subject: mm: page_alloc: skip regions with hugetlbfs pages when allocating 1G pages
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+commit 4d73ba5fa710fe7d432e0b271e6fecd252aef66e upstream.
+
+A bug was reported by Yuanxi Liu where allocating 1G pages at runtime is
+taking an excessive amount of time for large amounts of memory.  Further
+testing showed that the cost is linear, i.e.  if allocating
+1G pages in batches of 10 then the time to allocate nr_hugepages from
+10->20->30->etc increases linearly even though 10 pages are allocated at
+each step.  Profiles indicated that much of the time is spent checking the
+validity within already existing huge pages and then attempting a
+migration that fails after isolating the range, draining pages and a whole
+lot of other useless work.
+
+Commit eb14d4eefdc4 ("mm,page_alloc: drop unnecessary checks from
+pfn_range_valid_contig") removed two checks, one which ignored huge pages
+for contiguous allocations as huge pages can sometimes migrate.  While
+there may be value in migrating a 2M page to satisfy a 1G allocation, it's
+potentially expensive if the 1G allocation fails and it's pointless to try
+moving a 1G page for a new 1G allocation or scan the tail pages for valid
+PFNs.
+
+Reintroduce the PageHuge check and assume any contiguous region with
+hugetlbfs pages is unsuitable for a new 1G allocation.
+
+The hpagealloc test allocates huge pages in batches and reports the
+average latency per page over time.  This test happens just after boot
+when fragmentation is not an issue.  Units are in milliseconds.
+
+hpagealloc
+                               6.3.0-rc6              6.3.0-rc6              6.3.0-rc6
+                                 vanilla   hugeallocrevert-v1r1   hugeallocsimple-v1r2
+Min       Latency       26.42 (   0.00%)        5.07 (  80.82%)       18.94 (  28.30%)
+1st-qrtle Latency      356.61 (   0.00%)        5.34 (  98.50%)       19.85 (  94.43%)
+2nd-qrtle Latency      697.26 (   0.00%)        5.47 (  99.22%)       20.44 (  97.07%)
+3rd-qrtle Latency      972.94 (   0.00%)        5.50 (  99.43%)       20.81 (  97.86%)
+Max-1     Latency       26.42 (   0.00%)        5.07 (  80.82%)       18.94 (  28.30%)
+Max-5     Latency       82.14 (   0.00%)        5.11 (  93.78%)       19.31 (  76.49%)
+Max-10    Latency      150.54 (   0.00%)        5.20 (  96.55%)       19.43 (  87.09%)
+Max-90    Latency     1164.45 (   0.00%)        5.53 (  99.52%)       20.97 (  98.20%)
+Max-95    Latency     1223.06 (   0.00%)        5.55 (  99.55%)       21.06 (  98.28%)
+Max-99    Latency     1278.67 (   0.00%)        5.57 (  99.56%)       22.56 (  98.24%)
+Max       Latency     1310.90 (   0.00%)        8.06 (  99.39%)       26.62 (  97.97%)
+Amean     Latency      678.36 (   0.00%)        5.44 *  99.20%*       20.44 *  96.99%*
+
+                   6.3.0-rc6   6.3.0-rc6   6.3.0-rc6
+                     vanilla   revert-v1   hugeallocfix-v2
+Duration User           0.28        0.27        0.30
+Duration System       808.66       17.77       35.99
+Duration Elapsed      830.87       18.08       36.33
+
+The vanilla kernel is poor, taking up to 1.3 second to allocate a huge
+page and almost 10 minutes in total to run the test.  Reverting the
+problematic commit reduces it to 8ms at worst and the patch takes 26ms.
+This patch fixes the main issue with skipping huge pages but leaves the
+page_count() out because a page with an elevated count potentially can
+migrate.
+
+BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=217022
+Link: https://lkml.kernel.org/r/20230414141429.pwgieuwluxwez3rj@techsingularity.net
+Fixes: eb14d4eefdc4 ("mm,page_alloc: drop unnecessary checks from pfn_range_valid_contig")
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Reported-by: Yuanxi Liu <y.liu@naruida.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -9407,6 +9407,9 @@ static bool pfn_range_valid_contig(struc
+               if (PageReserved(page))
+                       return false;
++
++              if (PageHuge(page))
++                      return false;
+       }
+       return true;
+ }
diff --git a/queue-6.2/mm-userfaultfd-fix-uffd-wp-handling-for-thp-migration-entries.patch b/queue-6.2/mm-userfaultfd-fix-uffd-wp-handling-for-thp-migration-entries.patch
new file mode 100644 (file)
index 0000000..70deb7e
--- /dev/null
@@ -0,0 +1,77 @@
+From 24bf08c4376be417f16ceb609188b16f461b0443 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 5 Apr 2023 18:02:35 +0200
+Subject: mm/userfaultfd: fix uffd-wp handling for THP migration entries
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 24bf08c4376be417f16ceb609188b16f461b0443 upstream.
+
+Looks like what we fixed for hugetlb in commit 44f86392bdd1 ("mm/hugetlb:
+fix uffd-wp handling for migration entries in
+hugetlb_change_protection()") similarly applies to THP.
+
+Setting/clearing uffd-wp on THP migration entries is not implemented
+properly.  Further, while removing migration PMDs considers the uffd-wp
+bit, inserting migration PMDs does not consider the uffd-wp bit.
+
+We have to set/clear independently of the migration entry type in
+change_huge_pmd() and properly copy the uffd-wp bit in
+set_pmd_migration_entry().
+
+Verified, using a simple reproducer that triggers migration of a THP,
+that set_pmd_migration_entry() no longer loses the uffd-wp bit.
+
+Link: https://lkml.kernel.org/r/20230405160236.587705-2-david@redhat.com
+Fixes: f45ec5ff16a7 ("userfaultfd: wp: support swap and page migration")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1845,10 +1845,10 @@ int change_huge_pmd(struct mmu_gather *t
+       if (is_swap_pmd(*pmd)) {
+               swp_entry_t entry = pmd_to_swp_entry(*pmd);
+               struct page *page = pfn_swap_entry_to_page(entry);
++              pmd_t newpmd;
+               VM_BUG_ON(!is_pmd_migration_entry(*pmd));
+               if (is_writable_migration_entry(entry)) {
+-                      pmd_t newpmd;
+                       /*
+                        * A protection check is difficult so
+                        * just be safe and disable write
+@@ -1862,8 +1862,16 @@ int change_huge_pmd(struct mmu_gather *t
+                               newpmd = pmd_swp_mksoft_dirty(newpmd);
+                       if (pmd_swp_uffd_wp(*pmd))
+                               newpmd = pmd_swp_mkuffd_wp(newpmd);
+-                      set_pmd_at(mm, addr, pmd, newpmd);
++              } else {
++                      newpmd = *pmd;
+               }
++
++              if (uffd_wp)
++                      newpmd = pmd_swp_mkuffd_wp(newpmd);
++              else if (uffd_wp_resolve)
++                      newpmd = pmd_swp_clear_uffd_wp(newpmd);
++              if (!pmd_same(*pmd, newpmd))
++                      set_pmd_at(mm, addr, pmd, newpmd);
+               goto unlock;
+       }
+ #endif
+@@ -3252,6 +3260,8 @@ int set_pmd_migration_entry(struct page_
+       pmdswp = swp_entry_to_pmd(entry);
+       if (pmd_soft_dirty(pmdval))
+               pmdswp = pmd_swp_mksoft_dirty(pmdswp);
++      if (pmd_uffd_wp(pmdval))
++              pmdswp = pmd_swp_mkuffd_wp(pmdswp);
+       set_pmd_at(mm, address, pvmw->pmd, pmdswp);
+       page_remove_rmap(page, vma, true);
+       put_page(page);
diff --git a/queue-6.2/mmc-sdhci_am654-set-high_speed_ena-for-sdr12-and-sdr25.patch b/queue-6.2/mmc-sdhci_am654-set-high_speed_ena-for-sdr12-and-sdr25.patch
new file mode 100644 (file)
index 0000000..c31ec60
--- /dev/null
@@ -0,0 +1,35 @@
+From 2265098fd6a6272fde3fd1be5761f2f5895bd99a Mon Sep 17 00:00:00 2001
+From: Bhavya Kapoor <b-kapoor@ti.com>
+Date: Fri, 17 Mar 2023 14:57:11 +0530
+Subject: mmc: sdhci_am654: Set HIGH_SPEED_ENA for SDR12 and SDR25
+
+From: Bhavya Kapoor <b-kapoor@ti.com>
+
+commit 2265098fd6a6272fde3fd1be5761f2f5895bd99a upstream.
+
+The timing information in the datasheet assumes that HIGH_SPEED_ENA=1
+should be set for SDR12 and SDR25 modes, but the sdhci_am654 driver
+clears HIGH_SPEED_ENA. Thus, modify sdhci_am654 to not clear the
+HIGH_SPEED_ENA (HOST_CONTROL[2]) bit for SDR12 and SDR25 speed modes.
+
+Fixes: e374e87538f4 ("mmc: sdhci_am654: Clear HISPD_ENA in some lower speed modes")
+Signed-off-by: Bhavya Kapoor <b-kapoor@ti.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230317092711.660897-1-b-kapoor@ti.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci_am654.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/mmc/host/sdhci_am654.c
++++ b/drivers/mmc/host/sdhci_am654.c
+@@ -351,8 +351,6 @@ static void sdhci_am654_write_b(struct s
+                */
+               case MMC_TIMING_SD_HS:
+               case MMC_TIMING_MMC_HS:
+-              case MMC_TIMING_UHS_SDR12:
+-              case MMC_TIMING_UHS_SDR25:
+                       val &= ~SDHCI_CTRL_HISPD;
+               }
+       }
diff --git a/queue-6.2/series b/queue-6.2/series
index 3098ee3df3d5d203e0c07eefafb81e8e381ac89b..e817cdf1a460e220e09998c68c520a95dbdc95ca 100644 (file)
--- a/queue-6.2/series
+++ b/queue-6.2/series
@@ -76,3 +76,17 @@ tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch
 memstick-fix-memory-leak-if-card-device-is-never-registered.patch
 kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch
 writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch
+mmc-sdhci_am654-set-high_speed_ena-for-sdr12-and-sdr25.patch
+drm-i915-fix-fast-wake-aux-sync-len.patch
+drm-amdgpu-fix-desktop-freezed-after-gpu-reset.patch
+drm-amd-display-set-dcn315-lb-bpp-to-48.patch
+drm-rockchip-vop2-fix-suspend-resume.patch
+drm-rockchip-vop2-use-regcache_sync-to-fix-suspend-resume.patch
+mm-fix-memory-leak-on-mm_init-error-handling.patch
+mm-userfaultfd-fix-uffd-wp-handling-for-thp-migration-entries.patch
+mm-khugepaged-check-again-on-anon-uffd-wp-during-isolation.patch
+mm-huge_memory.c-warn-with-pr_warn_ratelimited-instead-of-vm_warn_on_once_folio.patch
+mm-kmsan-handle-alloc-failures-in-kmsan_ioremap_page_range.patch
+mm-kmsan-handle-alloc-failures-in-kmsan_vmap_pages_range_noflush.patch
+mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch
+mm-mmap-regression-fix-for-unmapped_area-_topdown.patch