--- /dev/null
+From 0e01a20897e4404df3b1eaa37e3b37f829ab4363 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Oct 2024 16:16:53 +0100
+Subject: ALSA: hda/realtek: Fix headset mic on TUXEDO Stellaris 16 Gen6 mb1
+
+From: Christoffer Sandberg <cs@tuxedo.de>
+
+[ Upstream commit e49370d769e71456db3fbd982e95bab8c69f73e8 ]
+
+A quirk is needed to enable the headset microphone on the missing pin 0x19.
+
+Signed-off-by: Christoffer Sandberg <cs@tuxedo.de>
+Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/20241029151653.80726-2-wse@tuxedocomputers.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 3cbd9cf80be96..d750c6e6eb984 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10214,6 +10214,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
++ SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
+ SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
+--
+2.43.0
+
--- /dev/null
+From cdd6f79946b70304f691527e0efecb41c8c114d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Oct 2024 13:53:24 +0800
+Subject: ALSA: hda/realtek: Limit internal Mic boost on Dell platform
+
+From: Kailang Yang <kailang@realtek.com>
+
+[ Upstream commit 78e7be018784934081afec77f96d49a2483f9188 ]
+
+Dell wants to limit internal Mic boost on all Dell platforms.
+
+Signed-off-by: Kailang Yang <kailang@realtek.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/561fc5f5eff04b6cbd79ed173cd1c1db@realtek.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 21 ++++++++++++++++++---
+ 1 file changed, 18 insertions(+), 3 deletions(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index a8bc95ffa41a3..3cbd9cf80be96 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -7159,6 +7159,7 @@ enum {
+ ALC286_FIXUP_SONY_MIC_NO_PRESENCE,
+ ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT,
+ ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
++ ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+ ALC269_FIXUP_DELL2_MIC_NO_PRESENCE,
+ ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
+ ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+@@ -7193,6 +7194,7 @@ enum {
+ ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
+ ALC255_FIXUP_ASUS_MIC_NO_PRESENCE,
+ ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+ ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
+ ALC255_FIXUP_HEADSET_MODE,
+ ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC,
+@@ -7658,6 +7660,12 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE
+ },
++ [ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc269_fixup_limit_int_mic_boost,
++ .chained = true,
++ .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
++ },
+ [ALC269_FIXUP_DELL2_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+@@ -7938,6 +7946,12 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC255_FIXUP_HEADSET_MODE
+ },
++ [ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc269_fixup_limit_int_mic_boost,
++ .chained = true,
++ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
++ },
+ [ALC255_FIXUP_DELL2_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+@@ -10294,6 +10308,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
+ {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
+ {.id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, .name = "dell-headset3"},
+ {.id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, .name = "dell-headset4"},
++ {.id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET, .name = "dell-headset4-quiet"},
+ {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
+ {.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"},
+ {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
+@@ -10841,16 +10856,16 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
+ SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+ {0x19, 0x40000000},
+ {0x1b, 0x40000000}),
+- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
++ SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET,
+ {0x19, 0x40000000},
+ {0x1b, 0x40000000}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x19, 0x40000000},
+ {0x1a, 0x40000000}),
+- SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+ {0x19, 0x40000000},
+ {0x1a, 0x40000000}),
+- SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
++ SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+ {0x19, 0x40000000},
+ {0x1a, 0x40000000}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC2XX_FIXUP_HEADSET_MIC,
+--
+2.43.0
+
--- /dev/null
+From 9a988c5c336b6dfb5c813c357a36faaacac25c88 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Oct 2024 15:15:19 -0600
+Subject: block: fix sanity checks in blk_rq_map_user_bvec
+
+From: Xinyu Zhang <xizhang@purestorage.com>
+
+[ Upstream commit 2ff949441802a8d076d9013c7761f63e8ae5a9bd ]
+
+blk_rq_map_user_bvec contains a check bytes + bv->bv_len > nr_iter which
+causes unnecessary failures in NVMe passthrough I/O, reproducible as
+follows:
+
+- register a 2 page, page-aligned buffer against a ring
+- use that buffer to do a 1 page io_uring NVMe passthrough read
+
+The second (i = 1) iteration of the loop in blk_rq_map_user_bvec will
+then have nr_iter == 1 page, bytes == 1 page, bv->bv_len == 1 page, so
+the check bytes + bv->bv_len > nr_iter will succeed, causing the I/O to
+fail. This failure is unnecessary, as when the check succeeds, it means
+we've checked the entire buffer that will be used by the request - i.e.
+blk_rq_map_user_bvec should complete successfully. Therefore, terminate
+the loop early and return successfully when the check bytes + bv->bv_len
+> nr_iter succeeds.
+
+While we're at it, also remove the check that all segments in the bvec
+are single-page. While this seems to be true for all users of the
+function, it doesn't appear to be required anywhere downstream.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Xinyu Zhang <xizhang@purestorage.com>
+Co-developed-by: Uday Shankar <ushankar@purestorage.com>
+Signed-off-by: Uday Shankar <ushankar@purestorage.com>
+Fixes: 37987547932c ("block: extend functionality to map bvec iterator")
+Link: https://lore.kernel.org/r/20241023211519.4177873-1-ushankar@purestorage.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-map.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/block/blk-map.c b/block/blk-map.c
+index b337ae347bfa3..a2fa387560375 100644
+--- a/block/blk-map.c
++++ b/block/blk-map.c
+@@ -597,9 +597,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
+ if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
+ goto put_bio;
+ if (bytes + bv->bv_len > nr_iter)
+- goto put_bio;
+- if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
+- goto put_bio;
++ break;
+
+ nsegs++;
+ bytes += bv->bv_len;
+--
+2.43.0
+
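The reasoning above can be illustrated with a small userspace model of the mapping
loop. This is only a sketch: struct seg and map_segments() are invented for the
illustration and are not the kernel's bio_vec or blk_rq_map_user_bvec(); it only
shows why hitting the bytes + bv->bv_len > nr_iter condition can terminate the
loop successfully instead of failing the request.

#include <stdio.h>

/* Simplified stand-in for a bio_vec: only the segment length matters here. */
struct seg {
        unsigned int len;
};

/*
 * Walk the registered buffer's segments until 'nr_iter' bytes of the
 * request are covered.  A registered buffer may be larger than the I/O,
 * so running past 'nr_iter' is not an error: the prefix mapped so far is
 * all the request needs, hence 'break' rather than failure.
 */
static int map_segments(const struct seg *segs, int nr_segs,
                        unsigned long nr_iter)
{
        unsigned long bytes = 0;
        int i, used = 0;

        for (i = 0; i < nr_segs; i++) {
                if (bytes + segs[i].len > nr_iter)
                        break;          /* enough is mapped; not a failure */
                bytes += segs[i].len;
                used++;
        }
        return used;
}

int main(void)
{
        /* Two-page registered buffer, one-page read: only segment 0 is used. */
        struct seg buf[] = { { 4096 }, { 4096 } };

        printf("segments used: %d\n", map_segments(buf, 2, 4096));
        return 0;
}

With a two-page registered buffer and a one-page read, the loop stops at the
second segment, which matches the reproducer described in the commit message.
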
--- /dev/null
+From 0c340c704aa5935085c6ae2de631adada19df11e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 11:24:56 +0000
+Subject: cgroup/bpf: use a dedicated workqueue for cgroup bpf destruction
+
+From: Chen Ridong <chenridong@huawei.com>
+
+[ Upstream commit 117932eea99b729ee5d12783601a4f7f5fd58a23 ]
+
+A hung_task problem shown below was found:
+
+INFO: task kworker/0:0:8 blocked for more than 327 seconds.
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+Workqueue: events cgroup_bpf_release
+Call Trace:
+ <TASK>
+ __schedule+0x5a2/0x2050
+ ? find_held_lock+0x33/0x100
+ ? wq_worker_sleeping+0x9e/0xe0
+ schedule+0x9f/0x180
+ schedule_preempt_disabled+0x25/0x50
+ __mutex_lock+0x512/0x740
+ ? cgroup_bpf_release+0x1e/0x4d0
+ ? cgroup_bpf_release+0xcf/0x4d0
+ ? process_scheduled_works+0x161/0x8a0
+ ? cgroup_bpf_release+0x1e/0x4d0
+ ? mutex_lock_nested+0x2b/0x40
+ ? __pfx_delay_tsc+0x10/0x10
+ mutex_lock_nested+0x2b/0x40
+ cgroup_bpf_release+0xcf/0x4d0
+ ? process_scheduled_works+0x161/0x8a0
+ ? trace_event_raw_event_workqueue_execute_start+0x64/0xd0
+ ? process_scheduled_works+0x161/0x8a0
+ process_scheduled_works+0x23a/0x8a0
+ worker_thread+0x231/0x5b0
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0x14d/0x1c0
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x59/0x70
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1b/0x30
+ </TASK>
+
+This issue can be reproduced by the following pressure test:
+1. A large number of cpuset cgroups are deleted.
+2. Set cpu on and off repeatedly.
+3. Set watchdog_thresh repeatedly.
+The scripts can be obtained at LINK mentioned above the signature.
+
+The reason for this issue is that cgroup_mutex and cpu_hotplug_lock are
+acquired in different tasks, which may lead to a deadlock through the
+following steps:
+1. A large number of cpusets are deleted asynchronously, which puts a
+ large number of cgroup_bpf_release works into system_wq. The max_active
+ of system_wq is WQ_DFL_ACTIVE(256). Consequently, all active works are
+ cgroup_bpf_release works, and many cgroup_bpf_release works will be put
+ into the inactive queue. As illustrated in the diagram, there are 256 (in
+ the active queue) + n (in the inactive queue) works.
+2. Setting watchdog_thresh will hold cpu_hotplug_lock.read and put
+ smp_call_on_cpu work into system_wq. However step 1 has already filled
+ system_wq, 'sscs.work' is put into the inactive queue. 'sscs.work' has
+ to wait until the works that were put into the inactive queue earlier
+ have executed (n cgroup_bpf_release), so it will be blocked for a while.
+3. Cpu offline requires cpu_hotplug_lock.write, which is blocked by step 2.
+4. Cpusets that were deleted at step 1 put cgroup_release works into
+ cgroup_destroy_wq. They are competing to get cgroup_mutex all the time.
+ When cgroup_mutex is acquired by the work at css_killed_work_fn, it will
+ call cpuset_css_offline, which needs to acquire cpu_hotplug_lock.read.
+ However, cpuset_css_offline will be blocked by step 3.
+5. At this moment, there are 256 works in active queue that are
+ cgroup_bpf_release, they are attempting to acquire cgroup_mutex, and as
+ a result, all of them are blocked. Consequently, sscs.work can not be
+ executed. Ultimately, this situation leads to four processes being
+ blocked, forming a deadlock.
+
+system_wq(step1) WatchDog(step2) cpu offline(step3) cgroup_destroy_wq(step4)
+...
+2000+ cgroups deleted async
+256 actives + n inactives
+ __lockup_detector_reconfigure
+ P(cpu_hotplug_lock.read)
+ put sscs.work into system_wq
+256 + n + 1(sscs.work)
+sscs.work wait to be executed
+ waiting for sscs.work to finish
+ percpu_down_write
+ P(cpu_hotplug_lock.write)
+ ...blocking...
+ css_killed_work_fn
+ P(cgroup_mutex)
+ cpuset_css_offline
+ P(cpu_hotplug_lock.read)
+ ...blocking...
+256 cgroup_bpf_release
+mutex_lock(&cgroup_mutex);
+..blocking...
+
+To fix the problem, place cgroup_bpf_release works on a dedicated
+workqueue, which breaks the dependency loop. System wqs are
+for misc things which shouldn't create a large number of concurrent work
+items. If something is going to generate >WQ_DFL_ACTIVE(256) concurrent
+work items, it should use its own dedicated workqueue.
+
+Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself")
+Cc: stable@vger.kernel.org # v5.3+
+Link: https://lore.kernel.org/cgroups/e90c32d2-2a85-4f28-9154-09c7d320cb60@huawei.com/T/#t
+Tested-by: Vishal Chourasia <vishalc@linux.ibm.com>
+Signed-off-by: Chen Ridong <chenridong@huawei.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/cgroup.c | 19 ++++++++++++++++++-
+ 1 file changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
+index bb70f400c25eb..2cb04e0e118d9 100644
+--- a/kernel/bpf/cgroup.c
++++ b/kernel/bpf/cgroup.c
+@@ -24,6 +24,23 @@
+ DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
+ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+
++/*
++ * cgroup bpf destruction makes heavy use of work items and there can be a lot
++ * of concurrent destructions. Use a separate workqueue so that cgroup bpf
++ * destruction work items don't end up filling up max_active of system_wq
++ * which may lead to deadlock.
++ */
++static struct workqueue_struct *cgroup_bpf_destroy_wq;
++
++static int __init cgroup_bpf_wq_init(void)
++{
++ cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
++ if (!cgroup_bpf_destroy_wq)
++ panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
++ return 0;
++}
++core_initcall(cgroup_bpf_wq_init);
++
+ /* __always_inline is necessary to prevent indirect call through run_prog
+ * function pointer.
+ */
+@@ -334,7 +351,7 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+ struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+
+ INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+- queue_work(system_wq, &cgrp->bpf.release_work);
++ queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work);
+ }
+
+ /* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
+--
+2.43.0
+
--- /dev/null
+From 2c6aa71b070247d3da2b5ed5820e41eb07a2cf17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Dec 2022 13:33:48 -0800
+Subject: cxl/acpi: Move rescan to the workqueue
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+[ Upstream commit 4029c32fb601d505dfb92bdf0db9fdcc41fe1434 ]
+
+Now that the cxl_mem driver has a need to take the root device lock, the
+cxl_bus_rescan() needs to run outside of the root lock context. That
+need arises from RCH topologies and the locking that the cxl_mem driver
+does to attach a descendant to an upstream port. In the RCH case the
+lock needed is the CXL root device lock [1].
+
+Link: http://lore.kernel.org/r/166993045621.1882361.1730100141527044744.stgit@dwillia2-xfh.jf.intel.com [1]
+Tested-by: Robert Richter <rrichter@amd.com>
+Link: http://lore.kernel.org/r/166993042884.1882361.5633723613683058881.stgit@dwillia2-xfh.jf.intel.com
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Stable-dep-of: 3d6ebf16438d ("cxl/port: Fix cxl_bus_rescan() vs bus_rescan_devices()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/acpi.c | 17 +++++++++++++++--
+ drivers/cxl/core/port.c | 19 +++++++++++++++++--
+ drivers/cxl/cxl.h | 3 ++-
+ 3 files changed, 34 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
+index dd610556a3afa..d7d789211c173 100644
+--- a/drivers/cxl/acpi.c
++++ b/drivers/cxl/acpi.c
+@@ -509,7 +509,8 @@ static int cxl_acpi_probe(struct platform_device *pdev)
+ return rc;
+
+ /* In case PCI is scanned before ACPI re-trigger memdev attach */
+- return cxl_bus_rescan();
++ cxl_bus_rescan();
++ return 0;
+ }
+
+ static const struct acpi_device_id cxl_acpi_ids[] = {
+@@ -533,7 +534,19 @@ static struct platform_driver cxl_acpi_driver = {
+ .id_table = cxl_test_ids,
+ };
+
+-module_platform_driver(cxl_acpi_driver);
++static int __init cxl_acpi_init(void)
++{
++ return platform_driver_register(&cxl_acpi_driver);
++}
++
++static void __exit cxl_acpi_exit(void)
++{
++ platform_driver_unregister(&cxl_acpi_driver);
++ cxl_bus_drain();
++}
++
++module_init(cxl_acpi_init);
++module_exit(cxl_acpi_exit);
+ MODULE_LICENSE("GPL v2");
+ MODULE_IMPORT_NS(CXL);
+ MODULE_IMPORT_NS(ACPI);
+diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
+index 1f1483a9e5252..f0875fa86c616 100644
+--- a/drivers/cxl/core/port.c
++++ b/drivers/cxl/core/port.c
+@@ -1786,12 +1786,27 @@ static void cxl_bus_remove(struct device *dev)
+
+ static struct workqueue_struct *cxl_bus_wq;
+
+-int cxl_bus_rescan(void)
++static void cxl_bus_rescan_queue(struct work_struct *w)
+ {
+- return bus_rescan_devices(&cxl_bus_type);
++ int rc = bus_rescan_devices(&cxl_bus_type);
++
++ pr_debug("CXL bus rescan result: %d\n", rc);
++}
++
++void cxl_bus_rescan(void)
++{
++ static DECLARE_WORK(rescan_work, cxl_bus_rescan_queue);
++
++ queue_work(cxl_bus_wq, &rescan_work);
+ }
+ EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);
+
++void cxl_bus_drain(void)
++{
++ drain_workqueue(cxl_bus_wq);
++}
++EXPORT_SYMBOL_NS_GPL(cxl_bus_drain, CXL);
++
+ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
+ {
+ return queue_work(cxl_bus_wq, &cxlmd->detach_work);
+diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
+index 7750ccb7652db..827fa94cddda1 100644
+--- a/drivers/cxl/cxl.h
++++ b/drivers/cxl/cxl.h
+@@ -564,7 +564,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
+ struct cxl_dport *parent_dport);
+ struct cxl_port *find_cxl_root(struct device *dev);
+ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
+-int cxl_bus_rescan(void);
++void cxl_bus_rescan(void);
++void cxl_bus_drain(void);
+ struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
+ struct cxl_dport **dport);
+ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
+--
+2.43.0
+
--- /dev/null
+From d7f1f35cfbdd17cb6884cd3722f902922f725701 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:43:32 -0700
+Subject: cxl/port: Fix cxl_bus_rescan() vs bus_rescan_devices()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+[ Upstream commit 3d6ebf16438de5d712030fefbb4182b46373d677 ]
+
+It turns out since its original introduction, pre-2.6.12,
+bus_rescan_devices() has skipped devices that might be in the process of
+attaching or detaching from their driver. For CXL this behavior is
+unwanted, as cxl_bus_rescan() is expected to act as a probe barrier.
+
+That behavior is simple enough to achieve with bus_for_each_dev() paired
+with a call to device_attach(), and it is unclear why bus_rescan_devices()
+took the position of lockless consumption of dev->driver, which is racy.
+
+The "Fixes:" but no "Cc: stable" on this patch reflects that the issue
+was found merely by inspection, since the bug that triggered the discovery
+of this potential problem [1] is fixed by other means. However, a stable
+backport should do no harm.
+
+Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver")
+Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1]
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Tested-by: Gregory Price <gourry@gourry.net>
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Link: https://patch.msgid.link/172964781104.81806.4277549800082443769.stgit@dwillia2-xfh.jf.intel.com
+Signed-off-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/port.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
+index f0875fa86c616..20f052d3759e0 100644
+--- a/drivers/cxl/core/port.c
++++ b/drivers/cxl/core/port.c
+@@ -1786,11 +1786,18 @@ static void cxl_bus_remove(struct device *dev)
+
+ static struct workqueue_struct *cxl_bus_wq;
+
+-static void cxl_bus_rescan_queue(struct work_struct *w)
++static int cxl_rescan_attach(struct device *dev, void *data)
+ {
+- int rc = bus_rescan_devices(&cxl_bus_type);
++ int rc = device_attach(dev);
++
++ dev_vdbg(dev, "rescan: %s\n", rc ? "attach" : "detached");
+
+- pr_debug("CXL bus rescan result: %d\n", rc);
++ return 0;
++}
++
++static void cxl_bus_rescan_queue(struct work_struct *w)
++{
++ bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_rescan_attach);
+ }
+
+ void cxl_bus_rescan(void)
+--
+2.43.0
+
--- /dev/null
+From fc9f40091fb160075127a025b7278dbdf985a5b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Aug 2023 17:13:33 +0300
+Subject: fs: create kiocb_{start,end}_write() helpers
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit ed0360bbab72b829437b67ebb2f9cfac19f59dfe ]
+
+aio, io_uring, cachefiles and overlayfs all open-code an ugly variant
+of file_{start,end}_write() to silence lockdep warnings.
+
+Create helpers for this lockdep dance so we can use the helpers in all
+the callers.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Message-Id: <20230817141337.1025891-4-amir73il@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 1d60d74e8526 ("io_uring/rw: fix missing NOWAIT check for O_DIRECT start write")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/fs.h | 36 ++++++++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 33c4961309833..0d32634c5cf0d 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3029,6 +3029,42 @@ static inline void file_end_write(struct file *file)
+ __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ }
+
++/**
++ * kiocb_start_write - get write access to a superblock for async file io
++ * @iocb: the io context we want to submit the write with
++ *
++ * This is a variant of sb_start_write() for async io submission.
++ * Should be matched with a call to kiocb_end_write().
++ */
++static inline void kiocb_start_write(struct kiocb *iocb)
++{
++ struct inode *inode = file_inode(iocb->ki_filp);
++
++ sb_start_write(inode->i_sb);
++ /*
++ * Fool lockdep by telling it the lock got released so that it
++ * doesn't complain about the held lock when we return to userspace.
++ */
++ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
++}
++
++/**
++ * kiocb_end_write - drop write access to a superblock after async file io
++ * @iocb: the io context we submitted the write with
++ *
++ * Should be matched with a call to kiocb_start_write().
++ */
++static inline void kiocb_end_write(struct kiocb *iocb)
++{
++ struct inode *inode = file_inode(iocb->ki_filp);
++
++ /*
++ * Tell lockdep we inherited freeze protection from submission thread.
++ */
++ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
++ sb_end_write(inode->i_sb);
++}
++
+ /*
+ * This is used for regular files where some users -- especially the
+ * currently executed binary in a process, previously handled via
+--
+2.43.0
+
--- /dev/null
+From 85ca1f838a54f739f7d8ddcc16ace4d7f67af19f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Aug 2023 17:13:31 +0300
+Subject: io_uring: rename kiocb_end_write() local helper
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit a370167fe526123637965f60859a9f1f3e1a58b7 ]
+
+This helper does not take a kiocb as input and we want to create a
+common helper by that name that takes a kiocb as input.
+
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Message-Id: <20230817141337.1025891-2-amir73il@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 1d60d74e8526 ("io_uring/rw: fix missing NOWAIT check for O_DIRECT start write")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/rw.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/io_uring/rw.c b/io_uring/rw.c
+index 038e6b13a7496..4eb42fc29c151 100644
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -220,7 +220,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
+ }
+ #endif
+
+-static void kiocb_end_write(struct io_kiocb *req)
++static void io_req_end_write(struct io_kiocb *req)
+ {
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+@@ -243,7 +243,7 @@ static void io_req_io_end(struct io_kiocb *req)
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+ if (rw->kiocb.ki_flags & IOCB_WRITE) {
+- kiocb_end_write(req);
++ io_req_end_write(req);
+ fsnotify_modify(req->file);
+ } else {
+ fsnotify_access(req->file);
+@@ -307,7 +307,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
+ struct io_kiocb *req = cmd_to_io_kiocb(rw);
+
+ if (kiocb->ki_flags & IOCB_WRITE)
+- kiocb_end_write(req);
++ io_req_end_write(req);
+ if (unlikely(res != req->cqe.res)) {
+ if (res == -EAGAIN && io_rw_should_reissue(req)) {
+ req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
+@@ -956,7 +956,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
+ io->bytes_done += ret2;
+
+ if (kiocb->ki_flags & IOCB_WRITE)
+- kiocb_end_write(req);
++ io_req_end_write(req);
+ return ret ? ret : -EAGAIN;
+ }
+ done:
+@@ -967,7 +967,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
+ ret = io_setup_async_rw(req, iovec, s, false);
+ if (!ret) {
+ if (kiocb->ki_flags & IOCB_WRITE)
+- kiocb_end_write(req);
++ io_req_end_write(req);
+ return -EAGAIN;
+ }
+ return ret;
+--
+2.43.0
+
--- /dev/null
+From 7cc8484de2585d9324d89c118dfda19dc847ebab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Oct 2024 08:05:44 -0600
+Subject: io_uring/rw: fix missing NOWAIT check for O_DIRECT start write
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 1d60d74e852647255bd8e76f5a22dc42531e4389 ]
+
+When io_uring starts a write, it'll call kiocb_start_write() to bump the
+super block rwsem, preventing any freezes from happening while that
+write is in-flight. The freeze side will grab that rwsem for writing,
+excluding any new writers from happening and waiting for existing writes
+to finish. But io_uring unconditionally uses kiocb_start_write(), which
+will block if someone is currently attempting to freeze the mount point.
+This causes a deadlock where freeze is waiting for previous writes to
+complete, but the previous writes cannot complete, as the task that is
+supposed to complete them is blocked waiting on starting a new write.
+This results in the following stuck trace showing that dependency with
+the write blocked starting a new write:
+
+task:fio state:D stack:0 pid:886 tgid:886 ppid:876
+Call trace:
+ __switch_to+0x1d8/0x348
+ __schedule+0x8e8/0x2248
+ schedule+0x110/0x3f0
+ percpu_rwsem_wait+0x1e8/0x3f8
+ __percpu_down_read+0xe8/0x500
+ io_write+0xbb8/0xff8
+ io_issue_sqe+0x10c/0x1020
+ io_submit_sqes+0x614/0x2110
+ __arm64_sys_io_uring_enter+0x524/0x1038
+ invoke_syscall+0x74/0x268
+ el0_svc_common.constprop.0+0x160/0x238
+ do_el0_svc+0x44/0x60
+ el0_svc+0x44/0xb0
+ el0t_64_sync_handler+0x118/0x128
+ el0t_64_sync+0x168/0x170
+INFO: task fsfreeze:7364 blocked for more than 15 seconds.
+ Not tainted 6.12.0-rc5-00063-g76aaf945701c #7963
+
+with the attempting freezer stuck trying to grab the rwsem:
+
+task:fsfreeze state:D stack:0 pid:7364 tgid:7364 ppid:995
+Call trace:
+ __switch_to+0x1d8/0x348
+ __schedule+0x8e8/0x2248
+ schedule+0x110/0x3f0
+ percpu_down_write+0x2b0/0x680
+ freeze_super+0x248/0x8a8
+ do_vfs_ioctl+0x149c/0x1b18
+ __arm64_sys_ioctl+0xd0/0x1a0
+ invoke_syscall+0x74/0x268
+ el0_svc_common.constprop.0+0x160/0x238
+ do_el0_svc+0x44/0x60
+ el0_svc+0x44/0xb0
+ el0t_64_sync_handler+0x118/0x128
+ el0t_64_sync+0x168/0x170
+
+Fix this by having the io_uring side honor IOCB_NOWAIT, and only attempt a
+blocking grab of the super block rwsem if it isn't set. For normal issue
+where IOCB_NOWAIT would always be set, this returns -EAGAIN which will
+have io_uring core issue a blocking attempt of the write. That will in
+turn also get completions run, ensuring forward progress.
+
+Since freezing requires CAP_SYS_ADMIN in the first place, this isn't
+something that can be triggered by a regular user.
+
+Cc: stable@vger.kernel.org # 5.10+
+Reported-by: Peter Mann <peter.mann@sh.cz>
+Link: https://lore.kernel.org/io-uring/38c94aec-81c9-4f62-b44e-1d87f5597644@sh.cz
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/rw.c | 23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/rw.c b/io_uring/rw.c
+index c15c7873813b3..9d6e17a244ae7 100644
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -839,6 +839,25 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags)
+ return kiocb_done(req, ret, issue_flags);
+ }
+
++static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb)
++{
++ struct inode *inode;
++ bool ret;
++
++ if (!(req->flags & REQ_F_ISREG))
++ return true;
++ if (!(kiocb->ki_flags & IOCB_NOWAIT)) {
++ kiocb_start_write(kiocb);
++ return true;
++ }
++
++ inode = file_inode(kiocb->ki_filp);
++ ret = sb_start_write_trylock(inode->i_sb);
++ if (ret)
++ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
++ return ret;
++}
++
+ int io_write(struct io_kiocb *req, unsigned int issue_flags)
+ {
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+@@ -892,8 +911,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
+ return ret;
+ }
+
+- if (req->flags & REQ_F_ISREG)
+- kiocb_start_write(kiocb);
++ if (unlikely(!io_kiocb_start_write(req, kiocb)))
++ return -EAGAIN;
+ kiocb->ki_flags |= IOCB_WRITE;
+
+ if (likely(req->file->f_op->write_iter))
+--
+2.43.0
+
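The trylock-or-EAGAIN idea behind the fix above can be sketched in userspace with
a pthread rwlock standing in for the superblock freeze protection (writes take it
shared, freezing takes it exclusive). The names start_write()/end_write() and the
flag handling are invented for this sketch; it is not the kernel's locking code.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-in for the superblock freeze rwsem: writers take it shared,
 * the freezer takes it exclusive.
 */
static pthread_rwlock_t freeze_lock = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Start a write.  With 'nowait' set, only try the lock; if the freezer
 * holds it, report -EAGAIN so the caller can retry from a context where
 * blocking is acceptable instead of sleeping here.
 */
static int start_write(bool nowait)
{
        if (!nowait) {
                pthread_rwlock_rdlock(&freeze_lock);    /* may block on freeze */
                return 0;
        }
        if (pthread_rwlock_tryrdlock(&freeze_lock) != 0)
                return -EAGAIN;                         /* don't block submission */
        return 0;
}

static void end_write(void)
{
        pthread_rwlock_unlock(&freeze_lock);
}

int main(void)
{
        int ret = start_write(true);

        if (ret == -EAGAIN) {
                printf("frozen: retry from a blocking context\n");
        } else {
                printf("write started\n");
                end_write();
        }
        return 0;
}

When the non-blocking attempt fails, the caller gets -EAGAIN and retries from a
context where sleeping is allowed, which mirrors how io_uring punts the write to
its blocking path and keeps completions flowing.
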
--- /dev/null
+From 4cf01f8e6f316d434b3882d6b4fff5666ee05971 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Aug 2023 17:13:34 +0300
+Subject: io_uring: use kiocb_{start,end}_write() helpers
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit e484fd73f4bdcb00c2188100c2d84e9f3f5c9f7d ]
+
+Use helpers instead of the open coded dance to silence lockdep warnings.
+
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Message-Id: <20230817141337.1025891-5-amir73il@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 1d60d74e8526 ("io_uring/rw: fix missing NOWAIT check for O_DIRECT start write")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/rw.c | 23 ++++-------------------
+ 1 file changed, 4 insertions(+), 19 deletions(-)
+
+diff --git a/io_uring/rw.c b/io_uring/rw.c
+index 4eb42fc29c151..c15c7873813b3 100644
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -222,15 +222,10 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
+
+ static void io_req_end_write(struct io_kiocb *req)
+ {
+- /*
+- * Tell lockdep we inherited freeze protection from submission
+- * thread.
+- */
+ if (req->flags & REQ_F_ISREG) {
+- struct super_block *sb = file_inode(req->file)->i_sb;
++ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+- __sb_writers_acquired(sb, SB_FREEZE_WRITE);
+- sb_end_write(sb);
++ kiocb_end_write(&rw->kiocb);
+ }
+ }
+
+@@ -897,18 +892,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
+ return ret;
+ }
+
+- /*
+- * Open-code file_start_write here to grab freeze protection,
+- * which will be released by another thread in
+- * io_complete_rw(). Fool lockdep by telling it the lock got
+- * released so that it doesn't complain about the held lock when
+- * we return to userspace.
+- */
+- if (req->flags & REQ_F_ISREG) {
+- sb_start_write(file_inode(req->file)->i_sb);
+- __sb_writers_release(file_inode(req->file)->i_sb,
+- SB_FREEZE_WRITE);
+- }
++ if (req->flags & REQ_F_ISREG)
++ kiocb_start_write(kiocb);
+ kiocb->ki_flags |= IOCB_WRITE;
+
+ if (likely(req->file->f_op->write_iter))
+--
+2.43.0
+
--- /dev/null
+From 4ed3283db4accb8de6dae5d596250f67d6afd12b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:07:06 +0200
+Subject: kasan: remove vmalloc_percpu test
+
+From: Andrey Konovalov <andreyknvl@gmail.com>
+
+[ Upstream commit 330d8df81f3673d6fb74550bbc9bb159d81b35f7 ]
+
+Commit 1a2473f0cbc0 ("kasan: improve vmalloc tests") added the
+vmalloc_percpu KASAN test with the assumption that __alloc_percpu always
+uses vmalloc internally, which is tagged by KASAN.
+
+However, __alloc_percpu might allocate memory from the first per-CPU
+chunk, which is not allocated via vmalloc(). As a result, the test might
+fail.
+
+Remove the test until proper KASAN annotations for the per-CPU allocator
+are added; tracked in https://bugzilla.kernel.org/show_bug.cgi?id=215019.
+
+Link: https://lkml.kernel.org/r/20241022160706.38943-1-andrey.konovalov@linux.dev
+Fixes: 1a2473f0cbc0 ("kasan: improve vmalloc tests")
+Signed-off-by: Andrey Konovalov <andreyknvl@gmail.com>
+Reported-by: Samuel Holland <samuel.holland@sifive.com>
+Link: https://lore.kernel.org/all/4a245fff-cc46-44d1-a5f9-fd2f1c3764ae@sifive.com/
+Reported-by: Sabyrzhan Tasbolatov <snovitoll@gmail.com>
+Link: https://lore.kernel.org/all/CACzwLxiWzNqPBp4C1VkaXZ2wDwvY3yZeetCi1TLGFipKW77drA@mail.gmail.com/
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Marco Elver <elver@google.com>
+Cc: Sabyrzhan Tasbolatov <snovitoll@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/kasan/kasan_test.c | 27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
+index cef683a2e0d2e..df9658299a08a 100644
+--- a/mm/kasan/kasan_test.c
++++ b/mm/kasan/kasan_test.c
+@@ -1260,32 +1260,6 @@ static void vm_map_ram_tags(struct kunit *test)
+ free_pages((unsigned long)p_ptr, 1);
+ }
+
+-static void vmalloc_percpu(struct kunit *test)
+-{
+- char __percpu *ptr;
+- int cpu;
+-
+- /*
+- * This test is specifically crafted for the software tag-based mode,
+- * the only tag-based mode that poisons percpu mappings.
+- */
+- KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
+-
+- ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+-
+- for_each_possible_cpu(cpu) {
+- char *c_ptr = per_cpu_ptr(ptr, cpu);
+-
+- KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN);
+- KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL);
+-
+- /* Make sure that in-bounds accesses don't crash the kernel. */
+- *c_ptr = 0;
+- }
+-
+- free_percpu(ptr);
+-}
+-
+ /*
+ * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN,
+ * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based
+@@ -1439,7 +1413,6 @@ static struct kunit_case kasan_kunit_test_cases[] = {
+ KUNIT_CASE(vmalloc_oob),
+ KUNIT_CASE(vmap_tags),
+ KUNIT_CASE(vm_map_ram_tags),
+- KUNIT_CASE(vmalloc_percpu),
+ KUNIT_CASE(match_all_not_assigned),
+ KUNIT_CASE(match_all_ptr_tag),
+ KUNIT_CASE(match_all_mem_tag),
+--
+2.43.0
+
--- /dev/null
+From f2e4472e93a1a0c03ce77a1cb2932502e907e34d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:25:14 +0800
+Subject: mctp i2c: handle NULL header address
+
+From: Matt Johnston <matt@codeconstruct.com.au>
+
+[ Upstream commit 01e215975fd80af81b5b79f009d49ddd35976c13 ]
+
+daddr can be NULL if there is no neighbour table entry present;
+in that case the tx packet should be dropped.
+
+saddr will usually be set by MCTP core, but check for NULL in case a
+packet is transmitted by a different protocol.
+
+Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver")
+Cc: stable@vger.kernel.org
+Reported-by: Dung Cao <dung@os.amperecomputing.com>
+Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20241022-mctp-i2c-null-dest-v3-1-e929709956c5@codeconstruct.com.au
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/mctp/mctp-i2c.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c
+index 1d67a3ca1fd11..7635a8b3c35cd 100644
+--- a/drivers/net/mctp/mctp-i2c.c
++++ b/drivers/net/mctp/mctp-i2c.c
+@@ -547,6 +547,9 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
+ if (len > MCTP_I2C_MAXMTU)
+ return -EMSGSIZE;
+
++ if (!daddr || !saddr)
++ return -EINVAL;
++
+ lldst = *((u8 *)daddr);
+ llsrc = *((u8 *)saddr);
+
+--
+2.43.0
+
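The guard added above is the usual validate-before-dereference pattern for
link-layer addresses that may be absent. A minimal userspace sketch follows, with
a hypothetical build_header() and one-byte addresses standing in for the MCTP I2C
header fields; it is only an illustration, not the driver code.

#include <errno.h>
#include <stdio.h>

/*
 * Hypothetical header builder: 'daddr' and 'saddr' are one-byte hardware
 * addresses that may be absent (NULL) when no neighbour entry exists or
 * when another protocol hands over a packet.
 */
static int build_header(unsigned char *hdr, const unsigned char *daddr,
                        const unsigned char *saddr)
{
        if (!daddr || !saddr)
                return -EINVAL; /* drop instead of dereferencing NULL */

        hdr[0] = *daddr;
        hdr[1] = *saddr;
        return 2;               /* header length */
}

int main(void)
{
        unsigned char hdr[2];
        unsigned char dst = 0x20, src = 0x10;

        printf("with addresses: %d\n", build_header(hdr, &dst, &src));
        printf("missing daddr:  %d\n", build_header(hdr, NULL, &src));
        return 0;
}
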
--- /dev/null
+From 3992d9e7cfbd4eec0eee3c177e5f4e11ba8c4294 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 09:23:48 +0800
+Subject: migrate: convert migrate_pages() to use folios
+
+From: Huang Ying <ying.huang@intel.com>
+
+[ Upstream commit eaec4e639f11413ce75fbf38affd1aa5c40979e9 ]
+
+Quite straightforward, the page functions are converted to corresponding
+folio functions. Same for comments.
+
+THP-specific code is converted to handle large folios.
+
+Link: https://lkml.kernel.org/r/20221109012348.93849-3-ying.huang@intel.com
+Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 210 +++++++++++++++++++++++++++------------------------
+ 1 file changed, 112 insertions(+), 98 deletions(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 16b456b927c18..562f819dc6189 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1385,231 +1385,245 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
+ return rc;
+ }
+
+-static inline int try_split_thp(struct page *page, struct list_head *split_pages)
++static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
+ {
+ int rc;
+
+- lock_page(page);
+- rc = split_huge_page_to_list(page, split_pages);
+- unlock_page(page);
++ folio_lock(folio);
++ rc = split_folio_to_list(folio, split_folios);
++ folio_unlock(folio);
+ if (!rc)
+- list_move_tail(&page->lru, split_pages);
++ list_move_tail(&folio->lru, split_folios);
+
+ return rc;
+ }
+
+ /*
+- * migrate_pages - migrate the pages specified in a list, to the free pages
++ * migrate_pages - migrate the folios specified in a list, to the free folios
+ * supplied as the target for the page migration
+ *
+- * @from: The list of pages to be migrated.
+- * @get_new_page: The function used to allocate free pages to be used
+- * as the target of the page migration.
+- * @put_new_page: The function used to free target pages if migration
++ * @from: The list of folios to be migrated.
++ * @get_new_page: The function used to allocate free folios to be used
++ * as the target of the folio migration.
++ * @put_new_page: The function used to free target folios if migration
+ * fails, or NULL if no special handling is necessary.
+ * @private: Private data to be passed on to get_new_page()
+ * @mode: The migration mode that specifies the constraints for
+- * page migration, if any.
+- * @reason: The reason for page migration.
+- * @ret_succeeded: Set to the number of normal pages migrated successfully if
++ * folio migration, if any.
++ * @reason: The reason for folio migration.
++ * @ret_succeeded: Set to the number of folios migrated successfully if
+ * the caller passes a non-NULL pointer.
+ *
+- * The function returns after 10 attempts or if no pages are movable any more
+- * because the list has become empty or no retryable pages exist any more.
+- * It is caller's responsibility to call putback_movable_pages() to return pages
++ * The function returns after 10 attempts or if no folios are movable any more
++ * because the list has become empty or no retryable folios exist any more.
++ * It is caller's responsibility to call putback_movable_pages() to return folios
+ * to the LRU or free list only if ret != 0.
+ *
+- * Returns the number of {normal page, THP, hugetlb} that were not migrated, or
+- * an error code. The number of THP splits will be considered as the number of
+- * non-migrated THP, no matter how many subpages of the THP are migrated successfully.
++ * Returns the number of {normal folio, large folio, hugetlb} that were not
++ * migrated, or an error code. The number of large folio splits will be
++ * considered as the number of non-migrated large folio, no matter how many
++ * split folios of the large folio are migrated successfully.
+ */
+ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ free_page_t put_new_page, unsigned long private,
+ enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
+ {
+ int retry = 1;
++ int large_retry = 1;
+ int thp_retry = 1;
+ int nr_failed = 0;
+ int nr_failed_pages = 0;
+ int nr_retry_pages = 0;
+ int nr_succeeded = 0;
+ int nr_thp_succeeded = 0;
++ int nr_large_failed = 0;
+ int nr_thp_failed = 0;
+ int nr_thp_split = 0;
+ int pass = 0;
++ bool is_large = false;
+ bool is_thp = false;
+- struct page *page;
+- struct page *page2;
+- int rc, nr_subpages;
+- LIST_HEAD(ret_pages);
+- LIST_HEAD(thp_split_pages);
++ struct folio *folio, *folio2;
++ int rc, nr_pages;
++ LIST_HEAD(ret_folios);
++ LIST_HEAD(split_folios);
+ bool nosplit = (reason == MR_NUMA_MISPLACED);
+- bool no_subpage_counting = false;
++ bool no_split_folio_counting = false;
+
+ trace_mm_migrate_pages_start(mode, reason);
+
+-thp_subpage_migration:
+- for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
++split_folio_migration:
++ for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
+ retry = 0;
++ large_retry = 0;
+ thp_retry = 0;
+ nr_retry_pages = 0;
+
+- list_for_each_entry_safe(page, page2, from, lru) {
++ list_for_each_entry_safe(folio, folio2, from, lru) {
+ /*
+- * THP statistics is based on the source huge page.
+- * Capture required information that might get lost
+- * during migration.
++ * Large folio statistics is based on the source large
++ * folio. Capture required information that might get
++ * lost during migration.
+ */
+- is_thp = PageTransHuge(page) && !PageHuge(page);
+- nr_subpages = compound_nr(page);
++ is_large = folio_test_large(folio) && !folio_test_hugetlb(folio);
++ is_thp = is_large && folio_test_pmd_mappable(folio);
++ nr_pages = folio_nr_pages(folio);
+ cond_resched();
+
+- if (PageHuge(page))
++ if (folio_test_hugetlb(folio))
+ rc = unmap_and_move_huge_page(get_new_page,
+- put_new_page, private, page,
+- pass > 2, mode, reason,
+- &ret_pages);
++ put_new_page, private,
++ &folio->page, pass > 2, mode,
++ reason,
++ &ret_folios);
+ else
+ rc = unmap_and_move(get_new_page, put_new_page,
+- private, page_folio(page), pass > 2, mode,
+- reason, &ret_pages);
++ private, folio, pass > 2, mode,
++ reason, &ret_folios);
+ /*
+ * The rules are:
+- * Success: non hugetlb page will be freed, hugetlb
+- * page will be put back
++ * Success: non hugetlb folio will be freed, hugetlb
++ * folio will be put back
+ * -EAGAIN: stay on the from list
+ * -ENOMEM: stay on the from list
+ * -ENOSYS: stay on the from list
+- * Other errno: put on ret_pages list then splice to
++ * Other errno: put on ret_folios list then splice to
+ * from list
+ */
+ switch(rc) {
+ /*
+- * THP migration might be unsupported or the
+- * allocation could've failed so we should
+- * retry on the same page with the THP split
+- * to base pages.
++ * Large folio migration might be unsupported or
++ * the allocation could've failed so we should retry
++ * on the same folio with the large folio split
++ * to normal folios.
+ *
+- * Sub-pages are put in thp_split_pages, and
++ * Split folios are put in split_folios, and
+ * we will migrate them after the rest of the
+ * list is processed.
+ */
+ case -ENOSYS:
+- /* THP migration is unsupported */
+- if (is_thp) {
+- nr_thp_failed++;
+- if (!try_split_thp(page, &thp_split_pages)) {
+- nr_thp_split++;
++ /* Large folio migration is unsupported */
++ if (is_large) {
++ nr_large_failed++;
++ nr_thp_failed += is_thp;
++ if (!try_split_folio(folio, &split_folios)) {
++ nr_thp_split += is_thp;
+ break;
+ }
+ /* Hugetlb migration is unsupported */
+- } else if (!no_subpage_counting) {
++ } else if (!no_split_folio_counting) {
+ nr_failed++;
+ }
+
+- nr_failed_pages += nr_subpages;
+- list_move_tail(&page->lru, &ret_pages);
++ nr_failed_pages += nr_pages;
++ list_move_tail(&folio->lru, &ret_folios);
+ break;
+ case -ENOMEM:
+ /*
+ * When memory is low, don't bother to try to migrate
+- * other pages, just exit.
++ * other folios, just exit.
+ */
+- if (is_thp) {
+- nr_thp_failed++;
+- /* THP NUMA faulting doesn't split THP to retry. */
++ if (is_large) {
++ nr_large_failed++;
++ nr_thp_failed += is_thp;
++ /* Large folio NUMA faulting doesn't split to retry. */
+ if (!nosplit) {
+- int ret = try_split_thp(page, &thp_split_pages);
++ int ret = try_split_folio(folio, &split_folios);
+
+ if (!ret) {
+- nr_thp_split++;
++ nr_thp_split += is_thp;
+ break;
+ } else if (reason == MR_LONGTERM_PIN &&
+ ret == -EAGAIN) {
+ /*
+- * Try again to split THP to mitigate
+- * the failure of longterm pinning.
++ * Try again to split large folio to
++ * mitigate the failure of longterm pinning.
+ */
+- thp_retry++;
+- nr_retry_pages += nr_subpages;
++ large_retry++;
++ thp_retry += is_thp;
++ nr_retry_pages += nr_pages;
+ break;
+ }
+ }
+- } else if (!no_subpage_counting) {
++ } else if (!no_split_folio_counting) {
+ nr_failed++;
+ }
+
+- nr_failed_pages += nr_subpages + nr_retry_pages;
++ nr_failed_pages += nr_pages + nr_retry_pages;
+ /*
+- * There might be some subpages of fail-to-migrate THPs
+- * left in thp_split_pages list. Move them back to migration
++ * There might be some split folios of fail-to-migrate large
++ * folios left in split_folios list. Move them back to migration
+ * list so that they could be put back to the right list by
+- * the caller otherwise the page refcnt will be leaked.
++ * the caller otherwise the folio refcnt will be leaked.
+ */
+- list_splice_init(&thp_split_pages, from);
++ list_splice_init(&split_folios, from);
+ /* nr_failed isn't updated for not used */
++ nr_large_failed += large_retry;
+ nr_thp_failed += thp_retry;
+ goto out;
+ case -EAGAIN:
+- if (is_thp)
+- thp_retry++;
+- else if (!no_subpage_counting)
++ if (is_large) {
++ large_retry++;
++ thp_retry += is_thp;
++ } else if (!no_split_folio_counting) {
+ retry++;
+- nr_retry_pages += nr_subpages;
++ }
++ nr_retry_pages += nr_pages;
+ break;
+ case MIGRATEPAGE_SUCCESS:
+- nr_succeeded += nr_subpages;
+- if (is_thp)
+- nr_thp_succeeded++;
++ nr_succeeded += nr_pages;
++ nr_thp_succeeded += is_thp;
+ break;
+ default:
+ /*
+ * Permanent failure (-EBUSY, etc.):
+- * unlike -EAGAIN case, the failed page is
+- * removed from migration page list and not
++ * unlike -EAGAIN case, the failed folio is
++ * removed from migration folio list and not
+ * retried in the next outer loop.
+ */
+- if (is_thp)
+- nr_thp_failed++;
+- else if (!no_subpage_counting)
++ if (is_large) {
++ nr_large_failed++;
++ nr_thp_failed += is_thp;
++ } else if (!no_split_folio_counting) {
+ nr_failed++;
++ }
+
+- nr_failed_pages += nr_subpages;
++ nr_failed_pages += nr_pages;
+ break;
+ }
+ }
+ }
+ nr_failed += retry;
++ nr_large_failed += large_retry;
+ nr_thp_failed += thp_retry;
+ nr_failed_pages += nr_retry_pages;
+ /*
+- * Try to migrate subpages of fail-to-migrate THPs, no nr_failed
+- * counting in this round, since all subpages of a THP is counted
+- * as 1 failure in the first round.
++ * Try to migrate split folios of fail-to-migrate large folios, no
++ * nr_failed counting in this round, since all split folios of a
++ * large folio is counted as 1 failure in the first round.
+ */
+- if (!list_empty(&thp_split_pages)) {
++ if (!list_empty(&split_folios)) {
+ /*
+- * Move non-migrated pages (after 10 retries) to ret_pages
++ * Move non-migrated folios (after 10 retries) to ret_folios
+ * to avoid migrating them again.
+ */
+- list_splice_init(from, &ret_pages);
+- list_splice_init(&thp_split_pages, from);
+- no_subpage_counting = true;
++ list_splice_init(from, &ret_folios);
++ list_splice_init(&split_folios, from);
++ no_split_folio_counting = true;
+ retry = 1;
+- goto thp_subpage_migration;
++ goto split_folio_migration;
+ }
+
+- rc = nr_failed + nr_thp_failed;
++ rc = nr_failed + nr_large_failed;
+ out:
+ /*
+- * Put the permanent failure page back to migration list, they
++ * Put the permanent failure folio back to migration list, they
+ * will be put back to the right list by the caller.
+ */
+- list_splice(&ret_pages, from);
++ list_splice(&ret_folios, from);
+
+ /*
+- * Return 0 in case all subpages of fail-to-migrate THPs are
+- * migrated successfully.
++ * Return 0 in case all split folios of fail-to-migrate large folios
++ * are migrated successfully.
+ */
+ if (list_empty(from))
+ rc = 0;
+--
+2.43.0
+
--- /dev/null
+From 989ae777a6d2f1f50e59c2afe0e78e6d29cc0dde Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 09:23:47 +0800
+Subject: migrate: convert unmap_and_move() to use folios
+
+From: Huang Ying <ying.huang@intel.com>
+
+[ Upstream commit 49f51859221a3dfee27488eaeaff800459cac6a9 ]
+
+Patch series "migrate: convert migrate_pages()/unmap_and_move() to use
+folios", v2.
+
+The conversion is quite straightforward: just replace the page API with the
+corresponding folio API. migrate_pages() and unmap_and_move() mostly work
+with folios (head pages) only.
+
+This patch (of 2):
+
+Quite straightforward, the page functions are converted to corresponding
+folio functions. Same for comments.
+
+Link: https://lkml.kernel.org/r/20221109012348.93849-1-ying.huang@intel.com
+Link: https://lkml.kernel.org/r/20221109012348.93849-2-ying.huang@intel.com
+Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Reviewed-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 54 ++++++++++++++++++++++++++--------------------------
+ 1 file changed, 27 insertions(+), 27 deletions(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index b0caa89e67d5f..16b456b927c18 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1162,79 +1162,79 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
+ }
+
+ /*
+- * Obtain the lock on page, remove all ptes and migrate the page
+- * to the newly allocated page in newpage.
++ * Obtain the lock on folio, remove all ptes and migrate the folio
++ * to the newly allocated folio in dst.
+ */
+ static int unmap_and_move(new_page_t get_new_page,
+ free_page_t put_new_page,
+- unsigned long private, struct page *page,
++ unsigned long private, struct folio *src,
+ int force, enum migrate_mode mode,
+ enum migrate_reason reason,
+ struct list_head *ret)
+ {
+- struct folio *dst, *src = page_folio(page);
++ struct folio *dst;
+ int rc = MIGRATEPAGE_SUCCESS;
+ struct page *newpage = NULL;
+
+- if (!thp_migration_supported() && PageTransHuge(page))
++ if (!thp_migration_supported() && folio_test_transhuge(src))
+ return -ENOSYS;
+
+- if (page_count(page) == 1) {
+- /* Page was freed from under us. So we are done. */
+- ClearPageActive(page);
+- ClearPageUnevictable(page);
++ if (folio_ref_count(src) == 1) {
++ /* Folio was freed from under us. So we are done. */
++ folio_clear_active(src);
++ folio_clear_unevictable(src);
+ /* free_pages_prepare() will clear PG_isolated. */
+ goto out;
+ }
+
+- newpage = get_new_page(page, private);
++ newpage = get_new_page(&src->page, private);
+ if (!newpage)
+ return -ENOMEM;
+ dst = page_folio(newpage);
+
+- newpage->private = 0;
++ dst->private = 0;
+ rc = __unmap_and_move(src, dst, force, mode);
+ if (rc == MIGRATEPAGE_SUCCESS)
+- set_page_owner_migrate_reason(newpage, reason);
++ set_page_owner_migrate_reason(&dst->page, reason);
+
+ out:
+ if (rc != -EAGAIN) {
+ /*
+- * A page that has been migrated has all references
+- * removed and will be freed. A page that has not been
++ * A folio that has been migrated has all references
++ * removed and will be freed. A folio that has not been
+ * migrated will have kept its references and be restored.
+ */
+- list_del(&page->lru);
++ list_del(&src->lru);
+ }
+
+ /*
+ * If migration is successful, releases reference grabbed during
+- * isolation. Otherwise, restore the page to right list unless
++ * isolation. Otherwise, restore the folio to right list unless
+ * we want to retry.
+ */
+ if (rc == MIGRATEPAGE_SUCCESS) {
+ /*
+- * Compaction can migrate also non-LRU pages which are
++ * Compaction can migrate also non-LRU folios which are
+ * not accounted to NR_ISOLATED_*. They can be recognized
+- * as __PageMovable
++ * as __folio_test_movable
+ */
+- if (likely(!__PageMovable(page)))
+- mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
+- page_is_file_lru(page), -thp_nr_pages(page));
++ if (likely(!__folio_test_movable(src)))
++ mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
++ folio_is_file_lru(src), -folio_nr_pages(src));
+
+ if (reason != MR_MEMORY_FAILURE)
+ /*
+- * We release the page in page_handle_poison.
++ * We release the folio in page_handle_poison.
+ */
+- put_page(page);
++ folio_put(src);
+ } else {
+ if (rc != -EAGAIN)
+- list_add_tail(&page->lru, ret);
++ list_add_tail(&src->lru, ret);
+
+ if (put_new_page)
+- put_new_page(newpage, private);
++ put_new_page(&dst->page, private);
+ else
+- put_page(newpage);
++ folio_put(dst);
+ }
+
+ return rc;
+@@ -1471,7 +1471,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ &ret_pages);
+ else
+ rc = unmap_and_move(get_new_page, put_new_page,
+- private, page, pass > 2, mode,
++ private, page_folio(page), pass > 2, mode,
+ reason, &ret_pages);
+ /*
+ * The rules are:
+--
+2.43.0
+
--- /dev/null
+From 41a3f5ffed4ddea2c459d69b3b751704faa84a6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 20:34:36 +0800
+Subject: migrate_pages: organize stats with struct migrate_pages_stats
+
+From: Huang Ying <ying.huang@intel.com>
+
+[ Upstream commit 5b855937096aea7f81e73ad6d40d433c9dd49577 ]
+
+Patch series "migrate_pages(): batch TLB flushing", v5.
+
+Now, migrate_pages() migrates folios one by one, as in the following
+pseudocode:
+
+ for each folio
+ unmap
+ flush TLB
+ copy
+ restore map
+
+If multiple folios are passed to migrate_pages(), there are opportunities
+to batch the TLB flushing and copying. That is, we can change the code to
+something like the following:
+
+ for each folio
+ unmap
+ for each folio
+ flush TLB
+ for each folio
+ copy
+ for each folio
+ restore map
+
+The total number of TLB flushing IPI can be reduced considerably. And we
+may use some hardware accelerator such as DSA to accelerate the folio
+copying.
+
+So in this patch, we refactor the migrate_pages() implementation and
+implement the TLB flushing batching. Base on this, hardware accelerated
+folio copying can be implemented.
+
+If too many folios are passed to migrate_pages(), in the naive batched
+implementation, we may unmap too many folios at the same time. The
+possibility for a task to wait for the migrated folios to be mapped again
+increases, so latency may suffer. To deal with this issue, the max
+number of folios to be unmapped in a batch is restricted to no more than
+HPAGE_PMD_NR, in units of base pages. That is, the impact is at the same
+level as THP migration.
+
+We use the following test to measure the performance impact of the
+patchset,
+
+On a 2-socket Intel server,
+
+ - Run pmbench memory accessing benchmark
+
+ - Run `migratepages` to migrate pages of pmbench between node 0 and
+ node 1 back and forth.
+
+With the patchset, the number of TLB flush IPIs is reduced by 99.1%
+during the test, and the number of pages migrated successfully per
+second increases by 291.7%.
+
+Xin Hao helped to test the patchset on an ARM64 server with 128 cores
+and 2 NUMA nodes. Test results show that the page migration performance
+increases by up to 78%.
+
+This patch (of 9):
+
+Define struct migrate_pages_stats to organize the various statistics in
+migrate_pages(). This makes it easier to collect and consume the
+statistics in multiple functions. This will be needed in the following
+patches in the series.
+
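+For illustration only, a minimal user-space sketch of the idea (the field
+names match the struct added below; the accounting helper is hypothetical):
+
+  #include <stdio.h>
+  #include <string.h>
+
+  struct migrate_pages_stats {
+          int nr_succeeded;      /* in units of base pages */
+          int nr_failed_pages;   /* in units of base pages */
+          int nr_thp_succeeded;
+          int nr_thp_failed;
+          int nr_thp_split;
+  };
+
+  /* Hypothetical helper: account one successfully migrated folio. */
+  static void account_success(struct migrate_pages_stats *stats,
+                              int nr_pages, int is_thp)
+  {
+          stats->nr_succeeded += nr_pages;
+          stats->nr_thp_succeeded += is_thp;
+  }
+
+  int main(void)
+  {
+          struct migrate_pages_stats stats;
+
+          memset(&stats, 0, sizeof(stats));
+          account_success(&stats, 512, 1);  /* one THP (512 base pages) */
+          account_success(&stats, 1, 0);    /* one normal folio */
+          printf("pages: %d, THPs: %d\n",
+                 stats.nr_succeeded, stats.nr_thp_succeeded);
+          return 0;
+  }
+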
+Link: https://lkml.kernel.org/r/20230213123444.155149-1-ying.huang@intel.com
+Link: https://lkml.kernel.org/r/20230213123444.155149-2-ying.huang@intel.com
+Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Alistair Popple <apopple@nvidia.com>
+Reviewed-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Reviewed-by: Xin Hao <xhao@linux.alibaba.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Bharata B Rao <bharata@amd.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 60 +++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 34 insertions(+), 26 deletions(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 81444abf54dba..b7596a0b4445f 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1398,6 +1398,16 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
+ return rc;
+ }
+
++struct migrate_pages_stats {
++ int nr_succeeded; /* Normal and large folios migrated successfully, in
++ units of base pages */
++ int nr_failed_pages; /* Normal and large folios failed to be migrated, in
++ units of base pages. Untried folios aren't counted */
++ int nr_thp_succeeded; /* THP migrated successfully */
++ int nr_thp_failed; /* THP failed to be migrated */
++ int nr_thp_split; /* THP split before migrating */
++};
++
+ /*
+ * migrate_pages - migrate the folios specified in a list, to the free folios
+ * supplied as the target for the page migration
+@@ -1432,13 +1442,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ int large_retry = 1;
+ int thp_retry = 1;
+ int nr_failed = 0;
+- int nr_failed_pages = 0;
+ int nr_retry_pages = 0;
+- int nr_succeeded = 0;
+- int nr_thp_succeeded = 0;
+ int nr_large_failed = 0;
+- int nr_thp_failed = 0;
+- int nr_thp_split = 0;
+ int pass = 0;
+ bool is_large = false;
+ bool is_thp = false;
+@@ -1448,9 +1453,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ LIST_HEAD(split_folios);
+ bool nosplit = (reason == MR_NUMA_MISPLACED);
+ bool no_split_folio_counting = false;
++ struct migrate_pages_stats stats;
+
+ trace_mm_migrate_pages_start(mode, reason);
+
++ memset(&stats, 0, sizeof(stats));
+ split_folio_migration:
+ for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
+ retry = 0;
+@@ -1504,9 +1511,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ /* Large folio migration is unsupported */
+ if (is_large) {
+ nr_large_failed++;
+- nr_thp_failed += is_thp;
++ stats.nr_thp_failed += is_thp;
+ if (!try_split_folio(folio, &split_folios)) {
+- nr_thp_split += is_thp;
++ stats.nr_thp_split += is_thp;
+ break;
+ }
+ /* Hugetlb migration is unsupported */
+@@ -1514,7 +1521,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ nr_failed++;
+ }
+
+- nr_failed_pages += nr_pages;
++ stats.nr_failed_pages += nr_pages;
+ list_move_tail(&folio->lru, &ret_folios);
+ break;
+ case -ENOMEM:
+@@ -1524,13 +1531,13 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ */
+ if (is_large) {
+ nr_large_failed++;
+- nr_thp_failed += is_thp;
++ stats.nr_thp_failed += is_thp;
+ /* Large folio NUMA faulting doesn't split to retry. */
+ if (!nosplit) {
+ int ret = try_split_folio(folio, &split_folios);
+
+ if (!ret) {
+- nr_thp_split += is_thp;
++ stats.nr_thp_split += is_thp;
+ break;
+ } else if (reason == MR_LONGTERM_PIN &&
+ ret == -EAGAIN) {
+@@ -1548,7 +1555,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ nr_failed++;
+ }
+
+- nr_failed_pages += nr_pages + nr_retry_pages;
++ stats.nr_failed_pages += nr_pages + nr_retry_pages;
+ /*
+ * There might be some split folios of fail-to-migrate large
+ * folios left in split_folios list. Move them back to migration
+@@ -1558,7 +1565,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ list_splice_init(&split_folios, from);
+ /* nr_failed isn't updated for not used */
+ nr_large_failed += large_retry;
+- nr_thp_failed += thp_retry;
++ stats.nr_thp_failed += thp_retry;
+ goto out;
+ case -EAGAIN:
+ if (is_large) {
+@@ -1570,8 +1577,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ nr_retry_pages += nr_pages;
+ break;
+ case MIGRATEPAGE_SUCCESS:
+- nr_succeeded += nr_pages;
+- nr_thp_succeeded += is_thp;
++ stats.nr_succeeded += nr_pages;
++ stats.nr_thp_succeeded += is_thp;
+ break;
+ default:
+ /*
+@@ -1582,20 +1589,20 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ */
+ if (is_large) {
+ nr_large_failed++;
+- nr_thp_failed += is_thp;
++ stats.nr_thp_failed += is_thp;
+ } else if (!no_split_folio_counting) {
+ nr_failed++;
+ }
+
+- nr_failed_pages += nr_pages;
++ stats.nr_failed_pages += nr_pages;
+ break;
+ }
+ }
+ }
+ nr_failed += retry;
+ nr_large_failed += large_retry;
+- nr_thp_failed += thp_retry;
+- nr_failed_pages += nr_retry_pages;
++ stats.nr_thp_failed += thp_retry;
++ stats.nr_failed_pages += nr_retry_pages;
+ /*
+ * Try to migrate split folios of fail-to-migrate large folios, no
+ * nr_failed counting in this round, since all split folios of a
+@@ -1628,16 +1635,17 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ if (list_empty(from))
+ rc = 0;
+
+- count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
+- count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
+- count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
+- count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
+- count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
+- trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
+- nr_thp_failed, nr_thp_split, mode, reason);
++ count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded);
++ count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages);
++ count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded);
++ count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed);
++ count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split);
++ trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages,
++ stats.nr_thp_succeeded, stats.nr_thp_failed,
++ stats.nr_thp_split, mode, reason);
+
+ if (ret_succeeded)
+- *ret_succeeded = nr_succeeded;
++ *ret_succeeded = stats.nr_succeeded;
+
+ return rc;
+ }
+--
+2.43.0
+
--- /dev/null
+From f440d486b0dc2fe6f1bca63448860dc0b8809928 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 20:34:38 +0800
+Subject: migrate_pages: restrict number of pages to migrate in batch
+
+From: Huang Ying <ying.huang@intel.com>
+
+[ Upstream commit 42012e0436d44aeb2e68f11a28ddd0ad3f38b61f ]
+
+This is a preparation patch to batch the folio unmapping and moving for
+non-hugetlb folios.
+
+If we batched the folio unmapping, all folios to be migrated would be
+unmapped before their contents and flags are copied. If the folios
+passed to migrate_pages() covered too many pages, the affected processes
+could be stopped for too long, resulting in excessive latency. For
+example, the migrate_pages() syscall calls migrate_pages() with all
+folios of a process. To avoid this possible issue, this patch restricts
+the number of pages migrated in one batch to no more than HPAGE_PMD_NR.
+That is, the impact is on the same level as THP migration.
+
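+For illustration only, the following user-space sketch models the
+batch-partition loop that migrate_pages() uses below (the kernel walks a
+folio list and uses list_cut_before(); the folio sizes in main() are made
+up, and 512 is HPAGE_PMD_NR for 4K base pages):
+
+  #include <stdio.h>
+
+  #define NR_MAX_BATCHED_MIGRATION 512
+
+  /* Each entry is the size of one folio in base pages.  Returns how many
+   * leading folios form the first batch; the folio that would push the
+   * batch past the limit is left for the next round. */
+  static int first_batch(const int *folio_pages, int nr_folios)
+  {
+          int nr_pages = 0, i;
+
+          for (i = 0; i < nr_folios; i++) {
+                  nr_pages += folio_pages[i];
+                  if (nr_pages > NR_MAX_BATCHED_MIGRATION)
+                          break;
+          }
+          return i;
+  }
+
+  int main(void)
+  {
+          int folios[] = { 1, 512, 1, 1 };  /* hypothetical folio sizes */
+
+          printf("first batch: %d folio(s)\n", first_batch(folios, 4));
+          return 0;
+  }
+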
+Link: https://lkml.kernel.org/r/20230213123444.155149-4-ying.huang@intel.com
+Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Bharata B Rao <bharata@amd.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Xin Hao <xhao@linux.alibaba.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 174 +++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 106 insertions(+), 68 deletions(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 70d0b20d06a5f..40ae91e1a026b 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1398,6 +1398,11 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
+ return rc;
+ }
+
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++#define NR_MAX_BATCHED_MIGRATION HPAGE_PMD_NR
++#else
++#define NR_MAX_BATCHED_MIGRATION 512
++#endif
+ #define NR_MAX_MIGRATE_PAGES_RETRY 10
+
+ struct migrate_pages_stats {
+@@ -1499,40 +1504,15 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
+ return nr_failed;
+ }
+
+-/*
+- * migrate_pages - migrate the folios specified in a list, to the free folios
+- * supplied as the target for the page migration
+- *
+- * @from: The list of folios to be migrated.
+- * @get_new_page: The function used to allocate free folios to be used
+- * as the target of the folio migration.
+- * @put_new_page: The function used to free target folios if migration
+- * fails, or NULL if no special handling is necessary.
+- * @private: Private data to be passed on to get_new_page()
+- * @mode: The migration mode that specifies the constraints for
+- * folio migration, if any.
+- * @reason: The reason for folio migration.
+- * @ret_succeeded: Set to the number of folios migrated successfully if
+- * the caller passes a non-NULL pointer.
+- *
+- * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios
+- * are movable any more because the list has become empty or no retryable folios
+- * exist any more. It is caller's responsibility to call putback_movable_pages()
+- * only if ret != 0.
+- *
+- * Returns the number of {normal folio, large folio, hugetlb} that were not
+- * migrated, or an error code. The number of large folio splits will be
+- * considered as the number of non-migrated large folio, no matter how many
+- * split folios of the large folio are migrated successfully.
+- */
+-int migrate_pages(struct list_head *from, new_page_t get_new_page,
++static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
+ free_page_t put_new_page, unsigned long private,
+- enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
++ enum migrate_mode mode, int reason, struct list_head *ret_folios,
++ struct migrate_pages_stats *stats)
+ {
+ int retry = 1;
+ int large_retry = 1;
+ int thp_retry = 1;
+- int nr_failed;
++ int nr_failed = 0;
+ int nr_retry_pages = 0;
+ int nr_large_failed = 0;
+ int pass = 0;
+@@ -1540,20 +1520,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ bool is_thp = false;
+ struct folio *folio, *folio2;
+ int rc, nr_pages;
+- LIST_HEAD(ret_folios);
+ LIST_HEAD(split_folios);
+ bool nosplit = (reason == MR_NUMA_MISPLACED);
+ bool no_split_folio_counting = false;
+- struct migrate_pages_stats stats;
+-
+- trace_mm_migrate_pages_start(mode, reason);
+-
+- memset(&stats, 0, sizeof(stats));
+- rc = migrate_hugetlbs(from, get_new_page, put_new_page, private, mode, reason,
+- &stats, &ret_folios);
+- if (rc < 0)
+- goto out;
+- nr_failed = rc;
+
+ split_folio_migration:
+ for (pass = 0;
+@@ -1565,12 +1534,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ nr_retry_pages = 0;
+
+ list_for_each_entry_safe(folio, folio2, from, lru) {
+- /* Retried hugetlb folios will be kept in list */
+- if (folio_test_hugetlb(folio)) {
+- list_move_tail(&folio->lru, &ret_folios);
+- continue;
+- }
+-
+ /*
+ * Large folio statistics is based on the source large
+ * folio. Capture required information that might get
+@@ -1584,15 +1547,14 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+
+ rc = unmap_and_move(get_new_page, put_new_page,
+ private, folio, pass > 2, mode,
+- reason, &ret_folios);
++ reason, ret_folios);
+ /*
+ * The rules are:
+ * Success: folio will be freed
+ * -EAGAIN: stay on the from list
+ * -ENOMEM: stay on the from list
+ * -ENOSYS: stay on the from list
+- * Other errno: put on ret_folios list then splice to
+- * from list
++ * Other errno: put on ret_folios list
+ */
+ switch(rc) {
+ /*
+@@ -1609,17 +1571,17 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ /* Large folio migration is unsupported */
+ if (is_large) {
+ nr_large_failed++;
+- stats.nr_thp_failed += is_thp;
++ stats->nr_thp_failed += is_thp;
+ if (!try_split_folio(folio, &split_folios)) {
+- stats.nr_thp_split += is_thp;
++ stats->nr_thp_split += is_thp;
+ break;
+ }
+ } else if (!no_split_folio_counting) {
+ nr_failed++;
+ }
+
+- stats.nr_failed_pages += nr_pages;
+- list_move_tail(&folio->lru, &ret_folios);
++ stats->nr_failed_pages += nr_pages;
++ list_move_tail(&folio->lru, ret_folios);
+ break;
+ case -ENOMEM:
+ /*
+@@ -1628,13 +1590,13 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ */
+ if (is_large) {
+ nr_large_failed++;
+- stats.nr_thp_failed += is_thp;
++ stats->nr_thp_failed += is_thp;
+ /* Large folio NUMA faulting doesn't split to retry. */
+ if (!nosplit) {
+ int ret = try_split_folio(folio, &split_folios);
+
+ if (!ret) {
+- stats.nr_thp_split += is_thp;
++ stats->nr_thp_split += is_thp;
+ break;
+ } else if (reason == MR_LONGTERM_PIN &&
+ ret == -EAGAIN) {
+@@ -1652,17 +1614,17 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ nr_failed++;
+ }
+
+- stats.nr_failed_pages += nr_pages + nr_retry_pages;
++ stats->nr_failed_pages += nr_pages + nr_retry_pages;
+ /*
+ * There might be some split folios of fail-to-migrate large
+- * folios left in split_folios list. Move them back to migration
++ * folios left in split_folios list. Move them to ret_folios
+ * list so that they could be put back to the right list by
+ * the caller otherwise the folio refcnt will be leaked.
+ */
+- list_splice_init(&split_folios, from);
++ list_splice_init(&split_folios, ret_folios);
+ /* nr_failed isn't updated for not used */
+ nr_large_failed += large_retry;
+- stats.nr_thp_failed += thp_retry;
++ stats->nr_thp_failed += thp_retry;
+ goto out;
+ case -EAGAIN:
+ if (is_large) {
+@@ -1674,8 +1636,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ nr_retry_pages += nr_pages;
+ break;
+ case MIGRATEPAGE_SUCCESS:
+- stats.nr_succeeded += nr_pages;
+- stats.nr_thp_succeeded += is_thp;
++ stats->nr_succeeded += nr_pages;
++ stats->nr_thp_succeeded += is_thp;
+ break;
+ default:
+ /*
+@@ -1686,20 +1648,20 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ */
+ if (is_large) {
+ nr_large_failed++;
+- stats.nr_thp_failed += is_thp;
++ stats->nr_thp_failed += is_thp;
+ } else if (!no_split_folio_counting) {
+ nr_failed++;
+ }
+
+- stats.nr_failed_pages += nr_pages;
++ stats->nr_failed_pages += nr_pages;
+ break;
+ }
+ }
+ }
+ nr_failed += retry;
+ nr_large_failed += large_retry;
+- stats.nr_thp_failed += thp_retry;
+- stats.nr_failed_pages += nr_retry_pages;
++ stats->nr_thp_failed += thp_retry;
++ stats->nr_failed_pages += nr_retry_pages;
+ /*
+ * Try to migrate split folios of fail-to-migrate large folios, no
+ * nr_failed counting in this round, since all split folios of a
+@@ -1710,7 +1672,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY
+ * retries) to ret_folios to avoid migrating them again.
+ */
+- list_splice_init(from, &ret_folios);
++ list_splice_init(from, ret_folios);
+ list_splice_init(&split_folios, from);
+ no_split_folio_counting = true;
+ retry = 1;
+@@ -1718,6 +1680,82 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ }
+
+ rc = nr_failed + nr_large_failed;
++out:
++ return rc;
++}
++
++/*
++ * migrate_pages - migrate the folios specified in a list, to the free folios
++ * supplied as the target for the page migration
++ *
++ * @from: The list of folios to be migrated.
++ * @get_new_page: The function used to allocate free folios to be used
++ * as the target of the folio migration.
++ * @put_new_page: The function used to free target folios if migration
++ * fails, or NULL if no special handling is necessary.
++ * @private: Private data to be passed on to get_new_page()
++ * @mode: The migration mode that specifies the constraints for
++ * folio migration, if any.
++ * @reason: The reason for folio migration.
++ * @ret_succeeded: Set to the number of folios migrated successfully if
++ * the caller passes a non-NULL pointer.
++ *
++ * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios
++ * are movable any more because the list has become empty or no retryable folios
++ * exist any more. It is caller's responsibility to call putback_movable_pages()
++ * only if ret != 0.
++ *
++ * Returns the number of {normal folio, large folio, hugetlb} that were not
++ * migrated, or an error code. The number of large folio splits will be
++ * considered as the number of non-migrated large folio, no matter how many
++ * split folios of the large folio are migrated successfully.
++ */
++int migrate_pages(struct list_head *from, new_page_t get_new_page,
++ free_page_t put_new_page, unsigned long private,
++ enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
++{
++ int rc, rc_gather;
++ int nr_pages;
++ struct folio *folio, *folio2;
++ LIST_HEAD(folios);
++ LIST_HEAD(ret_folios);
++ struct migrate_pages_stats stats;
++
++ trace_mm_migrate_pages_start(mode, reason);
++
++ memset(&stats, 0, sizeof(stats));
++
++ rc_gather = migrate_hugetlbs(from, get_new_page, put_new_page, private,
++ mode, reason, &stats, &ret_folios);
++ if (rc_gather < 0)
++ goto out;
++again:
++ nr_pages = 0;
++ list_for_each_entry_safe(folio, folio2, from, lru) {
++ /* Retried hugetlb folios will be kept in list */
++ if (folio_test_hugetlb(folio)) {
++ list_move_tail(&folio->lru, &ret_folios);
++ continue;
++ }
++
++ nr_pages += folio_nr_pages(folio);
++ if (nr_pages > NR_MAX_BATCHED_MIGRATION)
++ break;
++ }
++ if (nr_pages > NR_MAX_BATCHED_MIGRATION)
++ list_cut_before(&folios, from, &folio->lru);
++ else
++ list_splice_init(from, &folios);
++ rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
++ mode, reason, &ret_folios, &stats);
++ list_splice_tail_init(&folios, &ret_folios);
++ if (rc < 0) {
++ rc_gather = rc;
++ goto out;
++ }
++ rc_gather += rc;
++ if (!list_empty(from))
++ goto again;
+ out:
+ /*
+ * Put the permanent failure folio back to migration list, they
+@@ -1730,7 +1768,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ * are migrated successfully.
+ */
+ if (list_empty(from))
+- rc = 0;
++ rc_gather = 0;
+
+ count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded);
+ count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages);
+@@ -1744,7 +1782,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ if (ret_succeeded)
+ *ret_succeeded = stats.nr_succeeded;
+
+- return rc;
++ return rc_gather;
+ }
+
+ struct page *alloc_migration_target(struct page *page, unsigned long private)
+--
+2.43.0
+
--- /dev/null
+From dc1b2cb876a9212a452499066bada1d7645a8442 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 20:34:37 +0800
+Subject: migrate_pages: separate hugetlb folios migration
+
+From: Huang Ying <ying.huang@intel.com>
+
+[ Upstream commit e5bfff8b10e496378da4b7863479dd6fb907d4ea ]
+
+This is a preparation patch to batch the folio unmapping and moving for
+non-hugetlb folios. Based on that, we can batch the TLB shootdown during
+folio migration and make it possible to use a hardware accelerator for
+the folio copying.
+
+In this patch, hugetlb folio migration and non-hugetlb folio migration
+are separated in migrate_pages() to make it easy to change the
+non-hugetlb migration implementation.
+
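+Purely as an illustration of the new control flow (a user-space model, not
+kernel code), migration now happens in two passes over the same list:
+migrate_hugetlbs() handles only hugetlb folios, and the main loop that
+runs afterwards skips any hugetlb folios still left on the list:
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  struct folio_mock {
+          bool hugetlb;
+          bool migrated;
+  };
+
+  /* First pass, mirroring migrate_hugetlbs(): hugetlb folios only. */
+  static void hugetlb_pass(struct folio_mock *list, int n)
+  {
+          for (int i = 0; i < n; i++)
+                  if (list[i].hugetlb)
+                          list[i].migrated = true;  /* assume success */
+  }
+
+  /* Second pass, mirroring the main loop: skip hugetlb folios. */
+  static void other_pass(struct folio_mock *list, int n)
+  {
+          for (int i = 0; i < n; i++)
+                  if (!list[i].hugetlb)
+                          list[i].migrated = true;  /* assume success */
+  }
+
+  int main(void)
+  {
+          struct folio_mock list[] = { { true, false }, { false, false } };
+
+          hugetlb_pass(list, 2);
+          other_pass(list, 2);
+          printf("hugetlb: %d, other: %d\n",
+                 list[0].migrated, list[1].migrated);
+          return 0;
+  }
+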
+Link: https://lkml.kernel.org/r/20230213123444.155149-3-ying.huang@intel.com
+Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Reviewed-by: Xin Hao <xhao@linux.alibaba.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Bharata B Rao <bharata@amd.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 141 +++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 119 insertions(+), 22 deletions(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index b7596a0b4445f..70d0b20d06a5f 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1398,6 +1398,8 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
+ return rc;
+ }
+
++#define NR_MAX_MIGRATE_PAGES_RETRY 10
++
+ struct migrate_pages_stats {
+ int nr_succeeded; /* Normal and large folios migrated successfully, in
+ units of base pages */
+@@ -1408,6 +1410,95 @@ struct migrate_pages_stats {
+ int nr_thp_split; /* THP split before migrating */
+ };
+
++/*
++ * Returns the number of hugetlb folios that were not migrated, or an error code
++ * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable
++ * any more because the list has become empty or no retryable hugetlb folios
++ * exist any more. It is caller's responsibility to call putback_movable_pages()
++ * only if ret != 0.
++ */
++static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
++ free_page_t put_new_page, unsigned long private,
++ enum migrate_mode mode, int reason,
++ struct migrate_pages_stats *stats,
++ struct list_head *ret_folios)
++{
++ int retry = 1;
++ int nr_failed = 0;
++ int nr_retry_pages = 0;
++ int pass = 0;
++ struct folio *folio, *folio2;
++ int rc, nr_pages;
++
++ for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) {
++ retry = 0;
++ nr_retry_pages = 0;
++
++ list_for_each_entry_safe(folio, folio2, from, lru) {
++ if (!folio_test_hugetlb(folio))
++ continue;
++
++ nr_pages = folio_nr_pages(folio);
++
++ cond_resched();
++
++ rc = unmap_and_move_huge_page(get_new_page,
++ put_new_page, private,
++ &folio->page, pass > 2, mode,
++ reason, ret_folios);
++ /*
++ * The rules are:
++ * Success: hugetlb folio will be put back
++ * -EAGAIN: stay on the from list
++ * -ENOMEM: stay on the from list
++ * -ENOSYS: stay on the from list
++ * Other errno: put on ret_folios list
++ */
++ switch(rc) {
++ case -ENOSYS:
++ /* Hugetlb migration is unsupported */
++ nr_failed++;
++ stats->nr_failed_pages += nr_pages;
++ list_move_tail(&folio->lru, ret_folios);
++ break;
++ case -ENOMEM:
++ /*
++ * When memory is low, don't bother to try to migrate
++ * other folios, just exit.
++ */
++ stats->nr_failed_pages += nr_pages + nr_retry_pages;
++ return -ENOMEM;
++ case -EAGAIN:
++ retry++;
++ nr_retry_pages += nr_pages;
++ break;
++ case MIGRATEPAGE_SUCCESS:
++ stats->nr_succeeded += nr_pages;
++ break;
++ default:
++ /*
++ * Permanent failure (-EBUSY, etc.):
++ * unlike -EAGAIN case, the failed folio is
++ * removed from migration folio list and not
++ * retried in the next outer loop.
++ */
++ nr_failed++;
++ stats->nr_failed_pages += nr_pages;
++ break;
++ }
++ }
++ }
++ /*
++ * nr_failed is number of hugetlb folios failed to be migrated. After
++ * NR_MAX_MIGRATE_PAGES_RETRY attempts, give up and count retried hugetlb
++ * folios as failed.
++ */
++ nr_failed += retry;
++ stats->nr_failed_pages += nr_retry_pages;
++
++ return nr_failed;
++}
++
+ /*
+ * migrate_pages - migrate the folios specified in a list, to the free folios
+ * supplied as the target for the page migration
+@@ -1424,10 +1515,10 @@ struct migrate_pages_stats {
+ * @ret_succeeded: Set to the number of folios migrated successfully if
+ * the caller passes a non-NULL pointer.
+ *
+- * The function returns after 10 attempts or if no folios are movable any more
+- * because the list has become empty or no retryable folios exist any more.
+- * It is caller's responsibility to call putback_movable_pages() to return folios
+- * to the LRU or free list only if ret != 0.
++ * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios
++ * are movable any more because the list has become empty or no retryable folios
++ * exist any more. It is caller's responsibility to call putback_movable_pages()
++ * only if ret != 0.
+ *
+ * Returns the number of {normal folio, large folio, hugetlb} that were not
+ * migrated, or an error code. The number of large folio splits will be
+@@ -1441,7 +1532,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ int retry = 1;
+ int large_retry = 1;
+ int thp_retry = 1;
+- int nr_failed = 0;
++ int nr_failed;
+ int nr_retry_pages = 0;
+ int nr_large_failed = 0;
+ int pass = 0;
+@@ -1458,38 +1549,45 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ trace_mm_migrate_pages_start(mode, reason);
+
+ memset(&stats, 0, sizeof(stats));
++ rc = migrate_hugetlbs(from, get_new_page, put_new_page, private, mode, reason,
++ &stats, &ret_folios);
++ if (rc < 0)
++ goto out;
++ nr_failed = rc;
++
+ split_folio_migration:
+- for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
++ for (pass = 0;
++ pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
++ pass++) {
+ retry = 0;
+ large_retry = 0;
+ thp_retry = 0;
+ nr_retry_pages = 0;
+
+ list_for_each_entry_safe(folio, folio2, from, lru) {
++ /* Retried hugetlb folios will be kept in list */
++ if (folio_test_hugetlb(folio)) {
++ list_move_tail(&folio->lru, &ret_folios);
++ continue;
++ }
++
+ /*
+ * Large folio statistics is based on the source large
+ * folio. Capture required information that might get
+ * lost during migration.
+ */
+- is_large = folio_test_large(folio) && !folio_test_hugetlb(folio);
++ is_large = folio_test_large(folio);
+ is_thp = is_large && folio_test_pmd_mappable(folio);
+ nr_pages = folio_nr_pages(folio);
++
+ cond_resched();
+
+- if (folio_test_hugetlb(folio))
+- rc = unmap_and_move_huge_page(get_new_page,
+- put_new_page, private,
+- &folio->page, pass > 2, mode,
+- reason,
+- &ret_folios);
+- else
+- rc = unmap_and_move(get_new_page, put_new_page,
+- private, folio, pass > 2, mode,
+- reason, &ret_folios);
++ rc = unmap_and_move(get_new_page, put_new_page,
++ private, folio, pass > 2, mode,
++ reason, &ret_folios);
+ /*
+ * The rules are:
+- * Success: non hugetlb folio will be freed, hugetlb
+- * folio will be put back
++ * Success: folio will be freed
+ * -EAGAIN: stay on the from list
+ * -ENOMEM: stay on the from list
+ * -ENOSYS: stay on the from list
+@@ -1516,7 +1614,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ stats.nr_thp_split += is_thp;
+ break;
+ }
+- /* Hugetlb migration is unsupported */
+ } else if (!no_split_folio_counting) {
+ nr_failed++;
+ }
+@@ -1610,8 +1707,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ */
+ if (!list_empty(&split_folios)) {
+ /*
+- * Move non-migrated folios (after 10 retries) to ret_folios
+- * to avoid migrating them again.
++ * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY
++ * retries) to ret_folios to avoid migrating them again.
+ */
+ list_splice_init(from, &ret_folios);
+ list_splice_init(&split_folios, from);
+--
+2.43.0
+
--- /dev/null
+From 742e80422397bb0e53a0352ee493019024a9c902 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 20:34:39 +0800
+Subject: migrate_pages: split unmap_and_move() to _unmap() and _move()
+
+From: Huang Ying <ying.huang@intel.com>
+
+[ Upstream commit 64c8902ed4418317cd416c566f896bd4a92b2efc ]
+
+This is a preparation patch to batch the folio unmapping and moving.
+
+In this patch, unmap_and_move() is split into migrate_folio_unmap() and
+migrate_folio_move(), so that _unmap() and _move() can be batched in
+separate loops later. To pass some information between unmap and move,
+the otherwise unused dst->mapping and dst->private fields are used.
+
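+As a rough user-space model of that bookkeeping (struct folio is mocked
+down to the two fields involved, and the helper names are shortened; the
+real helpers added below operate on struct folio and struct anon_vma):
+
+  #include <assert.h>
+  #include <stddef.h>
+
+  /* Mock of the two destination-folio fields reused between phases. */
+  struct folio_mock {
+          void *mapping;
+          void *private;
+  };
+
+  static void record(struct folio_mock *dst, unsigned long page_was_mapped,
+                     void *anon_vma)
+  {
+          dst->mapping = anon_vma;
+          dst->private = (void *)page_was_mapped;
+  }
+
+  static void extract(struct folio_mock *dst, int *page_was_mapped,
+                      void **anon_vma)
+  {
+          *anon_vma = dst->mapping;
+          *page_was_mapped = (unsigned long)dst->private;
+          dst->mapping = NULL;
+          dst->private = NULL;
+  }
+
+  int main(void)
+  {
+          static int fake_anon_vma;
+          struct folio_mock dst = { NULL, NULL };
+          int mapped;
+          void *av;
+
+          record(&dst, 1, &fake_anon_vma);   /* end of the unmap phase */
+          extract(&dst, &mapped, &av);       /* start of the move phase */
+          assert(mapped == 1 && av == &fake_anon_vma);
+          return 0;
+  }
+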
+Link: https://lkml.kernel.org/r/20230213123444.155149-5-ying.huang@intel.com
+Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Reviewed-by: Xin Hao <xhao@linux.alibaba.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Bharata B Rao <bharata@amd.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/migrate.h | 1 +
+ mm/migrate.c | 169 ++++++++++++++++++++++++++++++----------
+ 2 files changed, 129 insertions(+), 41 deletions(-)
+
+diff --git a/include/linux/migrate.h b/include/linux/migrate.h
+index 3ef77f52a4f04..7376074f2e1e3 100644
+--- a/include/linux/migrate.h
++++ b/include/linux/migrate.h
+@@ -18,6 +18,7 @@ struct migration_target_control;
+ * - zero on page migration success;
+ */
+ #define MIGRATEPAGE_SUCCESS 0
++#define MIGRATEPAGE_UNMAP 1
+
+ /**
+ * struct movable_operations - Driver page migration
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 40ae91e1a026b..46a1476e188c3 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1011,11 +1011,53 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
+ return rc;
+ }
+
+-static int __unmap_and_move(struct folio *src, struct folio *dst,
++/*
++ * To record some information during migration, we use some unused
++ * fields (mapping and private) of struct folio of the newly allocated
++ * destination folio. This is safe because nobody is using them
++ * except us.
++ */
++static void __migrate_folio_record(struct folio *dst,
++ unsigned long page_was_mapped,
++ struct anon_vma *anon_vma)
++{
++ dst->mapping = (void *)anon_vma;
++ dst->private = (void *)page_was_mapped;
++}
++
++static void __migrate_folio_extract(struct folio *dst,
++ int *page_was_mappedp,
++ struct anon_vma **anon_vmap)
++{
++ *anon_vmap = (void *)dst->mapping;
++ *page_was_mappedp = (unsigned long)dst->private;
++ dst->mapping = NULL;
++ dst->private = NULL;
++}
++
++/* Cleanup src folio upon migration success */
++static void migrate_folio_done(struct folio *src,
++ enum migrate_reason reason)
++{
++ /*
++ * Compaction can migrate also non-LRU pages which are
++ * not accounted to NR_ISOLATED_*. They can be recognized
++ * as __PageMovable
++ */
++ if (likely(!__folio_test_movable(src)))
++ mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
++ folio_is_file_lru(src), -folio_nr_pages(src));
++
++ if (reason != MR_MEMORY_FAILURE)
++ /* We release the page in page_handle_poison. */
++ folio_put(src);
++}
++
++static int __migrate_folio_unmap(struct folio *src, struct folio *dst,
+ int force, enum migrate_mode mode)
+ {
+ int rc = -EAGAIN;
+- bool page_was_mapped = false;
++ int page_was_mapped = 0;
+ struct anon_vma *anon_vma = NULL;
+ bool is_lru = !__PageMovable(&src->page);
+
+@@ -1091,8 +1133,8 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
+ goto out_unlock;
+
+ if (unlikely(!is_lru)) {
+- rc = move_to_new_folio(dst, src, mode);
+- goto out_unlock_both;
++ __migrate_folio_record(dst, page_was_mapped, anon_vma);
++ return MIGRATEPAGE_UNMAP;
+ }
+
+ /*
+@@ -1117,11 +1159,42 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
+ VM_BUG_ON_FOLIO(folio_test_anon(src) &&
+ !folio_test_ksm(src) && !anon_vma, src);
+ try_to_migrate(src, 0);
+- page_was_mapped = true;
++ page_was_mapped = 1;
+ }
+
+- if (!folio_mapped(src))
+- rc = move_to_new_folio(dst, src, mode);
++ if (!folio_mapped(src)) {
++ __migrate_folio_record(dst, page_was_mapped, anon_vma);
++ return MIGRATEPAGE_UNMAP;
++ }
++
++ if (page_was_mapped)
++ remove_migration_ptes(src, src, false);
++
++out_unlock_both:
++ folio_unlock(dst);
++out_unlock:
++ /* Drop an anon_vma reference if we took one */
++ if (anon_vma)
++ put_anon_vma(anon_vma);
++ folio_unlock(src);
++out:
++
++ return rc;
++}
++
++static int __migrate_folio_move(struct folio *src, struct folio *dst,
++ enum migrate_mode mode)
++{
++ int rc;
++ int page_was_mapped = 0;
++ struct anon_vma *anon_vma = NULL;
++ bool is_lru = !__PageMovable(&src->page);
++
++ __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
++
++ rc = move_to_new_folio(dst, src, mode);
++ if (unlikely(!is_lru))
++ goto out_unlock_both;
+
+ /*
+ * When successful, push dst to LRU immediately: so that if it
+@@ -1144,12 +1217,10 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
+
+ out_unlock_both:
+ folio_unlock(dst);
+-out_unlock:
+ /* Drop an anon_vma reference if we took one */
+ if (anon_vma)
+ put_anon_vma(anon_vma);
+ folio_unlock(src);
+-out:
+ /*
+ * If migration is successful, decrease refcount of dst,
+ * which will not free the page because new page owner increased
+@@ -1161,19 +1232,15 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
+ return rc;
+ }
+
+-/*
+- * Obtain the lock on folio, remove all ptes and migrate the folio
+- * to the newly allocated folio in dst.
+- */
+-static int unmap_and_move(new_page_t get_new_page,
+- free_page_t put_new_page,
+- unsigned long private, struct folio *src,
+- int force, enum migrate_mode mode,
+- enum migrate_reason reason,
+- struct list_head *ret)
++/* Obtain the lock on page, remove all ptes. */
++static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
++ unsigned long private, struct folio *src,
++ struct folio **dstp, int force,
++ enum migrate_mode mode, enum migrate_reason reason,
++ struct list_head *ret)
+ {
+ struct folio *dst;
+- int rc = MIGRATEPAGE_SUCCESS;
++ int rc = MIGRATEPAGE_UNMAP;
+ struct page *newpage = NULL;
+
+ if (!thp_migration_supported() && folio_test_transhuge(src))
+@@ -1184,20 +1251,49 @@ static int unmap_and_move(new_page_t get_new_page,
+ folio_clear_active(src);
+ folio_clear_unevictable(src);
+ /* free_pages_prepare() will clear PG_isolated. */
+- goto out;
++ list_del(&src->lru);
++ migrate_folio_done(src, reason);
++ return MIGRATEPAGE_SUCCESS;
+ }
+
+ newpage = get_new_page(&src->page, private);
+ if (!newpage)
+ return -ENOMEM;
+ dst = page_folio(newpage);
++ *dstp = dst;
+
+ dst->private = NULL;
+- rc = __unmap_and_move(src, dst, force, mode);
++ rc = __migrate_folio_unmap(src, dst, force, mode);
++ if (rc == MIGRATEPAGE_UNMAP)
++ return rc;
++
++ /*
++ * A folio that has not been unmapped will be restored to
++ * right list unless we want to retry.
++ */
++ if (rc != -EAGAIN)
++ list_move_tail(&src->lru, ret);
++
++ if (put_new_page)
++ put_new_page(&dst->page, private);
++ else
++ folio_put(dst);
++
++ return rc;
++}
++
++/* Migrate the folio to the newly allocated folio in dst. */
++static int migrate_folio_move(free_page_t put_new_page, unsigned long private,
++ struct folio *src, struct folio *dst,
++ enum migrate_mode mode, enum migrate_reason reason,
++ struct list_head *ret)
++{
++ int rc;
++
++ rc = __migrate_folio_move(src, dst, mode);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ set_page_owner_migrate_reason(&dst->page, reason);
+
+-out:
+ if (rc != -EAGAIN) {
+ /*
+ * A folio that has been migrated has all references
+@@ -1213,20 +1309,7 @@ static int unmap_and_move(new_page_t get_new_page,
+ * we want to retry.
+ */
+ if (rc == MIGRATEPAGE_SUCCESS) {
+- /*
+- * Compaction can migrate also non-LRU folios which are
+- * not accounted to NR_ISOLATED_*. They can be recognized
+- * as __folio_test_movable
+- */
+- if (likely(!__folio_test_movable(src)))
+- mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
+- folio_is_file_lru(src), -folio_nr_pages(src));
+-
+- if (reason != MR_MEMORY_FAILURE)
+- /*
+- * We release the folio in page_handle_poison.
+- */
+- folio_put(src);
++ migrate_folio_done(src, reason);
+ } else {
+ if (rc != -EAGAIN)
+ list_add_tail(&src->lru, ret);
+@@ -1518,7 +1601,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
+ int pass = 0;
+ bool is_large = false;
+ bool is_thp = false;
+- struct folio *folio, *folio2;
++ struct folio *folio, *folio2, *dst = NULL;
+ int rc, nr_pages;
+ LIST_HEAD(split_folios);
+ bool nosplit = (reason == MR_NUMA_MISPLACED);
+@@ -1545,9 +1628,13 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
+
+ cond_resched();
+
+- rc = unmap_and_move(get_new_page, put_new_page,
+- private, folio, pass > 2, mode,
+- reason, ret_folios);
++ rc = migrate_folio_unmap(get_new_page, put_new_page, private,
++ folio, &dst, pass > 2, mode,
++ reason, ret_folios);
++ if (rc == MIGRATEPAGE_UNMAP)
++ rc = migrate_folio_move(put_new_page, private,
++ folio, dst, mode,
++ reason, ret_folios);
+ /*
+ * The rules are:
+ * Success: folio will be freed
+--
+2.43.0
+
--- /dev/null
+From 8834c3584cdce54f769ee76fdb530bc80d0689a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Oct 2022 16:34:22 +0800
+Subject: mm: migrate: try again if THP split is failed due to page refcnt
+
+From: Baolin Wang <baolin.wang@linux.alibaba.com>
+
+[ Upstream commit fd4a7ac32918d3d7a2d17dc06c5520f45e36eb52 ]
+
+When creating a virtual machine, we use memfd_create() to get a file
+descriptor that can be used to create shared memory mappings with the
+mmap function; the mmap() call sets the MAP_POPULATE flag to allocate
+physical pages for the virtual machine.
+
+When allocating physical pages for the guest, the host can fall back to
+allocating some CMA pages for the guest when over half of the zone's
+free memory is in the CMA area.
+
+In the guest OS, when an application wants to do DMA data transactions,
+our QEMU calls the VFIO_IOMMU_MAP_DMA ioctl to longterm-pin the DMA
+pages and create IOMMU mappings for them. However, when calling the
+VFIO_IOMMU_MAP_DMA ioctl to pin the physical pages, we found that the
+longterm-pin sometimes fails.
+
+After some investigation, we found that the pages used for the DMA
+mapping can contain CMA pages, and these CMA pages can cause the
+longterm-pin to fail because they fail to migrate. The migration failure
+may be caused by a temporary reference count or by a memory allocation
+failure. That makes the VFIO_IOMMU_MAP_DMA ioctl return an error, which
+in turn makes the application fail to start.
+
+One migration failure case I observed (which is not easy to reproduce)
+is that the 'thp_migration_fail' count is 1 and the
+'thp_split_page_failed' count is also 1.
+
+That means that, when migrating a THP in the CMA area, a new THP cannot
+be allocated due to memory fragmentation, so the THP is split. However,
+the THP split also fails, probably because of a temporary reference
+count on this THP. The temporary reference count can be caused by
+dropping page caches (I observed the drop-caches operation on the
+system), but we cannot drop the shmem page caches because they are
+already dirty at that time.
+
+Especially for a THP split failure caused by a temporary reference
+count, we can try again to mitigate the migration failure, as per the
+previous discussion [1].
+
+[1] https://lore.kernel.org/all/470dc638-a300-f261-94b4-e27250e42f96@redhat.com/
+Link: https://lkml.kernel.org/r/6784730480a1df82e8f4cba1ed088e4ac767994b.1666599848.git.baolin.wang@linux.alibaba.com
+Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/huge_memory.c | 4 ++--
+ mm/migrate.c | 19 ++++++++++++++++---
+ 2 files changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 98a1a05f2db2d..f53bc54dacb37 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2728,7 +2728,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ * split PMDs
+ */
+ if (!can_split_folio(folio, &extra_pins)) {
+- ret = -EBUSY;
++ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
+@@ -2780,7 +2780,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ xas_unlock(&xas);
+ local_irq_enable();
+ remap_page(folio, folio_nr_pages(folio));
+- ret = -EBUSY;
++ ret = -EAGAIN;
+ }
+
+ out_unlock:
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 0252aa4ff572e..b0caa89e67d5f 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1518,9 +1518,22 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+ if (is_thp) {
+ nr_thp_failed++;
+ /* THP NUMA faulting doesn't split THP to retry. */
+- if (!nosplit && !try_split_thp(page, &thp_split_pages)) {
+- nr_thp_split++;
+- break;
++ if (!nosplit) {
++ int ret = try_split_thp(page, &thp_split_pages);
++
++ if (!ret) {
++ nr_thp_split++;
++ break;
++ } else if (reason == MR_LONGTERM_PIN &&
++ ret == -EAGAIN) {
++ /*
++ * Try again to split THP to mitigate
++ * the failure of longterm pinning.
++ */
++ thp_retry++;
++ nr_retry_pages += nr_subpages;
++ break;
++ }
+ }
+ } else if (!no_subpage_counting) {
+ nr_failed++;
+--
+2.43.0
+
--- /dev/null
+From d6819b70b7c23771f73c3c02e6128860b55539bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 09:23:45 +0800
+Subject: mm/migrate.c: stop using 0 as NULL pointer
+
+From: Yang Li <yang.lee@linux.alibaba.com>
+
+[ Upstream commit 4c74b65f478dc9353780a6be17fc82f1b06cea80 ]
+
+mm/migrate.c:1198:24: warning: Using plain integer as NULL pointer
+
+Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3080
+Link: https://lkml.kernel.org/r/20221116012345.84870-1-yang.lee@linux.alibaba.com
+Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
+Reported-by: Abaci Robot <abaci@linux.alibaba.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 35e41024c4c2 ("vmscan,migrate: fix page count imbalance on node stats when demoting pages")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 562f819dc6189..81444abf54dba 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1192,7 +1192,7 @@ static int unmap_and_move(new_page_t get_new_page,
+ return -ENOMEM;
+ dst = page_folio(newpage);
+
+- dst->private = 0;
++ dst->private = NULL;
+ rc = __unmap_and_move(src, dst, force, mode);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ set_page_owner_migrate_reason(&dst->page, reason);
+--
+2.43.0
+
--- /dev/null
+From 1c6a2cba75776f1944c170b81c1ad027ef2a12f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:16 +0000
+Subject: mm/page_alloc: explicitly define how __GFP_HIGH non-blocking
+ allocations accesses reserves
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 1ebbb21811b76c3b932959787f37985af36f62fa ]
+
+GFP_ATOMIC allocations get flagged ALLOC_HARDER, which is a vague
+description. In preparation for the removal of GFP_ATOMIC, redefine
+__GFP_ATOMIC to simply mean non-blocking and rename ALLOC_HARDER to
+ALLOC_NON_BLOCK accordingly. __GFP_HIGH is required for access to
+reserves, but non-blocking callers that also pass __GFP_HIGH are granted
+more access. For example, GFP_NOWAIT is non-blocking but has no special
+access to reserves. A __GFP_NOFAIL blocking allocation is granted access
+similar to __GFP_HIGH if the only alternative is an OOM kill.
+
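+For illustration, a minimal user-space sketch of the resulting watermark
+arithmetic (the flag values are taken from mm/internal.h in this series;
+the min value in main() is made up):
+
+  #include <stdio.h>
+
+  #define ALLOC_NON_BLOCK   0x10  /* caller cannot block */
+  #define ALLOC_MIN_RESERVE 0x20  /* __GFP_HIGH set */
+
+  /* Effective min watermark after reserve access is applied. */
+  static long effective_min(long min, unsigned int alloc_flags)
+  {
+          if (alloc_flags & ALLOC_MIN_RESERVE) {
+                  min -= min / 2;          /* __GFP_HIGH: 50% of the reserve */
+                  if (alloc_flags & ALLOC_NON_BLOCK)
+                          min -= min / 4;  /* plus non-blocking: 62.5% */
+          }
+          return min;
+  }
+
+  int main(void)
+  {
+          long min = 1000;  /* hypothetical min watermark in pages */
+
+          printf("GFP_NOWAIT          -> %ld\n",
+                 effective_min(min, ALLOC_NON_BLOCK));
+          printf("blocking __GFP_HIGH -> %ld\n",
+                 effective_min(min, ALLOC_MIN_RESERVE));
+          printf("GFP_ATOMIC          -> %ld\n",
+                 effective_min(min, ALLOC_MIN_RESERVE | ALLOC_NON_BLOCK));
+          return 0;
+  }
+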
+Link: https://lkml.kernel.org/r/20230113111217.14134-6-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 7 +++++--
+ mm/page_alloc.c | 44 ++++++++++++++++++++++++--------------------
+ 2 files changed, 29 insertions(+), 22 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index cd095ce2f199e..a50bc08337d21 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -754,7 +754,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_OOM ALLOC_NO_WATERMARKS
+ #endif
+
+-#define ALLOC_HARDER 0x10 /* try to alloc harder */
++#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access
++ * to 25% of the min watermark or
++ * 62.5% if __GFP_HIGH is set.
++ */
+ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
+ * of the min watermark.
+ */
+@@ -769,7 +772,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+
+ /* Flags that allow allocations below the min watermark. */
+-#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
++#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
+
+ enum ttu_flags;
+ struct tlbflush_unmap_batch;
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 6ab53e47ccea1..49dc4ba88c278 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3996,18 +3996,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ * __GFP_HIGH allows access to 50% of the min reserve as well
+ * as OOM.
+ */
+- if (alloc_flags & ALLOC_MIN_RESERVE)
++ if (alloc_flags & ALLOC_MIN_RESERVE) {
+ min -= min / 2;
+
+- /*
+- * Non-blocking allocations can access some of the reserve
+- * with more access if also __GFP_HIGH. The reasoning is that
+- * a non-blocking caller may incur a more severe penalty
+- * if it cannot get memory quickly, particularly if it's
+- * also __GFP_HIGH.
+- */
+- if (alloc_flags & ALLOC_HARDER)
+- min -= min / 4;
++ /*
++ * Non-blocking allocations (e.g. GFP_ATOMIC) can
++ * access more reserves than just __GFP_HIGH. Other
++ * non-blocking allocations requests such as GFP_NOWAIT
++ * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get
++ * access to the min reserve.
++ */
++ if (alloc_flags & ALLOC_NON_BLOCK)
++ min -= min / 4;
++ }
+
+ /*
+ * OOM victims can try even harder than the normal reserve
+@@ -4858,28 +4859,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
+ * The caller may dip into page reserves a bit more if the caller
+ * cannot run direct reclaim, or if the caller has realtime scheduling
+ * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
+- * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
++ * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH).
+ */
+ alloc_flags |= (__force int)
+ (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
+
+- if (gfp_mask & __GFP_ATOMIC) {
++ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
+ /*
+ * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+ * if it can't schedule.
+ */
+ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+- alloc_flags |= ALLOC_HARDER;
++ alloc_flags |= ALLOC_NON_BLOCK;
+
+ if (order > 0)
+ alloc_flags |= ALLOC_HIGHATOMIC;
+ }
+
+ /*
+- * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+- * comment for __cpuset_node_allowed().
++ * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
++ * GFP_ATOMIC) rather than fail, see the comment for
++ * __cpuset_node_allowed().
+ */
+- alloc_flags &= ~ALLOC_CPUSET;
++ if (alloc_flags & ALLOC_MIN_RESERVE)
++ alloc_flags &= ~ALLOC_CPUSET;
+ } else if (unlikely(rt_task(current)) && in_task())
+ alloc_flags |= ALLOC_MIN_RESERVE;
+
+@@ -5312,12 +5315,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ WARN_ON_ONCE_GFP(costly_order, gfp_mask);
+
+ /*
+- * Help non-failing allocations by giving them access to memory
+- * reserves but do not use ALLOC_NO_WATERMARKS because this
++ * Help non-failing allocations by giving some access to memory
++ * reserves normally used for high priority non-blocking
++ * allocations but do not use ALLOC_NO_WATERMARKS because this
+ * could deplete whole memory reserves which would just make
+- * the situation worse
++ * the situation worse.
+ */
+- page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
++ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
+ if (page)
+ goto got_pg;
+
+--
+2.43.0
+
--- /dev/null
+From 7f56b2ec2c70a47a901ef2da605c6c7552cd71c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:15 +0000
+Subject: mm/page_alloc: explicitly define what alloc flags deplete min
+ reserves
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit ab3508854353793cd35e348fde89a5c09b2fd8b5 ]
+
+As there are more ALLOC_ flags that affect reserves, define what flags
+affect reserves and clarify the effect of each flag.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-5-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 3 +++
+ mm/page_alloc.c | 34 ++++++++++++++++++++++------------
+ 2 files changed, 25 insertions(+), 12 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index f0f6198462cc1..cd095ce2f199e 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -768,6 +768,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */
+ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+
++/* Flags that allow allocations below the min watermark. */
++#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
++
+ enum ttu_flags;
+ struct tlbflush_unmap_batch;
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 8e1f4d779b26c..6ab53e47ccea1 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3956,15 +3956,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
+ static inline long __zone_watermark_unusable_free(struct zone *z,
+ unsigned int order, unsigned int alloc_flags)
+ {
+- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
+ long unusable_free = (1 << order) - 1;
+
+ /*
+- * If the caller does not have rights to ALLOC_HARDER then subtract
+- * the high-atomic reserves. This will over-estimate the size of the
+- * atomic reserve but it avoids a search.
++ * If the caller does not have rights to reserves below the min
++ * watermark then subtract the high-atomic reserves. This will
++ * over-estimate the size of the atomic reserve but it avoids a search.
+ */
+- if (likely(!alloc_harder))
++ if (likely(!(alloc_flags & ALLOC_RESERVES)))
+ unusable_free += z->nr_reserved_highatomic;
+
+ #ifdef CONFIG_CMA
+@@ -3988,25 +3987,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ {
+ long min = mark;
+ int o;
+- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
+
+ /* free_pages may go negative - that's OK */
+ free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
+
+- if (alloc_flags & ALLOC_MIN_RESERVE)
+- min -= min / 2;
++ if (unlikely(alloc_flags & ALLOC_RESERVES)) {
++ /*
++ * __GFP_HIGH allows access to 50% of the min reserve as well
++ * as OOM.
++ */
++ if (alloc_flags & ALLOC_MIN_RESERVE)
++ min -= min / 2;
+
+- if (unlikely(alloc_harder)) {
+ /*
+- * OOM victims can try even harder than normal ALLOC_HARDER
++ * Non-blocking allocations can access some of the reserve
++ * with more access if also __GFP_HIGH. The reasoning is that
++ * a non-blocking caller may incur a more severe penalty
++ * if it cannot get memory quickly, particularly if it's
++ * also __GFP_HIGH.
++ */
++ if (alloc_flags & ALLOC_HARDER)
++ min -= min / 4;
++
++ /*
++ * OOM victims can try even harder than the normal reserve
+ * users on the grounds that it's definitely going to be in
+ * the exit path shortly and free memory. Any allocation it
+ * makes during the free path will be small and short-lived.
+ */
+ if (alloc_flags & ALLOC_OOM)
+ min -= min / 2;
+- else
+- min -= min / 4;
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 69f11057cb93d04feb54dbe7ac271978e1263a69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:14 +0000
+Subject: mm/page_alloc: explicitly record high-order atomic allocations in
+ alloc_flags
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit eb2e2b425c6984ca8034448a3f2c680622bd3d4d ]
+
+A high-order ALLOC_HARDER allocation is assumed to be atomic. While that
+is accurate, it changes later in the series. In preparation, explicitly
+record high-order atomic allocations in gfp_to_alloc_flags().
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-4-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 1 +
+ mm/page_alloc.c | 29 +++++++++++++++++++++++------
+ 2 files changed, 24 insertions(+), 6 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index 1be79a5147549..f0f6198462cc1 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -765,6 +765,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #else
+ #define ALLOC_NOFRAGMENT 0x0
+ #endif
++#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */
+ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+
+ enum ttu_flags;
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index e78ab23eb1743..8e1f4d779b26c 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3713,10 +3713,20 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
+ * reserved for high-order atomic allocation, so order-0
+ * request should skip it.
+ */
+- if (order > 0 && alloc_flags & ALLOC_HARDER)
++ if (alloc_flags & ALLOC_HIGHATOMIC)
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+ if (!page) {
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
++
++ /*
++ * If the allocation fails, allow OOM handling access
++ * to HIGHATOMIC reserves as failing now is worse than
++ * failing a high-order atomic allocation in the
++ * future.
++ */
++ if (!page && (alloc_flags & ALLOC_OOM))
++ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
++
+ if (!page) {
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return NULL;
+@@ -4030,8 +4040,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ return true;
+ }
+ #endif
+- if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
++ if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) &&
++ !free_area_empty(area, MIGRATE_HIGHATOMIC)) {
+ return true;
++ }
+ }
+ return false;
+ }
+@@ -4293,7 +4305,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
+ * If this is a high-order atomic allocation then check
+ * if the pageblock should be reserved for the future
+ */
+- if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
++ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+ reserve_highatomic_pageblock(page, zone, order);
+
+ return page;
+@@ -4820,7 +4832,7 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
+ }
+
+ static inline unsigned int
+-gfp_to_alloc_flags(gfp_t gfp_mask)
++gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
+ {
+ unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+
+@@ -4846,8 +4858,13 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+ * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+ * if it can't schedule.
+ */
+- if (!(gfp_mask & __GFP_NOMEMALLOC))
++ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+ alloc_flags |= ALLOC_HARDER;
++
++ if (order > 0)
++ alloc_flags |= ALLOC_HIGHATOMIC;
++ }
++
+ /*
+ * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+ * comment for __cpuset_node_allowed().
+@@ -5056,7 +5073,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ * kswapd needs to be woken up, and to avoid the cost of setting up
+ * alloc_flags precisely. So we do that now.
+ */
+- alloc_flags = gfp_to_alloc_flags(gfp_mask);
++ alloc_flags = gfp_to_alloc_flags(gfp_mask, order);
+
+ /*
+ * We need to recalculate the starting point for the zonelist iterator
+--
+2.43.0
+
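The point of this patch is that gfp_to_alloc_flags() now sees the order and
tags non-blocking high-order requests with ALLOC_HIGHATOMIC, instead of the
call sites inferring "high-order atomic" from order > 0 plus ALLOC_HARDER.
A minimal userspace sketch of that derivation; classify() and the can_block
parameter are invented for illustration, while the flag values are taken
from the hunks above.

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_HARDER      0x10
#define ALLOC_HIGHATOMIC  0x200  /* allows access to MIGRATE_HIGHATOMIC */

static unsigned int classify(bool can_block, unsigned int order)
{
	unsigned int flags = 0;

	if (!can_block) {
		flags |= ALLOC_HARDER;
		if (order > 0)           /* high-order atomic request */
			flags |= ALLOC_HIGHATOMIC;
	}
	return flags;
}

int main(void)
{
	printf("order-0 atomic  : %#x\n", classify(false, 0));
	printf("order-3 atomic  : %#x\n", classify(false, 3));
	printf("order-3 blocking: %#x\n", classify(true, 3));
	return 0;
}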
--- /dev/null
+From 035af24a1a0a452608fb425c6bd69b4d36c22548 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Oct 2024 13:07:37 +0100
+Subject: mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic
+ reserves
+
+From: Matt Fleming <mfleming@cloudflare.com>
+
+[ Upstream commit 281dd25c1a018261a04d1b8bf41a0674000bfe38 ]
+
+Under memory pressure it's possible for GFP_ATOMIC order-0 allocations to
+fail even though free pages are available in the highatomic reserves.
+GFP_ATOMIC allocations cannot trigger unreserve_highatomic_pageblock()
+since it's only run from reclaim.
+
+Given that such allocations will pass the watermarks in
+__zone_watermark_unusable_free(), it makes sense to fallback to highatomic
+reserves the same way that ALLOC_OOM can.
+
+This fixes order-0 page allocation failures observed on Cloudflare's fleet
+when handling network packets:
+
+ kswapd1: page allocation failure: order:0, mode:0x820(GFP_ATOMIC),
+ nodemask=(null),cpuset=/,mems_allowed=0-7
+ CPU: 10 PID: 696 Comm: kswapd1 Kdump: loaded Tainted: G O 6.6.43-CUSTOM #1
+ Hardware name: MACHINE
+ Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x3c/0x50
+ warn_alloc+0x13a/0x1c0
+ __alloc_pages_slowpath.constprop.0+0xc9d/0xd10
+ __alloc_pages+0x327/0x340
+ __napi_alloc_skb+0x16d/0x1f0
+ bnxt_rx_page_skb+0x96/0x1b0 [bnxt_en]
+ bnxt_rx_pkt+0x201/0x15e0 [bnxt_en]
+ __bnxt_poll_work+0x156/0x2b0 [bnxt_en]
+ bnxt_poll+0xd9/0x1c0 [bnxt_en]
+ __napi_poll+0x2b/0x1b0
+ bpf_trampoline_6442524138+0x7d/0x1000
+ __napi_poll+0x5/0x1b0
+ net_rx_action+0x342/0x740
+ handle_softirqs+0xcf/0x2b0
+ irq_exit_rcu+0x6c/0x90
+ sysvec_apic_timer_interrupt+0x72/0x90
+ </IRQ>
+
+[mfleming@cloudflare.com: update comment]
+ Link: https://lkml.kernel.org/r/20241015125158.3597702-1-matt@readmodwrite.com
+Link: https://lkml.kernel.org/r/20241011120737.3300370-1-matt@readmodwrite.com
+Link: https://lore.kernel.org/all/CAGis_TWzSu=P7QJmjD58WWiu3zjMTVKSzdOwWE8ORaGytzWJwQ@mail.gmail.com/
+Fixes: 1d91df85f399 ("mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs")
+Signed-off-by: Matt Fleming <mfleming@cloudflare.com>
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 49dc4ba88c278..b87b350b2f405 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3719,12 +3719,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
+
+ /*
+- * If the allocation fails, allow OOM handling access
+- * to HIGHATOMIC reserves as failing now is worse than
+- * failing a high-order atomic allocation in the
+- * future.
++ * If the allocation fails, allow OOM handling and
++ * order-0 (atomic) allocs access to HIGHATOMIC
++ * reserves as failing now is worse than failing a
++ * high-order atomic allocation in the future.
+ */
+- if (!page && (alloc_flags & ALLOC_OOM))
++ if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+
+ if (!page) {
+--
+2.43.0
+
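The behavioural change sits entirely in the fallback order of
rmqueue_buddy(): a request marked ALLOC_HIGHATOMIC tries its reserve first,
everything else tries the normal free lists, and only when that fails may
OOM victims and non-blocking (GFP_ATOMIC-style) callers raid the highatomic
reserve instead of failing outright. A simplified sketch of that decision;
the take_*() helpers and the ALLOC_OOM value are stand-ins, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_NON_BLOCK   0x10   /* GFP_ATOMIC-style caller */
#define ALLOC_HIGHATOMIC  0x200
#define ALLOC_OOM         0x08   /* illustrative value */

static bool take_highatomic(void) { return true;  }  /* reserve has pages  */
static bool take_normal(void)     { return false; }  /* normal lists empty */

static bool rmqueue(unsigned int alloc_flags)
{
	bool got = false;

	if (alloc_flags & ALLOC_HIGHATOMIC)
		got = take_highatomic();
	if (!got) {
		got = take_normal();
		/* last resort before failing the allocation outright */
		if (!got && (alloc_flags & (ALLOC_OOM | ALLOC_NON_BLOCK)))
			got = take_highatomic();
	}
	return got;
}

int main(void)
{
	printf("GFP_ATOMIC order-0 under pressure: %s\n",
	       rmqueue(ALLOC_NON_BLOCK) ? "served from highatomic" : "failed");
	printf("blocking caller under pressure   : %s\n",
	       rmqueue(0) ? "served" : "failed (must reclaim)");
	return 0;
}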
--- /dev/null
+From e6ad0b3e024d77a33bb122f362f753202b75a30e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:12 +0000
+Subject: mm/page_alloc: rename ALLOC_HIGH to ALLOC_MIN_RESERVE
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 524c48072e5673f4511f1ad81493e2485863fd65 ]
+
+Patch series "Discard __GFP_ATOMIC", v3.
+
+Neil's patch has been residing in mm-unstable as commit 2fafb4fe8f7a ("mm:
+discard __GFP_ATOMIC") for a long time and was recently brought up again.
+Most recently, I was worried that __GFP_HIGH allocations could use
+high-order atomic reserves, which is unintentional, but there was no
+response, so let's revisit -- this series reworks how min reserves are used,
+protects high-order reserves, and then finishes with Neil's patch with very
+minor modifications so it fits on top.
+
+There was a review discussion on renaming __GFP_DIRECT_RECLAIM to
+__GFP_ALLOW_BLOCKING, but I didn't think it was that big an issue, and it is
+orthogonal to the removal of __GFP_ATOMIC.
+
+There were some concerns about how the gfp flags affect the min reserves
+but it never reached a solid conclusion so I made my own attempt.
+
+The series tries to iron out some of the details on how reserves are used.
+ALLOC_HIGH becomes ALLOC_MIN_RESERVE and ALLOC_HARDER becomes
+ALLOC_NON_BLOCK and documents how the reserves are affected. For example,
+ALLOC_NON_BLOCK (no direct reclaim) on its own allows 25% of the min
+reserve. ALLOC_MIN_RESERVE (__GFP_HIGH) allows 50% and both combined
+allows deeper access again. ALLOC_OOM allows access to 75%.
+
+High-order atomic allocations are explicitly handled with the caveat that,
+with no __GFP_ATOMIC flag, any high-order allocation that specifies
+__GFP_HIGH and cannot enter direct reclaim will be treated as if it were
+GFP_ATOMIC.
+
+This patch (of 6):
+
+__GFP_HIGH aliases to ALLOC_HIGH but the name does not really hint what it
+means. As ALLOC_HIGH is internal to the allocator, rename it to
+ALLOC_MIN_RESERVE to document that the min reserves can be depleted.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-1-mgorman@techsingularity.net
+Link: https://lkml.kernel.org/r/20230113111217.14134-2-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 4 +++-
+ mm/page_alloc.c | 8 ++++----
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index d01130efce5fb..1be79a5147549 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -755,7 +755,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #endif
+
+ #define ALLOC_HARDER 0x10 /* try to alloc harder */
+-#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
++#define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
++ * of the min watermark.
++ */
+ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
+ #ifdef CONFIG_ZONE_DMA32
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index a905b850d31c4..f5b870780d3fd 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3983,7 +3983,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ /* free_pages may go negative - that's OK */
+ free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
+
+- if (alloc_flags & ALLOC_HIGH)
++ if (alloc_flags & ALLOC_MIN_RESERVE)
+ min -= min / 2;
+
+ if (unlikely(alloc_harder)) {
+@@ -4825,18 +4825,18 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+ unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+
+ /*
+- * __GFP_HIGH is assumed to be the same as ALLOC_HIGH
++ * __GFP_HIGH is assumed to be the same as ALLOC_MIN_RESERVE
+ * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
+ * to save two branches.
+ */
+- BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
++ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_MIN_RESERVE);
+ BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD);
+
+ /*
+ * The caller may dip into page reserves a bit more if the caller
+ * cannot run direct reclaim, or if the caller has realtime scheduling
+ * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
+- * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
++ * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
+ */
+ alloc_flags |= (__force int)
+ (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
+--
+2.43.0
+
--- /dev/null
+From 5fc910982082f797aee07e26aceeec356048aab5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:13 +0000
+Subject: mm/page_alloc: treat RT tasks similar to __GFP_HIGH
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit c988dcbecf3fd5430921eaa3fe9054754f76d185 ]
+
+RT tasks are allowed to dip below the min reserve but ALLOC_HARDER is
+typically combined with ALLOC_MIN_RESERVE so RT tasks are a little
+unusual. While there is some justification for allowing RT tasks access
+to memory reserves, there is a strong chance that a RT task that is also
+under memory pressure is at risk of missing deadlines anyway. Relax how
+much reserves an RT task can access by treating it the same as __GFP_HIGH
+allocations.
+
+Note that in a future kernel release the RT special casing will be
+removed. Hard realtime tasks should be locking down resources in advance
+and ensuring enough memory is available. Even a soft-realtime task like
+audio or video live decoding which cannot jitter should be allocating both
+memory and any disk space required up-front before the recording starts
+instead of relying on reserves. At best, reserve access will only delay
+the problem by a very short interval.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-3-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index f5b870780d3fd..e78ab23eb1743 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -4854,7 +4854,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+ */
+ alloc_flags &= ~ALLOC_CPUSET;
+ } else if (unlikely(rt_task(current)) && in_task())
+- alloc_flags |= ALLOC_HARDER;
++ alloc_flags |= ALLOC_MIN_RESERVE;
+
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
+
+--
+2.43.0
+
--- /dev/null
+From 60268a8bc1d37e87324ab48e3fa7c47d3a7306b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Sep 2024 22:41:37 +0500
+Subject: nvmet-auth: assign dh_key to NULL after kfree_sensitive
+
+From: Vitaliy Shevtsov <v.shevtsov@maxima.ru>
+
+[ Upstream commit d2f551b1f72b4c508ab9298419f6feadc3b5d791 ]
+
+ctrl->dh_key might be used across multiple calls to nvmet_setup_dhgroup()
+for the same controller. So it's better to nullify it after release on the
+error path in order to avoid a double free later in nvmet_destroy_auth().
+
+Found by Linux Verification Center (linuxtesting.org) with Svace.
+
+Fixes: 7a277c37d352 ("nvmet-auth: Diffie-Hellman key exchange support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Vitaliy Shevtsov <v.shevtsov@maxima.ru>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/auth.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
+index aacc05ec00c2b..74791078fdebc 100644
+--- a/drivers/nvme/target/auth.c
++++ b/drivers/nvme/target/auth.c
+@@ -101,6 +101,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
+ pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
+ __func__, ctrl->cntlid, ret);
+ kfree_sensitive(ctrl->dh_key);
++ ctrl->dh_key = NULL;
+ return ret;
+ }
+ ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);
+--
+2.43.0
+
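The fix is the standard "clear the pointer right after freeing it" pattern:
a later cleanup path can then free unconditionally because freeing NULL is a
no-op. A userspace illustration of the same shape; the struct and helper
names are invented, and free() stands in for kfree_sensitive().

#include <stdlib.h>

struct ctrl {
	unsigned char *dh_key;
	size_t dh_keysize;
};

static int setup_key(struct ctrl *c, size_t len, int generation_fails)
{
	free(c->dh_key);             /* drop any previous key */
	c->dh_key = calloc(1, len);
	if (!c->dh_key)
		return -1;
	if (generation_fails) {      /* e.g. private key generation failed */
		free(c->dh_key);
		c->dh_key = NULL;    /* the one-line fix: no stale pointer */
		return -1;
	}
	c->dh_keysize = len;
	return 0;
}

static void destroy(struct ctrl *c)
{
	free(c->dh_key);             /* safe even after a failed setup_key() */
	c->dh_key = NULL;
}

int main(void)
{
	struct ctrl c = { 0 };

	setup_key(&c, 32, 1);        /* take the failing path */
	destroy(&c);                 /* no double free, dh_key is NULL */
	return 0;
}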
--- /dev/null
+From e385b2a0a317a67940c499f3891df7a28f222d5a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 19:43:47 +0800
+Subject: ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow
+
+From: Edward Adam Davis <eadavis@qq.com>
+
+[ Upstream commit bc0a2f3a73fcdac651fca64df39306d1e5ebe3b0 ]
+
+Syzbot reported a kernel BUG in ocfs2_truncate_inline. There are two
+reasons for this: first, the parameter value passed is greater than
+ocfs2_max_inline_data_with_xattr; second, the start and end parameters of
+ocfs2_truncate_inline are "unsigned int".
+
+So, we need to add a sanity check for byte_start and byte_len right before
+ocfs2_truncate_inline() in ocfs2_remove_inode_range(); if they are greater
+than ocfs2_max_inline_data_with_xattr, return -EINVAL.
+
+Link: https://lkml.kernel.org/r/tencent_D48DB5122ADDAEDDD11918CFB68D93258C07@qq.com
+Fixes: 1afc32b95233 ("ocfs2: Write support for inline data")
+Signed-off-by: Edward Adam Davis <eadavis@qq.com>
+Reported-by: syzbot+81092778aac03460d6b7@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=81092778aac03460d6b7
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ocfs2/file.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
+index f502bb2ce2ea7..ea7c79e8ce429 100644
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1784,6 +1784,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
+ return 0;
+
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
++ int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
++
++ if (byte_start > id_count || byte_start + byte_len > id_count) {
++ ret = -EINVAL;
++ mlog_errno(ret);
++ goto out;
++ }
++
+ ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
+ byte_start + byte_len, 0);
+ if (ret) {
+--
+2.43.0
+
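The hazard being closed is a silent narrowing conversion: byte_start and
byte_len are u64 in ocfs2_remove_inode_range() but ocfs2_truncate_inline()
takes unsigned int, so an oversized range must be rejected before the call.
A userspace sketch of the same check; the function names, the max_inline
value and the example range are illustrative only.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int truncate_inline(unsigned int start, unsigned int end)
{
	/* stand-in for ocfs2_truncate_inline(): only sees 32-bit values */
	printf("truncating [%u, %u)\n", start, end);
	return 0;
}

static int remove_range(uint64_t byte_start, uint64_t byte_len,
			uint64_t max_inline)
{
	/* the added sanity check: reject ranges beyond the inline area */
	if (byte_start > max_inline || byte_start + byte_len > max_inline)
		return -EINVAL;

	return truncate_inline(byte_start, byte_start + byte_len);
}

int main(void)
{
	/* without the check, 0x100000000 would be truncated to 0 below */
	if (remove_range(0, 0x100000000ULL, 3896) == -EINVAL)
		printf("rejected oversized inline range\n");
	return 0;
}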
--- /dev/null
+From 253cd32230547e4e3a73363d58169af99d480326 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 29 Sep 2024 16:02:33 +0200
+Subject: riscv: efi: Set NX compat flag in PE/COFF header
+
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+
+[ Upstream commit d41373a4b910961df5a5e3527d7bde6ad45ca438 ]
+
+The IMAGE_DLLCHARACTERISTICS_NX_COMPAT flag informs the firmware that the
+EFI binary does not rely on pages that are both executable and
+writable.
+
+The flag is used by some distro versions of GRUB to decide if the EFI
+binary may be executed.
+
+As the Linux kernel neither has RWX sections nor needs RWX pages for
+relocation we should set the flag.
+
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
+Fixes: cb7d2dd5612a ("RISC-V: Add PE/COFF header for EFI stub")
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240929140233.211800-1-heinrich.schuchardt@canonical.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/efi-header.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
+index 8e733aa48ba6c..c306f3a6a800e 100644
+--- a/arch/riscv/kernel/efi-header.S
++++ b/arch/riscv/kernel/efi-header.S
+@@ -59,7 +59,7 @@ extra_header_fields:
+ .long efi_header_end - _start // SizeOfHeaders
+ .long 0 // CheckSum
+ .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
+- .short 0 // DllCharacteristics
++ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics
+ .quad 0 // SizeOfStackReserve
+ .quad 0 // SizeOfStackCommit
+ .quad 0 // SizeOfHeapReserve
+--
+2.43.0
+
--- /dev/null
+From 7c946b69fbe00d6a7ea385b4e627abd569037584 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:39 +0800
+Subject: riscv: Remove duplicated GET_RM
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 164f66de6bb6ef454893f193c898dc8f1da6d18b ]
+
+The macro GET_RM is defined twice in this file; one definition can be removed.
+
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241008094141.549248-3-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps_misaligned.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
+index 5348d842c7453..3d16cc803220e 100644
+--- a/arch/riscv/kernel/traps_misaligned.c
++++ b/arch/riscv/kernel/traps_misaligned.c
+@@ -132,8 +132,6 @@
+ #define REG_PTR(insn, pos, regs) \
+ (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))
+
+-#define GET_RM(insn) (((insn) >> 12) & 7)
+-
+ #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
+ #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
+ #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
+--
+2.43.0
+
--- /dev/null
+From c50a27b625ad9865ab4d7c4464b650f3309d5ba8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:38 +0800
+Subject: riscv: Remove unused GENERATING_ASM_OFFSETS
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 46d4e5ac6f2f801f97bcd0ec82365969197dc9b1 ]
+
+The macro is not used in the current version of the kernel, so it looks like
+it can be removed to avoid a build warning:
+
+../arch/riscv/kernel/asm-offsets.c: At top level:
+../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" is not used [-Wunused-macros]
+ 7 | #define GENERATING_ASM_OFFSETS
+
+Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()")
+Cc: stable@vger.kernel.org
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Link: https://lore.kernel.org/r/20241008094141.549248-2-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/asm-offsets.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index df9444397908d..1ecafbcee9a0a 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -4,8 +4,6 @@
+ * Copyright (C) 2017 SiFive
+ */
+
+-#define GENERATING_ASM_OFFSETS
+-
+ #include <linux/kbuild.h>
+ #include <linux/mm.h>
+ #include <linux/sched.h>
+--
+2.43.0
+
--- /dev/null
+From ff7ce41d5795e9e45aae3d280bfb411d40248dca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 11:20:10 +0800
+Subject: riscv: Use '%u' to format the output of 'cpu'
+
+From: WangYuli <wangyuli@uniontech.com>
+
+[ Upstream commit e0872ab72630dada3ae055bfa410bf463ff1d1e0 ]
+
+'cpu' is an unsigned integer, so its conversion specifier should
+be %u, not %d.
+
+Suggested-by: Wentao Guan <guanwentao@uniontech.com>
+Suggested-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/all/alpine.DEB.2.21.2409122309090.40372@angie.orcam.me.uk/
+Signed-off-by: WangYuli <wangyuli@uniontech.com>
+Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
+Tested-by: Charlie Jenkins <charlie@rivosinc.com>
+Fixes: f1e58583b9c7 ("RISC-V: Support cpu hotplug")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/4C127DEECDA287C8+20241017032010.96772-1-wangyuli@uniontech.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/cpu-hotplug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
+index f7a832e3a1d1d..462b3631663f9 100644
+--- a/arch/riscv/kernel/cpu-hotplug.c
++++ b/arch/riscv/kernel/cpu-hotplug.c
+@@ -65,7 +65,7 @@ void __cpu_die(unsigned int cpu)
+ if (cpu_ops[cpu]->cpu_is_stopped)
+ ret = cpu_ops[cpu]->cpu_is_stopped(cpu);
+ if (ret)
+- pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
++ pr_warn("CPU%u may not have stopped: %d\n", cpu, ret);
+ }
+
+ /*
+--
+2.43.0
+
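The difference only becomes visible once the value no longer fits in a
signed int; below is a tiny userspace illustration, with plain printf()
standing in for pr_warn().

#include <stdio.h>

int main(void)
{
	unsigned int cpu = 3000000000U;  /* deliberately above INT_MAX */

	printf("as %%d would render it: CPU%d\n", (int)cpu);  /* negative */
	printf("as %%u renders it:      CPU%u\n", cpu);       /* correct  */
	return 0;
}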
--- /dev/null
+From d06392f489cede0c9e276cf66ff324f9b61a1157 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 10:36:24 +0200
+Subject: riscv: vdso: Prevent the compiler from inserting calls to memset()
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit bf40167d54d55d4b54d0103713d86a8638fb9290 ]
+
+The compiler is smart enough to insert a call to memset() in
+riscv_vdso_get_cpus(), which generates a dynamic relocation.
+
+So prevent this by using the -fno-builtin option.
+
+Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API")
+Cc: stable@vger.kernel.org
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20241016083625.136311-2-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/vdso/Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
+index 06e6b27f3bcc9..c1b68f962bada 100644
+--- a/arch/riscv/kernel/vdso/Makefile
++++ b/arch/riscv/kernel/vdso/Makefile
+@@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+
+ ccflags-y := -fno-stack-protector
+ ccflags-y += -DDISABLE_BRANCH_PROFILING
++ccflags-y += -fno-builtin
+
+ ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
+--
+2.43.0
+
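The underlying issue is that at -O2 the compiler may recognize a zeroing
loop and replace it with a library call to memset(), which inside a vDSO
would need a dynamic relocation. A small illustration; the struct and the
function below are hypothetical stand-ins, not the real vDSO code.

/* zero.c -- compare:  gcc -O2 -S zero.c   vs   gcc -O2 -fno-builtin -S zero.c */
struct cpu_mask {
	unsigned long bits[16];
};

void clear_mask(struct cpu_mask *m)
{
	/* at -O2 this loop is typically turned into a call to memset() */
	for (int i = 0; i < 16; i++)
		m->bits[i] = 0;
}

With -fno-builtin the compiler no longer emits the out-of-line memset()
call, which is exactly what the Makefile change above relies on.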
iio-adc-ad7124-fix-division-by-zero-in-ad7124_set_channel_odr.patch
iio-light-veml6030-fix-microlux-value-calculation.patch
nilfs2-fix-potential-deadlock-with-newly-created-symlinks.patch
+block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch
+cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch
+riscv-vdso-prevent-the-compiler-from-inserting-calls.patch
+alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch
+riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch
+riscv-use-u-to-format-the-output-of-cpu.patch
+riscv-remove-unused-generating_asm_offsets.patch
+riscv-remove-duplicated-get_rm.patch
+cxl-acpi-move-rescan-to-the-workqueue.patch
+cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch
+mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch
+mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch
+mm-page_alloc-explicitly-record-high-order-atomic-al.patch
+mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch
+mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch
+mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch
+ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch
+mctp-i2c-handle-null-header-address.patch
+alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch
+nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch
+kasan-remove-vmalloc_percpu-test.patch
+io_uring-rename-kiocb_end_write-local-helper.patch
+fs-create-kiocb_-start-end-_write-helpers.patch
+io_uring-use-kiocb_-start-end-_write-helpers.patch
+io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch
+mm-migrate-try-again-if-thp-split-is-failed-due-to-p.patch
+migrate-convert-unmap_and_move-to-use-folios.patch
+migrate-convert-migrate_pages-to-use-folios.patch
+mm-migrate.c-stop-using-0-as-null-pointer.patch
+migrate_pages-organize-stats-with-struct-migrate_pag.patch
+migrate_pages-separate-hugetlb-folios-migration.patch
+migrate_pages-restrict-number-of-pages-to-migrate-in.patch
+migrate_pages-split-unmap_and_move-to-_unmap-and-_mo.patch
+vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch
--- /dev/null
+From 1f94221149da9099afe301073304fc26a2ec2ac6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2024 10:17:24 -0400
+Subject: vmscan,migrate: fix page count imbalance on node stats when demoting
+ pages
+
+From: Gregory Price <gourry@gourry.net>
+
+[ Upstream commit 35e41024c4c2b02ef8207f61b9004f6956cf037b ]
+
+When numa balancing is enabled with demotion, vmscan will call
+migrate_pages when shrinking LRUs. migrate_pages will decrement the
+node's isolated page count, leading to an imbalanced count when
+invoked from (MG)LRU code.
+
+The result is dmesg output like such:
+
+$ cat /proc/sys/vm/stat_refresh
+
+[77383.088417] vmstat_refresh: nr_isolated_anon -103212
+[77383.088417] vmstat_refresh: nr_isolated_file -899642
+
+This negative value may impact compaction and reclaim throttling.
+
+The following path produces the decrement:
+
+shrink_folio_list
+ demote_folio_list
+ migrate_pages
+ migrate_pages_batch
+ migrate_folio_move
+ migrate_folio_done
+ mod_node_page_state(-ve) <- decrement
+
+This path happens for SUCCESSFUL migrations, not failures. Typically
+callers to migrate_pages are required to handle putback/accounting for
+failures, but this is already handled in the shrink code.
+
+When accounting for migrations, instead do not decrement the count when
+the migration reason is MR_DEMOTION. As of v6.11, this demotion logic
+is the only source of MR_DEMOTION.
+
+Link: https://lkml.kernel.org/r/20241025141724.17927-1-gourry@gourry.net
+Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim")
+Signed-off-by: Gregory Price <gourry@gourry.net>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
+Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Wei Xu <weixugc@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 46a1476e188c3..9ff5d77b61a3e 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1044,7 +1044,7 @@ static void migrate_folio_done(struct folio *src,
+ * not accounted to NR_ISOLATED_*. They can be recognized
+ * as __PageMovable
+ */
+- if (likely(!__folio_test_movable(src)))
++ if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION)
+ mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
+ folio_is_file_lru(src), -folio_nr_pages(src));
+
+--
+2.43.0
+
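The imbalance is easiest to see as a pair of counter updates. A toy
userspace model follows; the helpers are invented, and only the reason check
mirrors the one-line fix above.

#include <stdio.h>

enum migrate_reason { MR_MEMORY_HOTPLUG, MR_DEMOTION, MR_NUMA_MISPLACED };

static long nr_isolated;                 /* stands in for NR_ISOLATED_* */

static void isolate(long pages)          { nr_isolated += pages; }

static void migration_done(long pages, enum migrate_reason reason)
{
	/* the fix: demoted folios are accounted by the reclaim path itself */
	if (reason != MR_DEMOTION)
		nr_isolated -= pages;
}

int main(void)
{
	/* ordinary migration: the done-path drops the count it took */
	isolate(8);
	migration_done(8, MR_NUMA_MISPLACED);

	/* demotion from reclaim: shrink code drops the count on its own */
	isolate(8);
	migration_done(8, MR_DEMOTION);
	nr_isolated -= 8;                /* the shrink code's own accounting */

	printf("nr_isolated = %ld (would be -8 without the reason check)\n",
	       nr_isolated);
	return 0;
}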