From a884e3000cc03cd50574bc3e2ecc070982f32da1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Jun 2024 11:34:56 +0200 Subject: [PATCH] 5.15-stable patches added patches: mm-huge_memory-don-t-unpoison-huge_zero_folio.patch scsi-mpi3mr-fix-ata-ncq-priority-support.patch tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch --- ...emory-don-t-unpoison-huge_zero_folio.patch | 90 +++++++ ...-mpi3mr-fix-ata-ncq-priority-support.patch | 254 ++++++++++++++++++ queue-5.15/series | 3 + ...function_single-in-tick_setup_device.patch | 94 +++++++ 4 files changed, 441 insertions(+) create mode 100644 queue-5.15/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch create mode 100644 queue-5.15/scsi-mpi3mr-fix-ata-ncq-priority-support.patch create mode 100644 queue-5.15/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch diff --git a/queue-5.15/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch b/queue-5.15/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch new file mode 100644 index 00000000000..1d967c03387 --- /dev/null +++ b/queue-5.15/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch @@ -0,0 +1,90 @@ +From fe6f86f4b40855a130a19aa589f9ba7f650423f4 Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Thu, 16 May 2024 20:26:08 +0800 +Subject: mm/huge_memory: don't unpoison huge_zero_folio + +From: Miaohe Lin + +commit fe6f86f4b40855a130a19aa589f9ba7f650423f4 upstream. + +When I did memory failure tests recently, below panic occurs: + + kernel BUG at include/linux/mm.h:1135! 
+ invalid opcode: 0000 [#1] PREEMPT SMP NOPTI + CPU: 9 PID: 137 Comm: kswapd1 Not tainted 6.9.0-rc4-00491-gd5ce28f156fe-dirty #14 + RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 + RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 + RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 + RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 + RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 + R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 + R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 + FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 + Call Trace: + + do_shrink_slab+0x14f/0x6a0 + shrink_slab+0xca/0x8c0 + shrink_node+0x2d0/0x7d0 + balance_pgdat+0x33a/0x720 + kswapd+0x1f3/0x410 + kthread+0xd5/0x100 + ret_from_fork+0x2f/0x50 + ret_from_fork_asm+0x1a/0x30 + + Modules linked in: mce_inject hwpoison_inject + ---[ end trace 0000000000000000 ]--- + RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 + RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 + RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 + RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 + RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 + R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 + R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 + FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 + +The root cause is that HWPoison flag will be set for huge_zero_folio +without increasing the folio refcnt. 
But then unpoison_memory() will +decrease the folio refcnt unexpectedly as it appears like a successfully +hwpoisoned folio leading to VM_BUG_ON_PAGE(page_ref_count(page) == 0) when +releasing huge_zero_folio. + +Skip unpoisoning huge_zero_folio in unpoison_memory() to fix this issue. +We're not prepared to unpoison huge_zero_folio yet. + +Link: https://lkml.kernel.org/r/20240516122608.22610-1-linmiaohe@huawei.com +Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page") +Signed-off-by: Miaohe Lin +Acked-by: David Hildenbrand +Reviewed-by: Yang Shi +Reviewed-by: Oscar Salvador +Reviewed-by: Anshuman Khandual +Cc: Naoya Horiguchi +Cc: Xu Yu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Miaohe Lin +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory-failure.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -2051,6 +2051,13 @@ int unpoison_memory(unsigned long pfn) + + mutex_lock(&mf_mutex); + ++ if (is_huge_zero_page(page)) { ++ unpoison_pr_info("Unpoison: huge zero page is not supported %#lx\n", ++ pfn, &unpoison_rs); ++ ret = -EOPNOTSUPP; ++ goto unlock_mutex; ++ } ++ + if (!PageHWPoison(p)) { + unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n", + pfn, &unpoison_rs); diff --git a/queue-5.15/scsi-mpi3mr-fix-ata-ncq-priority-support.patch b/queue-5.15/scsi-mpi3mr-fix-ata-ncq-priority-support.patch new file mode 100644 index 00000000000..187104c0e49 --- /dev/null +++ b/queue-5.15/scsi-mpi3mr-fix-ata-ncq-priority-support.patch @@ -0,0 +1,254 @@ +From 90e6f08915ec6efe46570420412a65050ec826b2 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Tue, 11 Jun 2024 17:34:35 +0900 +Subject: scsi: mpi3mr: Fix ATA NCQ priority support + +From: Damien Le Moal + +commit 90e6f08915ec6efe46570420412a65050ec826b2 upstream. 
+ +The function mpi3mr_qcmd() of the mpi3mr driver is able to indicate to +the HBA if a read or write command directed at an ATA device should be +translated to an NCQ read/write command with the high priority bit set +when the request uses the RT priority class and the user has enabled NCQ +priority through sysfs. + +However, unlike the mpt3sas driver, the mpi3mr driver does not define +the sas_ncq_prio_supported and sas_ncq_prio_enable sysfs attributes, so +the ncq_prio_enable field of struct mpi3mr_sdev_priv_data is never +actually set and NCQ Priority cannot ever be used. + +Fix this by defining these missing attributes to allow a user to check if +an ATA device supports NCQ priority and to enable/disable the use of NCQ +priority. To do this, lift the function scsih_ncq_prio_supp() out of the +mpt3sas driver and make it the generic SCSI SAS transport function +sas_ata_ncq_prio_supported(). Nothing in that function is hardware +specific, so this function can be used in both the mpt3sas driver and +the mpi3mr driver. + +Reported-by: Scott McCoy +Fixes: 023ab2a9b4ed ("scsi: mpi3mr: Add support for queue command processing") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Link: https://lore.kernel.org/r/20240611083435.92961-1-dlemoal@kernel.org +Reviewed-by: Niklas Cassel +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/mpi3mr/mpi3mr.h | 1 + drivers/scsi/mpi3mr/mpi3mr_os.c | 67 +++++++++++++++++++++++++++++++++++ + drivers/scsi/mpt3sas/mpt3sas_base.h | 3 - + drivers/scsi/mpt3sas/mpt3sas_ctl.c | 4 +- + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 25 ------------- + drivers/scsi/scsi_transport_sas.c | 29 +++++++++++++++ + include/scsi/scsi_transport_sas.h | 2 + + 7 files changed, 101 insertions(+), 30 deletions(-) + +--- a/drivers/scsi/mpi3mr/mpi3mr.h ++++ b/drivers/scsi/mpi3mr/mpi3mr.h +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #include "mpi/mpi30_transport.h" + #include "mpi/mpi30_cnfg.h" +--- a/drivers/scsi/mpi3mr/mpi3mr_os.c ++++ b/drivers/scsi/mpi3mr/mpi3mr_os.c +@@ -3549,6 +3549,72 @@ out: + return retval; + } + ++/** ++ * sas_ncq_prio_supported_show - Indicate if device supports NCQ priority ++ * @dev: pointer to embedded device ++ * @attr: sas_ncq_prio_supported attribute descriptor ++ * @buf: the buffer returned ++ * ++ * A sysfs 'read-only' sdev attribute, only works with SATA devices ++ */ ++static ssize_t ++sas_ncq_prio_supported_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct scsi_device *sdev = to_scsi_device(dev); ++ ++ return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); ++} ++static DEVICE_ATTR_RO(sas_ncq_prio_supported); ++ ++/** ++ * sas_ncq_prio_enable_show - send prioritized io commands to device ++ * @dev: pointer to embedded device ++ * @attr: sas_ncq_prio_enable attribute descriptor ++ * @buf: the buffer returned ++ * ++ * A sysfs 'read/write' sdev attribute, only works with SATA devices ++ */ ++static ssize_t ++sas_ncq_prio_enable_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct scsi_device *sdev = to_scsi_device(dev); ++ struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; ++ ++ if (!sdev_priv_data) ++ return 0; ++ ++ return sysfs_emit(buf, "%d\n", sdev_priv_data->ncq_prio_enable); 
++} ++ ++static ssize_t ++sas_ncq_prio_enable_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct scsi_device *sdev = to_scsi_device(dev); ++ struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; ++ bool ncq_prio_enable = 0; ++ ++ if (kstrtobool(buf, &ncq_prio_enable)) ++ return -EINVAL; ++ ++ if (!sas_ata_ncq_prio_supported(sdev)) ++ return -EINVAL; ++ ++ sdev_priv_data->ncq_prio_enable = ncq_prio_enable; ++ ++ return strlen(buf); ++} ++static DEVICE_ATTR_RW(sas_ncq_prio_enable); ++ ++static struct device_attribute *mpi3mr_dev_attrs[] = { ++ &dev_attr_sas_ncq_prio_supported, ++ &dev_attr_sas_ncq_prio_enable, ++ NULL, ++}; ++ + static struct scsi_host_template mpi3mr_driver_template = { + .module = THIS_MODULE, + .name = "MPI3 Storage Controller", +@@ -3577,6 +3643,7 @@ static struct scsi_host_template mpi3mr_ + .cmd_per_lun = MPI3MR_MAX_CMDS_LUN, + .track_queue_depth = 1, + .cmd_size = sizeof(struct scmd_priv), ++ .sdev_attrs = mpi3mr_dev_attrs, + }; + + /** +--- a/drivers/scsi/mpt3sas/mpt3sas_base.h ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h +@@ -2010,9 +2010,6 @@ void + mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, + struct _raid_device *raid_device, Mpi25SCSIIORequest_t *mpi_request); + +-/* NCQ Prio Handling Check */ +-bool scsih_ncq_prio_supp(struct scsi_device *sdev); +- + void mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc); + void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc); + void mpt3sas_init_debugfs(void); +--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +@@ -3933,7 +3933,7 @@ sas_ncq_prio_supported_show(struct devic + { + struct scsi_device *sdev = to_scsi_device(dev); + +- return sysfs_emit(buf, "%d\n", scsih_ncq_prio_supp(sdev)); ++ return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); + } + static DEVICE_ATTR_RO(sas_ncq_prio_supported); + +@@ -3968,7 +3968,7 @@ sas_ncq_prio_enable_store(struct 
device + if (kstrtobool(buf, &ncq_prio_enable)) + return -EINVAL; + +- if (!scsih_ncq_prio_supp(sdev)) ++ if (!sas_ata_ncq_prio_supported(sdev)) + return -EINVAL; + + sas_device_priv_data->ncq_prio_enable = ncq_prio_enable; +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -12580,31 +12580,6 @@ scsih_pci_mmio_enabled(struct pci_dev *p + return PCI_ERS_RESULT_RECOVERED; + } + +-/** +- * scsih_ncq_prio_supp - Check for NCQ command priority support +- * @sdev: scsi device struct +- * +- * This is called when a user indicates they would like to enable +- * ncq command priorities. This works only on SATA devices. +- */ +-bool scsih_ncq_prio_supp(struct scsi_device *sdev) +-{ +- unsigned char *buf; +- bool ncq_prio_supp = false; +- +- if (!scsi_device_supports_vpd(sdev)) +- return ncq_prio_supp; +- +- buf = kmalloc(SCSI_VPD_PG_LEN, GFP_KERNEL); +- if (!buf) +- return ncq_prio_supp; +- +- if (!scsi_get_vpd_page(sdev, 0x89, buf, SCSI_VPD_PG_LEN)) +- ncq_prio_supp = (buf[213] >> 4) & 1; +- +- kfree(buf); +- return ncq_prio_supp; +-} + /* + * The pci device ids are defined in mpi/mpi2_cnfg.h. + */ +--- a/drivers/scsi/scsi_transport_sas.c ++++ b/drivers/scsi/scsi_transport_sas.c +@@ -410,6 +410,35 @@ unsigned int sas_is_tlr_enabled(struct s + } + EXPORT_SYMBOL_GPL(sas_is_tlr_enabled); + ++/** ++ * sas_ata_ncq_prio_supported - Check for ATA NCQ command priority support ++ * @sdev: SCSI device ++ * ++ * Check if an ATA device supports NCQ priority using VPD page 89h (ATA ++ * Information). Since this VPD page is implemented only for ATA devices, ++ * this function always returns false for SCSI devices. 
++ */ ++bool sas_ata_ncq_prio_supported(struct scsi_device *sdev) ++{ ++ unsigned char *buf; ++ bool ncq_prio_supported = false; ++ ++ if (!scsi_device_supports_vpd(sdev)) ++ return false; ++ ++ buf = kmalloc(SCSI_VPD_PG_LEN, GFP_KERNEL); ++ if (!buf) ++ return false; ++ ++ if (!scsi_get_vpd_page(sdev, 0x89, buf, SCSI_VPD_PG_LEN)) ++ ncq_prio_supported = (buf[213] >> 4) & 1; ++ ++ kfree(buf); ++ ++ return ncq_prio_supported; ++} ++EXPORT_SYMBOL_GPL(sas_ata_ncq_prio_supported); ++ + /* + * SAS Phy attributes + */ +--- a/include/scsi/scsi_transport_sas.h ++++ b/include/scsi/scsi_transport_sas.h +@@ -199,6 +199,8 @@ unsigned int sas_is_tlr_enabled(struct s + void sas_disable_tlr(struct scsi_device *); + void sas_enable_tlr(struct scsi_device *); + ++bool sas_ata_ncq_prio_supported(struct scsi_device *sdev); ++ + extern struct sas_rphy *sas_end_device_alloc(struct sas_port *); + extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type); + void sas_rphy_free(struct sas_rphy *); diff --git a/queue-5.15/series b/queue-5.15/series index 7dd20e61917..163100bc37b 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -163,3 +163,6 @@ intel_th-pci-add-sapphire-rapids-soc-support.patch intel_th-pci-add-meteor-lake-s-support.patch intel_th-pci-add-lunar-lake-support.patch nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch +tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch +scsi-mpi3mr-fix-ata-ncq-priority-support.patch +mm-huge_memory-don-t-unpoison-huge_zero_folio.patch diff --git a/queue-5.15/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch b/queue-5.15/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch new file mode 100644 index 00000000000..ea72c5ffcc0 --- /dev/null +++ b/queue-5.15/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch @@ -0,0 +1,94 @@ +From 07c54cc5988f19c9642fd463c2dbdac7fc52f777 Mon Sep 17 
00:00:00 2001 +From: Oleg Nesterov +Date: Tue, 28 May 2024 14:20:19 +0200 +Subject: tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device() + +From: Oleg Nesterov + +commit 07c54cc5988f19c9642fd463c2dbdac7fc52f777 upstream. + +After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash +when the boot CPU is nohz_full") the kernel no longer crashes, but there is +another problem. + +In this case tick_setup_device() calls tick_take_do_timer_from_boot() to +update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled) +in smp_call_function_single(). + +Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new +comment explains why this is safe (thanks Thomas!). + +Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full") +Signed-off-by: Oleg Nesterov +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com +Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/tick-common.c | 42 ++++++++++++++---------------------------- + 1 file changed, 14 insertions(+), 28 deletions(-) + +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -179,26 +179,6 @@ void tick_setup_periodic(struct clock_ev + } + } + +-#ifdef CONFIG_NO_HZ_FULL +-static void giveup_do_timer(void *info) +-{ +- int cpu = *(unsigned int *)info; +- +- WARN_ON(tick_do_timer_cpu != smp_processor_id()); +- +- tick_do_timer_cpu = cpu; +-} +- +-static void tick_take_do_timer_from_boot(void) +-{ +- int cpu = smp_processor_id(); +- int from = tick_do_timer_boot_cpu; +- +- if (from >= 0 && from != cpu) +- smp_call_function_single(from, giveup_do_timer, &cpu, 1); +-} +-#endif +- + /* + * Setup the tick device + */ +@@ -222,19 +202,25 @@ static void tick_setup_device(struct tic + tick_next_period = ktime_get(); + #ifdef CONFIG_NO_HZ_FULL + /* +- * The boot CPU may be nohz_full, in which case set 
+- * tick_do_timer_boot_cpu so the first housekeeping +- * secondary that comes up will take do_timer from +- * us. ++ * The boot CPU may be nohz_full, in which case the ++ * first housekeeping secondary will take do_timer() ++ * from it. + */ + if (tick_nohz_full_cpu(cpu)) + tick_do_timer_boot_cpu = cpu; + +- } else if (tick_do_timer_boot_cpu != -1 && +- !tick_nohz_full_cpu(cpu)) { +- tick_take_do_timer_from_boot(); ++ } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { + tick_do_timer_boot_cpu = -1; +- WARN_ON(tick_do_timer_cpu != cpu); ++ /* ++ * The boot CPU will stay in periodic (NOHZ disabled) ++ * mode until clocksource_done_booting() called after ++ * smp_init() selects a high resolution clocksource and ++ * timekeeping_notify() kicks the NOHZ stuff alive. ++ * ++ * So this WRITE_ONCE can only race with the READ_ONCE ++ * check in tick_periodic() but this race is harmless. ++ */ ++ WRITE_ONCE(tick_do_timer_cpu, cpu); + #endif + } + -- 2.47.3