]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 09:34:56 +0000 (11:34 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 09:34:56 +0000 (11:34 +0200)
added patches:
mm-huge_memory-don-t-unpoison-huge_zero_folio.patch
scsi-mpi3mr-fix-ata-ncq-priority-support.patch
tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch

queue-5.15/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch [new file with mode: 0644]
queue-5.15/scsi-mpi3mr-fix-ata-ncq-priority-support.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch [new file with mode: 0644]

diff --git a/queue-5.15/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch b/queue-5.15/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch
new file mode 100644 (file)
index 0000000..1d967c0
--- /dev/null
@@ -0,0 +1,90 @@
+From fe6f86f4b40855a130a19aa589f9ba7f650423f4 Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Thu, 16 May 2024 20:26:08 +0800
+Subject: mm/huge_memory: don't unpoison huge_zero_folio
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit fe6f86f4b40855a130a19aa589f9ba7f650423f4 upstream.
+
+When I ran memory failure tests recently, the following panic occurred:
+
+ kernel BUG at include/linux/mm.h:1135!
+ invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+ CPU: 9 PID: 137 Comm: kswapd1 Not tainted 6.9.0-rc4-00491-gd5ce28f156fe-dirty #14
+ RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0
+ RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246
+ RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8
+ RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0
+ RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492
+ R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00
+ FS:  0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0
+ Call Trace:
+  <TASK>
+  do_shrink_slab+0x14f/0x6a0
+  shrink_slab+0xca/0x8c0
+  shrink_node+0x2d0/0x7d0
+  balance_pgdat+0x33a/0x720
+  kswapd+0x1f3/0x410
+  kthread+0xd5/0x100
+  ret_from_fork+0x2f/0x50
+  ret_from_fork_asm+0x1a/0x30
+  </TASK>
+ Modules linked in: mce_inject hwpoison_inject
+ ---[ end trace 0000000000000000 ]---
+ RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0
+ RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246
+ RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8
+ RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0
+ RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492
+ R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00
+ FS:  0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0
+
+The root cause is that HWPoison flag will be set for huge_zero_folio
+without increasing the folio refcnt.  But then unpoison_memory() will
+decrease the folio refcnt unexpectedly as it appears like a successfully
+hwpoisoned folio leading to VM_BUG_ON_PAGE(page_ref_count(page) == 0) when
+releasing huge_zero_folio.
+
+Skip unpoisoning huge_zero_folio in unpoison_memory() to fix this issue.
+We're not prepared to unpoison huge_zero_folio yet.
+
+Link: https://lkml.kernel.org/r/20240516122608.22610-1-linmiaohe@huawei.com
+Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
+Cc: Xu Yu <xuyu@linux.alibaba.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -2051,6 +2051,13 @@ int unpoison_memory(unsigned long pfn)
+       mutex_lock(&mf_mutex);
++      if (is_huge_zero_page(page)) {
++              unpoison_pr_info("Unpoison: huge zero page is not supported %#lx\n",
++                               pfn, &unpoison_rs);
++              ret = -EOPNOTSUPP;
++              goto unlock_mutex;
++      }
++
+       if (!PageHWPoison(p)) {
+               unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
+                                pfn, &unpoison_rs);
diff --git a/queue-5.15/scsi-mpi3mr-fix-ata-ncq-priority-support.patch b/queue-5.15/scsi-mpi3mr-fix-ata-ncq-priority-support.patch
new file mode 100644 (file)
index 0000000..187104c
--- /dev/null
@@ -0,0 +1,254 @@
+From 90e6f08915ec6efe46570420412a65050ec826b2 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Tue, 11 Jun 2024 17:34:35 +0900
+Subject: scsi: mpi3mr: Fix ATA NCQ priority support
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 90e6f08915ec6efe46570420412a65050ec826b2 upstream.
+
+The function mpi3mr_qcmd() of the mpi3mr driver is able to indicate to
+the HBA if a read or write command directed at an ATA device should be
+translated to an NCQ read/write command with the high priority bit set
+when the request uses the RT priority class and the user has enabled NCQ
+priority through sysfs.
+
+However, unlike the mpt3sas driver, the mpi3mr driver does not define
+the sas_ncq_prio_supported and sas_ncq_prio_enable sysfs attributes, so
+the ncq_prio_enable field of struct mpi3mr_sdev_priv_data is never
+actually set and NCQ Priority cannot ever be used.
+
+Fix this by defining these missing attributes to allow a user to check if
+an ATA device supports NCQ priority and to enable/disable the use of NCQ
+priority. To do this, lift the function scsih_ncq_prio_supp() out of the
+mpt3sas driver and make it the generic SCSI SAS transport function
+sas_ata_ncq_prio_supported(). Nothing in that function is hardware
+specific, so this function can be used in both the mpt3sas driver and
+the mpi3mr driver.
+
+Reported-by: Scott McCoy <scott.mccoy@wdc.com>
+Fixes: 023ab2a9b4ed ("scsi: mpi3mr: Add support for queue command processing")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Link: https://lore.kernel.org/r/20240611083435.92961-1-dlemoal@kernel.org
+Reviewed-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/mpi3mr/mpi3mr.h         |    1 
+ drivers/scsi/mpi3mr/mpi3mr_os.c      |   67 +++++++++++++++++++++++++++++++++++
+ drivers/scsi/mpt3sas/mpt3sas_base.h  |    3 -
+ drivers/scsi/mpt3sas/mpt3sas_ctl.c   |    4 +-
+ drivers/scsi/mpt3sas/mpt3sas_scsih.c |   25 -------------
+ drivers/scsi/scsi_transport_sas.c    |   29 +++++++++++++++
+ include/scsi/scsi_transport_sas.h    |    2 +
+ 7 files changed, 101 insertions(+), 30 deletions(-)
+
+--- a/drivers/scsi/mpi3mr/mpi3mr.h
++++ b/drivers/scsi/mpi3mr/mpi3mr.h
+@@ -38,6 +38,7 @@
+ #include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi_tcq.h>
++#include <scsi/scsi_transport_sas.h>
+ #include "mpi/mpi30_transport.h"
+ #include "mpi/mpi30_cnfg.h"
+--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
++++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
+@@ -3549,6 +3549,72 @@ out:
+       return retval;
+ }
++/**
++ * sas_ncq_prio_supported_show - Indicate if device supports NCQ priority
++ * @dev: pointer to embedded device
++ * @attr: sas_ncq_prio_supported attribute descriptor
++ * @buf: the buffer returned
++ *
++ * A sysfs 'read-only' sdev attribute, only works with SATA devices
++ */
++static ssize_t
++sas_ncq_prio_supported_show(struct device *dev,
++                          struct device_attribute *attr, char *buf)
++{
++      struct scsi_device *sdev = to_scsi_device(dev);
++
++      return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev));
++}
++static DEVICE_ATTR_RO(sas_ncq_prio_supported);
++
++/**
++ * sas_ncq_prio_enable_show - send prioritized io commands to device
++ * @dev: pointer to embedded device
++ * @attr: sas_ncq_prio_enable attribute descriptor
++ * @buf: the buffer returned
++ *
++ * A sysfs 'read/write' sdev attribute, only works with SATA devices
++ */
++static ssize_t
++sas_ncq_prio_enable_show(struct device *dev,
++                       struct device_attribute *attr, char *buf)
++{
++      struct scsi_device *sdev = to_scsi_device(dev);
++      struct mpi3mr_sdev_priv_data *sdev_priv_data =  sdev->hostdata;
++
++      if (!sdev_priv_data)
++              return 0;
++
++      return sysfs_emit(buf, "%d\n", sdev_priv_data->ncq_prio_enable);
++}
++
++static ssize_t
++sas_ncq_prio_enable_store(struct device *dev,
++                        struct device_attribute *attr,
++                        const char *buf, size_t count)
++{
++      struct scsi_device *sdev = to_scsi_device(dev);
++      struct mpi3mr_sdev_priv_data *sdev_priv_data =  sdev->hostdata;
++      bool ncq_prio_enable = 0;
++
++      if (kstrtobool(buf, &ncq_prio_enable))
++              return -EINVAL;
++
++      if (!sas_ata_ncq_prio_supported(sdev))
++              return -EINVAL;
++
++      sdev_priv_data->ncq_prio_enable = ncq_prio_enable;
++
++      return strlen(buf);
++}
++static DEVICE_ATTR_RW(sas_ncq_prio_enable);
++
++static struct device_attribute *mpi3mr_dev_attrs[] = {
++      &dev_attr_sas_ncq_prio_supported,
++      &dev_attr_sas_ncq_prio_enable,
++      NULL,
++};
++
+ static struct scsi_host_template mpi3mr_driver_template = {
+       .module                         = THIS_MODULE,
+       .name                           = "MPI3 Storage Controller",
+@@ -3577,6 +3643,7 @@ static struct scsi_host_template mpi3mr_
+       .cmd_per_lun                    = MPI3MR_MAX_CMDS_LUN,
+       .track_queue_depth              = 1,
+       .cmd_size                       = sizeof(struct scmd_priv),
++      .sdev_attrs                     = mpi3mr_dev_attrs,
+ };
+ /**
+--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
+@@ -2010,9 +2010,6 @@ void
+ mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
+       struct _raid_device *raid_device, Mpi25SCSIIORequest_t *mpi_request);
+-/* NCQ Prio Handling Check */
+-bool scsih_ncq_prio_supp(struct scsi_device *sdev);
+-
+ void mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc);
+ void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc);
+ void mpt3sas_init_debugfs(void);
+--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+@@ -3933,7 +3933,7 @@ sas_ncq_prio_supported_show(struct devic
+ {
+       struct scsi_device *sdev = to_scsi_device(dev);
+-      return sysfs_emit(buf, "%d\n", scsih_ncq_prio_supp(sdev));
++      return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev));
+ }
+ static DEVICE_ATTR_RO(sas_ncq_prio_supported);
+@@ -3968,7 +3968,7 @@ sas_ncq_prio_enable_store(struct device
+       if (kstrtobool(buf, &ncq_prio_enable))
+               return -EINVAL;
+-      if (!scsih_ncq_prio_supp(sdev))
++      if (!sas_ata_ncq_prio_supported(sdev))
+               return -EINVAL;
+       sas_device_priv_data->ncq_prio_enable = ncq_prio_enable;
+--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+@@ -12580,31 +12580,6 @@ scsih_pci_mmio_enabled(struct pci_dev *p
+       return PCI_ERS_RESULT_RECOVERED;
+ }
+-/**
+- * scsih_ncq_prio_supp - Check for NCQ command priority support
+- * @sdev: scsi device struct
+- *
+- * This is called when a user indicates they would like to enable
+- * ncq command priorities. This works only on SATA devices.
+- */
+-bool scsih_ncq_prio_supp(struct scsi_device *sdev)
+-{
+-      unsigned char *buf;
+-      bool ncq_prio_supp = false;
+-
+-      if (!scsi_device_supports_vpd(sdev))
+-              return ncq_prio_supp;
+-
+-      buf = kmalloc(SCSI_VPD_PG_LEN, GFP_KERNEL);
+-      if (!buf)
+-              return ncq_prio_supp;
+-
+-      if (!scsi_get_vpd_page(sdev, 0x89, buf, SCSI_VPD_PG_LEN))
+-              ncq_prio_supp = (buf[213] >> 4) & 1;
+-
+-      kfree(buf);
+-      return ncq_prio_supp;
+-}
+ /*
+  * The pci device ids are defined in mpi/mpi2_cnfg.h.
+  */
+--- a/drivers/scsi/scsi_transport_sas.c
++++ b/drivers/scsi/scsi_transport_sas.c
+@@ -410,6 +410,35 @@ unsigned int sas_is_tlr_enabled(struct s
+ }
+ EXPORT_SYMBOL_GPL(sas_is_tlr_enabled);
++/**
++ * sas_ata_ncq_prio_supported - Check for ATA NCQ command priority support
++ * @sdev: SCSI device
++ *
++ * Check if an ATA device supports NCQ priority using VPD page 89h (ATA
++ * Information). Since this VPD page is implemented only for ATA devices,
++ * this function always returns false for SCSI devices.
++ */
++bool sas_ata_ncq_prio_supported(struct scsi_device *sdev)
++{
++      unsigned char *buf;
++      bool ncq_prio_supported = false;
++
++      if (!scsi_device_supports_vpd(sdev))
++              return false;
++
++      buf = kmalloc(SCSI_VPD_PG_LEN, GFP_KERNEL);
++      if (!buf)
++              return false;
++
++      if (!scsi_get_vpd_page(sdev, 0x89, buf, SCSI_VPD_PG_LEN))
++              ncq_prio_supported = (buf[213] >> 4) & 1;
++
++      kfree(buf);
++
++      return ncq_prio_supported;
++}
++EXPORT_SYMBOL_GPL(sas_ata_ncq_prio_supported);
++
+ /*
+  * SAS Phy attributes
+  */
+--- a/include/scsi/scsi_transport_sas.h
++++ b/include/scsi/scsi_transport_sas.h
+@@ -199,6 +199,8 @@ unsigned int sas_is_tlr_enabled(struct s
+ void sas_disable_tlr(struct scsi_device *);
+ void sas_enable_tlr(struct scsi_device *);
++bool sas_ata_ncq_prio_supported(struct scsi_device *sdev);
++
+ extern struct sas_rphy *sas_end_device_alloc(struct sas_port *);
+ extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type);
+ void sas_rphy_free(struct sas_rphy *);
index 7dd20e619179b4ddc6e45ce08609bb4f102a1f51..163100bc37b157fbb0bc2c08dc4c344e32f7859e 100644 (file)
@@ -163,3 +163,6 @@ intel_th-pci-add-sapphire-rapids-soc-support.patch
 intel_th-pci-add-meteor-lake-s-support.patch
 intel_th-pci-add-lunar-lake-support.patch
 nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
+tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch
+scsi-mpi3mr-fix-ata-ncq-priority-support.patch
+mm-huge_memory-don-t-unpoison-huge_zero_folio.patch
diff --git a/queue-5.15/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch b/queue-5.15/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch
new file mode 100644 (file)
index 0000000..ea72c5f
--- /dev/null
@@ -0,0 +1,94 @@
+From 07c54cc5988f19c9642fd463c2dbdac7fc52f777 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Tue, 28 May 2024 14:20:19 +0200
+Subject: tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 07c54cc5988f19c9642fd463c2dbdac7fc52f777 upstream.
+
+After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash
+when the boot CPU is nohz_full") the kernel no longer crashes, but there is
+another problem.
+
+In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
+update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
+in smp_call_function_single().
+
+Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
+comment explains why this is safe (thanks Thomas!).
+
+Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com
+Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/tick-common.c |   42 ++++++++++++++----------------------------
+ 1 file changed, 14 insertions(+), 28 deletions(-)
+
+--- a/kernel/time/tick-common.c
++++ b/kernel/time/tick-common.c
+@@ -179,26 +179,6 @@ void tick_setup_periodic(struct clock_ev
+       }
+ }
+-#ifdef CONFIG_NO_HZ_FULL
+-static void giveup_do_timer(void *info)
+-{
+-      int cpu = *(unsigned int *)info;
+-
+-      WARN_ON(tick_do_timer_cpu != smp_processor_id());
+-
+-      tick_do_timer_cpu = cpu;
+-}
+-
+-static void tick_take_do_timer_from_boot(void)
+-{
+-      int cpu = smp_processor_id();
+-      int from = tick_do_timer_boot_cpu;
+-
+-      if (from >= 0 && from != cpu)
+-              smp_call_function_single(from, giveup_do_timer, &cpu, 1);
+-}
+-#endif
+-
+ /*
+  * Setup the tick device
+  */
+@@ -222,19 +202,25 @@ static void tick_setup_device(struct tic
+                       tick_next_period = ktime_get();
+ #ifdef CONFIG_NO_HZ_FULL
+                       /*
+-                       * The boot CPU may be nohz_full, in which case set
+-                       * tick_do_timer_boot_cpu so the first housekeeping
+-                       * secondary that comes up will take do_timer from
+-                       * us.
++                       * The boot CPU may be nohz_full, in which case the
++                       * first housekeeping secondary will take do_timer()
++                       * from it.
+                        */
+                       if (tick_nohz_full_cpu(cpu))
+                               tick_do_timer_boot_cpu = cpu;
+-              } else if (tick_do_timer_boot_cpu != -1 &&
+-                                              !tick_nohz_full_cpu(cpu)) {
+-                      tick_take_do_timer_from_boot();
++              } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) {
+                       tick_do_timer_boot_cpu = -1;
+-                      WARN_ON(tick_do_timer_cpu != cpu);
++                      /*
++                       * The boot CPU will stay in periodic (NOHZ disabled)
++                       * mode until clocksource_done_booting() called after
++                       * smp_init() selects a high resolution clocksource and
++                       * timekeeping_notify() kicks the NOHZ stuff alive.
++                       *
++                       * So this WRITE_ONCE can only race with the READ_ONCE
++                       * check in tick_periodic() but this race is harmless.
++                       */
++                      WRITE_ONCE(tick_do_timer_cpu, cpu);
+ #endif
+               }