Fixes for 6.6

author Sasha Levin <sashal@kernel.org>
Wed, 11 Dec 2024 18:37:02 +0000 (13:37 -0500)
committer Sasha Levin <sashal@kernel.org>
Wed, 11 Dec 2024 18:37:02 +0000 (13:37 -0500)
diff --git a/queue-6.6/genirq-irqdomain-add-domain_bus_device_msi.patch b/queue-6.6/genirq-irqdomain-add-domain_bus_device_msi.patch

new file mode 100644 (file)

index 0000000..9b0e683
--- /dev/null
+++ b/queue-6.6/genirq-irqdomain-add-domain_bus_device_msi.patch
@@ -0,0 +1,37 @@
+From d143460613d4f5c352e9fd397f083421e249ffe6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Jan 2024 21:47:32 +0530
+Subject: genirq/irqdomain: Add DOMAIN_BUS_DEVICE_MSI
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 6516d5a295356f8fd5827a1c0954d7ed5b2324dd ]
+
+Add a new domain bus token to prepare for device MSI which aims to replace
+the existing platform MSI maze.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Anup Patel <apatel@ventanamicro.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20240127161753.114685-5-apatel@ventanamicro.com
+Stable-dep-of: 64506b3d23a3 ("scsi: ufs: qcom: Only free platform MSIs when ESI is enabled")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/irqdomain_defs.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h
+index c29921fd8cd15..a7dea0c8c5e0c 100644
+--- a/include/linux/irqdomain_defs.h
++++ b/include/linux/irqdomain_defs.h
+@@ -26,6 +26,7 @@ enum irq_domain_bus_token {
+       DOMAIN_BUS_DMAR,
+       DOMAIN_BUS_AMDVI,
+       DOMAIN_BUS_PCI_DEVICE_IMS,
++      DOMAIN_BUS_DEVICE_MSI,
+ };
+ 
+ #endif /* _LINUX_IRQDOMAIN_DEFS_H */
+-- 
+2.43.0
+
diff --git a/queue-6.6/irqchip-convert-all-platform-msi-users-to-the-new-ap.patch b/queue-6.6/irqchip-convert-all-platform-msi-users-to-the-new-ap.patch

new file mode 100644 (file)

index 0000000..f742f9c
--- /dev/null
+++ b/queue-6.6/irqchip-convert-all-platform-msi-users-to-the-new-ap.patch
@@ -0,0 +1,202 @@
+From 5df23ec861a0208ef524a27e44c694ca2decb7ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Jan 2024 21:47:34 +0530
+Subject: irqchip: Convert all platform MSI users to the new API
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit 14fd06c776b5289a43c91cdc64bac3bdbc7b397e ]
+
+Switch all the users of the platform MSI domain over to invoke the new
+interfaces which branch to the original platform MSI functions when the
+irqdomain associated to the caller device does not yet provide MSI parent
+functionality.
+
+No functional change.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Anup Patel <apatel@ventanamicro.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20240127161753.114685-7-apatel@ventanamicro.com
+Stable-dep-of: 64506b3d23a3 ("scsi: ufs: qcom: Only free platform MSIs when ESI is enabled")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/mv_xor_v2.c                     | 8 ++++----
+ drivers/dma/qcom/hidma.c                    | 6 +++---
+ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 5 +++--
+ drivers/mailbox/bcm-flexrm-mailbox.c        | 8 ++++----
+ drivers/perf/arm_smmuv3_pmu.c               | 4 ++--
+ drivers/ufs/host/ufs-qcom.c                 | 8 ++++----
+ 6 files changed, 20 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
+index 0e1e9ca1c005a..cd2b9a6ab621d 100644
+--- a/drivers/dma/mv_xor_v2.c
++++ b/drivers/dma/mv_xor_v2.c
+@@ -747,8 +747,8 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
+       if (IS_ERR(xor_dev->clk))
+               return PTR_ERR(xor_dev->clk);
+ 
+-      ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
+-                                           mv_xor_v2_set_msi_msg);
++      ret = platform_device_msi_init_and_alloc_irqs(&pdev->dev, 1,
++                                                    mv_xor_v2_set_msi_msg);
+       if (ret)
+               return ret;
+ 
+@@ -851,7 +851,7 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
+                         xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
+                         xor_dev->hw_desq_virt, xor_dev->hw_desq);
+ free_msi_irqs:
+-      platform_msi_domain_free_irqs(&pdev->dev);
++      platform_device_msi_free_irqs_all(&pdev->dev);
+       return ret;
+ }
+ 
+@@ -867,7 +867,7 @@ static int mv_xor_v2_remove(struct platform_device *pdev)
+ 
+       devm_free_irq(&pdev->dev, xor_dev->irq, xor_dev);
+ 
+-      platform_msi_domain_free_irqs(&pdev->dev);
++      platform_device_msi_free_irqs_all(&pdev->dev);
+ 
+       tasklet_kill(&xor_dev->irq_tasklet);
+ 
+diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
+index 834ae519c15de..f2b299c23b1e8 100644
+--- a/drivers/dma/qcom/hidma.c
++++ b/drivers/dma/qcom/hidma.c
+@@ -696,7 +696,7 @@ static void hidma_free_msis(struct hidma_dev *dmadev)
+                       devm_free_irq(dev, virq, &dmadev->lldev);
+       }
+ 
+-      platform_msi_domain_free_irqs(dev);
++      platform_device_msi_free_irqs_all(dev);
+ #endif
+ }
+ 
+@@ -706,8 +706,8 @@ static int hidma_request_msi(struct hidma_dev *dmadev,
+ #ifdef CONFIG_GENERIC_MSI_IRQ
+       int rc, i, virq;
+ 
+-      rc = platform_msi_domain_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS,
+-                                          hidma_write_msi_msg);
++      rc = platform_device_msi_init_and_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS,
++                                                   hidma_write_msi_msg);
+       if (rc)
+               return rc;
+ 
+diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+index 68b81f9c2f4b1..435eee52e033a 100644
+--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
++++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+@@ -3141,7 +3141,8 @@ static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
+ static void arm_smmu_free_msis(void *data)
+ {
+       struct device *dev = data;
+-      platform_msi_domain_free_irqs(dev);
++
++      platform_device_msi_free_irqs_all(dev);
+ }
+ 
+ static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+@@ -3182,7 +3183,7 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
+       }
+ 
+       /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
+-      ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
++      ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
+       if (ret) {
+               dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
+               return;
+diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c
+index a2b8839d4e7c5..7094d44869a8e 100644
+--- a/drivers/mailbox/bcm-flexrm-mailbox.c
++++ b/drivers/mailbox/bcm-flexrm-mailbox.c
+@@ -1587,8 +1587,8 @@ static int flexrm_mbox_probe(struct platform_device *pdev)
+       }
+ 
+       /* Allocate platform MSIs for each ring */
+-      ret = platform_msi_domain_alloc_irqs(dev, mbox->num_rings,
+-                                              flexrm_mbox_msi_write);
++      ret = platform_device_msi_init_and_alloc_irqs(dev, mbox->num_rings,
++                                                    flexrm_mbox_msi_write);
+       if (ret)
+               goto fail_destroy_cmpl_pool;
+ 
+@@ -1641,7 +1641,7 @@ static int flexrm_mbox_probe(struct platform_device *pdev)
+ 
+ fail_free_debugfs_root:
+       debugfs_remove_recursive(mbox->root);
+-      platform_msi_domain_free_irqs(dev);
++      platform_device_msi_free_irqs_all(dev);
+ fail_destroy_cmpl_pool:
+       dma_pool_destroy(mbox->cmpl_pool);
+ fail_destroy_bd_pool:
+@@ -1657,7 +1657,7 @@ static int flexrm_mbox_remove(struct platform_device *pdev)
+ 
+       debugfs_remove_recursive(mbox->root);
+ 
+-      platform_msi_domain_free_irqs(dev);
++      platform_device_msi_free_irqs_all(dev);
+ 
+       dma_pool_destroy(mbox->cmpl_pool);
+       dma_pool_destroy(mbox->bd_pool);
+diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
+index 31e491e7f2065..2946422539fb7 100644
+--- a/drivers/perf/arm_smmuv3_pmu.c
++++ b/drivers/perf/arm_smmuv3_pmu.c
+@@ -719,7 +719,7 @@ static void smmu_pmu_free_msis(void *data)
+ {
+       struct device *dev = data;
+ 
+-      platform_msi_domain_free_irqs(dev);
++      platform_device_msi_free_irqs_all(dev);
+ }
+ 
+ static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+@@ -749,7 +749,7 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
+       if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
+               return;
+ 
+-      ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
++      ret = platform_device_msi_init_and_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
+       if (ret) {
+               dev_warn(dev, "failed to allocate MSIs\n");
+               return;
+diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
+index 0a914fd44494d..d77cfb2ab1acd 100644
+--- a/drivers/ufs/host/ufs-qcom.c
++++ b/drivers/ufs/host/ufs-qcom.c
+@@ -1816,8 +1816,8 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
+        * 2. Poll queues do not need ESI.
+        */
+       nr_irqs = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL];
+-      ret = platform_msi_domain_alloc_irqs(hba->dev, nr_irqs,
+-                                           ufs_qcom_write_msi_msg);
++      ret = platform_device_msi_init_and_alloc_irqs(hba->dev, nr_irqs,
++                                                    ufs_qcom_write_msi_msg);
+       if (ret) {
+               dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret);
+               goto out;
+@@ -1846,7 +1846,7 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
+                       devm_free_irq(hba->dev, desc->irq, hba);
+               }
+               msi_unlock_descs(hba->dev);
+-              platform_msi_domain_free_irqs(hba->dev);
++              platform_device_msi_free_irqs_all(hba->dev);
+       } else {
+               if (host->hw_ver.major == 6 && host->hw_ver.minor == 0 &&
+                   host->hw_ver.step == 0) {
+@@ -1926,7 +1926,7 @@ static void ufs_qcom_remove(struct platform_device *pdev)
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_remove(hba);
+-      platform_msi_domain_free_irqs(hba->dev);
++      platform_device_msi_free_irqs_all(hba->dev);
+ }
+ 
+ static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = {
+-- 
+2.43.0
+
diff --git a/queue-6.6/mempolicy-fix-migrate_pages-2-syscall-return-nr_fail.patch b/queue-6.6/mempolicy-fix-migrate_pages-2-syscall-return-nr_fail.patch

new file mode 100644 (file)

index 0000000..649c072
--- /dev/null
+++ b/queue-6.6/mempolicy-fix-migrate_pages-2-syscall-return-nr_fail.patch
@@ -0,0 +1,665 @@
+From 5ed5249f8a5b1f12689d57fea3f66ab1baebce5a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Oct 2023 02:17:43 -0700
+Subject: mempolicy: fix migrate_pages(2) syscall return nr_failed
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 1cb5d11a370f661c5d0d888bb0cfc2cdc5791382 ]
+
+"man 2 migrate_pages" says "On success migrate_pages() returns the number
+of pages that could not be moved".  Although 5.3 and 5.4 commits fixed
+mbind(MPOL_MF_STRICT|MPOL_MF_MOVE*) to fail with EIO when not all pages
+could be moved (because some could not be isolated for migration),
+migrate_pages(2) was left still reporting only those pages failing at the
+migration stage, forgetting those failing at the earlier isolation stage.
+
+Fix that by accumulating a long nr_failed count in struct queue_pages,
+returned by queue_pages_range() when it's not returning an error, for
+adding on to the nr_failed count from migrate_pages() in mm/migrate.c.  A
+count of pages?  It's more a count of folios, but changing it to pages
+would entail more work (also in mm/migrate.c): does not seem justified.
+
+queue_pages_range() itself should only return -EIO in the "strictly
+unmovable" case (STRICT without any MOVEs): in that case it's best to
+break out as soon as nr_failed gets set; but otherwise it should continue
+to isolate pages for MOVing even when nr_failed - as the mbind(2) manpage
+promises.
+
+There's a case when nr_failed should be incremented when it was missed:
+queue_folios_pte_range() and queue_folios_hugetlb() count the transient
+migration entries, like queue_folios_pmd() already did.  And there's a
+case when nr_failed should not be incremented when it would have been: in
+meeting later PTEs of the same large folio, which can only be isolated
+once: fixed by recording the current large folio in struct queue_pages.
+
+Clean up the affected functions, fixing or updating many comments.  Bool
+migrate_folio_add(), without -EIO: true if adding, or if skipping shared
+(but its arguable folio_estimated_sharers() heuristic left unchanged).
+Use MPOL_MF_WRLOCK flag to queue_pages_range(), instead of bool lock_vma.
+Use explicit STRICT|MOVE* flags where queue_pages_test_walk() checks for
+skipping, instead of hiding them behind MPOL_MF_VALID.
+
+Link: https://lkml.kernel.org/r/9a6b0b9-3bb-dbef-8adf-efab4397b8d@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Nhat Pham <nphamcs@gmail.com>
+Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Tejun heo <tj@kernel.org>
+Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 091c1dd2d4df ("mm/mempolicy: fix migrate_to_node() assuming there is at least one VMA in a MM")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mempolicy.c | 338 +++++++++++++++++++++++--------------------------
+ 1 file changed, 159 insertions(+), 179 deletions(-)
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index 109826a2af387..54f1b78d1b2c0 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -111,7 +111,8 @@
+ 
+ /* Internal flags */
+ #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)  /* Skip checks for continuous vmas */
+-#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)                /* Invert check for nodemask */
++#define MPOL_MF_INVERT       (MPOL_MF_INTERNAL << 1)  /* Invert check for nodemask */
++#define MPOL_MF_WRLOCK       (MPOL_MF_INTERNAL << 2)  /* Write-lock walked vmas */
+ 
+ static struct kmem_cache *policy_cache;
+ static struct kmem_cache *sn_cache;
+@@ -420,9 +421,19 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
+       },
+ };
+ 
+-static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
++static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
+                               unsigned long flags);
+ 
++static bool strictly_unmovable(unsigned long flags)
++{
++      /*
++       * STRICT without MOVE flags lets do_mbind() fail immediately with -EIO
++       * if any misplaced page is found.
++       */
++      return (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ==
++                       MPOL_MF_STRICT;
++}
++
+ struct queue_pages {
+       struct list_head *pagelist;
+       unsigned long flags;
+@@ -430,7 +441,8 @@ struct queue_pages {
+       unsigned long start;
+       unsigned long end;
+       struct vm_area_struct *first;
+-      bool has_unmovable;
++      struct folio *large;            /* note last large folio encountered */
++      long nr_failed;                 /* could not be isolated at this time */
+ };
+ 
+ /*
+@@ -448,61 +460,37 @@ static inline bool queue_folio_required(struct folio *folio,
+       return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
+ }
+ 
+-/*
+- * queue_folios_pmd() has three possible return values:
+- * 0 - folios are placed on the right node or queued successfully, or
+- *     special page is met, i.e. zero page, or unmovable page is found
+- *     but continue walking (indicated by queue_pages.has_unmovable).
+- * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+- *        existing folio was already on a node that does not follow the
+- *        policy.
+- */
+-static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+-                              unsigned long end, struct mm_walk *walk)
+-      __releases(ptl)
++static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
+ {
+-      int ret = 0;
+       struct folio *folio;
+       struct queue_pages *qp = walk->private;
+-      unsigned long flags;
+ 
+       if (unlikely(is_pmd_migration_entry(*pmd))) {
+-              ret = -EIO;
+-              goto unlock;
++              qp->nr_failed++;
++              return;
+       }
+       folio = pfn_folio(pmd_pfn(*pmd));
+       if (is_huge_zero_page(&folio->page)) {
+               walk->action = ACTION_CONTINUE;
+-              goto unlock;
++              return;
+       }
+       if (!queue_folio_required(folio, qp))
+-              goto unlock;
+-
+-      flags = qp->flags;
+-      /* go to folio migration */
+-      if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+-              if (!vma_migratable(walk->vma) ||
+-                  migrate_folio_add(folio, qp->pagelist, flags)) {
+-                      qp->has_unmovable = true;
+-                      goto unlock;
+-              }
+-      } else
+-              ret = -EIO;
+-unlock:
+-      spin_unlock(ptl);
+-      return ret;
++              return;
++      if (!(qp->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
++          !vma_migratable(walk->vma) ||
++          !migrate_folio_add(folio, qp->pagelist, qp->flags))
++              qp->nr_failed++;
+ }
+ 
+ /*
+- * Scan through pages checking if pages follow certain conditions,
+- * and move them to the pagelist if they do.
++ * Scan through folios, checking if they satisfy the required conditions,
++ * moving them from LRU to local pagelist for migration if they do (or not).
+  *
+- * queue_folios_pte_range() has three possible return values:
+- * 0 - folios are placed on the right node or queued successfully, or
+- *     special page is met, i.e. zero page, or unmovable page is found
+- *     but continue walking (indicated by queue_pages.has_unmovable).
+- * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
+- *        on a node that does not follow the policy.
++ * queue_folios_pte_range() has two possible return values:
++ * 0 - continue walking to scan for more, even if an existing folio on the
++ *     wrong node could not be isolated and queued for migration.
++ * -EIO - only MPOL_MF_STRICT was specified, without MPOL_MF_MOVE or ..._ALL,
++ *        and an existing folio was on a node that does not follow the policy.
+  */
+ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+                       unsigned long end, struct mm_walk *walk)
+@@ -516,8 +504,11 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+       spinlock_t *ptl;
+ 
+       ptl = pmd_trans_huge_lock(pmd, vma);
+-      if (ptl)
+-              return queue_folios_pmd(pmd, ptl, addr, end, walk);
++      if (ptl) {
++              queue_folios_pmd(pmd, walk);
++              spin_unlock(ptl);
++              goto out;
++      }
+ 
+       mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       if (!pte) {
+@@ -526,8 +517,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+       }
+       for (; addr != end; pte++, addr += PAGE_SIZE) {
+               ptent = ptep_get(pte);
+-              if (!pte_present(ptent))
++              if (pte_none(ptent))
+                       continue;
++              if (!pte_present(ptent)) {
++                      if (is_migration_entry(pte_to_swp_entry(ptent)))
++                              qp->nr_failed++;
++                      continue;
++              }
+               folio = vm_normal_folio(vma, addr, ptent);
+               if (!folio || folio_is_zone_device(folio))
+                       continue;
+@@ -539,95 +535,87 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
+                       continue;
+               if (!queue_folio_required(folio, qp))
+                       continue;
+-              if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+-                      /*
+-                       * MPOL_MF_STRICT must be specified if we get here.
+-                       * Continue walking vmas due to MPOL_MF_MOVE* flags.
+-                       */
+-                      if (!vma_migratable(vma))
+-                              qp->has_unmovable = true;
+-
++              if (folio_test_large(folio)) {
+                       /*
+-                       * Do not abort immediately since there may be
+-                       * temporary off LRU pages in the range.  Still
+-                       * need migrate other LRU pages.
++                       * A large folio can only be isolated from LRU once,
++                       * but may be mapped by many PTEs (and Copy-On-Write may
++                       * intersperse PTEs of other, order 0, folios).  This is
++                       * a common case, so don't mistake it for failure (but
++                       * there can be other cases of multi-mapped pages which
++                       * this quick check does not help to filter out - and a
++                       * search of the pagelist might grow to be prohibitive).
++                       *
++                       * migrate_pages(&pagelist) returns nr_failed folios, so
++                       * check "large" now so that queue_pages_range() returns
++                       * a comparable nr_failed folios.  This does imply that
++                       * if folio could not be isolated for some racy reason
++                       * at its first PTE, later PTEs will not give it another
++                       * chance of isolation; but keeps the accounting simple.
+                        */
+-                      if (migrate_folio_add(folio, qp->pagelist, flags))
+-                              qp->has_unmovable = true;
+-              } else
+-                      break;
++                      if (folio == qp->large)
++                              continue;
++                      qp->large = folio;
++              }
++              if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
++                  !vma_migratable(vma) ||
++                  !migrate_folio_add(folio, qp->pagelist, flags)) {
++                      qp->nr_failed++;
++                      if (strictly_unmovable(flags))
++                              break;
++              }
+       }
+       pte_unmap_unlock(mapped_pte, ptl);
+       cond_resched();
+-
+-      return addr != end ? -EIO : 0;
++out:
++      if (qp->nr_failed && strictly_unmovable(flags))
++              return -EIO;
++      return 0;
+ }
+ 
+ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
+                              unsigned long addr, unsigned long end,
+                              struct mm_walk *walk)
+ {
+-      int ret = 0;
+ #ifdef CONFIG_HUGETLB_PAGE
+       struct queue_pages *qp = walk->private;
+-      unsigned long flags = (qp->flags & MPOL_MF_VALID);
++      unsigned long flags = qp->flags;
+       struct folio *folio;
+       spinlock_t *ptl;
+       pte_t entry;
+ 
+       ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+       entry = huge_ptep_get(pte);
+-      if (!pte_present(entry))
++      if (!pte_present(entry)) {
++              if (unlikely(is_hugetlb_entry_migration(entry)))
++                      qp->nr_failed++;
+               goto unlock;
++      }
+       folio = pfn_folio(pte_pfn(entry));
+       if (!queue_folio_required(folio, qp))
+               goto unlock;
+-
+-      if (flags == MPOL_MF_STRICT) {
+-              /*
+-               * STRICT alone means only detecting misplaced folio and no
+-               * need to further check other vma.
+-               */
+-              ret = -EIO;
++      if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
++          !vma_migratable(walk->vma)) {
++              qp->nr_failed++;
+               goto unlock;
+       }
+-
+-      if (!vma_migratable(walk->vma)) {
+-              /*
+-               * Must be STRICT with MOVE*, otherwise .test_walk() have
+-               * stopped walking current vma.
+-               * Detecting misplaced folio but allow migrating folios which
+-               * have been queued.
+-               */
+-              qp->has_unmovable = true;
+-              goto unlock;
+-      }
+-
+       /*
+-       * With MPOL_MF_MOVE, we try to migrate only unshared folios. If it
+-       * is shared it is likely not worth migrating.
++       * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio.
++       * Choosing not to migrate a shared folio is not counted as a failure.
+        *
+        * To check if the folio is shared, ideally we want to make sure
+        * every page is mapped to the same process. Doing that is very
+-       * expensive, so check the estimated mapcount of the folio instead.
++       * expensive, so check the estimated sharers of the folio instead.
+        */
+-      if (flags & (MPOL_MF_MOVE_ALL) ||
+-          (flags & MPOL_MF_MOVE && folio_estimated_sharers(folio) == 1 &&
+-           !hugetlb_pmd_shared(pte))) {
+-              if (!isolate_hugetlb(folio, qp->pagelist) &&
+-                      (flags & MPOL_MF_STRICT))
+-                      /*
+-                       * Failed to isolate folio but allow migrating pages
+-                       * which have been queued.
+-                       */
+-                      qp->has_unmovable = true;
+-      }
++      if ((flags & MPOL_MF_MOVE_ALL) ||
++          (folio_estimated_sharers(folio) == 1 && !hugetlb_pmd_shared(pte)))
++              if (!isolate_hugetlb(folio, qp->pagelist))
++                      qp->nr_failed++;
+ unlock:
+       spin_unlock(ptl);
+-#else
+-      BUG();
++      if (qp->nr_failed && strictly_unmovable(flags))
++              return -EIO;
+ #endif
+-      return ret;
++      return 0;
+ }
+ 
+ #ifdef CONFIG_NUMA_BALANCING
+@@ -708,8 +696,11 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
+               return 1;
+       }
+ 
+-      /* queue pages from current vma */
+-      if (flags & MPOL_MF_VALID)
++      /*
++       * Check page nodes, and queue pages to move, in the current vma.
++       * But if no moving, and no strict checking, the scan can be skipped.
++       */
++      if (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+               return 0;
+       return 1;
+ }
+@@ -731,22 +722,21 @@ static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+ /*
+  * Walk through page tables and collect pages to be migrated.
+  *
+- * If pages found in a given range are on a set of nodes (determined by
+- * @nodes and @flags,) it's isolated and queued to the pagelist which is
+- * passed via @private.
++ * If pages found in a given range are not on the required set of @nodes,
++ * and migration is allowed, they are isolated and queued to @pagelist.
+  *
+- * queue_pages_range() has three possible return values:
+- * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
+- *     specified.
+- * 0 - queue pages successfully or no misplaced page.
+- * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or
+- *         memory range specified by nodemask and maxnode points outside
+- *         your accessible address space (-EFAULT)
++ * queue_pages_range() may return:
++ * 0 - all pages already on the right node, or successfully queued for moving
++ *     (or neither strict checking nor moving requested: only range checking).
++ * >0 - this number of misplaced folios could not be queued for moving
++ *      (a hugetlbfs page or a transparent huge page being counted as 1).
++ * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs.
++ * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified.
+  */
+-static int
++static long
+ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
+               nodemask_t *nodes, unsigned long flags,
+-              struct list_head *pagelist, bool lock_vma)
++              struct list_head *pagelist)
+ {
+       int err;
+       struct queue_pages qp = {
+@@ -756,20 +746,17 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
+               .start = start,
+               .end = end,
+               .first = NULL,
+-              .has_unmovable = false,
+       };
+-      const struct mm_walk_ops *ops = lock_vma ?
++      const struct mm_walk_ops *ops = (flags & MPOL_MF_WRLOCK) ?
+                       &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
+ 
+       err = walk_page_range(mm, start, end, ops, &qp);
+ 
+-      if (qp.has_unmovable)
+-              err = 1;
+       if (!qp.first)
+               /* whole range in hole */
+               err = -EFAULT;
+ 
+-      return err;
++      return err ? : qp.nr_failed;
+ }
+ 
+ /*
+@@ -1032,16 +1019,16 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
+ }
+ 
+ #ifdef CONFIG_MIGRATION
+-static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
++static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
+                               unsigned long flags)
+ {
+       /*
+-       * We try to migrate only unshared folios. If it is shared it
+-       * is likely not worth migrating.
++       * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio.
++       * Choosing not to migrate a shared folio is not counted as a failure.
+        *
+        * To check if the folio is shared, ideally we want to make sure
+        * every page is mapped to the same process. Doing that is very
+-       * expensive, so check the estimated mapcount of the folio instead.
++       * expensive, so check the estimated sharers of the folio instead.
+        */
+       if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) {
+               if (folio_isolate_lru(folio)) {
+@@ -1049,32 +1036,31 @@ static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
+                       node_stat_mod_folio(folio,
+                               NR_ISOLATED_ANON + folio_is_file_lru(folio),
+                               folio_nr_pages(folio));
+-              } else if (flags & MPOL_MF_STRICT) {
++              } else {
+                       /*
+                        * Non-movable folio may reach here.  And, there may be
+                        * temporary off LRU folios or non-LRU movable folios.
+                        * Treat them as unmovable folios since they can't be
+-                       * isolated, so they can't be moved at the moment.  It
+-                       * should return -EIO for this case too.
++                       * isolated, so they can't be moved at the moment.
+                        */
+-                      return -EIO;
++                      return false;
+               }
+       }
+-
+-      return 0;
++      return true;
+ }
+ 
+ /*
+  * Migrate pages from one node to a target node.
+  * Returns error or the number of pages not migrated.
+  */
+-static int migrate_to_node(struct mm_struct *mm, int source, int dest,
+-                         int flags)
++static long migrate_to_node(struct mm_struct *mm, int source, int dest,
++                          int flags)
+ {
+       nodemask_t nmask;
+       struct vm_area_struct *vma;
+       LIST_HEAD(pagelist);
+-      int err = 0;
++      long nr_failed;
++      long err = 0;
+       struct migration_target_control mtc = {
+               .nid = dest,
+               .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
+@@ -1083,23 +1069,27 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
+       nodes_clear(nmask);
+       node_set(source, nmask);
+ 
++      VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
++      vma = find_vma(mm, 0);
++
+       /*
+-       * This does not "check" the range but isolates all pages that
++       * This does not migrate the range, but isolates all pages that
+        * need migration.  Between passing in the full user address
+-       * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
++       * space range and MPOL_MF_DISCONTIG_OK, this call cannot fail,
++       * but passes back the count of pages which could not be isolated.
+        */
+-      vma = find_vma(mm, 0);
+-      VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
+-      queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
+-                      flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
++      nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
++                                    flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+ 
+       if (!list_empty(&pagelist)) {
+               err = migrate_pages(&pagelist, alloc_migration_target, NULL,
+-                              (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
++                      (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
+               if (err)
+                       putback_movable_pages(&pagelist);
+       }
+ 
++      if (err >= 0)
++              err += nr_failed;
+       return err;
+ }
+ 
+@@ -1112,8 +1102,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
+ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+                    const nodemask_t *to, int flags)
+ {
+-      int busy = 0;
+-      int err = 0;
++      long nr_failed = 0;
++      long err = 0;
+       nodemask_t tmp;
+ 
+       lru_cache_disable();
+@@ -1195,7 +1185,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+               node_clear(source, tmp);
+               err = migrate_to_node(mm, source, dest, flags);
+               if (err > 0)
+-                      busy += err;
++                      nr_failed += err;
+               if (err < 0)
+                       break;
+       }
+@@ -1204,8 +1194,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+       lru_cache_enable();
+       if (err < 0)
+               return err;
+-      return busy;
+-
++      return (nr_failed < INT_MAX) ? nr_failed : INT_MAX;
+ }
+ 
+ /*
+@@ -1244,10 +1233,10 @@ static struct folio *new_folio(struct folio *src, unsigned long start)
+ }
+ #else
+ 
+-static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
++static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
+                               unsigned long flags)
+ {
+-      return -EIO;
++      return false;
+ }
+ 
+ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+@@ -1271,8 +1260,8 @@ static long do_mbind(unsigned long start, unsigned long len,
+       struct vma_iterator vmi;
+       struct mempolicy *new;
+       unsigned long end;
+-      int err;
+-      int ret;
++      long err;
++      long nr_failed;
+       LIST_HEAD(pagelist);
+ 
+       if (flags & ~(unsigned long)MPOL_MF_VALID)
+@@ -1312,10 +1301,8 @@ static long do_mbind(unsigned long start, unsigned long len,
+                start, start + len, mode, mode_flags,
+                nmask ? nodes_addr(*nmask)[0] : NUMA_NO_NODE);
+ 
+-      if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+-
++      if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+               lru_cache_disable();
+-      }
+       {
+               NODEMASK_SCRATCH(scratch);
+               if (scratch) {
+@@ -1331,44 +1318,37 @@ static long do_mbind(unsigned long start, unsigned long len,
+               goto mpol_out;
+ 
+       /*
+-       * Lock the VMAs before scanning for pages to migrate, to ensure we don't
+-       * miss a concurrently inserted page.
++       * Lock the VMAs before scanning for pages to migrate,
++       * to ensure we don't miss a concurrently inserted page.
+        */
+-      ret = queue_pages_range(mm, start, end, nmask,
+-                        flags | MPOL_MF_INVERT, &pagelist, true);
++      nr_failed = queue_pages_range(mm, start, end, nmask,
++                      flags | MPOL_MF_INVERT | MPOL_MF_WRLOCK, &pagelist);
+ 
+-      if (ret < 0) {
+-              err = ret;
+-              goto up_out;
+-      }
+-
+-      vma_iter_init(&vmi, mm, start);
+-      prev = vma_prev(&vmi);
+-      for_each_vma_range(vmi, vma, end) {
+-              err = mbind_range(&vmi, vma, &prev, start, end, new);
+-              if (err)
+-                      break;
++      if (nr_failed < 0) {
++              err = nr_failed;
++      } else {
++              vma_iter_init(&vmi, mm, start);
++              prev = vma_prev(&vmi);
++              for_each_vma_range(vmi, vma, end) {
++                      err = mbind_range(&vmi, vma, &prev, start, end, new);
++                      if (err)
++                              break;
++              }
+       }
+ 
+       if (!err) {
+-              int nr_failed = 0;
+-
+               if (!list_empty(&pagelist)) {
+                       WARN_ON_ONCE(flags & MPOL_MF_LAZY);
+-                      nr_failed = migrate_pages(&pagelist, new_folio, NULL,
++                      nr_failed |= migrate_pages(&pagelist, new_folio, NULL,
+                               start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND, NULL);
+-                      if (nr_failed)
+-                              putback_movable_pages(&pagelist);
+               }
+-
+-              if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
++              if (nr_failed && (flags & MPOL_MF_STRICT))
+                       err = -EIO;
+-      } else {
+-up_out:
+-              if (!list_empty(&pagelist))
+-                      putback_movable_pages(&pagelist);
+       }
+ 
++      if (!list_empty(&pagelist))
++              putback_movable_pages(&pagelist);
++
+       mmap_write_unlock(mm);
+ mpol_out:
+       mpol_put(new);
+-- 
+2.43.0
+
diff --git a/queue-6.6/mm-mempolicy-fix-migrate_to_node-assuming-there-is-a.patch b/queue-6.6/mm-mempolicy-fix-migrate_to_node-assuming-there-is-a.patch

new file mode 100644 (file)

index 0000000..167976c
--- /dev/null
+++ b/queue-6.6/mm-mempolicy-fix-migrate_to_node-assuming-there-is-a.patch
@@ -0,0 +1,80 @@
+From 2e67827c993ba96be423bdf4728e11e9c1fed90b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Nov 2024 21:11:51 +0100
+Subject: mm/mempolicy: fix migrate_to_node() assuming there is at least one
+ VMA in a MM
+
+From: David Hildenbrand <david@redhat.com>
+
+[ Upstream commit 091c1dd2d4df6edd1beebe0e5863d4034ade9572 ]
+
+We currently assume that there is at least one VMA in a MM, which isn't
+true.
+
+So we might end up having find_vma() return NULL, to then de-reference
+NULL.  So properly handle find_vma() returning NULL.
+
+This fixes the report:
+
+Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI
+KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
+CPU: 1 UID: 0 PID: 6021 Comm: syz-executor284 Not tainted 6.12.0-rc7-syzkaller-00187-gf868cd251776 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024
+RIP: 0010:migrate_to_node mm/mempolicy.c:1090 [inline]
+RIP: 0010:do_migrate_pages+0x403/0x6f0 mm/mempolicy.c:1194
+Code: ...
+RSP: 0018:ffffc9000375fd08 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: ffffc9000375fd78 RCX: 0000000000000000
+RDX: ffff88807e171300 RSI: dffffc0000000000 RDI: ffff88803390c044
+RBP: ffff88807e171428 R08: 0000000000000014 R09: fffffbfff2039ef1
+R10: ffffffff901cf78f R11: 0000000000000000 R12: 0000000000000003
+R13: ffffc9000375fe90 R14: ffffc9000375fe98 R15: ffffc9000375fdf8
+FS:  00005555919e1380(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00005555919e1ca8 CR3: 000000007f12a000 CR4: 00000000003526f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ kernel_migrate_pages+0x5b2/0x750 mm/mempolicy.c:1709
+ __do_sys_migrate_pages mm/mempolicy.c:1727 [inline]
+ __se_sys_migrate_pages mm/mempolicy.c:1723 [inline]
+ __x64_sys_migrate_pages+0x96/0x100 mm/mempolicy.c:1723
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+[akpm@linux-foundation.org: add unlikely()]
+Link: https://lkml.kernel.org/r/20241120201151.9518-1-david@redhat.com
+Fixes: 39743889aaf7 ("[PATCH] Swap Migration V5: sys_migrate_pages interface")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: syzbot+3511625422f7aa637f0d@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/lkml/673d2696.050a0220.3c9d61.012f.GAE@google.com/T/
+Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mempolicy.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index 54f1b78d1b2c0..94c74c594d102 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1071,6 +1071,10 @@ static long migrate_to_node(struct mm_struct *mm, int source, int dest,
+ 
+       VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
+       vma = find_vma(mm, 0);
++      if (unlikely(!vma)) {
++              /* mmap_lock is not taken in this function: do not drop it */
++              return 0;
++      }
+ 
+       /*
+        * This does not migrate the range, but isolates all pages that
+-- 
+2.43.0
+
diff --git a/queue-6.6/platform-msi-prepare-for-real-per-device-domains.patch b/queue-6.6/platform-msi-prepare-for-real-per-device-domains.patch

new file mode 100644 (file)

index 0000000..31d84c4
--- /dev/null
+++ b/queue-6.6/platform-msi-prepare-for-real-per-device-domains.patch
@@ -0,0 +1,175 @@
+From 1c3c977271d7d952c04732d5b0175a8b3d5039a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Jan 2024 21:47:33 +0530
+Subject: platform-msi: Prepare for real per device domains
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+[ Upstream commit c88f9110bfbca5975a8dee4c9792ba12684c7bca ]
+
+Provide functions to create and remove per device MSI domains which replace
+the platform-MSI domains. The new model is that each of the devices which
+utilize platform-MSI gets now its private MSI domain which is "customized"
+in size and with a device specific function to write the MSI message into
+the device.
+
+This is the same functionality as platform-MSI but it avoids all the down
+sides of platform MSI, i.e. the extra ID book keeping, the special data
+structure in the msi descriptor. Further the domains are only created when
+the devices are really in use, so the burden is on the usage and not on the
+infrastructure.
+
+Fill in the domain template and provide two functions to init/allocate and
+remove a per device MSI domain.
+
+Until all users and parent domain providers are converted, the init/alloc
+function invokes the original platform-MSI code when the irqdomain which is
+associated to the device does not provide MSI parent functionality yet.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Anup Patel <apatel@ventanamicro.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20240127161753.114685-6-apatel@ventanamicro.com
+Stable-dep-of: 64506b3d23a3 ("scsi: ufs: qcom: Only free platform MSIs when ESI is enabled")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/platform-msi.c | 103 ++++++++++++++++++++++++++++++++++++
+ include/linux/msi.h         |   4 ++
+ 2 files changed, 107 insertions(+)
+
+diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
+index f37ad34c80ec4..b56e919acabb9 100644
+--- a/drivers/base/platform-msi.c
++++ b/drivers/base/platform-msi.c
+@@ -13,6 +13,8 @@
+ #include <linux/msi.h>
+ #include <linux/slab.h>
+ 
++/* Begin of removal area. Once everything is converted over. Cleanup the includes too! */
++
+ #define DEV_ID_SHIFT  21
+ #define MAX_DEV_MSIS  (1 << (32 - DEV_ID_SHIFT))
+ 
+@@ -350,3 +352,104 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
+ 
+       return msi_domain_populate_irqs(domain->parent, dev, virq, nr_irqs, &data->arg);
+ }
++
++/* End of removal area */
++
++/* Real per device domain interfaces */
++
++/*
++ * This indirection can go when platform_device_msi_init_and_alloc_irqs()
++ * is switched to a proper irq_chip::irq_write_msi_msg() callback. Keep it
++ * simple for now.
++ */
++static void platform_msi_write_msi_msg(struct irq_data *d, struct msi_msg *msg)
++{
++      irq_write_msi_msg_t cb = d->chip_data;
++
++      cb(irq_data_get_msi_desc(d), msg);
++}
++
++static void platform_msi_set_desc_byindex(msi_alloc_info_t *arg, struct msi_desc *desc)
++{
++      arg->desc = desc;
++      arg->hwirq = desc->msi_index;
++}
++
++static const struct msi_domain_template platform_msi_template = {
++      .chip = {
++              .name                   = "pMSI",
++              .irq_mask               = irq_chip_mask_parent,
++              .irq_unmask             = irq_chip_unmask_parent,
++              .irq_write_msi_msg      = platform_msi_write_msi_msg,
++              /* The rest is filled in by the platform MSI parent */
++      },
++
++      .ops = {
++              .set_desc               = platform_msi_set_desc_byindex,
++      },
++
++      .info = {
++              .bus_token              = DOMAIN_BUS_DEVICE_MSI,
++      },
++};
++
++/**
++ * platform_device_msi_init_and_alloc_irqs - Initialize platform device MSI
++ *                                         and allocate interrupts for @dev
++ * @dev:              The device for which to allocate interrupts
++ * @nvec:             The number of interrupts to allocate
++ * @write_msi_msg:    Callback to write an interrupt message for @dev
++ *
++ * Returns:
++ * Zero for success, or an error code in case of failure
++ *
++ * This creates a MSI domain on @dev which has @dev->msi.domain as
++ * parent. The parent domain sets up the new domain. The domain has
++ * a fixed size of @nvec. The domain is managed by devres and will
++ * be removed when the device is removed.
++ *
++ * Note: For migration purposes this falls back to the original platform_msi code
++ *     up to the point where all platforms have been converted to the MSI
++ *     parent model.
++ */
++int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec,
++                                          irq_write_msi_msg_t write_msi_msg)
++{
++      struct irq_domain *domain = dev->msi.domain;
++
++      if (!domain || !write_msi_msg)
++              return -EINVAL;
++
++      /* Migration support. Will go away once everything is converted */
++      if (!irq_domain_is_msi_parent(domain))
++              return platform_msi_domain_alloc_irqs(dev, nvec, write_msi_msg);
++
++      /*
++       * @write_msi_msg is stored in the resulting msi_domain_info::data.
++       * The underlying domain creation mechanism will assign that
++       * callback to the resulting irq chip.
++       */
++      if (!msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN,
++                                        &platform_msi_template,
++                                        nvec, NULL, write_msi_msg))
++              return -ENODEV;
++
++      return msi_domain_alloc_irqs_range(dev, MSI_DEFAULT_DOMAIN, 0, nvec - 1);
++}
++EXPORT_SYMBOL_GPL(platform_device_msi_init_and_alloc_irqs);
++
++/**
++ * platform_device_msi_free_irqs_all - Free all interrupts for @dev
++ * @dev:      The device for which to free interrupts
++ */
++void platform_device_msi_free_irqs_all(struct device *dev)
++{
++      struct irq_domain *domain = dev->msi.domain;
++
++      msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
++
++      /* Migration support. Will go away once everything is converted */
++      if (!irq_domain_is_msi_parent(domain))
++              platform_msi_free_priv_data(dev);
++}
++EXPORT_SYMBOL_GPL(platform_device_msi_free_irqs_all);
+diff --git a/include/linux/msi.h b/include/linux/msi.h
+index ddace8c34dcf9..fc32c919e2edd 100644
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -656,6 +656,10 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
+ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
+                                    unsigned int nvec);
+ void *platform_msi_get_host_data(struct irq_domain *domain);
++/* Per device platform MSI */
++int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec,
++                                          irq_write_msi_msg_t write_msi_msg);
++void platform_device_msi_free_irqs_all(struct device *dev);
+ 
+ bool msi_device_has_isolated_msi(struct device *dev);
+ #else /* CONFIG_GENERIC_MSI_IRQ */
+-- 
+2.43.0
+
diff --git a/queue-6.6/sched-numa-fix-memory-leak-due-to-the-overwritten-vm.patch b/queue-6.6/sched-numa-fix-memory-leak-due-to-the-overwritten-vm.patch

new file mode 100644 (file)

index 0000000..7e99f33
--- /dev/null
+++ b/queue-6.6/sched-numa-fix-memory-leak-due-to-the-overwritten-vm.patch
@@ -0,0 +1,111 @@
+From c6f3fffef669e185df7ace84f9dded53ed049b8b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Nov 2024 18:21:46 +0800
+Subject: sched/numa: fix memory leak due to the overwritten vma->numab_state
+
+From: Adrian Huang <ahuang12@lenovo.com>
+
+[ Upstream commit 5f1b64e9a9b7ee9cfd32c6b2fab796e29bfed075 ]
+
+[Problem Description]
+When running the hackbench program of LTP, the following memory leak is
+reported by kmemleak.
+
+  # /opt/ltp/testcases/bin/hackbench 20 thread 1000
+  Running with 20*40 (== 800) tasks.
+
+  # dmesg | grep kmemleak
+  ...
+  kmemleak: 480 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
+  kmemleak: 665 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
+
+  # cat /sys/kernel/debug/kmemleak
+  unreferenced object 0xffff888cd8ca2c40 (size 64):
+    comm "hackbench", pid 17142, jiffies 4299780315
+    hex dump (first 32 bytes):
+      ac 74 49 00 01 00 00 00 4c 84 49 00 01 00 00 00  .tI.....L.I.....
+      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    backtrace (crc bff18fd4):
+      [<ffffffff81419a89>] __kmalloc_cache_noprof+0x2f9/0x3f0
+      [<ffffffff8113f715>] task_numa_work+0x725/0xa00
+      [<ffffffff8110f878>] task_work_run+0x58/0x90
+      [<ffffffff81ddd9f8>] syscall_exit_to_user_mode+0x1c8/0x1e0
+      [<ffffffff81dd78d5>] do_syscall_64+0x85/0x150
+      [<ffffffff81e0012b>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
+  ...
+
+This issue can be consistently reproduced on three different servers:
+  * a 448-core server
+  * a 256-core server
+  * a 192-core server
+
+[Root Cause]
+Since multiple threads are created by the hackbench program (along with
+the command argument 'thread'), a shared vma might be accessed by two or
+more cores simultaneously. When two or more cores observe that
+vma->numab_state is NULL at the same time, vma->numab_state will be
+overwritten.
+
+Although current code ensures that only one thread scans the VMAs in a
+single 'numa_scan_period', there might be a chance for another thread
+to enter in the next 'numa_scan_period' while we have not gotten till
+numab_state allocation [1].
+
+Note that the command `/opt/ltp/testcases/bin/hackbench 50 process 1000`
+cannot reproduce the issue. It is verified with 200+ test runs.
+
+[Solution]
+Use the cmpxchg atomic operation to ensure that only one thread executes
+the vma->numab_state assignment.
+
+[1] https://lore.kernel.org/lkml/1794be3c-358c-4cdc-a43d-a1f841d91ef7@amd.com/
+
+Link: https://lkml.kernel.org/r/20241113102146.2384-1-ahuang12@lenovo.com
+Fixes: ef6a22b70f6d ("sched/numa: apply the scan delay to every new vma")
+Signed-off-by: Adrian Huang <ahuang12@lenovo.com>
+Reported-by: Jiwei Sun <sunjw10@lenovo.com>
+Reviewed-by: Raghavendra K T <raghavendra.kt@amd.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Ben Segall <bsegall@google.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: Vincent Guittot <vincent.guittot@linaro.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 934d6f198b073..ddab19e5bd637 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3344,10 +3344,16 @@ static void task_numa_work(struct callback_head *work)
+ 
+               /* Initialise new per-VMA NUMAB state. */
+               if (!vma->numab_state) {
+-                      vma->numab_state = kzalloc(sizeof(struct vma_numab_state),
+-                              GFP_KERNEL);
+-                      if (!vma->numab_state)
++                      struct vma_numab_state *ptr;
++
++                      ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
++                      if (!ptr)
++                              continue;
++
++                      if (cmpxchg(&vma->numab_state, NULL, ptr)) {
++                              kfree(ptr);
+                               continue;
++                      }
+ 
+                       vma->numab_state->start_scan_seq = mm->numa_scan_seq;
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/sched-numa-fix-mm-numa_scan_seq-based-unconditional-.patch b/queue-6.6/sched-numa-fix-mm-numa_scan_seq-based-unconditional-.patch

new file mode 100644 (file)

index 0000000..1b6e18d
--- /dev/null
+++ b/queue-6.6/sched-numa-fix-mm-numa_scan_seq-based-unconditional-.patch
@@ -0,0 +1,121 @@
+From 364e7e94105819ed30f3ba32ed806e0d06715107 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Oct 2023 21:27:46 +0530
+Subject: sched/numa: Fix mm numa_scan_seq based unconditional scan
+
+From: Raghavendra K T <raghavendra.kt@amd.com>
+
+[ Upstream commit 84db47ca7146d7bd00eb5cf2b93989a971c84650 ]
+
+Since commit fc137c0ddab2 ("sched/numa: enhance vma scanning logic")
+
+NUMA Balancing allows updating PTEs to trap NUMA hinting faults if the
+task had previously accessed VMA. However unconditional scan of VMAs are
+allowed during initial phase of VMA creation until process's
+mm numa_scan_seq reaches 2 even though current task had not accessed VMA.
+
+Rationale:
+ - Without initial scan subsequent PTE update may never happen.
+ - Give fair opportunity to all the VMAs to be scanned and subsequently
+understand the access pattern of all the VMAs.
+
+But it has a corner case where, if a VMA is created after some time,
+process's mm numa_scan_seq could be already greater than 2.
+
+For e.g., values of mm numa_scan_seq when VMAs are created by running
+mmtest autonuma benchmark briefly looks like:
+start_seq=0 : 459
+start_seq=2 : 138
+start_seq=3 : 144
+start_seq=4 : 8
+start_seq=8 : 1
+start_seq=9 : 1
+This results in no unconditional PTE updates for those VMAs created after
+some time.
+
+Fix:
+ - Note down the initial value of mm numa_scan_seq in per VMA start_seq.
+ - Allow unconditional scan till start_seq + 2.
+
+Result:
+SUT: AMD EPYC Milan with 2 NUMA nodes 256 cpus.
+base kernel: upstream 6.6-rc6 with Mel's patches [1] applied.
+
+kernbench
+==========             base                  patched %gain
+Amean    elsp-128      165.09 ( 0.00%)      164.78 *   0.19%*
+
+Duration User       41404.28    41375.08
+Duration System      9862.22     9768.48
+Duration Elapsed      519.87      518.72
+
+Ops NUMA PTE updates           1041416.00      831536.00
+Ops NUMA hint faults            263296.00      220966.00
+Ops NUMA pages migrated         258021.00      212769.00
+Ops AutoNUMA cost                 1328.67        1114.69
+
+autonumabench
+
+NUMA01_THREADLOCAL
+==================
+Amean  elsp-NUMA01_THREADLOCAL   81.79 (0.00%)  67.74 *  17.18%*
+
+Duration User       54832.73    47379.67
+Duration System        75.00      185.75
+Duration Elapsed      576.72      476.09
+
+Ops NUMA PTE updates                  394429.00    11121044.00
+Ops NUMA hint faults                    1001.00     8906404.00
+Ops NUMA pages migrated                  288.00     2998694.00
+Ops AutoNUMA cost                          7.77       44666.84
+
+Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Link: https://lore.kernel.org/r/2ea7cbce80ac7c62e90cbfb9653a7972f902439f.1697816692.git.raghavendra.kt@amd.com
+Stable-dep-of: 5f1b64e9a9b7 ("sched/numa: fix memory leak due to the overwritten vma->numab_state")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mm_types.h | 3 +++
+ kernel/sched/fair.c      | 4 +++-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 43c19d85dfe7f..20c96ce98751a 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -576,6 +576,9 @@ struct vma_numab_state {
+        */
+       unsigned long pids_active[2];
+ 
++      /* MM scan sequence ID when scan first started after VMA creation */
++      int start_scan_seq;
++
+       /*
+        * MM scan sequence ID when the VMA was last completely scanned.
+        * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index db59bf549c644..934d6f198b073 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3197,7 +3197,7 @@ static bool vma_is_accessed(struct mm_struct *mm, struct vm_area_struct *vma)
+        * This is also done to avoid any side effect of task scanning
+        * amplifying the unfairness of disjoint set of VMAs' access.
+        */
+-      if (READ_ONCE(current->mm->numa_scan_seq) < 2)
++      if ((READ_ONCE(current->mm->numa_scan_seq) - vma->numab_state->start_scan_seq) < 2)
+               return true;
+ 
+       pids = vma->numab_state->pids_active[0] | vma->numab_state->pids_active[1];
+@@ -3349,6 +3349,8 @@ static void task_numa_work(struct callback_head *work)
+                       if (!vma->numab_state)
+                               continue;
+ 
++                      vma->numab_state->start_scan_seq = mm->numa_scan_seq;
++
+                       vma->numab_state->next_scan = now +
+                               msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/scsi-ufs-convert-all-platform-drivers-to-return-void.patch b/queue-6.6/scsi-ufs-convert-all-platform-drivers-to-return-void.patch

new file mode 100644 (file)

index 0000000..df41763
--- /dev/null
+++ b/queue-6.6/scsi-ufs-convert-all-platform-drivers-to-return-void.patch
@@ -0,0 +1,298 @@
+From 53940a5722e6b2e1963ad33d83f92b8251830b0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Sep 2023 16:57:22 +0200
+Subject: scsi: ufs: Convert all platform drivers to return void
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+
+[ Upstream commit 0842b7617e3491f489aff6f84712c388e32c1877 ]
+
+The .remove() callback for a platform driver returns an int which makes
+many driver authors wrongly assume it's possible to do error handling by
+returning an error code. However the value returned is ignored (apart from
+emitting a warning) and this typically results in resource leaks.  To
+improve here there is a quest to make the remove callback return void. In
+the first step of this quest all drivers are converted to .remove_new()
+which already returns void. Eventually after all drivers are converted,
+.remove_new() is renamed to .remove().
+
+All platform drivers below drivers/ufs/ unconditionally return zero in
+their remove callback and so can be converted trivially to the variant
+returning void.
+
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Link: https://lore.kernel.org/r/20230917145722.1131557-1-u.kleine-koenig@pengutronix.de
+Reviewed-by: Bean Huo <beanhuo@micron.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Stable-dep-of: 64506b3d23a3 ("scsi: ufs: qcom: Only free platform MSIs when ESI is enabled")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/host/cdns-pltfrm.c        | 5 ++---
+ drivers/ufs/host/tc-dwc-g210-pltfrm.c | 6 ++----
+ drivers/ufs/host/ti-j721e-ufs.c       | 6 ++----
+ drivers/ufs/host/ufs-exynos.c         | 6 ++----
+ drivers/ufs/host/ufs-hisi.c           | 5 ++---
+ drivers/ufs/host/ufs-mediatek.c       | 5 ++---
+ drivers/ufs/host/ufs-qcom.c           | 5 ++---
+ drivers/ufs/host/ufs-renesas.c        | 6 ++----
+ drivers/ufs/host/ufs-sprd.c           | 5 ++---
+ 9 files changed, 18 insertions(+), 31 deletions(-)
+
+diff --git a/drivers/ufs/host/cdns-pltfrm.c b/drivers/ufs/host/cdns-pltfrm.c
+index 56014ef302b49..66811d8d1929c 100644
+--- a/drivers/ufs/host/cdns-pltfrm.c
++++ b/drivers/ufs/host/cdns-pltfrm.c
+@@ -305,12 +305,11 @@ static int cdns_ufs_pltfrm_probe(struct platform_device *pdev)
+  *
+  * Return: 0 (success).
+  */
+-static int cdns_ufs_pltfrm_remove(struct platform_device *pdev)
++static void cdns_ufs_pltfrm_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+ 
+       ufshcd_remove(hba);
+-      return 0;
+ }
+ 
+ static const struct dev_pm_ops cdns_ufs_dev_pm_ops = {
+@@ -322,7 +321,7 @@ static const struct dev_pm_ops cdns_ufs_dev_pm_ops = {
+ 
+ static struct platform_driver cdns_ufs_pltfrm_driver = {
+       .probe  = cdns_ufs_pltfrm_probe,
+-      .remove = cdns_ufs_pltfrm_remove,
++      .remove_new = cdns_ufs_pltfrm_remove,
+       .driver = {
+               .name   = "cdns-ufshcd",
+               .pm     = &cdns_ufs_dev_pm_ops,
+diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
+index 4d5389dd95857..a3877592604d5 100644
+--- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c
++++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
+@@ -74,14 +74,12 @@ static int tc_dwc_g210_pltfm_probe(struct platform_device *pdev)
+  * @pdev: pointer to platform device structure
+  *
+  */
+-static int tc_dwc_g210_pltfm_remove(struct platform_device *pdev)
++static void tc_dwc_g210_pltfm_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_remove(hba);
+-
+-      return 0;
+ }
+ 
+ static const struct dev_pm_ops tc_dwc_g210_pltfm_pm_ops = {
+@@ -91,7 +89,7 @@ static const struct dev_pm_ops tc_dwc_g210_pltfm_pm_ops = {
+ 
+ static struct platform_driver tc_dwc_g210_pltfm_driver = {
+       .probe          = tc_dwc_g210_pltfm_probe,
+-      .remove         = tc_dwc_g210_pltfm_remove,
++      .remove_new     = tc_dwc_g210_pltfm_remove,
+       .driver         = {
+               .name   = "tc-dwc-g210-pltfm",
+               .pm     = &tc_dwc_g210_pltfm_pm_ops,
+diff --git a/drivers/ufs/host/ti-j721e-ufs.c b/drivers/ufs/host/ti-j721e-ufs.c
+index 117eb7da92acd..250c22df000d5 100644
+--- a/drivers/ufs/host/ti-j721e-ufs.c
++++ b/drivers/ufs/host/ti-j721e-ufs.c
+@@ -65,13 +65,11 @@ static int ti_j721e_ufs_probe(struct platform_device *pdev)
+       return ret;
+ }
+ 
+-static int ti_j721e_ufs_remove(struct platform_device *pdev)
++static void ti_j721e_ufs_remove(struct platform_device *pdev)
+ {
+       of_platform_depopulate(&pdev->dev);
+       pm_runtime_put_sync(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+-
+-      return 0;
+ }
+ 
+ static const struct of_device_id ti_j721e_ufs_of_match[] = {
+@@ -85,7 +83,7 @@ MODULE_DEVICE_TABLE(of, ti_j721e_ufs_of_match);
+ 
+ static struct platform_driver ti_j721e_ufs_driver = {
+       .probe  = ti_j721e_ufs_probe,
+-      .remove = ti_j721e_ufs_remove,
++      .remove_new = ti_j721e_ufs_remove,
+       .driver = {
+               .name   = "ti-j721e-ufs",
+               .of_match_table = ti_j721e_ufs_of_match,
+diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c
+index 268189f01e15b..25bc11811b4c1 100644
+--- a/drivers/ufs/host/ufs-exynos.c
++++ b/drivers/ufs/host/ufs-exynos.c
+@@ -1605,7 +1605,7 @@ static int exynos_ufs_probe(struct platform_device *pdev)
+       return err;
+ }
+ 
+-static int exynos_ufs_remove(struct platform_device *pdev)
++static void exynos_ufs_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+       struct exynos_ufs *ufs = ufshcd_get_variant(hba);
+@@ -1615,8 +1615,6 @@ static int exynos_ufs_remove(struct platform_device *pdev)
+ 
+       phy_power_off(ufs->phy);
+       phy_exit(ufs->phy);
+-
+-      return 0;
+ }
+ 
+ static struct exynos_ufs_uic_attr exynos7_uic_attr = {
+@@ -1756,7 +1754,7 @@ static const struct dev_pm_ops exynos_ufs_pm_ops = {
+ 
+ static struct platform_driver exynos_ufs_pltform = {
+       .probe  = exynos_ufs_probe,
+-      .remove = exynos_ufs_remove,
++      .remove_new = exynos_ufs_remove,
+       .driver = {
+               .name   = "exynos-ufshc",
+               .pm     = &exynos_ufs_pm_ops,
+diff --git a/drivers/ufs/host/ufs-hisi.c b/drivers/ufs/host/ufs-hisi.c
+index 5b3060cd0ab8b..0229ac0a8dbed 100644
+--- a/drivers/ufs/host/ufs-hisi.c
++++ b/drivers/ufs/host/ufs-hisi.c
+@@ -575,12 +575,11 @@ static int ufs_hisi_probe(struct platform_device *pdev)
+       return ufshcd_pltfrm_init(pdev, of_id->data);
+ }
+ 
+-static int ufs_hisi_remove(struct platform_device *pdev)
++static void ufs_hisi_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+ 
+       ufshcd_remove(hba);
+-      return 0;
+ }
+ 
+ static const struct dev_pm_ops ufs_hisi_pm_ops = {
+@@ -592,7 +591,7 @@ static const struct dev_pm_ops ufs_hisi_pm_ops = {
+ 
+ static struct platform_driver ufs_hisi_pltform = {
+       .probe  = ufs_hisi_probe,
+-      .remove = ufs_hisi_remove,
++      .remove_new = ufs_hisi_remove,
+       .driver = {
+               .name   = "ufshcd-hisi",
+               .pm     = &ufs_hisi_pm_ops,
+diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
+index 2383ecd88f1cb..1238faec3cc68 100644
+--- a/drivers/ufs/host/ufs-mediatek.c
++++ b/drivers/ufs/host/ufs-mediatek.c
+@@ -1748,13 +1748,12 @@ static int ufs_mtk_probe(struct platform_device *pdev)
+  *
+  * Always return 0
+  */
+-static int ufs_mtk_remove(struct platform_device *pdev)
++static void ufs_mtk_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_remove(hba);
+-      return 0;
+ }
+ 
+ #ifdef CONFIG_PM_SLEEP
+@@ -1818,7 +1817,7 @@ static const struct dev_pm_ops ufs_mtk_pm_ops = {
+ 
+ static struct platform_driver ufs_mtk_pltform = {
+       .probe      = ufs_mtk_probe,
+-      .remove     = ufs_mtk_remove,
++      .remove_new = ufs_mtk_remove,
+       .driver = {
+               .name   = "ufshcd-mtk",
+               .pm     = &ufs_mtk_pm_ops,
+diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
+index 643157a92c62a..0a914fd44494d 100644
+--- a/drivers/ufs/host/ufs-qcom.c
++++ b/drivers/ufs/host/ufs-qcom.c
+@@ -1920,14 +1920,13 @@ static int ufs_qcom_probe(struct platform_device *pdev)
+  *
+  * Always returns 0
+  */
+-static int ufs_qcom_remove(struct platform_device *pdev)
++static void ufs_qcom_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_remove(hba);
+       platform_msi_domain_free_irqs(hba->dev);
+-      return 0;
+ }
+ 
+ static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = {
+@@ -1959,7 +1958,7 @@ static const struct dev_pm_ops ufs_qcom_pm_ops = {
+ 
+ static struct platform_driver ufs_qcom_pltform = {
+       .probe  = ufs_qcom_probe,
+-      .remove = ufs_qcom_remove,
++      .remove_new = ufs_qcom_remove,
+       .driver = {
+               .name   = "ufshcd-qcom",
+               .pm     = &ufs_qcom_pm_ops,
+diff --git a/drivers/ufs/host/ufs-renesas.c b/drivers/ufs/host/ufs-renesas.c
+index ea3da773b1c14..3ff97112e1f6d 100644
+--- a/drivers/ufs/host/ufs-renesas.c
++++ b/drivers/ufs/host/ufs-renesas.c
+@@ -395,18 +395,16 @@ static int ufs_renesas_probe(struct platform_device *pdev)
+       return ufshcd_pltfrm_init(pdev, &ufs_renesas_vops);
+ }
+ 
+-static int ufs_renesas_remove(struct platform_device *pdev)
++static void ufs_renesas_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba = platform_get_drvdata(pdev);
+ 
+       ufshcd_remove(hba);
+-
+-      return 0;
+ }
+ 
+ static struct platform_driver ufs_renesas_platform = {
+       .probe  = ufs_renesas_probe,
+-      .remove = ufs_renesas_remove,
++      .remove_new = ufs_renesas_remove,
+       .driver = {
+               .name   = "ufshcd-renesas",
+               .of_match_table = of_match_ptr(ufs_renesas_of_match),
+diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c
+index 2bad75dd6d589..d8b165908809d 100644
+--- a/drivers/ufs/host/ufs-sprd.c
++++ b/drivers/ufs/host/ufs-sprd.c
+@@ -425,13 +425,12 @@ static int ufs_sprd_probe(struct platform_device *pdev)
+       return err;
+ }
+ 
+-static int ufs_sprd_remove(struct platform_device *pdev)
++static void ufs_sprd_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_remove(hba);
+-      return 0;
+ }
+ 
+ static const struct dev_pm_ops ufs_sprd_pm_ops = {
+@@ -443,7 +442,7 @@ static const struct dev_pm_ops ufs_sprd_pm_ops = {
+ 
+ static struct platform_driver ufs_sprd_pltform = {
+       .probe = ufs_sprd_probe,
+-      .remove = ufs_sprd_remove,
++      .remove_new = ufs_sprd_remove,
+       .driver = {
+               .name = "ufshcd-sprd",
+               .pm = &ufs_sprd_pm_ops,
+-- 
+2.43.0
+
diff --git a/queue-6.6/scsi-ufs-pltfrm-dellocate-hba-during-ufshcd_pltfrm_r.patch b/queue-6.6/scsi-ufs-pltfrm-dellocate-hba-during-ufshcd_pltfrm_r.patch

new file mode 100644 (file)

index 0000000..9426986
--- /dev/null
+++ b/queue-6.6/scsi-ufs-pltfrm-dellocate-hba-during-ufshcd_pltfrm_r.patch
@@ -0,0 +1,40 @@
+From 2574a5ed78f1f65bc32917a55e63a7f3c601b6ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Nov 2024 23:18:34 +0530
+Subject: scsi: ufs: pltfrm: Dellocate HBA during ufshcd_pltfrm_remove()
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+[ Upstream commit 897df60c16d54ad515a3d0887edab5c63da06d1f ]
+
+This will ensure that the scsi host is cleaned up properly using
+scsi_host_dev_release(). Otherwise, it may lead to memory leaks.
+
+Cc: stable@vger.kernel.org # 4.4
+Fixes: 03b1781aa978 ("[SCSI] ufs: Add Platform glue driver for ufshcd")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-5-45ad8b62f02e@linaro.org
+Reviewed-by: Peter Wang <peter.wang@mediatek.com>
+Reviewed-by: Bean Huo <beanhuo@micron.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/host/ufshcd-pltfrm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
+index 05836cb8b885b..e99d89d00606b 100644
+--- a/drivers/ufs/host/ufshcd-pltfrm.c
++++ b/drivers/ufs/host/ufshcd-pltfrm.c
+@@ -402,6 +402,7 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev)
+ 
+       pm_runtime_get_sync(&pdev->dev);
+       ufshcd_remove(hba);
++      ufshcd_dealloc_host(hba);
+       pm_runtime_disable(&pdev->dev);
+       pm_runtime_put_noidle(&pdev->dev);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/scsi-ufs-pltfrm-disable-runtime-pm-during-removal-of.patch b/queue-6.6/scsi-ufs-pltfrm-disable-runtime-pm-during-removal-of.patch

new file mode 100644 (file)

index 0000000..5eac0d5
--- /dev/null
+++ b/queue-6.6/scsi-ufs-pltfrm-disable-runtime-pm-during-removal-of.patch
@@ -0,0 +1,201 @@
+From f2020ee0d7f8af71ab3b510729abbc7912be6399 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Nov 2024 23:18:32 +0530
+Subject: scsi: ufs: pltfrm: Disable runtime PM during removal of glue drivers
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+[ Upstream commit d3326e6a3f9bf1e075be2201fb704c2fdf19e2b7 ]
+
+When the UFSHCD platform glue drivers are removed, runtime PM should be
+disabled using pm_runtime_disable() to balance the enablement done in
+ufshcd_pltfrm_init(). This is also reported by PM core when the glue driver
+is removed and inserted again:
+
+ufshcd-qcom 1d84000.ufshc: Unbalanced pm_runtime_enable!
+
+So disable runtime PM using a new helper API ufshcd_pltfrm_remove(), that
+also takes care of removing ufshcd. This helper should be called during the
+remove() stage of glue drivers.
+
+Cc: stable@vger.kernel.org # 3.12
+Fixes: 62694735ca95 ("[SCSI] ufs: Add runtime PM support for UFS host controller driver")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-3-45ad8b62f02e@linaro.org
+Reviewed-by: Peter Wang <peter.wang@mediatek.com>
+Reviewed-by: Bean Huo <beanhuo@micron.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Stable-dep-of: 1745dcdb7227 ("scsi: ufs: pltfrm: Drop PM runtime reference count after ufshcd_remove()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/host/cdns-pltfrm.c        |  4 +---
+ drivers/ufs/host/tc-dwc-g210-pltfrm.c |  4 +---
+ drivers/ufs/host/ufs-exynos.c         |  2 +-
+ drivers/ufs/host/ufs-hisi.c           |  4 +---
+ drivers/ufs/host/ufs-mediatek.c       |  4 +---
+ drivers/ufs/host/ufs-qcom.c           |  2 +-
+ drivers/ufs/host/ufs-renesas.c        |  4 +---
+ drivers/ufs/host/ufs-sprd.c           |  4 +---
+ drivers/ufs/host/ufshcd-pltfrm.c      | 13 +++++++++++++
+ drivers/ufs/host/ufshcd-pltfrm.h      |  1 +
+ 10 files changed, 22 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/ufs/host/cdns-pltfrm.c b/drivers/ufs/host/cdns-pltfrm.c
+index 66811d8d1929c..b31aa84111511 100644
+--- a/drivers/ufs/host/cdns-pltfrm.c
++++ b/drivers/ufs/host/cdns-pltfrm.c
+@@ -307,9 +307,7 @@ static int cdns_ufs_pltfrm_probe(struct platform_device *pdev)
+  */
+ static void cdns_ufs_pltfrm_remove(struct platform_device *pdev)
+ {
+-      struct ufs_hba *hba =  platform_get_drvdata(pdev);
+-
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+ }
+ 
+ static const struct dev_pm_ops cdns_ufs_dev_pm_ops = {
+diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
+index a3877592604d5..113e0ef7b2cf8 100644
+--- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c
++++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
+@@ -76,10 +76,8 @@ static int tc_dwc_g210_pltfm_probe(struct platform_device *pdev)
+  */
+ static void tc_dwc_g210_pltfm_remove(struct platform_device *pdev)
+ {
+-      struct ufs_hba *hba =  platform_get_drvdata(pdev);
+-
+       pm_runtime_get_sync(&(pdev)->dev);
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+ }
+ 
+ static const struct dev_pm_ops tc_dwc_g210_pltfm_pm_ops = {
+diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c
+index 25bc11811b4c1..1cca797a00ba0 100644
+--- a/drivers/ufs/host/ufs-exynos.c
++++ b/drivers/ufs/host/ufs-exynos.c
+@@ -1611,7 +1611,7 @@ static void exynos_ufs_remove(struct platform_device *pdev)
+       struct exynos_ufs *ufs = ufshcd_get_variant(hba);
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+ 
+       phy_power_off(ufs->phy);
+       phy_exit(ufs->phy);
+diff --git a/drivers/ufs/host/ufs-hisi.c b/drivers/ufs/host/ufs-hisi.c
+index 0229ac0a8dbed..ceae0dd1617ed 100644
+--- a/drivers/ufs/host/ufs-hisi.c
++++ b/drivers/ufs/host/ufs-hisi.c
+@@ -577,9 +577,7 @@ static int ufs_hisi_probe(struct platform_device *pdev)
+ 
+ static void ufs_hisi_remove(struct platform_device *pdev)
+ {
+-      struct ufs_hba *hba =  platform_get_drvdata(pdev);
+-
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+ }
+ 
+ static const struct dev_pm_ops ufs_hisi_pm_ops = {
+diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
+index 1238faec3cc68..49f63b13a040d 100644
+--- a/drivers/ufs/host/ufs-mediatek.c
++++ b/drivers/ufs/host/ufs-mediatek.c
+@@ -1750,10 +1750,8 @@ static int ufs_mtk_probe(struct platform_device *pdev)
+  */
+ static void ufs_mtk_remove(struct platform_device *pdev)
+ {
+-      struct ufs_hba *hba =  platform_get_drvdata(pdev);
+-
+       pm_runtime_get_sync(&(pdev)->dev);
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+ }
+ 
+ #ifdef CONFIG_PM_SLEEP
+diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
+index 1e75368930d99..762b3aa19f31d 100644
+--- a/drivers/ufs/host/ufs-qcom.c
++++ b/drivers/ufs/host/ufs-qcom.c
+@@ -1926,7 +1926,7 @@ static void ufs_qcom_remove(struct platform_device *pdev)
+       struct ufs_qcom_host *host = ufshcd_get_variant(hba);
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+       if (host->esi_enabled)
+               platform_device_msi_free_irqs_all(hba->dev);
+ }
+diff --git a/drivers/ufs/host/ufs-renesas.c b/drivers/ufs/host/ufs-renesas.c
+index 3ff97112e1f6d..21a64b34397d8 100644
+--- a/drivers/ufs/host/ufs-renesas.c
++++ b/drivers/ufs/host/ufs-renesas.c
+@@ -397,9 +397,7 @@ static int ufs_renesas_probe(struct platform_device *pdev)
+ 
+ static void ufs_renesas_remove(struct platform_device *pdev)
+ {
+-      struct ufs_hba *hba = platform_get_drvdata(pdev);
+-
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+ }
+ 
+ static struct platform_driver ufs_renesas_platform = {
+diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c
+index d8b165908809d..e455890cf7d49 100644
+--- a/drivers/ufs/host/ufs-sprd.c
++++ b/drivers/ufs/host/ufs-sprd.c
+@@ -427,10 +427,8 @@ static int ufs_sprd_probe(struct platform_device *pdev)
+ 
+ static void ufs_sprd_remove(struct platform_device *pdev)
+ {
+-      struct ufs_hba *hba =  platform_get_drvdata(pdev);
+-
+       pm_runtime_get_sync(&(pdev)->dev);
+-      ufshcd_remove(hba);
++      ufshcd_pltfrm_remove(pdev);
+ }
+ 
+ static const struct dev_pm_ops ufs_sprd_pm_ops = {
+diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
+index 797a4dfe45d90..0dc8651eabc22 100644
+--- a/drivers/ufs/host/ufshcd-pltfrm.c
++++ b/drivers/ufs/host/ufshcd-pltfrm.c
+@@ -392,6 +392,19 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
+ }
+ EXPORT_SYMBOL_GPL(ufshcd_pltfrm_init);
+ 
++/**
++ * ufshcd_pltfrm_remove - Remove ufshcd platform
++ * @pdev: pointer to Platform device handle
++ */
++void ufshcd_pltfrm_remove(struct platform_device *pdev)
++{
++      struct ufs_hba *hba =  platform_get_drvdata(pdev);
++
++      ufshcd_remove(hba);
++      pm_runtime_disable(&pdev->dev);
++}
++EXPORT_SYMBOL_GPL(ufshcd_pltfrm_remove);
++
+ MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>");
+ MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>");
+ MODULE_DESCRIPTION("UFS host controller Platform bus based glue driver");
+diff --git a/drivers/ufs/host/ufshcd-pltfrm.h b/drivers/ufs/host/ufshcd-pltfrm.h
+index 2df108f4ac131..1cfc4f8ea07eb 100644
+--- a/drivers/ufs/host/ufshcd-pltfrm.h
++++ b/drivers/ufs/host/ufshcd-pltfrm.h
+@@ -31,6 +31,7 @@ int ufshcd_get_pwr_dev_param(const struct ufs_dev_params *dev_param,
+ void ufshcd_init_pwr_dev_param(struct ufs_dev_params *dev_param);
+ int ufshcd_pltfrm_init(struct platform_device *pdev,
+                      const struct ufs_hba_variant_ops *vops);
++void ufshcd_pltfrm_remove(struct platform_device *pdev);
+ int ufshcd_populate_vreg(struct device *dev, const char *name,
+                        struct ufs_vreg **out_vreg);
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-afte.patch b/queue-6.6/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-afte.patch

new file mode 100644 (file)

index 0000000..e7659fb
--- /dev/null
+++ b/queue-6.6/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-afte.patch
@@ -0,0 +1,115 @@
+From ea4799f2d7ae0a481aa95411f6f649d09f9f62f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Nov 2024 23:18:33 +0530
+Subject: scsi: ufs: pltfrm: Drop PM runtime reference count after
+ ufshcd_remove()
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+[ Upstream commit 1745dcdb7227102e16248a324c600b9121c8f6df ]
+
+During the remove stage of glue drivers, some of them are incrementing the
+reference count using pm_runtime_get_sync(), before removing the ufshcd
+using ufshcd_remove(). But they are not dropping that reference count after
+ufshcd_remove() to balance the refcount.
+
+So drop the reference count by calling pm_runtime_put_noidle() after
+ufshcd_remove(). Since the behavior is applicable to all glue drivers, move
+the PM handling to ufshcd_pltfrm_remove().
+
+Cc: stable@vger.kernel.org # 3.12
+Fixes: 62694735ca95 ("[SCSI] ufs: Add runtime PM support for UFS host controller driver")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-4-45ad8b62f02e@linaro.org
+Reviewed-by: Peter Wang <peter.wang@mediatek.com>
+Reviewed-by: Bean Huo <beanhuo@micron.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/host/tc-dwc-g210-pltfrm.c | 1 -
+ drivers/ufs/host/ufs-exynos.c         | 1 -
+ drivers/ufs/host/ufs-mediatek.c       | 1 -
+ drivers/ufs/host/ufs-qcom.c           | 1 -
+ drivers/ufs/host/ufs-sprd.c           | 1 -
+ drivers/ufs/host/ufshcd-pltfrm.c      | 2 ++
+ 6 files changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
+index 113e0ef7b2cf8..c6f8565ede21a 100644
+--- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c
++++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
+@@ -76,7 +76,6 @@ static int tc_dwc_g210_pltfm_probe(struct platform_device *pdev)
+  */
+ static void tc_dwc_g210_pltfm_remove(struct platform_device *pdev)
+ {
+-      pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_pltfrm_remove(pdev);
+ }
+ 
+diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c
+index 1cca797a00ba0..4418c497a6d71 100644
+--- a/drivers/ufs/host/ufs-exynos.c
++++ b/drivers/ufs/host/ufs-exynos.c
+@@ -1610,7 +1610,6 @@ static void exynos_ufs_remove(struct platform_device *pdev)
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+       struct exynos_ufs *ufs = ufshcd_get_variant(hba);
+ 
+-      pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_pltfrm_remove(pdev);
+ 
+       phy_power_off(ufs->phy);
+diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
+index 49f63b13a040d..64d85e63b7501 100644
+--- a/drivers/ufs/host/ufs-mediatek.c
++++ b/drivers/ufs/host/ufs-mediatek.c
+@@ -1750,7 +1750,6 @@ static int ufs_mtk_probe(struct platform_device *pdev)
+  */
+ static void ufs_mtk_remove(struct platform_device *pdev)
+ {
+-      pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_pltfrm_remove(pdev);
+ }
+ 
+diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
+index 762b3aa19f31d..0dc4c14e9f35c 100644
+--- a/drivers/ufs/host/ufs-qcom.c
++++ b/drivers/ufs/host/ufs-qcom.c
+@@ -1925,7 +1925,6 @@ static void ufs_qcom_remove(struct platform_device *pdev)
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+       struct ufs_qcom_host *host = ufshcd_get_variant(hba);
+ 
+-      pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_pltfrm_remove(pdev);
+       if (host->esi_enabled)
+               platform_device_msi_free_irqs_all(hba->dev);
+diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c
+index e455890cf7d49..d220978c2d8c8 100644
+--- a/drivers/ufs/host/ufs-sprd.c
++++ b/drivers/ufs/host/ufs-sprd.c
+@@ -427,7 +427,6 @@ static int ufs_sprd_probe(struct platform_device *pdev)
+ 
+ static void ufs_sprd_remove(struct platform_device *pdev)
+ {
+-      pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_pltfrm_remove(pdev);
+ }
+ 
+diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
+index 0dc8651eabc22..05836cb8b885b 100644
+--- a/drivers/ufs/host/ufshcd-pltfrm.c
++++ b/drivers/ufs/host/ufshcd-pltfrm.c
+@@ -400,8 +400,10 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
+ 
++      pm_runtime_get_sync(&pdev->dev);
+       ufshcd_remove(hba);
+       pm_runtime_disable(&pdev->dev);
++      pm_runtime_put_noidle(&pdev->dev);
+ }
+ EXPORT_SYMBOL_GPL(ufshcd_pltfrm_remove);
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/scsi-ufs-qcom-only-free-platform-msis-when-esi-is-en.patch b/queue-6.6/scsi-ufs-qcom-only-free-platform-msis-when-esi-is-en.patch

new file mode 100644 (file)

index 0000000..036edce
--- /dev/null
+++ b/queue-6.6/scsi-ufs-qcom-only-free-platform-msis-when-esi-is-en.patch
@@ -0,0 +1,65 @@
+From fa58fa5bb0bd735892f87767755f409f2e7dd7f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Nov 2024 23:18:31 +0530
+Subject: scsi: ufs: qcom: Only free platform MSIs when ESI is enabled
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+[ Upstream commit 64506b3d23a337e98a74b18dcb10c8619365f2bd ]
+
+Otherwise, it will result in a NULL pointer dereference as below:
+
+Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
+Call trace:
+ mutex_lock+0xc/0x54
+ platform_device_msi_free_irqs_all+0x14/0x20
+ ufs_qcom_remove+0x34/0x48 [ufs_qcom]
+ platform_remove+0x28/0x44
+ device_remove+0x4c/0x80
+ device_release_driver_internal+0xd8/0x178
+ driver_detach+0x50/0x9c
+ bus_remove_driver+0x6c/0xbc
+ driver_unregister+0x30/0x60
+ platform_driver_unregister+0x14/0x20
+ ufs_qcom_pltform_exit+0x18/0xb94 [ufs_qcom]
+ __arm64_sys_delete_module+0x180/0x260
+ invoke_syscall+0x44/0x100
+ el0_svc_common.constprop.0+0xc0/0xe0
+ do_el0_svc+0x1c/0x28
+ el0_svc+0x34/0xdc
+ el0t_64_sync_handler+0xc0/0xc4
+ el0t_64_sync+0x190/0x194
+
+Cc: stable@vger.kernel.org # 6.3
+Fixes: 519b6274a777 ("scsi: ufs: qcom: Add MCQ ESI config vendor specific ops")
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Link: https://lore.kernel.org/r/20241111-ufs_bug_fix-v1-2-45ad8b62f02e@linaro.org
+Reviewed-by: Bean Huo <beanhuo@micron.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/host/ufs-qcom.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
+index d77cfb2ab1acd..1e75368930d99 100644
+--- a/drivers/ufs/host/ufs-qcom.c
++++ b/drivers/ufs/host/ufs-qcom.c
+@@ -1923,10 +1923,12 @@ static int ufs_qcom_probe(struct platform_device *pdev)
+ static void ufs_qcom_remove(struct platform_device *pdev)
+ {
+       struct ufs_hba *hba =  platform_get_drvdata(pdev);
++      struct ufs_qcom_host *host = ufshcd_get_variant(hba);
+ 
+       pm_runtime_get_sync(&(pdev)->dev);
+       ufshcd_remove(hba);
+-      platform_device_msi_free_irqs_all(hba->dev);
++      if (host->esi_enabled)
++              platform_device_msi_free_irqs_all(hba->dev);
+ }
+ 
+ static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = {
+-- 
+2.43.0
+
diff --git a/queue-6.6/series b/queue-6.6/series

index 93ae5eb8aa6a68126604dcacff8290f4bee3a28a..87c3111b75919bf838f09223c7f3ad6e5103b3c6 100644 (file)
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -316,3 +316,15 @@ serial-8250_dw-add-sophgo-sg2044-quirk.patch
  smb-client-don-t-try-following-dfs-links-in-cifs_tre.patch
  setlocalversion-work-around-git-describe-performance.patch
  io_uring-tctx-work-around-xa_store-allocation-error-.patch
+scsi-ufs-convert-all-platform-drivers-to-return-void.patch
+genirq-irqdomain-add-domain_bus_device_msi.patch
+platform-msi-prepare-for-real-per-device-domains.patch
+irqchip-convert-all-platform-msi-users-to-the-new-ap.patch
+scsi-ufs-qcom-only-free-platform-msis-when-esi-is-en.patch
+scsi-ufs-pltfrm-disable-runtime-pm-during-removal-of.patch
+scsi-ufs-pltfrm-drop-pm-runtime-reference-count-afte.patch
+scsi-ufs-pltfrm-dellocate-hba-during-ufshcd_pltfrm_r.patch
+sched-numa-fix-mm-numa_scan_seq-based-unconditional-.patch
+sched-numa-fix-memory-leak-due-to-the-overwritten-vm.patch
+mempolicy-fix-migrate_pages-2-syscall-return-nr_fail.patch
+mm-mempolicy-fix-migrate_to_node-assuming-there-is-a.patch
author	Sasha Levin <sashal@kernel.org>
	Wed, 11 Dec 2024 18:37:02 +0000 (13:37 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Wed, 11 Dec 2024 18:37:02 +0000 (13:37 -0500)
queue-6.6/genirq-irqdomain-add-domain_bus_device_msi.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/irqchip-convert-all-platform-msi-users-to-the-new-ap.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/mempolicy-fix-migrate_pages-2-syscall-return-nr_fail.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/mm-mempolicy-fix-migrate_to_node-assuming-there-is-a.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/platform-msi-prepare-for-real-per-device-domains.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/sched-numa-fix-memory-leak-due-to-the-overwritten-vm.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/sched-numa-fix-mm-numa_scan_seq-based-unconditional-.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/scsi-ufs-convert-all-platform-drivers-to-return-void.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/scsi-ufs-pltfrm-dellocate-hba-during-ufshcd_pltfrm_r.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/scsi-ufs-pltfrm-disable-runtime-pm-during-removal-of.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-afte.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/scsi-ufs-qcom-only-free-platform-msis-when-esi-is-en.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/series		patch \| blob \| blame \| history