--- /dev/null
+From 7363411fc5cfb9051d60ae14cc34df16e311ac84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Oct 2023 20:13:23 -0700
+Subject: cxl/port: Fix delete_endpoint() vs parent unregistration race
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+[ Upstream commit 8d2ad999ca3c64cb08cf6a58d227b9d9e746d708 ]
+
+The CXL subsystem, at cxl_mem ->probe() time, establishes a lineage of
+ports (struct cxl_port objects) between an endpoint and the root of a
+CXL topology. Each port including the endpoint port is attached to the
+cxl_port driver.
+
+Given that setup, it follows that when either any port in that lineage
+goes through a cxl_port ->remove() event, or the memdev goes through a
+cxl_mem ->remove() event, the hierarchy below the removed port, or the
+entire hierarchy if the memdev is removed, needs to come down.
+
+The delete_endpoint() callback is careful to check whether it is being
+called to tear down the hierarchy, or if it is only being called to
+teardown the memdev because an ancestor port is going through
+->remove().
+
+That care needs to take the device_lock() of the endpoint's parent,
+which requires 2 bugs to be fixed:
+
+1/ A reference on the parent is needed to prevent use-after-free
+ scenarios like this signature:
+
+ BUG: spinlock bad magic on CPU#0, kworker/u56:0/11
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc38 05/24/2023
+ Workqueue: cxl_port detach_memdev [cxl_core]
+ RIP: 0010:spin_bug+0x65/0xa0
+ Call Trace:
+ do_raw_spin_lock+0x69/0xa0
+ __mutex_lock+0x695/0xb80
+ delete_endpoint+0xad/0x150 [cxl_core]
+ devres_release_all+0xb8/0x110
+ device_unbind_cleanup+0xe/0x70
+ device_release_driver_internal+0x1d2/0x210
+ detach_memdev+0x15/0x20 [cxl_core]
+ process_one_work+0x1e3/0x4c0
+ worker_thread+0x1dd/0x3d0
+
+2/ In the case of RCH topologies, the parent device that needs to be
+ locked is not always @port->dev as returned by cxl_mem_find_port(), use
+ endpoint->dev.parent instead.
+
+Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver")
+Cc: <stable@vger.kernel.org>
+Reported-by: Robert Richter <rrichter@amd.com>
+Closes: http://lore.kernel.org/r/20231018171713.1883517-2-rrichter@amd.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/port.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
+index 2c6001592fe20..6a75a3cb601ec 100644
+--- a/drivers/cxl/core/port.c
++++ b/drivers/cxl/core/port.c
+@@ -1242,35 +1242,39 @@ static struct device *grandparent(struct device *dev)
+ return NULL;
+ }
+
++static struct device *endpoint_host(struct cxl_port *endpoint)
++{
++ struct cxl_port *port = to_cxl_port(endpoint->dev.parent);
++
++ if (is_cxl_root(port))
++ return port->uport_dev;
++ return &port->dev;
++}
++
+ static void delete_endpoint(void *data)
+ {
+ struct cxl_memdev *cxlmd = data;
+ struct cxl_port *endpoint = cxlmd->endpoint;
+- struct cxl_port *parent_port;
+- struct device *parent;
+-
+- parent_port = cxl_mem_find_port(cxlmd, NULL);
+- if (!parent_port)
+- goto out;
+- parent = &parent_port->dev;
++ struct device *host = endpoint_host(endpoint);
+
+- device_lock(parent);
+- if (parent->driver && !endpoint->dead) {
+- devm_release_action(parent, cxl_unlink_parent_dport, endpoint);
+- devm_release_action(parent, cxl_unlink_uport, endpoint);
+- devm_release_action(parent, unregister_port, endpoint);
++ device_lock(host);
++ if (host->driver && !endpoint->dead) {
++ devm_release_action(host, cxl_unlink_parent_dport, endpoint);
++ devm_release_action(host, cxl_unlink_uport, endpoint);
++ devm_release_action(host, unregister_port, endpoint);
+ }
+ cxlmd->endpoint = NULL;
+- device_unlock(parent);
+- put_device(parent);
+-out:
++ device_unlock(host);
+ put_device(&endpoint->dev);
++ put_device(host);
+ }
+
+ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
+ {
++ struct device *host = endpoint_host(endpoint);
+ struct device *dev = &cxlmd->dev;
+
++ get_device(host);
+ get_device(&endpoint->dev);
+ cxlmd->endpoint = endpoint;
+ cxlmd->depth = endpoint->depth;
+--
+2.42.0
+
--- /dev/null
+From f7a3527053cf716221efb6de989e670c6a185b21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Oct 2023 10:09:06 -0700
+Subject: cxl/region: Fix x1 root-decoder granularity calculations
+
+From: Jim Harris <jim.harris@samsung.com>
+
+[ Upstream commit 98a04c7aced2b43b3ac4befe216c4eecc7257d4b ]
+
+Root decoder granularity must match the value from CFMWS, which may not
+be the region's granularity for non-interleaved root decoders.
+
+So when calculating granularities for host bridge decoders, use the
+region's granularity instead of the root decoder's granularity to ensure
+the correct granularities are set for the host bridge decoders and any
+downstream switch decoders.
+
+Test configuration is 1 host bridge * 2 switches * 2 endpoints per switch.
+
+Region created with 2048 granularity using following command line:
+
+cxl create-region -m -d decoder0.0 -w 4 mem0 mem2 mem1 mem3 \
+ -g 2048 -s 2048M
+
+Use "cxl list -PDE | grep granularity" to get a view of the granularity
+set at each level of the topology.
+
+Before this patch:
+ "interleave_granularity":2048,
+ "interleave_granularity":2048,
+ "interleave_granularity":512,
+ "interleave_granularity":2048,
+ "interleave_granularity":2048,
+ "interleave_granularity":512,
+"interleave_granularity":256,
+
+After:
+ "interleave_granularity":2048,
+ "interleave_granularity":2048,
+ "interleave_granularity":4096,
+ "interleave_granularity":2048,
+ "interleave_granularity":2048,
+ "interleave_granularity":4096,
+"interleave_granularity":2048,
+
+Fixes: 27b3f8d13830 ("cxl/region: Program target lists")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Jim Harris <jim.harris@samsung.com>
+Link: https://lore.kernel.org/r/169824893473.1403938.16110924262989774582.stgit@bgt-140510-bm03.eng.stellus.in
+[djbw: fixup the prebuilt cxl_test region]
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/region.c | 9 ++++++++-
+ tools/testing/cxl/test/cxl.c | 2 +-
+ 2 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
+index ba30394582763..c7e70ccdb9ef0 100644
+--- a/drivers/cxl/core/region.c
++++ b/drivers/cxl/core/region.c
+@@ -1127,7 +1127,14 @@ static int cxl_port_setup_targets(struct cxl_port *port,
+ }
+
+ if (is_cxl_root(parent_port)) {
+- parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
++ /*
++ * Root decoder IG is always set to value in CFMWS which
++ * may be different than this region's IG. We can use the
++ * region's IG here since interleave_granularity_store()
++ * does not allow interleaved host-bridges with
++ * root IG != region IG.
++ */
++ parent_ig = p->interleave_granularity;
+ parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
+ /*
+ * For purposes of address bit routing, use power-of-2 math for
+diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
+index fb6ab9cef84f7..b885462999022 100644
+--- a/tools/testing/cxl/test/cxl.c
++++ b/tools/testing/cxl/test/cxl.c
+@@ -831,7 +831,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
+ cxld->interleave_ways = 2;
+ else
+ cxld->interleave_ways = 1;
+- cxld->interleave_granularity = 256;
++ cxld->interleave_granularity = 4096;
+ cxld->hpa_range = (struct range) {
+ .start = base,
+ .end = base + size - 1,
+--
+2.42.0
+
--- /dev/null
+From e758d76b8656257c2b14cc6d93f3b7187f7041b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 17:53:04 -0400
+Subject: mm/hugetlb: prepare hugetlb_follow_page_mask() for FOLL_PIN
+
+From: Peter Xu <peterx@redhat.com>
+
+[ Upstream commit 458568c92953dee3716234711f1a2830a35261f3 ]
+
+follow_page() doesn't use FOLL_PIN, and hugetlb does not seem to be the
+target of FOLL_WRITE either. Add the checks anyway.
+
+Namely, reject the follow page either when CoW is needed because the write
+bit is missing, or when unsharing is needed for R/O pins on !AnonExclusive
+pages. That brings this function closer to follow_hugetlb_page().
+
+So we didn't care before, and we still don't for now. But we'll care if we
+switch slow-gup over to use hugetlb_follow_page_mask(). We'll also care
+about returning -EMLINK properly, as that's the gup internal API meaning
+"we should unshare". It is not really needed for the follow page path.
+
+While at it, switch the try_grab_page() check to use WARN_ON_ONCE(), to
+be clear that it should never fail. When an error does happen, capture
+the errno instead of setting page==NULL.
+
+Link: https://lkml.kernel.org/r/20230628215310.73782-3-peterx@redhat.com
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: James Houghton <jthoughton@google.com>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Kirill A . Shutemov <kirill@shutemov.name>
+Cc: Lorenzo Stoakes <lstoakes@gmail.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mike Rapoport (IBM) <rppt@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yang Shi <shy828301@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 426056efe835 ("mm/hugetlb: use nth_page() in place of direct struct page manipulation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/hugetlb.c | 33 ++++++++++++++++++++++-----------
+ 1 file changed, 22 insertions(+), 11 deletions(-)
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 097b81c37597e..d231f23088a77 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -6521,13 +6521,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ struct page *page = NULL;
+ spinlock_t *ptl;
+ pte_t *pte, entry;
+-
+- /*
+- * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
+- * follow_hugetlb_page().
+- */
+- if (WARN_ON_ONCE(flags & FOLL_PIN))
+- return NULL;
++ int ret;
+
+ hugetlb_vma_lock_read(vma);
+ pte = hugetlb_walk(vma, haddr, huge_page_size(h));
+@@ -6537,8 +6531,23 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ ptl = huge_pte_lock(h, mm, pte);
+ entry = huge_ptep_get(pte);
+ if (pte_present(entry)) {
+- page = pte_page(entry) +
+- ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
++ page = pte_page(entry);
++
++ if (!huge_pte_write(entry)) {
++ if (flags & FOLL_WRITE) {
++ page = NULL;
++ goto out;
++ }
++
++ if (gup_must_unshare(vma, flags, page)) {
++ /* Tell the caller to do unsharing */
++ page = ERR_PTR(-EMLINK);
++ goto out;
++ }
++ }
++
++ page += ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
++
+ /*
+ * Note that page may be a sub-page, and with vmemmap
+ * optimizations the page struct may be read only.
+@@ -6548,8 +6557,10 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ * try_grab_page() should always be able to get the page here,
+ * because we hold the ptl lock and have verified pte_present().
+ */
+- if (try_grab_page(page, flags)) {
+- page = NULL;
++ ret = try_grab_page(page, flags);
++
++ if (WARN_ON_ONCE(ret)) {
++ page = ERR_PTR(ret);
+ goto out;
+ }
+ }
+--
+2.42.0
+
--- /dev/null
+From 3bae2acb6a241b8001b97a2b1d6f2a47808b7217 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Sep 2023 16:12:45 -0400
+Subject: mm/hugetlb: use nth_page() in place of direct struct page
+ manipulation
+
+From: Zi Yan <ziy@nvidia.com>
+
+[ Upstream commit 426056efe835cf4864ccf4c328fe3af9146fc539 ]
+
+When dealing with hugetlb pages, manipulating struct page pointers
+directly can get to wrong struct page, since struct page is not guaranteed
+to be contiguous on SPARSEMEM without VMEMMAP. Use nth_page() to handle
+it properly.
+
+Otherwise an attempt might be made to grab a wrong or non-existent page,
+leading either to a non-freeable page or to kernel memory access errors.
+No bug has been reported; this was found by code inspection.
+
+Link: https://lkml.kernel.org/r/20230913201248.452081-3-zi.yan@sent.com
+Fixes: 57a196a58421 ("hugetlb: simplify hugetlb handling in follow_page_mask")
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Mike Rapoport (IBM) <rppt@kernel.org>
+Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/hugetlb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index d231f23088a77..9951fb7412cc7 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -6546,7 +6546,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ }
+ }
+
+- page += ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
++ page = nth_page(page, ((address & ~huge_page_mask(h)) >> PAGE_SHIFT));
+
+ /*
+ * Note that page may be a sub-page, and with vmemmap
+--
+2.42.0
+
--- /dev/null
+From b0dc903a6b9bcabc68b1cb6580c510cb7ab93b38 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Aug 2023 10:19:56 -0700
+Subject: of: dynamic: Add interfaces for creating device node dynamically
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lizhi Hou <lizhi.hou@amd.com>
+
+[ Upstream commit b544fc2b8606d718d0cc788ff2ea2492871df488 ]
+
+of_changeset_create_node() creates device node dynamically and attaches
+the newly created node to a changeset.
+
+Expand of_changeset APIs to handle specific types of properties.
+ of_changeset_add_prop_string()
+ of_changeset_add_prop_string_array()
+ of_changeset_add_prop_u32_array()
+
+Signed-off-by: Clément Léger <clement.leger@bootlin.com>
+Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
+Link: https://lore.kernel.org/r/1692120000-46900-2-git-send-email-lizhi.hou@amd.com
+Signed-off-by: Rob Herring <robh@kernel.org>
+Stable-dep-of: c9260693aa0c ("PCI: Lengthen reset delay for VideoPropulsion Torrent QN16e card")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/of/dynamic.c | 164 ++++++++++++++++++++++++++++++++++++++++++
+ drivers/of/unittest.c | 19 ++++-
+ include/linux/of.h | 23 ++++++
+ 3 files changed, 205 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
+index f7bb73cf821e6..cdad63ecb9023 100644
+--- a/drivers/of/dynamic.c
++++ b/drivers/of/dynamic.c
+@@ -486,6 +486,38 @@ struct device_node *__of_node_dup(const struct device_node *np,
+ return NULL;
+ }
+
++/**
++ * of_changeset_create_node - Dynamically create a device node and attach to
++ * a given changeset.
++ *
++ * @ocs: Pointer to changeset
++ * @parent: Pointer to parent device node
++ * @full_name: Node full name
++ *
++ * Return: Pointer to the created device node or NULL in case of an error.
++ */
++struct device_node *of_changeset_create_node(struct of_changeset *ocs,
++ struct device_node *parent,
++ const char *full_name)
++{
++ struct device_node *np;
++ int ret;
++
++ np = __of_node_dup(NULL, full_name);
++ if (!np)
++ return NULL;
++ np->parent = parent;
++
++ ret = of_changeset_attach_node(ocs, np);
++ if (ret) {
++ of_node_put(np);
++ return NULL;
++ }
++
++ return np;
++}
++EXPORT_SYMBOL(of_changeset_create_node);
++
+ static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
+ {
+ if (ce->action == OF_RECONFIG_ATTACH_NODE &&
+@@ -947,3 +979,135 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action,
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(of_changeset_action);
++
++static int of_changeset_add_prop_helper(struct of_changeset *ocs,
++ struct device_node *np,
++ const struct property *pp)
++{
++ struct property *new_pp;
++ int ret;
++
++ new_pp = __of_prop_dup(pp, GFP_KERNEL);
++ if (!new_pp)
++ return -ENOMEM;
++
++ ret = of_changeset_add_property(ocs, np, new_pp);
++ if (ret) {
++ kfree(new_pp->name);
++ kfree(new_pp->value);
++ kfree(new_pp);
++ }
++
++ return ret;
++}
++
++/**
++ * of_changeset_add_prop_string - Add a string property to a changeset
++ *
++ * @ocs: changeset pointer
++ * @np: device node pointer
++ * @prop_name: name of the property to be added
++ * @str: pointer to null terminated string
++ *
++ * Create a string property and add it to a changeset.
++ *
++ * Return: 0 on success, a negative error value in case of an error.
++ */
++int of_changeset_add_prop_string(struct of_changeset *ocs,
++ struct device_node *np,
++ const char *prop_name, const char *str)
++{
++ struct property prop;
++
++ prop.name = (char *)prop_name;
++ prop.length = strlen(str) + 1;
++ prop.value = (void *)str;
++
++ return of_changeset_add_prop_helper(ocs, np, &prop);
++}
++EXPORT_SYMBOL_GPL(of_changeset_add_prop_string);
++
++/**
++ * of_changeset_add_prop_string_array - Add a string list property to
++ * a changeset
++ *
++ * @ocs: changeset pointer
++ * @np: device node pointer
++ * @prop_name: name of the property to be added
++ * @str_array: pointer to an array of null terminated strings
++ * @sz: number of string array elements
++ *
++ * Create a string list property and add it to a changeset.
++ *
++ * Return: 0 on success, a negative error value in case of an error.
++ */
++int of_changeset_add_prop_string_array(struct of_changeset *ocs,
++ struct device_node *np,
++ const char *prop_name,
++ const char **str_array, size_t sz)
++{
++ struct property prop;
++ int i, ret;
++ char *vp;
++
++ prop.name = (char *)prop_name;
++
++ prop.length = 0;
++ for (i = 0; i < sz; i++)
++ prop.length += strlen(str_array[i]) + 1;
++
++ prop.value = kmalloc(prop.length, GFP_KERNEL);
++ if (!prop.value)
++ return -ENOMEM;
++
++ vp = prop.value;
++ for (i = 0; i < sz; i++) {
++ vp += snprintf(vp, (char *)prop.value + prop.length - vp, "%s",
++ str_array[i]) + 1;
++ }
++ ret = of_changeset_add_prop_helper(ocs, np, &prop);
++ kfree(prop.value);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(of_changeset_add_prop_string_array);
++
++/**
++ * of_changeset_add_prop_u32_array - Add a property of 32 bit integers
++ * property to a changeset
++ *
++ * @ocs: changeset pointer
++ * @np: device node pointer
++ * @prop_name: name of the property to be added
++ * @array: pointer to an array of 32 bit integers
++ * @sz: number of array elements
++ *
++ * Create a property of 32 bit integers and add it to a changeset.
++ *
++ * Return: 0 on success, a negative error value in case of an error.
++ */
++int of_changeset_add_prop_u32_array(struct of_changeset *ocs,
++ struct device_node *np,
++ const char *prop_name,
++ const u32 *array, size_t sz)
++{
++ struct property prop;
++ __be32 *val;
++ int i, ret;
++
++ val = kcalloc(sz, sizeof(__be32), GFP_KERNEL);
++ if (!val)
++ return -ENOMEM;
++
++ for (i = 0; i < sz; i++)
++ val[i] = cpu_to_be32(array[i]);
++ prop.name = (char *)prop_name;
++ prop.length = sizeof(u32) * sz;
++ prop.value = (void *)val;
++
++ ret = of_changeset_add_prop_helper(ocs, np, &prop);
++ kfree(val);
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(of_changeset_add_prop_u32_array);
+diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
+index f6784cce8369b..68e58b085c3db 100644
+--- a/drivers/of/unittest.c
++++ b/drivers/of/unittest.c
+@@ -802,7 +802,9 @@ static void __init of_unittest_changeset(void)
+ struct property *ppname_n21, pname_n21 = { .name = "name", .length = 3, .value = "n21" };
+ struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" };
+ struct property *ppremove;
+- struct device_node *n1, *n2, *n21, *nchangeset, *nremove, *parent, *np;
++ struct device_node *n1, *n2, *n21, *n22, *nchangeset, *nremove, *parent, *np;
++ static const char * const str_array[] = { "str1", "str2", "str3" };
++ const u32 u32_array[] = { 1, 2, 3 };
+ struct of_changeset chgset;
+
+ n1 = __of_node_dup(NULL, "n1");
+@@ -857,6 +859,17 @@ static void __init of_unittest_changeset(void)
+ unittest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop prop-add\n");
+ unittest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n");
+ unittest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n");
++ n22 = of_changeset_create_node(&chgset, n2, "n22");
++ unittest(n22, "fail create n22\n");
++ unittest(!of_changeset_add_prop_string(&chgset, n22, "prop-str", "abcd"),
++ "fail add prop prop-str");
++ unittest(!of_changeset_add_prop_string_array(&chgset, n22, "prop-str-array",
++ (const char **)str_array,
++ ARRAY_SIZE(str_array)),
++ "fail add prop prop-str-array");
++ unittest(!of_changeset_add_prop_u32_array(&chgset, n22, "prop-u32-array",
++ u32_array, ARRAY_SIZE(u32_array)),
++ "fail add prop prop-u32-array");
+
+ unittest(!of_changeset_apply(&chgset), "apply failed\n");
+
+@@ -866,6 +879,9 @@ static void __init of_unittest_changeset(void)
+ unittest((np = of_find_node_by_path("/testcase-data/changeset/n2/n21")),
+ "'%pOF' not added\n", n21);
+ of_node_put(np);
++ unittest((np = of_find_node_by_path("/testcase-data/changeset/n2/n22")),
++ "'%pOF' not added\n", n22);
++ of_node_put(np);
+
+ unittest(!of_changeset_revert(&chgset), "revert failed\n");
+
+@@ -874,6 +890,7 @@ static void __init of_unittest_changeset(void)
+ of_node_put(n1);
+ of_node_put(n2);
+ of_node_put(n21);
++ of_node_put(n22);
+ #endif
+ }
+
+diff --git a/include/linux/of.h b/include/linux/of.h
+index 6ecde0515677d..9b82a6b0f3f55 100644
+--- a/include/linux/of.h
++++ b/include/linux/of.h
+@@ -1580,6 +1580,29 @@ static inline int of_changeset_update_property(struct of_changeset *ocs,
+ {
+ return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop);
+ }
++
++struct device_node *of_changeset_create_node(struct of_changeset *ocs,
++ struct device_node *parent,
++ const char *full_name);
++int of_changeset_add_prop_string(struct of_changeset *ocs,
++ struct device_node *np,
++ const char *prop_name, const char *str);
++int of_changeset_add_prop_string_array(struct of_changeset *ocs,
++ struct device_node *np,
++ const char *prop_name,
++ const char **str_array, size_t sz);
++int of_changeset_add_prop_u32_array(struct of_changeset *ocs,
++ struct device_node *np,
++ const char *prop_name,
++ const u32 *array, size_t sz);
++static inline int of_changeset_add_prop_u32(struct of_changeset *ocs,
++ struct device_node *np,
++ const char *prop_name,
++ const u32 val)
++{
++ return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1);
++}
++
+ #else /* CONFIG_OF_DYNAMIC */
+ static inline int of_reconfig_notifier_register(struct notifier_block *nb)
+ {
+--
+2.42.0
+
--- /dev/null
+From f379b0f4937323cbd8bfc85811fb3e68ecfd64a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Aug 2023 10:19:59 -0700
+Subject: of: overlay: Extend of_overlay_fdt_apply() to specify the target node
+
+From: Lizhi Hou <lizhi.hou@amd.com>
+
+[ Upstream commit 47284862bfc7fd5672e731e827f43f26bdbd155c ]
+
+Currently, an overlay fdt fragment needs to specify its exact location
+in the base DT. In other words, when the fdt fragment is generated,
+the base DT location for the fragment is already known.
+
+There is a new use case where the base DT location is unknown when the
+fdt fragment is generated. For example, an add-on device provides an fdt
+overlay with its firmware to describe its downstream devices. Because it
+is an add-on device which can be plugged into different systems, its
+firmware cannot know the overlay location in the base DT. Instead, the
+device driver will load the overlay fdt and apply it to the base DT at
+runtime. In this case, of_overlay_fdt_apply() needs to be extended so
+that the device driver can specify the target node for the overlay fdt:
+ int of_overlay_fdt_apply(..., struct device_node *base);
+
+Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
+Link: https://lore.kernel.org/r/1692120000-46900-5-git-send-email-lizhi.hou@amd.com
+Signed-off-by: Rob Herring <robh@kernel.org>
+Stable-dep-of: c9260693aa0c ("PCI: Lengthen reset delay for VideoPropulsion Torrent QN16e card")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/of/overlay.c | 42 +++++++++++++++++++++++++++++++-----------
+ drivers/of/unittest.c | 3 ++-
+ include/linux/of.h | 2 +-
+ 3 files changed, 34 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
+index 28b479afd506f..dfb6fb962fc70 100644
+--- a/drivers/of/overlay.c
++++ b/drivers/of/overlay.c
+@@ -682,9 +682,11 @@ static int build_changeset(struct overlay_changeset *ovcs)
+ * 1) "target" property containing the phandle of the target
+ * 2) "target-path" property containing the path of the target
+ */
+-static struct device_node *find_target(struct device_node *info_node)
++static struct device_node *find_target(struct device_node *info_node,
++ struct device_node *target_base)
+ {
+ struct device_node *node;
++ char *target_path;
+ const char *path;
+ u32 val;
+ int ret;
+@@ -700,10 +702,23 @@ static struct device_node *find_target(struct device_node *info_node)
+
+ ret = of_property_read_string(info_node, "target-path", &path);
+ if (!ret) {
+- node = of_find_node_by_path(path);
+- if (!node)
+- pr_err("find target, node: %pOF, path '%s' not found\n",
+- info_node, path);
++ if (target_base) {
++ target_path = kasprintf(GFP_KERNEL, "%pOF%s", target_base, path);
++ if (!target_path)
++ return NULL;
++ node = of_find_node_by_path(target_path);
++ if (!node) {
++ pr_err("find target, node: %pOF, path '%s' not found\n",
++ info_node, target_path);
++ }
++ kfree(target_path);
++ } else {
++ node = of_find_node_by_path(path);
++ if (!node) {
++ pr_err("find target, node: %pOF, path '%s' not found\n",
++ info_node, path);
++ }
++ }
+ return node;
+ }
+
+@@ -715,6 +730,7 @@ static struct device_node *find_target(struct device_node *info_node)
+ /**
+ * init_overlay_changeset() - initialize overlay changeset from overlay tree
+ * @ovcs: Overlay changeset to build
++ * @target_base: Point to the target node to apply overlay
+ *
+ * Initialize @ovcs. Populate @ovcs->fragments with node information from
+ * the top level of @overlay_root. The relevant top level nodes are the
+@@ -725,7 +741,8 @@ static struct device_node *find_target(struct device_node *info_node)
+ * detected in @overlay_root. On error return, the caller of
+ * init_overlay_changeset() must call free_overlay_changeset().
+ */
+-static int init_overlay_changeset(struct overlay_changeset *ovcs)
++static int init_overlay_changeset(struct overlay_changeset *ovcs,
++ struct device_node *target_base)
+ {
+ struct device_node *node, *overlay_node;
+ struct fragment *fragment;
+@@ -784,7 +801,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs)
+
+ fragment = &fragments[cnt];
+ fragment->overlay = overlay_node;
+- fragment->target = find_target(node);
++ fragment->target = find_target(node, target_base);
+ if (!fragment->target) {
+ of_node_put(fragment->overlay);
+ ret = -EINVAL;
+@@ -875,6 +892,7 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
+ *
+ * of_overlay_apply() - Create and apply an overlay changeset
+ * @ovcs: overlay changeset
++ * @base: point to the target node to apply overlay
+ *
+ * Creates and applies an overlay changeset.
+ *
+@@ -898,7 +916,8 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
+ * the caller of of_overlay_apply() must call free_overlay_changeset().
+ */
+
+-static int of_overlay_apply(struct overlay_changeset *ovcs)
++static int of_overlay_apply(struct overlay_changeset *ovcs,
++ struct device_node *base)
+ {
+ int ret = 0, ret_revert, ret_tmp;
+
+@@ -906,7 +925,7 @@ static int of_overlay_apply(struct overlay_changeset *ovcs)
+ if (ret)
+ goto out;
+
+- ret = init_overlay_changeset(ovcs);
++ ret = init_overlay_changeset(ovcs, base);
+ if (ret)
+ goto out;
+
+@@ -950,6 +969,7 @@ static int of_overlay_apply(struct overlay_changeset *ovcs)
+ * @overlay_fdt: pointer to overlay FDT
+ * @overlay_fdt_size: number of bytes in @overlay_fdt
+ * @ret_ovcs_id: pointer for returning created changeset id
++ * @base: pointer for the target node to apply overlay
+ *
+ * Creates and applies an overlay changeset.
+ *
+@@ -965,7 +985,7 @@ static int of_overlay_apply(struct overlay_changeset *ovcs)
+ */
+
+ int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+- int *ret_ovcs_id)
++ int *ret_ovcs_id, struct device_node *base)
+ {
+ void *new_fdt;
+ void *new_fdt_align;
+@@ -1036,7 +1056,7 @@ int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+ }
+ ovcs->overlay_mem = overlay_mem;
+
+- ret = of_overlay_apply(ovcs);
++ ret = of_overlay_apply(ovcs, base);
+ /*
+ * If of_overlay_apply() error, calling free_overlay_changeset() may
+ * result in a memory leak if the apply partly succeeded, so do NOT
+diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
+index 68e58b085c3db..c812a0175d1d3 100644
+--- a/drivers/of/unittest.c
++++ b/drivers/of/unittest.c
+@@ -3480,7 +3480,8 @@ static int __init overlay_data_apply(const char *overlay_name, int *ovcs_id)
+ if (!size)
+ pr_err("no overlay data for %s\n", overlay_name);
+
+- ret = of_overlay_fdt_apply(info->dtbo_begin, size, &info->ovcs_id);
++ ret = of_overlay_fdt_apply(info->dtbo_begin, size, &info->ovcs_id,
++ NULL);
+ if (ovcs_id)
+ *ovcs_id = info->ovcs_id;
+ if (ret < 0)
+diff --git a/include/linux/of.h b/include/linux/of.h
+index 9b82a6b0f3f55..722140b2e8abd 100644
+--- a/include/linux/of.h
++++ b/include/linux/of.h
+@@ -1668,7 +1668,7 @@ struct of_overlay_notify_data {
+ #ifdef CONFIG_OF_OVERLAY
+
+ int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+- int *ovcs_id);
++ int *ovcs_id, struct device_node *target_base);
+ int of_overlay_remove(int *ovcs_id);
+ int of_overlay_remove_all(void);
+
+--
+2.42.0
+
--- /dev/null
+From 56b0b554be0b5d4fb8148a4071bf9eaa48bed3c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Aug 2023 10:20:00 -0700
+Subject: of: unittest: Add pci_dt_testdrv pci driver
+
+From: Lizhi Hou <lizhi.hou@amd.com>
+
+[ Upstream commit 26409dd045892904b059dc411403e9c8ce7543ca ]
+
+pci_dt_testdrv is bound to the QEMU PCI Test Device. It reads the
+overlay_pci_node fdt fragment and applies it to the Test Device. Then
+it calls of_platform_default_populate() to populate the platform
+devices.
+
+Tested-by: Herve Codina <herve.codina@bootlin.com>
+Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
+Link: https://lore.kernel.org/r/1692120000-46900-6-git-send-email-lizhi.hou@amd.com
+Signed-off-by: Rob Herring <robh@kernel.org>
+Stable-dep-of: c9260693aa0c ("PCI: Lengthen reset delay for VideoPropulsion Torrent QN16e card")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/of/unittest-data/Makefile | 3 +-
+ .../of/unittest-data/overlay_pci_node.dtso | 22 ++
+ drivers/of/unittest.c | 189 ++++++++++++++++++
+ drivers/pci/quirks.c | 1 +
+ 4 files changed, 214 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/of/unittest-data/overlay_pci_node.dtso
+
+diff --git a/drivers/of/unittest-data/Makefile b/drivers/of/unittest-data/Makefile
+index ea5f4da68e23a..1aa8750881598 100644
+--- a/drivers/of/unittest-data/Makefile
++++ b/drivers/of/unittest-data/Makefile
+@@ -32,7 +32,8 @@ obj-$(CONFIG_OF_OVERLAY) += overlay.dtbo.o \
+ overlay_gpio_02b.dtbo.o \
+ overlay_gpio_03.dtbo.o \
+ overlay_gpio_04a.dtbo.o \
+- overlay_gpio_04b.dtbo.o
++ overlay_gpio_04b.dtbo.o \
++ overlay_pci_node.dtbo.o
+
+ # enable creation of __symbols__ node
+ DTC_FLAGS_overlay += -@
+diff --git a/drivers/of/unittest-data/overlay_pci_node.dtso b/drivers/of/unittest-data/overlay_pci_node.dtso
+new file mode 100644
+index 0000000000000..c05e52e9e44a9
+--- /dev/null
++++ b/drivers/of/unittest-data/overlay_pci_node.dtso
+@@ -0,0 +1,22 @@
++// SPDX-License-Identifier: GPL-2.0
++/dts-v1/;
++/ {
++ fragment@0 {
++ target-path="";
++ __overlay__ {
++ #address-cells = <3>;
++ #size-cells = <2>;
++ pci-ep-bus@0 {
++ compatible = "simple-bus";
++ #address-cells = <1>;
++ #size-cells = <1>;
++ ranges = <0x0 0x0 0x0 0x0 0x1000>;
++ reg = <0 0 0 0 0>;
++ unittest-pci@100 {
++ compatible = "unittest-pci";
++ reg = <0x100 0x200>;
++ };
++ };
++ };
++ };
++};
+diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
+index c812a0175d1d3..f5faabf320ec9 100644
+--- a/drivers/of/unittest.c
++++ b/drivers/of/unittest.c
+@@ -22,6 +22,7 @@
+ #include <linux/slab.h>
+ #include <linux/device.h>
+ #include <linux/platform_device.h>
++#include <linux/pci.h>
+ #include <linux/kernel.h>
+
+ #include <linux/i2c.h>
+@@ -3326,6 +3327,7 @@ OVERLAY_INFO_EXTERN(overlay_gpio_02b);
+ OVERLAY_INFO_EXTERN(overlay_gpio_03);
+ OVERLAY_INFO_EXTERN(overlay_gpio_04a);
+ OVERLAY_INFO_EXTERN(overlay_gpio_04b);
++OVERLAY_INFO_EXTERN(overlay_pci_node);
+ OVERLAY_INFO_EXTERN(overlay_bad_add_dup_node);
+ OVERLAY_INFO_EXTERN(overlay_bad_add_dup_prop);
+ OVERLAY_INFO_EXTERN(overlay_bad_phandle);
+@@ -3361,6 +3363,7 @@ static struct overlay_info overlays[] = {
+ OVERLAY_INFO(overlay_gpio_03, 0),
+ OVERLAY_INFO(overlay_gpio_04a, 0),
+ OVERLAY_INFO(overlay_gpio_04b, 0),
++ OVERLAY_INFO(overlay_pci_node, 0),
+ OVERLAY_INFO(overlay_bad_add_dup_node, -EINVAL),
+ OVERLAY_INFO(overlay_bad_add_dup_prop, -EINVAL),
+ OVERLAY_INFO(overlay_bad_phandle, -EINVAL),
+@@ -3731,6 +3734,191 @@ static inline __init void of_unittest_overlay_high_level(void) {}
+
+ #endif
+
++#ifdef CONFIG_PCI_DYNAMIC_OF_NODES
++
++static int of_unittest_pci_dev_num;
++static int of_unittest_pci_child_num;
++
++/*
++ * PCI device tree node test driver
++ */
++static const struct pci_device_id testdrv_pci_ids[] = {
++ { PCI_DEVICE(PCI_VENDOR_ID_REDHAT, 0x5), }, /* PCI_VENDOR_ID_REDHAT */
++ { 0, }
++};
++
++static int testdrv_probe(struct pci_dev *pdev, const struct pci_device_id *id)
++{
++ struct overlay_info *info;
++ struct device_node *dn;
++ int ret, ovcs_id;
++ u32 size;
++
++ dn = pdev->dev.of_node;
++ if (!dn) {
++ dev_err(&pdev->dev, "does not find bus endpoint");
++ return -EINVAL;
++ }
++
++ for (info = overlays; info && info->name; info++) {
++ if (!strcmp(info->name, "overlay_pci_node"))
++ break;
++ }
++ if (!info || !info->name) {
++ dev_err(&pdev->dev, "no overlay data for overlay_pci_node");
++ return -ENODEV;
++ }
++
++ size = info->dtbo_end - info->dtbo_begin;
++ ret = of_overlay_fdt_apply(info->dtbo_begin, size, &ovcs_id, dn);
++ of_node_put(dn);
++ if (ret)
++ return ret;
++
++ of_platform_default_populate(dn, NULL, &pdev->dev);
++ pci_set_drvdata(pdev, (void *)(uintptr_t)ovcs_id);
++
++ return 0;
++}
++
++static void testdrv_remove(struct pci_dev *pdev)
++{
++ int ovcs_id = (int)(uintptr_t)pci_get_drvdata(pdev);
++
++ of_platform_depopulate(&pdev->dev);
++ of_overlay_remove(&ovcs_id);
++}
++
++static struct pci_driver testdrv_driver = {
++ .name = "pci_dt_testdrv",
++ .id_table = testdrv_pci_ids,
++ .probe = testdrv_probe,
++ .remove = testdrv_remove,
++};
++
++static int unittest_pci_probe(struct platform_device *pdev)
++{
++ struct resource *res;
++ struct device *dev;
++ u64 exp_addr;
++
++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -ENODEV;
++
++ dev = &pdev->dev;
++ while (dev && !dev_is_pci(dev))
++ dev = dev->parent;
++ if (!dev) {
++ pr_err("unable to find parent device\n");
++ return -ENODEV;
++ }
++
++ exp_addr = pci_resource_start(to_pci_dev(dev), 0) + 0x100;
++ unittest(res->start == exp_addr, "Incorrect translated address %llx, expected %llx\n",
++ (u64)res->start, exp_addr);
++
++ of_unittest_pci_child_num++;
++
++ return 0;
++}
++
++static const struct of_device_id unittest_pci_of_match[] = {
++ { .compatible = "unittest-pci" },
++ { }
++};
++
++static struct platform_driver unittest_pci_driver = {
++ .probe = unittest_pci_probe,
++ .driver = {
++ .name = "unittest-pci",
++ .of_match_table = unittest_pci_of_match,
++ },
++};
++
++static int of_unittest_pci_node_verify(struct pci_dev *pdev, bool add)
++{
++ struct device_node *pnp, *np = NULL;
++ struct device *child_dev;
++ char *path = NULL;
++ const __be32 *reg;
++ int rc = 0;
++
++ pnp = pdev->dev.of_node;
++ unittest(pnp, "Failed creating PCI dt node\n");
++ if (!pnp)
++ return -ENODEV;
++
++ if (add) {
++ path = kasprintf(GFP_KERNEL, "%pOF/pci-ep-bus@0/unittest-pci@100", pnp);
++ np = of_find_node_by_path(path);
++ unittest(np, "Failed to get unittest-pci node under PCI node\n");
++ if (!np) {
++ rc = -ENODEV;
++ goto failed;
++ }
++
++ reg = of_get_property(np, "reg", NULL);
++ unittest(reg, "Failed to get reg property\n");
++ if (!reg)
++ rc = -ENODEV;
++ } else {
++ path = kasprintf(GFP_KERNEL, "%pOF/pci-ep-bus@0", pnp);
++ np = of_find_node_by_path(path);
++ unittest(!np, "Child device tree node is not removed\n");
++ child_dev = device_find_any_child(&pdev->dev);
++ unittest(!child_dev, "Child device is not removed\n");
++ }
++
++failed:
++ kfree(path);
++ if (np)
++ of_node_put(np);
++
++ return rc;
++}
++
++static void __init of_unittest_pci_node(void)
++{
++ struct pci_dev *pdev = NULL;
++ int rc;
++
++ rc = pci_register_driver(&testdrv_driver);
++ unittest(!rc, "Failed to register pci test driver; rc = %d\n", rc);
++ if (rc)
++ return;
++
++ rc = platform_driver_register(&unittest_pci_driver);
++ if (unittest(!rc, "Failed to register unittest pci driver\n")) {
++ pci_unregister_driver(&testdrv_driver);
++ return;
++ }
++
++ while ((pdev = pci_get_device(PCI_VENDOR_ID_REDHAT, 0x5, pdev)) != NULL) {
++ of_unittest_pci_node_verify(pdev, true);
++ of_unittest_pci_dev_num++;
++ }
++ if (pdev)
++ pci_dev_put(pdev);
++
++ unittest(of_unittest_pci_dev_num,
++ "No test PCI device been found. Please run QEMU with '-device pci-testdev'\n");
++ unittest(of_unittest_pci_dev_num == of_unittest_pci_child_num,
++ "Child device number %d is not expected %d", of_unittest_pci_child_num,
++ of_unittest_pci_dev_num);
++
++ platform_driver_unregister(&unittest_pci_driver);
++ pci_unregister_driver(&testdrv_driver);
++
++ while ((pdev = pci_get_device(PCI_VENDOR_ID_REDHAT, 0x5, pdev)) != NULL)
++ of_unittest_pci_node_verify(pdev, false);
++ if (pdev)
++ pci_dev_put(pdev);
++}
++#else
++static void __init of_unittest_pci_node(void) { }
++#endif
++
+ static int __init of_unittest(void)
+ {
+ struct device_node *np;
+@@ -3781,6 +3969,7 @@ static int __init of_unittest(void)
+ of_unittest_platform_populate();
+ of_unittest_overlay();
+ of_unittest_lifecycle();
++ of_unittest_pci_node();
+
+ /* Double check linkage after removing testcase data */
+ of_unittest_check_tree_linkage();
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index 3ec7bcfbf4dc0..7e9a14e430195 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -6172,3 +6172,4 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size);
+ */
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node);
+--
+2.42.0
+
--- /dev/null
+From 1bd7f29a9d50fe2ce66158e7e4c6892218dd70f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Aug 2023 10:19:58 -0700
+Subject: PCI: Add quirks to generate device tree node for Xilinx Alveo U50
+
+From: Lizhi Hou <lizhi.hou@amd.com>
+
+[ Upstream commit ae9813db1dc5ac987a09889791155a7b8c527f8d ]
+
+The Xilinx Alveo U50 PCI card exposes multiple hardware peripherals on
+its PCI BARs. The card firmware provides a flattened device tree to
+describe the hardware peripherals on its BARs. This allows the U50 driver
+to load the flattened device tree and generate the device tree nodes for
+the hardware peripherals underneath.
+
+To generate a device tree node for the U50 card, add PCI quirks to call
+of_pci_make_dev_node() for the U50.
+
+Acked-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
+Link: https://lore.kernel.org/r/1692120000-46900-4-git-send-email-lizhi.hou@amd.com
+Signed-off-by: Rob Herring <robh@kernel.org>
+Stable-dep-of: c9260693aa0c ("PCI: Lengthen reset delay for VideoPropulsion Torrent QN16e card")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/quirks.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index 9fa3c9225bb30..3ec7bcfbf4dc0 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -6161,3 +6161,14 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size);
+ #endif
++
++/*
++ * For a PCI device with multiple downstream devices, its driver may use
++ * a flattened device tree to describe the downstream devices.
++ * To overlay the flattened device tree, the PCI device and all its ancestor
++ * devices need to have device tree nodes on system base device tree. Thus,
++ * before driver probing, it might need to add a device tree node as the final
++ * fixup.
++ */
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node);
+--
+2.42.0
+
--- /dev/null
+From 5f5090220d6abc372575d6b4b2bc8e04a5def7f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Aug 2023 10:19:57 -0700
+Subject: PCI: Create device tree node for bridge
+
+From: Lizhi Hou <lizhi.hou@amd.com>
+
+[ Upstream commit 407d1a51921e9f28c1bcec647c2205925bd1fdab ]
+
+The PCI endpoint device such as Xilinx Alveo PCI card maps the register
+spaces from multiple hardware peripherals to its PCI BAR. Normally,
+the PCI core discovers devices and BARs using the PCI enumeration process.
+There is no infrastructure to discover the hardware peripherals that are
+present in a PCI device, and which can be accessed through the PCI BARs.
+
+The device tree framework requires a device tree node for the PCI device
+before it can generate the device tree nodes for the hardware peripherals
+underneath. Because PCI is a self-discoverable bus, there might not be a
+device tree node created for PCI devices. Furthermore, if the PCI device
+is hot-pluggable, the device tree nodes for its parent bridges are also
+required when it is plugged in. Add support for generating device tree
+nodes for PCI bridges.
+
+Add an of_pci_make_dev_node() interface that can be used to create device
+tree nodes for PCI devices.
+
+Add a PCI_DYNAMIC_OF_NODES config option. When the option is turned on,
+the kernel will generate device tree nodes for PCI bridges unconditionally.
+
+Initially, add the basic properties for the dynamically generated device
+tree nodes which include #address-cells, #size-cells, device_type,
+compatible, ranges, reg.
+
+Acked-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
+Link: https://lore.kernel.org/r/1692120000-46900-3-git-send-email-lizhi.hou@amd.com
+Signed-off-by: Rob Herring <robh@kernel.org>
+Stable-dep-of: c9260693aa0c ("PCI: Lengthen reset delay for VideoPropulsion Torrent QN16e card")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/Kconfig | 12 ++
+ drivers/pci/Makefile | 1 +
+ drivers/pci/bus.c | 2 +
+ drivers/pci/of.c | 79 +++++++++
+ drivers/pci/of_property.c | 355 ++++++++++++++++++++++++++++++++++++++
+ drivers/pci/pci.h | 12 ++
+ drivers/pci/remove.c | 1 +
+ 7 files changed, 462 insertions(+)
+ create mode 100644 drivers/pci/of_property.c
+
+diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
+index 3c07d8d214b38..49bd09c7dd0a1 100644
+--- a/drivers/pci/Kconfig
++++ b/drivers/pci/Kconfig
+@@ -194,6 +194,18 @@ config PCI_HYPERV
+ The PCI device frontend driver allows the kernel to import arbitrary
+ PCI devices from a PCI backend to support PCI driver domains.
+
++config PCI_DYNAMIC_OF_NODES
++ bool "Create Device tree nodes for PCI devices"
++ depends on OF
++ select OF_DYNAMIC
++ help
++ This option enables support for generating device tree nodes for some
++ PCI devices. Thus, the driver of this kind can load and overlay
++ flattened device tree for its downstream devices.
++
++ Once this option is selected, the device tree nodes will be generated
++ for all PCI bridges.
++
+ choice
+ prompt "PCI Express hierarchy optimization setting"
+ default PCIE_BUS_DEFAULT
+diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
+index 2680e4c92f0ab..cc8b4e01e29de 100644
+--- a/drivers/pci/Makefile
++++ b/drivers/pci/Makefile
+@@ -32,6 +32,7 @@ obj-$(CONFIG_PCI_P2PDMA) += p2pdma.o
+ obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
+ obj-$(CONFIG_VGA_ARB) += vgaarb.o
+ obj-$(CONFIG_PCI_DOE) += doe.o
++obj-$(CONFIG_PCI_DYNAMIC_OF_NODES) += of_property.o
+
+ # Endpoint library must be initialized before its users
+ obj-$(CONFIG_PCI_ENDPOINT) += endpoint/
+diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
+index 46b252bbe5000..9c2137dae429a 100644
+--- a/drivers/pci/bus.c
++++ b/drivers/pci/bus.c
+@@ -342,6 +342,8 @@ void pci_bus_add_device(struct pci_dev *dev)
+ */
+ pcibios_bus_add_device(dev);
+ pci_fixup_device(pci_fixup_final, dev);
++ if (pci_is_bridge(dev))
++ of_pci_make_dev_node(dev);
+ pci_create_sysfs_dev_files(dev);
+ pci_proc_attach_device(dev);
+ pci_bridge_d3_update(dev);
+diff --git a/drivers/pci/of.c b/drivers/pci/of.c
+index 3c158b17dcb53..2af64bcb7da3a 100644
+--- a/drivers/pci/of.c
++++ b/drivers/pci/of.c
+@@ -606,6 +606,85 @@ int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge)
+ return pci_parse_request_of_pci_ranges(dev, bridge);
+ }
+
++#ifdef CONFIG_PCI_DYNAMIC_OF_NODES
++
++void of_pci_remove_node(struct pci_dev *pdev)
++{
++ struct device_node *np;
++
++ np = pci_device_to_OF_node(pdev);
++ if (!np || !of_node_check_flag(np, OF_DYNAMIC))
++ return;
++ pdev->dev.of_node = NULL;
++
++ of_changeset_revert(np->data);
++ of_changeset_destroy(np->data);
++ of_node_put(np);
++}
++
++void of_pci_make_dev_node(struct pci_dev *pdev)
++{
++ struct device_node *ppnode, *np = NULL;
++ const char *pci_type;
++ struct of_changeset *cset;
++ const char *name;
++ int ret;
++
++ /*
++ * If there is already a device tree node linked to this device,
++ * return immediately.
++ */
++ if (pci_device_to_OF_node(pdev))
++ return;
++
++ /* Check if there is device tree node for parent device */
++ if (!pdev->bus->self)
++ ppnode = pdev->bus->dev.of_node;
++ else
++ ppnode = pdev->bus->self->dev.of_node;
++ if (!ppnode)
++ return;
++
++ if (pci_is_bridge(pdev))
++ pci_type = "pci";
++ else
++ pci_type = "dev";
++
++ name = kasprintf(GFP_KERNEL, "%s@%x,%x", pci_type,
++ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
++ if (!name)
++ return;
++
++ cset = kmalloc(sizeof(*cset), GFP_KERNEL);
++ if (!cset)
++ goto failed;
++ of_changeset_init(cset);
++
++ np = of_changeset_create_node(cset, ppnode, name);
++ if (!np)
++ goto failed;
++ np->data = cset;
++
++ ret = of_pci_add_properties(pdev, cset, np);
++ if (ret)
++ goto failed;
++
++ ret = of_changeset_apply(cset);
++ if (ret)
++ goto failed;
++
++ pdev->dev.of_node = np;
++ kfree(name);
++
++ return;
++
++failed:
++ if (np)
++ of_node_put(np);
++ kfree(name);
++}
++#endif
++
+ #endif /* CONFIG_PCI */
+
+ /**
+diff --git a/drivers/pci/of_property.c b/drivers/pci/of_property.c
+new file mode 100644
+index 0000000000000..710ec35ba4a17
+--- /dev/null
++++ b/drivers/pci/of_property.c
+@@ -0,0 +1,355 @@
++// SPDX-License-Identifier: GPL-2.0+
++/*
++ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
++ */
++
++#include <linux/pci.h>
++#include <linux/of.h>
++#include <linux/of_irq.h>
++#include <linux/bitfield.h>
++#include <linux/bits.h>
++#include "pci.h"
++
++#define OF_PCI_ADDRESS_CELLS 3
++#define OF_PCI_SIZE_CELLS 2
++#define OF_PCI_MAX_INT_PIN 4
++
++struct of_pci_addr_pair {
++ u32 phys_addr[OF_PCI_ADDRESS_CELLS];
++ u32 size[OF_PCI_SIZE_CELLS];
++};
++
++/*
++ * Each entry in the ranges table is a tuple containing the child address,
++ * the parent address, and the size of the region in the child address space.
++ * Thus, for PCI, in each entry parent address is an address on the primary
++ * side and the child address is the corresponding address on the secondary
++ * side.
++ */
++struct of_pci_range {
++ u32 child_addr[OF_PCI_ADDRESS_CELLS];
++ u32 parent_addr[OF_PCI_ADDRESS_CELLS];
++ u32 size[OF_PCI_SIZE_CELLS];
++};
++
++#define OF_PCI_ADDR_SPACE_IO 0x1
++#define OF_PCI_ADDR_SPACE_MEM32 0x2
++#define OF_PCI_ADDR_SPACE_MEM64 0x3
++
++#define OF_PCI_ADDR_FIELD_NONRELOC BIT(31)
++#define OF_PCI_ADDR_FIELD_SS GENMASK(25, 24)
++#define OF_PCI_ADDR_FIELD_PREFETCH BIT(30)
++#define OF_PCI_ADDR_FIELD_BUS GENMASK(23, 16)
++#define OF_PCI_ADDR_FIELD_DEV GENMASK(15, 11)
++#define OF_PCI_ADDR_FIELD_FUNC GENMASK(10, 8)
++#define OF_PCI_ADDR_FIELD_REG GENMASK(7, 0)
++
++enum of_pci_prop_compatible {
++ PROP_COMPAT_PCI_VVVV_DDDD,
++ PROP_COMPAT_PCICLASS_CCSSPP,
++ PROP_COMPAT_PCICLASS_CCSS,
++ PROP_COMPAT_NUM,
++};
++
++static void of_pci_set_address(struct pci_dev *pdev, u32 *prop, u64 addr,
++ u32 reg_num, u32 flags, bool reloc)
++{
++ prop[0] = FIELD_PREP(OF_PCI_ADDR_FIELD_BUS, pdev->bus->number) |
++ FIELD_PREP(OF_PCI_ADDR_FIELD_DEV, PCI_SLOT(pdev->devfn)) |
++ FIELD_PREP(OF_PCI_ADDR_FIELD_FUNC, PCI_FUNC(pdev->devfn));
++ prop[0] |= flags | reg_num;
++ if (!reloc) {
++ prop[0] |= OF_PCI_ADDR_FIELD_NONRELOC;
++ prop[1] = upper_32_bits(addr);
++ prop[2] = lower_32_bits(addr);
++ }
++}
++
++static int of_pci_get_addr_flags(struct resource *res, u32 *flags)
++{
++ u32 ss;
++
++ if (res->flags & IORESOURCE_IO)
++ ss = OF_PCI_ADDR_SPACE_IO;
++ else if (res->flags & IORESOURCE_MEM_64)
++ ss = OF_PCI_ADDR_SPACE_MEM64;
++ else if (res->flags & IORESOURCE_MEM)
++ ss = OF_PCI_ADDR_SPACE_MEM32;
++ else
++ return -EINVAL;
++
++ *flags = 0;
++ if (res->flags & IORESOURCE_PREFETCH)
++ *flags |= OF_PCI_ADDR_FIELD_PREFETCH;
++
++ *flags |= FIELD_PREP(OF_PCI_ADDR_FIELD_SS, ss);
++
++ return 0;
++}
++
++static int of_pci_prop_bus_range(struct pci_dev *pdev,
++ struct of_changeset *ocs,
++ struct device_node *np)
++{
++ u32 bus_range[] = { pdev->subordinate->busn_res.start,
++ pdev->subordinate->busn_res.end };
++
++ return of_changeset_add_prop_u32_array(ocs, np, "bus-range", bus_range,
++ ARRAY_SIZE(bus_range));
++}
++
++static int of_pci_prop_ranges(struct pci_dev *pdev, struct of_changeset *ocs,
++ struct device_node *np)
++{
++ struct of_pci_range *rp;
++ struct resource *res;
++ int i, j, ret;
++ u32 flags, num;
++ u64 val64;
++
++ if (pci_is_bridge(pdev)) {
++ num = PCI_BRIDGE_RESOURCE_NUM;
++ res = &pdev->resource[PCI_BRIDGE_RESOURCES];
++ } else {
++ num = PCI_STD_NUM_BARS;
++ res = &pdev->resource[PCI_STD_RESOURCES];
++ }
++
++ rp = kcalloc(num, sizeof(*rp), GFP_KERNEL);
++ if (!rp)
++ return -ENOMEM;
++
++ for (i = 0, j = 0; j < num; j++) {
++ if (!resource_size(&res[j]))
++ continue;
++
++ if (of_pci_get_addr_flags(&res[j], &flags))
++ continue;
++
++ val64 = res[j].start;
++ of_pci_set_address(pdev, rp[i].parent_addr, val64, 0, flags,
++ false);
++ if (pci_is_bridge(pdev)) {
++ memcpy(rp[i].child_addr, rp[i].parent_addr,
++ sizeof(rp[i].child_addr));
++ } else {
++ /*
++ * For endpoint device, the lower 64-bits of child
++ * address is always zero.
++ */
++ rp[i].child_addr[0] = j;
++ }
++
++ val64 = resource_size(&res[j]);
++ rp[i].size[0] = upper_32_bits(val64);
++ rp[i].size[1] = lower_32_bits(val64);
++
++ i++;
++ }
++
++ ret = of_changeset_add_prop_u32_array(ocs, np, "ranges", (u32 *)rp,
++ i * sizeof(*rp) / sizeof(u32));
++ kfree(rp);
++
++ return ret;
++}
++
++static int of_pci_prop_reg(struct pci_dev *pdev, struct of_changeset *ocs,
++ struct device_node *np)
++{
++ struct of_pci_addr_pair reg = { 0 };
++
++ /* configuration space */
++ of_pci_set_address(pdev, reg.phys_addr, 0, 0, 0, true);
++
++ return of_changeset_add_prop_u32_array(ocs, np, "reg", (u32 *)®,
++ sizeof(reg) / sizeof(u32));
++}
++
++static int of_pci_prop_interrupts(struct pci_dev *pdev,
++ struct of_changeset *ocs,
++ struct device_node *np)
++{
++ int ret;
++ u8 pin;
++
++ ret = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
++ if (ret != 0)
++ return ret;
++
++ if (!pin)
++ return 0;
++
++ return of_changeset_add_prop_u32(ocs, np, "interrupts", (u32)pin);
++}
++
++static int of_pci_prop_intr_map(struct pci_dev *pdev, struct of_changeset *ocs,
++ struct device_node *np)
++{
++ struct of_phandle_args out_irq[OF_PCI_MAX_INT_PIN];
++ u32 i, addr_sz[OF_PCI_MAX_INT_PIN], map_sz = 0;
++ __be32 laddr[OF_PCI_ADDRESS_CELLS] = { 0 };
++ u32 int_map_mask[] = { 0xffff00, 0, 0, 7 };
++ struct device_node *pnode;
++ struct pci_dev *child;
++ u32 *int_map, *mapp;
++ int ret;
++ u8 pin;
++
++ pnode = pci_device_to_OF_node(pdev->bus->self);
++ if (!pnode)
++ pnode = pci_bus_to_OF_node(pdev->bus);
++
++ if (!pnode) {
++ pci_err(pdev, "failed to get parent device node");
++ return -EINVAL;
++ }
++
++ laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
++ for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) {
++ i = pin - 1;
++ out_irq[i].np = pnode;
++ out_irq[i].args_count = 1;
++ out_irq[i].args[0] = pin;
++ ret = of_irq_parse_raw(laddr, &out_irq[i]);
++ if (ret) {
++ pci_err(pdev, "parse irq %d failed, ret %d", pin, ret);
++ continue;
++ }
++ ret = of_property_read_u32(out_irq[i].np, "#address-cells",
++ &addr_sz[i]);
++ if (ret)
++ addr_sz[i] = 0;
++ }
++
++ list_for_each_entry(child, &pdev->subordinate->devices, bus_list) {
++ for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) {
++ i = pci_swizzle_interrupt_pin(child, pin) - 1;
++ map_sz += 5 + addr_sz[i] + out_irq[i].args_count;
++ }
++ }
++
++ int_map = kcalloc(map_sz, sizeof(u32), GFP_KERNEL);
++ mapp = int_map;
++
++ list_for_each_entry(child, &pdev->subordinate->devices, bus_list) {
++ for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) {
++ *mapp = (child->bus->number << 16) |
++ (child->devfn << 8);
++ mapp += OF_PCI_ADDRESS_CELLS;
++ *mapp = pin;
++ mapp++;
++ i = pci_swizzle_interrupt_pin(child, pin) - 1;
++ *mapp = out_irq[i].np->phandle;
++ mapp++;
++ if (addr_sz[i]) {
++ ret = of_property_read_u32_array(out_irq[i].np,
++ "reg", mapp,
++ addr_sz[i]);
++ if (ret)
++ goto failed;
++ }
++ mapp += addr_sz[i];
++ memcpy(mapp, out_irq[i].args,
++ out_irq[i].args_count * sizeof(u32));
++ mapp += out_irq[i].args_count;
++ }
++ }
++
++ ret = of_changeset_add_prop_u32_array(ocs, np, "interrupt-map", int_map,
++ map_sz);
++ if (ret)
++ goto failed;
++
++ ret = of_changeset_add_prop_u32(ocs, np, "#interrupt-cells", 1);
++ if (ret)
++ goto failed;
++
++ ret = of_changeset_add_prop_u32_array(ocs, np, "interrupt-map-mask",
++ int_map_mask,
++ ARRAY_SIZE(int_map_mask));
++ if (ret)
++ goto failed;
++
++ kfree(int_map);
++ return 0;
++
++failed:
++ kfree(int_map);
++ return ret;
++}
++
++static int of_pci_prop_compatible(struct pci_dev *pdev,
++ struct of_changeset *ocs,
++ struct device_node *np)
++{
++ const char *compat_strs[PROP_COMPAT_NUM] = { 0 };
++ int i, ret;
++
++ compat_strs[PROP_COMPAT_PCI_VVVV_DDDD] =
++ kasprintf(GFP_KERNEL, "pci%x,%x", pdev->vendor, pdev->device);
++ compat_strs[PROP_COMPAT_PCICLASS_CCSSPP] =
++ kasprintf(GFP_KERNEL, "pciclass,%06x", pdev->class);
++ compat_strs[PROP_COMPAT_PCICLASS_CCSS] =
++ kasprintf(GFP_KERNEL, "pciclass,%04x", pdev->class >> 8);
++
++ ret = of_changeset_add_prop_string_array(ocs, np, "compatible",
++ compat_strs, PROP_COMPAT_NUM);
++ for (i = 0; i < PROP_COMPAT_NUM; i++)
++ kfree(compat_strs[i]);
++
++ return ret;
++}
++
++int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs,
++ struct device_node *np)
++{
++ int ret;
++
++ /*
++ * The added properties will be released when the
++ * changeset is destroyed.
++ */
++ if (pci_is_bridge(pdev)) {
++ ret = of_changeset_add_prop_string(ocs, np, "device_type",
++ "pci");
++ if (ret)
++ return ret;
++
++ ret = of_pci_prop_bus_range(pdev, ocs, np);
++ if (ret)
++ return ret;
++
++ ret = of_pci_prop_intr_map(pdev, ocs, np);
++ if (ret)
++ return ret;
++ }
++
++ ret = of_pci_prop_ranges(pdev, ocs, np);
++ if (ret)
++ return ret;
++
++ ret = of_changeset_add_prop_u32(ocs, np, "#address-cells",
++ OF_PCI_ADDRESS_CELLS);
++ if (ret)
++ return ret;
++
++ ret = of_changeset_add_prop_u32(ocs, np, "#size-cells",
++ OF_PCI_SIZE_CELLS);
++ if (ret)
++ return ret;
++
++ ret = of_pci_prop_reg(pdev, ocs, np);
++ if (ret)
++ return ret;
++
++ ret = of_pci_prop_compatible(pdev, ocs, np);
++ if (ret)
++ return ret;
++
++ ret = of_pci_prop_interrupts(pdev, ocs, np);
++ if (ret)
++ return ret;
++
++ return 0;
++}
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
+index a4c3974340576..ba717bdd700db 100644
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -679,6 +679,18 @@ static inline int devm_of_pci_bridge_init(struct device *dev, struct pci_host_br
+
+ #endif /* CONFIG_OF */
+
++struct of_changeset;
++
++#ifdef CONFIG_PCI_DYNAMIC_OF_NODES
++void of_pci_make_dev_node(struct pci_dev *pdev);
++void of_pci_remove_node(struct pci_dev *pdev);
++int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs,
++ struct device_node *np);
++#else
++static inline void of_pci_make_dev_node(struct pci_dev *pdev) { }
++static inline void of_pci_remove_node(struct pci_dev *pdev) { }
++#endif
++
+ #ifdef CONFIG_PCIEAER
+ void pci_no_aer(void);
+ void pci_aer_init(struct pci_dev *dev);
+diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
+index d68aee29386b4..d749ea8250d65 100644
+--- a/drivers/pci/remove.c
++++ b/drivers/pci/remove.c
+@@ -22,6 +22,7 @@ static void pci_stop_dev(struct pci_dev *dev)
+ device_release_driver(&dev->dev);
+ pci_proc_detach_device(dev);
+ pci_remove_sysfs_dev_files(dev);
++ of_pci_remove_node(dev);
+
+ pci_dev_assign_added(dev, false);
+ }
+--
+2.42.0
+
--- /dev/null
+From 4e3e19c49144fac4f5ea8b3efbacbc1605aeaef4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Sep 2023 16:23:34 +0200
+Subject: PCI: Lengthen reset delay for VideoPropulsion Torrent QN16e card
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit c9260693aa0c1e029ed23693cfd4d7814eee6624 ]
+
+Commit ac91e6980563 ("PCI: Unify delay handling for reset and resume")
+shortened an unconditional 1 sec delay after a Secondary Bus Reset to 100
+msec for PCIe (per PCIe r6.1 sec 6.6.1). The 1 sec delay is only required
+for Conventional PCI.
+
+But it turns out that there are PCIe devices which require a longer delay
+than prescribed before first config space access after reset recovery or
+resume from D3cold:
+
+Chad reports that a "VideoPropulsion Torrent QN16e" MPEG QAM Modulator
+"raises a PCI system error (PERR), as reported by the IPMI event log, and
+the hardware itself would suffer a catastrophic event, cycling the server"
+unless the longer delay is observed.
+
+The card is specified to conform to PCIe r1.0 and indeed only supports Gen1
+speed (2.5 GT/s) according to lspci. PCIe r1.0 sec 7.6 prescribes the same
+100 msec delay as PCIe r6.1 sec 6.6.1:
+
+ To allow components to perform internal initialization, system software
+ must wait for at least 100 ms from the end of a reset (cold/warm/hot)
+ before it is permitted to issue Configuration Requests
+
+The behavior of the Torrent QN16e card thus appears to be a quirk. Treat
+it as such and lengthen the reset delay for this specific device.
+
+Fixes: ac91e6980563 ("PCI: Unify delay handling for reset and resume")
+Link: https://lore.kernel.org/r/47727e792c7f0282dc144e3ec8ce8eb6e713394e.1695304512.git.lukas@wunner.de
+Reported-by: Chad Schroeder <CSchroeder@sonifi.com>
+Closes: https://lore.kernel.org/linux-pci/DM6PR16MB2844903E34CAB910082DF019B1FAA@DM6PR16MB2844.namprd16.prod.outlook.com/
+Tested-by: Chad Schroeder <CSchroeder@sonifi.com>
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Cc: stable@vger.kernel.org # v5.4+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/quirks.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index 7e9a14e430195..d024bfff12688 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -6173,3 +6173,15 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node);
++
++/*
++ * Devices known to require a longer delay before first config space access
++ * after reset recovery or resume from D3cold:
++ *
++ * VideoPropulsion (aka Genroco) Torrent QN16e MPEG QAM Modulator
++ */
++static void pci_fixup_d3cold_delay_1sec(struct pci_dev *pdev)
++{
++ pdev->d3cold_delay = 1000;
++}
++DECLARE_PCI_FIXUP_FINAL(0x5555, 0x0004, pci_fixup_d3cold_delay_1sec);
+--
+2.42.0
+
--- /dev/null
+From f47a5875214aac821155c7dcdc7af807df7cd924 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Oct 2023 18:30:29 +0530
+Subject: PCI: qcom-ep: Add dedicated callback for writing to DBI2 registers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+[ Upstream commit a07d2497ed657eb2efeb967af47e22f573dcd1d6 ]
+
+The DWC core driver exposes the write_dbi2() callback for writing to the
+DBI2 registers in a vendor-specific way.
+
+On the Qcom EP platforms, the DBI_CS2 bit in the ELBI region needs to be
+asserted before writing to any DBI2 registers and deasserted once done.
+
+So, let's implement the callback for the Qcom PCIe EP driver so that the
+DBI2 writes are correctly handled in the hardware.
+
+Without this callback, the DBI2 register writes like BAR size won't go
+through and as a result, the default BAR size is set for all BARs.
+
+[kwilczynski: commit log, renamed function to match the DWC convention]
+Fixes: f55fee56a631 ("PCI: qcom-ep: Add Qualcomm PCIe Endpoint controller driver")
+Suggested-by: Serge Semin <fancer.lancer@gmail.com>
+Link: https://lore.kernel.org/linux-pci/20231025130029.74693-2-manivannan.sadhasivam@linaro.org
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
+Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
+Cc: stable@vger.kernel.org # 5.16+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/controller/dwc/pcie-qcom-ep.c | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c
+index 267e1247d548f..4a9741428619f 100644
+--- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
++++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
+@@ -121,6 +121,7 @@
+
+ /* ELBI registers */
+ #define ELBI_SYS_STTS 0x08
++#define ELBI_CS2_ENABLE 0xa4
+
+ /* DBI registers */
+ #define DBI_CON_STATUS 0x44
+@@ -253,6 +254,21 @@ static void qcom_pcie_dw_stop_link(struct dw_pcie *pci)
+ disable_irq(pcie_ep->perst_irq);
+ }
+
++static void qcom_pcie_dw_write_dbi2(struct dw_pcie *pci, void __iomem *base,
++ u32 reg, size_t size, u32 val)
++{
++ struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci);
++ int ret;
++
++ writel(1, pcie_ep->elbi + ELBI_CS2_ENABLE);
++
++ ret = dw_pcie_write(pci->dbi_base2 + reg, size, val);
++ if (ret)
++ dev_err(pci->dev, "Failed to write DBI2 register (0x%x): %d\n", reg, ret);
++
++ writel(0, pcie_ep->elbi + ELBI_CS2_ENABLE);
++}
++
+ static int qcom_pcie_enable_resources(struct qcom_pcie_ep *pcie_ep)
+ {
+ int ret;
+@@ -451,6 +467,7 @@ static const struct dw_pcie_ops pci_ops = {
+ .link_up = qcom_pcie_dw_link_up,
+ .start_link = qcom_pcie_dw_start_link,
+ .stop_link = qcom_pcie_dw_stop_link,
++ .write_dbi2 = qcom_pcie_dw_write_dbi2,
+ };
+
+ static int qcom_pcie_ep_get_io_resources(struct platform_device *pdev,
+--
+2.42.0
+
--- /dev/null
+From 22d7df24493c62672d8b84d4fffc7e3a3af05a20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Oct 2023 10:02:04 +0200
+Subject: pmdomain: amlogic: Fix mask for the second NNA mem PD domain
+
+From: Tomeu Vizoso <tomeu@tomeuvizoso.net>
+
+[ Upstream commit b131329b9bfbd1b4c0c5e088cb0c6ec03a12930f ]
+
+Without this change, the NPU hangs when the 8th NN core is used.
+
+It matches what the out-of-tree driver does.
+
+Signed-off-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
+Fixes: 9a217b7e8953 ("soc: amlogic: meson-pwrc: Add NNA power domain for A311D")
+Acked-by: Neil Armstrong <neil.armstrong@linaro.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20231016080205.41982-2-tomeu@tomeuvizoso.net
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/amlogic/meson-ee-pwrc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/soc/amlogic/meson-ee-pwrc.c b/drivers/soc/amlogic/meson-ee-pwrc.c
+index f54acffc83f9f..f2b24361c8cac 100644
+--- a/drivers/soc/amlogic/meson-ee-pwrc.c
++++ b/drivers/soc/amlogic/meson-ee-pwrc.c
+@@ -229,7 +229,7 @@ static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_audio[] = {
+
+ static struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_nna[] = {
+ { G12A_HHI_NANOQ_MEM_PD_REG0, GENMASK(31, 0) },
+- { G12A_HHI_NANOQ_MEM_PD_REG1, GENMASK(23, 0) },
++ { G12A_HHI_NANOQ_MEM_PD_REG1, GENMASK(31, 0) },
+ };
+
+ #define VPU_PD(__name, __top_pd, __mem, __is_pwr_off, __resets, __clks) \
+--
+2.42.0
+
--- /dev/null
+From 6ccc518f100b52f3209ada89e0697bd9c56e692b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Oct 2023 07:10:40 -0300
+Subject: pmdomain: bcm: bcm2835-power: check if the ASB register is equal to
+ enable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maíra Canal <mcanal@igalia.com>
+
+[ Upstream commit 2e75396f1df61e1f1d26d0d703fc7292c4ae4371 ]
+
+The commit c494a447c14e ("soc: bcm: bcm2835-power: Refactor ASB control")
+refactored the ASB control by using a general function to handle both
+the enable and disable. But this patch introduced a subtle regression:
+we need to check if !!(readl(base + reg) & ASB_ACK) == enable, not just
+check if (readl(base + reg) & ASB_ACK) == true.
+
+Currently, this is causing an invalid register state in V3D when
+unloading and loading the driver, because `bcm2835_asb_disable()` will
+return -ETIMEDOUT and `bcm2835_asb_power_off()` will fail to disable the
+ASB slave for V3D.
+
+Fixes: c494a447c14e ("soc: bcm: bcm2835-power: Refactor ASB control")
+Signed-off-by: Maíra Canal <mcanal@igalia.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Reviewed-by: Stefan Wahren <stefan.wahren@i2se.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20231024101251.6357-2-mcanal@igalia.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/bcm/bcm2835-power.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/soc/bcm/bcm2835-power.c b/drivers/soc/bcm/bcm2835-power.c
+index 1a179d4e011cf..d2f0233cb6206 100644
+--- a/drivers/soc/bcm/bcm2835-power.c
++++ b/drivers/soc/bcm/bcm2835-power.c
+@@ -175,7 +175,7 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable
+ }
+ writel(PM_PASSWORD | val, base + reg);
+
+- while (readl(base + reg) & ASB_ACK) {
++ while (!!(readl(base + reg) & ASB_ACK) == enable) {
+ cpu_relax();
+ if (ktime_get_ns() - start >= 1000)
+ return -ETIMEDOUT;
+--
+2.42.0
+
--- /dev/null
+From 506dde17f42635fe1003e1055b9199db323458fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Oct 2023 02:59:49 +0800
+Subject: pmdomain: imx: Make imx pgc power domain also set the fwnode
+
+From: Pengfei Li <pengfei.li_1@nxp.com>
+
+[ Upstream commit 374de39d38f97b0e58cfee88da590b2d056ccf7f ]
+
+Currently, The imx pgc power domain doesn't set the fwnode
+pointer, which results in supply regulator device can't get
+consumer imx pgc power domain device from fwnode when creating
+a link.
+
+This causes the driver core to instead try to create a link
+between the parent gpc device of imx pgc power domain device and
+supply regulator device. However, at this point, the gpc device
+has already been bound, and the link creation will fail. So adding
+the fwnode pointer to the imx pgc power domain device will fix
+this issue.
+
+Signed-off-by: Pengfei Li <pengfei.li_1@nxp.com>
+Tested-by: Emil Kronborg <emil.kronborg@protonmail.com>
+Fixes: 3fb16866b51d ("driver core: fw_devlink: Make cycle detection more robust")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20231020185949.537083-1-pengfei.li_1@nxp.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/imx/gpc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
+index 90a8b2c0676ff..419ed15cc10c4 100644
+--- a/drivers/soc/imx/gpc.c
++++ b/drivers/soc/imx/gpc.c
+@@ -498,6 +498,7 @@ static int imx_gpc_probe(struct platform_device *pdev)
+
+ pd_pdev->dev.parent = &pdev->dev;
+ pd_pdev->dev.of_node = np;
++ pd_pdev->dev.fwnode = of_fwnode_handle(np);
+
+ ret = platform_device_add(pd_pdev);
+ if (ret) {
+--
+2.42.0
+
--- /dev/null
+From d0a503a02a698bdf2d9ce2c69efc4fa46054c0cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 14:27:31 +0000
+Subject: rcutorture: Fix stuttering races and other issues
+
+From: Joel Fernandes (Google) <joel@joelfernandes.org>
+
+[ Upstream commit cca42bd8eb1b54a4c9bbf48c79d120e66619a3e4 ]
+
+The stuttering code isn't functioning as expected. Ideally, it should
+pause the torture threads for a designated period before resuming. Yet,
+it fails to halt the test for the correct duration. Additionally, a race
+condition exists, potentially causing the stuttering code to pause for
+an extended period if the 'spt' variable is non-zero due to the stutter
+orchestration thread's inadequate CPU time.
+
+Moreover, over-stuttering can hinder RCU's progress on TREE07 kernels.
+This happens as the stuttering code may run within a softirq due to RCU
+callbacks. Consequently, ksoftirqd keeps a CPU busy for several seconds,
+thus obstructing RCU's progress. This situation triggers a warning
+message in the logs:
+
+[ 2169.481783] rcu_torture_writer: rtort_pipe_count: 9
+
+This warning suggests that an RCU torture object, although invisible to
+RCU readers, couldn't make it past the pipe array and be freed -- a
+strong indication that there weren't enough grace periods during the
+stutter interval.
+
+To address these issues, this patch sets the "stutter end" time to an
+absolute point in the future set by the main stutter thread. This is
+then used for waiting in stutter_wait(). While the stutter thread still
+defines this absolute time, the waiters' waiting logic doesn't rely on
+the stutter thread receiving sufficient CPU time to halt the stuttering
+as the halting is now self-controlled.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/torture.c | 45 ++++++++++++---------------------------------
+ 1 file changed, 12 insertions(+), 33 deletions(-)
+
+diff --git a/kernel/torture.c b/kernel/torture.c
+index fd2168058dac8..cd299ccc4e5d5 100644
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -713,7 +713,7 @@ static void torture_shutdown_cleanup(void)
+ * suddenly applied to or removed from the system.
+ */
+ static struct task_struct *stutter_task;
+-static int stutter_pause_test;
++static ktime_t stutter_till_abs_time;
+ static int stutter;
+ static int stutter_gap;
+
+@@ -723,30 +723,16 @@ static int stutter_gap;
+ */
+ bool stutter_wait(const char *title)
+ {
+- unsigned int i = 0;
+ bool ret = false;
+- int spt;
++ ktime_t till_ns;
+
+ cond_resched_tasks_rcu_qs();
+- spt = READ_ONCE(stutter_pause_test);
+- for (; spt; spt = READ_ONCE(stutter_pause_test)) {
+- if (!ret && !rt_task(current)) {
+- sched_set_normal(current, MAX_NICE);
+- ret = true;
+- }
+- if (spt == 1) {
+- torture_hrtimeout_jiffies(1, NULL);
+- } else if (spt == 2) {
+- while (READ_ONCE(stutter_pause_test)) {
+- if (!(i++ & 0xffff))
+- torture_hrtimeout_us(10, 0, NULL);
+- cond_resched();
+- }
+- } else {
+- torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL);
+- }
+- torture_shutdown_absorb(title);
++ till_ns = READ_ONCE(stutter_till_abs_time);
++ if (till_ns && ktime_before(ktime_get(), till_ns)) {
++ torture_hrtimeout_ns(till_ns, 0, HRTIMER_MODE_ABS, NULL);
++ ret = true;
+ }
++ torture_shutdown_absorb(title);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(stutter_wait);
+@@ -757,23 +743,16 @@ EXPORT_SYMBOL_GPL(stutter_wait);
+ */
+ static int torture_stutter(void *arg)
+ {
+- DEFINE_TORTURE_RANDOM(rand);
+- int wtime;
++ ktime_t till_ns;
+
+ VERBOSE_TOROUT_STRING("torture_stutter task started");
+ do {
+ if (!torture_must_stop() && stutter > 1) {
+- wtime = stutter;
+- if (stutter > 2) {
+- WRITE_ONCE(stutter_pause_test, 1);
+- wtime = stutter - 3;
+- torture_hrtimeout_jiffies(wtime, &rand);
+- wtime = 2;
+- }
+- WRITE_ONCE(stutter_pause_test, 2);
+- torture_hrtimeout_jiffies(wtime, NULL);
++ till_ns = ktime_add_ns(ktime_get(),
++ jiffies_to_nsecs(stutter));
++ WRITE_ONCE(stutter_till_abs_time, till_ns);
++ torture_hrtimeout_jiffies(stutter - 1, NULL);
+ }
+- WRITE_ONCE(stutter_pause_test, 0);
+ if (!torture_must_stop())
+ torture_hrtimeout_jiffies(stutter_gap, NULL);
+ torture_shutdown_absorb("torture_stutter");
+--
+2.42.0
+
i3c-master-svc-fix-check-wrong-status-register-in-irq-handler.patch
i3c-master-svc-fix-sda-keep-low-when-polling-ibiwon-timeout-happen.patch
i3c-master-svc-fix-random-hot-join-failure-since-timeout-error.patch
+pmdomain-bcm-bcm2835-power-check-if-the-asb-register.patch
+pmdomain-amlogic-fix-mask-for-the-second-nna-mem-pd-.patch
+pmdomain-imx-make-imx-pgc-power-domain-also-set-the-.patch
+pci-qcom-ep-add-dedicated-callback-for-writing-to-db.patch
+of-dynamic-add-interfaces-for-creating-device-node-d.patch
+pci-create-device-tree-node-for-bridge.patch
+pci-add-quirks-to-generate-device-tree-node-for-xili.patch
+of-overlay-extend-of_overlay_fdt_apply-to-specify-th.patch
+of-unittest-add-pci_dt_testdrv-pci-driver.patch
+pci-lengthen-reset-delay-for-videopropulsion-torrent.patch
+torture-add-a-kthread-creation-callback-to-_torture_.patch
+torture-add-lock_torture-writer_fifo-module-paramete.patch
+torture-make-torture_hrtimeout_-use-task_idle.patch
+torture-move-stutter_wait-timeouts-to-hrtimers.patch
+torture-make-torture_hrtimeout_ns-take-an-hrtimer-mo.patch
+rcutorture-fix-stuttering-races-and-other-issues.patch
+mm-hugetlb-prepare-hugetlb_follow_page_mask-for-foll.patch
+mm-hugetlb-use-nth_page-in-place-of-direct-struct-pa.patch
+cxl-region-fix-x1-root-decoder-granularity-calculati.patch
+cxl-port-fix-delete_endpoint-vs-parent-unregistratio.patch
--- /dev/null
+From fd0e51b6a7d080f459e997e927b62c0e4270454a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 15:50:07 -0700
+Subject: torture: Add a kthread-creation callback to _torture_create_kthread()
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+[ Upstream commit 67d5404d274376890d6d095a10e6565854918f8e ]
+
+This commit adds a kthread-creation callback to the
+_torture_create_kthread() function, which allows callers of a new
+torture_create_kthread_cb() macro to specify a function to be invoked
+after the kthread is created but before it is awakened for the first time.
+
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Josh Triplett <josh@joshtriplett.org>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: kernel-team@android.com
+Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Acked-by: John Stultz <jstultz@google.com>
+Stable-dep-of: cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/torture.h | 7 +++++--
+ kernel/torture.c | 6 +++++-
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/torture.h b/include/linux/torture.h
+index 7038104463e48..bb466eec01e42 100644
+--- a/include/linux/torture.h
++++ b/include/linux/torture.h
+@@ -108,12 +108,15 @@ bool torture_must_stop(void);
+ bool torture_must_stop_irq(void);
+ void torture_kthread_stopping(char *title);
+ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
+- char *f, struct task_struct **tp);
++ char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp));
+ void _torture_stop_kthread(char *m, struct task_struct **tp);
+
+ #define torture_create_kthread(n, arg, tp) \
+ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
+- "Failed to create " #n, &(tp))
++ "Failed to create " #n, &(tp), NULL)
++#define torture_create_kthread_cb(n, arg, tp, cbf) \
++ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
++ "Failed to create " #n, &(tp), cbf)
+ #define torture_stop_kthread(n, tp) \
+ _torture_stop_kthread("Stopping " #n " task", &(tp))
+
+diff --git a/kernel/torture.c b/kernel/torture.c
+index 1a0519b836ac9..1da48f3816f61 100644
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -926,7 +926,7 @@ EXPORT_SYMBOL_GPL(torture_kthread_stopping);
+ * it starts, you will need to open-code your own.
+ */
+ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
+- char *f, struct task_struct **tp)
++ char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp))
+ {
+ int ret = 0;
+
+@@ -938,6 +938,10 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
+ *tp = NULL;
+ return ret;
+ }
++
++ if (cbf)
++ cbf(*tp);
++
+ wake_up_process(*tp); // Process is sleeping, so ordering provided.
+ torture_shuffle_task_register(*tp);
+ return ret;
+--
+2.42.0
+
--- /dev/null
+From 6afb14077e9fc959e7a1f4867fe9dab751641a26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Jun 2023 22:02:10 +0000
+Subject: torture: Add lock_torture writer_fifo module parameter
+
+From: Dietmar Eggemann <dietmar.eggemann@arm.com>
+
+[ Upstream commit 5d248bb39fe1388943acb6510f8f48fa5570e0ec ]
+
+This commit adds a module parameter that causes the locktorture writer
+to run at real-time priority.
+
+To use it:
+insmod /lib/modules/torture.ko random_shuffle=1
+insmod /lib/modules/locktorture.ko torture_type=mutex_lock rt_boost=1 rt_boost_factor=50 nested_locks=3 writer_fifo=1
+ ^^^^^^^^^^^^^
+
+A predecessor to this patch has been helpful to uncover issues with the
+proxy-execution series.
+
+[ paulmck: Remove locktorture-specific code from kernel/torture.c. ]
+
+Cc: "Paul E. McKenney" <paulmck@kernel.org>
+Cc: Josh Triplett <josh@joshtriplett.org>
+Cc: Joel Fernandes <joel@joelfernandes.org>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: kernel-team@android.com
+Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+[jstultz: Include header change to build, reword commit message]
+Signed-off-by: John Stultz <jstultz@google.com>
+Acked-by: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 4 ++++
+ kernel/locking/locktorture.c | 12 +++++++-----
+ kernel/torture.c | 3 ++-
+ 3 files changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index b951b4c2808f1..5711129686d10 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2938,6 +2938,10 @@
+ locktorture.torture_type= [KNL]
+ Specify the locking implementation to test.
+
++ locktorture.writer_fifo= [KNL]
++ Run the write-side locktorture kthreads at
++ sched_set_fifo() real-time priority.
++
+ locktorture.verbose= [KNL]
+ Enable additional printk() statements.
+
+diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
+index 949d3deae5062..270c7f80ce84c 100644
+--- a/kernel/locking/locktorture.c
++++ b/kernel/locking/locktorture.c
+@@ -45,6 +45,7 @@ torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
+ torture_param(int, rt_boost, 2,
+ "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
+ torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
++torture_param(int, writer_fifo, 0, "Run writers at sched_set_fifo() priority");
+ torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
+ torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
+ /* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
+@@ -809,7 +810,8 @@ static int lock_torture_writer(void *arg)
+ bool skip_main_lock;
+
+ VERBOSE_TOROUT_STRING("lock_torture_writer task started");
+- set_user_nice(current, MAX_NICE);
++ if (!rt_task(current))
++ set_user_nice(current, MAX_NICE);
+
+ do {
+ if ((torture_random(&rand) & 0xfffff) == 0)
+@@ -1015,8 +1017,7 @@ static void lock_torture_cleanup(void)
+
+ if (writer_tasks) {
+ for (i = 0; i < cxt.nrealwriters_stress; i++)
+- torture_stop_kthread(lock_torture_writer,
+- writer_tasks[i]);
++ torture_stop_kthread(lock_torture_writer, writer_tasks[i]);
+ kfree(writer_tasks);
+ writer_tasks = NULL;
+ }
+@@ -1244,8 +1245,9 @@ static int __init lock_torture_init(void)
+ goto create_reader;
+
+ /* Create writer. */
+- firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
+- writer_tasks[i]);
++ firsterr = torture_create_kthread_cb(lock_torture_writer, &cxt.lwsa[i],
++ writer_tasks[i],
++ writer_fifo ? sched_set_fifo : NULL);
+ if (torture_init_error(firsterr))
+ goto unwind;
+
+diff --git a/kernel/torture.c b/kernel/torture.c
+index 1da48f3816f61..e06b03e987c9f 100644
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -37,6 +37,7 @@
+ #include <linux/ktime.h>
+ #include <asm/byteorder.h>
+ #include <linux/torture.h>
++#include <linux/sched/rt.h>
+ #include "rcu/rcu.h"
+
+ MODULE_LICENSE("GPL");
+@@ -728,7 +729,7 @@ bool stutter_wait(const char *title)
+ cond_resched_tasks_rcu_qs();
+ spt = READ_ONCE(stutter_pause_test);
+ for (; spt; spt = READ_ONCE(stutter_pause_test)) {
+- if (!ret) {
++ if (!ret && !rt_task(current)) {
+ sched_set_normal(current, MAX_NICE);
+ ret = true;
+ }
+--
+2.42.0
+
--- /dev/null
+From 2ce7e761eca4d39102721b8f4435114fad029195 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 14:59:29 -0700
+Subject: torture: Make torture_hrtimeout_*() use TASK_IDLE
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+[ Upstream commit 872948c665f50a1446e8a34b1ed57bb0b3a9ca4a ]
+
+Given that it is expected that more code will use torture_hrtimeout_*(),
+including for longer timeouts, make it use TASK_IDLE instead of
+TASK_UNINTERRUPTIBLE.
+
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/torture.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/torture.c b/kernel/torture.c
+index e06b03e987c9f..4a2e0512f9197 100644
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -90,7 +90,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_s
+
+ if (trsp)
+ hto += (torture_random(trsp) >> 3) % fuzzt_ns;
+- set_current_state(TASK_UNINTERRUPTIBLE);
++ set_current_state(TASK_IDLE);
+ return schedule_hrtimeout(&hto, HRTIMER_MODE_REL);
+ }
+ EXPORT_SYMBOL_GPL(torture_hrtimeout_ns);
+--
+2.42.0
+
--- /dev/null
+From dfbc76c5a47a1bf96bce671f06cb16e7cc809fd4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 13:57:03 -0700
+Subject: torture: Make torture_hrtimeout_ns() take an hrtimer mode parameter
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+[ Upstream commit a741deac787f0d2d7068638c067db20af9e63752 ]
+
+The current torture-test sleeps are waiting for a duration, but there
+are situations where it is better to wait for an absolute time, for
+example, when ending a stutter interval. This commit therefore adds
+an hrtimer mode parameter to torture_hrtimeout_ns(). Why not also the
+other torture_hrtimeout_*() functions? The theory is that most absolute
+times will be in nanoseconds, especially not (say) jiffies.
+
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Stable-dep-of: cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/torture.h | 3 ++-
+ kernel/torture.c | 13 +++++++------
+ 2 files changed, 9 insertions(+), 7 deletions(-)
+
+diff --git a/include/linux/torture.h b/include/linux/torture.h
+index bb466eec01e42..017f0f710815a 100644
+--- a/include/linux/torture.h
++++ b/include/linux/torture.h
+@@ -81,7 +81,8 @@ static inline void torture_random_init(struct torture_random_state *trsp)
+ }
+
+ /* Definitions for high-resolution-timer sleeps. */
+-int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp);
++int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode,
++ struct torture_random_state *trsp);
+ int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp);
+ int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp);
+ int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp);
+diff --git a/kernel/torture.c b/kernel/torture.c
+index a55cb70b192fc..fd2168058dac8 100644
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -84,14 +84,15 @@ EXPORT_SYMBOL_GPL(verbose_torout_sleep);
+ * nanosecond random fuzz. This function and its friends desynchronize
+ * testing from the timer wheel.
+ */
+-int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp)
++int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode,
++ struct torture_random_state *trsp)
+ {
+ ktime_t hto = baset_ns;
+
+ if (trsp)
+ hto += (torture_random(trsp) >> 3) % fuzzt_ns;
+ set_current_state(TASK_IDLE);
+- return schedule_hrtimeout(&hto, HRTIMER_MODE_REL);
++ return schedule_hrtimeout(&hto, mode);
+ }
+ EXPORT_SYMBOL_GPL(torture_hrtimeout_ns);
+
+@@ -103,7 +104,7 @@ int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state
+ {
+ ktime_t baset_ns = baset_us * NSEC_PER_USEC;
+
+- return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp);
++ return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp);
+ }
+ EXPORT_SYMBOL_GPL(torture_hrtimeout_us);
+
+@@ -120,7 +121,7 @@ int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state
+ fuzzt_ns = (u32)~0U;
+ else
+ fuzzt_ns = fuzzt_us * NSEC_PER_USEC;
+- return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp);
++ return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp);
+ }
+ EXPORT_SYMBOL_GPL(torture_hrtimeout_ms);
+
+@@ -133,7 +134,7 @@ int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp)
+ {
+ ktime_t baset_ns = jiffies_to_nsecs(baset_j);
+
+- return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), trsp);
++ return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), HRTIMER_MODE_REL, trsp);
+ }
+ EXPORT_SYMBOL_GPL(torture_hrtimeout_jiffies);
+
+@@ -150,7 +151,7 @@ int torture_hrtimeout_s(u32 baset_s, u32 fuzzt_ms, struct torture_random_state *
+ fuzzt_ns = (u32)~0U;
+ else
+ fuzzt_ns = fuzzt_ms * NSEC_PER_MSEC;
+- return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp);
++ return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp);
+ }
+ EXPORT_SYMBOL_GPL(torture_hrtimeout_s);
+
+--
+2.42.0
+
--- /dev/null
+From 8caa919b345d9cdc091a659079c35eb1bbe0ea5a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 16:52:35 -0700
+Subject: torture: Move stutter_wait() timeouts to hrtimers
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+[ Upstream commit 10af43671e8bf4ac153c4991a17cdf57bc6d2cfe ]
+
+In order to gain better race coverage, move the test start/stop
+waits in stutter_wait() to torture_hrtimeout_jiffies().
+
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Stable-dep-of: cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/torture.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/torture.c b/kernel/torture.c
+index 4a2e0512f9197..a55cb70b192fc 100644
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -734,7 +734,7 @@ bool stutter_wait(const char *title)
+ ret = true;
+ }
+ if (spt == 1) {
+- schedule_timeout_interruptible(1);
++ torture_hrtimeout_jiffies(1, NULL);
+ } else if (spt == 2) {
+ while (READ_ONCE(stutter_pause_test)) {
+ if (!(i++ & 0xffff))
+@@ -742,7 +742,7 @@ bool stutter_wait(const char *title)
+ cond_resched();
+ }
+ } else {
+- schedule_timeout_interruptible(round_jiffies_relative(HZ));
++ torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL);
+ }
+ torture_shutdown_absorb(title);
+ }
+--
+2.42.0
+