From: Greg Kroah-Hartman
Date: Tue, 8 Jun 2021 15:02:35 +0000 (+0200)
Subject: 5.4-stable patches
X-Git-Tag: v4.4.272~26
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=df0a47efae995a7468c34dbc04f8202f0e58e566;p=thirdparty%2Fkernel%2Fstable-queue.git

5.4-stable patches

added patches:
	bnxt_en-remove-the-setting-of-dev_port.patch
	mm-add-thp_order.patch
	mm-filemap-fix-storing-to-a-thp-shadow-entry.patch
	xarray-add-xa_get_order.patch
	xarray-add-xas_split.patch
---
diff --git a/queue-5.4/bnxt_en-remove-the-setting-of-dev_port.patch b/queue-5.4/bnxt_en-remove-the-setting-of-dev_port.patch
new file mode 100644
index 00000000000..4786a72196a
--- /dev/null
+++ b/queue-5.4/bnxt_en-remove-the-setting-of-dev_port.patch
@@ -0,0 +1,32 @@
+From 1d86859fdf31a0d50cc82b5d0d6bfb5fe98f6c00 Mon Sep 17 00:00:00 2001
+From: Michael Chan
+Date: Mon, 27 Jan 2020 04:56:15 -0500
+Subject: bnxt_en: Remove the setting of dev_port.
+
+From: Michael Chan
+
+commit 1d86859fdf31a0d50cc82b5d0d6bfb5fe98f6c00 upstream.
+
+The dev_port is meant to distinguish the network ports belonging to
+the same PCI function. Our devices only have one network port
+associated with each PCI function and so we should not set it for
+correctness.
+
+Signed-off-by: Michael Chan
+Signed-off-by: David S. Miller
+Signed-off-by: Krzysztof Kozlowski
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -7003,7 +7003,6 @@ static int __bnxt_hwrm_func_qcaps(struct
+ 
+ 	pf->fw_fid = le16_to_cpu(resp->fid);
+ 	pf->port_id = le16_to_cpu(resp->port_id);
+-	bp->dev->dev_port = pf->port_id;
+ 	memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN);
+ 	pf->first_vf_id = le16_to_cpu(resp->first_vf_id);
+ 	pf->max_vfs = le16_to_cpu(resp->max_vfs);
diff --git a/queue-5.4/mm-add-thp_order.patch b/queue-5.4/mm-add-thp_order.patch
new file mode 100644
index 00000000000..0111bd6d229
--- /dev/null
+++ b/queue-5.4/mm-add-thp_order.patch
@@ -0,0 +1,64 @@
+From foo@baz Tue Jun 8 04:58:00 PM CEST 2021
+From: "Matthew Wilcox (Oracle)"
+Date: Mon, 7 Jun 2021 21:08:42 +0100
+Subject: mm: add thp_order
+To: stable@vger.kernel.org
+Cc: "Matthew Wilcox (Oracle)" , Andrew Morton , William Kucharski , Zi Yan , David Hildenbrand , Mike Kravetz , "Kirill A. Shutemov" , Linus Torvalds
+Message-ID: <20210607200845.3860579-2-willy@infradead.org>
+
+From: "Matthew Wilcox (Oracle)"
+
+commit 6ffbb45826f5d9ae09aa60cd88594b7816c96190 upstream
+
+This function returns the order of a transparent huge page. It compiles
+to 0 if CONFIG_TRANSPARENT_HUGEPAGE is disabled.
+
+Signed-off-by: Matthew Wilcox (Oracle)
+Signed-off-by: Andrew Morton
+Reviewed-by: William Kucharski
+Reviewed-by: Zi Yan
+Cc: David Hildenbrand
+Cc: Mike Kravetz
+Cc: "Kirill A. Shutemov"
+Link: http://lkml.kernel.org/r/20200629151959.15779-4-willy@infradead.org
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/huge_mm.h | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -231,6 +231,19 @@ static inline spinlock_t *pud_trans_huge
+ 	else
+ 		return NULL;
+ }
++
++/**
++ * thp_order - Order of a transparent huge page.
++ * @page: Head page of a transparent huge page.
++ */
++static inline unsigned int thp_order(struct page *page)
++{
++	VM_BUG_ON_PGFLAGS(PageTail(page), page);
++	if (PageHead(page))
++		return HPAGE_PMD_ORDER;
++	return 0;
++}
++
+ static inline int hpage_nr_pages(struct page *page)
+ {
+ 	if (unlikely(PageTransHuge(page)))
+@@ -290,6 +303,12 @@ static inline struct list_head *page_def
+ #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
+ #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
+ 
++static inline unsigned int thp_order(struct page *page)
++{
++	VM_BUG_ON_PGFLAGS(PageTail(page), page);
++	return 0;
++}
++
+ #define hpage_nr_pages(x) 1
+ 
+ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
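
Illustration (not part of the queued patches): thp_order() reports the log2 of the number of base pages in a compound page, so a PMD-sized THP is order 9 and covers 1 << 9 = 512 base pages with 4 KiB pages on x86-64. The standalone C sketch below only models that arithmetic; the page size and PMD order used here are illustrative assumptions, not values defined by the patch above.

/*
 * Standalone sketch, not kernel code: what a compound-page "order" means.
 * The 4 KiB base page and order-9 PMD size are illustrative x86-64 values.
 */
#include <stdio.h>

#define BASE_PAGE_SIZE	4096UL
#define PMD_ORDER	9	/* 2^9 base pages = 2 MiB */

/* Mirrors the shape of thp_order(): a THP head page reports the PMD order,
 * every other page reports order 0. */
static unsigned int order_of(int is_thp_head)
{
	return is_thp_head ? PMD_ORDER : 0;
}

int main(void)
{
	for (int head = 1; head >= 0; head--) {
		unsigned int order = order_of(head);

		printf("order %u -> %lu base pages, %lu bytes\n",
		       order, 1UL << order, BASE_PAGE_SIZE << order);
	}
	return 0;
}

The existing hpage_nr_pages() helper visible in the context lines above is the page-count counterpart of this order.
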
Shutemov" +Cc: Qian Cai +Link: https://lkml.kernel.org/r/20200903183029.14930-4-willy@infradead.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/filemap.c | 37 ++++++++++++++++++++++++++++--------- + 1 file changed, 28 insertions(+), 9 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -856,7 +856,6 @@ noinline int __add_to_page_cache_locked( + int huge = PageHuge(page); + struct mem_cgroup *memcg; + int error; +- void *old; + + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageSwapBacked(page), page); +@@ -872,21 +871,41 @@ noinline int __add_to_page_cache_locked( + get_page(page); + page->mapping = mapping; + page->index = offset; ++ gfp_mask &= GFP_RECLAIM_MASK; + + do { ++ unsigned int order = xa_get_order(xas.xa, xas.xa_index); ++ void *entry, *old = NULL; ++ ++ if (order > thp_order(page)) ++ xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), ++ order, gfp_mask); + xas_lock_irq(&xas); +- old = xas_load(&xas); +- if (old && !xa_is_value(old)) +- xas_set_err(&xas, -EEXIST); ++ xas_for_each_conflict(&xas, entry) { ++ old = entry; ++ if (!xa_is_value(entry)) { ++ xas_set_err(&xas, -EEXIST); ++ goto unlock; ++ } ++ } ++ ++ if (old) { ++ if (shadowp) ++ *shadowp = old; ++ /* entry may have been split before we acquired lock */ ++ order = xa_get_order(xas.xa, xas.xa_index); ++ if (order > thp_order(page)) { ++ xas_split(&xas, old, order); ++ xas_reset(&xas); ++ } ++ } ++ + xas_store(&xas, page); + if (xas_error(&xas)) + goto unlock; + +- if (xa_is_value(old)) { ++ if (old) + mapping->nrexceptional--; +- if (shadowp) +- *shadowp = old; +- } + mapping->nrpages++; + + /* hugetlb pages do not participate in page cache accounting */ +@@ -894,7 +913,7 @@ noinline int __add_to_page_cache_locked( + __inc_node_page_state(page, NR_FILE_PAGES); + unlock: + xas_unlock_irq(&xas); +- } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)); ++ } while (xas_nomem(&xas, gfp_mask)); + + if (xas_error(&xas)) + goto error; diff --git a/queue-5.4/series b/queue-5.4/series index 3aaf7a9915f..f33fe0983dc 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -61,3 +61,8 @@ btrfs-fix-error-handling-in-btrfs_del_csums.patch btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch mm-hugetlb-fix-simple-resv_huge_pages-underflow-on-u.patch +bnxt_en-remove-the-setting-of-dev_port.patch +mm-add-thp_order.patch +xarray-add-xa_get_order.patch +xarray-add-xas_split.patch +mm-filemap-fix-storing-to-a-thp-shadow-entry.patch diff --git a/queue-5.4/xarray-add-xa_get_order.patch b/queue-5.4/xarray-add-xa_get_order.patch new file mode 100644 index 00000000000..fcb5e3ab22d --- /dev/null +++ b/queue-5.4/xarray-add-xa_get_order.patch @@ -0,0 +1,154 @@ +From foo@baz Tue Jun 8 04:58:00 PM CEST 2021 +From: "Matthew Wilcox (Oracle)" +Date: Mon, 7 Jun 2021 21:08:43 +0100 +Subject: XArray: add xa_get_order +To: stable@vger.kernel.org +Cc: "Matthew Wilcox (Oracle)" , Andrew Morton , "Kirill A . Shutemov" , Qian Cai , Song Liu , Linus Torvalds +Message-ID: <20210607200845.3860579-3-willy@infradead.org> + +From: "Matthew Wilcox (Oracle)" + +commit 57417cebc96b57122a2207fc84a6077d20c84b4b upstream + +Patch series "Fix read-only THP for non-tmpfs filesystems". + +As described more verbosely in the [3/3] changelog, we can inadvertently +put an order-0 page in the page cache which occupies 512 consecutive +entries. 
diff --git a/queue-5.4/series b/queue-5.4/series
index 3aaf7a9915f..f33fe0983dc 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -61,3 +61,8 @@ btrfs-fix-error-handling-in-btrfs_del_csums.patch
 btrfs-return-errors-from-btrfs_del_csums-in-cleanup_ref_head.patch
 btrfs-fixup-error-handling-in-fixup_inode_link_counts.patch
 mm-hugetlb-fix-simple-resv_huge_pages-underflow-on-u.patch
+bnxt_en-remove-the-setting-of-dev_port.patch
+mm-add-thp_order.patch
+xarray-add-xa_get_order.patch
+xarray-add-xas_split.patch
+mm-filemap-fix-storing-to-a-thp-shadow-entry.patch
diff --git a/queue-5.4/xarray-add-xa_get_order.patch b/queue-5.4/xarray-add-xa_get_order.patch
new file mode 100644
index 00000000000..fcb5e3ab22d
--- /dev/null
+++ b/queue-5.4/xarray-add-xa_get_order.patch
@@ -0,0 +1,154 @@
+From foo@baz Tue Jun 8 04:58:00 PM CEST 2021
+From: "Matthew Wilcox (Oracle)"
+Date: Mon, 7 Jun 2021 21:08:43 +0100
+Subject: XArray: add xa_get_order
+To: stable@vger.kernel.org
+Cc: "Matthew Wilcox (Oracle)" , Andrew Morton , "Kirill A . Shutemov" , Qian Cai , Song Liu , Linus Torvalds
+Message-ID: <20210607200845.3860579-3-willy@infradead.org>
+
+From: "Matthew Wilcox (Oracle)"
+
+commit 57417cebc96b57122a2207fc84a6077d20c84b4b upstream
+
+Patch series "Fix read-only THP for non-tmpfs filesystems".
+
+As described more verbosely in the [3/3] changelog, we can inadvertently
+put an order-0 page in the page cache which occupies 512 consecutive
+entries. Users are running into this if they enable the
+READ_ONLY_THP_FOR_FS config option; see
+https://bugzilla.kernel.org/show_bug.cgi?id=206569 and Qian Cai has also
+reported it here:
+https://lore.kernel.org/lkml/20200616013309.GB815@lca.pw/
+
+This is a rather intrusive way of fixing the problem, but has the
+advantage that I've actually been testing it with the THP patches, which
+means that it sees far more use than it does upstream -- indeed, Song has
+been entirely unable to reproduce it. It also has the advantage that it
+removes a few patches from my gargantuan backlog of THP patches.
+
+This patch (of 3):
+
+This function returns the order of the entry at the index. We need this
+because there isn't space in the shadow entry to encode its order.
+
+[akpm@linux-foundation.org: export xa_get_order to modules]
+
+Signed-off-by: Matthew Wilcox (Oracle)
+Signed-off-by: Andrew Morton
+Cc: "Kirill A . Shutemov"
+Cc: Qian Cai
+Cc: Song Liu
+Link: https://lkml.kernel.org/r/20200903183029.14930-1-willy@infradead.org
+Link: https://lkml.kernel.org/r/20200903183029.14930-2-willy@infradead.org
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/xarray.h | 9 +++++++++
+ lib/test_xarray.c | 21 +++++++++++++++++++++
+ lib/xarray.c | 40 ++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 70 insertions(+)
+
+--- a/include/linux/xarray.h
++++ b/include/linux/xarray.h
+@@ -1470,6 +1470,15 @@ void xas_pause(struct xa_state *);
+ 
+ void xas_create_range(struct xa_state *);
+ 
++#ifdef CONFIG_XARRAY_MULTI
++int xa_get_order(struct xarray *, unsigned long index);
++#else
++static inline int xa_get_order(struct xarray *xa, unsigned long index)
++{
++	return 0;
++}
++#endif
++
+ /**
+  * xas_reload() - Refetch an entry from the xarray.
+  * @xas: XArray operation state.
+--- a/lib/test_xarray.c
++++ b/lib/test_xarray.c
+@@ -1649,6 +1649,26 @@ static noinline void check_account(struc
+ #endif
+ }
+ 
++static noinline void check_get_order(struct xarray *xa)
++{
++	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
++	unsigned int order;
++	unsigned long i, j;
++
++	for (i = 0; i < 3; i++)
++		XA_BUG_ON(xa, xa_get_order(xa, i) != 0);
++
++	for (order = 0; order < max_order; order++) {
++		for (i = 0; i < 10; i++) {
++			xa_store_order(xa, i << order, order,
++					xa_mk_index(i << order), GFP_KERNEL);
++			for (j = i << order; j < (i + 1) << order; j++)
++				XA_BUG_ON(xa, xa_get_order(xa, j) != order);
++			xa_erase(xa, i << order);
++		}
++	}
++}
++
+ static noinline void check_destroy(struct xarray *xa)
+ {
+ 	unsigned long index;
+@@ -1697,6 +1717,7 @@ static int xarray_checks(void)
+ 	check_reserve(&array);
+ 	check_reserve(&xa0);
+ 	check_multi_store(&array);
++	check_get_order(&array);
+ 	check_xa_alloc();
+ 	check_find(&array);
+ 	check_find_entry(&array);
+--- a/lib/xarray.c
++++ b/lib/xarray.c
+@@ -1592,6 +1592,46 @@ unlock:
+ 	return xas_result(&xas, NULL);
+ }
+ EXPORT_SYMBOL(xa_store_range);
++
++/**
++ * xa_get_order() - Get the order of an entry.
++ * @xa: XArray.
++ * @index: Index of the entry.
++ *
++ * Return: A number between 0 and 63 indicating the order of the entry.
++ */
++int xa_get_order(struct xarray *xa, unsigned long index)
++{
++	XA_STATE(xas, xa, index);
++	void *entry;
++	int order = 0;
++
++	rcu_read_lock();
++	entry = xas_load(&xas);
++
++	if (!entry)
++		goto unlock;
++
++	if (!xas.xa_node)
++		goto unlock;
++
++	for (;;) {
++		unsigned int slot = xas.xa_offset + (1 << order);
++
++		if (slot >= XA_CHUNK_SIZE)
++			break;
++		if (!xa_is_sibling(xas.xa_node->slots[slot]))
++			break;
++		order++;
++	}
++
++	order += xas.xa_node->shift;
++unlock:
++	rcu_read_unlock();
++
++	return order;
++}
++EXPORT_SYMBOL(xa_get_order);
+ #endif /* CONFIG_XARRAY_MULTI */
+ 
+ /**
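
Illustration (not part of the queued patches): the probing loop in xa_get_order() above works because a multi-index entry occupies one canonical slot followed by 2^order - 1 sibling slots within a node; the order is recovered by probing at power-of-two distances until a non-sibling slot is hit, and the node's shift is then added for entries that span whole child nodes. The standalone C sketch below models only the sibling-probing part; the constants and types are illustrative, not the real XArray.

/*
 * Standalone model, not the kernel's XArray: recover an entry's order from
 * sibling slots in a 64-slot node.
 */
#include <stdio.h>

#define CHUNK_SIZE 64

enum slot_kind { EMPTY, CANONICAL, SIBLING };

static enum slot_kind node[CHUNK_SIZE];

/* Mirrors the probing loop: keep doubling the probe distance while the
 * probed slot is still a sibling of the slot at @offset. */
static int order_of(unsigned int offset)
{
	int order = 0;

	for (;;) {
		unsigned int slot = offset + (1u << order);

		if (slot >= CHUNK_SIZE || node[slot] != SIBLING)
			break;
		order++;
	}
	return order;
}

int main(void)
{
	/* An order-3 entry at offset 8: one canonical slot plus 7 siblings. */
	node[8] = CANONICAL;
	for (int i = 9; i < 16; i++)
		node[i] = SIBLING;

	printf("order at offset 8: %d\n", order_of(8));		/* prints 3 */
	printf("order at offset 20: %d\n", order_of(20));	/* prints 0 */
	return 0;
}

The check_get_order() test added above exercises exactly this layout by storing entries with xa_store_order() and reading the order back at every covered index.
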
diff --git a/queue-5.4/xarray-add-xas_split.patch b/queue-5.4/xarray-add-xas_split.patch
new file mode 100644
index 00000000000..977d08801cb
--- /dev/null
+++ b/queue-5.4/xarray-add-xas_split.patch
@@ -0,0 +1,371 @@
+From foo@baz Tue Jun 8 04:58:00 PM CEST 2021
+From: "Matthew Wilcox (Oracle)"
+Date: Mon, 7 Jun 2021 21:08:44 +0100
+Subject: XArray: add xas_split
+To: stable@vger.kernel.org
+Cc: "Matthew Wilcox (Oracle)" , Andrew Morton , "Kirill A . Shutemov" , Qian Cai , Song Liu , Linus Torvalds
+Message-ID: <20210607200845.3860579-4-willy@infradead.org>
+
+From: "Matthew Wilcox (Oracle)"
+
+commit 8fc75643c5e14574c8be59b69182452ece28315a upstream
+
+In order to use multi-index entries for huge pages in the page cache, we
+need to be able to split a multi-index entry (eg if a file is truncated in
+the middle of a huge page entry). This version does not support splitting
+more than one level of the tree at a time. This is an acceptable
+limitation for the page cache as we do not expect to support order-12
+pages in the near future.
+
+[akpm@linux-foundation.org: export xas_split_alloc() to modules]
+[willy@infradead.org: fix xarray split]
+  Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org
+[willy@infradead.org: fix xarray]
+  Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org
+
+Signed-off-by: Matthew Wilcox (Oracle)
+Signed-off-by: Andrew Morton
+Cc: "Kirill A . Shutemov"
+Cc: Qian Cai
+Cc: Song Liu
+Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ Documentation/core-api/xarray.rst | 14 +--
+ include/linux/xarray.h | 13 ++
+ lib/test_xarray.c | 44 +++++++++
+ lib/xarray.c | 168 +++++++++++++++++++++++++++++++++++---
+ 4 files changed, 224 insertions(+), 15 deletions(-)
+
+--- a/Documentation/core-api/xarray.rst
++++ b/Documentation/core-api/xarray.rst
+@@ -461,13 +461,15 @@ or iterations will move the index to the
+ Each entry will only be returned once, no matter how many indices it
+ occupies.
+ 
+-Using xas_next() or xas_prev() with a multi-index xa_state
+-is not supported. Using either of these functions on a multi-index entry
+-will reveal sibling entries; these should be skipped over by the caller.
++Using xas_next() or xas_prev() with a multi-index xa_state is not
++supported. Using either of these functions on a multi-index entry will
++reveal sibling entries; these should be skipped over by the caller.
+ 
+-Storing ``NULL`` into any index of a multi-index entry will set the entry
+-at every index to ``NULL`` and dissolve the tie. Splitting a multi-index
+-entry into entries occupying smaller ranges is not yet supported.
++Storing ``NULL`` into any index of a multi-index entry will set the
++entry at every index to ``NULL`` and dissolve the tie. A multi-index
++entry can be split into entries occupying smaller ranges by calling
++xas_split_alloc() without the xa_lock held, followed by taking the lock
++and calling xas_split().
+ 
+ Functions and structures
+ ========================
+--- a/include/linux/xarray.h
++++ b/include/linux/xarray.h
+@@ -1472,11 +1472,24 @@ void xas_create_range(struct xa_state *)
+ 
+ #ifdef CONFIG_XARRAY_MULTI
+ int xa_get_order(struct xarray *, unsigned long index);
++void xas_split(struct xa_state *, void *entry, unsigned int order);
++void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
+ #else
+ static inline int xa_get_order(struct xarray *xa, unsigned long index)
+ {
+ 	return 0;
+ }
++
++static inline void xas_split(struct xa_state *xas, void *entry,
++		unsigned int order)
++{
++	xas_store(xas, entry);
++}
++
++static inline void xas_split_alloc(struct xa_state *xas, void *entry,
++		unsigned int order, gfp_t gfp)
++{
++}
+ #endif
+ 
+ /**
+--- a/lib/test_xarray.c
++++ b/lib/test_xarray.c
+@@ -1503,6 +1503,49 @@ static noinline void check_store_range(s
+ 	}
+ }
+ 
++#ifdef CONFIG_XARRAY_MULTI
++static void check_split_1(struct xarray *xa, unsigned long index,
++				unsigned int order)
++{
++	XA_STATE(xas, xa, index);
++	void *entry;
++	unsigned int i = 0;
++
++	xa_store_order(xa, index, order, xa, GFP_KERNEL);
++
++	xas_split_alloc(&xas, xa, order, GFP_KERNEL);
++	xas_lock(&xas);
++	xas_split(&xas, xa, order);
++	xas_unlock(&xas);
++
++	xa_for_each(xa, index, entry) {
++		XA_BUG_ON(xa, entry != xa);
++		i++;
++	}
++	XA_BUG_ON(xa, i != 1 << order);
++
++	xa_set_mark(xa, index, XA_MARK_0);
++	XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
++
++	xa_destroy(xa);
++}
++
++static noinline void check_split(struct xarray *xa)
++{
++	unsigned int order;
++
++	XA_BUG_ON(xa, !xa_empty(xa));
++
++	for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
++		check_split_1(xa, 0, order);
++		check_split_1(xa, 1UL << order, order);
++		check_split_1(xa, 3UL << order, order);
++	}
++}
++#else
++static void check_split(struct xarray *xa) { }
++#endif
++
+ static void check_align_1(struct xarray *xa, char *name)
+ {
+ 	int i;
+@@ -1729,6 +1772,7 @@ static int xarray_checks(void)
+ 	check_store_range(&array);
+ 	check_store_iter(&array);
+ 	check_align(&xa0);
++	check_split(&array);
+ 
+ 	check_workingset(&array, 0);
+ 	check_workingset(&array, 64);
+--- a/lib/xarray.c
++++ b/lib/xarray.c
+@@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node
+  */
+ static void xas_destroy(struct xa_state *xas)
+ {
+-	struct xa_node *node = xas->xa_alloc;
++	struct xa_node *next, *node = xas->xa_alloc;
+ 
+-	if (!node)
+-		return;
+-	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+-	kmem_cache_free(radix_tree_node_cachep, node);
+-	xas->xa_alloc = NULL;
++	while (node) {
++		XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
++		next = rcu_dereference_raw(node->parent);
++		radix_tree_node_rcu_free(&node->rcu_head);
++		xas->xa_alloc = node = next;
++	}
+ }
+ 
+ /**
+@@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp
+ 	xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ 	if (!xas->xa_alloc)
+ 		return false;
++	xas->xa_alloc->parent = NULL;
+ 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+ 	xas->xa_node = XAS_RESTART;
+ 	return true;
+@@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state
+ 	}
+ 	if (!xas->xa_alloc)
+ 		return false;
++	xas->xa_alloc->parent = NULL;
+ 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+ 	xas->xa_node = XAS_RESTART;
+ 	return true;
+@@ -403,7 +406,7 @@ static unsigned long xas_size(const stru
+ /*
+  * Use this to calculate the maximum index that will need to be created
+  * in order to add the entry described by @xas. Because we cannot store a
+- * multiple-index entry at index 0, the calculation is a little more complex
++ * multi-index entry at index 0, the calculation is a little more complex
+  * than you might expect.
+  */
+ static unsigned long xas_max(struct xa_state *xas)
+@@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_stat
+ }
+ EXPORT_SYMBOL_GPL(xas_init_marks);
+ 
++#ifdef CONFIG_XARRAY_MULTI
++static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
++{
++	unsigned int marks = 0;
++	xa_mark_t mark = XA_MARK_0;
++
++	for (;;) {
++		if (node_get_mark(node, offset, mark))
++			marks |= 1 << (__force unsigned int)mark;
++		if (mark == XA_MARK_MAX)
++			break;
++		mark_inc(mark);
++	}
++
++	return marks;
++}
++
++static void node_set_marks(struct xa_node *node, unsigned int offset,
++			struct xa_node *child, unsigned int marks)
++{
++	xa_mark_t mark = XA_MARK_0;
++
++	for (;;) {
++		if (marks & (1 << (__force unsigned int)mark)) {
++			node_set_mark(node, offset, mark);
++			if (child)
++				node_mark_all(child, mark);
++		}
++		if (mark == XA_MARK_MAX)
++			break;
++		mark_inc(mark);
++	}
++}
++
++/**
++ * xas_split_alloc() - Allocate memory for splitting an entry.
++ * @xas: XArray operation state.
++ * @entry: New entry which will be stored in the array.
++ * @order: New entry order.
++ * @gfp: Memory allocation flags.
++ *
++ * This function should be called before calling xas_split().
++ * If necessary, it will allocate new nodes (and fill them with @entry)
++ * to prepare for the upcoming split of an entry of @order size into
++ * entries of the order stored in the @xas.
++ *
++ * Context: May sleep if @gfp flags permit.
++ */
++void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
++		gfp_t gfp)
++{
++	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
++	unsigned int mask = xas->xa_sibs;
++
++	/* XXX: no support for splitting really large entries yet */
++	if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
++		goto nomem;
++	if (xas->xa_shift + XA_CHUNK_SHIFT > order)
++		return;
++
++	do {
++		unsigned int i;
++		void *sibling;
++		struct xa_node *node;
++
++		node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
++		if (!node)
++			goto nomem;
++		node->array = xas->xa;
++		for (i = 0; i < XA_CHUNK_SIZE; i++) {
++			if ((i & mask) == 0) {
++				RCU_INIT_POINTER(node->slots[i], entry);
++				sibling = xa_mk_sibling(0);
++			} else {
++				RCU_INIT_POINTER(node->slots[i], sibling);
++			}
++		}
++		RCU_INIT_POINTER(node->parent, xas->xa_alloc);
++		xas->xa_alloc = node;
++	} while (sibs-- > 0);
++
++	return;
++nomem:
++	xas_destroy(xas);
++	xas_set_err(xas, -ENOMEM);
++}
++EXPORT_SYMBOL_GPL(xas_split_alloc);
++
++/**
++ * xas_split() - Split a multi-index entry into smaller entries.
++ * @xas: XArray operation state.
++ * @entry: New entry to store in the array.
++ * @order: New entry order.
++ *
++ * The value in the entry is copied to all the replacement entries.
++ *
++ * Context: Any context. The caller should hold the xa_lock.
++ */
++void xas_split(struct xa_state *xas, void *entry, unsigned int order)
++{
++	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
++	unsigned int offset, marks;
++	struct xa_node *node;
++	void *curr = xas_load(xas);
++	int values = 0;
++
++	node = xas->xa_node;
++	if (xas_top(node))
++		return;
++
++	marks = node_get_marks(node, xas->xa_offset);
++
++	offset = xas->xa_offset + sibs;
++	do {
++		if (xas->xa_shift < node->shift) {
++			struct xa_node *child = xas->xa_alloc;
++
++			xas->xa_alloc = rcu_dereference_raw(child->parent);
++			child->shift = node->shift - XA_CHUNK_SHIFT;
++			child->offset = offset;
++			child->count = XA_CHUNK_SIZE;
++			child->nr_values = xa_is_value(entry) ?
++					XA_CHUNK_SIZE : 0;
++			RCU_INIT_POINTER(child->parent, node);
++			node_set_marks(node, offset, child, marks);
++			rcu_assign_pointer(node->slots[offset],
++					xa_mk_node(child));
++			if (xa_is_value(curr))
++				values--;
++		} else {
++			unsigned int canon = offset - xas->xa_sibs;
++
++			node_set_marks(node, canon, NULL, marks);
++			rcu_assign_pointer(node->slots[canon], entry);
++			while (offset > canon)
++				rcu_assign_pointer(node->slots[offset--],
++						xa_mk_sibling(canon));
++			values += (xa_is_value(entry) - xa_is_value(curr)) *
++					(xas->xa_sibs + 1);
++		}
++	} while (offset-- > xas->xa_offset);
++
++	node->nr_values += values;
++}
++EXPORT_SYMBOL_GPL(xas_split);
++#endif
++
+ /**
+  * xas_pause() - Pause a walk to drop a lock.
+  * @xas: XArray operation state.
+@@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store);
+  * @gfp: Memory allocation flags.
+  *
+  * After this function returns, loads from this index will return @entry.
+- * Storing into an existing multislot entry updates the entry of every index.
++ * Storing into an existing multi-index entry updates the entry of every index.
+  * The marks associated with @index are unaffected unless @entry is %NULL.
+  *
+  * Context: Any context. Takes and releases the xa_lock.
+@@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_stat
+  *
+  * After this function returns, loads from any index between @first and @last,
+  * inclusive will return @entry.
+- * Storing into an existing multislot entry updates the entry of every index.
++ * Storing into an existing multi-index entry updates the entry of every index.
+  * The marks associated with @index are unaffected unless @entry is %NULL.
+  *
+  * Context: Process context. Takes and releases the xa_lock. May sleep