git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfs: convert xfarray_pagesort to deal with large folios
author Darrick J. Wong <djwong@kernel.org>
Mon, 19 Feb 2024 06:27:29 +0000 (07:27 +0100)
committer Chandan Babu R <chandanbabu@kernel.org>
Wed, 21 Feb 2024 06:06:55 +0000 (11:36 +0530)
Convert xfarray_pagesort to handle large folios by introducing a new
xfile_get_folio routine that can return a folio of arbitrary size, and
using heapsort on the full folio.  This also corrects an off-by-one bug
in the calculation of len in xfarray_pagesort that was papered over by
xfarray_want_pagesort.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
fs/xfs/scrub/trace.h
fs/xfs/scrub/xfarray.c
fs/xfs/scrub/xfarray.h

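diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index c61fa7a95ef522c5e38ca379261550f62ca65ca8..3a1a827828dcb96a59d6a265621079bb83e27bef 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h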
@@ -956,7 +956,7 @@ TRACE_EVENT(xfarray_isort,
                  __entry->hi - __entry->lo)
 );
 
-TRACE_EVENT(xfarray_pagesort,
+TRACE_EVENT(xfarray_foliosort,
        TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
        TP_ARGS(si, lo, hi),
        TP_STRUCT__entry(
@@ -1027,6 +1027,47 @@ TRACE_EVENT(xfarray_sort,
                  __entry->bytes)
 );
 
+TRACE_EVENT(xfarray_sort_scan,
+       TP_PROTO(struct xfarray_sortinfo *si, unsigned long long idx),
+       TP_ARGS(si, idx),
+       TP_STRUCT__entry(
+               __field(unsigned long, ino)
+               __field(unsigned long long, nr)
+               __field(size_t, obj_size)
+               __field(unsigned long long, idx)
+               __field(unsigned long long, folio_pos)
+               __field(unsigned long, folio_bytes)
+               __field(unsigned long long, first_idx)
+               __field(unsigned long long, last_idx)
+       ),
+       TP_fast_assign(
+               __entry->nr = si->array->nr;
+               __entry->obj_size = si->array->obj_size;
+               __entry->ino = file_inode(si->array->xfile->file)->i_ino;
+               __entry->idx = idx;
+               if (si->folio) {
+                       __entry->folio_pos = folio_pos(si->folio);
+                       __entry->folio_bytes = folio_size(si->folio);
+                       __entry->first_idx = si->first_folio_idx;
+                       __entry->last_idx = si->last_folio_idx;
+               } else {
+                       __entry->folio_pos = 0;
+                       __entry->folio_bytes = 0;
+                       __entry->first_idx = 0;
+                       __entry->last_idx = 0;
+               }
+       ),
+       TP_printk("xfino 0x%lx nr %llu objsz %zu idx %llu folio_pos 0x%llx folio_bytes 0x%lx first_idx %llu last_idx %llu",
+                 __entry->ino,
+                 __entry->nr,
+                 __entry->obj_size,
+                 __entry->idx,
+                 __entry->folio_pos,
+                 __entry->folio_bytes,
+                 __entry->first_idx,
+                 __entry->last_idx)
+);
+
 TRACE_EVENT(xfarray_sort_stats,
        TP_PROTO(struct xfarray_sortinfo *si, int error),
        TP_ARGS(si, error),
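diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c
index 379e1db22269c75294cfc7e449d1716e6a2e040e..17c982a4821d47db929eb9a1f5911fd0c7e02cbb 100644
--- a/fs/xfs/scrub/xfarray.c
+++ b/fs/xfs/scrub/xfarray.c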
@@ -563,70 +563,42 @@ xfarray_isort(
        return xfile_store(si->array->xfile, scratch, len, lo_pos);
 }
 
-/* Grab a page for sorting records. */
-static inline int
-xfarray_sort_get_page(
-       struct xfarray_sortinfo *si,
-       loff_t                  pos,
-       uint64_t                len)
-{
-       return xfile_get_page(si->array->xfile, pos, len, &si->xfpage);
-}
-
-/* Release a page we grabbed for sorting records. */
-static inline int
-xfarray_sort_put_page(
-       struct xfarray_sortinfo *si)
-{
-       if (!xfile_page_cached(&si->xfpage))
-               return 0;
-       return xfile_put_page(si->array->xfile, &si->xfpage);
-}
-
-/* Decide if these records are eligible for in-page sorting. */
-static inline bool
-xfarray_want_pagesort(
-       struct xfarray_sortinfo *si,
-       xfarray_idx_t           lo,
-       xfarray_idx_t           hi)
-{
-       pgoff_t                 lo_page;
-       pgoff_t                 hi_page;
-       loff_t                  end_pos;
-
-       /* We can only map one page at a time. */
-       lo_page = xfarray_pos(si->array, lo) >> PAGE_SHIFT;
-       end_pos = xfarray_pos(si->array, hi) + si->array->obj_size - 1;
-       hi_page = end_pos >> PAGE_SHIFT;
-
-       return lo_page == hi_page;
-}
-
-/* Sort a bunch of records that all live in the same memory page. */
+/*
+ * Sort the records from lo to hi (inclusive) if they are all backed by the
+ * same memory folio.  Returns 1 if it sorted, 0 if it did not, or a negative
+ * errno.
+ */
 STATIC int
-xfarray_pagesort(
+xfarray_foliosort(
        struct xfarray_sortinfo *si,
        xfarray_idx_t           lo,
        xfarray_idx_t           hi)
 {
+       struct folio            *folio;
        void                    *startp;
        loff_t                  lo_pos = xfarray_pos(si->array, lo);
-       uint64_t                len = xfarray_pos(si->array, hi - lo);
-       int                     error = 0;
+       uint64_t                len = xfarray_pos(si->array, hi - lo + 1);
 
-       trace_xfarray_pagesort(si, lo, hi);
+       /* No single folio could back this many records. */
+       if (len > XFILE_MAX_FOLIO_SIZE)
+               return 0;
 
        xfarray_sort_bump_loads(si);
-       error = xfarray_sort_get_page(si, lo_pos, len);
-       if (error)
-               return error;
+       folio = xfile_get_folio(si->array->xfile, lo_pos, len, XFILE_ALLOC);
+       if (IS_ERR(folio))
+               return PTR_ERR(folio);
+       if (!folio)
+               return 0;
+
+       trace_xfarray_foliosort(si, lo, hi);
 
        xfarray_sort_bump_heapsorts(si);
-       startp = page_address(si->xfpage.page) + offset_in_page(lo_pos);
+       startp = folio_address(folio) + offset_in_folio(folio, lo_pos);
        sort(startp, hi - lo + 1, si->array->obj_size, si->cmp_fn, NULL);
 
        xfarray_sort_bump_stores(si);
-       return xfarray_sort_put_page(si);
+       xfile_put_folio(si->array->xfile, folio);
+       return 1;
 }
 
 /* Return a pointer to the xfarray pivot record within the sortinfo struct. */
@@ -814,63 +786,78 @@ xfarray_qsort_push(
        return 0;
 }
 
+static inline void
+xfarray_sort_scan_done(
+       struct xfarray_sortinfo *si)
+{
+       if (si->folio)
+               xfile_put_folio(si->array->xfile, si->folio);
+       si->folio = NULL;
+}
+
 /*
- * Load an element from the array into the first scratchpad and cache the page,
- * if possible.
+ * Cache the folio backing the start of the given array element.  If the array
+ * element is contained entirely within the folio, return a pointer to the
+ * cached folio.  Otherwise, load the element into the scratchpad and return a
+ * pointer to the scratchpad.
  */
 static inline int
-xfarray_sort_load_cached(
+xfarray_sort_scan(
        struct xfarray_sortinfo *si,
        xfarray_idx_t           idx,
-       void                    *ptr)
+       void                    **ptrp)
 {
        loff_t                  idx_pos = xfarray_pos(si->array, idx);
-       pgoff_t                 startpage;
-       pgoff_t                 endpage;
        int                     error = 0;
 
-       /*
-        * If this load would split a page, release the cached page, if any,
-        * and perform a traditional read.
-        */
-       startpage = idx_pos >> PAGE_SHIFT;
-       endpage = (idx_pos + si->array->obj_size - 1) >> PAGE_SHIFT;
-       if (startpage != endpage) {
-               error = xfarray_sort_put_page(si);
-               if (error)
-                       return error;
+       if (xfarray_sort_terminated(si, &error))
+               return error;
 
-               if (xfarray_sort_terminated(si, &error))
-                       return error;
+       trace_xfarray_sort_scan(si, idx);
 
-               return xfile_load(si->array->xfile, ptr,
-                               si->array->obj_size, idx_pos);
-       }
+       /* If the cached folio doesn't cover this index, release it. */
+       if (si->folio &&
+           (idx < si->first_folio_idx || idx > si->last_folio_idx))
+               xfarray_sort_scan_done(si);
 
-       /* If the cached page is not the one we want, release it. */
-       if (xfile_page_cached(&si->xfpage) &&
-           xfile_page_index(&si->xfpage) != startpage) {
-               error = xfarray_sort_put_page(si);
-               if (error)
-                       return error;
+       /* Grab the first folio that backs this array element. */
+       if (!si->folio) {
+               loff_t          next_pos;
+
+               si->folio = xfile_get_folio(si->array->xfile, idx_pos,
+                               si->array->obj_size, XFILE_ALLOC);
+               if (IS_ERR(si->folio))
+                       return PTR_ERR(si->folio);
+
+               si->first_folio_idx = xfarray_idx(si->array,
+                               folio_pos(si->folio) + si->array->obj_size - 1);
+
+               next_pos = folio_pos(si->folio) + folio_size(si->folio);
+               si->last_folio_idx = xfarray_idx(si->array, next_pos - 1);
+               if (xfarray_pos(si->array, si->last_folio_idx + 1) > next_pos)
+                       si->last_folio_idx--;
+
+               trace_xfarray_sort_scan(si, idx);
        }
 
        /*
-        * If we don't have a cached page (and we know the load is contained
-        * in a single page) then grab it.
+        * If this folio still doesn't cover the desired element, it must cross
+        * a folio boundary.  Read into the scratchpad and we're done.
         */
-       if (!xfile_page_cached(&si->xfpage)) {
-               if (xfarray_sort_terminated(si, &error))
-                       return error;
+       if (idx < si->first_folio_idx || idx > si->last_folio_idx) {
+               void            *temp = xfarray_scratch(si->array);
 
-               error = xfarray_sort_get_page(si, startpage << PAGE_SHIFT,
-                               PAGE_SIZE);
+               error = xfile_load(si->array->xfile, temp, si->array->obj_size,
+                               idx_pos);
                if (error)
                        return error;
+
+               *ptrp = temp;
+               return 0;
        }
 
-       memcpy(ptr, page_address(si->xfpage.page) + offset_in_page(idx_pos),
-                       si->array->obj_size);
+       /* Otherwise return a pointer to the array element in the folio. */
+       *ptrp = folio_address(si->folio) + offset_in_folio(si->folio, idx_pos);
        return 0;
 }
 
@@ -937,6 +924,8 @@ xfarray_sort(
        pivot = xfarray_sortinfo_pivot(si);
 
        while (si->stack_depth >= 0) {
+               int             ret;
+
                lo = si_lo[si->stack_depth];
                hi = si_hi[si->stack_depth];
 
@@ -949,13 +938,13 @@ xfarray_sort(
                }
 
                /*
-                * If directly mapping the page and sorting can solve our
+                * If directly mapping the folio and sorting can solve our
                 * problems, we're done.
                 */
-               if (xfarray_want_pagesort(si, lo, hi)) {
-                       error = xfarray_pagesort(si, lo, hi);
-                       if (error)
-                               goto out_free;
+               ret = xfarray_foliosort(si, lo, hi);
+               if (ret < 0)
+                       goto out_free;
+               if (ret == 1) {
                        si->stack_depth--;
                        continue;
                }
@@ -980,25 +969,24 @@ xfarray_sort(
                 * than the pivot is on the right side of the range.
                 */
                while (lo < hi) {
+                       void    *p;
+
                        /*
                         * Decrement hi until it finds an a[hi] less than the
                         * pivot value.
                         */
-                       error = xfarray_sort_load_cached(si, hi, scratch);
+                       error = xfarray_sort_scan(si, hi, &p);
                        if (error)
                                goto out_free;
-                       while (xfarray_sort_cmp(si, scratch, pivot) >= 0 &&
-                                                               lo < hi) {
+                       while (xfarray_sort_cmp(si, p, pivot) >= 0 && lo < hi) {
                                hi--;
-                               error = xfarray_sort_load_cached(si, hi,
-                                               scratch);
+                               error = xfarray_sort_scan(si, hi, &p);
                                if (error)
                                        goto out_free;
                        }
-                       error = xfarray_sort_put_page(si);
-                       if (error)
-                               goto out_free;
-
+                       if (p != scratch)
+                               memcpy(scratch, p, si->array->obj_size);
+                       xfarray_sort_scan_done(si);
                        if (xfarray_sort_terminated(si, &error))
                                goto out_free;
 
@@ -1013,21 +1001,18 @@ xfarray_sort(
                         * Increment lo until it finds an a[lo] greater than
                         * the pivot value.
                         */
-                       error = xfarray_sort_load_cached(si, lo, scratch);
+                       error = xfarray_sort_scan(si, lo, &p);
                        if (error)
                                goto out_free;
-                       while (xfarray_sort_cmp(si, scratch, pivot) <= 0 &&
-                                                               lo < hi) {
+                       while (xfarray_sort_cmp(si, p, pivot) <= 0 && lo < hi) {
                                lo++;
-                               error = xfarray_sort_load_cached(si, lo,
-                                               scratch);
+                               error = xfarray_sort_scan(si, lo, &p);
                                if (error)
                                        goto out_free;
                        }
-                       error = xfarray_sort_put_page(si);
-                       if (error)
-                               goto out_free;
-
+                       if (p != scratch)
+                               memcpy(scratch, p, si->array->obj_size);
+                       xfarray_sort_scan_done(si);
                        if (xfarray_sort_terminated(si, &error))
                                goto out_free;
 
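diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h
index 6f2862054e194d009bfad79378251f2cba6366ce..ec643cc9fc143215506059e0ac1b437b0284a4fc 100644
--- a/fs/xfs/scrub/xfarray.h
+++ b/fs/xfs/scrub/xfarray.h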
@@ -105,8 +105,14 @@ struct xfarray_sortinfo {
        /* XFARRAY_SORT_* flags; see below. */
        unsigned int            flags;
 
-       /* Cache a page here for faster access. */
-       struct xfile_page       xfpage;
+       /* Cache a folio here for faster scanning for pivots */
+       struct folio            *folio;
+
+       /* First array index in folio that is completely readable */
+       xfarray_idx_t           first_folio_idx;
+
+       /* Last array index in folio that is completely readable */
+       xfarray_idx_t           last_folio_idx;
 
 #ifdef DEBUG
        /* Performance statistics. */