libxfs: refactor manage_zones()

diff --git a/repair/prefetch.c b/repair/prefetch.c
index 4595310a269975babed500a96ef7216688e9f440..1de0e2ff414eb8c792c593c065891b3efee107f0 100644
--- a/repair/prefetch.c
+++ b/repair/prefetch.c
@@ -1,4 +1,6 @@
-#include <libxfs.h>
+// SPDX-License-Identifier: GPL-2.0
+
+#include "libxfs.h"
 #include <pthread.h>
 #include "avl.h"
 #include "btree.h"
@@ -165,13 +167,13 @@ pf_read_bmbt_reclist(
 {
        int                     i;
        xfs_bmbt_irec_t         irec;
-       xfs_dfilblks_t          cp = 0;         /* prev count */
-       xfs_dfiloff_t           op = 0;         /* prev offset */
+       xfs_filblks_t           cp = 0;         /* prev count */
+       xfs_fileoff_t           op = 0;         /* prev offset */
 #define MAP_ARRAY_SZ 4
        struct xfs_buf_map      map_array[MAP_ARRAY_SZ];
        struct xfs_buf_map      *map = map_array;
        int                     max_extents = MAP_ARRAY_SZ;
-       int                     nmaps = 0;;
+       int                     nmaps = 0;
        unsigned int            len = 0;
        int                     ret = 0;
 
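
The chunking rule in the hunk above is easier to see in isolation. Below
is a minimal sketch with hypothetical names and plain unsigned types (the
real code uses xfs_filblks_t and mp->m_dir_geo->fsbcount): each extent is
consumed in pieces no larger than what completes the current directory
block, and the accumulated maps are queued whenever a whole directory
block has been covered.

static unsigned int
dirblock_chunk(unsigned int len, unsigned int extent_blocks,
               unsigned int dirblk_fsbs)
{
        /* take just enough to finish the current directory block ... */
        if (len + extent_blocks >= dirblk_fsbs)
                return dirblk_fsbs - len;
        /* ... or the whole extent if it fits inside it */
        return extent_blocks;
}
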
@@ -189,7 +191,7 @@ pf_read_bmbt_reclist(
                        goto out_free;
 
                if (!args->dirs_only && ((irec.br_startoff +
-                               irec.br_blockcount) >= mp->m_dirfreeblk))
+                               irec.br_blockcount) >= mp->m_dir_geo->freeblk))
                        break;  /* only Phase 6 reads the free blocks */
 
                op = irec.br_startoff;
@@ -200,8 +202,8 @@ pf_read_bmbt_reclist(
 
                        pftrace("queuing dir extent in AG %d", args->agno);
 
-                       if (len + irec.br_blockcount >= mp->m_dirblkfsbs)
-                               bm_len = mp->m_dirblkfsbs - len;
+                       if (len + irec.br_blockcount >= mp->m_dir_geo->fsbcount)
+                               bm_len = mp->m_dir_geo->fsbcount - len;
                        else
                                bm_len = irec.br_blockcount;
                        len += bm_len;
@@ -211,7 +213,7 @@ pf_read_bmbt_reclist(
                        map[nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len);
                        nmaps++;
 
-                       if (len == mp->m_dirblkfsbs) {
+                       if (len == mp->m_dir_geo->fsbcount) {
                                pf_queue_io(args, map, nmaps, B_DIR_META);
                                len = 0;
                                nmaps = 0;
@@ -257,7 +259,7 @@ out_free:
 
 static int
 pf_scan_lbtree(
-       xfs_dfsbno_t            dbno,
+       xfs_fsblock_t           dbno,
        int                     level,
        int                     isadir,
        prefetch_args_t         *args,
@@ -276,6 +278,18 @@ pf_scan_lbtree(
 
        XFS_BUF_SET_PRIORITY(bp, isadir ? B_DIR_BMAP : B_BMAP);
 
+       /*
+        * If the verifier flagged a problem with the buffer, we can't trust
+        * its contents for the purposes of reading ahead.  Stop prefetching
+        * the tree and mark the buffer unchecked so that the next read of the
+        * buffer will retain the error status and be acted upon appropriately.
+        */
+       if (bp->b_error) {
+               bp->b_flags |= LIBXFS_B_UNCHECKED;
+               libxfs_putbuf(bp);
+               return 0;
+       }
+
        rc = (*func)(XFS_BUF_TO_BLOCK(bp), level - 1, isadir, args);
 
        libxfs_putbuf(bp);
@@ -293,7 +307,7 @@ pf_scanfunc_bmap(
        xfs_bmbt_ptr_t          *pp;
        int                     numrecs;
        int                     i;
-       xfs_dfsbno_t            dbno;
+       xfs_fsblock_t           dbno;
 
        /*
         * do some validation on the block contents
@@ -318,7 +332,7 @@ pf_scanfunc_bmap(
        pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 
        for (i = 0; i < numrecs; i++) {
-               dbno = be64_to_cpu(pp[i]);
+               dbno = get_unaligned_be64(&pp[i]);
                if (!verify_dfsbno(mp, dbno))
                        return 0;
                if (!pf_scan_lbtree(dbno, level, isadir, args, pf_scanfunc_bmap))
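
The switch from be64_to_cpu() to get_unaligned_be64() here (and in
pf_read_btinode() below) matters because bmbt pointer records inside a
block are not guaranteed to be 64-bit aligned, and a direct dereference
can fault on strict-alignment machines. A self-contained sketch of what
such a helper has to do (the real one comes from the libxfs headers):

#include <stdint.h>
#include <string.h>

/* read 8 big-endian bytes from a possibly unaligned location */
static uint64_t
sketch_get_unaligned_be64(const void *p)
{
        unsigned char b[8];

        memcpy(b, p, sizeof(b));        /* memcpy has no alignment rules */
        return ((uint64_t)b[0] << 56) | ((uint64_t)b[1] << 48) |
               ((uint64_t)b[2] << 40) | ((uint64_t)b[3] << 32) |
               ((uint64_t)b[4] << 24) | ((uint64_t)b[5] << 16) |
               ((uint64_t)b[6] << 8)  |  (uint64_t)b[7];
}
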
@@ -340,7 +354,7 @@ pf_read_btinode(
        int                     level;
        int                     numrecs;
        int                     dsize;
-       xfs_dfsbno_t            dbno;
+       xfs_fsblock_t           dbno;
 
        dib = (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dino);
 
@@ -357,10 +371,10 @@ pf_read_btinode(
                return;
 
        dsize = XFS_DFORK_DSIZE(dino, mp);
-       pp = XFS_BMDR_PTR_ADDR(dib, 1, xfs_bmdr_maxrecs(mp, dsize, 0));
+       pp = XFS_BMDR_PTR_ADDR(dib, 1, libxfs_bmdr_maxrecs(dsize, 0));
 
        for (i = 0; i < numrecs; i++) {
-               dbno = be64_to_cpu(pp[i]);
+               dbno = get_unaligned_be64(&pp[i]);
                if (!verify_dfsbno(mp, dbno))
                        break;
                if (!pf_scan_lbtree(dbno, level, isadir, args, pf_scanfunc_bmap))
@@ -391,7 +405,7 @@ pf_read_inode_dirs(
        if (bp->b_error)
                return;
 
-       for (icnt = 0; icnt < (XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog); icnt++) {
+       for (icnt = 0; icnt < (bp->b_bcount >> mp->m_sb.sb_inodelog); icnt++) {
                dino = xfs_make_iptr(mp, bp, icnt);
 
                /*
@@ -419,8 +433,7 @@ pf_read_inode_dirs(
                if (be16_to_cpu(dino->di_magic) != XFS_DINODE_MAGIC)
                        continue;
 
-               if (!XFS_DINODE_GOOD_VERSION(dino->di_version) ||
-                               (!fs_inode_nlink && dino->di_version > 1))
+               if (!libxfs_dinode_good_version(mp, dino->di_version))
                        continue;
 
                if (be64_to_cpu(dino->di_size) <= XFS_DFORK_DSIZE(dino, mp))
@@ -466,7 +479,7 @@ pf_batch_read(
                num = 0;
                if (which == PF_SECONDARY) {
                        bplist[0] = btree_find(args->io_queue, 0, &fsbno);
-                       max_fsbno = MIN(fsbno + pf_max_fsbs,
+                       max_fsbno = min(fsbno + pf_max_fsbs,
                                                        args->last_bno_read);
                } else {
                        bplist[0] = btree_find(args->io_queue,
@@ -505,7 +518,7 @@ pf_batch_read(
                first_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[0]));
                last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
                        XFS_BUF_SIZE(bplist[num-1]);
-               while (last_off - first_off > pf_max_bytes) {
+               while (num > 1 && last_off - first_off > pf_max_bytes) {
                        num--;
                        last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
                                XFS_BUF_SIZE(bplist[num-1]);
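
The new "num > 1" guard fixes a corner case in the trimming loop: if even
the first buffer spans more than pf_max_bytes, the old condition would
trim the batch down to zero entries. A minimal sketch of the corrected
rule, using a hypothetical array of per-buffer end offsets:

/* drop buffers from the tail until the span fits, but keep at least one */
static int
trim_batch(int num, const long long *end_off, long long first_off,
           long long max_bytes)
{
        while (num > 1 && end_off[num - 1] - first_off > max_bytes)
                num--;
        return num;
}
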
@@ -554,7 +567,7 @@ pf_batch_read(
                /*
                 * now read the data and put into the xfs_buf_t's
                 */
-               len = pread64(mp_fd, buf, (int)(last_off - first_off), first_off);
+               len = pread(mp_fd, buf, (int)(last_off - first_off), first_off);
 
                /*
                 * Check the last buffer on the list to see if we need to
@@ -580,7 +593,7 @@ pf_batch_read(
                                size = XFS_BUF_SIZE(bplist[i]);
                                if (len < size)
                                        break;
-                               memcpy(XFS_BUF_PTR(bplist[i]), pbuf, size);
+                               memcpy(bplist[i]->b_addr, pbuf, size);
                                bplist[i]->b_flags |= (LIBXFS_B_UPTODATE |
                                                       LIBXFS_B_UNCHECKED);
                                len -= size;
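
A single pread() covers the whole span, and this loop then scatters
slices of the bounce buffer into the individual buffers, marking each
uptodate but unchecked so verification still happens on first real use.
A simplified sketch of the scatter step, assuming for illustration that
the buffers are contiguous in the span (the real loop also steps over
gaps between them):

#include <string.h>

struct sketch_buf {
        void    *b_addr;        /* destination buffer memory */
        size_t   size;          /* bytes this buffer needs */
};

/* returns how many buffers were fully populated from the bounce buffer */
static int
scatter_read(const char *pbuf, long len, struct sketch_buf *bp, int num)
{
        int     i;

        for (i = 0; i < num; i++) {
                if (len < (long)bp[i].size)
                        break;          /* short read: stop here */
                memcpy(bp[i].b_addr, pbuf, bp[i].size);
                pbuf += bp[i].size;
                len -= bp[i].size;
        }
        return i;
}
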
@@ -668,11 +681,32 @@ static int
 pf_create_prefetch_thread(
        prefetch_args_t         *args);
 
+/*
+ * If we fail to create the queuing thread or can't create even one
+ * prefetch thread, we need to let processing continue without it.
+ */
+static void
+pf_skip_prefetch_thread(prefetch_args_t *args)
+{
+       prefetch_args_t *next;
+
+       pthread_mutex_lock(&args->lock);
+       args->prefetch_done = 1;
+       pf_start_processing(args);
+       next = args->next_args;
+       args->next_args = NULL;
+       pthread_mutex_unlock(&args->lock);
+
+       if (next)
+               pf_create_prefetch_thread(next);
+}
+
 static void *
 pf_queuing_worker(
        void                    *param)
 {
        prefetch_args_t         *args = param;
+       prefetch_args_t         *next_args;
        int                     num_inos;
        ino_tree_node_t         *irec;
        ino_tree_node_t         *cur_irec;
@@ -680,8 +714,9 @@ pf_queuing_worker(
        xfs_agblock_t           bno;
        int                     i;
        int                     err;
+       uint64_t                sparse;
 
-       blks_per_cluster =  XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+       blks_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
        if (blks_per_cluster == 0)
                blks_per_cluster = 1;
 
@@ -691,8 +726,11 @@ pf_queuing_worker(
                if (err != 0) {
                        do_warn(_("failed to create prefetch thread: %s\n"),
                                strerror(err));
+                       pftrace("failed to create prefetch thread for AG %d: %s",
+                               args->agno, strerror(err));
+                       args->io_threads[i] = 0;
                        if (i == 0) {
-                               pf_start_processing(args);
+                               pf_skip_prefetch_thread(args);
                                return NULL;
                        }
                        /*
@@ -710,7 +748,7 @@ pf_queuing_worker(
                cur_irec = irec;
 
                num_inos = XFS_INODES_PER_CHUNK;
-               while (num_inos < XFS_IALLOC_INODES(mp) && irec != NULL) {
+               while (num_inos < mp->m_ialloc_inos && irec != NULL) {
                        irec = next_ino_rec(irec);
                        num_inos += XFS_INODES_PER_CHUNK;
                }
@@ -730,25 +768,39 @@ pf_queuing_worker(
                         * might get stuck on a buffer that has been locked
                         * and added to the I/O queue but is waiting for
                         * the thread to be woken.
+                        * Start processing as well, in case everything so
+                        * far was already prefetched and the queue is empty.
                         */
+
                        pf_start_io_workers(args);
+                       pf_start_processing(args);
                        sem_wait(&args->ra_count);
                }
 
                num_inos = 0;
                bno = XFS_AGINO_TO_AGBNO(mp, cur_irec->ino_startnum);
+               sparse = cur_irec->ir_sparse;
 
                do {
                        struct xfs_buf_map      map;
 
                        map.bm_bn = XFS_AGB_TO_DADDR(mp, args->agno, bno);
                        map.bm_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
-                       pf_queue_io(args, &map, 1,
-                                   (cur_irec->ino_isa_dir != 0) ?  B_DIR_INODE
-                                                                : B_INODE);
+
+                       /*
+                        * Queue I/O for each non-sparse cluster. We can check
+                        * sparse state in cluster-sized chunks, as cluster size
+                        * is the minimum granularity of sparse irec regions.
+                        */
+                       if ((sparse & ((1ULL << inodes_per_cluster) - 1)) == 0)
+                               pf_queue_io(args, &map, 1,
+                                           (cur_irec->ino_isa_dir != 0) ?
+                                            B_DIR_INODE : B_INODE);
+
                        bno += blks_per_cluster;
                        num_inos += inodes_per_cluster;
-               } while (num_inos < XFS_IALLOC_INODES(mp));
+                       sparse >>= inodes_per_cluster;
+               } while (num_inos < mp->m_ialloc_inos);
        }
 
        pthread_mutex_lock(&args->lock);
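
The sparse-inode logic walks the chunk's allocation bitmap one cluster at
a time: the low inodes_per_cluster bits describe the current cluster, and
the shift at the bottom of the loop advances to the next one. For
example, with 16 inodes per cluster and sparse = 0xffff0000, the first
cluster tests clean and is queued, while after "sparse >>= 16" the second
tests sparse and is skipped. A sketch of the test, assuming (as the
expression above also does) that inodes_per_cluster is below 64 so the
shift stays defined:

#include <stdint.h>

/* true if no inode in the current cluster is sparse (safe to prefetch) */
static int
cluster_is_allocated(uint64_t sparse, unsigned int inodes_per_cluster)
{
        uint64_t mask = (1ULL << inodes_per_cluster) - 1;

        return (sparse & mask) == 0;
}
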
@@ -773,11 +825,13 @@ pf_queuing_worker(
        ASSERT(btree_is_empty(args->io_queue));
 
        args->prefetch_done = 1;
-       if (args->next_args)
-               pf_create_prefetch_thread(args->next_args);
-
+       next_args = args->next_args;
+       args->next_args = NULL;
        pthread_mutex_unlock(&args->lock);
 
+       if (next_args)
+               pf_create_prefetch_thread(next_args);
+
        return NULL;
 }
 
@@ -794,7 +848,10 @@ pf_create_prefetch_thread(
        if (err != 0) {
                do_warn(_("failed to create prefetch thread: %s\n"),
                        strerror(err));
-               cleanup_inode_prefetch(args);
+               pftrace("failed to create prefetch thread for AG %d: %s",
+                       args->agno, strerror(err));
+               args->queuing_thread = 0;
+               pf_skip_prefetch_thread(args);
        }
 
        return err == 0;
@@ -843,9 +900,10 @@ start_inode_prefetch(
         */
 
        max_queue = libxfs_bcache->c_maxcount / thread_count / 8;
-       if (XFS_INODE_CLUSTER_SIZE(mp) > mp->m_sb.sb_blocksize)
-               max_queue = max_queue * (XFS_INODE_CLUSTER_SIZE(mp) >>
-                               mp->m_sb.sb_blocklog) / XFS_IALLOC_BLOCKS(mp);
+       if (mp->m_inode_cluster_size > mp->m_sb.sb_blocksize)
+               max_queue = max_queue *
+                       (mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog) /
+                       mp->m_ialloc_blks;
 
        sem_init(&args->ra_count, 0, max_queue);
 
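
The max_queue scaling keeps the amount of queued cache memory roughly
constant when inode clusters span multiple filesystem blocks. With
assumed numbers, not taken from the diff: a 32768-buffer cache and 8
threads give 32768 / 8 / 8 = 512; with 8 KiB inode clusters on a
4 KiB-block filesystem (2 blocks per cluster) and 8-block inode chunks,
that becomes 512 * 2 / 8 = 128 queued cluster reads. As a sketch:

/* hypothetical recomputation of the semaphore limit initialised above */
static int
sketch_max_queue(int cache_bufs, int threads, int cluster_blocks,
                 int ialloc_blks, int cluster_exceeds_fsb)
{
        int     max_queue = cache_bufs / threads / 8;

        if (cluster_exceeds_fsb)
                max_queue = max_queue * cluster_blocks / ialloc_blks;
        return max_queue;
}
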
@@ -855,11 +913,15 @@ start_inode_prefetch(
        } else {
                pthread_mutex_lock(&prev_args->lock);
                if (prev_args->prefetch_done) {
+                       pthread_mutex_unlock(&prev_args->lock);
                        if (!pf_create_prefetch_thread(args))
                                args = NULL;
-               } else
+               } else {
                        prev_args->next_args = args;
-               pthread_mutex_unlock(&prev_args->lock);
+                       pftrace("queued AG %d after AG %d",
+                               args->agno, prev_args->agno);
+                       pthread_mutex_unlock(&prev_args->lock);
+               }
        }
 
        return args;
@@ -887,11 +949,11 @@ start_inode_prefetch(
  */
 static void
 prefetch_ag_range(
-       struct work_queue       *work,
+       struct workqueue        *work,
        xfs_agnumber_t          start_ag,
        xfs_agnumber_t          end_ag,
        bool                    dirs_only,
-       void                    (*func)(struct work_queue *,
+       void                    (*func)(struct workqueue *,
                                        xfs_agnumber_t, void *))
 {
        int                     i;
@@ -911,18 +973,18 @@ struct pf_work_args {
        xfs_agnumber_t  start_ag;
        xfs_agnumber_t  end_ag;
        bool            dirs_only;
-       void            (*func)(struct work_queue *, xfs_agnumber_t, void *);
+       void            (*func)(struct workqueue *, xfs_agnumber_t, void *);
 };
 
 static void
 prefetch_ag_range_work(
-       struct work_queue       *work,
+       struct workqueue        *work,
        xfs_agnumber_t          unused,
        void                    *args)
 {
        struct pf_work_args *wargs = args;
 
-       prefetch_ag_range(work, wargs->start_ag, wargs->end_ag, 
+       prefetch_ag_range(work, wargs->start_ag, wargs->end_ag,
                          wargs->dirs_only, wargs->func);
        free(args);
 }
@@ -935,14 +997,14 @@ void
 do_inode_prefetch(
        struct xfs_mount        *mp,
        int                     stride,
-       void                    (*func)(struct work_queue *,
+       void                    (*func)(struct workqueue *,
                                        xfs_agnumber_t, void *),
        bool                    check_cache,
        bool                    dirs_only)
 {
        int                     i;
-       struct work_queue       queue;
-       struct work_queue       *queues;
+       struct workqueue        queue;
+       struct workqueue        *queues;
        int                     queues_started = 0;
 
        /*
@@ -952,7 +1014,7 @@ do_inode_prefetch(
         * CPU to maximise parallelism of the queue to be processed.
         */
        if (check_cache && !libxfs_bcache_overflowed()) {
-               queue.mp = mp;
+               queue.wq_ctx = mp;
                create_work_queue(&queue, mp, libxfs_nproc());
                for (i = 0; i < mp->m_sb.sb_agcount; i++)
                        queue_work(&queue, func, i, NULL);
@@ -965,7 +1027,7 @@ do_inode_prefetch(
         * directly after each AG is queued.
         */
        if (!stride) {
-               queue.mp = mp;
+               queue.wq_ctx = mp;
                prefetch_ag_range(&queue, 0, mp->m_sb.sb_agcount,
                                  dirs_only, func);
                return;
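
For orientation, do_inode_prefetch() chooses between three modes, and the
two early returns above encode the order in which they are considered. A
minimal, self-contained sketch of that decision (names are hypothetical):

enum pf_mode {
        PF_NO_PREFETCH,         /* cache is warm: plain per-AG work items */
        PF_SEQUENTIAL,          /* stride == 0: one AG at a time, in order */
        PF_PARALLEL,            /* one queuing thread per stride of AGs */
};

static enum pf_mode
choose_prefetch_mode(int check_cache, int cache_overflowed, int stride)
{
        if (check_cache && !cache_overflowed)
                return PF_NO_PREFETCH;
        if (!stride)
                return PF_SEQUENTIAL;
        return PF_PARALLEL;
}
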
@@ -974,7 +1036,7 @@ do_inode_prefetch(
        /*
         * create one worker thread for each segment of the volume
         */
-       queues = malloc(thread_count * sizeof(work_queue_t));
+       queues = malloc(thread_count * sizeof(struct workqueue));
        for (i = 0; i < thread_count; i++) {
                struct pf_work_args *wargs;
 
@@ -1034,6 +1096,8 @@ cleanup_inode_prefetch(
 
        pftrace("AG %d prefetch done", args->agno);
 
+       ASSERT(args->next_args == NULL);
+
        pthread_mutex_destroy(&args->lock);
        pthread_cond_destroy(&args->start_reading);
        pthread_cond_destroy(&args->start_processing);