Merge branch 'for-6.0/dax' into libnvdimm-fixes
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index bf4e60871068205eb30a4909654717782d6bbe84..dde346450952afe8175990126de089b214cef881 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -5,6 +5,7 @@
  */
 #include "xfs.h"
 #include <linux/backing-dev.h>
+#include <linux/dax.h>
 
 #include "xfs_shared.h"
 #include "xfs_format.h"
@@ -21,7 +22,7 @@
 #include "xfs_error.h"
 #include "xfs_ag.h"
 
-static struct kmem_cache *xfs_buf_cache;
+struct kmem_cache *xfs_buf_cache;
 
 /*
  * Locking orders
@@ -294,6 +295,16 @@ xfs_buf_free_pages(
        bp->b_flags &= ~_XBF_PAGES;
 }
 
+static void
+xfs_buf_free_callback(
+       struct callback_head    *cb)
+{
+       struct xfs_buf          *bp = container_of(cb, struct xfs_buf, b_rcu);
+
+       xfs_buf_free_maps(bp);
+       kmem_cache_free(xfs_buf_cache, bp);
+}
+
 static void
 xfs_buf_free(
        struct xfs_buf          *bp)
@@ -307,8 +318,7 @@ xfs_buf_free(
        else if (bp->b_flags & _XBF_KMEM)
                kmem_free(bp->b_addr);
 
-       xfs_buf_free_maps(bp);
-       kmem_cache_free(xfs_buf_cache, bp);
+       call_rcu(&bp->b_rcu, xfs_buf_free_callback);
 }
 
 static int
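
The hunk above stops freeing the xfs_buf immediately and instead defers the final kmem_cache_free() through call_rcu(), so the lockless rhashtable lookup introduced further down (xfs_buf_lookup() under rcu_read_lock()) can never dereference memory that has already gone back to the slab. A minimal sketch of the general pattern follows, using hypothetical struct obj / obj_put() names rather than the real xfs_buf code:

#include <linux/rcupdate.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>

/* Sketch only: a hypothetical RCU-freed, hash-linked object. */
struct obj {
	atomic_t		refcount;
	struct rhash_head	hash;	/* linked into an rhashtable */
	struct rcu_head		rcu;	/* deferred-free callback head */
};

static void obj_free_rcu(struct rcu_head *head)
{
	struct obj *o = container_of(head, struct obj, rcu);

	kfree(o);			/* runs only after a grace period */
}

static void obj_put(struct obj *o)
{
	if (atomic_dec_and_test(&o->refcount))
		call_rcu(&o->rcu, obj_free_rcu);	/* never kfree() directly */
}

The lookup side pairs with this by taking its reference via atomic_inc_not_zero(), so an object whose count has already dropped to zero is skipped rather than resurrected.
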
@@ -503,100 +513,45 @@ xfs_buf_hash_destroy(
        rhashtable_destroy(&pag->pag_buf_hash);
 }
 
-/*
- * Look up a buffer in the buffer cache and return it referenced and locked
- * in @found_bp.
- *
- * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
- * cache.
- *
- * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
- * -EAGAIN if we fail to lock it.
- *
- * Return values are:
- *     -EFSCORRUPTED if have been supplied with an invalid address
- *     -EAGAIN on trylock failure
- *     -ENOENT if we fail to find a match and @new_bp was NULL
- *     0, with @found_bp:
- *             - @new_bp if we inserted it into the cache
- *             - the buffer we found and locked.
- */
 static int
-xfs_buf_find(
+xfs_buf_map_verify(
        struct xfs_buftarg      *btp,
-       struct xfs_buf_map      *map,
-       int                     nmaps,
-       xfs_buf_flags_t         flags,
-       struct xfs_buf          *new_bp,
-       struct xfs_buf          **found_bp)
+       struct xfs_buf_map      *map)
 {
-       struct xfs_perag        *pag;
-       struct xfs_buf          *bp;
-       struct xfs_buf_map      cmap = { .bm_bn = map[0].bm_bn };
        xfs_daddr_t             eofs;
-       int                     i;
-
-       *found_bp = NULL;
-
-       for (i = 0; i < nmaps; i++)
-               cmap.bm_len += map[i].bm_len;
 
        /* Check for IOs smaller than the sector size / not sector aligned */
-       ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
-       ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
+       ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize));
+       ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
 
        /*
         * Corrupted block numbers can get through to here, unfortunately, so we
         * have to check that the buffer falls within the filesystem bounds.
         */
        eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
-       if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
+       if (map->bm_bn < 0 || map->bm_bn >= eofs) {
                xfs_alert(btp->bt_mount,
                          "%s: daddr 0x%llx out of range, EOFS 0x%llx",
-                         __func__, cmap.bm_bn, eofs);
+                         __func__, map->bm_bn, eofs);
                WARN_ON(1);
                return -EFSCORRUPTED;
        }
-
-       pag = xfs_perag_get(btp->bt_mount,
-                           xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
-
-       spin_lock(&pag->pag_buf_lock);
-       bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
-                                   xfs_buf_hash_params);
-       if (bp) {
-               atomic_inc(&bp->b_hold);
-               goto found;
-       }
-
-       /* No match found */
-       if (!new_bp) {
-               XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
-               spin_unlock(&pag->pag_buf_lock);
-               xfs_perag_put(pag);
-               return -ENOENT;
-       }
-
-       /* the buffer keeps the perag reference until it is freed */
-       new_bp->b_pag = pag;
-       rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
-                              xfs_buf_hash_params);
-       spin_unlock(&pag->pag_buf_lock);
-       *found_bp = new_bp;
        return 0;
+}
 
-found:
-       spin_unlock(&pag->pag_buf_lock);
-       xfs_perag_put(pag);
-
-       if (!xfs_buf_trylock(bp)) {
-               if (flags & XBF_TRYLOCK) {
-                       xfs_buf_rele(bp);
-                       XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
+static int
+xfs_buf_find_lock(
+       struct xfs_buf          *bp,
+       xfs_buf_flags_t         flags)
+{
+       if (flags & XBF_TRYLOCK) {
+               if (!xfs_buf_trylock(bp)) {
+                       XFS_STATS_INC(bp->b_mount, xb_busy_locked);
                        return -EAGAIN;
                }
+       } else {
                xfs_buf_lock(bp);
-               XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
+               XFS_STATS_INC(bp->b_mount, xb_get_locked_waited);
        }
 
        /*
@@ -609,57 +564,59 @@ found:
                bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
                bp->b_ops = NULL;
        }
-
-       trace_xfs_buf_find(bp, flags, _RET_IP_);
-       XFS_STATS_INC(btp->bt_mount, xb_get_locked);
-       *found_bp = bp;
        return 0;
 }
 
-struct xfs_buf *
-xfs_buf_incore(
-       struct xfs_buftarg      *target,
-       xfs_daddr_t             blkno,
-       size_t                  numblks,
-       xfs_buf_flags_t         flags)
+static inline int
+xfs_buf_lookup(
+       struct xfs_perag        *pag,
+       struct xfs_buf_map      *map,
+       xfs_buf_flags_t         flags,
+       struct xfs_buf          **bpp)
 {
-       struct xfs_buf          *bp;
+       struct xfs_buf          *bp;
        int                     error;
-       DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
 
-       error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
-       if (error)
-               return NULL;
-       return bp;
+       rcu_read_lock();
+       bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params);
+       if (!bp || !atomic_inc_not_zero(&bp->b_hold)) {
+               rcu_read_unlock();
+               return -ENOENT;
+       }
+       rcu_read_unlock();
+
+       error = xfs_buf_find_lock(bp, flags);
+       if (error) {
+               xfs_buf_rele(bp);
+               return error;
+       }
+
+       trace_xfs_buf_find(bp, flags, _RET_IP_);
+       *bpp = bp;
+       return 0;
 }
 
 /*
- * Assembles a buffer covering the specified range. The code is optimised for
- * cache hits, as metadata intensive workloads will see 3 orders of magnitude
- * more hits than misses.
+ * Insert the new_bp into the hash table. This consumes the perag reference
+ * taken for the lookup regardless of the result of the insert.
  */
-int
-xfs_buf_get_map(
-       struct xfs_buftarg      *target,
+static int
+xfs_buf_find_insert(
+       struct xfs_buftarg      *btp,
+       struct xfs_perag        *pag,
+       struct xfs_buf_map      *cmap,
        struct xfs_buf_map      *map,
        int                     nmaps,
        xfs_buf_flags_t         flags,
        struct xfs_buf          **bpp)
 {
-       struct xfs_buf          *bp;
        struct xfs_buf          *new_bp;
+       struct xfs_buf          *bp;
        int                     error;
 
-       *bpp = NULL;
-       error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
-       if (!error)
-               goto found;
-       if (error != -ENOENT)
-               return error;
-
-       error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
+       error = _xfs_buf_alloc(btp, map, nmaps, flags, &new_bp);
        if (error)
-               return error;
+               goto out_drop_pag;
 
        /*
         * For buffers that fit entirely within a single page, first attempt to
@@ -674,18 +631,94 @@ xfs_buf_get_map(
                        goto out_free_buf;
        }
 
-       error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
-       if (error)
+       spin_lock(&pag->pag_buf_lock);
+       bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash,
+                       &new_bp->b_rhash_head, xfs_buf_hash_params);
+       if (IS_ERR(bp)) {
+               error = PTR_ERR(bp);
+               spin_unlock(&pag->pag_buf_lock);
                goto out_free_buf;
+       }
+       if (bp) {
+               /* found an existing buffer */
+               atomic_inc(&bp->b_hold);
+               spin_unlock(&pag->pag_buf_lock);
+               error = xfs_buf_find_lock(bp, flags);
+               if (error)
+                       xfs_buf_rele(bp);
+               else
+                       *bpp = bp;
+               goto out_free_buf;
+       }
+
+       /* The new buffer keeps the perag reference until it is freed. */
+       new_bp->b_pag = pag;
+       spin_unlock(&pag->pag_buf_lock);
+       *bpp = new_bp;
+       return 0;
 
-       if (bp != new_bp)
-               xfs_buf_free(new_bp);
+out_free_buf:
+       xfs_buf_free(new_bp);
+out_drop_pag:
+       xfs_perag_put(pag);
+       return error;
+}
 
-found:
+/*
+ * Assembles a buffer covering the specified range. The code is optimised for
+ * cache hits, as metadata intensive workloads will see 3 orders of magnitude
+ * more hits than misses.
+ */
+int
+xfs_buf_get_map(
+       struct xfs_buftarg      *btp,
+       struct xfs_buf_map      *map,
+       int                     nmaps,
+       xfs_buf_flags_t         flags,
+       struct xfs_buf          **bpp)
+{
+       struct xfs_perag        *pag;
+       struct xfs_buf          *bp = NULL;
+       struct xfs_buf_map      cmap = { .bm_bn = map[0].bm_bn };
+       int                     error;
+       int                     i;
+
+       for (i = 0; i < nmaps; i++)
+               cmap.bm_len += map[i].bm_len;
+
+       error = xfs_buf_map_verify(btp, &cmap);
+       if (error)
+               return error;
+
+       pag = xfs_perag_get(btp->bt_mount,
+                           xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
+
+       error = xfs_buf_lookup(pag, &cmap, flags, &bp);
+       if (error && error != -ENOENT)
+               goto out_put_perag;
+
+       /* cache hits always outnumber misses by at least 10:1 */
+       if (unlikely(!bp)) {
+               XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
+
+               if (flags & XBF_INCORE)
+                       goto out_put_perag;
+
+               /* xfs_buf_find_insert() consumes the perag reference. */
+               error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
+                               flags, &bp);
+               if (error)
+                       return error;
+       } else {
+               XFS_STATS_INC(btp->bt_mount, xb_get_locked);
+               xfs_perag_put(pag);
+       }
+
+       /* We do not hold a perag reference anymore. */
        if (!bp->b_addr) {
                error = _xfs_buf_map_pages(bp, flags);
                if (unlikely(error)) {
-                       xfs_warn_ratelimited(target->bt_mount,
+                       xfs_warn_ratelimited(btp->bt_mount,
                                "%s: failed to map %u pages", __func__,
                                bp->b_page_count);
                        xfs_buf_relse(bp);
@@ -700,12 +733,13 @@ found:
        if (!(flags & XBF_READ))
                xfs_buf_ioerror(bp, 0);
 
-       XFS_STATS_INC(target->bt_mount, xb_get);
+       XFS_STATS_INC(btp->bt_mount, xb_get);
        trace_xfs_buf_get(bp, flags, _RET_IP_);
        *bpp = bp;
        return 0;
-out_free_buf:
-       xfs_buf_free(new_bp);
+
+out_put_perag:
+       xfs_perag_put(pag);
        return error;
 }
 
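
Taken together, xfs_buf_lookup() and xfs_buf_find_insert() above follow the usual lockless-lookup / locked-insert shape: try an RCU hash lookup first, and only on a miss allocate a new object and insert it under the spinlock, coping with the case where another CPU inserted the same key in the meantime. A rough sketch of that shape, reusing struct obj from the earlier sketch; obj_alloc(), obj_free() and obj_hash_params are assumed helpers, not real XFS symbols:

#include <linux/rhashtable.h>
#include <linux/spinlock.h>

/* Sketch: find-or-insert with an RCU fast path, losing the insert race gracefully. */
static struct obj *obj_get(struct rhashtable *ht, const void *key,
			   spinlock_t *lock)
{
	struct obj *o, *new;

	rcu_read_lock();
	o = rhashtable_lookup(ht, key, obj_hash_params);
	if (o && atomic_inc_not_zero(&o->refcount)) {	/* skip dying objects */
		rcu_read_unlock();
		return o;		/* fast path: cache hit */
	}
	rcu_read_unlock();

	new = obj_alloc(key);		/* assumed allocator */
	if (!new)
		return NULL;

	spin_lock(lock);
	o = rhashtable_lookup_get_insert_fast(ht, &new->hash, obj_hash_params);
	if (IS_ERR(o)) {		/* insert failed, e.g. -ENOMEM */
		spin_unlock(lock);
		obj_free(new);
		return NULL;
	}
	if (o) {			/* lost the race: reuse the winner */
		atomic_inc(&o->refcount);	/* safe: removal holds this lock too */
		spin_unlock(lock);
		obj_free(new);
		return o;
	}
	spin_unlock(lock);
	return new;			/* our object is now in the table */
}

As in the XFS version, the only serialisation left on the hot path is the RCU read lock; the spinlock is taken only when a new buffer actually has to be inserted.
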
@@ -1416,7 +1450,7 @@ xfs_buf_ioapply_map(
        int             map,
        int             *buf_offset,
        int             *count,
-       int             op)
+       blk_opf_t       op)
 {
        int             page_index;
        unsigned int    total_nr_pages = bp->b_page_count;
@@ -1493,7 +1527,7 @@ _xfs_buf_ioapply(
        struct xfs_buf  *bp)
 {
        struct blk_plug plug;
-       int             op;
+       blk_opf_t       op;
        int             offset;
        int             size;
        int             i;
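
blk_opf_t, new in this cycle, gives the REQ_OP_* and REQ_* bits their own __bitwise type instead of a bare int, which is why both I/O submission helpers above change the type of 'op'. A simplified sketch of how such an op word is composed (hedged illustration only, not the actual _xfs_buf_ioapply() logic):

#include <linux/blk_types.h>

/* Assumed example: pick a request op and decorate it with flags. */
static blk_opf_t example_buf_op(bool write, bool readahead)
{
	blk_opf_t op = write ? REQ_OP_WRITE : REQ_OP_READ;

	if (!write && readahead)
		op |= REQ_RAHEAD;	/* advisory read-ahead I/O */
	op |= REQ_META;			/* the buffer cache carries metadata */
	return op;
}
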
@@ -1911,7 +1945,7 @@ xfs_free_buftarg(
        list_lru_destroy(&btp->bt_lru);
 
        blkdev_issue_flush(btp->bt_bdev);
-       fs_put_dax(btp->bt_daxdev);
+       fs_put_dax(btp->bt_daxdev, btp->bt_mount);
 
        kmem_free(btp);
 }
@@ -1958,13 +1992,18 @@ xfs_alloc_buftarg(
        struct block_device     *bdev)
 {
        xfs_buftarg_t           *btp;
+       const struct dax_holder_operations *ops = NULL;
 
+#if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
+       ops = &xfs_dax_holder_operations;
+#endif
        btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
 
        btp->bt_mount = mp;
        btp->bt_dev =  bdev->bd_dev;
        btp->bt_bdev = bdev;
-       btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
+       btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off,
+                                           mp, ops);
 
        /*
         * Buffer IO error rate limiting. Limit it to no more than 10 messages
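
The dax changes in the two hunks above register the mount as the holder of the dax_device so that pmem media errors can be pushed back up to the filesystem; the holder passed to fs_dax_get_by_bdev() must be the same one xfs_free_buftarg() later hands to fs_put_dax(). A rough illustration of what such a holder looks like, with a hypothetical my_fs_notify_failure() handler standing in for the real xfs_dax_holder_operations implementation:

#include <linux/dax.h>

/* Sketch only: the handler name and body are made up for illustration. */
static int my_fs_notify_failure(struct dax_device *dax_dev, u64 offset,
				u64 len, int mf_flags)
{
	/*
	 * Called when poisoned pmem is consumed; a real filesystem maps
	 * (offset, len) back to the files using that range and kills the
	 * affected mappings.
	 */
	return -EOPNOTSUPP;
}

static const struct dax_holder_operations my_fs_dax_holder_ops = {
	.notify_failure	= my_fs_notify_failure,
};
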
@@ -1986,7 +2025,8 @@ xfs_alloc_buftarg(
        btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
        btp->bt_shrinker.seeks = DEFAULT_SEEKS;
        btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
-       if (register_shrinker(&btp->bt_shrinker))
+       if (register_shrinker(&btp->bt_shrinker, "xfs-buf:%s",
+                             mp->m_super->s_id))
                goto error_pcpu;
        return btp;
 
@@ -2275,29 +2315,6 @@ xfs_buf_delwri_pushbuf(
        return error;
 }
 
-int __init
-xfs_buf_init(void)
-{
-       xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
-                                        SLAB_HWCACHE_ALIGN |
-                                        SLAB_RECLAIM_ACCOUNT |
-                                        SLAB_MEM_SPREAD,
-                                        NULL);
-       if (!xfs_buf_cache)
-               goto out;
-
-       return 0;
-
- out:
-       return -ENOMEM;
-}
-
-void
-xfs_buf_terminate(void)
-{
-       kmem_cache_destroy(xfs_buf_cache);
-}
-
 void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
 {
        /*