git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
libxfs: unmap xmbuf pages to avoid disaster
author Darrick J. Wong <djwong@kernel.org>
Mon, 24 Feb 2025 18:21:41 +0000 (10:21 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Tue, 25 Feb 2025 17:15:56 +0000 (09:15 -0800)
It turns out that there's a maximum mappings count, so we need to be
smartish about not overflowing that with too many xmbuf buffers.  This
needs to be a global value because high-agcount filesystems will create
a large number of xmbuf caches but this is a process-global limit.

Cc: <linux-xfs@vger.kernel.org> # v6.9.0
Fixes: 124b388dac17f5 ("libxfs: support in-memory buffer cache targets")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
include/cache.h
libxfs/buf_mem.c
libxfs/cache.c

index 334ad26309e26dcd9d3a612fb37d262e9cd33699..279bf717ba335f7e5c57b8384649ac6878d15802 100644 (file)
@@ -64,6 +64,8 @@ typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int,
                                          unsigned int);
 typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t);
 typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *);
+typedef int (*cache_node_get_t)(struct cache_node *);
+typedef void (*cache_node_put_t)(struct cache_node *);
 
 struct cache_operations {
        cache_node_hash_t       hash;
@@ -72,6 +74,8 @@ struct cache_operations {
        cache_node_relse_t      relse;
        cache_node_compare_t    compare;
        cache_bulk_relse_t      bulkrelse;      /* optional */
+       cache_node_get_t        get;            /* optional */
+       cache_node_put_t        put;            /* optional */
 };
 
 struct cache_hash {
@@ -107,6 +111,8 @@ struct cache {
        cache_node_relse_t      relse;          /* memory free function */
        cache_node_compare_t    compare;        /* comparison routine */
        cache_bulk_relse_t      bulkrelse;      /* bulk release routine */
+       cache_node_get_t        get;            /* prepare cache node after get */
+       cache_node_put_t        put;            /* prepare to put cache node */
        unsigned int            c_hashsize;     /* hash bucket count */
        unsigned int            c_hashshift;    /* hash key shift */
        struct cache_hash       *c_hash;        /* hash table buckets */
index e5b91d3cfe04868dca42a514329cab2f2a286261..16cb038ba10e2a4644a479e3e8cae159732be3b3 100644 (file)
 unsigned int   XMBUF_BLOCKSIZE;
 unsigned int   XMBUF_BLOCKSHIFT;
 
+long           xmbuf_max_mappings;
+static atomic_t        xmbuf_mappings;
+bool           xmbuf_unmap_early = false;
+
+/*
+ * Read the process-wide mmap limit from /proc/sys/vm/max_map_count and
+ * return half of it, reserving the other half for the rest of the
+ * process.  Returns -1 if the limit cannot be determined (e.g. procfs
+ * is unavailable or the file does not parse as a number).
+ */
+static long
+get_max_mmap_count(void)
+{
+       char    buffer[64];
+       char    *p = NULL;
+       long    ret = -1;
+       FILE    *file;
+
+       file = fopen("/proc/sys/vm/max_map_count", "r");
+       if (!file)
+               return -1;
+
+       while (fgets(buffer, sizeof(buffer), file)) {
+               errno = 0;
+               ret = strtol(buffer, &p, 0);
+               /* skip lines that do not parse as a number */
+               if (errno || p == buffer)
+                       continue;
+
+               /* only take half the maximum mmap count so others can use it */
+               ret /= 2;
+               break;
+       }
+       fclose(file);
+       return ret;
+}
+
 void
 xmbuf_libinit(void)
 {
@@ -45,6 +75,14 @@ xmbuf_libinit(void)
 
        XMBUF_BLOCKSIZE = ret;
        XMBUF_BLOCKSHIFT = libxfs_highbit32(XMBUF_BLOCKSIZE);
+
+       /*
+        * Figure out how many mmaps we will use simultaneously.  Pick a low
+        * default if we can't query procfs.
+        */
+       xmbuf_max_mappings = get_max_mmap_count();
+       if (xmbuf_max_mappings < 0)
+               xmbuf_max_mappings = 1024;
 }
 
 /* Allocate a new cache node (aka a xfs_buf) */
@@ -105,7 +143,8 @@ xmbuf_cache_relse(
        struct xfs_buf          *bp;
 
        bp = container_of(node, struct xfs_buf, b_node);
-       xmbuf_unmap_page(bp);
+       if (bp->b_addr)
+               xmbuf_unmap_page(bp);
        kmem_cache_free(xfs_buf_cache, bp);
 }
 
@@ -129,13 +168,50 @@ xmbuf_cache_bulkrelse(
        return count;
 }
 
+/*
+ * Prepare a cache node for use after a cache lookup: (re)establish the
+ * memory mapping for this buffer if it was unmapped while unreferenced.
+ * Returns 0 on success or a nonzero error code from xmbuf_map_page;
+ * the value is passed to strerror(), so it is presumably a positive
+ * errno -- NOTE(review): confirm against xmbuf_map_page's return
+ * convention.
+ */
+static int
+xmbuf_cache_node_get(
+       struct cache_node       *node)
+{
+       struct xfs_buf          *bp =
+               container_of(node, struct xfs_buf, b_node);
+       int                     error;
+
+       /* already mapped; nothing to do */
+       if (bp->b_addr != NULL)
+               return 0;
+
+       error = xmbuf_map_page(bp);
+       if (error) {
+               fprintf(stderr,
+ _("%s: %s can't mmap %u bytes at xfile offset %llu: %s\n"),
+                               progname, __FUNCTION__, BBTOB(bp->b_length),
+                               (unsigned long long)xfs_buf_daddr(bp),
+                               strerror(error));
+               return error;
+       }
+
+       return 0;
+}
+
+/*
+ * Last reference to a cache node is being dropped: if we have hit the
+ * mapping limit (xmbuf_unmap_early is set), release the buffer's memory
+ * mapping now so unreferenced buffers do not pin map slots.  The mapping
+ * is re-created by xmbuf_cache_node_get on the next lookup.
+ */
+static void
+xmbuf_cache_node_put(
+       struct cache_node       *node)
+{
+       struct xfs_buf          *bp =
+               container_of(node, struct xfs_buf, b_node);
+
+       if (xmbuf_unmap_early)
+               xmbuf_unmap_page(bp);
+}
+
 static struct cache_operations xmbuf_bcache_operations = {
        .hash           = libxfs_bhash,
        .alloc          = xmbuf_cache_alloc,
        .flush          = xmbuf_cache_flush,
        .relse          = xmbuf_cache_relse,
        .compare        = libxfs_bcompare,
-       .bulkrelse      = xmbuf_cache_bulkrelse
+       .bulkrelse      = xmbuf_cache_bulkrelse,
+       .get            = xmbuf_cache_node_get,
+       .put            = xmbuf_cache_node_put,
 };
 
 /*
@@ -216,8 +292,24 @@ xmbuf_map_page(
        pos = xfile->partition_pos + BBTOB(xfs_buf_daddr(bp));
        p = mmap(NULL, BBTOB(bp->b_length), PROT_READ | PROT_WRITE, MAP_SHARED,
                        xfile->fcb->fd, pos);
-       if (p == MAP_FAILED)
-               return -errno;
+       if (p == MAP_FAILED) {
+               if (errno == ENOMEM && !xmbuf_unmap_early) {
+#ifdef DEBUG
+                       fprintf(stderr, "xmbuf could not make mappings!\n");
+#endif
+                       xmbuf_unmap_early = true;
+               }
+               return errno;
+       }
+
+       if (!xmbuf_unmap_early &&
+           atomic_inc_return(&xmbuf_mappings) > xmbuf_max_mappings) {
+#ifdef DEBUG
+               fprintf(stderr, _("xmbuf hit too many mappings (%ld)!\n"),
+                                       xmbuf_max_mappings);
+#endif
+               xmbuf_unmap_early = true;
+       }
 
        bp->b_addr = p;
        bp->b_flags |= LIBXFS_B_UPTODATE | LIBXFS_B_UNCHECKED;
@@ -230,6 +322,8 @@ void
 xmbuf_unmap_page(
        struct xfs_buf          *bp)
 {
+       if (!xmbuf_unmap_early)
+               atomic_dec(&xmbuf_mappings);
        munmap(bp->b_addr, BBTOB(bp->b_length));
        bp->b_addr = NULL;
 }
index 139c7c1b9e715eb8de818dddbaa1d32d63bffa28..af20f3854df93e2689f780c83b243390e54dfdec 100644 (file)
@@ -61,6 +61,8 @@ cache_init(
        cache->compare = cache_operations->compare;
        cache->bulkrelse = cache_operations->bulkrelse ?
                cache_operations->bulkrelse : cache_generic_bulkrelse;
+       cache->get = cache_operations->get;
+       cache->put = cache_operations->put;
        pthread_mutex_init(&cache->c_mutex, NULL);
 
        for (i = 0; i < hashsize; i++) {
@@ -415,6 +417,13 @@ cache_node_get(
                         */
                        pthread_mutex_lock(&node->cn_mutex);
 
+                       if (node->cn_count == 0 && cache->get) {
+                               int err = cache->get(node);
+                               if (err) {
+                                       pthread_mutex_unlock(&node->cn_mutex);
+                                       goto next_object;
+                               }
+                       }
                        if (node->cn_count == 0) {
                                ASSERT(node->cn_priority >= 0);
                                ASSERT(!list_empty(&node->cn_mru));
@@ -503,6 +512,8 @@ cache_node_put(
 #endif
        node->cn_count--;
 
+       if (node->cn_count == 0 && cache->put)
+               cache->put(node);
        if (node->cn_count == 0) {
                /* add unreferenced node to appropriate MRU for shaker */
                mru = &cache->c_mrus[node->cn_priority];