xfsprogs: Release v6.8.0

[thirdparty/xfsprogs-dev.git] / libxfs / init.c
diff --git a/libxfs/init.c b/libxfs/init.c

index c7f9dc8bd9e62af4eba63e9449b20f9f0d660d0b..6ac9d682490a12ad4a0dfc1f84a6aa6f23583faf 100644 (file)
--- a/libxfs/init.c
+++ b/libxfs/init.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   */
  
  #include <sys/stat.h>
@@ -26,33 +14,34 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
+#include "xfs_defer.h"
  #include "xfs_inode_buf.h"
  #include "xfs_inode_fork.h"
  #include "xfs_inode.h"
  #include "xfs_trans.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount_btree.h"
+#include "libfrog/platform.h"
+
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_log_format.h"
+#include "xfs_ondisk.h"
  
  #include "libxfs.h"            /* for now */
  
+#ifndef HAVE_LIBURCU_ATOMIC64
+pthread_mutex_t        atomic64_lock = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
  char *progname = "libxfs";     /* default, changed by each tool */
  
  struct cache *libxfs_bcache;   /* global buffer cache */
  int libxfs_bhash_size;         /* #buckets in bcache */
  
-int    use_xfs_buf_lock;       /* global flag: use xfs_buf_t locks for MT */
+int    use_xfs_buf_lock;       /* global flag: use xfs_buf locks for MT */
  
-static void manage_zones(int); /* setup global zones */
-
-kmem_zone_t    *xfs_inode_zone;
-
-/*
- * dev_map - map open devices to fd.
- */
-#define MAX_DEVS 10    /* arbitary maximum */
-int nextfakedev = -1;  /* device number to give to next fake device */
-static struct dev_to_fd {
-       dev_t   dev;
-       int     fd;
-} dev_map[MAX_DEVS]={{0}};
+static int nextfakedev = -1;   /* device number to give to next fake device */
  
  /*
   * Checks whether a given device has a mounted, writable
@@ -65,165 +54,196 @@ static struct dev_to_fd {
  static int
  check_isactive(char *name, char *block, int fatal)
  {
-       struct stat64   st;
+       struct stat     st;
  
-       if (stat64(block, &st) < 0)
+       if (stat(block, &st) < 0)
                 return 0;
         if ((st.st_mode & S_IFMT) != S_IFBLK)
                 return 0;
         if (platform_check_ismounted(name, block, &st, 0) == 0)
                 return 0;
-       return platform_check_iswritable(name, block, &st, fatal);
+       if (platform_check_iswritable(name, block, &st))
+               return fatal ? 1 : 0;
+       return 0;
  }
  
-/* libxfs_device_to_fd:
- *     lookup a device number in the device map
- *     return the associated fd
- */
-int
-libxfs_device_to_fd(dev_t device)
+static int
+check_open(
+       struct libxfs_init      *xi,
+       struct libxfs_dev       *dev)
  {
-       int     d;
+       struct stat     stbuf;
+
+       if (stat(dev->name, &stbuf) < 0) {
+               perror(dev->name);
+               return 0;
+       }
+       if (!(xi->flags & LIBXFS_ISREADONLY) &&
+           !(xi->flags & LIBXFS_ISINACTIVE) &&
+           platform_check_ismounted(dev->name, dev->name, NULL, 1))
+               return 0;
  
-       for (d = 0; d < MAX_DEVS; d++)
-               if (dev_map[d].dev == device)
-                       return dev_map[d].fd;
+       if ((xi->flags & LIBXFS_ISINACTIVE) &&
+           check_isactive(dev->name, dev->name, !!(xi->flags &
+                       (LIBXFS_ISREADONLY | LIBXFS_DANGEROUSLY))))
+               return 0;
  
-       fprintf(stderr, _("%s: %s: device %lld is not open\n"),
-               progname, __FUNCTION__, (long long)device);
-       exit(1);
-       /* NOTREACHED */
+       return 1;
  }
  
-/* libxfs_device_open:
- *     open a device and return its device number
- */
-dev_t
-libxfs_device_open(char *path, int creat, int xflags, int setblksize)
+static bool
+libxfs_device_open(
+       struct libxfs_init      *xi,
+       struct libxfs_dev       *dev)
  {
-       dev_t           dev;
-       int             fd, d, flags;
-       int             readonly, dio, excl;
-       struct stat64   statb;
+       struct stat             statb;
+       int                     flags;
  
-       readonly = (xflags & LIBXFS_ISREADONLY);
-       excl = (xflags & LIBXFS_EXCLUSIVELY) && !creat;
-       dio = (xflags & LIBXFS_DIRECT) && !creat && platform_direct_blockdev();
+       dev->fd = -1;
  
-retry:
-       flags = (readonly ? O_RDONLY : O_RDWR) | \
-               (creat ? (O_CREAT|O_TRUNC) : 0) | \
-               (dio ? O_DIRECT : 0) | \
-               (excl ? O_EXCL : 0);
+       if (!dev->name)
+               return true;
+       if (!dev->isfile && !check_open(xi, dev))
+               return false;
+
+       if (xi->flags & LIBXFS_ISREADONLY)
+               flags = O_RDONLY;
+       else
+               flags = O_RDWR;
  
-       if ((fd = open(path, flags, 0666)) < 0) {
-               if (errno == EINVAL && --dio == 0)
+       if (dev->create) {
+               flags |= O_CREAT | O_TRUNC;
+       } else {
+               if (xi->flags & LIBXFS_EXCLUSIVELY)
+                       flags |= O_EXCL;
+               if ((xi->flags & LIBXFS_DIRECT) && platform_direct_blockdev())
+                       flags |= O_DIRECT;
+       }
+
+retry:
+       dev->fd = open(dev->name, flags, 0666);
+       if (dev->fd < 0) {
+               if (errno == EINVAL && (flags & O_DIRECT)) {
+                       flags &= ~O_DIRECT;
                         goto retry;
+               }
                 fprintf(stderr, _("%s: cannot open %s: %s\n"),
-                       progname, path, strerror(errno));
+                       progname, dev->name, strerror(errno));
                 exit(1);
         }
  
-       if (fstat64(fd, &statb) < 0) {
+       if (fstat(dev->fd, &statb) < 0) {
                 fprintf(stderr, _("%s: cannot stat %s: %s\n"),
-                       progname, path, strerror(errno));
+                       progname, dev->name, strerror(errno));
                 exit(1);
         }
  
-       if (!readonly && setblksize && (statb.st_mode & S_IFMT) == S_IFBLK) {
-               if (setblksize == 1)
-                       /* use the default blocksize */
-                       (void)platform_set_blocksize(fd, path, statb.st_rdev, XFS_MIN_SECTORSIZE, 0);
-               else {
-                       /* given an explicit blocksize to use */
-                       if (platform_set_blocksize(fd, path, statb.st_rdev, setblksize, 1))
-                           exit(1);
-               }
+       if (!(xi->flags & LIBXFS_ISREADONLY) &&
+           xi->setblksize &&
+           (statb.st_mode & S_IFMT) == S_IFBLK) {
+               /*
+                * Try to use the given explicit blocksize.  Failure to set the
+                * block size is only fatal for direct I/O.
+                */
+               platform_set_blocksize(dev->fd, dev->name, statb.st_rdev,
+                               xi->setblksize, flags & O_DIRECT);
         }
  
         /*
-        * Get the device number from the stat buf - unless
-        * we're not opening a real device, in which case
-        * choose a new fake device number.
+        * Get the device number from the stat buf - unless we're not opening a
+        * real device, in which case choose a new fake device number.
          */
-       dev = (statb.st_rdev) ? (statb.st_rdev) : (nextfakedev--);
-
-       for (d = 0; d < MAX_DEVS; d++)
-               if (dev_map[d].dev == dev) {
-                       fprintf(stderr, _("%s: device %lld is already open\n"),
-                           progname, (long long)dev);
-                       exit(1);
-               }
-
-       for (d = 0; d < MAX_DEVS; d++)
-               if (!dev_map[d].dev) {
-                       dev_map[d].dev = dev;
-                       dev_map[d].fd = fd;
-
-                       return dev;
-               }
-
-       fprintf(stderr, _("%s: %s: too many open devices\n"),
-               progname, __FUNCTION__);
-       exit(1);
-       /* NOTREACHED */
+       if (statb.st_rdev)
+               dev->dev = statb.st_rdev;
+       else
+               dev->dev = nextfakedev--;
+       platform_findsizes(dev->name, dev->fd, &dev->size, &dev->bsize);
+       return true;
  }
  
-void
-libxfs_device_close(dev_t dev)
+static void
+libxfs_device_close(
+       struct libxfs_dev       *dev)
  {
-       int     d;
-
-       for (d = 0; d < MAX_DEVS; d++)
-               if (dev_map[d].dev == dev) {
-                       int     fd;
-
-                       fd = dev_map[d].fd;
-                       dev_map[d].dev = dev_map[d].fd = 0;
-
-                       fsync(fd);
-                       platform_flush_device(fd, dev);
-                       close(fd);
-
-                       return;
-               }
+       int                     ret;
+
+       ret = platform_flush_device(dev->fd, dev->dev);
+       if (ret) {
+               ret = -errno;
+               fprintf(stderr,
+       _("%s: flush of device %s failed, err=%d"),
+                       progname, dev->name, ret);
+       }
+       close(dev->fd);
  
-       fprintf(stderr, _("%s: %s: device %lld is not open\n"),
-                       progname, __FUNCTION__, (long long)dev);
-       exit(1);
+       dev->fd = -1;
+       dev->dev = 0;
  }
  
-static int
-check_open(char *path, int flags, char **rawfile, char **blockfile)
+/*
+ * Initialize/destroy all of the cache allocators we use.
+ */
+static void
+init_caches(void)
  {
-       int readonly = (flags & LIBXFS_ISREADONLY);
-       int inactive = (flags & LIBXFS_ISINACTIVE);
-       int dangerously = (flags & LIBXFS_DANGEROUSLY);
-       struct stat64   stbuf;
-
-       if (stat64(path, &stbuf) < 0) {
-               perror(path);
-               return 0;
-       }
-       if (!(*rawfile = platform_findrawpath(path))) {
-               fprintf(stderr, _("%s: "
-                                 "can't find a character device matching %s\n"),
-                       progname, path);
-               return 0;
+       int     error;
+
+       /* initialise cache allocation */
+       xfs_buf_cache = kmem_cache_init(sizeof(struct xfs_buf), "xfs_buffer");
+       xfs_inode_cache = kmem_cache_init(sizeof(struct xfs_inode), "xfs_inode");
+       xfs_ifork_cache = kmem_cache_init(sizeof(struct xfs_ifork), "xfs_ifork");
+       xfs_ili_cache = kmem_cache_init(
+                       sizeof(struct xfs_inode_log_item),"xfs_inode_log_item");
+       xfs_buf_item_cache = kmem_cache_init(
+                       sizeof(struct xfs_buf_log_item), "xfs_buf_log_item");
+       error = xfs_defer_init_item_caches();
+       if (error) {
+               fprintf(stderr, "Could not allocate defer init item caches.\n");
+               abort();
         }
-       if (!(*blockfile = platform_findblockpath(path))) {
-               fprintf(stderr, _("%s: "
-                                 "can't find a block device matching %s\n"),
-                       progname, path);
-               return 0;
+       xfs_da_state_cache = kmem_cache_init(
+                       sizeof(struct xfs_da_state), "xfs_da_state");
+       error = xfs_btree_init_cur_caches();
+       if (error) {
+               fprintf(stderr, "Could not allocate btree cursor caches.\n");
+               abort();
         }
-       if (!readonly && !inactive && platform_check_ismounted(path, *blockfile, NULL, 1))
-               return 0;
+       xfs_extfree_item_cache = kmem_cache_init(
+                       sizeof(struct xfs_extent_free_item),
+                       "xfs_extfree_item");
+       xfs_trans_cache = kmem_cache_init(
+                       sizeof(struct xfs_trans), "xfs_trans");
+}
  
-       if (inactive && check_isactive(path, *blockfile, ((readonly|dangerously)?1:0)))
-               return 0;
+static int
+destroy_caches(void)           /* teardown counterpart of init_caches() */
+{
+       int     leaked = 0;     /* running sum of kmem_cache_destroy() results */
+
+       leaked += kmem_cache_destroy(xfs_buf_cache);
+       leaked += kmem_cache_destroy(xfs_ili_cache);
+       leaked += kmem_cache_destroy(xfs_inode_cache);
+       leaked += kmem_cache_destroy(xfs_ifork_cache);
+       leaked += kmem_cache_destroy(xfs_buf_item_cache);
+       leaked += kmem_cache_destroy(xfs_da_state_cache);
+       xfs_defer_destroy_item_caches();        /* contributes nothing to the total */
+       xfs_btree_destroy_cur_caches();         /* contributes nothing to the total */
+       leaked += kmem_cache_destroy(xfs_extfree_item_cache);
+       leaked += kmem_cache_destroy(xfs_trans_cache);
+
+       return leaked;  /* presumably nonzero means objects were leaked - TODO confirm */
+}
  
-       return 1;
+static void
+libxfs_close_devices(
+       struct libxfs_init      *li)
+{
+       if (li->data.dev)
+               libxfs_device_close(&li->data);
+       if (li->log.dev && li->log.dev != li->data.dev)
+               libxfs_device_close(&li->log);
+       if (li->rt.dev)
+               libxfs_device_close(&li->rt);
  }
  
  /*
@@ -231,223 +251,80 @@ check_open(char *path, int flags, char **rawfile, char **blockfile)
   * Caller gets a 0 on failure (and we print a message), 1 on success.
   */
  int
-libxfs_init(libxfs_init_t *a)
+libxfs_init(struct libxfs_init *a)
  {
-       char            *blockfile;
-       char            curdir[MAXPATHLEN];
-       char            *dname;
-       char            dpath[25];
-       int             fd;
-       char            *logname;
-       char            logpath[25];
-       int             needcd;
-       char            *rawfile;
-       char            *rtname;
-       char            rtpath[25];
-       int             rval = 0;
-       int             flags;
-
-       dpath[0] = logpath[0] = rtpath[0] = '\0';
-       dname = a->dname;
-       logname = a->logname;
-       rtname = a->rtname;
-       a->dfd = a->logfd = a->rtfd = -1;
-       a->ddev = a->logdev = a->rtdev = 0;
-       a->dbsize = a->lbsize = a->rtbsize = 0;
-       a->dsize = a->logBBsize = a->logBBstart = a->rtsize = 0;
-
-       (void)getcwd(curdir,MAXPATHLEN);
-       needcd = 0;
-       fd = -1;
-       flags = (a->isreadonly | a->isdirect);
-
+       xfs_check_ondisk_structs();
+       rcu_init();
+       rcu_register_thread();
         radix_tree_init();
  
-       if (a->volname) {
-               if(!check_open(a->volname,flags,&rawfile,&blockfile))
-                       goto done;
-               needcd = 1;
-               fd = open(rawfile, O_RDONLY);
-               dname = a->dname = a->volname;
-               a->volname = NULL;
-       }
-       if (dname) {
-               if (dname[0] != '/' && needcd)
-                       chdir(curdir);
-               if (a->disfile) {
-                       a->ddev= libxfs_device_open(dname, a->dcreat, flags,
-                                                   a->setblksize);
-                       a->dfd = libxfs_device_to_fd(a->ddev);
-               } else {
-                       if (!check_open(dname, flags, &rawfile, &blockfile))
-                               goto done;
-                       a->ddev = libxfs_device_open(rawfile,
-                                       a->dcreat, flags, a->setblksize);
-                       a->dfd = libxfs_device_to_fd(a->ddev);
-                       platform_findsizes(rawfile, a->dfd,
-                                               &a->dsize, &a->dbsize);
-               }
-               needcd = 1;
-       } else
-               a->dsize = 0;
-       if (logname) {
-               if (logname[0] != '/' && needcd)
-                       chdir(curdir);
-               if (a->lisfile) {
-                       a->logdev = libxfs_device_open(logname,
-                                       a->lcreat, flags, a->setblksize);
-                       a->logfd = libxfs_device_to_fd(a->logdev);
-               } else {
-                       if (!check_open(logname, flags, &rawfile, &blockfile))
-                               goto done;
-                       a->logdev = libxfs_device_open(rawfile,
-                                       a->lcreat, flags, a->setblksize);
-                       a->logfd = libxfs_device_to_fd(a->logdev);
-                       platform_findsizes(rawfile, a->logfd,
-                                               &a->logBBsize, &a->lbsize);
-               }
-               needcd = 1;
-       } else
-               a->logBBsize = 0;
-       if (rtname) {
-               if (rtname[0] != '/' && needcd)
-                       chdir(curdir);
-               if (a->risfile) {
-                       a->rtdev = libxfs_device_open(rtname,
-                                       a->rcreat, flags, a->setblksize);
-                       a->rtfd = libxfs_device_to_fd(a->rtdev);
-               } else {
-                       if (!check_open(rtname, flags, &rawfile, &blockfile))
-                               goto done;
-                       a->rtdev = libxfs_device_open(rawfile,
-                                       a->rcreat, flags, a->setblksize);
-                       a->rtfd = libxfs_device_to_fd(a->rtdev);
-                       platform_findsizes(rawfile, a->rtfd,
-                                               &a->rtsize, &a->rtbsize);
-               }
-               needcd = 1;
-       } else
-               a->rtsize = 0;
-       if (a->dsize < 0) {
-               fprintf(stderr, _("%s: can't get size for data subvolume\n"),
-                       progname);
+       if (!libxfs_device_open(a, &a->data))
                 goto done;
-       }
-       if (a->logBBsize < 0) {
-               fprintf(stderr, _("%s: can't get size for log subvolume\n"),
-                       progname);
+       if (!libxfs_device_open(a, &a->log))
                 goto done;
-       }
-       if (a->rtsize < 0) {
-               fprintf(stderr, _("%s: can't get size for realtime subvolume\n"),
-                       progname);
+       if (!libxfs_device_open(a, &a->rt))
                 goto done;
-       }
-       if (needcd)
-               chdir(curdir);
+
         if (!libxfs_bhash_size)
                 libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp);
         libxfs_bcache = cache_init(a->bcache_flags, libxfs_bhash_size,
                                    &libxfs_bcache_operations);
-       use_xfs_buf_lock = a->usebuflock;
-       manage_zones(0);
-       rval = 1;
+       use_xfs_buf_lock = a->flags & LIBXFS_USEBUFLOCK;
+       xfs_dir_startup();
+       init_caches();
+       return 1;
+
  done:
-       if (dpath[0])
-               unlink(dpath);
-       if (logpath[0])
-               unlink(logpath);
-       if (rtpath[0])
-               unlink(rtpath);
-       if (fd >= 0)
-               close(fd);
-       if (!rval && a->ddev)
-               libxfs_device_close(a->ddev);
-       if (!rval && a->logdev)
-               libxfs_device_close(a->logdev);
-       if (!rval && a->rtdev)
-               libxfs_device_close(a->rtdev);
-       return rval;
+       libxfs_close_devices(a);
+       rcu_unregister_thread();
+       return 0;
  }
  
  
-/*
- * Initialize/destroy all of the zone allocators we use.
- */
-static void
-manage_zones(int release)
-{
-       extern kmem_zone_t      *xfs_buf_zone;
-       extern kmem_zone_t      *xfs_ili_zone;
-       extern kmem_zone_t      *xfs_ifork_zone;
-       extern kmem_zone_t      *xfs_buf_item_zone;
-       extern kmem_zone_t      *xfs_da_state_zone;
-       extern kmem_zone_t      *xfs_btree_cur_zone;
-       extern kmem_zone_t      *xfs_bmap_free_item_zone;
-       extern kmem_zone_t      *xfs_log_item_desc_zone;
-       extern void             xfs_dir_startup();
-
-       if (release) {  /* free zone allocation */
-               kmem_free(xfs_buf_zone);
-               kmem_free(xfs_inode_zone);
-               kmem_free(xfs_ifork_zone);
-               kmem_free(xfs_buf_item_zone);
-               kmem_free(xfs_da_state_zone);
-               kmem_free(xfs_btree_cur_zone);
-               kmem_free(xfs_bmap_free_item_zone);
-               kmem_free(xfs_log_item_desc_zone);
-               return;
-       }
-       /* otherwise initialise zone allocation */
-       xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buffer");
-       xfs_inode_zone = kmem_zone_init(sizeof(struct xfs_inode), "xfs_inode");
-       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-       xfs_ili_zone = kmem_zone_init(
-                       sizeof(xfs_inode_log_item_t), "xfs_inode_log_item");
-       xfs_buf_item_zone = kmem_zone_init(
-                       sizeof(xfs_buf_log_item_t), "xfs_buf_log_item");
-       xfs_da_state_zone = kmem_zone_init(
-                       sizeof(xfs_da_state_t), "xfs_da_state");
-       xfs_btree_cur_zone = kmem_zone_init(
-                       sizeof(xfs_btree_cur_t), "xfs_btree_cur");
-       xfs_bmap_free_item_zone = kmem_zone_init(
-                       sizeof(xfs_bmap_free_item_t), "xfs_bmap_free_item");
-       xfs_log_item_desc_zone = kmem_zone_init(
-                       sizeof(struct xfs_log_item_desc), "xfs_log_item_desc");
-       xfs_dir_startup();
-}
-
  /*
   * Initialize realtime fields in the mount structure.
   */
  static int
  rtmount_init(
-       xfs_mount_t     *mp,    /* file system mount structure */
-       int             flags)
+       xfs_mount_t     *mp)    /* file system mount structure */
  {
-       xfs_buf_t       *bp;    /* buffer for last block of subvolume */
+       struct xfs_buf  *bp;    /* buffer for last block of subvolume */
         xfs_daddr_t     d;      /* address of last block of subvolume */
-       xfs_sb_t        *sbp;   /* filesystem superblock copy in mount */
+       unsigned int    rsumblocks;
+       int             error;
  
-       sbp = &mp->m_sb;
-       if (sbp->sb_rblocks == 0)
+       if (mp->m_sb.sb_rblocks == 0)
                 return 0;
-       if (mp->m_rtdev_targp->dev == 0 && !(flags & LIBXFS_MOUNT_DEBUGGER)) {
+
+       if (xfs_has_reflink(mp)) {
+               fprintf(stderr,
+       _("%s: Reflink not compatible with realtime device. Please try a newer xfsprogs.\n"),
+                               progname);
+               return -1;
+       }
+
+       if (xfs_has_rmapbt(mp)) {
+               fprintf(stderr,
+       _("%s: Reverse mapping btree not compatible with realtime device. Please try a newer xfsprogs.\n"),
+                               progname);
+               return -1;
+       }
+
+       if (mp->m_rtdev_targp->bt_bdev == 0 && !xfs_is_debugger(mp)) {
                 fprintf(stderr, _("%s: filesystem has a realtime subvolume\n"),
                         progname);
                 return -1;
         }
-       mp->m_rsumlevels = sbp->sb_rextslog + 1;
-       mp->m_rsumsize =
-               (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
-               sbp->sb_rbmblocks;
-       mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize);
+       mp->m_rsumlevels = mp->m_sb.sb_rextslog + 1;
+       rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels,
+                       mp->m_sb.sb_rbmblocks);
+       mp->m_rsumsize = XFS_FSB_TO_B(mp, rsumblocks);
         mp->m_rbmip = mp->m_rsumip = NULL;
  
         /*
          * Allow debugger to be run without the realtime device present.
          */
-       if (flags & LIBXFS_MOUNT_DEBUGGER)
+       if (xfs_is_debugger(mp))
                 return 0;
  
         /*
@@ -460,124 +337,122 @@ rtmount_init(
                         (unsigned long long) mp->m_sb.sb_rblocks);
                 return -1;
         }
-       bp = libxfs_readbuf(mp->m_rtdev,
-                       d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1), 0, NULL);
-       if (bp == NULL) {
+       error = libxfs_buf_read(mp->m_rtdev, d - XFS_FSB_TO_BB(mp, 1),
+                       XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
+       if (error) {
                 fprintf(stderr, _("%s: realtime size check failed\n"),
                         progname);
                 return -1;
         }
-       libxfs_putbuf(bp);
+       libxfs_buf_relse(bp);
         return 0;
  }
  
-static int
-libxfs_initialize_perag(
-       xfs_mount_t     *mp,
-       xfs_agnumber_t  agcount,
-       xfs_agnumber_t  *maxagi)
+static bool
+xfs_set_inode_alloc_perag(
+       struct xfs_perag        *pag,
+       xfs_ino_t               ino,
+       xfs_agnumber_t          max_metadata)
  {
-       xfs_agnumber_t  index, max_metadata;
-       xfs_agnumber_t  first_initialised = 0;
-       xfs_perag_t     *pag;
+       if (!xfs_is_inode32(pag->pag_mount)) {
+               set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+               clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+               return false;
+       }
+
+       if (ino > XFS_MAXINUMBER_32) {
+               clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+               clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+               return false;
+       }
+
+       set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+       if (pag->pag_agno < max_metadata)
+               set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+       else
+               clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+       return true;
+}
+
+/*
+ * Set parameters for inode allocation heuristics, taking into account
+ * filesystem size and inode32/inode64 mount options; i.e. specifically
+ * whether or not xfs_has_small_inums() is true.
+ *
+ * Inode allocation patterns are altered only if inode32 is requested
+ * (xfs_has_small_inums()), and the filesystem is sufficiently large.
+ * If altered, XFS_OPSTATE_INODE32 is set as well.
+ *
+ * An agcount independent of that in the mount structure is provided
+ * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
+ * to the potentially higher ag count.
+ *
+ * Returns the maximum AG index which may contain inodes.
+ *
+ * NOTE: userspace has no concept of "inode32" and so xfs_has_small_inums
+ * is always false, and much of this code is a no-op.
+ */
+xfs_agnumber_t
+xfs_set_inode_alloc(
+       struct xfs_mount *mp,
+       xfs_agnumber_t  agcount)
+{
+       xfs_agnumber_t  index;
+       xfs_agnumber_t  maxagi = 0;
+       xfs_sb_t        *sbp = &mp->m_sb;
+       xfs_agnumber_t  max_metadata;
         xfs_agino_t     agino;
         xfs_ino_t       ino;
-       xfs_sb_t        *sbp = &mp->m_sb;
-       int             error = -ENOMEM;
  
         /*
-        * Walk the current per-ag tree so we don't try to initialise AGs
-        * that already exist (growfs case). Allocate and insert all the
-        * AGs we don't find ready for initialisation.
+        * Calculate how much should be reserved for inodes to meet
+        * the max inode percentage.  Used only for inode32.
          */
-       for (index = 0; index < agcount; index++) {
-               pag = xfs_perag_get(mp, index);
-               if (pag) {
-                       xfs_perag_put(pag);
-                       continue;
-               }
-               if (!first_initialised)
-                       first_initialised = index;
-
-               pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
-               if (!pag)
-                       goto out_unwind;
-               pag->pag_agno = index;
-               pag->pag_mount = mp;
-
-               if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
-                       error = -EEXIST;
-                       goto out_unwind;
-               }
+       if (M_IGEO(mp)->maxicount) {
+               uint64_t        icount;
+
+               icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+               do_div(icount, 100);
+               icount += sbp->sb_agblocks - 1;
+               do_div(icount, sbp->sb_agblocks);
+               max_metadata = icount;
+       } else {
+               max_metadata = agcount;
         }
  
-       /*
-        * If we mount with the inode64 option, or no inode overflows
-        * the legacy 32-bit address space clear the inode32 option.
-        */
-       agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+       /* Get the last possible inode in the filesystem */
+       agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
         ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
  
-       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
-               mp->m_flags |= XFS_MOUNT_32BITINODES;
+       /*
+        * If user asked for no more than 32-bit inodes, and the fs is
+        * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
+        * the allocator to accommodate the request.
+        */
+       if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
+               set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
         else
-               mp->m_flags &= ~XFS_MOUNT_32BITINODES;
+               clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
  
-       if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-               /*
-                * Calculate how much should be reserved for inodes to meet
-                * the max inode percentage.
-                */
-               if (mp->m_maxicount) {
-                       __uint64_t      icount;
-
-                       icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-                       do_div(icount, 100);
-                       icount += sbp->sb_agblocks - 1;
-                       do_div(icount, sbp->sb_agblocks);
-                       max_metadata = icount;
-               } else {
-                       max_metadata = agcount;
-               }
+       for (index = 0; index < agcount; index++) {
+               struct xfs_perag        *pag;
  
-               for (index = 0; index < agcount; index++) {
-                       ino = XFS_AGINO_TO_INO(mp, index, agino);
-                       if (ino > XFS_MAXINUMBER_32) {
-                               index++;
-                               break;
-                       }
+               ino = XFS_AGINO_TO_INO(mp, index, agino);
  
-                       pag = xfs_perag_get(mp, index);
-                       pag->pagi_inodeok = 1;
-                       if (index < max_metadata)
-                               pag->pagf_metadata = 1;
-                       xfs_perag_put(pag);
-               }
-       } else {
-               for (index = 0; index < agcount; index++) {
-                       pag = xfs_perag_get(mp, index);
-                       pag->pagi_inodeok = 1;
-                       xfs_perag_put(pag);
-               }
+               pag = xfs_perag_get(mp, index);
+               if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
+                       maxagi++;
+               xfs_perag_put(pag);
         }
  
-       if (maxagi)
-               *maxagi = index;
-       return 0;
-
-out_unwind:
-       kmem_free(pag);
-       for (; index > first_initialised; index--) {
-               pag = radix_tree_delete(&mp->m_perag_tree, index);
-               kmem_free(pag);
-       }
-       return error;
+       return xfs_is_inode32(mp) ? maxagi : agcount;
  }
  
  static struct xfs_buftarg *
  libxfs_buftarg_alloc(
         struct xfs_mount        *mp,
-       dev_t                   dev)
+       struct libxfs_dev       *dev,
+       unsigned long           write_fails)
  {
         struct xfs_buftarg      *btp;
  
@@ -588,42 +463,103 @@ libxfs_buftarg_alloc(
                 exit(1);
         }
         btp->bt_mount = mp;
-       btp->dev = dev;
+       btp->bt_bdev = dev->dev;
+       btp->bt_bdev_fd = dev->fd;
+       btp->flags = 0;
+       if (write_fails) {
+               btp->writes_left = write_fails;
+               btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL;
+       }
+       pthread_mutex_init(&btp->lock, NULL);
+
         return btp;
  }
  
+enum libxfs_write_failure_nums {
+       WF_DATA = 0,    /* index of the "ddev" suboption in wf_opts[] */
+       WF_LOG,         /* index of the "logdev" suboption in wf_opts[] */
+       WF_RT,          /* index of the "rtdev" suboption in wf_opts[] */
+       WF_MAX_OPTS,    /* index of the NULL terminator in wf_opts[] */
+};
+
+static char *wf_opts[] = {     /* LIBXFS_DEBUG_WRITE_CRASH suboption names */
+       [WF_DATA]               = "ddev",
+       [WF_LOG]                = "logdev",
+       [WF_RT]                 = "rtdev",
+       [WF_MAX_OPTS]           = NULL, /* getsubopt() token list terminator */
+};
+
  void
  libxfs_buftarg_init(
         struct xfs_mount        *mp,
-       dev_t                   dev,
-       dev_t                   logdev,
-       dev_t                   rtdev)
+       struct libxfs_init      *xi)
  {
+       char                    *p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
+       unsigned long           dfail = 0, lfail = 0, rfail = 0;
+
+       /* Simulate utility crash after a certain number of writes. */
+       while (p && *p) {
+               char *val;
+
+               switch (getsubopt(&p, wf_opts, &val)) {
+               case WF_DATA:
+                       if (!val) {
+                               fprintf(stderr,
+               _("ddev write fail requires a parameter\n"));
+                               exit(1);
+                       }
+                       dfail = strtoul(val, NULL, 0);
+                       break;
+               case WF_LOG:
+                       if (!val) {
+                               fprintf(stderr,
+               _("logdev write fail requires a parameter\n"));
+                               exit(1);
+                       }
+                       lfail = strtoul(val, NULL, 0);
+                       break;
+               case WF_RT:
+                       if (!val) {
+                               fprintf(stderr,
+               _("rtdev write fail requires a parameter\n"));
+                               exit(1);
+                       }
+                       rfail = strtoul(val, NULL, 0);
+                       break;
+               default:
+                       fprintf(stderr, _("unknown write fail type %s\n"),
+                                       val);
+                       exit(1);
+                       break;
+               }
+       }
+
         if (mp->m_ddev_targp) {
                 /* should already have all buftargs initialised */
-               if (mp->m_ddev_targp->dev != dev ||
+               if (mp->m_ddev_targp->bt_bdev != xi->data.dev ||
                     mp->m_ddev_targp->bt_mount != mp) {
                         fprintf(stderr,
                                 _("%s: bad buftarg reinit, ddev\n"),
                                 progname);
                         exit(1);
                 }
-               if (!logdev || logdev == dev) {
+               if (!xi->log.dev || xi->log.dev == xi->data.dev) {
                         if (mp->m_logdev_targp != mp->m_ddev_targp) {
                                 fprintf(stderr,
                                 _("%s: bad buftarg reinit, ldev mismatch\n"),
                                         progname);
                                 exit(1);
                         }
-               } else if (mp->m_logdev_targp->dev != logdev ||
+               } else if (mp->m_logdev_targp->bt_bdev != xi->log.dev ||
                            mp->m_logdev_targp->bt_mount != mp) {
                         fprintf(stderr,
                                 _("%s: bad buftarg reinit, logdev\n"),
                                 progname);
                         exit(1);
                 }
-               if (rtdev && (mp->m_rtdev_targp->dev != rtdev ||
-                             mp->m_rtdev_targp->bt_mount != mp)) {
+               if (xi->rt.dev &&
+                   (mp->m_rtdev_targp->bt_bdev != xi->rt.dev ||
+                    mp->m_rtdev_targp->bt_mount != mp)) {
                         fprintf(stderr,
                                 _("%s: bad buftarg reinit, rtdev\n"),
                                 progname);
@@ -632,12 +568,55 @@ libxfs_buftarg_init(
                 return;
         }
  
-       mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev);
-       if (!logdev || logdev == dev)
+       mp->m_ddev_targp = libxfs_buftarg_alloc(mp, &xi->data, dfail);
+       if (!xi->log.dev || xi->log.dev == xi->data.dev)
                 mp->m_logdev_targp = mp->m_ddev_targp;
         else
-               mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev);
-       mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev);
+               mp->m_logdev_targp = libxfs_buftarg_alloc(mp, &xi->log, lfail);
+       mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, &xi->rt, rfail);
+}
+
+/*
+ * Compute maximum possible height for per-AG btree types for this fs.
+ *
+ * Stores in mp->m_agbtree_maxlevels the largest of mp->m_alloc_maxlevels,
+ * M_IGEO(mp)->inobt_maxlevels, mp->m_rmap_maxlevels and
+ * mp->m_refc_maxlevels.  Nothing is computed for those inputs here; in this
+ * file libxfs_compute_all_maxlevels() calls this helper last, after the
+ * per-btree setup calls.
+ */
+static inline void
+xfs_agbtree_compute_maxlevels(
+       struct xfs_mount        *mp)
+{
+       unsigned int            levels; /* running maximum */
+
+       levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels);
+       levels = max(levels, mp->m_rmap_maxlevels);
+       mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
+}
+
+/*
+ * Compute maximum possible height of all btrees.
+ *
+ * Calls, in order, the allocation btree, bmap btree (data fork, then attr
+ * fork), inode allocation geometry, rmap btree and refcount btree setup
+ * routines on mp.  xfs_agbtree_compute_maxlevels() runs last because it
+ * reads the m_alloc_maxlevels, inobt_maxlevels, m_rmap_maxlevels and
+ * m_refc_maxlevels fields of mp.
+ */
+void
+libxfs_compute_all_maxlevels(
+       struct xfs_mount        *mp)
+{
+       xfs_alloc_compute_maxlevels(mp);
+       xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
+       xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
+       xfs_ialloc_setup_geometry(mp);
+       xfs_rmapbt_compute_maxlevels(mp);
+       xfs_refcountbt_compute_maxlevels(mp);
+
+       xfs_agbtree_compute_maxlevels(mp);
+}
+
+/*
+ * Precalculate the low space thresholds for dynamic speculative preallocation.
+ *
+ * dblocks starts as sb_dblocks and is divided by 100 in place (integer
+ * division), so entry i of mp->m_low_space[] is set to (i + 1) times one
+ * percent of the data blocks, for i = 0 .. XFS_LOWSP_MAX - 1.
+ */
+static void
+xfs_set_low_space_thresholds(
+       struct xfs_mount        *mp)
+{
+       uint64_t                dblocks = mp->m_sb.sb_dblocks;
+       int                     i;
+
+       do_div(dblocks, 100);   /* dblocks is now 1% of sb_dblocks */
+
+       for (i = 0; i < XFS_LOWSP_MAX; i++)
+               mp->m_low_space[i] = dblocks * (i + 1);
  }
  
  /*
@@ -645,72 +624,44 @@ libxfs_buftarg_init(
   * such that the numerous XFS_* macros can be used.  If dev is zero,
   * no IO will be performed (no size checks, read root inodes).
   */
-xfs_mount_t *
+struct xfs_mount *
  libxfs_mount(
-       xfs_mount_t     *mp,
-       xfs_sb_t        *sb,
-       dev_t           dev,
-       dev_t           logdev,
-       dev_t           rtdev,
-       int             flags)
+       struct xfs_mount        *mp,
+       struct xfs_sb           *sb,
+       struct libxfs_init      *xi,
+       unsigned int            flags)
  {
-       xfs_daddr_t     d;
-       xfs_buf_t       *bp;
-       xfs_sb_t        *sbp;
-       int             error;
+       struct xfs_buf          *bp;
+       struct xfs_sb           *sbp;
+       xfs_daddr_t             d;
+       int                     error;
  
-       libxfs_buftarg_init(mp, dev, logdev, rtdev);
+       mp->m_features = xfs_sb_version_to_features(sb);
+       if (flags & LIBXFS_MOUNT_DEBUGGER)
+               xfs_set_debugger(mp);
+       if (flags & LIBXFS_MOUNT_REPORT_CORRUPTION)
+               xfs_set_reporting_corruption(mp);
+       libxfs_buftarg_init(mp, xi);
  
-       mp->m_flags = (LIBXFS_MOUNT_32BITINODES|LIBXFS_MOUNT_32BITINOOPT);
+       mp->m_finobt_nores = true;
+       xfs_set_inode32(mp);
         mp->m_sb = *sb;
         INIT_RADIX_TREE(&mp->m_perag_tree, GFP_KERNEL);
-       sbp = &(mp->m_sb);
+       sbp = &mp->m_sb;
+       spin_lock_init(&mp->m_sb_lock);
+       spin_lock_init(&mp->m_agirotor_lock);
  
         xfs_sb_mount_common(mp, sb);
  
-       xfs_alloc_compute_maxlevels(mp);
-       xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
-       xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
-       xfs_ialloc_compute_maxlevels(mp);
-
-       if (sbp->sb_imax_pct) {
-               /* Make sure the maximum inode count is a multiple of the
-                * units we allocate inodes in.
-                */
-               mp->m_maxicount = (sbp->sb_dblocks * sbp->sb_imax_pct) / 100;
-               mp->m_maxicount = ((mp->m_maxicount / mp->m_ialloc_blks) *
-                                 mp->m_ialloc_blks)  << sbp->sb_inopblog;
-       } else
-               mp->m_maxicount = 0;
-
-       mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
-
         /*
          * Set whether we're using stripe alignment.
          */
-       if (xfs_sb_version_hasdalign(&mp->m_sb)) {
+       if (xfs_has_dalign(mp)) {
                 mp->m_dalign = sbp->sb_unit;
                 mp->m_swidth = sbp->sb_width;
         }
  
-       /*
-        * Set whether we're using inode alignment.
-        */
-       if (xfs_sb_version_hasalign(&mp->m_sb) &&
-           mp->m_sb.sb_inoalignmt >=
-           XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
-               mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
-       else
-               mp->m_inoalign_mask = 0;
-       /*
-        * If we are using stripe alignment, check whether
-        * the stripe unit is a multiple of the inode alignment
-        */
-       if (mp->m_dalign && mp->m_inoalign_mask &&
-                                       !(mp->m_dalign & mp->m_inoalign_mask))
-               mp->m_sinoalign = mp->m_dalign;
-       else
-               mp->m_sinoalign = 0;
+       libxfs_compute_all_maxlevels(mp);
  
         /*
          * Check that the data (and log if separate) are an ok size.
@@ -718,7 +669,7 @@ libxfs_mount(
         d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
         if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
                 fprintf(stderr, _("%s: size check failed\n"), progname);
-               if (!(flags & LIBXFS_MOUNT_DEBUGGER))
+               if (!xfs_is_debugger(mp))
                         return NULL;
         }
  
@@ -753,55 +704,78 @@ libxfs_mount(
  
         xfs_da_mount(mp);
  
-       if (xfs_sb_version_hasattr2(&mp->m_sb))
-               mp->m_flags |= LIBXFS_MOUNT_ATTR2;
-
         /* Initialize the precomputed transaction reservations values */
         xfs_trans_init(mp);
  
-       if (dev == 0)   /* maxtrres, we have no device so leave now */
+       if (xi->data.dev == 0)  /* maxtrres, we have no device so leave now */
                 return mp;
  
-       bp = libxfs_readbuf(mp->m_dev,
-                       d - XFS_FSS_TO_BB(mp, 1), XFS_FSS_TO_BB(mp, 1),
-                       !(flags & LIBXFS_MOUNT_DEBUGGER), NULL);
-       if (!bp) {
+       /* device size checks must pass unless we're a debugger. */
+       error = libxfs_buf_read(mp->m_dev, d - XFS_FSS_TO_BB(mp, 1),
+                       XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
+       if (error) {
                 fprintf(stderr, _("%s: data size check failed\n"), progname);
-               if (!(flags & LIBXFS_MOUNT_DEBUGGER))
+               if (!xfs_is_debugger(mp))
                         return NULL;
         } else
-               libxfs_putbuf(bp);
+               libxfs_buf_relse(bp);
  
-       if (mp->m_logdev_targp->dev &&
-           mp->m_logdev_targp->dev != mp->m_ddev_targp->dev) {
+       if (mp->m_logdev_targp->bt_bdev &&
+           mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) {
                 d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
-               if ( (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) ||
-                    (!(bp = libxfs_readbuf(mp->m_logdev_targp,
-                                       d - XFS_FSB_TO_BB(mp, 1),
-                                       XFS_FSB_TO_BB(mp, 1),
-                                       !(flags & LIBXFS_MOUNT_DEBUGGER), NULL))) ) {
+               if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks ||
+                   libxfs_buf_read(mp->m_logdev_targp,
+                               d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1),
+                               0, &bp, NULL)) {
                         fprintf(stderr, _("%s: log size checks failed\n"),
                                         progname);
-                       if (!(flags & LIBXFS_MOUNT_DEBUGGER))
+                       if (!xfs_is_debugger(mp))
                                 return NULL;
                 }
                 if (bp)
-                       libxfs_putbuf(bp);
+                       libxfs_buf_relse(bp);
         }
  
+       xfs_set_low_space_thresholds(mp);
+
         /* Initialize realtime fields in the mount structure */
-       if (rtmount_init(mp, flags)) {
+       if (rtmount_init(mp)) {
                 fprintf(stderr, _("%s: realtime device init failed\n"),
                         progname);
                         return NULL;
         }
  
-       error = libxfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
+       /*
+        * libxfs_initialize_perag will allocate a perag structure for each ag.
+        * If agcount is corrupted and insanely high, this will OOM the box.
+        * If the agcount seems (arbitrarily) high, try to read what would be
+        * the last AG, and if that fails for a relatively high agcount, just
+        * read the first one and let the user know to check the geometry.
+        */
+       if (sbp->sb_agcount > 1000000) {
+               error = libxfs_buf_read(mp->m_dev,
+                               XFS_AG_DADDR(mp, sbp->sb_agcount - 1, 0), 1,
+                               0, &bp, NULL);
+               if (error) {
+                       fprintf(stderr, _("%s: read of AG %u failed\n"),
+                                               progname, sbp->sb_agcount);
+                       if (!xfs_is_debugger(mp))
+                               return NULL;
+                       fprintf(stderr, _("%s: limiting reads to AG 0\n"),
+                                                               progname);
+                       sbp->sb_agcount = 1;
+               } else
+                       libxfs_buf_relse(bp);
+       }
+
+       error = libxfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks,
+                       &mp->m_maxagi);
         if (error) {
                 fprintf(stderr, _("%s: perag init failed\n"),
                         progname);
                 exit(1);
         }
+       xfs_set_perag_data_loaded(mp);
  
         return mp;
  }
@@ -810,28 +784,121 @@ void
  libxfs_rtmount_destroy(xfs_mount_t *mp)
  {
         if (mp->m_rsumip)
-               IRELE(mp->m_rsumip);
+               libxfs_irele(mp->m_rsumip);
         if (mp->m_rbmip)
-               IRELE(mp->m_rbmip);
+               libxfs_irele(mp->m_rbmip);
         mp->m_rsumip = mp->m_rbmip = NULL;
  }
  
+/*
+ * Flush a device and report on writes that didn't make it to stable storage.
+ *
+ * btp is the buffer target to flush; buftarg_descr is a device description
+ * that is only used in the messages printed to stderr.
+ *
+ * The flush is issued even when a corrupt or lost write has already been
+ * recorded in btp->flags.  Returns 0 if nothing went wrong, otherwise the
+ * first problem found in this order: -EFSCORRUPTED if
+ * XFS_BUFTARG_CORRUPT_WRITE is set, -EIO if XFS_BUFTARG_LOST_WRITE is set,
+ * then the return value of libxfs_blkdev_issue_flush().
+ */
+static inline int
+libxfs_flush_buftarg(
+       struct xfs_buftarg      *btp,
+       const char              *buftarg_descr)
+{
+       int                     error = 0;
+       int                     err2;
+
+       /*
+        * Write verifier failures are evidence of a buggy program.  Make sure
+        * that this state is always reported to the caller.
+        */
+       if (btp->flags & XFS_BUFTARG_CORRUPT_WRITE) {
+               fprintf(stderr,
+_("%s: Refusing to write a corrupt buffer to the %s!\n"),
+                               progname, buftarg_descr);
+               error = -EFSCORRUPTED;
+       }
+
+       if (btp->flags & XFS_BUFTARG_LOST_WRITE) {
+               fprintf(stderr,
+_("%s: Lost a write to the %s!\n"),
+                               progname, buftarg_descr);
+               if (!error)
+                       error = -EIO;
+       }
+
+       err2 = libxfs_blkdev_issue_flush(btp);
+       if (err2) {
+               fprintf(stderr,
+_("%s: Flushing the %s failed, err=%d!\n"),
+                               progname, buftarg_descr, -err2);
+       }
+       if (!error)
+               error = err2;
+
+       return error;
+}
+
+/*
+ * Flush all dirty buffers to stable storage and report on writes that didn't
+ * make it to stable storage.
+ *
+ * After flushing the buffer cache, every non-NULL buftarg of mp is flushed
+ * even if an earlier one failed: the data device, the log device when it is
+ * a different buftarg from the data device, and the realtime device.
+ * Returns 0 if all of them succeeded, otherwise the first nonzero result of
+ * libxfs_flush_buftarg() in that order.
+ */
+int
+libxfs_flush_mount(
+       struct xfs_mount        *mp)
+{
+       int                     error = 0;
+       int                     err2;
+
+       /*
+        * Flush the buffer cache to write all dirty buffers to disk.  Buffers
+        * that fail write verification will cause the CORRUPT_WRITE flag to be
+        * set in the buftarg.  Buffers that cannot be written will cause the
+        * LOST_WRITE flag to be set in the buftarg.  Once that's done,
+        * instruct the disks to persist their write caches.
+        */
+       libxfs_bcache_flush();
+
+       /* Flush all kernel and disk write caches, and report failures. */
+       if (mp->m_ddev_targp) {
+               err2 = libxfs_flush_buftarg(mp->m_ddev_targp, _("data device"));
+               if (!error)
+                       error = err2;
+       }
+
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               err2 = libxfs_flush_buftarg(mp->m_logdev_targp,
+                               _("log device"));
+               if (!error)
+                       error = err2;
+       }
+
+       if (mp->m_rtdev_targp) {
+               err2 = libxfs_flush_buftarg(mp->m_rtdev_targp,
+                               _("realtime device"));
+               if (!error)
+                       error = err2;
+       }
+
+       return error;
+}
+
  /*
   * Release any resource obtained during a mount.
   */
-void
-libxfs_umount(xfs_mount_t *mp)
+int
+libxfs_umount(
+       struct xfs_mount        *mp)
  {
-       struct xfs_perag        *pag;
-       int                     agno;
+       int                     error;
  
         libxfs_rtmount_destroy(mp);
+
+       /*
+        * Purge the buffer cache to write all dirty buffers to disk and free
+        * all incore buffers, then pick up the outcome when we tell the disks
+        * to persist their write caches.
+        */
         libxfs_bcache_purge();
+       error = libxfs_flush_mount(mp);
  
-       for (agno = 0; agno < mp->m_maxagi; agno++) {
-               pag = radix_tree_delete(&mp->m_perag_tree, agno);
-               kmem_free(pag);
-       }
+       /*
+        * Only try to free the per-AG structures if we set them up in the
+        * first place.
+        */
+       if (xfs_is_perag_data_loaded(mp))
+               libxfs_free_perag(mp);
  
         kmem_free(mp->m_attr_geo);
         kmem_free(mp->m_dir_geo);
@@ -840,17 +907,29 @@ libxfs_umount(xfs_mount_t *mp)
         if (mp->m_logdev_targp != mp->m_ddev_targp)
                 kmem_free(mp->m_logdev_targp);
         kmem_free(mp->m_ddev_targp);
-       
+
+       return error;
  }
  
  /*
   * Release any global resources used by libxfs.
   */
  void
-libxfs_destroy(void)
+libxfs_destroy(
+       struct libxfs_init      *li)
  {
-       manage_zones(1);
+       int                     leaked;
+
+       libxfs_close_devices(li);
+
+       /* Free everything in the buffer cache before freeing the cache itself */
+       libxfs_bcache_purge();
+       libxfs_bcache_free();
         cache_destroy(libxfs_bcache);
+       leaked = destroy_caches();
+       rcu_unregister_thread();
+       if (getenv("LIBXFS_LEAK_CHECK") && leaked)
+               exit(1);
  }
  
  int
@@ -871,15 +950,3 @@ libxfs_report(FILE *fp)
         c = asctime(localtime(&t));
         fprintf(fp, "%s", c);
  }
-
-int
-libxfs_nproc(void)
-{
-       return platform_nproc();
-}
-
-unsigned long
-libxfs_physmem(void)
-{
-       return platform_physmem();
-}