]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/init.c
xfs: fix low space alloc deadlock
[thirdparty/xfsprogs-dev.git] / libxfs / init.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
da23017d
NS
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
2bd0ea18
NS
5 */
6
2bd0ea18 7#include <sys/stat.h>
9440d84d 8#include "init.h"
29e62271 9
9c799827 10#include "libxfs_priv.h"
b626fb59
DC
11#include "xfs_fs.h"
12#include "xfs_shared.h"
13#include "xfs_format.h"
14#include "xfs_log_format.h"
15#include "xfs_trans_resv.h"
16#include "xfs_mount.h"
794a5604 17#include "xfs_defer.h"
b626fb59
DC
18#include "xfs_inode_buf.h"
19#include "xfs_inode_fork.h"
20#include "xfs_inode.h"
21#include "xfs_trans.h"
b3a96b46 22#include "xfs_rmap_btree.h"
e7be6330 23#include "xfs_refcount_btree.h"
b658de93 24#include "libfrog/platform.h"
b626fb59 25
6b803e5a 26#include "libxfs.h" /* for now */
b626fb59 27
7448af58
DW
#ifndef HAVE_LIBURCU_ATOMIC64
/*
 * Only defined when HAVE_LIBURCU_ATOMIC64 is not set.
 * NOTE(review): presumably serialises emulated 64-bit atomics - confirm
 * against the users of this lock.
 */
pthread_mutex_t	atomic64_lock = PTHREAD_MUTEX_INITIALIZER;
#endif

/* Program name used in messages: a default, changed by each tool. */
char		*progname = "libxfs";

/* Global buffer cache, and the number of hash buckets in it. */
struct cache	*libxfs_bcache;
int		libxfs_bhash_size;

/* Global flag: use xfs_buf locks for MT. */
int		use_xfs_buf_lock;
d0572de5 38
2bd0ea18
NS
/*
 * dev_map - map open devices to fd.
 *
 * A slot whose dev field is zero is unused; the table starts out all zero.
 */
#define MAX_DEVS 10	/* arbitrary maximum */

/* device number to give to next fake device */
static int nextfakedev = -1;

static struct dev_to_fd {
	dev_t	dev;
	int	fd;
} dev_map[MAX_DEVS] = { { 0 } };
48
2bd0ea18
NS
/*
 * Checks whether a given device has a mounted, writable filesystem.
 *
 * Returns 1 only when @block is a block device that the platform helpers
 * report as both mounted and writable AND @fatal is nonzero.  In every
 * other case 0 is returned so that the caller may proceed (per the
 * original note, a non-fatal hit only produces a warning).
 *
 * Useful to tools which will produce uncertain results if the filesystem
 * is active - repair, check, logprint.
 */
static int
check_isactive(char *name, char *block, int fatal)
{
	struct stat	sb;

	/* Anything we cannot stat, or that is not a block device, is fine. */
	if (stat(block, &sb) < 0 || !S_ISBLK(sb.st_mode))
		return 0;

	if (!platform_check_ismounted(name, block, &sb, 0))
		return 0;
	if (!platform_check_iswritable(name, block, &sb))
		return 0;

	return fatal != 0;
}
72
5000d01d 73/* libxfs_device_to_fd:
2bd0ea18
NS
74 * lookup a device number in the device map
75 * return the associated fd
76 */
77int
78libxfs_device_to_fd(dev_t device)
79{
9440d84d 80 int d;
5000d01d 81
9440d84d 82 for (d = 0; d < MAX_DEVS; d++)
5000d01d 83 if (dev_map[d].dev == device)
2bd0ea18 84 return dev_map[d].fd;
5000d01d 85
9440d84d
NS
86 fprintf(stderr, _("%s: %s: device %lld is not open\n"),
87 progname, __FUNCTION__, (long long)device);
2bd0ea18 88 exit(1);
25d246df 89 /* NOTREACHED */
2bd0ea18
NS
90}
91
92/* libxfs_device_open:
93 * open a device and return its device number
94 */
95dev_t
7eb6693f 96libxfs_device_open(char *path, int creat, int xflags, int setblksize)
2bd0ea18 97{
2bd0ea18 98 dev_t dev;
7eb6693f 99 int fd, d, flags;
b74a1f6a 100 int readonly, dio, excl;
f594a0d1 101 struct stat statb;
7eb6693f
NS
102
103 readonly = (xflags & LIBXFS_ISREADONLY);
104 excl = (xflags & LIBXFS_EXCLUSIVELY) && !creat;
b74a1f6a 105 dio = (xflags & LIBXFS_DIRECT) && !creat && platform_direct_blockdev();
2bd0ea18 106
b74a1f6a 107retry:
7eb6693f
NS
108 flags = (readonly ? O_RDONLY : O_RDWR) | \
109 (creat ? (O_CREAT|O_TRUNC) : 0) | \
b74a1f6a 110 (dio ? O_DIRECT : 0) | \
7eb6693f 111 (excl ? O_EXCL : 0);
b74a1f6a 112
7eb6693f 113 if ((fd = open(path, flags, 0666)) < 0) {
b74a1f6a
NS
114 if (errno == EINVAL && --dio == 0)
115 goto retry;
9440d84d 116 fprintf(stderr, _("%s: cannot open %s: %s\n"),
2bd0ea18
NS
117 progname, path, strerror(errno));
118 exit(1);
119 }
120
f594a0d1 121 if (fstat(fd, &statb) < 0) {
9440d84d 122 fprintf(stderr, _("%s: cannot stat %s: %s\n"),
2bd0ea18
NS
123 progname, path, strerror(errno));
124 exit(1);
125 }
a33a9e62 126
edd45774
TS
127 if (!readonly && setblksize && (statb.st_mode & S_IFMT) == S_IFBLK) {
128 if (setblksize == 1)
129 /* use the default blocksize */
130 (void)platform_set_blocksize(fd, path, statb.st_rdev, XFS_MIN_SECTORSIZE, 0);
131 else {
132 /* given an explicit blocksize to use */
133 if (platform_set_blocksize(fd, path, statb.st_rdev, setblksize, 1))
134 exit(1);
135 }
136 }
2bd0ea18 137
a33a9e62
NS
138 /*
139 * Get the device number from the stat buf - unless
2bd0ea18 140 * we're not opening a real device, in which case
a33a9e62 141 * choose a new fake device number.
2bd0ea18 142 */
32181a02 143 dev = (statb.st_rdev) ? (statb.st_rdev) : (nextfakedev--);
2bd0ea18 144
32181a02 145 for (d = 0; d < MAX_DEVS; d++)
2bd0ea18 146 if (dev_map[d].dev == dev) {
9440d84d 147 fprintf(stderr, _("%s: device %lld is already open\n"),
5b64e00a 148 progname, (long long)dev);
2bd0ea18
NS
149 exit(1);
150 }
151
32181a02 152 for (d = 0; d < MAX_DEVS; d++)
2bd0ea18 153 if (!dev_map[d].dev) {
32181a02
NS
154 dev_map[d].dev = dev;
155 dev_map[d].fd = fd;
5000d01d 156
2bd0ea18
NS
157 return dev;
158 }
159
9440d84d
NS
160 fprintf(stderr, _("%s: %s: too many open devices\n"),
161 progname, __FUNCTION__);
2bd0ea18 162 exit(1);
25d246df 163 /* NOTREACHED */
2bd0ea18
NS
164}
165
166void
167libxfs_device_close(dev_t dev)
168{
5000d01d 169 int d;
2bd0ea18 170
32181a02 171 for (d = 0; d < MAX_DEVS; d++)
2bd0ea18 172 if (dev_map[d].dev == dev) {
023ba280 173 int fd, ret;
5000d01d 174
32181a02
NS
175 fd = dev_map[d].fd;
176 dev_map[d].dev = dev_map[d].fd = 0;
5000d01d 177
023ba280
DW
178 ret = platform_flush_device(fd, dev);
179 if (ret) {
180 ret = -errno;
181 fprintf(stderr,
182 _("%s: flush of device %lld failed, err=%d"),
183 progname, (long long)dev, ret);
184 }
2bd0ea18 185 close(fd);
5000d01d 186
2bd0ea18
NS
187 return;
188 }
189
9440d84d
NS
190 fprintf(stderr, _("%s: %s: device %lld is not open\n"),
191 progname, __FUNCTION__, (long long)dev);
2bd0ea18
NS
192 exit(1);
193}
194
74668075
NS
195static int
196check_open(char *path, int flags, char **rawfile, char **blockfile)
197{
c781939c
RC
198 int readonly = (flags & LIBXFS_ISREADONLY);
199 int inactive = (flags & LIBXFS_ISINACTIVE);
200 int dangerously = (flags & LIBXFS_DANGEROUSLY);
f594a0d1 201 struct stat stbuf;
c781939c 202
f594a0d1 203 if (stat(path, &stbuf) < 0) {
c781939c
RC
204 perror(path);
205 return 0;
206 }
b74a1f6a 207 if (!(*rawfile = platform_findrawpath(path))) {
c781939c
RC
208 fprintf(stderr, _("%s: "
209 "can't find a character device matching %s\n"),
210 progname, path);
211 return 0;
212 }
b74a1f6a 213 if (!(*blockfile = platform_findblockpath(path))) {
c781939c
RC
214 fprintf(stderr, _("%s: "
215 "can't find a block device matching %s\n"),
216 progname, path);
217 return 0;
218 }
219 if (!readonly && !inactive && platform_check_ismounted(path, *blockfile, NULL, 1))
220 return 0;
e08f5594 221
c781939c
RC
222 if (inactive && check_isactive(path, *blockfile, ((readonly|dangerously)?1:0)))
223 return 0;
224
225 return 1;
226}
227
7a326ce0 228/*
2e1394fc 229 * Initialize/destroy all of the cache allocators we use.
7a326ce0
ES
230 */
231static void
2e1394fc 232init_caches(void)
7a326ce0 233{
7d10d094
DW
234 int error;
235
2e1394fc
DW
236 /* initialise cache allocation */
237 xfs_buf_cache = kmem_cache_init(sizeof(struct xfs_buf), "xfs_buffer");
238 xfs_inode_cache = kmem_cache_init(sizeof(struct xfs_inode), "xfs_inode");
239 xfs_ifork_cache = kmem_cache_init(sizeof(struct xfs_ifork), "xfs_ifork");
240 xfs_ili_cache = kmem_cache_init(
7a326ce0 241 sizeof(struct xfs_inode_log_item),"xfs_inode_log_item");
2e1394fc 242 xfs_buf_item_cache = kmem_cache_init(
7a326ce0 243 sizeof(struct xfs_buf_log_item), "xfs_buf_log_item");
1577541c
DW
244 error = xfs_defer_init_item_caches();
245 if (error) {
246 fprintf(stderr, "Could not allocate defer init item caches.\n");
247 abort();
248 }
2e1394fc 249 xfs_da_state_cache = kmem_cache_init(
7a326ce0 250 sizeof(struct xfs_da_state), "xfs_da_state");
7d10d094
DW
251 error = xfs_btree_init_cur_caches();
252 if (error) {
253 fprintf(stderr, "Could not allocate btree cursor caches.\n");
254 abort();
255 }
7d84b02d 256 xfs_extfree_item_cache = kmem_cache_init(
7a326ce0 257 sizeof(struct xfs_extent_free_item),
7d84b02d 258 "xfs_extfree_item");
2e1394fc 259 xfs_trans_cache = kmem_cache_init(
7a326ce0
ES
260 sizeof(struct xfs_trans), "xfs_trans");
261}
262
263static int
2e1394fc 264destroy_caches(void)
7a326ce0
ES
265{
266 int leaked = 0;
267
2e1394fc
DW
268 leaked += kmem_cache_destroy(xfs_buf_cache);
269 leaked += kmem_cache_destroy(xfs_ili_cache);
270 leaked += kmem_cache_destroy(xfs_inode_cache);
271 leaked += kmem_cache_destroy(xfs_ifork_cache);
272 leaked += kmem_cache_destroy(xfs_buf_item_cache);
273 leaked += kmem_cache_destroy(xfs_da_state_cache);
1577541c 274 xfs_defer_destroy_item_caches();
7d10d094 275 xfs_btree_destroy_cur_caches();
7d84b02d 276 leaked += kmem_cache_destroy(xfs_extfree_item_cache);
2e1394fc 277 leaked += kmem_cache_destroy(xfs_trans_cache);
7a326ce0
ES
278
279 return leaked;
280}
281
a9468486
DW
282static void
283libxfs_close_devices(
284 struct libxfs_xinit *li)
285{
286 if (li->ddev)
287 libxfs_device_close(li->ddev);
288 if (li->logdev && li->logdev != li->ddev)
289 libxfs_device_close(li->logdev);
290 if (li->rtdev)
291 libxfs_device_close(li->rtdev);
292
293 li->ddev = li->logdev = li->rtdev = 0;
294 li->dfd = li->logfd = li->rtfd = -1;
295}
296
2bd0ea18
NS
297/*
298 * libxfs initialization.
299 * Caller gets a 0 on failure (and we print a message), 1 on success.
300 */
301int
302libxfs_init(libxfs_init_t *a)
303{
304 char *blockfile;
2bd0ea18
NS
305 char *dname;
306 char dpath[25];
307 int fd;
308 char *logname;
309 char logpath[25];
2bd0ea18
NS
310 char *rawfile;
311 char *rtname;
312 char rtpath[25];
313 int rval = 0;
c781939c 314 int flags;
2bd0ea18
NS
315
316 dpath[0] = logpath[0] = rtpath[0] = '\0';
317 dname = a->dname;
318 logname = a->logname;
319 rtname = a->rtname;
2bd0ea18 320 a->dfd = a->logfd = a->rtfd = -1;
74668075 321 a->ddev = a->logdev = a->rtdev = 0;
06ac92fd
DC
322 a->dsize = a->lbsize = a->rtbsize = 0;
323 a->dbsize = a->logBBsize = a->logBBstart = a->rtsize = 0;
2bd0ea18 324
2bd0ea18 325 fd = -1;
b74a1f6a 326 flags = (a->isreadonly | a->isdirect);
c781939c 327
e4da1b16
DC
328 rcu_init();
329 rcu_register_thread();
bacd44a5
AE
330 radix_tree_init();
331
2bd0ea18 332 if (a->volname) {
c781939c 333 if(!check_open(a->volname,flags,&rawfile,&blockfile))
2bd0ea18 334 goto done;
2bd0ea18 335 fd = open(rawfile, O_RDONLY);
eb37fca5
NS
336 dname = a->dname = a->volname;
337 a->volname = NULL;
2bd0ea18 338 }
2bd0ea18 339 if (dname) {
2bd0ea18 340 if (a->disfile) {
7eb6693f 341 a->ddev= libxfs_device_open(dname, a->dcreat, flags,
c5907b96 342 a->setblksize);
2bd0ea18 343 a->dfd = libxfs_device_to_fd(a->ddev);
06ac92fd
DC
344 platform_findsizes(dname, a->dfd, &a->dsize,
345 &a->dbsize);
2bd0ea18 346 } else {
f02037ce 347 if (!check_open(dname, flags, &rawfile, &blockfile))
2bd0ea18
NS
348 goto done;
349 a->ddev = libxfs_device_open(rawfile,
7eb6693f 350 a->dcreat, flags, a->setblksize);
2bd0ea18 351 a->dfd = libxfs_device_to_fd(a->ddev);
f02037ce 352 platform_findsizes(rawfile, a->dfd,
06ac92fd 353 &a->dsize, &a->dbsize);
2bd0ea18 354 }
2bd0ea18
NS
355 } else
356 a->dsize = 0;
357 if (logname) {
2bd0ea18
NS
358 if (a->lisfile) {
359 a->logdev = libxfs_device_open(logname,
7eb6693f 360 a->lcreat, flags, a->setblksize);
2bd0ea18 361 a->logfd = libxfs_device_to_fd(a->logdev);
06ac92fd
DC
362 platform_findsizes(dname, a->logfd, &a->logBBsize,
363 &a->lbsize);
2bd0ea18 364 } else {
f02037ce 365 if (!check_open(logname, flags, &rawfile, &blockfile))
2bd0ea18
NS
366 goto done;
367 a->logdev = libxfs_device_open(rawfile,
7eb6693f 368 a->lcreat, flags, a->setblksize);
2bd0ea18 369 a->logfd = libxfs_device_to_fd(a->logdev);
f02037ce 370 platform_findsizes(rawfile, a->logfd,
06ac92fd 371 &a->logBBsize, &a->lbsize);
2bd0ea18 372 }
2bd0ea18
NS
373 } else
374 a->logBBsize = 0;
375 if (rtname) {
2bd0ea18
NS
376 if (a->risfile) {
377 a->rtdev = libxfs_device_open(rtname,
7eb6693f 378 a->rcreat, flags, a->setblksize);
2bd0ea18 379 a->rtfd = libxfs_device_to_fd(a->rtdev);
06ac92fd
DC
380 platform_findsizes(dname, a->rtfd, &a->rtsize,
381 &a->rtbsize);
2bd0ea18 382 } else {
f02037ce 383 if (!check_open(rtname, flags, &rawfile, &blockfile))
2bd0ea18
NS
384 goto done;
385 a->rtdev = libxfs_device_open(rawfile,
7eb6693f 386 a->rcreat, flags, a->setblksize);
2bd0ea18 387 a->rtfd = libxfs_device_to_fd(a->rtdev);
f02037ce 388 platform_findsizes(rawfile, a->rtfd,
06ac92fd 389 &a->rtsize, &a->rtbsize);
2bd0ea18 390 }
2bd0ea18
NS
391 } else
392 a->rtsize = 0;
393 if (a->dsize < 0) {
9440d84d 394 fprintf(stderr, _("%s: can't get size for data subvolume\n"),
2bd0ea18
NS
395 progname);
396 goto done;
397 }
398 if (a->logBBsize < 0) {
9440d84d 399 fprintf(stderr, _("%s: can't get size for log subvolume\n"),
2bd0ea18
NS
400 progname);
401 goto done;
402 }
403 if (a->rtsize < 0) {
9440d84d 404 fprintf(stderr, _("%s: can't get size for realtime subvolume\n"),
2bd0ea18
NS
405 progname);
406 goto done;
407 }
9f38f08d
MV
408 if (!libxfs_bhash_size)
409 libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp);
ba9ecd40
DC
410 libxfs_bcache = cache_init(a->bcache_flags, libxfs_bhash_size,
411 &libxfs_bcache_operations);
d0572de5 412 use_xfs_buf_lock = a->usebuflock;
7a326ce0 413 xfs_dir_startup();
2e1394fc 414 init_caches();
2bd0ea18
NS
415 rval = 1;
416done:
417 if (dpath[0])
418 unlink(dpath);
419 if (logpath[0])
420 unlink(logpath);
421 if (rtpath[0])
422 unlink(rtpath);
423 if (fd >= 0)
424 close(fd);
c1ab394a 425 if (!rval) {
a9468486 426 libxfs_close_devices(a);
c1ab394a
DW
427 rcu_unregister_thread();
428 }
a9468486 429
2bd0ea18
NS
430 return rval;
431}
432
433
b391b7cd
NS
434/*
435 * Initialize realtime fields in the mount structure.
436 */
437static int
438rtmount_init(
2420d095 439 xfs_mount_t *mp) /* file system mount structure */
b391b7cd 440{
31079e67 441 struct xfs_buf *bp; /* buffer for last block of subvolume */
b391b7cd 442 xfs_daddr_t d; /* address of last block of subvolume */
31079e67 443 int error;
b391b7cd 444
575f24e5 445 if (mp->m_sb.sb_rblocks == 0)
b391b7cd 446 return 0;
4aaeedc4 447
eefdf2ab 448 if (xfs_has_reflink(mp)) {
4aaeedc4
DW
449 fprintf(stderr,
450 _("%s: Reflink not compatible with realtime device. Please try a newer xfsprogs.\n"),
451 progname);
452 return -1;
453 }
454
eefdf2ab 455 if (xfs_has_rmapbt(mp)) {
4aaeedc4
DW
456 fprintf(stderr,
457 _("%s: Reverse mapping btree not compatible with realtime device. Please try a newer xfsprogs.\n"),
458 progname);
459 return -1;
460 }
461
2420d095 462 if (mp->m_rtdev_targp->bt_bdev == 0 && !xfs_is_debugger(mp)) {
9440d84d 463 fprintf(stderr, _("%s: filesystem has a realtime subvolume\n"),
b391b7cd
NS
464 progname);
465 return -1;
466 }
575f24e5 467 mp->m_rsumlevels = mp->m_sb.sb_rextslog + 1;
b391b7cd
NS
468 mp->m_rsumsize =
469 (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
575f24e5
DW
470 mp->m_sb.sb_rbmblocks;
471 mp->m_rsumsize = roundup(mp->m_rsumsize, mp->m_sb.sb_blocksize);
b391b7cd 472 mp->m_rbmip = mp->m_rsumip = NULL;
39798eb5
NS
473
474 /*
475 * Allow debugger to be run without the realtime device present.
476 */
2420d095 477 if (xfs_is_debugger(mp))
39798eb5
NS
478 return 0;
479
b391b7cd
NS
480 /*
481 * Check that the realtime section is an ok size.
482 */
483 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
484 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) {
9440d84d
NS
485 fprintf(stderr, _("%s: realtime init - %llu != %llu\n"),
486 progname, (unsigned long long) XFS_BB_TO_FSB(mp, d),
b391b7cd
NS
487 (unsigned long long) mp->m_sb.sb_rblocks);
488 return -1;
489 }
31079e67
DW
490 error = libxfs_buf_read(mp->m_rtdev, d - XFS_FSB_TO_BB(mp, 1),
491 XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
492 if (error) {
9440d84d
NS
493 fprintf(stderr, _("%s: realtime size check failed\n"),
494 progname);
b391b7cd
NS
495 return -1;
496 }
e02ba985 497 libxfs_buf_relse(bp);
b391b7cd
NS
498 return 0;
499}
500
b9ee1227
DW
501/*
502 * Set parameters for inode allocation heuristics, taking into account
503 * filesystem size and inode32/inode64 mount options; i.e. specifically
504 * whether or not XFS_MOUNT_SMALL_INUMS is set.
505 *
506 * Inode allocation patterns are altered only if inode32 is requested
507 * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
508 * If altered, XFS_MOUNT_32BITINODES is set as well.
509 *
510 * An agcount independent of that in the mount structure is provided
511 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
512 * to the potentially higher ag count.
513 *
514 * Returns the maximum AG index which may contain inodes.
ed8f5980
DW
515 *
516 * NOTE: userspace has no concept of "inode32" and so xfs_has_small_inums
517 * is always false, and much of this code is a no-op.
b9ee1227
DW
518 */
519xfs_agnumber_t
520xfs_set_inode_alloc(
521 struct xfs_mount *mp,
522 xfs_agnumber_t agcount)
523{
524 xfs_agnumber_t index;
525 xfs_agnumber_t maxagi = 0;
526 xfs_sb_t *sbp = &mp->m_sb;
527 xfs_agnumber_t max_metadata;
528 xfs_agino_t agino;
529 xfs_ino_t ino;
530
531 /*
532 * Calculate how much should be reserved for inodes to meet
533 * the max inode percentage. Used only for inode32.
534 */
535 if (M_IGEO(mp)->maxicount) {
536 uint64_t icount;
537
538 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
539 do_div(icount, 100);
540 icount += sbp->sb_agblocks - 1;
541 do_div(icount, sbp->sb_agblocks);
542 max_metadata = icount;
543 } else {
544 max_metadata = agcount;
545 }
546
547 /* Get the last possible inode in the filesystem */
548 agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
549 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
550
551 /*
552 * If user asked for no more than 32-bit inodes, and the fs is
553 * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
554 * the allocator to accommodate the request.
555 */
ed8f5980 556 if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
0ee9753e 557 xfs_set_inode32(mp);
ed8f5980 558 else
0ee9753e 559 xfs_clear_inode32(mp);
b9ee1227
DW
560
561 for (index = 0; index < agcount; index++) {
562 struct xfs_perag *pag;
563
564 ino = XFS_AGINO_TO_INO(mp, index, agino);
565
566 pag = xfs_perag_get(mp, index);
567
0ee9753e 568 if (xfs_is_inode32(mp)) {
b9ee1227
DW
569 if (ino > XFS_MAXINUMBER_32) {
570 pag->pagi_inodeok = 0;
571 pag->pagf_metadata = 0;
572 } else {
573 pag->pagi_inodeok = 1;
574 maxagi++;
575 if (index < max_metadata)
576 pag->pagf_metadata = 1;
577 else
578 pag->pagf_metadata = 0;
579 }
580 } else {
581 pag->pagi_inodeok = 1;
582 pag->pagf_metadata = 0;
583 }
584
585 xfs_perag_put(pag);
586 }
587
0ee9753e 588 return xfs_is_inode32(mp) ? maxagi : agcount;
b9ee1227
DW
589}
590
75c8b434
DC
591static struct xfs_buftarg *
592libxfs_buftarg_alloc(
593 struct xfs_mount *mp,
704e4cef
DW
594 dev_t dev,
595 unsigned long write_fails)
75c8b434
DC
596{
597 struct xfs_buftarg *btp;
598
599 btp = malloc(sizeof(*btp));
600 if (!btp) {
601 fprintf(stderr, _("%s: buftarg init failed\n"),
602 progname);
603 exit(1);
604 }
605 btp->bt_mount = mp;
ab434d12 606 btp->bt_bdev = dev;
c335b673 607 btp->flags = 0;
704e4cef
DW
608 if (write_fails) {
609 btp->writes_left = write_fails;
610 btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL;
611 }
612 pthread_mutex_init(&btp->lock, NULL);
c335b673 613
75c8b434
DC
614 return btp;
615}
616
704e4cef
DW
/*
 * Suboption indices for the LIBXFS_DEBUG_WRITE_CRASH environment
 * variable; each value indexes the matching name in wf_opts[].
 */
enum libxfs_write_failure_nums {
	WF_DATA		= 0,
	WF_LOG		= 1,
	WF_RT		= 2,
	WF_MAX_OPTS	= 3,
};

/* Suboption names handed to getsubopt(), terminated by a NULL entry. */
static char *wf_opts[] = {
	[WF_DATA]	= "ddev",
	[WF_LOG]	= "logdev",
	[WF_RT]		= "rtdev",
	[WF_MAX_OPTS]	= NULL,
};
630
75c8b434
DC
631void
632libxfs_buftarg_init(
633 struct xfs_mount *mp,
634 dev_t dev,
635 dev_t logdev,
636 dev_t rtdev)
637{
704e4cef
DW
638 char *p = getenv("LIBXFS_DEBUG_WRITE_CRASH");
639 unsigned long dfail = 0, lfail = 0, rfail = 0;
640
641 /* Simulate utility crash after a certain number of writes. */
642 while (p && *p) {
643 char *val;
644
645 switch (getsubopt(&p, wf_opts, &val)) {
646 case WF_DATA:
647 if (!val) {
648 fprintf(stderr,
649 _("ddev write fail requires a parameter\n"));
650 exit(1);
651 }
652 dfail = strtoul(val, NULL, 0);
653 break;
654 case WF_LOG:
655 if (!val) {
656 fprintf(stderr,
657 _("logdev write fail requires a parameter\n"));
658 exit(1);
659 }
660 lfail = strtoul(val, NULL, 0);
661 break;
662 case WF_RT:
663 if (!val) {
664 fprintf(stderr,
665 _("rtdev write fail requires a parameter\n"));
666 exit(1);
667 }
668 rfail = strtoul(val, NULL, 0);
669 break;
670 default:
671 fprintf(stderr, _("unknown write fail type %s\n"),
672 val);
673 exit(1);
674 break;
675 }
676 }
677
75c8b434
DC
678 if (mp->m_ddev_targp) {
679 /* should already have all buftargs initialised */
ab434d12 680 if (mp->m_ddev_targp->bt_bdev != dev ||
75c8b434
DC
681 mp->m_ddev_targp->bt_mount != mp) {
682 fprintf(stderr,
683 _("%s: bad buftarg reinit, ddev\n"),
684 progname);
685 exit(1);
686 }
687 if (!logdev || logdev == dev) {
688 if (mp->m_logdev_targp != mp->m_ddev_targp) {
689 fprintf(stderr,
690 _("%s: bad buftarg reinit, ldev mismatch\n"),
691 progname);
692 exit(1);
693 }
ab434d12 694 } else if (mp->m_logdev_targp->bt_bdev != logdev ||
75c8b434
DC
695 mp->m_logdev_targp->bt_mount != mp) {
696 fprintf(stderr,
697 _("%s: bad buftarg reinit, logdev\n"),
698 progname);
699 exit(1);
700 }
ab434d12 701 if (rtdev && (mp->m_rtdev_targp->bt_bdev != rtdev ||
75c8b434
DC
702 mp->m_rtdev_targp->bt_mount != mp)) {
703 fprintf(stderr,
704 _("%s: bad buftarg reinit, rtdev\n"),
705 progname);
706 exit(1);
707 }
708 return;
709 }
710
704e4cef 711 mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev, dfail);
75c8b434
DC
712 if (!logdev || logdev == dev)
713 mp->m_logdev_targp = mp->m_ddev_targp;
714 else
704e4cef
DW
715 mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev, lfail);
716 mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev, rfail);
75c8b434
DC
717}
718
6afce48f
DW
719/* Compute maximum possible height for per-AG btree types for this fs. */
720static inline void
721xfs_agbtree_compute_maxlevels(
722 struct xfs_mount *mp)
723{
724 unsigned int levels;
725
726 levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels);
727 levels = max(levels, mp->m_rmap_maxlevels);
728 mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
729}
730
7aeffc87
DW
731/* Compute maximum possible height of all btrees. */
732void
733libxfs_compute_all_maxlevels(
734 struct xfs_mount *mp)
735{
736 xfs_alloc_compute_maxlevels(mp);
737 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
738 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
739 xfs_ialloc_setup_geometry(mp);
740 xfs_rmapbt_compute_maxlevels(mp);
741 xfs_refcountbt_compute_maxlevels(mp);
742
743 xfs_agbtree_compute_maxlevels(mp);
744}
745
2bd0ea18
NS
746/*
747 * Mount structure initialization, provides a filled-in xfs_mount_t
748 * such that the numerous XFS_* macros can be used. If dev is zero,
749 * no IO will be performed (no size checks, read root inodes).
750 */
d855bce8 751struct xfs_mount *
2bd0ea18 752libxfs_mount(
d855bce8
DW
753 struct xfs_mount *mp,
754 struct xfs_sb *sb,
755 dev_t dev,
756 dev_t logdev,
757 dev_t rtdev,
ed8f5980 758 unsigned int flags)
2bd0ea18 759{
d855bce8
DW
760 struct xfs_buf *bp;
761 struct xfs_sb *sbp;
762 xfs_daddr_t d;
d855bce8 763 int error;
2bd0ea18 764
3bc1fdd4 765 mp->m_features = xfs_sb_version_to_features(sb);
2420d095
DW
766 if (flags & LIBXFS_MOUNT_DEBUGGER)
767 xfs_set_debugger(mp);
e42c53f3 768 if (flags & LIBXFS_MOUNT_REPORT_CORRUPTION)
2420d095 769 xfs_set_reporting_corruption(mp);
75c8b434
DC
770 libxfs_buftarg_init(mp, dev, logdev, rtdev);
771
f747f7dd 772 mp->m_finobt_nores = true;
0ee9753e 773 xfs_set_inode32(mp);
2bd0ea18 774 mp->m_sb = *sb;
56b2de80 775 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_KERNEL);
686bddf9
DC
776 sbp = &mp->m_sb;
777 spin_lock_init(&mp->m_sb_lock);
778 spin_lock_init(&mp->m_agirotor_lock);
2bd0ea18 779
4896e6c8 780 xfs_sb_mount_common(mp, sb);
2bd0ea18 781
949c0f10
NS
782 /*
783 * Set whether we're using stripe alignment.
784 */
2660e653 785 if (xfs_has_dalign(mp)) {
949c0f10
NS
786 mp->m_dalign = sbp->sb_unit;
787 mp->m_swidth = sbp->sb_width;
788 }
789
7aeffc87 790 libxfs_compute_all_maxlevels(mp);
6afce48f 791
2bd0ea18
NS
792 /*
793 * Check that the data (and log if separate) are an ok size.
794 */
9440d84d 795 d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
2bd0ea18 796 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
9440d84d 797 fprintf(stderr, _("%s: size check failed\n"), progname);
2420d095 798 if (!xfs_is_debugger(mp))
4ca431fc 799 return NULL;
2bd0ea18
NS
800 }
801
ff105f75
DC
802 /*
803 * We automatically convert v1 inodes to v2 inodes now, so if
804 * the NLINK bit is not set we can't operate on the filesystem.
805 */
806 if (!(sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
807
808 fprintf(stderr, _(
809 "%s: V1 inodes unsupported. Please try an older xfsprogs.\n"),
810 progname);
811 exit(1);
812 }
813
814 /* Check for supported directory formats */
815 if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) {
9a048535
DC
816
817 fprintf(stderr, _(
818 "%s: V1 directories unsupported. Please try an older xfsprogs.\n"),
819 progname);
820 exit(1);
5e656dbb 821 }
2bd0ea18 822
ff105f75
DC
823 /* check for unsupported other features */
824 if (!xfs_sb_good_version(sbp)) {
825 fprintf(stderr, _(
826 "%s: Unsupported features detected. Please try a newer xfsprogs.\n"),
827 progname);
828 exit(1);
829 }
830
831 xfs_da_mount(mp);
832
2bd0ea18 833 /* Initialize the precomputed transaction reservations values */
5e656dbb 834 xfs_trans_init(mp);
2bd0ea18
NS
835
836 if (dev == 0) /* maxtrres, we have no device so leave now */
837 return mp;
838
d855bce8 839 /* device size checks must pass unless we're a debugger. */
31079e67
DW
840 error = libxfs_buf_read(mp->m_dev, d - XFS_FSS_TO_BB(mp, 1),
841 XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
842 if (error) {
9440d84d 843 fprintf(stderr, _("%s: data size check failed\n"), progname);
2420d095 844 if (!xfs_is_debugger(mp))
4ca431fc 845 return NULL;
32244196 846 } else
e02ba985 847 libxfs_buf_relse(bp);
2bd0ea18 848
ab434d12
DC
849 if (mp->m_logdev_targp->bt_bdev &&
850 mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) {
9440d84d 851 d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
31079e67
DW
852 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks ||
853 libxfs_buf_read(mp->m_logdev_targp,
854 d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1),
855 0, &bp, NULL)) {
9440d84d 856 fprintf(stderr, _("%s: log size checks failed\n"),
2bd0ea18 857 progname);
2420d095 858 if (!xfs_is_debugger(mp))
4ca431fc 859 return NULL;
2bd0ea18 860 }
32244196 861 if (bp)
e02ba985 862 libxfs_buf_relse(bp);
2bd0ea18
NS
863 }
864
865 /* Initialize realtime fields in the mount structure */
2420d095 866 if (rtmount_init(mp)) {
9440d84d
NS
867 fprintf(stderr, _("%s: realtime device init failed\n"),
868 progname);
4ca431fc 869 return NULL;
2bd0ea18
NS
870 }
871
a547152d
ES
872 /*
873 * libxfs_initialize_perag will allocate a perag structure for each ag.
874 * If agcount is corrupted and insanely high, this will OOM the box.
875 * If the agount seems (arbitrarily) high, try to read what would be
876 * the last AG, and if that fails for a relatively high agcount, just
877 * read the first one and let the user know to check the geometry.
878 */
879 if (sbp->sb_agcount > 1000000) {
31079e67 880 error = libxfs_buf_read(mp->m_dev,
a547152d 881 XFS_AG_DADDR(mp, sbp->sb_agcount - 1, 0), 1,
31079e67
DW
882 0, &bp, NULL);
883 if (error) {
a547152d
ES
884 fprintf(stderr, _("%s: read of AG %u failed\n"),
885 progname, sbp->sb_agcount);
2420d095 886 if (!xfs_is_debugger(mp))
a547152d
ES
887 return NULL;
888 fprintf(stderr, _("%s: limiting reads to AG 0\n"),
889 progname);
890 sbp->sb_agcount = 1;
31079e67
DW
891 } else
892 libxfs_buf_relse(bp);
a547152d
ES
893 }
894
83af0d13
DC
895 error = libxfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks,
896 &mp->m_maxagi);
56b2de80
DC
897 if (error) {
898 fprintf(stderr, _("%s: perag init failed\n"),
899 progname);
2bd0ea18
NS
900 exit(1);
901 }
2420d095 902 xfs_set_perag_data_loaded(mp);
2bd0ea18 903
2bd0ea18
NS
904 return mp;
905}
906
f1b058f9
NS
907void
908libxfs_rtmount_destroy(xfs_mount_t *mp)
909{
910 if (mp->m_rsumip)
31845e4c 911 libxfs_irele(mp->m_rsumip);
f1b058f9 912 if (mp->m_rbmip)
31845e4c 913 libxfs_irele(mp->m_rbmip);
f1b058f9
NS
914 mp->m_rsumip = mp->m_rbmip = NULL;
915}
916
c335b673
DW
917/* Flush a device and report on writes that didn't make it to stable storage. */
918static inline int
919libxfs_flush_buftarg(
920 struct xfs_buftarg *btp,
921 const char *buftarg_descr)
922{
923 int error = 0;
924 int err2;
925
926 /*
927 * Write verifier failures are evidence of a buggy program. Make sure
928 * that this state is always reported to the caller.
929 */
930 if (btp->flags & XFS_BUFTARG_CORRUPT_WRITE) {
931 fprintf(stderr,
932_("%s: Refusing to write a corrupt buffer to the %s!\n"),
933 progname, buftarg_descr);
934 error = -EFSCORRUPTED;
935 }
936
937 if (btp->flags & XFS_BUFTARG_LOST_WRITE) {
938 fprintf(stderr,
939_("%s: Lost a write to the %s!\n"),
940 progname, buftarg_descr);
941 if (!error)
942 error = -EIO;
943 }
944
945 err2 = libxfs_blkdev_issue_flush(btp);
946 if (err2) {
947 fprintf(stderr,
948_("%s: Flushing the %s failed, err=%d!\n"),
949 progname, buftarg_descr, -err2);
950 }
951 if (!error)
952 error = err2;
953
954 return error;
955}
956
957/*
958 * Flush all dirty buffers to stable storage and report on writes that didn't
959 * make it to stable storage.
960 */
a7348c58 961int
c335b673
DW
962libxfs_flush_mount(
963 struct xfs_mount *mp)
964{
965 int error = 0;
966 int err2;
967
968 /*
a7348c58
DW
969 * Flush the buffer cache to write all dirty buffers to disk. Buffers
970 * that fail write verification will cause the CORRUPT_WRITE flag to be
971 * set in the buftarg. Buffers that cannot be written will cause the
972 * LOST_WRITE flag to be set in the buftarg. Once that's done,
973 * instruct the disks to persist their write caches.
c335b673 974 */
a7348c58 975 libxfs_bcache_flush();
c335b673
DW
976
977 /* Flush all kernel and disk write caches, and report failures. */
978 if (mp->m_ddev_targp) {
979 err2 = libxfs_flush_buftarg(mp->m_ddev_targp, _("data device"));
980 if (!error)
981 error = err2;
982 }
983
984 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
985 err2 = libxfs_flush_buftarg(mp->m_logdev_targp,
986 _("log device"));
987 if (!error)
988 error = err2;
989 }
990
991 if (mp->m_rtdev_targp) {
992 err2 = libxfs_flush_buftarg(mp->m_rtdev_targp,
993 _("realtime device"));
994 if (!error)
995 error = err2;
996 }
997
998 return error;
999}
1000
2bd0ea18 1001/*
9440d84d 1002 * Release any resource obtained during a mount.
2bd0ea18 1003 */
c335b673
DW
1004int
1005libxfs_umount(
1006 struct xfs_mount *mp)
2bd0ea18 1007{
c335b673 1008 int error;
56b2de80 1009
f1b058f9 1010 libxfs_rtmount_destroy(mp);
c335b673 1011
a7348c58
DW
1012 /*
1013 * Purge the buffer cache to write all dirty buffers to disk and free
1014 * all incore buffers, then pick up the outcome when we tell the disks
1015 * to persist their write caches.
1016 */
1017 libxfs_bcache_purge();
c335b673 1018 error = libxfs_flush_mount(mp);
f1b058f9 1019
7bf9cd9d
DW
1020 /*
1021 * Only try to free the per-AG structures if we set them up in the
1022 * first place.
1023 */
2420d095 1024 if (xfs_is_perag_data_loaded(mp))
4bcd30f6 1025 libxfs_free_perag(mp);
4334e2e8
ES
1026
1027 kmem_free(mp->m_attr_geo);
1028 kmem_free(mp->m_dir_geo);
1029
1030 kmem_free(mp->m_rtdev_targp);
1031 if (mp->m_logdev_targp != mp->m_ddev_targp)
1032 kmem_free(mp->m_logdev_targp);
1033 kmem_free(mp->m_ddev_targp);
f8149110 1034
c335b673 1035 return error;
2bd0ea18 1036}
f1b058f9
NS
1037
1038/*
1039 * Release any global resources used by libxfs.
1040 */
1041void
a9468486
DW
1042libxfs_destroy(
1043 struct libxfs_xinit *li)
f1b058f9 1044{
a9468486
DW
1045 int leaked;
1046
1047 libxfs_close_devices(li);
44488491 1048
2e1394fc 1049 /* Free everything from the buffer cache before freeing buffer cache */
864028ed
ES
1050 libxfs_bcache_purge();
1051 libxfs_bcache_free();
f1b058f9 1052 cache_destroy(libxfs_bcache);
2e1394fc 1053 leaked = destroy_caches();
e4da1b16 1054 rcu_unregister_thread();
44488491
ES
1055 if (getenv("LIBXFS_LEAK_CHECK") && leaked)
1056 exit(1);
f1b058f9 1057}
9f38f08d 1058
b74a1f6a
NS
/* Return the value reported by platform_align_blockdev(). */
int
libxfs_device_alignment(void)
{
	int	alignment = platform_align_blockdev();

	return alignment;
}
1064
9f38f08d 1065void
b6281496 1066libxfs_report(FILE *fp)
9f38f08d 1067{
cb5b3ef4
MV
1068 time_t t;
1069 char *c;
1070
b6281496 1071 cache_report(fp, "libxfs_bcache", libxfs_bcache);
cb5b3ef4
MV
1072
1073 t = time(NULL);
1074 c = asctime(localtime(&t));
1075 fprintf(fp, "%s", c);
1076}