]>
Commit | Line | Data |
---|---|---|
1 | // SPDX-License-Identifier: GPL-2.0 | |
2 | /* | |
3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | |
4 | * All Rights Reserved. | |
5 | */ | |
6 | ||
7 | #include <sys/stat.h> | |
8 | #include "init.h" | |
9 | ||
10 | #include "libxfs_priv.h" | |
11 | #include "xfs_fs.h" | |
12 | #include "xfs_shared.h" | |
13 | #include "xfs_format.h" | |
14 | #include "xfs_log_format.h" | |
15 | #include "xfs_trans_resv.h" | |
16 | #include "xfs_mount.h" | |
17 | #include "xfs_defer.h" | |
18 | #include "xfs_inode_buf.h" | |
19 | #include "xfs_inode_fork.h" | |
20 | #include "xfs_inode.h" | |
21 | #include "xfs_trans.h" | |
22 | #include "xfs_rmap_btree.h" | |
23 | #include "xfs_refcount_btree.h" | |
24 | #include "xfs_metafile.h" | |
25 | #include "libfrog/platform.h" | |
26 | #include "libfrog/util.h" | |
27 | #include "libxfs/xfile.h" | |
28 | #include "libxfs/buf_mem.h" | |
29 | ||
30 | #include "xfs_format.h" | |
31 | #include "xfs_da_format.h" | |
32 | #include "xfs_log_format.h" | |
33 | #include "xfs_ondisk.h" | |
34 | ||
35 | #include "libxfs.h" /* for now */ | |
36 | #include "xfs_rtgroup.h" | |
37 | ||
/*
 * Lock object that is only defined when HAVE_LIBURCU_ATOMIC64 is not set.
 * NOTE(review): presumably this serializes an emulated 64-bit atomic
 * implementation that lives elsewhere - confirm against the atomic64 helpers.
 */
#ifndef HAVE_LIBURCU_ATOMIC64
pthread_mutex_t	atomic64_lock = PTHREAD_MUTEX_INITIALIZER;
#endif

char *progname = "libxfs";	/* default, changed by each tool */

int libxfs_bhash_size;		/* #buckets in bcache */

int	use_xfs_buf_lock;	/* global flag: use xfs_buf locks for MT */

static int nextfakedev = -1;	/* device number to give to next fake device */

/* Filled in by libxfs_init() from PAGE_SIZE while it is still zero. */
unsigned int PAGE_SHIFT;
51 | ||
52 | /* | |
53 | * Checks whether a given device has a mounted, writable | |
54 | * filesystem, returns 1 if it does & fatal (just warns | |
55 | * if not fatal, but allows us to proceed). | |
56 | * | |
57 | * Useful to tools which will produce uncertain results | |
58 | * if the filesystem is active - repair, check, logprint. | |
59 | */ | |
60 | static int | |
61 | check_isactive(char *name, char *block, int fatal) | |
62 | { | |
63 | struct stat st; | |
64 | ||
65 | if (stat(block, &st) < 0) | |
66 | return 0; | |
67 | if ((st.st_mode & S_IFMT) != S_IFBLK) | |
68 | return 0; | |
69 | if (platform_check_ismounted(name, block, &st, 0) == 0) | |
70 | return 0; | |
71 | if (platform_check_iswritable(name, block, &st)) | |
72 | return fatal ? 1 : 0; | |
73 | return 0; | |
74 | } | |
75 | ||
76 | static int | |
77 | check_open( | |
78 | struct libxfs_init *xi, | |
79 | struct libxfs_dev *dev) | |
80 | { | |
81 | struct stat stbuf; | |
82 | ||
83 | if (stat(dev->name, &stbuf) < 0) { | |
84 | perror(dev->name); | |
85 | return 0; | |
86 | } | |
87 | if (!(xi->flags & LIBXFS_ISREADONLY) && | |
88 | !(xi->flags & LIBXFS_ISINACTIVE) && | |
89 | platform_check_ismounted(dev->name, dev->name, NULL, 1)) | |
90 | return 0; | |
91 | ||
92 | if ((xi->flags & LIBXFS_ISINACTIVE) && | |
93 | check_isactive(dev->name, dev->name, !!(xi->flags & | |
94 | (LIBXFS_ISREADONLY | LIBXFS_DANGEROUSLY)))) | |
95 | return 0; | |
96 | ||
97 | return 1; | |
98 | } | |
99 | ||
100 | static bool | |
101 | libxfs_device_open( | |
102 | struct libxfs_init *xi, | |
103 | struct libxfs_dev *dev) | |
104 | { | |
105 | struct stat statb; | |
106 | int flags; | |
107 | ||
108 | dev->fd = -1; | |
109 | ||
110 | if (!dev->name) | |
111 | return true; | |
112 | if (!dev->isfile && !check_open(xi, dev)) | |
113 | return false; | |
114 | ||
115 | if (xi->flags & LIBXFS_ISREADONLY) | |
116 | flags = O_RDONLY; | |
117 | else | |
118 | flags = O_RDWR; | |
119 | ||
120 | if (dev->create) { | |
121 | flags |= O_CREAT | O_TRUNC; | |
122 | } else { | |
123 | if (xi->flags & LIBXFS_EXCLUSIVELY) | |
124 | flags |= O_EXCL; | |
125 | if ((xi->flags & LIBXFS_DIRECT) && platform_direct_blockdev()) | |
126 | flags |= O_DIRECT; | |
127 | } | |
128 | ||
129 | retry: | |
130 | dev->fd = open(dev->name, flags, 0666); | |
131 | if (dev->fd < 0) { | |
132 | if (errno == EINVAL && (flags & O_DIRECT)) { | |
133 | flags &= ~O_DIRECT; | |
134 | goto retry; | |
135 | } | |
136 | fprintf(stderr, _("%s: cannot open %s: %s\n"), | |
137 | progname, dev->name, strerror(errno)); | |
138 | exit(1); | |
139 | } | |
140 | ||
141 | if (fstat(dev->fd, &statb) < 0) { | |
142 | fprintf(stderr, _("%s: cannot stat %s: %s\n"), | |
143 | progname, dev->name, strerror(errno)); | |
144 | exit(1); | |
145 | } | |
146 | ||
147 | if (!(xi->flags & LIBXFS_ISREADONLY) && | |
148 | xi->setblksize && | |
149 | (statb.st_mode & S_IFMT) == S_IFBLK) { | |
150 | /* | |
151 | * Try to use the given explicit blocksize. Failure to set the | |
152 | * block size is only fatal for direct I/O. | |
153 | */ | |
154 | platform_set_blocksize(dev->fd, dev->name, statb.st_rdev, | |
155 | xi->setblksize, flags & O_DIRECT); | |
156 | } | |
157 | ||
158 | /* | |
159 | * Get the device number from the stat buf - unless we're not opening a | |
160 | * real device, in which case choose a new fake device number. | |
161 | */ | |
162 | if (statb.st_rdev) | |
163 | dev->dev = statb.st_rdev; | |
164 | else | |
165 | dev->dev = nextfakedev--; | |
166 | platform_findsizes(dev->name, dev->fd, &dev->size, &dev->bsize); | |
167 | return true; | |
168 | } | |
169 | ||
170 | static void | |
171 | libxfs_device_close( | |
172 | struct libxfs_dev *dev) | |
173 | { | |
174 | int ret; | |
175 | ||
176 | ret = platform_flush_device(dev->fd, dev->dev); | |
177 | if (ret) { | |
178 | ret = -errno; | |
179 | fprintf(stderr, | |
180 | _("%s: flush of device %s failed, err=%d"), | |
181 | progname, dev->name, ret); | |
182 | } | |
183 | close(dev->fd); | |
184 | ||
185 | dev->fd = -1; | |
186 | dev->dev = 0; | |
187 | } | |
188 | ||
189 | /* | |
190 | * Initialize/destroy all of the cache allocators we use. | |
191 | */ | |
192 | static void | |
193 | init_caches(void) | |
194 | { | |
195 | int error; | |
196 | ||
197 | /* initialise cache allocation */ | |
198 | xfs_buf_cache = kmem_cache_init(sizeof(struct xfs_buf), "xfs_buffer"); | |
199 | xfs_inode_cache = kmem_cache_init(sizeof(struct xfs_inode), "xfs_inode"); | |
200 | xfs_ifork_cache = kmem_cache_init(sizeof(struct xfs_ifork), "xfs_ifork"); | |
201 | xfs_ili_cache = kmem_cache_init( | |
202 | sizeof(struct xfs_inode_log_item),"xfs_inode_log_item"); | |
203 | xfs_buf_item_cache = kmem_cache_init( | |
204 | sizeof(struct xfs_buf_log_item), "xfs_buf_log_item"); | |
205 | error = xfs_defer_init_item_caches(); | |
206 | if (error) { | |
207 | fprintf(stderr, "Could not allocate defer init item caches.\n"); | |
208 | abort(); | |
209 | } | |
210 | xfs_da_state_cache = kmem_cache_init( | |
211 | sizeof(struct xfs_da_state), "xfs_da_state"); | |
212 | error = xfs_btree_init_cur_caches(); | |
213 | if (error) { | |
214 | fprintf(stderr, "Could not allocate btree cursor caches.\n"); | |
215 | abort(); | |
216 | } | |
217 | xfs_extfree_item_cache = kmem_cache_init( | |
218 | sizeof(struct xfs_extent_free_item), | |
219 | "xfs_extfree_item"); | |
220 | xfs_trans_cache = kmem_cache_init( | |
221 | sizeof(struct xfs_trans), "xfs_trans"); | |
222 | xfs_parent_args_cache = kmem_cache_init( | |
223 | sizeof(struct xfs_parent_args), "xfs_parent_args"); | |
224 | } | |
225 | ||
226 | static int | |
227 | destroy_caches(void) | |
228 | { | |
229 | int leaked = 0; | |
230 | ||
231 | leaked += kmem_cache_destroy(xfs_buf_cache); | |
232 | leaked += kmem_cache_destroy(xfs_ili_cache); | |
233 | leaked += kmem_cache_destroy(xfs_inode_cache); | |
234 | leaked += kmem_cache_destroy(xfs_ifork_cache); | |
235 | leaked += kmem_cache_destroy(xfs_buf_item_cache); | |
236 | leaked += kmem_cache_destroy(xfs_da_state_cache); | |
237 | xfs_defer_destroy_item_caches(); | |
238 | xfs_btree_destroy_cur_caches(); | |
239 | leaked += kmem_cache_destroy(xfs_extfree_item_cache); | |
240 | leaked += kmem_cache_destroy(xfs_trans_cache); | |
241 | leaked += kmem_cache_destroy(xfs_parent_args_cache); | |
242 | ||
243 | return leaked; | |
244 | } | |
245 | ||
246 | static void | |
247 | libxfs_close_devices( | |
248 | struct libxfs_init *li) | |
249 | { | |
250 | if (li->data.dev) | |
251 | libxfs_device_close(&li->data); | |
252 | if (li->log.dev && li->log.dev != li->data.dev) | |
253 | libxfs_device_close(&li->log); | |
254 | if (li->rt.dev && li->rt.dev != li->data.dev) | |
255 | libxfs_device_close(&li->rt); | |
256 | } | |
257 | ||
258 | /* | |
259 | * libxfs initialization. | |
260 | * Caller gets a 0 on failure (and we print a message), 1 on success. | |
261 | */ | |
262 | int | |
263 | libxfs_init(struct libxfs_init *a) | |
264 | { | |
265 | if (!PAGE_SHIFT) | |
266 | PAGE_SHIFT = log2_roundup(PAGE_SIZE); | |
267 | xfs_check_ondisk_structs(); | |
268 | xmbuf_libinit(); | |
269 | rcu_init(); | |
270 | rcu_register_thread(); | |
271 | radix_tree_init(); | |
272 | ||
273 | if (!libxfs_device_open(a, &a->data)) | |
274 | goto done; | |
275 | if (!libxfs_device_open(a, &a->log)) | |
276 | goto done; | |
277 | if (!libxfs_device_open(a, &a->rt)) | |
278 | goto done; | |
279 | ||
280 | if (!libxfs_bhash_size) | |
281 | libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp); | |
282 | use_xfs_buf_lock = a->flags & LIBXFS_USEBUFLOCK; | |
283 | xfs_dir_startup(); | |
284 | init_caches(); | |
285 | return 1; | |
286 | ||
287 | done: | |
288 | libxfs_close_devices(a); | |
289 | rcu_unregister_thread(); | |
290 | return 0; | |
291 | } | |
292 | ||
293 | ||
294 | /* | |
295 | * Initialize realtime fields in the mount structure. | |
296 | */ | |
297 | static int | |
298 | rtmount_init( | |
299 | xfs_mount_t *mp) /* file system mount structure */ | |
300 | { | |
301 | struct xfs_buf *bp; /* buffer for last block of subvolume */ | |
302 | xfs_daddr_t d; /* address of last block of subvolume */ | |
303 | int error; | |
304 | ||
305 | if (mp->m_sb.sb_rblocks == 0) | |
306 | return 0; | |
307 | ||
308 | if (xfs_has_reflink(mp) && mp->m_sb.sb_rextsize > 1) { | |
309 | fprintf(stderr, | |
310 | _("%s: Reflink not compatible with realtime extent size > 1. Please try a newer xfsprogs.\n"), | |
311 | progname); | |
312 | return -1; | |
313 | } | |
314 | ||
315 | if (mp->m_rtdev_targp->bt_bdev == 0 && !xfs_is_debugger(mp)) { | |
316 | fprintf(stderr, _("%s: filesystem has a realtime subvolume\n"), | |
317 | progname); | |
318 | return -1; | |
319 | } | |
320 | mp->m_rsumblocks = xfs_rtsummary_blockcount(mp, &mp->m_rsumlevels); | |
321 | ||
322 | /* | |
323 | * Allow debugger to be run without the realtime device present. | |
324 | */ | |
325 | if (xfs_is_debugger(mp)) | |
326 | return 0; | |
327 | ||
328 | /* | |
329 | * Check that the realtime section is an ok size. | |
330 | */ | |
331 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); | |
332 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { | |
333 | fprintf(stderr, _("%s: realtime init - %llu != %llu\n"), | |
334 | progname, (unsigned long long) XFS_BB_TO_FSB(mp, d), | |
335 | (unsigned long long) mp->m_sb.sb_rblocks); | |
336 | return -1; | |
337 | } | |
338 | error = libxfs_buf_read(mp->m_rtdev, d - XFS_FSB_TO_BB(mp, 1), | |
339 | XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); | |
340 | if (error) { | |
341 | fprintf(stderr, _("%s: realtime size check failed\n"), | |
342 | progname); | |
343 | return -1; | |
344 | } | |
345 | libxfs_buf_relse(bp); | |
346 | return 0; | |
347 | } | |
348 | ||
349 | static bool | |
350 | xfs_set_inode_alloc_perag( | |
351 | struct xfs_perag *pag, | |
352 | xfs_ino_t ino, | |
353 | xfs_agnumber_t max_metadata) | |
354 | { | |
355 | if (!xfs_is_inode32(pag_mount(pag))) { | |
356 | set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); | |
357 | clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
358 | return false; | |
359 | } | |
360 | ||
361 | if (ino > XFS_MAXINUMBER_32) { | |
362 | clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); | |
363 | clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
364 | return false; | |
365 | } | |
366 | ||
367 | set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); | |
368 | if (pag_agno(pag) < max_metadata) | |
369 | set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
370 | else | |
371 | clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
372 | return true; | |
373 | } | |
374 | ||
375 | /* | |
376 | * Set parameters for inode allocation heuristics, taking into account | |
377 | * filesystem size and inode32/inode64 mount options; i.e. specifically | |
378 | * whether or not XFS_MOUNT_SMALL_INUMS is set. | |
379 | * | |
380 | * Inode allocation patterns are altered only if inode32 is requested | |
381 | * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large. | |
382 | * If altered, XFS_MOUNT_32BITINODES is set as well. | |
383 | * | |
384 | * An agcount independent of that in the mount structure is provided | |
385 | * because in the growfs case, mp->m_sb.sb_agcount is not yet updated | |
386 | * to the potentially higher ag count. | |
387 | * | |
388 | * Returns the maximum AG index which may contain inodes. | |
389 | * | |
390 | * NOTE: userspace has no concept of "inode32" and so xfs_has_small_inums | |
391 | * is always false, and much of this code is a no-op. | |
392 | */ | |
393 | xfs_agnumber_t | |
394 | xfs_set_inode_alloc( | |
395 | struct xfs_mount *mp, | |
396 | xfs_agnumber_t agcount) | |
397 | { | |
398 | xfs_agnumber_t index; | |
399 | xfs_agnumber_t maxagi = 0; | |
400 | xfs_sb_t *sbp = &mp->m_sb; | |
401 | xfs_agnumber_t max_metadata; | |
402 | xfs_agino_t agino; | |
403 | xfs_ino_t ino; | |
404 | ||
405 | /* | |
406 | * Calculate how much should be reserved for inodes to meet | |
407 | * the max inode percentage. Used only for inode32. | |
408 | */ | |
409 | if (M_IGEO(mp)->maxicount) { | |
410 | uint64_t icount; | |
411 | ||
412 | icount = sbp->sb_dblocks * sbp->sb_imax_pct; | |
413 | do_div(icount, 100); | |
414 | icount += sbp->sb_agblocks - 1; | |
415 | do_div(icount, sbp->sb_agblocks); | |
416 | max_metadata = icount; | |
417 | } else { | |
418 | max_metadata = agcount; | |
419 | } | |
420 | ||
421 | /* Get the last possible inode in the filesystem */ | |
422 | agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); | |
423 | ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); | |
424 | ||
425 | /* | |
426 | * If user asked for no more than 32-bit inodes, and the fs is | |
427 | * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter | |
428 | * the allocator to accommodate the request. | |
429 | */ | |
430 | if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) | |
431 | set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); | |
432 | else | |
433 | clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); | |
434 | ||
435 | for (index = 0; index < agcount; index++) { | |
436 | struct xfs_perag *pag; | |
437 | ||
438 | ino = XFS_AGINO_TO_INO(mp, index, agino); | |
439 | ||
440 | pag = xfs_perag_get(mp, index); | |
441 | if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) | |
442 | maxagi++; | |
443 | xfs_perag_put(pag); | |
444 | } | |
445 | ||
446 | return xfs_is_inode32(mp) ? maxagi : agcount; | |
447 | } | |
448 | ||
449 | static struct xfs_buftarg * | |
450 | libxfs_buftarg_alloc( | |
451 | struct xfs_mount *mp, | |
452 | struct libxfs_init *xi, | |
453 | struct libxfs_dev *dev, | |
454 | unsigned long write_fails) | |
455 | { | |
456 | struct xfs_buftarg *btp; | |
457 | ||
458 | btp = malloc(sizeof(*btp)); | |
459 | if (!btp) { | |
460 | fprintf(stderr, _("%s: buftarg init failed\n"), | |
461 | progname); | |
462 | exit(1); | |
463 | } | |
464 | btp->bt_mount = mp; | |
465 | btp->bt_bdev = dev->dev; | |
466 | btp->bt_bdev_fd = dev->fd; | |
467 | btp->bt_xfile = NULL; | |
468 | btp->flags = 0; | |
469 | if (write_fails) { | |
470 | btp->writes_left = write_fails; | |
471 | btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL; | |
472 | } | |
473 | pthread_mutex_init(&btp->lock, NULL); | |
474 | ||
475 | btp->bcache = cache_init(xi->bcache_flags, libxfs_bhash_size, | |
476 | &libxfs_bcache_operations); | |
477 | ||
478 | return btp; | |
479 | } | |
480 | ||
/* Indices into wf_opts[], one per device that can be named there. */
enum libxfs_write_failure_nums {
	WF_DATA		= 0,
	WF_LOG		= 1,
	WF_RT		= 2,
	WF_MAX_OPTS	= 3,
};

/* NULL-terminated token table handed to getsubopt(). */
static char *wf_opts[] = {
	[WF_DATA]	= "ddev",
	[WF_LOG]	= "logdev",
	[WF_RT]		= "rtdev",
	[WF_MAX_OPTS]	= NULL,
};
494 | ||
495 | void | |
496 | libxfs_buftarg_init( | |
497 | struct xfs_mount *mp, | |
498 | struct libxfs_init *xi) | |
499 | { | |
500 | char *p = getenv("LIBXFS_DEBUG_WRITE_CRASH"); | |
501 | unsigned long dfail = 0, lfail = 0, rfail = 0; | |
502 | ||
503 | /* Simulate utility crash after a certain number of writes. */ | |
504 | while (p && *p) { | |
505 | char *val; | |
506 | ||
507 | switch (getsubopt(&p, wf_opts, &val)) { | |
508 | case WF_DATA: | |
509 | if (!val) { | |
510 | fprintf(stderr, | |
511 | _("ddev write fail requires a parameter\n")); | |
512 | exit(1); | |
513 | } | |
514 | dfail = strtoul(val, NULL, 0); | |
515 | break; | |
516 | case WF_LOG: | |
517 | if (!val) { | |
518 | fprintf(stderr, | |
519 | _("logdev write fail requires a parameter\n")); | |
520 | exit(1); | |
521 | } | |
522 | lfail = strtoul(val, NULL, 0); | |
523 | break; | |
524 | case WF_RT: | |
525 | if (!val) { | |
526 | fprintf(stderr, | |
527 | _("rtdev write fail requires a parameter\n")); | |
528 | exit(1); | |
529 | } | |
530 | rfail = strtoul(val, NULL, 0); | |
531 | break; | |
532 | default: | |
533 | fprintf(stderr, _("unknown write fail type %s\n"), | |
534 | val); | |
535 | exit(1); | |
536 | break; | |
537 | } | |
538 | } | |
539 | ||
540 | if (mp->m_ddev_targp) { | |
541 | /* should already have all buftargs initialised */ | |
542 | if (mp->m_ddev_targp->bt_bdev != xi->data.dev || | |
543 | mp->m_ddev_targp->bt_mount != mp) { | |
544 | fprintf(stderr, | |
545 | _("%s: bad buftarg reinit, ddev\n"), | |
546 | progname); | |
547 | exit(1); | |
548 | } | |
549 | if (!xi->log.dev || xi->log.dev == xi->data.dev) { | |
550 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | |
551 | fprintf(stderr, | |
552 | _("%s: bad buftarg reinit, ldev mismatch\n"), | |
553 | progname); | |
554 | exit(1); | |
555 | } | |
556 | } else if (mp->m_logdev_targp->bt_bdev != xi->log.dev || | |
557 | mp->m_logdev_targp->bt_mount != mp) { | |
558 | fprintf(stderr, | |
559 | _("%s: bad buftarg reinit, logdev\n"), | |
560 | progname); | |
561 | exit(1); | |
562 | } | |
563 | if ((xi->rt.dev || xi->rt.dev == xi->data.dev) && | |
564 | (mp->m_rtdev_targp->bt_bdev != xi->rt.dev || | |
565 | mp->m_rtdev_targp->bt_mount != mp)) { | |
566 | fprintf(stderr, | |
567 | _("%s: bad buftarg reinit, rtdev\n"), | |
568 | progname); | |
569 | exit(1); | |
570 | } | |
571 | return; | |
572 | } | |
573 | ||
574 | mp->m_ddev_targp = libxfs_buftarg_alloc(mp, xi, &xi->data, dfail); | |
575 | if (!xi->log.dev || xi->log.dev == xi->data.dev) | |
576 | mp->m_logdev_targp = mp->m_ddev_targp; | |
577 | else | |
578 | mp->m_logdev_targp = libxfs_buftarg_alloc(mp, xi, &xi->log, | |
579 | lfail); | |
580 | if (!xi->rt.dev || xi->rt.dev == xi->data.dev) | |
581 | mp->m_rtdev_targp = mp->m_ddev_targp; | |
582 | else | |
583 | mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, xi, &xi->rt, | |
584 | rfail); | |
585 | } | |
586 | ||
587 | /* Compute maximum possible height for per-AG btree types for this fs. */ | |
588 | static inline void | |
589 | xfs_agbtree_compute_maxlevels( | |
590 | struct xfs_mount *mp) | |
591 | { | |
592 | unsigned int levels; | |
593 | ||
594 | levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels); | |
595 | levels = max(levels, mp->m_rmap_maxlevels); | |
596 | mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); | |
597 | } | |
598 | ||
599 | /* Compute maximum possible height for realtime btree types for this fs. */ | |
600 | static inline void | |
601 | xfs_rtbtree_compute_maxlevels( | |
602 | struct xfs_mount *mp) | |
603 | { | |
604 | mp->m_rtbtree_maxlevels = max(mp->m_rtrmap_maxlevels, | |
605 | mp->m_rtrefc_maxlevels); | |
606 | } | |
607 | ||
608 | /* Compute maximum possible height of all btrees. */ | |
609 | void | |
610 | libxfs_compute_all_maxlevels( | |
611 | struct xfs_mount *mp) | |
612 | { | |
613 | struct xfs_ino_geometry *igeo = M_IGEO(mp); | |
614 | ||
615 | xfs_alloc_compute_maxlevels(mp); | |
616 | xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); | |
617 | xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); | |
618 | igeo->attr_fork_offset = xfs_bmap_compute_attr_offset(mp); | |
619 | xfs_ialloc_setup_geometry(mp); | |
620 | xfs_rmapbt_compute_maxlevels(mp); | |
621 | xfs_rtrmapbt_compute_maxlevels(mp); | |
622 | xfs_refcountbt_compute_maxlevels(mp); | |
623 | xfs_rtrefcountbt_compute_maxlevels(mp); | |
624 | ||
625 | xfs_agbtree_compute_maxlevels(mp); | |
626 | xfs_rtbtree_compute_maxlevels(mp); | |
627 | } | |
628 | ||
629 | /* Mount the metadata files under the metadata directory tree. */ | |
630 | STATIC void | |
631 | libxfs_mount_setup_metadir( | |
632 | struct xfs_mount *mp) | |
633 | { | |
634 | int error; | |
635 | ||
636 | /* Ignore filesystems that are under construction. */ | |
637 | if (mp->m_sb.sb_inprogress) | |
638 | return; | |
639 | ||
640 | error = -libxfs_metafile_iget(mp, mp->m_sb.sb_metadirino, | |
641 | XFS_METAFILE_DIR, &mp->m_metadirip); | |
642 | if (error) { | |
643 | fprintf(stderr, | |
644 | _("%s: Failed to load metadir root directory, error %d\n"), | |
645 | progname, error); | |
646 | return; | |
647 | } | |
648 | } | |
649 | ||
650 | /* | |
651 | * precalculate the low space thresholds for dynamic speculative preallocation. | |
652 | */ | |
653 | static void | |
654 | xfs_set_low_space_thresholds( | |
655 | struct xfs_mount *mp) | |
656 | { | |
657 | uint64_t dblocks = mp->m_sb.sb_dblocks; | |
658 | int i; | |
659 | ||
660 | do_div(dblocks, 100); | |
661 | ||
662 | for (i = 0; i < XFS_LOWSP_MAX; i++) | |
663 | mp->m_low_space[i] = dblocks * (i + 1); | |
664 | } | |
665 | ||
666 | /* | |
667 | * libxfs_initialize_rtgroup will allocate a rtgroup structure for each | |
668 | * rtgroup. If rgcount is corrupted and insanely high, this will OOM the box. | |
669 | * Try to read what would be the last rtgroup superblock. If that fails, read | |
670 | * the first one and let the user know to check the geometry. | |
671 | */ | |
672 | static inline bool | |
673 | check_many_rtgroups( | |
674 | struct xfs_mount *mp, | |
675 | struct xfs_sb *sbp) | |
676 | { | |
677 | struct xfs_buf *bp; | |
678 | xfs_daddr_t d; | |
679 | int error; | |
680 | ||
681 | if (!mp->m_rtdev->bt_bdev) { | |
682 | fprintf(stderr, _("%s: no rt device, ignoring rgcount %u\n"), | |
683 | progname, sbp->sb_rgcount); | |
684 | if (!xfs_is_debugger(mp)) | |
685 | return false; | |
686 | ||
687 | sbp->sb_rgcount = 0; | |
688 | return true; | |
689 | } | |
690 | ||
691 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); | |
692 | error = libxfs_buf_read(mp->m_rtdev, d - XFS_FSB_TO_BB(mp, 1), 1, 0, | |
693 | &bp, NULL); | |
694 | if (!error) { | |
695 | libxfs_buf_relse(bp); | |
696 | return true; | |
697 | } | |
698 | ||
699 | fprintf(stderr, _("%s: read of rtgroup %u failed\n"), progname, | |
700 | sbp->sb_rgcount - 1); | |
701 | if (!xfs_is_debugger(mp)) | |
702 | return false; | |
703 | ||
704 | fprintf(stderr, _("%s: limiting reads to rtgroup 0\n"), progname); | |
705 | sbp->sb_rgcount = 1; | |
706 | return true; | |
707 | } | |
708 | ||
709 | /* | |
710 | * Mount structure initialization, provides a filled-in xfs_mount_t | |
711 | * such that the numerous XFS_* macros can be used. If dev is zero, | |
712 | * no IO will be performed (no size checks, read root inodes). | |
713 | */ | |
714 | struct xfs_mount * | |
715 | libxfs_mount( | |
716 | struct xfs_mount *mp, | |
717 | struct xfs_sb *sb, | |
718 | struct libxfs_init *xi, | |
719 | unsigned int flags) | |
720 | { | |
721 | struct xfs_buf *bp; | |
722 | struct xfs_sb *sbp; | |
723 | xfs_daddr_t d; | |
724 | int i; | |
725 | int error; | |
726 | ||
727 | mp->m_features = xfs_sb_version_to_features(sb); | |
728 | if (flags & LIBXFS_MOUNT_DEBUGGER) | |
729 | xfs_set_debugger(mp); | |
730 | if (flags & LIBXFS_MOUNT_REPORT_CORRUPTION) | |
731 | xfs_set_reporting_corruption(mp); | |
732 | libxfs_buftarg_init(mp, xi); | |
733 | ||
734 | if (xi->data.name) | |
735 | mp->m_fsname = strdup(xi->data.name); | |
736 | else | |
737 | mp->m_fsname = NULL; | |
738 | ||
739 | mp->m_finobt_nores = true; | |
740 | xfs_set_inode32(mp); | |
741 | mp->m_sb = *sb; | |
742 | for (i = 0; i < XG_TYPE_MAX; i++) | |
743 | xa_init(&mp->m_groups[i].xa); | |
744 | sbp = &mp->m_sb; | |
745 | spin_lock_init(&mp->m_sb_lock); | |
746 | spin_lock_init(&mp->m_agirotor_lock); | |
747 | ||
748 | xfs_sb_mount_common(mp, sb); | |
749 | ||
750 | /* | |
751 | * Set whether we're using stripe alignment. | |
752 | */ | |
753 | if (xfs_has_dalign(mp)) { | |
754 | mp->m_dalign = sbp->sb_unit; | |
755 | mp->m_swidth = sbp->sb_width; | |
756 | } | |
757 | ||
758 | libxfs_compute_all_maxlevels(mp); | |
759 | ||
760 | /* | |
761 | * Check that the data (and log if separate) are an ok size. | |
762 | */ | |
763 | d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); | |
764 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { | |
765 | fprintf(stderr, _("%s: size check failed\n"), progname); | |
766 | if (!xfs_is_debugger(mp)) | |
767 | return NULL; | |
768 | } | |
769 | ||
770 | /* | |
771 | * We automatically convert v1 inodes to v2 inodes now, so if | |
772 | * the NLINK bit is not set we can't operate on the filesystem. | |
773 | */ | |
774 | if (!(sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { | |
775 | ||
776 | fprintf(stderr, _( | |
777 | "%s: V1 inodes unsupported. Please try an older xfsprogs.\n"), | |
778 | progname); | |
779 | exit(1); | |
780 | } | |
781 | ||
782 | /* Check for supported directory formats */ | |
783 | if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) { | |
784 | ||
785 | fprintf(stderr, _( | |
786 | "%s: V1 directories unsupported. Please try an older xfsprogs.\n"), | |
787 | progname); | |
788 | exit(1); | |
789 | } | |
790 | ||
791 | /* check for unsupported other features */ | |
792 | if (!xfs_sb_good_version(sbp)) { | |
793 | fprintf(stderr, _( | |
794 | "%s: Unsupported features detected. Please try a newer xfsprogs.\n"), | |
795 | progname); | |
796 | exit(1); | |
797 | } | |
798 | ||
799 | xfs_da_mount(mp); | |
800 | ||
801 | /* Initialize the precomputed transaction reservations values */ | |
802 | xfs_trans_init(mp); | |
803 | ||
804 | if (xi->data.dev == 0) /* maxtrres, we have no device so leave now */ | |
805 | return mp; | |
806 | ||
807 | /* device size checks must pass unless we're a debugger. */ | |
808 | error = libxfs_buf_read(mp->m_dev, d - XFS_FSS_TO_BB(mp, 1), | |
809 | XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); | |
810 | if (error) { | |
811 | fprintf(stderr, _("%s: data size check failed\n"), progname); | |
812 | if (!xfs_is_debugger(mp)) | |
813 | goto out_da; | |
814 | } else | |
815 | libxfs_buf_relse(bp); | |
816 | ||
817 | if (mp->m_logdev_targp->bt_bdev && | |
818 | mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) { | |
819 | d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); | |
820 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks || | |
821 | libxfs_buf_read(mp->m_logdev_targp, | |
822 | d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1), | |
823 | 0, &bp, NULL)) { | |
824 | fprintf(stderr, _("%s: log size checks failed\n"), | |
825 | progname); | |
826 | if (!xfs_is_debugger(mp)) | |
827 | goto out_da; | |
828 | } | |
829 | if (bp) | |
830 | libxfs_buf_relse(bp); | |
831 | } | |
832 | ||
833 | xfs_set_low_space_thresholds(mp); | |
834 | ||
835 | /* Initialize realtime fields in the mount structure */ | |
836 | if (rtmount_init(mp)) { | |
837 | fprintf(stderr, _("%s: realtime device init failed\n"), | |
838 | progname); | |
839 | goto out_da; | |
840 | } | |
841 | ||
842 | /* | |
843 | * libxfs_initialize_perag will allocate a perag structure for each ag. | |
844 | * If agcount is corrupted and insanely high, this will OOM the box. | |
845 | * If the agount seems (arbitrarily) high, try to read what would be | |
846 | * the last AG, and if that fails for a relatively high agcount, just | |
847 | * read the first one and let the user know to check the geometry. | |
848 | */ | |
849 | if (sbp->sb_agcount > 1000000) { | |
850 | error = libxfs_buf_read(mp->m_dev, | |
851 | XFS_AG_DADDR(mp, sbp->sb_agcount - 1, 0), 1, | |
852 | 0, &bp, NULL); | |
853 | if (error) { | |
854 | fprintf(stderr, _("%s: read of AG %u failed\n"), | |
855 | progname, sbp->sb_agcount); | |
856 | if (!xfs_is_debugger(mp)) | |
857 | goto out_da; | |
858 | fprintf(stderr, _("%s: limiting reads to AG 0\n"), | |
859 | progname); | |
860 | sbp->sb_agcount = 1; | |
861 | } else | |
862 | libxfs_buf_relse(bp); | |
863 | } | |
864 | ||
865 | if (sbp->sb_rgcount > 1000000 && !check_many_rtgroups(mp, sbp)) | |
866 | goto out_da; | |
867 | ||
868 | error = libxfs_initialize_perag(mp, 0, sbp->sb_agcount, | |
869 | sbp->sb_dblocks, &mp->m_maxagi); | |
870 | if (error) { | |
871 | fprintf(stderr, _("%s: perag init failed\n"), | |
872 | progname); | |
873 | exit(1); | |
874 | } | |
875 | xfs_set_perag_data_loaded(mp); | |
876 | ||
877 | if (xfs_has_metadir(mp)) | |
878 | libxfs_mount_setup_metadir(mp); | |
879 | ||
880 | error = libxfs_initialize_rtgroups(mp, 0, sbp->sb_rgcount, | |
881 | sbp->sb_rextents); | |
882 | if (error) { | |
883 | fprintf(stderr, _("%s: rtgroup init failed\n"), | |
884 | progname); | |
885 | exit(1); | |
886 | } | |
887 | ||
888 | xfs_set_rtgroup_data_loaded(mp); | |
889 | ||
890 | return mp; | |
891 | out_da: | |
892 | xfs_da_unmount(mp); | |
893 | return NULL; | |
894 | } | |
895 | ||
896 | void | |
897 | libxfs_rtmount_destroy( | |
898 | struct xfs_mount *mp) | |
899 | { | |
900 | struct xfs_rtgroup *rtg = NULL; | |
901 | unsigned int i; | |
902 | ||
903 | while ((rtg = xfs_rtgroup_next(mp, rtg))) { | |
904 | for (i = 0; i < XFS_RTGI_MAX; i++) | |
905 | libxfs_rtginode_irele(&rtg->rtg_inodes[i]); | |
906 | kvfree(rtg->rtg_rsum_cache); | |
907 | } | |
908 | libxfs_rtginode_irele(&mp->m_rtdirip); | |
909 | } | |
910 | ||
911 | /* Flush a device and report on writes that didn't make it to stable storage. */ | |
912 | static inline int | |
913 | libxfs_flush_buftarg( | |
914 | struct xfs_buftarg *btp, | |
915 | const char *buftarg_descr) | |
916 | { | |
917 | int error = 0; | |
918 | int err2; | |
919 | ||
920 | /* | |
921 | * Write verifier failures are evidence of a buggy program. Make sure | |
922 | * that this state is always reported to the caller. | |
923 | */ | |
924 | if (btp->flags & XFS_BUFTARG_CORRUPT_WRITE) { | |
925 | fprintf(stderr, | |
926 | _("%s: Refusing to write a corrupt buffer to the %s!\n"), | |
927 | progname, buftarg_descr); | |
928 | error = -EFSCORRUPTED; | |
929 | } | |
930 | ||
931 | if (btp->flags & XFS_BUFTARG_LOST_WRITE) { | |
932 | fprintf(stderr, | |
933 | _("%s: Lost a write to the %s!\n"), | |
934 | progname, buftarg_descr); | |
935 | if (!error) | |
936 | error = -EIO; | |
937 | } | |
938 | ||
939 | err2 = libxfs_blkdev_issue_flush(btp); | |
940 | if (err2) { | |
941 | fprintf(stderr, | |
942 | _("%s: Flushing the %s failed, err=%d!\n"), | |
943 | progname, buftarg_descr, -err2); | |
944 | } | |
945 | if (!error) | |
946 | error = err2; | |
947 | ||
948 | return error; | |
949 | } | |
950 | ||
951 | /* | |
952 | * Flush all dirty buffers to stable storage and report on writes that didn't | |
953 | * make it to stable storage. | |
954 | */ | |
955 | int | |
956 | libxfs_flush_mount( | |
957 | struct xfs_mount *mp) | |
958 | { | |
959 | int error = 0; | |
960 | int err2; | |
961 | ||
962 | /* | |
963 | * Flush the buffer cache to write all dirty buffers to disk. Buffers | |
964 | * that fail write verification will cause the CORRUPT_WRITE flag to be | |
965 | * set in the buftarg. Buffers that cannot be written will cause the | |
966 | * LOST_WRITE flag to be set in the buftarg. Once that's done, | |
967 | * instruct the disks to persist their write caches. | |
968 | */ | |
969 | libxfs_bcache_flush(mp); | |
970 | ||
971 | /* Flush all kernel and disk write caches, and report failures. */ | |
972 | if (mp->m_ddev_targp) { | |
973 | err2 = libxfs_flush_buftarg(mp->m_ddev_targp, _("data device")); | |
974 | if (!error) | |
975 | error = err2; | |
976 | } | |
977 | ||
978 | if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { | |
979 | err2 = libxfs_flush_buftarg(mp->m_logdev_targp, | |
980 | _("log device")); | |
981 | if (!error) | |
982 | error = err2; | |
983 | } | |
984 | ||
985 | if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp) { | |
986 | err2 = libxfs_flush_buftarg(mp->m_rtdev_targp, | |
987 | _("realtime device")); | |
988 | if (!error) | |
989 | error = err2; | |
990 | } | |
991 | ||
992 | return error; | |
993 | } | |
994 | ||
/*
 * Destroy the buffer cache attached to @btp, then free the buftarg itself.
 * @btp must not be used after this call.
 */
static void
libxfs_buftarg_free(
	struct xfs_buftarg	*btp)
{
	cache_destroy(btp->bcache);
	kfree(btp);
}
1002 | ||
1003 | /* | |
1004 | * Release any resource obtained during a mount. | |
1005 | */ | |
1006 | int | |
1007 | libxfs_umount( | |
1008 | struct xfs_mount *mp) | |
1009 | { | |
1010 | int error; | |
1011 | ||
1012 | libxfs_rtmount_destroy(mp); | |
1013 | if (mp->m_metadirip) | |
1014 | libxfs_irele(mp->m_metadirip); | |
1015 | ||
1016 | /* | |
1017 | * Purge the buffer cache to write all dirty buffers to disk and free | |
1018 | * all incore buffers, then pick up the outcome when we tell the disks | |
1019 | * to persist their write caches. | |
1020 | */ | |
1021 | libxfs_bcache_purge(mp); | |
1022 | error = libxfs_flush_mount(mp); | |
1023 | ||
1024 | /* | |
1025 | * Only try to free the per-AG structures if we set them up in the | |
1026 | * first place. | |
1027 | */ | |
1028 | if (xfs_is_rtgroup_data_loaded(mp)) | |
1029 | libxfs_free_rtgroups(mp, 0, mp->m_sb.sb_rgcount); | |
1030 | if (xfs_is_perag_data_loaded(mp)) | |
1031 | libxfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount); | |
1032 | ||
1033 | xfs_da_unmount(mp); | |
1034 | ||
1035 | free(mp->m_fsname); | |
1036 | mp->m_fsname = NULL; | |
1037 | ||
1038 | if (mp->m_rtdev_targp != mp->m_ddev_targp) | |
1039 | libxfs_buftarg_free(mp->m_rtdev_targp); | |
1040 | if (mp->m_logdev_targp != mp->m_ddev_targp) | |
1041 | libxfs_buftarg_free(mp->m_logdev_targp); | |
1042 | libxfs_buftarg_free(mp->m_ddev_targp); | |
1043 | ||
1044 | return error; | |
1045 | } | |
1046 | ||
/*
 * Release any global resources used by libxfs: close the devices in @li,
 * free the buffer cache, destroy the object caches and unregister this
 * thread from RCU.  When the LIBXFS_LEAK_CHECK environment variable is set
 * and destroy_caches() reported leaks, the program exits with status 1.
 */
void
libxfs_destroy(
	struct libxfs_init	*li)
{
	const char		*leak_check;
	int			nr_leaked;

	libxfs_close_devices(li);

	libxfs_bcache_free();
	nr_leaked = destroy_caches();
	rcu_unregister_thread();

	leak_check = getenv("LIBXFS_LEAK_CHECK");
	if (leak_check && nr_leaked)
		exit(1);
}
1064 | ||
/*
 * Thin wrapper that returns whatever platform_align_blockdev() reports.
 * NOTE(review): presumably the memory alignment required for device I/O
 * buffers - confirm against the platform implementation.
 */
int
libxfs_device_alignment(void)
{
	return platform_align_blockdev();
}