]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2bd0ea18 | 2 | /* |
da23017d NS |
3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
4 | * All Rights Reserved. | |
2bd0ea18 NS |
5 | */ |
6 | ||
2bd0ea18 | 7 | #include <sys/stat.h> |
9440d84d | 8 | #include "init.h" |
29e62271 | 9 | |
9c799827 | 10 | #include "libxfs_priv.h" |
b626fb59 DC |
11 | #include "xfs_fs.h" |
12 | #include "xfs_shared.h" | |
13 | #include "xfs_format.h" | |
14 | #include "xfs_log_format.h" | |
15 | #include "xfs_trans_resv.h" | |
16 | #include "xfs_mount.h" | |
794a5604 | 17 | #include "xfs_defer.h" |
b626fb59 DC |
18 | #include "xfs_inode_buf.h" |
19 | #include "xfs_inode_fork.h" | |
20 | #include "xfs_inode.h" | |
21 | #include "xfs_trans.h" | |
b3a96b46 | 22 | #include "xfs_rmap_btree.h" |
e7be6330 | 23 | #include "xfs_refcount_btree.h" |
b658de93 | 24 | #include "libfrog/platform.h" |
b626fb59 | 25 | |
6b803e5a | 26 | #include "libxfs.h" /* for now */ |
b626fb59 | 27 | |
7448af58 DW |
28 | #ifndef HAVE_LIBURCU_ATOMIC64 |
29 | pthread_mutex_t atomic64_lock = PTHREAD_MUTEX_INITIALIZER; | |
30 | #endif | |
31 | ||
2bd0ea18 NS |
32 | char *progname = "libxfs"; /* default, changed by each tool */ |
33 | ||
f1b058f9 | 34 | struct cache *libxfs_bcache; /* global buffer cache */ |
9f38f08d | 35 | int libxfs_bhash_size; /* #buckets in bcache */ |
f1b058f9 | 36 | |
167137fe | 37 | int use_xfs_buf_lock; /* global flag: use xfs_buf locks for MT */ |
d0572de5 | 38 | |
00ff2b10 | 39 | static int nextfakedev = -1; /* device number to give to next fake device */ |
2bd0ea18 | 40 | |
/*
 * Decide whether @block holds a mounted, writable filesystem that should
 * stop the caller.
 *
 * Returns 1 only when @block is a block device that the platform helpers
 * report as both mounted and writable, and @fatal is set.  In every other
 * case (stat failure, not a block device, not mounted, not writable, or
 * not fatal) 0 is returned and the caller may proceed.
 *
 * Useful to tools which will produce uncertain results
 * if the filesystem is active - repair, check, logprint.
 */
static int
check_isactive(char *name, char *block, int fatal)
{
	struct stat	sb;

	if (stat(block, &sb) < 0)
		return 0;
	if (!S_ISBLK(sb.st_mode))
		return 0;

	if (!platform_check_ismounted(name, block, &sb, 0))
		return 0;
	if (!platform_check_iswritable(name, block, &sb))
		return 0;

	/* Mounted and writable: only a problem if the caller says so. */
	return fatal ? 1 : 0;
}
64 | ||
fc83c757 CH |
65 | static int |
66 | check_open( | |
67 | struct libxfs_init *xi, | |
68 | struct libxfs_dev *dev) | |
2bd0ea18 | 69 | { |
fc83c757 | 70 | struct stat stbuf; |
7eb6693f | 71 | |
fc83c757 CH |
72 | if (stat(dev->name, &stbuf) < 0) { |
73 | perror(dev->name); | |
74 | return 0; | |
75 | } | |
76 | if (!(xi->flags & LIBXFS_ISREADONLY) && | |
77 | !(xi->flags & LIBXFS_ISINACTIVE) && | |
78 | platform_check_ismounted(dev->name, dev->name, NULL, 1)) | |
79 | return 0; | |
2bd0ea18 | 80 | |
fc83c757 CH |
81 | if ((xi->flags & LIBXFS_ISINACTIVE) && |
82 | check_isactive(dev->name, dev->name, !!(xi->flags & | |
83 | (LIBXFS_ISREADONLY | LIBXFS_DANGEROUSLY)))) | |
84 | return 0; | |
85 | ||
86 | return 1; | |
87 | } | |
88 | ||
89 | static bool | |
90 | libxfs_device_open( | |
91 | struct libxfs_init *xi, | |
92 | struct libxfs_dev *dev) | |
93 | { | |
94 | struct stat statb; | |
95 | int flags; | |
96 | ||
97 | dev->fd = -1; | |
98 | ||
99 | if (!dev->name) | |
100 | return true; | |
101 | if (!dev->isfile && !check_open(xi, dev)) | |
102 | return false; | |
103 | ||
104 | if (xi->flags & LIBXFS_ISREADONLY) | |
105 | flags = O_RDONLY; | |
106 | else | |
107 | flags = O_RDWR; | |
108 | ||
109 | if (dev->create) { | |
110 | flags |= O_CREAT | O_TRUNC; | |
111 | } else { | |
112 | if (xi->flags & LIBXFS_EXCLUSIVELY) | |
113 | flags |= O_EXCL; | |
114 | if ((xi->flags & LIBXFS_DIRECT) && platform_direct_blockdev()) | |
115 | flags |= O_DIRECT; | |
116 | } | |
b74a1f6a | 117 | |
fc83c757 CH |
118 | retry: |
119 | dev->fd = open(dev->name, flags, 0666); | |
120 | if (dev->fd < 0) { | |
121 | if (errno == EINVAL && (flags & O_DIRECT)) { | |
122 | flags &= ~O_DIRECT; | |
b74a1f6a | 123 | goto retry; |
fc83c757 | 124 | } |
9440d84d | 125 | fprintf(stderr, _("%s: cannot open %s: %s\n"), |
fc83c757 | 126 | progname, dev->name, strerror(errno)); |
2bd0ea18 NS |
127 | exit(1); |
128 | } | |
129 | ||
fc83c757 | 130 | if (fstat(dev->fd, &statb) < 0) { |
9440d84d | 131 | fprintf(stderr, _("%s: cannot stat %s: %s\n"), |
fc83c757 | 132 | progname, dev->name, strerror(errno)); |
2bd0ea18 NS |
133 | exit(1); |
134 | } | |
a33a9e62 | 135 | |
fc83c757 CH |
136 | if (!(xi->flags & LIBXFS_ISREADONLY) && |
137 | xi->setblksize && | |
138 | (statb.st_mode & S_IFMT) == S_IFBLK) { | |
a3106f32 CH |
139 | /* |
140 | * Try to use the given explicit blocksize. Failure to set the | |
141 | * block size is only fatal for direct I/O. | |
142 | */ | |
fc83c757 CH |
143 | platform_set_blocksize(dev->fd, dev->name, statb.st_rdev, |
144 | xi->setblksize, flags & O_DIRECT); | |
edd45774 | 145 | } |
2bd0ea18 | 146 | |
a33a9e62 | 147 | /* |
7b47b1bc CH |
148 | * Get the device number from the stat buf - unless we're not opening a |
149 | * real device, in which case choose a new fake device number. | |
2bd0ea18 | 150 | */ |
7b47b1bc | 151 | if (statb.st_rdev) |
fc83c757 CH |
152 | dev->dev = statb.st_rdev; |
153 | else | |
154 | dev->dev = nextfakedev--; | |
155 | platform_findsizes(dev->name, dev->fd, &dev->size, &dev->bsize); | |
156 | return true; | |
2bd0ea18 NS |
157 | } |
158 | ||
4f112cb1 | 159 | static void |
fc83c757 CH |
160 | libxfs_device_close( |
161 | struct libxfs_dev *dev) | |
2bd0ea18 | 162 | { |
fc83c757 | 163 | int ret; |
2bd0ea18 | 164 | |
fc83c757 | 165 | ret = platform_flush_device(dev->fd, dev->dev); |
7b47b1bc CH |
166 | if (ret) { |
167 | ret = -errno; | |
168 | fprintf(stderr, | |
023ba280 | 169 | _("%s: flush of device %lld failed, err=%d"), |
7b47b1bc CH |
170 | progname, (long long)dev, ret); |
171 | } | |
fc83c757 | 172 | close(dev->fd); |
c781939c | 173 | |
fc83c757 CH |
174 | dev->fd = -1; |
175 | dev->dev = 0; | |
c781939c RC |
176 | } |
177 | ||
7a326ce0 | 178 | /* |
2e1394fc | 179 | * Initialize/destroy all of the cache allocators we use. |
7a326ce0 ES |
180 | */ |
181 | static void | |
2e1394fc | 182 | init_caches(void) |
7a326ce0 | 183 | { |
7d10d094 DW |
184 | int error; |
185 | ||
2e1394fc DW |
186 | /* initialise cache allocation */ |
187 | xfs_buf_cache = kmem_cache_init(sizeof(struct xfs_buf), "xfs_buffer"); | |
188 | xfs_inode_cache = kmem_cache_init(sizeof(struct xfs_inode), "xfs_inode"); | |
189 | xfs_ifork_cache = kmem_cache_init(sizeof(struct xfs_ifork), "xfs_ifork"); | |
190 | xfs_ili_cache = kmem_cache_init( | |
7a326ce0 | 191 | sizeof(struct xfs_inode_log_item),"xfs_inode_log_item"); |
2e1394fc | 192 | xfs_buf_item_cache = kmem_cache_init( |
7a326ce0 | 193 | sizeof(struct xfs_buf_log_item), "xfs_buf_log_item"); |
1577541c DW |
194 | error = xfs_defer_init_item_caches(); |
195 | if (error) { | |
196 | fprintf(stderr, "Could not allocate defer init item caches.\n"); | |
197 | abort(); | |
198 | } | |
2e1394fc | 199 | xfs_da_state_cache = kmem_cache_init( |
7a326ce0 | 200 | sizeof(struct xfs_da_state), "xfs_da_state"); |
7d10d094 DW |
201 | error = xfs_btree_init_cur_caches(); |
202 | if (error) { | |
203 | fprintf(stderr, "Could not allocate btree cursor caches.\n"); | |
204 | abort(); | |
205 | } | |
7d84b02d | 206 | xfs_extfree_item_cache = kmem_cache_init( |
7a326ce0 | 207 | sizeof(struct xfs_extent_free_item), |
7d84b02d | 208 | "xfs_extfree_item"); |
2e1394fc | 209 | xfs_trans_cache = kmem_cache_init( |
7a326ce0 ES |
210 | sizeof(struct xfs_trans), "xfs_trans"); |
211 | } | |
212 | ||
213 | static int | |
2e1394fc | 214 | destroy_caches(void) |
7a326ce0 ES |
215 | { |
216 | int leaked = 0; | |
217 | ||
2e1394fc DW |
218 | leaked += kmem_cache_destroy(xfs_buf_cache); |
219 | leaked += kmem_cache_destroy(xfs_ili_cache); | |
220 | leaked += kmem_cache_destroy(xfs_inode_cache); | |
221 | leaked += kmem_cache_destroy(xfs_ifork_cache); | |
222 | leaked += kmem_cache_destroy(xfs_buf_item_cache); | |
223 | leaked += kmem_cache_destroy(xfs_da_state_cache); | |
1577541c | 224 | xfs_defer_destroy_item_caches(); |
7d10d094 | 225 | xfs_btree_destroy_cur_caches(); |
7d84b02d | 226 | leaked += kmem_cache_destroy(xfs_extfree_item_cache); |
2e1394fc | 227 | leaked += kmem_cache_destroy(xfs_trans_cache); |
7a326ce0 ES |
228 | |
229 | return leaked; | |
230 | } | |
231 | ||
a9468486 DW |
232 | static void |
233 | libxfs_close_devices( | |
01dcfd9e | 234 | struct libxfs_init *li) |
a9468486 | 235 | { |
fc83c757 CH |
236 | if (li->data.dev) |
237 | libxfs_device_close(&li->data); | |
238 | if (li->log.dev && li->log.dev != li->data.dev) | |
239 | libxfs_device_close(&li->log); | |
240 | if (li->rt.dev) | |
241 | libxfs_device_close(&li->rt); | |
a9468486 DW |
242 | } |
243 | ||
2bd0ea18 NS |
244 | /* |
245 | * libxfs initialization. | |
246 | * Caller gets a 0 on failure (and we print a message), 1 on success. | |
247 | */ | |
248 | int | |
01dcfd9e | 249 | libxfs_init(struct libxfs_init *a) |
2bd0ea18 | 250 | { |
e4da1b16 DC |
251 | rcu_init(); |
252 | rcu_register_thread(); | |
bacd44a5 AE |
253 | radix_tree_init(); |
254 | ||
fc83c757 CH |
255 | if (!libxfs_device_open(a, &a->data)) |
256 | goto done; | |
257 | if (!libxfs_device_open(a, &a->log)) | |
258 | goto done; | |
259 | if (!libxfs_device_open(a, &a->rt)) | |
260 | goto done; | |
b6e08bf3 | 261 | |
9f38f08d MV |
262 | if (!libxfs_bhash_size) |
263 | libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp); | |
ba9ecd40 DC |
264 | libxfs_bcache = cache_init(a->bcache_flags, libxfs_bhash_size, |
265 | &libxfs_bcache_operations); | |
23d88955 | 266 | use_xfs_buf_lock = a->flags & LIBXFS_USEBUFLOCK; |
7a326ce0 | 267 | xfs_dir_startup(); |
2e1394fc | 268 | init_caches(); |
732f5b90 | 269 | return 1; |
a9468486 | 270 | |
732f5b90 CH |
271 | done: |
272 | libxfs_close_devices(a); | |
273 | rcu_unregister_thread(); | |
274 | return 0; | |
2bd0ea18 NS |
275 | } |
276 | ||
277 | ||
b391b7cd NS |
278 | /* |
279 | * Initialize realtime fields in the mount structure. | |
280 | */ | |
281 | static int | |
282 | rtmount_init( | |
2420d095 | 283 | xfs_mount_t *mp) /* file system mount structure */ |
b391b7cd | 284 | { |
31079e67 | 285 | struct xfs_buf *bp; /* buffer for last block of subvolume */ |
b391b7cd | 286 | xfs_daddr_t d; /* address of last block of subvolume */ |
113af235 | 287 | unsigned int rsumblocks; |
31079e67 | 288 | int error; |
b391b7cd | 289 | |
575f24e5 | 290 | if (mp->m_sb.sb_rblocks == 0) |
b391b7cd | 291 | return 0; |
4aaeedc4 | 292 | |
eefdf2ab | 293 | if (xfs_has_reflink(mp)) { |
4aaeedc4 DW |
294 | fprintf(stderr, |
295 | _("%s: Reflink not compatible with realtime device. Please try a newer xfsprogs.\n"), | |
296 | progname); | |
297 | return -1; | |
298 | } | |
299 | ||
eefdf2ab | 300 | if (xfs_has_rmapbt(mp)) { |
4aaeedc4 DW |
301 | fprintf(stderr, |
302 | _("%s: Reverse mapping btree not compatible with realtime device. Please try a newer xfsprogs.\n"), | |
303 | progname); | |
304 | return -1; | |
305 | } | |
306 | ||
2420d095 | 307 | if (mp->m_rtdev_targp->bt_bdev == 0 && !xfs_is_debugger(mp)) { |
9440d84d | 308 | fprintf(stderr, _("%s: filesystem has a realtime subvolume\n"), |
b391b7cd NS |
309 | progname); |
310 | return -1; | |
311 | } | |
575f24e5 | 312 | mp->m_rsumlevels = mp->m_sb.sb_rextslog + 1; |
113af235 DW |
313 | rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels, |
314 | mp->m_sb.sb_rbmblocks); | |
315 | mp->m_rsumsize = XFS_FSB_TO_B(mp, rsumblocks); | |
b391b7cd | 316 | mp->m_rbmip = mp->m_rsumip = NULL; |
39798eb5 NS |
317 | |
318 | /* | |
319 | * Allow debugger to be run without the realtime device present. | |
320 | */ | |
2420d095 | 321 | if (xfs_is_debugger(mp)) |
39798eb5 NS |
322 | return 0; |
323 | ||
b391b7cd NS |
324 | /* |
325 | * Check that the realtime section is an ok size. | |
326 | */ | |
327 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); | |
328 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { | |
9440d84d NS |
329 | fprintf(stderr, _("%s: realtime init - %llu != %llu\n"), |
330 | progname, (unsigned long long) XFS_BB_TO_FSB(mp, d), | |
b391b7cd NS |
331 | (unsigned long long) mp->m_sb.sb_rblocks); |
332 | return -1; | |
333 | } | |
31079e67 DW |
334 | error = libxfs_buf_read(mp->m_rtdev, d - XFS_FSB_TO_BB(mp, 1), |
335 | XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); | |
336 | if (error) { | |
9440d84d NS |
337 | fprintf(stderr, _("%s: realtime size check failed\n"), |
338 | progname); | |
b391b7cd NS |
339 | return -1; |
340 | } | |
e02ba985 | 341 | libxfs_buf_relse(bp); |
b391b7cd NS |
342 | return 0; |
343 | } | |
344 | ||
03dc2ef2 DC |
345 | static bool |
346 | xfs_set_inode_alloc_perag( | |
347 | struct xfs_perag *pag, | |
348 | xfs_ino_t ino, | |
349 | xfs_agnumber_t max_metadata) | |
350 | { | |
351 | if (!xfs_is_inode32(pag->pag_mount)) { | |
352 | set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); | |
353 | clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
354 | return false; | |
355 | } | |
356 | ||
357 | if (ino > XFS_MAXINUMBER_32) { | |
358 | clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); | |
359 | clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
360 | return false; | |
361 | } | |
362 | ||
363 | set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); | |
364 | if (pag->pag_agno < max_metadata) | |
365 | set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
366 | else | |
367 | clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); | |
368 | return true; | |
369 | } | |
370 | ||
b9ee1227 DW |
371 | /* |
372 | * Set parameters for inode allocation heuristics, taking into account | |
373 | * filesystem size and inode32/inode64 mount options; i.e. specifically | |
374 | * whether or not XFS_MOUNT_SMALL_INUMS is set. | |
375 | * | |
376 | * Inode allocation patterns are altered only if inode32 is requested | |
377 | * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large. | |
378 | * If altered, XFS_MOUNT_32BITINODES is set as well. | |
379 | * | |
380 | * An agcount independent of that in the mount structure is provided | |
381 | * because in the growfs case, mp->m_sb.sb_agcount is not yet updated | |
382 | * to the potentially higher ag count. | |
383 | * | |
384 | * Returns the maximum AG index which may contain inodes. | |
ed8f5980 DW |
385 | * |
386 | * NOTE: userspace has no concept of "inode32" and so xfs_has_small_inums | |
387 | * is always false, and much of this code is a no-op. | |
b9ee1227 DW |
388 | */ |
389 | xfs_agnumber_t | |
390 | xfs_set_inode_alloc( | |
391 | struct xfs_mount *mp, | |
392 | xfs_agnumber_t agcount) | |
393 | { | |
394 | xfs_agnumber_t index; | |
395 | xfs_agnumber_t maxagi = 0; | |
396 | xfs_sb_t *sbp = &mp->m_sb; | |
397 | xfs_agnumber_t max_metadata; | |
398 | xfs_agino_t agino; | |
399 | xfs_ino_t ino; | |
400 | ||
401 | /* | |
402 | * Calculate how much should be reserved for inodes to meet | |
403 | * the max inode percentage. Used only for inode32. | |
404 | */ | |
405 | if (M_IGEO(mp)->maxicount) { | |
406 | uint64_t icount; | |
407 | ||
408 | icount = sbp->sb_dblocks * sbp->sb_imax_pct; | |
409 | do_div(icount, 100); | |
410 | icount += sbp->sb_agblocks - 1; | |
411 | do_div(icount, sbp->sb_agblocks); | |
412 | max_metadata = icount; | |
413 | } else { | |
414 | max_metadata = agcount; | |
415 | } | |
416 | ||
417 | /* Get the last possible inode in the filesystem */ | |
418 | agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); | |
419 | ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); | |
420 | ||
421 | /* | |
422 | * If user asked for no more than 32-bit inodes, and the fs is | |
423 | * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter | |
424 | * the allocator to accommodate the request. | |
425 | */ | |
ed8f5980 | 426 | if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) |
03dc2ef2 | 427 | set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); |
ed8f5980 | 428 | else |
03dc2ef2 | 429 | clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); |
b9ee1227 DW |
430 | |
431 | for (index = 0; index < agcount; index++) { | |
432 | struct xfs_perag *pag; | |
433 | ||
434 | ino = XFS_AGINO_TO_INO(mp, index, agino); | |
435 | ||
436 | pag = xfs_perag_get(mp, index); | |
03dc2ef2 DC |
437 | if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) |
438 | maxagi++; | |
b9ee1227 DW |
439 | xfs_perag_put(pag); |
440 | } | |
441 | ||
0ee9753e | 442 | return xfs_is_inode32(mp) ? maxagi : agcount; |
b9ee1227 DW |
443 | } |
444 | ||
75c8b434 DC |
445 | static struct xfs_buftarg * |
446 | libxfs_buftarg_alloc( | |
447 | struct xfs_mount *mp, | |
fc83c757 | 448 | struct libxfs_dev *dev, |
704e4cef | 449 | unsigned long write_fails) |
75c8b434 DC |
450 | { |
451 | struct xfs_buftarg *btp; | |
452 | ||
453 | btp = malloc(sizeof(*btp)); | |
454 | if (!btp) { | |
455 | fprintf(stderr, _("%s: buftarg init failed\n"), | |
456 | progname); | |
457 | exit(1); | |
458 | } | |
459 | btp->bt_mount = mp; | |
fc83c757 CH |
460 | btp->bt_bdev = dev->dev; |
461 | btp->bt_bdev_fd = dev->fd; | |
c335b673 | 462 | btp->flags = 0; |
704e4cef DW |
463 | if (write_fails) { |
464 | btp->writes_left = write_fails; | |
465 | btp->flags |= XFS_BUFTARG_INJECT_WRITE_FAIL; | |
466 | } | |
467 | pthread_mutex_init(&btp->lock, NULL); | |
c335b673 | 468 | |
75c8b434 DC |
469 | return btp; |
470 | } | |
471 | ||
/* Indices of the suboptions accepted in LIBXFS_DEBUG_WRITE_CRASH. */
enum libxfs_write_failure_nums {
	WF_DATA		= 0,	/* "ddev" */
	WF_LOG		= 1,	/* "logdev" */
	WF_RT		= 2,	/* "rtdev" */
	WF_MAX_OPTS	= 3,	/* slot of the NULL terminator */
};

/* getsubopt() token table, indexed by enum libxfs_write_failure_nums. */
static char *wf_opts[] = {
	[WF_DATA]	= "ddev",
	[WF_LOG]	= "logdev",
	[WF_RT]		= "rtdev",
	[WF_MAX_OPTS]	= NULL,
};
485 | ||
75c8b434 DC |
486 | void |
487 | libxfs_buftarg_init( | |
488 | struct xfs_mount *mp, | |
ca8cc76e | 489 | struct libxfs_init *xi) |
75c8b434 | 490 | { |
704e4cef DW |
491 | char *p = getenv("LIBXFS_DEBUG_WRITE_CRASH"); |
492 | unsigned long dfail = 0, lfail = 0, rfail = 0; | |
493 | ||
494 | /* Simulate utility crash after a certain number of writes. */ | |
495 | while (p && *p) { | |
496 | char *val; | |
497 | ||
498 | switch (getsubopt(&p, wf_opts, &val)) { | |
499 | case WF_DATA: | |
500 | if (!val) { | |
501 | fprintf(stderr, | |
502 | _("ddev write fail requires a parameter\n")); | |
503 | exit(1); | |
504 | } | |
505 | dfail = strtoul(val, NULL, 0); | |
506 | break; | |
507 | case WF_LOG: | |
508 | if (!val) { | |
509 | fprintf(stderr, | |
510 | _("logdev write fail requires a parameter\n")); | |
511 | exit(1); | |
512 | } | |
513 | lfail = strtoul(val, NULL, 0); | |
514 | break; | |
515 | case WF_RT: | |
516 | if (!val) { | |
517 | fprintf(stderr, | |
518 | _("rtdev write fail requires a parameter\n")); | |
519 | exit(1); | |
520 | } | |
521 | rfail = strtoul(val, NULL, 0); | |
522 | break; | |
523 | default: | |
524 | fprintf(stderr, _("unknown write fail type %s\n"), | |
525 | val); | |
526 | exit(1); | |
527 | break; | |
528 | } | |
529 | } | |
530 | ||
75c8b434 DC |
531 | if (mp->m_ddev_targp) { |
532 | /* should already have all buftargs initialised */ | |
fc83c757 | 533 | if (mp->m_ddev_targp->bt_bdev != xi->data.dev || |
75c8b434 DC |
534 | mp->m_ddev_targp->bt_mount != mp) { |
535 | fprintf(stderr, | |
536 | _("%s: bad buftarg reinit, ddev\n"), | |
537 | progname); | |
538 | exit(1); | |
539 | } | |
fc83c757 | 540 | if (!xi->log.dev || xi->log.dev == xi->data.dev) { |
75c8b434 DC |
541 | if (mp->m_logdev_targp != mp->m_ddev_targp) { |
542 | fprintf(stderr, | |
543 | _("%s: bad buftarg reinit, ldev mismatch\n"), | |
544 | progname); | |
545 | exit(1); | |
546 | } | |
fc83c757 | 547 | } else if (mp->m_logdev_targp->bt_bdev != xi->log.dev || |
75c8b434 DC |
548 | mp->m_logdev_targp->bt_mount != mp) { |
549 | fprintf(stderr, | |
550 | _("%s: bad buftarg reinit, logdev\n"), | |
551 | progname); | |
552 | exit(1); | |
553 | } | |
fc83c757 CH |
554 | if (xi->rt.dev && |
555 | (mp->m_rtdev_targp->bt_bdev != xi->rt.dev || | |
ca8cc76e | 556 | mp->m_rtdev_targp->bt_mount != mp)) { |
75c8b434 DC |
557 | fprintf(stderr, |
558 | _("%s: bad buftarg reinit, rtdev\n"), | |
559 | progname); | |
560 | exit(1); | |
561 | } | |
562 | return; | |
563 | } | |
564 | ||
fc83c757 CH |
565 | mp->m_ddev_targp = libxfs_buftarg_alloc(mp, &xi->data, dfail); |
566 | if (!xi->log.dev || xi->log.dev == xi->data.dev) | |
75c8b434 DC |
567 | mp->m_logdev_targp = mp->m_ddev_targp; |
568 | else | |
fc83c757 CH |
569 | mp->m_logdev_targp = libxfs_buftarg_alloc(mp, &xi->log, lfail); |
570 | mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, &xi->rt, rfail); | |
75c8b434 DC |
571 | } |
572 | ||
6afce48f DW |
573 | /* Compute maximum possible height for per-AG btree types for this fs. */ |
574 | static inline void | |
575 | xfs_agbtree_compute_maxlevels( | |
576 | struct xfs_mount *mp) | |
577 | { | |
578 | unsigned int levels; | |
579 | ||
580 | levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels); | |
581 | levels = max(levels, mp->m_rmap_maxlevels); | |
582 | mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); | |
583 | } | |
584 | ||
7aeffc87 DW |
585 | /* Compute maximum possible height of all btrees. */ |
586 | void | |
587 | libxfs_compute_all_maxlevels( | |
588 | struct xfs_mount *mp) | |
589 | { | |
590 | xfs_alloc_compute_maxlevels(mp); | |
591 | xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); | |
592 | xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); | |
593 | xfs_ialloc_setup_geometry(mp); | |
594 | xfs_rmapbt_compute_maxlevels(mp); | |
595 | xfs_refcountbt_compute_maxlevels(mp); | |
596 | ||
597 | xfs_agbtree_compute_maxlevels(mp); | |
598 | } | |
599 | ||
33f3aac8 DC |
600 | /* |
601 | * precalculate the low space thresholds for dynamic speculative preallocation. | |
602 | */ | |
603 | static void | |
604 | xfs_set_low_space_thresholds( | |
605 | struct xfs_mount *mp) | |
606 | { | |
607 | uint64_t dblocks = mp->m_sb.sb_dblocks; | |
608 | int i; | |
609 | ||
610 | do_div(dblocks, 100); | |
611 | ||
612 | for (i = 0; i < XFS_LOWSP_MAX; i++) | |
613 | mp->m_low_space[i] = dblocks * (i + 1); | |
614 | } | |
615 | ||
2bd0ea18 NS |
616 | /* |
617 | * Mount structure initialization, provides a filled-in xfs_mount_t | |
618 | * such that the numerous XFS_* macros can be used. If dev is zero, | |
619 | * no IO will be performed (no size checks, read root inodes). | |
620 | */ | |
d855bce8 | 621 | struct xfs_mount * |
2bd0ea18 | 622 | libxfs_mount( |
d855bce8 DW |
623 | struct xfs_mount *mp, |
624 | struct xfs_sb *sb, | |
ddd9942b | 625 | struct libxfs_init *xi, |
ed8f5980 | 626 | unsigned int flags) |
2bd0ea18 | 627 | { |
d855bce8 DW |
628 | struct xfs_buf *bp; |
629 | struct xfs_sb *sbp; | |
630 | xfs_daddr_t d; | |
d855bce8 | 631 | int error; |
2bd0ea18 | 632 | |
3bc1fdd4 | 633 | mp->m_features = xfs_sb_version_to_features(sb); |
2420d095 DW |
634 | if (flags & LIBXFS_MOUNT_DEBUGGER) |
635 | xfs_set_debugger(mp); | |
e42c53f3 | 636 | if (flags & LIBXFS_MOUNT_REPORT_CORRUPTION) |
2420d095 | 637 | xfs_set_reporting_corruption(mp); |
ca8cc76e | 638 | libxfs_buftarg_init(mp, xi); |
75c8b434 | 639 | |
f747f7dd | 640 | mp->m_finobt_nores = true; |
0ee9753e | 641 | xfs_set_inode32(mp); |
2bd0ea18 | 642 | mp->m_sb = *sb; |
56b2de80 | 643 | INIT_RADIX_TREE(&mp->m_perag_tree, GFP_KERNEL); |
686bddf9 DC |
644 | sbp = &mp->m_sb; |
645 | spin_lock_init(&mp->m_sb_lock); | |
646 | spin_lock_init(&mp->m_agirotor_lock); | |
2bd0ea18 | 647 | |
4896e6c8 | 648 | xfs_sb_mount_common(mp, sb); |
2bd0ea18 | 649 | |
949c0f10 NS |
650 | /* |
651 | * Set whether we're using stripe alignment. | |
652 | */ | |
2660e653 | 653 | if (xfs_has_dalign(mp)) { |
949c0f10 NS |
654 | mp->m_dalign = sbp->sb_unit; |
655 | mp->m_swidth = sbp->sb_width; | |
656 | } | |
657 | ||
7aeffc87 | 658 | libxfs_compute_all_maxlevels(mp); |
6afce48f | 659 | |
2bd0ea18 NS |
660 | /* |
661 | * Check that the data (and log if separate) are an ok size. | |
662 | */ | |
9440d84d | 663 | d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); |
2bd0ea18 | 664 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { |
9440d84d | 665 | fprintf(stderr, _("%s: size check failed\n"), progname); |
2420d095 | 666 | if (!xfs_is_debugger(mp)) |
4ca431fc | 667 | return NULL; |
2bd0ea18 NS |
668 | } |
669 | ||
ff105f75 DC |
670 | /* |
671 | * We automatically convert v1 inodes to v2 inodes now, so if | |
672 | * the NLINK bit is not set we can't operate on the filesystem. | |
673 | */ | |
674 | if (!(sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { | |
675 | ||
676 | fprintf(stderr, _( | |
677 | "%s: V1 inodes unsupported. Please try an older xfsprogs.\n"), | |
678 | progname); | |
679 | exit(1); | |
680 | } | |
681 | ||
682 | /* Check for supported directory formats */ | |
683 | if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) { | |
9a048535 DC |
684 | |
685 | fprintf(stderr, _( | |
686 | "%s: V1 directories unsupported. Please try an older xfsprogs.\n"), | |
687 | progname); | |
688 | exit(1); | |
5e656dbb | 689 | } |
2bd0ea18 | 690 | |
ff105f75 DC |
691 | /* check for unsupported other features */ |
692 | if (!xfs_sb_good_version(sbp)) { | |
693 | fprintf(stderr, _( | |
694 | "%s: Unsupported features detected. Please try a newer xfsprogs.\n"), | |
695 | progname); | |
696 | exit(1); | |
697 | } | |
698 | ||
699 | xfs_da_mount(mp); | |
700 | ||
2bd0ea18 | 701 | /* Initialize the precomputed transaction reservations values */ |
5e656dbb | 702 | xfs_trans_init(mp); |
2bd0ea18 | 703 | |
fc83c757 | 704 | if (xi->data.dev == 0) /* maxtrres, we have no device so leave now */ |
2bd0ea18 NS |
705 | return mp; |
706 | ||
d855bce8 | 707 | /* device size checks must pass unless we're a debugger. */ |
31079e67 DW |
708 | error = libxfs_buf_read(mp->m_dev, d - XFS_FSS_TO_BB(mp, 1), |
709 | XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); | |
710 | if (error) { | |
9440d84d | 711 | fprintf(stderr, _("%s: data size check failed\n"), progname); |
2420d095 | 712 | if (!xfs_is_debugger(mp)) |
4ca431fc | 713 | return NULL; |
32244196 | 714 | } else |
e02ba985 | 715 | libxfs_buf_relse(bp); |
2bd0ea18 | 716 | |
ab434d12 DC |
717 | if (mp->m_logdev_targp->bt_bdev && |
718 | mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) { | |
9440d84d | 719 | d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); |
31079e67 DW |
720 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks || |
721 | libxfs_buf_read(mp->m_logdev_targp, | |
722 | d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1), | |
723 | 0, &bp, NULL)) { | |
9440d84d | 724 | fprintf(stderr, _("%s: log size checks failed\n"), |
2bd0ea18 | 725 | progname); |
2420d095 | 726 | if (!xfs_is_debugger(mp)) |
4ca431fc | 727 | return NULL; |
2bd0ea18 | 728 | } |
32244196 | 729 | if (bp) |
e02ba985 | 730 | libxfs_buf_relse(bp); |
2bd0ea18 NS |
731 | } |
732 | ||
33f3aac8 DC |
733 | xfs_set_low_space_thresholds(mp); |
734 | ||
2bd0ea18 | 735 | /* Initialize realtime fields in the mount structure */ |
2420d095 | 736 | if (rtmount_init(mp)) { |
9440d84d NS |
737 | fprintf(stderr, _("%s: realtime device init failed\n"), |
738 | progname); | |
4ca431fc | 739 | return NULL; |
2bd0ea18 NS |
740 | } |
741 | ||
a547152d ES |
742 | /* |
743 | * libxfs_initialize_perag will allocate a perag structure for each ag. | |
744 | * If agcount is corrupted and insanely high, this will OOM the box. | |
745 | * If the agount seems (arbitrarily) high, try to read what would be | |
746 | * the last AG, and if that fails for a relatively high agcount, just | |
747 | * read the first one and let the user know to check the geometry. | |
748 | */ | |
749 | if (sbp->sb_agcount > 1000000) { | |
31079e67 | 750 | error = libxfs_buf_read(mp->m_dev, |
a547152d | 751 | XFS_AG_DADDR(mp, sbp->sb_agcount - 1, 0), 1, |
31079e67 DW |
752 | 0, &bp, NULL); |
753 | if (error) { | |
a547152d ES |
754 | fprintf(stderr, _("%s: read of AG %u failed\n"), |
755 | progname, sbp->sb_agcount); | |
2420d095 | 756 | if (!xfs_is_debugger(mp)) |
a547152d ES |
757 | return NULL; |
758 | fprintf(stderr, _("%s: limiting reads to AG 0\n"), | |
759 | progname); | |
760 | sbp->sb_agcount = 1; | |
31079e67 DW |
761 | } else |
762 | libxfs_buf_relse(bp); | |
a547152d ES |
763 | } |
764 | ||
83af0d13 DC |
765 | error = libxfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks, |
766 | &mp->m_maxagi); | |
56b2de80 DC |
767 | if (error) { |
768 | fprintf(stderr, _("%s: perag init failed\n"), | |
769 | progname); | |
2bd0ea18 NS |
770 | exit(1); |
771 | } | |
2420d095 | 772 | xfs_set_perag_data_loaded(mp); |
2bd0ea18 | 773 | |
2bd0ea18 NS |
774 | return mp; |
775 | } | |
776 | ||
f1b058f9 NS |
777 | void |
778 | libxfs_rtmount_destroy(xfs_mount_t *mp) | |
779 | { | |
780 | if (mp->m_rsumip) | |
31845e4c | 781 | libxfs_irele(mp->m_rsumip); |
f1b058f9 | 782 | if (mp->m_rbmip) |
31845e4c | 783 | libxfs_irele(mp->m_rbmip); |
f1b058f9 NS |
784 | mp->m_rsumip = mp->m_rbmip = NULL; |
785 | } | |
786 | ||
c335b673 DW |
787 | /* Flush a device and report on writes that didn't make it to stable storage. */ |
788 | static inline int | |
789 | libxfs_flush_buftarg( | |
790 | struct xfs_buftarg *btp, | |
791 | const char *buftarg_descr) | |
792 | { | |
793 | int error = 0; | |
794 | int err2; | |
795 | ||
796 | /* | |
797 | * Write verifier failures are evidence of a buggy program. Make sure | |
798 | * that this state is always reported to the caller. | |
799 | */ | |
800 | if (btp->flags & XFS_BUFTARG_CORRUPT_WRITE) { | |
801 | fprintf(stderr, | |
802 | _("%s: Refusing to write a corrupt buffer to the %s!\n"), | |
803 | progname, buftarg_descr); | |
804 | error = -EFSCORRUPTED; | |
805 | } | |
806 | ||
807 | if (btp->flags & XFS_BUFTARG_LOST_WRITE) { | |
808 | fprintf(stderr, | |
809 | _("%s: Lost a write to the %s!\n"), | |
810 | progname, buftarg_descr); | |
811 | if (!error) | |
812 | error = -EIO; | |
813 | } | |
814 | ||
815 | err2 = libxfs_blkdev_issue_flush(btp); | |
816 | if (err2) { | |
817 | fprintf(stderr, | |
818 | _("%s: Flushing the %s failed, err=%d!\n"), | |
819 | progname, buftarg_descr, -err2); | |
820 | } | |
821 | if (!error) | |
822 | error = err2; | |
823 | ||
824 | return error; | |
825 | } | |
826 | ||
827 | /* | |
828 | * Flush all dirty buffers to stable storage and report on writes that didn't | |
829 | * make it to stable storage. | |
830 | */ | |
a7348c58 | 831 | int |
c335b673 DW |
832 | libxfs_flush_mount( |
833 | struct xfs_mount *mp) | |
834 | { | |
835 | int error = 0; | |
836 | int err2; | |
837 | ||
838 | /* | |
a7348c58 DW |
839 | * Flush the buffer cache to write all dirty buffers to disk. Buffers |
840 | * that fail write verification will cause the CORRUPT_WRITE flag to be | |
841 | * set in the buftarg. Buffers that cannot be written will cause the | |
842 | * LOST_WRITE flag to be set in the buftarg. Once that's done, | |
843 | * instruct the disks to persist their write caches. | |
c335b673 | 844 | */ |
a7348c58 | 845 | libxfs_bcache_flush(); |
c335b673 DW |
846 | |
847 | /* Flush all kernel and disk write caches, and report failures. */ | |
848 | if (mp->m_ddev_targp) { | |
849 | err2 = libxfs_flush_buftarg(mp->m_ddev_targp, _("data device")); | |
850 | if (!error) | |
851 | error = err2; | |
852 | } | |
853 | ||
854 | if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { | |
855 | err2 = libxfs_flush_buftarg(mp->m_logdev_targp, | |
856 | _("log device")); | |
857 | if (!error) | |
858 | error = err2; | |
859 | } | |
860 | ||
861 | if (mp->m_rtdev_targp) { | |
862 | err2 = libxfs_flush_buftarg(mp->m_rtdev_targp, | |
863 | _("realtime device")); | |
864 | if (!error) | |
865 | error = err2; | |
866 | } | |
867 | ||
868 | return error; | |
869 | } | |
870 | ||
2bd0ea18 | 871 | /* |
9440d84d | 872 | * Release any resource obtained during a mount. |
2bd0ea18 | 873 | */ |
c335b673 DW |
874 | int |
875 | libxfs_umount( | |
876 | struct xfs_mount *mp) | |
2bd0ea18 | 877 | { |
c335b673 | 878 | int error; |
56b2de80 | 879 | |
f1b058f9 | 880 | libxfs_rtmount_destroy(mp); |
c335b673 | 881 | |
a7348c58 DW |
882 | /* |
883 | * Purge the buffer cache to write all dirty buffers to disk and free | |
884 | * all incore buffers, then pick up the outcome when we tell the disks | |
885 | * to persist their write caches. | |
886 | */ | |
887 | libxfs_bcache_purge(); | |
c335b673 | 888 | error = libxfs_flush_mount(mp); |
f1b058f9 | 889 | |
7bf9cd9d DW |
890 | /* |
891 | * Only try to free the per-AG structures if we set them up in the | |
892 | * first place. | |
893 | */ | |
2420d095 | 894 | if (xfs_is_perag_data_loaded(mp)) |
4bcd30f6 | 895 | libxfs_free_perag(mp); |
4334e2e8 ES |
896 | |
897 | kmem_free(mp->m_attr_geo); | |
898 | kmem_free(mp->m_dir_geo); | |
899 | ||
900 | kmem_free(mp->m_rtdev_targp); | |
901 | if (mp->m_logdev_targp != mp->m_ddev_targp) | |
902 | kmem_free(mp->m_logdev_targp); | |
903 | kmem_free(mp->m_ddev_targp); | |
f8149110 | 904 | |
c335b673 | 905 | return error; |
2bd0ea18 | 906 | } |
f1b058f9 NS |
907 | |
908 | /* | |
909 | * Release any global resources used by libxfs. | |
910 | */ | |
911 | void | |
a9468486 | 912 | libxfs_destroy( |
01dcfd9e | 913 | struct libxfs_init *li) |
f1b058f9 | 914 | { |
a9468486 DW |
915 | int leaked; |
916 | ||
917 | libxfs_close_devices(li); | |
44488491 | 918 | |
2e1394fc | 919 | /* Free everything from the buffer cache before freeing buffer cache */ |
864028ed ES |
920 | libxfs_bcache_purge(); |
921 | libxfs_bcache_free(); | |
f1b058f9 | 922 | cache_destroy(libxfs_bcache); |
2e1394fc | 923 | leaked = destroy_caches(); |
e4da1b16 | 924 | rcu_unregister_thread(); |
44488491 ES |
925 | if (getenv("LIBXFS_LEAK_CHECK") && leaked) |
926 | exit(1); | |
f1b058f9 | 927 | } |
9f38f08d | 928 | |
/* Return whatever platform_align_blockdev() reports for this platform. */
int
libxfs_device_alignment(void)
{
	int	alignment = platform_align_blockdev();

	return alignment;
}
934 | ||
9f38f08d | 935 | void |
b6281496 | 936 | libxfs_report(FILE *fp) |
9f38f08d | 937 | { |
cb5b3ef4 MV |
938 | time_t t; |
939 | char *c; | |
940 | ||
b6281496 | 941 | cache_report(fp, "libxfs_bcache", libxfs_bcache); |
cb5b3ef4 MV |
942 | |
943 | t = time(NULL); | |
944 | c = asctime(localtime(&t)); | |
945 | fprintf(fp, "%s", c); | |
946 | } |