1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6
7
8 #include "libxfs_priv.h"
9 #include "init.h"
10 #include "xfs_fs.h"
11 #include "xfs_shared.h"
12 #include "xfs_format.h"
13 #include "xfs_log_format.h"
14 #include "xfs_trans_resv.h"
15 #include "xfs_mount.h"
16 #include "xfs_inode_buf.h"
17 #include "xfs_inode_fork.h"
18 #include "xfs_inode.h"
19 #include "xfs_trans.h"
20 #include "libfrog/platform.h"
21
22 #include "libxfs.h"
23
24 static void libxfs_brelse(struct cache_node *node);
25
26 /*
27 * Important design/architecture note:
28 *
29 * The userspace code that uses the buffer cache is much less constrained than
30 * the kernel code. The userspace code is pretty nasty in places, especially
31 * when it comes to buffer error handling. Very little of the userspace code
32 * outside libxfs clears bp->b_error - very little code even checks it - so the
33 * libxfs code is tripping on stale errors left by the userspace code.
34 *
35 * We can't clear errors or zero buffer contents in libxfs_buf_get-* like we do
36 * in the kernel, because those functions are used by the libxfs_readbuf_*
37 * functions and hence need to leave the buffers unchanged on cache hits. This
38 * is actually the only way to gather a write error from a libxfs_writebuf()
39 * call - you need to get the buffer again so you can check the bp->b_error field -
40 * assuming that the buffer is still in the cache when you check, that is.
41 *
42 * This is very different to the kernel code, which does not release buffers on
43 * write, so it can wait on IO and check errors. The kernel buffer cache also
44 * guarantees a buffer of a known initial state from xfs_buf_get() even on a
45 * cache hit.
46 *
47 * IOWs, userspace is behaving quite differently to the kernel and as a result
48 * it leaks errors from reads, invalidations and writes through
49 * libxfs_buf_get/libxfs_buf_read.
50 *
51 * The result of this is that until the userspace code outside libxfs is cleaned
52 * up, functions that release buffers from userspace control (i.e.
53 * libxfs_writebuf/libxfs_buf_relse) need to zero bp->b_error to prevent
54 * propagation of stale errors into future buffer operations.
55 */
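
/*
 * Illustrative sketch (not part of libxfs) of the error propagation model
 * described above: the only way to observe a buffered write failure is to
 * look the buffer up again and inspect bp->b_error before it is cleared.
 * The target, block number and length below are hypothetical.
 */
#if 0
static void
example_check_write_error(struct xfs_buftarg *btp, xfs_daddr_t daddr, int bblen)
{
	struct xfs_buf_map	map = { .bm_bn = daddr, .bm_len = bblen };
	struct xfs_buf		*bp;

	/*
	 * Re-get the buffer; if it is still cached, a prior write error
	 * is left in bp->b_error.
	 */
	if (libxfs_buf_get_map(btp, &map, 1, 0, &bp))
		return;
	if (bp->b_error)
		fprintf(stderr, "earlier write failed: %d\n", bp->b_error);

	/* Releasing the buffer clears b_error so the error cannot go stale. */
	libxfs_buf_relse(bp);
}
#endif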
56
57 #define BDSTRAT_SIZE (256 * 1024)
58
59 #define IO_BCOMPARE_CHECK
60
61 /* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */
62 int
63 libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
64 {
65 xfs_off_t start_offset, end_offset, offset;
66 ssize_t zsize, bytes;
67 size_t len_bytes;
68 char *z;
69 int error, fd;
70
71 fd = libxfs_device_to_fd(btp->bt_bdev);
72 start_offset = LIBXFS_BBTOOFF64(start);
73
74 /* try to use special zeroing methods, fall back to writes if needed */
75 len_bytes = LIBXFS_BBTOOFF64(len);
76 error = platform_zero_range(fd, start_offset, len_bytes);
77 if (!error) {
78 xfs_buftarg_trip_write(btp);
79 return 0;
80 }
81
82 zsize = min(BDSTRAT_SIZE, BBTOB(len));
83 if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
84 fprintf(stderr,
85 _("%s: %s can't memalign %d bytes: %s\n"),
86 progname, __FUNCTION__, (int)zsize, strerror(errno));
87 exit(1);
88 }
89 memset(z, 0, zsize);
90
91 if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
92 fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
93 progname, __FUNCTION__,
94 (unsigned long long)start_offset, strerror(errno));
95 exit(1);
96 }
97
98 end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
99 for (offset = 0; offset < end_offset; ) {
100 bytes = min((ssize_t)(end_offset - offset), zsize);
101 if ((bytes = write(fd, z, bytes)) < 0) {
102 fprintf(stderr, _("%s: %s write failed: %s\n"),
103 progname, __FUNCTION__, strerror(errno));
104 exit(1);
105 } else if (bytes == 0) {
106 fprintf(stderr, _("%s: %s not progressing?\n"),
107 progname, __FUNCTION__);
108 exit(1);
109 }
110 xfs_buftarg_trip_write(btp);
111 offset += bytes;
112 }
113 free(z);
114 return 0;
115 }
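
/*
 * Illustrative usage sketch for libxfs_device_zero() above. The mount and
 * extent are hypothetical; both start and length are in 512-byte basic
 * blocks, with XFS_FSB_TO_DADDR()/XFS_FSB_TO_BB() converting from
 * filesystem blocks.
 */
#if 0
static void
example_zero_region(struct xfs_mount *mp)
{
	xfs_daddr_t	start = XFS_FSB_TO_DADDR(mp, 16);	/* hypothetical */
	uint		len = XFS_FSB_TO_BB(mp, 4);		/* four fs blocks */

	/* Zero the range, falling back to buffered writes of zeros. */
	libxfs_device_zero(mp->m_ddev_targp, start, len);
}
#endif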
116
117 static void unmount_record(void *p)
118 {
119 xlog_op_header_t *op = (xlog_op_header_t *)p;
120 /* the data section must be 32 bit size aligned */
121 struct {
122 uint16_t magic;
123 uint16_t pad1;
124 uint32_t pad2; /* may as well make it 64 bits */
125 } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
126
127 memset(p, 0, BBSIZE);
128 /* dummy tid to mark this as written from userspace */
129 op->oh_tid = cpu_to_be32(0xb0c0d0d0);
130 op->oh_len = cpu_to_be32(sizeof(magic));
131 op->oh_clientid = XFS_LOG;
132 op->oh_flags = XLOG_UNMOUNT_TRANS;
133 op->oh_res2 = 0;
134
135 /* and the data for this op */
136 memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
137 }
138
139 static char *next(
140 char *ptr,
141 int offset,
142 void *private)
143 {
144 struct xfs_buf *buf = (struct xfs_buf *)private;
145
146 if (buf &&
147 (BBTOB(buf->b_length) < (int)(ptr - (char *)buf->b_addr) + offset))
148 abort();
149
150 return ptr + offset;
151 }
152
153 struct xfs_buf *
154 libxfs_getsb(
155 struct xfs_mount *mp)
156 {
157 struct xfs_buf *bp;
158
159 libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, XFS_FSS_TO_BB(mp, 1),
160 0, &bp, &xfs_sb_buf_ops);
161 return bp;
162 }
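
/*
 * Illustrative sketch: read the primary superblock with libxfs_getsb() and
 * peek at the on-disk magic. The mount pointer is hypothetical; note that
 * bp can come back NULL if the verified read fails.
 */
#if 0
static void
example_dump_sb_magic(struct xfs_mount *mp)
{
	struct xfs_buf	*bp = libxfs_getsb(mp);
	struct xfs_dsb	*dsb;

	if (!bp)
		return;
	dsb = bp->b_addr;
	printf("sb magic 0x%x\n", be32_to_cpu(dsb->sb_magicnum));
	libxfs_buf_relse(bp);
}
#endif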
163
164 struct kmem_cache *xfs_buf_cache;
165
166 static struct cache_mru xfs_buf_freelist =
167 {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
168 0, PTHREAD_MUTEX_INITIALIZER };
169
170 /*
171 * The bufkey is used to pass the new buffer information to the cache object
172 * allocation routine. Because discontiguous buffers need to pass different
173 * information, we need fields to pass that information. However, because the
174 * blkno and bblen are needed for the initial cache entry lookup (i.e. for
175 * bcompare), we use the fact that the map/nmaps fields are non-null to switch
176 * to discontiguous buffer initialisation instead of a contiguous buffer.
177 */
178 struct xfs_bufkey {
179 struct xfs_buftarg *buftarg;
180 xfs_daddr_t blkno;
181 unsigned int bblen;
182 struct xfs_buf_map *map;
183 int nmaps;
184 };
185
186 /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
187 #define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL
188 #define CACHE_LINE_SIZE 64
189 static unsigned int
190 libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift)
191 {
192 uint64_t hashval = ((struct xfs_bufkey *)key)->blkno;
193 uint64_t tmp;
194
195 tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE;
196 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift);
197 return tmp % hashsize;
198 }
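
/*
 * Illustrative sketch of how a lookup key maps to a hash chain: only the
 * block number feeds the hash, which is then reduced modulo the table
 * size. All values are hypothetical and libxfs_bhash() is static to this
 * file, so the sketch only makes sense here.
 */
#if 0
static unsigned int
example_bucket_for(struct xfs_buftarg *btp, xfs_daddr_t blkno,
		unsigned int hashsize, unsigned int hashshift)
{
	struct xfs_bufkey	key = { .buftarg = btp, .blkno = blkno };

	return libxfs_bhash(&key, hashsize, hashshift);
}
#endif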
199
200 static int
201 libxfs_bcompare(struct cache_node *node, cache_key_t key)
202 {
203 struct xfs_buf *bp = container_of(node, struct xfs_buf,
204 b_node);
205 struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;
206
207 if (bp->b_target->bt_bdev == bkey->buftarg->bt_bdev &&
208 bp->b_cache_key == bkey->blkno) {
209 if (bp->b_length == bkey->bblen)
210 return CACHE_HIT;
211 #ifdef IO_BCOMPARE_CHECK
212 if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
213 fprintf(stderr,
214 "%lx: Badness in key lookup (length)\n"
215 "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
216 pthread_self(),
217 (unsigned long long)xfs_buf_daddr(bp),
218 BBTOB(bp->b_length),
219 (unsigned long long)bkey->blkno,
220 BBTOB(bkey->bblen));
221 }
222 #endif
223 return CACHE_PURGE;
224 }
225 return CACHE_MISS;
226 }
227
228 static void
229 __initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
230 unsigned int bytes)
231 {
232 bp->b_flags = 0;
233 bp->b_cache_key = bno;
234 bp->b_length = BTOBB(bytes);
235 bp->b_target = btp;
236 bp->b_mount = btp->bt_mount;
237 bp->b_error = 0;
238 if (!bp->b_addr)
239 bp->b_addr = memalign(libxfs_device_alignment(), bytes);
240 if (!bp->b_addr) {
241 fprintf(stderr,
242 _("%s: %s can't memalign %u bytes: %s\n"),
243 progname, __FUNCTION__, bytes,
244 strerror(errno));
245 exit(1);
246 }
247 memset(bp->b_addr, 0, bytes);
248 pthread_mutex_init(&bp->b_lock, NULL);
249 bp->b_holder = 0;
250 bp->b_recur = 0;
251 bp->b_ops = NULL;
252 INIT_LIST_HEAD(&bp->b_li_list);
253
254 if (!bp->b_maps)
255 bp->b_maps = &bp->__b_map;
256
257 if (bp->b_maps == &bp->__b_map) {
258 bp->b_nmaps = 1;
259 bp->b_maps[0].bm_bn = bno;
260 bp->b_maps[0].bm_len = bp->b_length;
261 }
262 }
263
264 static void
265 libxfs_initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
266 unsigned int bytes)
267 {
268 __initbuf(bp, btp, bno, bytes);
269 }
270
271 static void
272 libxfs_initbuf_map(struct xfs_buf *bp, struct xfs_buftarg *btp,
273 struct xfs_buf_map *map, int nmaps)
274 {
275 unsigned int bytes = 0;
276 int i;
277
278 bytes = sizeof(struct xfs_buf_map) * nmaps;
279 bp->b_maps = malloc(bytes);
280 if (!bp->b_maps) {
281 fprintf(stderr,
282 _("%s: %s can't malloc %u bytes: %s\n"),
283 progname, __FUNCTION__, bytes,
284 strerror(errno));
285 exit(1);
286 }
287 bp->b_nmaps = nmaps;
288
289 bytes = 0;
290 for (i = 0; i < nmaps; i++) {
291 bp->b_maps[i].bm_bn = map[i].bm_bn;
292 bp->b_maps[i].bm_len = map[i].bm_len;
293 bytes += BBTOB(map[i].bm_len);
294 }
295
296 __initbuf(bp, btp, map[0].bm_bn, bytes);
297 bp->b_flags |= LIBXFS_B_DISCONTIG;
298 }
299
300 static struct xfs_buf *
301 __libxfs_getbufr(int blen)
302 {
303 struct xfs_buf *bp;
304
305 /*
306 * First look for a free-list buffer that can be used as-is; if one of
307 * the right size cannot be found, take any buffer off the free list,
308 * free its data area and set b_addr to NULL so that libxfs_initbuf
309 * will allocate a fresh, correctly sized data area.
310 */
311 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
312 if (!list_empty(&xfs_buf_freelist.cm_list)) {
313 list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
314 if (bp->b_length == BTOBB(blen)) {
315 list_del_init(&bp->b_node.cn_mru);
316 break;
317 }
318 }
319 if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
320 bp = list_entry(xfs_buf_freelist.cm_list.next,
321 struct xfs_buf, b_node.cn_mru);
322 list_del_init(&bp->b_node.cn_mru);
323 free(bp->b_addr);
324 bp->b_addr = NULL;
325 if (bp->b_maps != &bp->__b_map)
326 free(bp->b_maps);
327 bp->b_maps = NULL;
328 }
329 } else
330 bp = kmem_cache_zalloc(xfs_buf_cache, 0);
331 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
332 bp->b_ops = NULL;
333 if (bp->b_flags & LIBXFS_B_DIRTY)
334 fprintf(stderr, "found dirty buffer (bulk) on free list!\n");
335
336 return bp;
337 }
338
339 static struct xfs_buf *
340 libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
341 {
342 struct xfs_buf *bp;
343 int blen = BBTOB(bblen);
344
345 bp = __libxfs_getbufr(blen);
346 if (bp)
347 libxfs_initbuf(bp, btp, blkno, blen);
348 return bp;
349 }
350
351 static struct xfs_buf *
352 libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
353 struct xfs_buf_map *map, int nmaps)
354 {
355 struct xfs_buf *bp;
356 int blen = BBTOB(bblen);
357
358 if (!map || !nmaps) {
359 fprintf(stderr,
360 _("%s: %s invalid map %p or nmaps %d\n"),
361 progname, __FUNCTION__, map, nmaps);
362 exit(1);
363 }
364
365 if (blkno != map[0].bm_bn) {
366 fprintf(stderr,
367 _("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"),
368 progname, __FUNCTION__, (long long)map[0].bm_bn,
369 (long long)blkno);
370 exit(1);
371 }
372
373 bp = __libxfs_getbufr(blen);
374 if (bp)
375 libxfs_initbuf_map(bp, btp, map, nmaps);
376 return bp;
377 }
378
379 void
380 xfs_buf_lock(
381 struct xfs_buf *bp)
382 {
383 if (use_xfs_buf_lock)
384 pthread_mutex_lock(&bp->b_lock);
385 }
386
387 static int
388 __cache_lookup(
389 struct xfs_bufkey *key,
390 unsigned int flags,
391 struct xfs_buf **bpp)
392 {
393 struct cache_node *cn = NULL;
394 struct xfs_buf *bp;
395
396 *bpp = NULL;
397
398 cache_node_get(libxfs_bcache, key, &cn);
399 if (!cn)
400 return -ENOMEM;
401 bp = container_of(cn, struct xfs_buf, b_node);
402
403 if (use_xfs_buf_lock) {
404 int ret;
405
406 ret = pthread_mutex_trylock(&bp->b_lock);
407 if (ret) {
408 ASSERT(ret == EAGAIN);
409 if (flags & LIBXFS_GETBUF_TRYLOCK) {
410 cache_node_put(libxfs_bcache, cn);
411 return -EAGAIN;
412 }
413
414 if (pthread_equal(bp->b_holder, pthread_self())) {
415 fprintf(stderr,
416 _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
417 key->blkno);
418 bp->b_recur++;
419 *bpp = bp;
420 return 0;
421 } else {
422 pthread_mutex_lock(&bp->b_lock);
423 }
424 }
425
426 bp->b_holder = pthread_self();
427 }
428
429 cache_node_set_priority(libxfs_bcache, cn,
430 cache_node_get_priority(cn) - CACHE_PREFETCH_PRIORITY);
431 *bpp = bp;
432 return 0;
433 }
434
435 static int
436 libxfs_getbuf_flags(
437 struct xfs_buftarg *btp,
438 xfs_daddr_t blkno,
439 int len,
440 unsigned int flags,
441 struct xfs_buf **bpp)
442 {
443 struct xfs_bufkey key = {NULL};
444 int ret;
445
446 key.buftarg = btp;
447 key.blkno = blkno;
448 key.bblen = len;
449
450 ret = __cache_lookup(&key, flags, bpp);
451 if (ret)
452 return ret;
453
454 if (btp == btp->bt_mount->m_ddev_targp) {
455 (*bpp)->b_pag = xfs_perag_get(btp->bt_mount,
456 xfs_daddr_to_agno(btp->bt_mount, blkno));
457 }
458
459 return 0;
460 }
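
/*
 * Illustrative sketch of a non-blocking cache lookup with
 * LIBXFS_GETBUF_TRYLOCK, similar to what the xfs_repair prefetcher does.
 * The location is hypothetical and libxfs_getbuf_flags() is static to
 * this file, so the sketch only makes sense here.
 */
#if 0
static void
example_try_get(struct xfs_buftarg *btp, xfs_daddr_t daddr, int bblen)
{
	struct xfs_buf	*bp;
	int		error;

	error = libxfs_getbuf_flags(btp, daddr, bblen, LIBXFS_GETBUF_TRYLOCK,
			&bp);
	if (error == -EAGAIN)
		return;		/* another thread holds the buffer lock */
	if (error)
		return;		/* allocation failure */
	libxfs_buf_relse(bp);
}
#endif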
461
462 /*
463 * Clean the buffer flags for libxfs_getbuf*(), which wants to return
464 * an unused buffer with clean state. This prevents CRC errors on a
465 * re-read of a corrupt block that was prefetched and freed. This
466 * can happen with a massively corrupt directory that is discarded,
467 * but whose blocks are then recycled into expanding lost+found.
468 *
469 * Note however that if the buffer's dirty (prefetch calls getbuf)
470 * we'll leave the state alone because we don't want to discard blocks
471 * that have been fixed.
472 */
473 static void
474 reset_buf_state(
475 struct xfs_buf *bp)
476 {
477 if (bp && !(bp->b_flags & LIBXFS_B_DIRTY))
478 bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE |
479 LIBXFS_B_UPTODATE);
480 }
481
482 static int
483 __libxfs_buf_get_map(
484 struct xfs_buftarg *btp,
485 struct xfs_buf_map *map,
486 int nmaps,
487 int flags,
488 struct xfs_buf **bpp)
489 {
490 struct xfs_bufkey key = {NULL};
491 int i;
492
493 if (nmaps == 1)
494 return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
495 flags, bpp);
496
497 key.buftarg = btp;
498 key.blkno = map[0].bm_bn;
499 for (i = 0; i < nmaps; i++) {
500 key.bblen += map[i].bm_len;
501 }
502 key.map = map;
503 key.nmaps = nmaps;
504
505 return __cache_lookup(&key, flags, bpp);
506 }
507
508 int
509 libxfs_buf_get_map(
510 struct xfs_buftarg *btp,
511 struct xfs_buf_map *map,
512 int nmaps,
513 int flags,
514 struct xfs_buf **bpp)
515 {
516 int error;
517
518 error = __libxfs_buf_get_map(btp, map, nmaps, flags, bpp);
519 if (error)
520 return error;
521
522 reset_buf_state(*bpp);
523 return 0;
524 }
525
526 void
527 libxfs_buf_relse(
528 struct xfs_buf *bp)
529 {
530 /*
531 * ensure that any errors on this use of the buffer don't carry
532 * over to the next user.
533 */
534 bp->b_error = 0;
535 if (use_xfs_buf_lock) {
536 if (bp->b_recur) {
537 bp->b_recur--;
538 } else {
539 bp->b_holder = 0;
540 pthread_mutex_unlock(&bp->b_lock);
541 }
542 }
543
544 if (!list_empty(&bp->b_node.cn_hash))
545 cache_node_put(libxfs_bcache, &bp->b_node);
546 else if (--bp->b_node.cn_count == 0) {
547 if (bp->b_flags & LIBXFS_B_DIRTY)
548 libxfs_bwrite(bp);
549 libxfs_brelse(&bp->b_node);
550 }
551 }
552
553 static struct cache_node *
554 libxfs_balloc(
555 cache_key_t key)
556 {
557 struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;
558 struct xfs_buf *bp;
559
560 if (bufkey->map)
561 bp = libxfs_getbufr_map(bufkey->buftarg, bufkey->blkno,
562 bufkey->bblen, bufkey->map, bufkey->nmaps);
563 else
564 bp = libxfs_getbufr(bufkey->buftarg, bufkey->blkno,
565 bufkey->bblen);
566 return &bp->b_node;
567 }
568
569
570 static int
571 __read_buf(int fd, void *buf, int len, off64_t offset, int flags)
572 {
573 int sts;
574
575 sts = pread(fd, buf, len, offset);
576 if (sts < 0) {
577 int error = errno;
578 fprintf(stderr, _("%s: read failed: %s\n"),
579 progname, strerror(error));
580 return -error;
581 } else if (sts != len) {
582 fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
583 progname, sts, len);
584 return -EIO;
585 }
586 return 0;
587 }
588
589 int
590 libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, struct xfs_buf *bp,
591 int len, int flags)
592 {
593 int fd = libxfs_device_to_fd(btp->bt_bdev);
594 int bytes = BBTOB(len);
595 int error;
596
597 ASSERT(len <= bp->b_length);
598
599 error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
600 if (!error &&
601 bp->b_target->bt_bdev == btp->bt_bdev &&
602 bp->b_cache_key == blkno &&
603 bp->b_length == len)
604 bp->b_flags |= LIBXFS_B_UPTODATE;
605 bp->b_error = error;
606 return error;
607 }
608
609 int
610 libxfs_readbuf_verify(
611 struct xfs_buf *bp,
612 const struct xfs_buf_ops *ops)
613 {
614 if (!ops)
615 return bp->b_error;
616
617 bp->b_ops = ops;
618 bp->b_ops->verify_read(bp);
619 bp->b_flags &= ~LIBXFS_B_UNCHECKED;
620 return bp->b_error;
621 }
622
623 int
624 libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
625 {
626 int fd;
627 int error = 0;
628 void *buf;
629 int i;
630
631 fd = libxfs_device_to_fd(btp->bt_bdev);
632 buf = bp->b_addr;
633 for (i = 0; i < bp->b_nmaps; i++) {
634 off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
635 int len = BBTOB(bp->b_maps[i].bm_len);
636
637 error = __read_buf(fd, buf, len, offset, flags);
638 if (error) {
639 bp->b_error = error;
640 break;
641 }
642 buf += len;
643 }
644
645 if (!error)
646 bp->b_flags |= LIBXFS_B_UPTODATE;
647 return error;
648 }
649
650 int
651 libxfs_buf_read_map(
652 struct xfs_buftarg *btp,
653 struct xfs_buf_map *map,
654 int nmaps,
655 int flags,
656 struct xfs_buf **bpp,
657 const struct xfs_buf_ops *ops)
658 {
659 struct xfs_buf *bp;
660 bool salvage = flags & LIBXFS_READBUF_SALVAGE;
661 int error = 0;
662
663 *bpp = NULL;
664 if (nmaps == 1)
665 error = libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
666 0, &bp);
667 else
668 error = __libxfs_buf_get_map(btp, map, nmaps, 0, &bp);
669 if (error)
670 return error;
671
672 /*
673 * If the buffer was prefetched, it is likely that it was not validated.
674 * Hence if we are supplied an ops function and the buffer is marked as
675 * unchecked, we need to validate it now.
676 *
677 * We do this verification even if the buffer is dirty - the
678 * verification is almost certainly going to fail the CRC check in this
679 * case as a dirty buffer has not had the CRC recalculated. However, we
680 * should not be dirtying unchecked buffers and therefore failing it
681 * here because it's dirty and unchecked indicates we've screwed up
682 * somewhere else.
683 *
684 * Note that if the caller passes in LIBXFS_READBUF_SALVAGE, that means
685 * they want the buffer even if it fails verification.
686 */
687 bp->b_error = 0;
688 if (bp->b_flags & (LIBXFS_B_UPTODATE | LIBXFS_B_DIRTY)) {
689 if (bp->b_flags & LIBXFS_B_UNCHECKED)
690 error = libxfs_readbuf_verify(bp, ops);
691 if (error && !salvage)
692 goto err;
693 goto ok;
694 }
695
696 /*
697 * Set the ops on a cache miss (i.e. first physical read) as the
698 * verifier may change the ops to match the type of buffer it contains.
699 * A cache hit might reset the verifier to the original type if we set
700 * it again, but it won't get called again and set to match the buffer
701 * contents. *cough* xfs_da_node_buf_ops *cough*.
702 */
703 if (nmaps == 1)
704 error = libxfs_readbufr(btp, map[0].bm_bn, bp, map[0].bm_len,
705 flags);
706 else
707 error = libxfs_readbufr_map(btp, bp, flags);
708 if (error)
709 goto err;
710
711 error = libxfs_readbuf_verify(bp, ops);
712 if (error && !salvage)
713 goto err;
714
715 ok:
716 *bpp = bp;
717 return 0;
718 err:
719 libxfs_buf_relse(bp);
720 return error;
721 }
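
/*
 * Illustrative sketch of reading a verified buffer through the cache with
 * the function above. The target, location and verifier are hypothetical.
 * With LIBXFS_READBUF_SALVAGE the buffer is returned even if verification
 * fails, and the verifier's verdict is left in bp->b_error.
 */
#if 0
static int
example_read_verified(struct xfs_buftarg *btp, xfs_daddr_t daddr, int bblen,
		const struct xfs_buf_ops *ops)
{
	struct xfs_buf_map	map = { .bm_bn = daddr, .bm_len = bblen };
	struct xfs_buf		*bp;
	int			error;

	error = libxfs_buf_read_map(btp, &map, 1, LIBXFS_READBUF_SALVAGE,
			&bp, ops);
	if (error)
		return error;		/* I/O or allocation failure */

	if (bp->b_error)
		fprintf(stderr, "verifier rejected bno 0x%llx: %d\n",
			(unsigned long long)xfs_buf_daddr(bp), bp->b_error);

	libxfs_buf_relse(bp);
	return 0;
}
#endif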
722
723 /* Allocate a raw uncached buffer. */
724 static inline struct xfs_buf *
725 libxfs_getbufr_uncached(
726 struct xfs_buftarg *targ,
727 xfs_daddr_t daddr,
728 size_t bblen)
729 {
730 struct xfs_buf *bp;
731
732 bp = libxfs_getbufr(targ, daddr, bblen);
733 if (!bp)
734 return NULL;
735
736 INIT_LIST_HEAD(&bp->b_node.cn_hash);
737 bp->b_node.cn_count = 1;
738 return bp;
739 }
740
741 /*
742 * Allocate an uncached buffer that points nowhere. The refcount will be 1,
743 * and the cache node hash list will be empty to indicate that it's uncached.
744 */
745 int
746 libxfs_buf_get_uncached(
747 struct xfs_buftarg *targ,
748 size_t bblen,
749 int flags,
750 struct xfs_buf **bpp)
751 {
752 *bpp = libxfs_getbufr_uncached(targ, XFS_BUF_DADDR_NULL, bblen);
753 return *bpp != NULL ? 0 : -ENOMEM;
754 }
755
756 /*
757 * Allocate and read an uncached buffer. The refcount will be 1, and the cache
758 * node hash list will be empty to indicate that it's uncached.
759 */
760 int
761 libxfs_buf_read_uncached(
762 struct xfs_buftarg *targ,
763 xfs_daddr_t daddr,
764 size_t bblen,
765 int flags,
766 struct xfs_buf **bpp,
767 const struct xfs_buf_ops *ops)
768 {
769 struct xfs_buf *bp;
770 int error;
771
772 *bpp = NULL;
773 bp = libxfs_getbufr_uncached(targ, daddr, bblen);
774 if (!bp)
775 return -ENOMEM;
776
777 error = libxfs_readbufr(targ, daddr, bp, bblen, flags);
778 if (error)
779 goto err;
780
781 error = libxfs_readbuf_verify(bp, ops);
782 if (error)
783 goto err;
784
785 *bpp = bp;
786 return 0;
787 err:
788 libxfs_buf_relse(bp);
789 return error;
790 }
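
/*
 * Illustrative sketch of the uncached read path above: read a single
 * sector without inserting it into the buffer cache, e.g. when probing
 * for a secondary superblock. The location is hypothetical and no
 * verifier is supplied, so only I/O errors are reported.
 */
#if 0
static int
example_probe_sector(struct xfs_buftarg *btp, xfs_daddr_t daddr)
{
	struct xfs_buf	*bp;
	int		error;

	error = libxfs_buf_read_uncached(btp, daddr, 1, 0, &bp, NULL);
	if (error)
		return error;

	/* bp->b_addr now holds one basic block of raw data. */
	libxfs_buf_relse(bp);
	return 0;
}
#endif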
791
792 static int
793 __write_buf(int fd, void *buf, int len, off64_t offset, int flags)
794 {
795 int sts;
796
797 sts = pwrite(fd, buf, len, offset);
798 if (sts < 0) {
799 int error = errno;
800 fprintf(stderr, _("%s: pwrite failed: %s\n"),
801 progname, strerror(error));
802 return -error;
803 } else if (sts != len) {
804 fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"),
805 progname, sts, len);
806 return -EIO;
807 }
808 return 0;
809 }
810
811 int
812 libxfs_bwrite(
813 struct xfs_buf *bp)
814 {
815 int fd = libxfs_device_to_fd(bp->b_target->bt_bdev);
816
817 /*
818 * we never write buffers that are marked stale. This indicates they
819 * contain data that has been invalidated, and even if the buffer is
820 * dirty it must *never* be written. Verifiers are wonderful for finding
821 * bugs like this. Make sure the error is obvious as to the cause.
822 */
823 if (bp->b_flags & LIBXFS_B_STALE) {
824 bp->b_error = -ESTALE;
825 return bp->b_error;
826 }
827
828 /* Trigger the writeback hook if there is one. */
829 if (bp->b_mount->m_buf_writeback_fn)
830 bp->b_mount->m_buf_writeback_fn(bp);
831
832 /*
833 * clear any pre-existing error status on the buffer. This can occur if
834 * the buffer is corrupt on disk and the repair process doesn't clear
835 * the error before fixing and writing it back.
836 */
837 bp->b_error = 0;
838 if (bp->b_ops) {
839 bp->b_ops->verify_write(bp);
840 if (bp->b_error) {
841 fprintf(stderr,
842 _("%s: write verifier failed on %s bno 0x%llx/0x%x\n"),
843 __func__, bp->b_ops->name,
844 (unsigned long long)xfs_buf_daddr(bp),
845 bp->b_length);
846 return bp->b_error;
847 }
848 }
849
850 if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
851 bp->b_error = __write_buf(fd, bp->b_addr, BBTOB(bp->b_length),
852 LIBXFS_BBTOOFF64(xfs_buf_daddr(bp)),
853 bp->b_flags);
854 } else {
855 int i;
856 void *buf = bp->b_addr;
857
858 for (i = 0; i < bp->b_nmaps; i++) {
859 off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
860 int len = BBTOB(bp->b_maps[i].bm_len);
861
862 bp->b_error = __write_buf(fd, buf, len, offset,
863 bp->b_flags);
864 if (bp->b_error)
865 break;
866 buf += len;
867 }
868 }
869
870 if (bp->b_error) {
871 fprintf(stderr,
872 _("%s: write failed on %s bno 0x%llx/0x%x, err=%d\n"),
873 __func__, bp->b_ops ? bp->b_ops->name : "(unknown)",
874 (unsigned long long)xfs_buf_daddr(bp),
875 bp->b_length, -bp->b_error);
876 } else {
877 bp->b_flags |= LIBXFS_B_UPTODATE;
878 bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED);
879 xfs_buftarg_trip_write(bp->b_target);
880 }
881 return bp->b_error;
882 }
883
884 /*
885 * Mark a buffer dirty. The dirty data will be written out when the cache
886 * is flushed (or at release time if the buffer is uncached).
887 */
888 void
889 libxfs_buf_mark_dirty(
890 struct xfs_buf *bp)
891 {
892 /*
893 * Clear any error hanging over from reading the buffer. This prevents
894 * subsequent reads after this write from seeing stale errors.
895 */
896 bp->b_error = 0;
897 bp->b_flags &= ~LIBXFS_B_STALE;
898 bp->b_flags |= LIBXFS_B_DIRTY;
899 }
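
/*
 * Illustrative sketch of the deferred write model: modify a cached
 * buffer, mark it dirty, release it, and let a later cache flush push it
 * to disk via libxfs_bwrite(). The location and the modification shown
 * are hypothetical.
 */
#if 0
static void
example_modify_and_flush(struct xfs_buftarg *btp, xfs_daddr_t daddr, int bblen)
{
	struct xfs_buf_map	map = { .bm_bn = daddr, .bm_len = bblen };
	struct xfs_buf		*bp;

	if (libxfs_buf_get_map(btp, &map, 1, 0, &bp))
		return;

	memset(bp->b_addr, 0, BBTOB(bp->b_length));	/* hypothetical edit */
	libxfs_buf_mark_dirty(bp);
	libxfs_buf_relse(bp);

	/* Write back every dirty buffer still held in the cache. */
	libxfs_bcache_flush();
}
#endif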
900
901 /* Prepare a buffer to be sent to the MRU list. */
902 static inline void
903 libxfs_buf_prepare_mru(
904 struct xfs_buf *bp)
905 {
906 if (bp->b_pag)
907 xfs_perag_put(bp->b_pag);
908 bp->b_pag = NULL;
909
910 if (!(bp->b_flags & LIBXFS_B_DIRTY))
911 return;
912
913 /* Complain about (and remember) dropping dirty buffers. */
914 fprintf(stderr, _("%s: Releasing dirty buffer to free list!\n"),
915 progname);
916
917 if (bp->b_error == -EFSCORRUPTED)
918 bp->b_target->flags |= XFS_BUFTARG_CORRUPT_WRITE;
919 bp->b_target->flags |= XFS_BUFTARG_LOST_WRITE;
920 }
921
922 static void
923 libxfs_brelse(
924 struct cache_node *node)
925 {
926 struct xfs_buf *bp = container_of(node, struct xfs_buf,
927 b_node);
928
929 if (!bp)
930 return;
931 libxfs_buf_prepare_mru(bp);
932
933 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
934 list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
935 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
936 }
937
938 static unsigned int
939 libxfs_bulkrelse(
940 struct cache *cache,
941 struct list_head *list)
942 {
943 struct xfs_buf *bp;
944 int count = 0;
945
946 if (list_empty(list))
947 return 0;
948
949 list_for_each_entry(bp, list, b_node.cn_mru) {
950 libxfs_buf_prepare_mru(bp);
951 count++;
952 }
953
954 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
955 list_splice(list, &xfs_buf_freelist.cm_list);
956 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
957
958 return count;
959 }
960
961 /*
962 * Free everything from the xfs_buf_freelist MRU, used at final teardown
963 */
964 void
965 libxfs_bcache_free(void)
966 {
967 struct list_head *cm_list;
968 struct xfs_buf *bp, *next;
969
970 cm_list = &xfs_buf_freelist.cm_list;
971 list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) {
972 free(bp->b_addr);
973 if (bp->b_maps != &bp->__b_map)
974 free(bp->b_maps);
975 kmem_cache_free(xfs_buf_cache, bp);
976 }
977 }
978
979 /*
980 * When a buffer is marked dirty, the error is cleared. Hence if we are trying
981 * to flush a buffer prior to cache reclaim that has an error on it, it means
982 * we've already tried to flush it and it failed. Prevent repeated corruption
983 * errors from being reported by skipping such buffers - when the corruption is
984 * fixed the buffer will be marked dirty again and we can write it again.
985 */
986 static int
987 libxfs_bflush(
988 struct cache_node *node)
989 {
990 struct xfs_buf *bp = container_of(node, struct xfs_buf,
991 b_node);
992
993 if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY)
994 return libxfs_bwrite(bp);
995 return bp->b_error;
996 }
997
998 void
999 libxfs_bcache_purge(void)
1000 {
1001 cache_purge(libxfs_bcache);
1002 }
1003
1004 void
1005 libxfs_bcache_flush(void)
1006 {
1007 cache_flush(libxfs_bcache);
1008 }
1009
1010 int
1011 libxfs_bcache_overflowed(void)
1012 {
1013 return cache_overflowed(libxfs_bcache);
1014 }
1015
1016 struct cache_operations libxfs_bcache_operations = {
1017 .hash = libxfs_bhash,
1018 .alloc = libxfs_balloc,
1019 .flush = libxfs_bflush,
1020 .relse = libxfs_brelse,
1021 .compare = libxfs_bcompare,
1022 .bulkrelse = libxfs_bulkrelse
1023 };
1024
1025 /*
1026 * Verify an on-disk magic value against the magic value specified in the
1027 * verifier structure. The verifier magic is in disk byte order so the caller is
1028 * expected to pass the value directly from disk.
1029 */
1030 bool
1031 xfs_verify_magic(
1032 struct xfs_buf *bp,
1033 __be32 dmagic)
1034 {
1035 struct xfs_mount *mp = bp->b_mount;
1036 int idx;
1037
1038 idx = xfs_has_crc(mp);
1039 if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])))
1040 return false;
1041 return dmagic == bp->b_ops->magic[idx];
1042 }
1043
1044 /*
1045 * Verify an on-disk magic value against the magic value specified in the
1046 * verifier structure. The verifier magic is in disk byte order so the caller is
1047 * expected to pass the value directly from disk.
1048 */
1049 bool
1050 xfs_verify_magic16(
1051 struct xfs_buf *bp,
1052 __be16 dmagic)
1053 {
1054 struct xfs_mount *mp = bp->b_mount;
1055 int idx;
1056
1057 idx = xfs_has_crc(mp);
1058 if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])))
1059 return false;
1060 return dmagic == bp->b_ops->magic16[idx];
1061 }
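
/*
 * Illustrative sketch of how a read verifier uses the magic helpers
 * above. The ops table is a hypothetical simplification borrowing the
 * superblock magic; real verifiers also check CRCs, owners and LSNs, and
 * the xfs_verifier_error() call is assumed to be available from the
 * shared libxfs error code.
 */
#if 0
static void
example_verify_read(struct xfs_buf *bp)
{
	struct xfs_dsb	*dsb = bp->b_addr;

	/* magic[0] is the pre-CRC value, magic[1] the CRC-enabled one. */
	if (!xfs_verify_magic(bp, dsb->sb_magicnum))
		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
}

static const struct xfs_buf_ops example_buf_ops = {
	.name = "example",
	.magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) },
	.verify_read = example_verify_read,
};
#endif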
1062
1063 /*
1064 * Inode cache stubs.
1065 */
1066
1067 struct kmem_cache *xfs_inode_cache;
1068 extern struct kmem_cache *xfs_ili_cache;
1069
1070 int
1071 libxfs_iget(
1072 struct xfs_mount *mp,
1073 struct xfs_trans *tp,
1074 xfs_ino_t ino,
1075 uint lock_flags,
1076 struct xfs_inode **ipp)
1077 {
1078 struct xfs_inode *ip;
1079 struct xfs_buf *bp;
1080 int error = 0;
1081
1082 ip = kmem_cache_zalloc(xfs_inode_cache, 0);
1083 if (!ip)
1084 return -ENOMEM;
1085
1086 VFS_I(ip)->i_count = 1;
1087 ip->i_ino = ino;
1088 ip->i_mount = mp;
1089 ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS;
1090 ip->i_df.if_present = 1;
1091 spin_lock_init(&VFS_I(ip)->i_lock);
1092
1093 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, 0);
1094 if (error)
1095 goto out_destroy;
1096
1097 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp);
1098 if (error)
1099 goto out_destroy;
1100
1101 error = xfs_inode_from_disk(ip,
1102 xfs_buf_offset(bp, ip->i_imap.im_boffset));
1103 if (!error)
1104 xfs_buf_set_ref(bp, XFS_INO_REF);
1105 xfs_trans_brelse(tp, bp);
1106
1107 if (error)
1108 goto out_destroy;
1109
1110 *ipp = ip;
1111 return 0;
1112
1113 out_destroy:
1114 kmem_cache_free(xfs_inode_cache, ip);
1115 *ipp = NULL;
1116 return error;
1117 }
1118
1119 static void
1120 libxfs_idestroy(xfs_inode_t *ip)
1121 {
1122 switch (VFS_I(ip)->i_mode & S_IFMT) {
1123 case S_IFREG:
1124 case S_IFDIR:
1125 case S_IFLNK:
1126 libxfs_idestroy_fork(&ip->i_df);
1127 break;
1128 }
1129 if (ip->i_af.if_present) {
1130 libxfs_idestroy_fork(&ip->i_af);
1131 libxfs_ifork_zap_attr(ip);
1132 }
1133 if (ip->i_cowfp) {
1134 libxfs_idestroy_fork(ip->i_cowfp);
1135 kmem_cache_free(xfs_ifork_cache, ip->i_cowfp);
1136 }
1137 }
1138
1139 void
1140 libxfs_irele(
1141 struct xfs_inode *ip)
1142 {
1143 VFS_I(ip)->i_count--;
1144
1145 if (VFS_I(ip)->i_count == 0) {
1146 ASSERT(ip->i_itemp == NULL);
1147 libxfs_idestroy(ip);
1148 kmem_cache_free(xfs_inode_cache, ip);
1149 }
1150 }
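
/*
 * Illustrative sketch of the inode cache stubs above: look an inode up,
 * inspect it and drop the reference. The inode number is hypothetical
 * and no transaction is supplied, which is the common userspace pattern.
 */
#if 0
static int
example_print_inode_size(struct xfs_mount *mp, xfs_ino_t ino)
{
	struct xfs_inode	*ip;
	int			error;

	error = libxfs_iget(mp, NULL, ino, 0, &ip);
	if (error)
		return error;

	printf("inode %llu: %lld bytes\n", (unsigned long long)ino,
			(long long)ip->i_disk_size);
	libxfs_irele(ip);
	return 0;
}
#endif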
1151
1152 /*
1153 * Flush everything dirty in the kernel and disk write caches to stable media.
1154 * Returns 0 for success or a negative error code.
1155 */
1156 int
1157 libxfs_blkdev_issue_flush(
1158 struct xfs_buftarg *btp)
1159 {
1160 int fd, ret;
1161
1162 if (btp->bt_bdev == 0)
1163 return 0;
1164
1165 fd = libxfs_device_to_fd(btp->bt_bdev);
1166 ret = platform_flush_device(fd, btp->bt_bdev);
1167 return ret ? -errno : 0;
1168 }
1169
1170 /*
1171 * Write out a buffer list synchronously.
1172 *
1173 * This will take the @buffer_list, write all buffers out and wait for I/O
1174 * completion on all of the buffers. @buffer_list is consumed by the function,
1175 * so callers must have some other way of tracking buffers if they require such
1176 * functionality.
1177 */
1178 int
1179 xfs_buf_delwri_submit(
1180 struct list_head *buffer_list)
1181 {
1182 struct xfs_buf *bp, *n;
1183 int error = 0, error2;
1184
1185 list_for_each_entry_safe(bp, n, buffer_list, b_list) {
1186 list_del_init(&bp->b_list);
1187 error2 = libxfs_bwrite(bp);
1188 if (!error)
1189 error = error2;
1190 libxfs_buf_relse(bp);
1191 }
1192
1193 return error;
1194 }
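
/*
 * Illustrative sketch of delwri list usage: queue dirty buffers on a
 * local list and write them all out in one call. The use of
 * xfs_buf_delwri_queue() from the shared buffer code is an assumption;
 * the buffers themselves are hypothetical and must already be held.
 */
#if 0
static int
example_flush_two(struct xfs_buf *bp1, struct xfs_buf *bp2)
{
	struct list_head	buffer_list;

	INIT_LIST_HEAD(&buffer_list);
	xfs_buf_delwri_queue(bp1, &buffer_list);
	xfs_buf_delwri_queue(bp2, &buffer_list);

	/* Writes each buffer, drops its reference and empties the list. */
	return xfs_buf_delwri_submit(&buffer_list);
}
#endif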
1195
1196 /*
1197 * Cancel a delayed write list.
1198 *
1199 * Remove each buffer from the list, clear the delwri queue flag and drop the
1200 * associated buffer reference.
1201 */
1202 void
1203 xfs_buf_delwri_cancel(
1204 struct list_head *list)
1205 {
1206 struct xfs_buf *bp;
1207
1208 while (!list_empty(list)) {
1209 bp = list_first_entry(list, struct xfs_buf, b_list);
1210
1211 list_del_init(&bp->b_list);
1212 libxfs_buf_relse(bp);
1213 }
1214 }
1215
1216 /*
1217 * Format the log. The caller provides either a buftarg which is used to access
1218 * the log via buffers or a direct pointer to a buffer that encapsulates the
1219 * entire log.
1220 */
1221 int
1222 libxfs_log_clear(
1223 struct xfs_buftarg *btp,
1224 char *dptr,
1225 xfs_daddr_t start,
1226 uint length, /* basic blocks */
1227 uuid_t *fs_uuid,
1228 int version,
1229 int sunit, /* bytes */
1230 int fmt,
1231 int cycle,
1232 bool max)
1233 {
1234 struct xfs_buf *bp = NULL;
1235 int len;
1236 xfs_lsn_t lsn;
1237 xfs_lsn_t tail_lsn;
1238 xfs_daddr_t blk;
1239 xfs_daddr_t end_blk;
1240 char *ptr;
1241
1242 if (((btp && dptr) || (!btp && !dptr)) ||
1243 (btp && !btp->bt_bdev) || !fs_uuid)
1244 return -EINVAL;
1245
1246 /* first zero the log */
1247 if (btp)
1248 libxfs_device_zero(btp, start, length);
1249 else
1250 memset(dptr, 0, BBTOB(length));
1251
1252 /*
1253 * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a
1254 * special reset case where we only write a single record where the lsn
1255 * and tail_lsn match. Otherwise, the record lsn starts at block 0 of
1256 * the specified cycle and points tail_lsn at the last record of the
1257 * previous cycle.
1258 */
1259 len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
1260 len = max(len, 2);
1261 lsn = xlog_assign_lsn(cycle, 0);
1262 if (cycle == XLOG_INIT_CYCLE)
1263 tail_lsn = lsn;
1264 else
1265 tail_lsn = xlog_assign_lsn(cycle - 1, length - len);
1266
1267 /* write out the first log record */
1268 ptr = dptr;
1269 if (btp) {
1270 bp = libxfs_getbufr_uncached(btp, start, len);
1271 ptr = bp->b_addr;
1272 }
1273 libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn,
1274 next, bp);
1275 if (bp) {
1276 libxfs_buf_mark_dirty(bp);
1277 libxfs_buf_relse(bp);
1278 }
1279
1280 /*
1281 * There's nothing else to do if this is a log reset. The kernel detects
1282 * the rest of the log is zeroed and starts at cycle 1.
1283 */
1284 if (cycle == XLOG_INIT_CYCLE)
1285 return 0;
1286
1287 /*
1288 * Bump the record size for a full log format if the caller allows it.
1289 * This is primarily for performance reasons and most callers don't care
1290 * about record size since the log is clean after we're done.
1291 */
1292 if (max)
1293 len = BTOBB(BDSTRAT_SIZE);
1294
1295 /*
1296 * Otherwise, fill everything beyond the initial record with records of
1297 * the previous cycle so the kernel head/tail detection works correctly.
1298 *
1299 * We don't particularly care about the record size or content here.
1300 * It's only important that the headers are in place such that the
1301 * kernel finds 1.) a clean log and 2.) the correct current cycle value.
1302 * Therefore, bump up the record size to the max to use larger I/Os and
1303 * improve performance.
1304 */
1305 cycle--;
1306 blk = start + len;
1307 if (dptr)
1308 dptr += BBTOB(len);
1309 end_blk = start + length;
1310
1311 len = min(end_blk - blk, len);
1312 while (blk < end_blk) {
1313 lsn = xlog_assign_lsn(cycle, blk - start);
1314 tail_lsn = xlog_assign_lsn(cycle, blk - start - len);
1315
1316 ptr = dptr;
1317 if (btp) {
1318 bp = libxfs_getbufr_uncached(btp, blk, len);
1319 ptr = bp->b_addr;
1320 }
1321 /*
1322 * Note: pass the full buffer length as the sunit to initialize
1323 * the entire buffer.
1324 */
1325 libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn,
1326 tail_lsn, next, bp);
1327 if (bp) {
1328 libxfs_buf_mark_dirty(bp);
1329 libxfs_buf_relse(bp);
1330 }
1331
1332 blk += len;
1333 if (dptr)
1334 dptr += BBTOB(len);
1335 len = min(end_blk - blk, len);
1336 }
1337
1338 return 0;
1339 }
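
/*
 * Illustrative sketch of resetting an internal log with the function
 * above, roughly what mkfs and xfs_repair do. All geometry values are
 * taken from a hypothetical mount; length is in basic blocks and sunit
 * in bytes, per the parameter comments, and the XLOG_FMT/XLOG_INIT_CYCLE
 * constants are assumed from the shared log format headers.
 */
#if 0
static int
example_reset_log(struct xfs_mount *mp)
{
	return libxfs_log_clear(mp->m_logdev_targp, NULL,
			XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
			XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks),
			&mp->m_sb.sb_uuid, xfs_has_logv2(mp) ? 2 : 1,
			mp->m_sb.sb_logsunit, XLOG_FMT, XLOG_INIT_CYCLE,
			false);
}
#endif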
1340
1341 int
1342 libxfs_log_header(
1343 char *caddr,
1344 uuid_t *fs_uuid,
1345 int version,
1346 int sunit,
1347 int fmt,
1348 xfs_lsn_t lsn,
1349 xfs_lsn_t tail_lsn,
1350 libxfs_get_block_t *nextfunc,
1351 void *private)
1352 {
1353 xlog_rec_header_t *head = (xlog_rec_header_t *)caddr;
1354 char *p = caddr;
1355 __be32 cycle_lsn;
1356 int i, len;
1357 int hdrs = 1;
1358
1359 if (lsn == NULLCOMMITLSN)
1360 lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0);
1361 if (tail_lsn == NULLCOMMITLSN)
1362 tail_lsn = lsn;
1363
1364 len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;
1365
1366 memset(p, 0, BBSIZE);
1367 head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
1368 head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn));
1369 head->h_version = cpu_to_be32(version);
1370 head->h_crc = cpu_to_le32(0);
1371 head->h_prev_block = cpu_to_be32(-1);
1372 head->h_num_logops = cpu_to_be32(1);
1373 head->h_fmt = cpu_to_be32(fmt);
1374 head->h_size = cpu_to_be32(max(sunit, XLOG_BIG_RECORD_BSIZE));
1375
1376 head->h_lsn = cpu_to_be64(lsn);
1377 head->h_tail_lsn = cpu_to_be64(tail_lsn);
1378
1379 memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));
1380
1381 /*
1382 * The kernel expects to see either a log record header magic value or
1383 * the LSN cycle at the top of every log block. The first word of each
1384 * non-header block is copied to the record headers and replaced with
1385 * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for
1386 * details).
1387 *
1388 * Even though we only ever write an unmount record (one block), we
1389 * support writing log records up to the max log buffer size of 256k to
1390 * improve log format performance. This means a record can require up
1391 * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle
1392 * data (each header supports 32k of data).
1393 */
1394 cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
1395 if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) {
1396 hdrs = sunit / XLOG_HEADER_CYCLE_SIZE;
1397 if (sunit % XLOG_HEADER_CYCLE_SIZE)
1398 hdrs++;
1399 }
1400
1401 /*
1402 * A fixed number of extended headers is expected based on h_size. If
1403 * required, format those now so the unmount record is located
1404 * correctly.
1405 *
1406 * Since we only write an unmount record, we only need one h_cycle_data
1407 * entry for the unmount record block. The subsequent record data
1408 * blocks are zeroed, which means we can stamp them directly with the
1409 * cycle and zero the rest of the cycle data in the extended headers.
1410 */
1411 if (hdrs > 1) {
1412 for (i = 1; i < hdrs; i++) {
1413 p = nextfunc(p, BBSIZE, private);
1414 memset(p, 0, BBSIZE);
1415 /* xlog_rec_ext_header.xh_cycle */
1416 *(__be32 *)p = cycle_lsn;
1417 }
1418 }
1419
1420 /*
1421 * The total length is the max of the stripe unit or 2 basic block
1422 * minimum (1 hdr blk + 1 data blk). The record length is the total
1423 * minus however many header blocks are required.
1424 */
1425 head->h_len = cpu_to_be32(max(BBTOB(2), sunit) - hdrs * BBSIZE);
1426
1427 /*
1428 * Write out the unmount record, pack the first word into the record
1429 * header and stamp the block with the cycle.
1430 */
1431 p = nextfunc(p, BBSIZE, private);
1432 unmount_record(p);
1433
1434 head->h_cycle_data[0] = *(__be32 *)p;
1435 *(__be32 *)p = cycle_lsn;
1436
1437 /*
1438 * Finally, zero all remaining blocks in the record and stamp each with
1439 * the cycle. We don't need to pack any of these blocks because the
1440 * cycle data in the headers has already been zeroed.
1441 */
1442 len = max(len, hdrs + 1);
1443 for (i = hdrs + 1; i < len; i++) {
1444 p = nextfunc(p, BBSIZE, private);
1445 memset(p, 0, BBSIZE);
1446 *(__be32 *)p = cycle_lsn;
1447 }
1448
1449 return BBTOB(len);
1450 }
1451
1452 void
1453 libxfs_buf_set_priority(
1454 struct xfs_buf *bp,
1455 int priority)
1456 {
1457 cache_node_set_priority(libxfs_bcache, &bp->b_node, priority);
1458 }
1459
1460 int
1461 libxfs_buf_priority(
1462 struct xfs_buf *bp)
1463 {
1464 return cache_node_get_priority(&bp->b_node);
1465 }
1466
1467 /*
1468 * Log a message about and stale a buffer that a caller has decided is corrupt.
1469 *
1470 * This function should be called for the kinds of metadata corruption that
1471 * cannot be detected by a verifier, such as incorrect inter-block relationship
1472 * data. Do /not/ call this function from a verifier function.
1473 *
1474 * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will
1475 * be marked stale, but b_error will not be set. The caller is responsible for
1476 * releasing the buffer or fixing it.
1477 */
1478 void
1479 __xfs_buf_mark_corrupt(
1480 struct xfs_buf *bp,
1481 xfs_failaddr_t fa)
1482 {
1483 ASSERT(bp->b_flags & XBF_DONE);
1484
1485 xfs_buf_corruption_error(bp, fa);
1486 xfs_buf_stale(bp);
1487 }