// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "libxfs_priv.h"
#include "init.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode.h"
#include "xfs_trans.h"

#include "libxfs.h"		/* for LIBXFS_EXIT_ON_FAILURE */

/*
 * Important design/architecture note:
 *
 * The userspace code that uses the buffer cache is much less constrained than
 * the kernel code. The userspace code is pretty nasty in places, especially
 * when it comes to buffer error handling. Very little of the userspace code
 * outside libxfs clears bp->b_error - very little code even checks it - so the
 * libxfs code is tripping on stale errors left by the userspace code.
 *
 * We can't clear errors or zero buffer contents in libxfs_getbuf-* like we do
 * in the kernel, because those functions are used by the libxfs_readbuf_*
 * functions and hence need to leave the buffers unchanged on cache hits. This
 * is actually the only way to gather a write error from a libxfs_writebuf()
 * call - you need to get the buffer again so you can check bp->b_error field -
 * assuming that the buffer is still in the cache when you check, that is.
 *
 * This is very different to the kernel code which does not release buffers on a
 * write so we can wait on IO and check errors. The kernel buffer cache also
 * guarantees a buffer of a known initial state from xfs_buf_get() even on a
 * cache hit.
 *
 * IOWs, userspace is behaving quite differently to the kernel and as a result
 * it leaks errors from reads, invalidations and writes through
 * libxfs_getbuf/libxfs_readbuf.
 *
 * The result of this is that until the userspace code outside libxfs is cleaned
 * up, functions that release buffers from userspace control (i.e.
 * libxfs_writebuf/libxfs_putbuf) need to zero bp->b_error to prevent
 * propagation of stale errors into future buffer operations.
 */
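
/*
 * Illustrative sketch (not part of the library): given the rules above, the
 * only way for a caller to observe a write error from a libxfs_writebuf()
 * call is to look the buffer up again - while it is still cached - and
 * inspect b_error by hand before releasing it:
 *
 *	libxfs_writebuf(bp, 0);
 *	...
 *	bp = libxfs_getbuf(btp, blkno, len);	// re-get; may miss the cache
 *	error = bp->b_error;			// stale write error, if any
 *	libxfs_putbuf(bp);			// putbuf zeroes b_error again
 */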

#define BDSTRAT_SIZE	(256 * 1024)

#define IO_BCOMPARE_CHECK

/* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */
int
libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
{
	xfs_off_t	start_offset, end_offset, offset;
	ssize_t		zsize, bytes;
	char		*z;
	int		fd;

	zsize = min(BDSTRAT_SIZE, BBTOB(len));
	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
		fprintf(stderr,
			_("%s: %s can't memalign %d bytes: %s\n"),
			progname, __FUNCTION__, (int)zsize, strerror(errno));
		exit(1);
	}
	memset(z, 0, zsize);

	fd = libxfs_device_to_fd(btp->dev);
	start_offset = LIBXFS_BBTOOFF64(start);

	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
			progname, __FUNCTION__,
			(unsigned long long)start_offset, strerror(errno));
		exit(1);
	}

	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
	for (offset = 0; offset < end_offset; ) {
		bytes = min((ssize_t)(end_offset - offset), zsize);
		if ((bytes = write(fd, z, bytes)) < 0) {
			fprintf(stderr, _("%s: %s write failed: %s\n"),
				progname, __FUNCTION__, strerror(errno));
			exit(1);
		} else if (bytes == 0) {
			fprintf(stderr, _("%s: %s not progressing?\n"),
				progname, __FUNCTION__);
			exit(1);
		}
		offset += bytes;
	}
	free(z);
	return 0;
}

static void unmount_record(void *p)
{
	xlog_op_header_t	*op = (xlog_op_header_t *)p;
	/* the data section must be 32 bit size aligned */
	struct {
		uint16_t magic;
		uint16_t pad1;
		uint32_t pad2;	/* may as well make it 64 bits */
	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 };

	memset(p, 0, BBSIZE);
	/* dummy tid to mark this as written from userspace */
	op->oh_tid = cpu_to_be32(0xb0c0d0d0);
	op->oh_len = cpu_to_be32(sizeof(magic));
	op->oh_clientid = XFS_LOG;
	op->oh_flags = XLOG_UNMOUNT_TRANS;
	op->oh_res2 = 0;

	/* and the data for this op */
	memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
}
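
/*
 * For reference, the single BBSIZE (512 byte) block built above is laid out
 * as follows (sizes follow from the definitions above; this is a descriptive
 * sketch, not code):
 *
 *	[0, sizeof(xlog_op_header_t))	op header, XLOG_UNMOUNT_TRANS set
 *	next 8 bytes			the padded XLOG_UNMOUNT_TYPE magic
 *	rest of the block		zeroed by the leading memset()
 */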

static char *next(
	char		*ptr,
	int		offset,
	void		*private)
{
	struct xfs_buf	*buf = (struct xfs_buf *)private;

	if (buf &&
	    (buf->b_bcount < (int)(ptr - (char *)buf->b_addr) + offset))
		abort();

	return ptr + offset;
}

/*
 * Format the log. The caller provides either a buftarg which is used to access
 * the log via buffers or a direct pointer to a buffer that encapsulates the
 * entire log.
 */
int
libxfs_log_clear(
	struct xfs_buftarg	*btp,
	char			*dptr,
	xfs_daddr_t		start,
	uint			length,		/* basic blocks */
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,		/* bytes */
	int			fmt,
	int			cycle,
	bool			max)
{
	struct xfs_buf		*bp = NULL;
	int			len;
	xfs_lsn_t		lsn;
	xfs_lsn_t		tail_lsn;
	xfs_daddr_t		blk;
	xfs_daddr_t		end_blk;
	char			*ptr;

	if (((btp && dptr) || (!btp && !dptr)) ||
	    (btp && !btp->dev) || !fs_uuid)
		return -EINVAL;

	/* first zero the log */
	if (btp)
		libxfs_device_zero(btp, start, length);
	else
		memset(dptr, 0, BBTOB(length));

	/*
	 * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a
	 * special reset case where we only write a single record where the lsn
	 * and tail_lsn match. Otherwise, the record lsn starts at block 0 of
	 * the specified cycle and points tail_lsn at the last record of the
	 * previous cycle.
	 */
	len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
	len = max(len, 2);
	lsn = xlog_assign_lsn(cycle, 0);
	if (cycle == XLOG_INIT_CYCLE)
		tail_lsn = lsn;
	else
		tail_lsn = xlog_assign_lsn(cycle - 1, length - len);

	/* write out the first log record */
	ptr = dptr;
	if (btp) {
		bp = libxfs_getbufr(btp, start, len);
		ptr = bp->b_addr;
	}
	libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn,
			  next, bp);
	if (bp) {
		bp->b_flags |= LIBXFS_B_DIRTY;
		libxfs_putbufr(bp);
	}

	/*
	 * There's nothing else to do if this is a log reset. The kernel detects
	 * the rest of the log is zeroed and starts at cycle 1.
	 */
	if (cycle == XLOG_INIT_CYCLE)
		return 0;

	/*
	 * Bump the record size for a full log format if the caller allows it.
	 * This is primarily for performance reasons and most callers don't care
	 * about record size since the log is clean after we're done.
	 */
	if (max)
		len = BTOBB(BDSTRAT_SIZE);

	/*
	 * Otherwise, fill everything beyond the initial record with records of
	 * the previous cycle so the kernel head/tail detection works correctly.
	 *
	 * We don't particularly care about the record size or content here.
	 * It's only important that the headers are in place such that the
	 * kernel finds 1.) a clean log and 2.) the correct current cycle value.
	 * Therefore, bump up the record size to the max to use larger I/Os and
	 * improve performance.
	 */
	cycle--;
	blk = start + len;
	if (dptr)
		dptr += BBTOB(len);
	end_blk = start + length;

	len = min(end_blk - blk, len);
	while (blk < end_blk) {
		lsn = xlog_assign_lsn(cycle, blk - start);
		tail_lsn = xlog_assign_lsn(cycle, blk - start - len);

		ptr = dptr;
		if (btp) {
			bp = libxfs_getbufr(btp, blk, len);
			ptr = bp->b_addr;
		}
		/*
		 * Note: pass the full buffer length as the sunit to initialize
		 * the entire buffer.
		 */
		libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn,
				  tail_lsn, next, bp);
		if (bp) {
			bp->b_flags |= LIBXFS_B_DIRTY;
			libxfs_putbufr(bp);
		}

		blk += len;
		if (dptr)
			dptr += BBTOB(len);
		len = min(end_blk - blk, len);
	}

	return 0;
}
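
/*
 * Usage sketch (hypothetical caller, invented values): an mkfs-style reset of
 * a device-backed log writes one record at the head of the log and leaves the
 * rest zeroed:
 *
 *	error = libxfs_log_clear(mp->m_logdev_targp, NULL, logstart,
 *				 logblocks, &mp->m_sb.sb_uuid, version,
 *				 sunit_bytes, XLOG_FMT, XLOG_INIT_CYCLE,
 *				 false);
 *
 * Passing a buffer via dptr with a NULL btp instead formats an in-memory
 * image of the log, per the argument checks at the top of the function.
 */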

int
libxfs_log_header(
	char			*caddr,
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,
	int			fmt,
	xfs_lsn_t		lsn,
	xfs_lsn_t		tail_lsn,
	libxfs_get_block_t	*nextfunc,
	void			*private)
{
	xlog_rec_header_t	*head = (xlog_rec_header_t *)caddr;
	char			*p = caddr;
	__be32			cycle_lsn;
	int			i, len;
	int			hdrs = 1;

	if (lsn == NULLCOMMITLSN)
		lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0);
	if (tail_lsn == NULLCOMMITLSN)
		tail_lsn = lsn;

	len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;

	memset(p, 0, BBSIZE);
	head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
	head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn));
	head->h_version = cpu_to_be32(version);
	head->h_crc = cpu_to_le32(0);
	head->h_prev_block = cpu_to_be32(-1);
	head->h_num_logops = cpu_to_be32(1);
	head->h_fmt = cpu_to_be32(fmt);
	head->h_size = cpu_to_be32(max(sunit, XLOG_BIG_RECORD_BSIZE));

	head->h_lsn = cpu_to_be64(lsn);
	head->h_tail_lsn = cpu_to_be64(tail_lsn);

	memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));

	/*
	 * The kernel expects to see either a log record header magic value or
	 * the LSN cycle at the top of every log block. The first word of each
	 * non-header block is copied to the record headers and replaced with
	 * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for
	 * details).
	 *
	 * Even though we only ever write an unmount record (one block), we
	 * support writing log records up to the max log buffer size of 256k to
	 * improve log format performance. This means a record can require up
	 * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle
	 * data (each header supports 32k of data).
	 */
	cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
	if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) {
		hdrs = sunit / XLOG_HEADER_CYCLE_SIZE;
		if (sunit % XLOG_HEADER_CYCLE_SIZE)
			hdrs++;
	}

	/*
	 * A fixed number of extended headers is expected based on h_size. If
	 * required, format those now so the unmount record is located
	 * correctly.
	 *
	 * Since we only write an unmount record, we only need one h_cycle_data
	 * entry for the unmount record block. The subsequent record data
	 * blocks are zeroed, which means we can stamp them directly with the
	 * cycle and zero the rest of the cycle data in the extended headers.
	 */
	if (hdrs > 1) {
		for (i = 1; i < hdrs; i++) {
			p = nextfunc(p, BBSIZE, private);
			memset(p, 0, BBSIZE);
			/* xlog_rec_ext_header.xh_cycle */
			*(__be32 *)p = cycle_lsn;
		}
	}

	/*
	 * The total length is the max of the stripe unit or 2 basic block
	 * minimum (1 hdr blk + 1 data blk). The record length is the total
	 * minus however many header blocks are required.
	 */
	head->h_len = cpu_to_be32(max(BBTOB(2), sunit) - hdrs * BBSIZE);

	/*
	 * Write out the unmount record, pack the first word into the record
	 * header and stamp the block with the cycle.
	 */
	p = nextfunc(p, BBSIZE, private);
	unmount_record(p);

	head->h_cycle_data[0] = *(__be32 *)p;
	*(__be32 *)p = cycle_lsn;

	/*
	 * Finally, zero all remaining blocks in the record and stamp each with
	 * the cycle. We don't need to pack any of these blocks because the
	 * cycle data in the headers has already been zeroed.
	 */
	len = max(len, hdrs + 1);
	for (i = hdrs + 1; i < len; i++) {
		p = nextfunc(p, BBSIZE, private);
		memset(p, 0, BBSIZE);
		*(__be32 *)p = cycle_lsn;
	}

	return BBTOB(len);
}
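
/*
 * Worked example of the header maths above (illustrative only): with
 * version 2 and sunit = 256k, each header packs XLOG_HEADER_CYCLE_SIZE (32k)
 * of cycle data, so hdrs = 256k / 32k = 8 - one record header plus seven
 * extended headers - and h_len works out to
 * max(BBTOB(2), 262144) - 8 * BBSIZE = 262144 - 4096 = 258048 bytes.
 */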

/*
 * Simple I/O (buffer cache) interface
 */

#ifdef XFS_BUF_TRACING

#undef libxfs_readbuf
#undef libxfs_readbuf_map
#undef libxfs_writebuf
#undef libxfs_getbuf
#undef libxfs_getbuf_map
#undef libxfs_getbuf_flags
#undef libxfs_putbuf

xfs_buf_t	*libxfs_readbuf(struct xfs_buftarg *, xfs_daddr_t, int, int,
				const struct xfs_buf_ops *);
xfs_buf_t	*libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
				int, int, const struct xfs_buf_ops *);
int		libxfs_writebuf(xfs_buf_t *, int);
xfs_buf_t	*libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
xfs_buf_t	*libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
				int, int);
xfs_buf_t	*libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
				unsigned int);
void		libxfs_putbuf(xfs_buf_t *);

#define __add_trace(bp, func, file, line)	\
do {						\
	if (bp) {				\
		(bp)->b_func = (func);		\
		(bp)->b_file = (file);		\
		(bp)->b_line = (line);		\
	}					\
} while (0)

xfs_buf_t *
libxfs_trace_readbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf(btp, blkno, len, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_readbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags, const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

int
libxfs_trace_writebuf(const char *func, const char *file, int line,
		xfs_buf_t *bp, int flags)
{
	__add_trace(bp, func, file, line);
	return libxfs_writebuf(bp, flags);
}

xfs_buf_t *
libxfs_trace_getbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
{
	xfs_buf_t	*bp = libxfs_getbuf(btp, blkno, len);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_map(btp, map, nmaps, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
		unsigned int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_flags(btp, blkno, len, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

void
libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
{
	__add_trace(bp, func, file, line);
	libxfs_putbuf(bp);
}

#endif

xfs_buf_t *
libxfs_getsb(xfs_mount_t *mp, int flags)
{
	return libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR,
				XFS_FSS_TO_BB(mp, 1), flags, &xfs_sb_buf_ops);
}

kmem_zone_t	*xfs_buf_zone;

static struct cache_mru xfs_buf_freelist =
	{{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
	 0, PTHREAD_MUTEX_INITIALIZER };

/*
 * The bufkey is used to pass the new buffer information to the cache object
 * allocation routine. Because discontiguous buffers need to pass different
 * information, we need fields to pass that information. However, because the
 * blkno and bblen are needed for the initial cache entry lookup (i.e. for
 * bcompare), we use the fact that the map/nmaps is non-null to switch to
 * discontiguous buffer initialisation instead of a contiguous buffer.
 */
struct xfs_bufkey {
	struct xfs_buftarg	*buftarg;
	xfs_daddr_t		blkno;
	unsigned int		bblen;
	struct xfs_buf_map	*map;
	int			nmaps;
};

/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define GOLDEN_RATIO_PRIME	0x9e37fffffffc0001UL
#define CACHE_LINE_SIZE		64
static unsigned int
libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift)
{
	uint64_t	hashval = ((struct xfs_bufkey *)key)->blkno;
	uint64_t	tmp;

	tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift);
	return tmp % hashsize;
}

static int
libxfs_bcompare(struct cache_node *node, cache_key_t key)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;
	struct xfs_bufkey	*bkey = (struct xfs_bufkey *)key;

	if (bp->b_target->dev == bkey->buftarg->dev &&
	    bp->b_bn == bkey->blkno) {
		if (bp->b_bcount == BBTOB(bkey->bblen))
			return CACHE_HIT;
#ifdef IO_BCOMPARE_CHECK
		if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
			fprintf(stderr,
	"%lx: Badness in key lookup (length)\n"
	"bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
				pthread_self(),
				(unsigned long long)bp->b_bn, (int)bp->b_bcount,
				(unsigned long long)bkey->blkno,
				BBTOB(bkey->bblen));
		}
#endif
		return CACHE_PURGE;
	}
	return CACHE_MISS;
}

static void
__initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	bp->b_flags = 0;
	bp->b_bn = bno;
	bp->b_bcount = bytes;
	bp->b_length = BTOBB(bytes);
	bp->b_target = btp;
	bp->b_error = 0;
	if (!bp->b_addr)
		bp->b_addr = memalign(libxfs_device_alignment(), bytes);
	if (!bp->b_addr) {
		fprintf(stderr,
			_("%s: %s can't memalign %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	memset(bp->b_addr, 0, bytes);
#ifdef XFS_BUF_TRACING
	list_head_init(&bp->b_lock_list);
#endif
	pthread_mutex_init(&bp->b_lock, NULL);
	bp->b_holder = 0;
	bp->b_recur = 0;
	bp->b_ops = NULL;

	if (!bp->b_maps) {
		bp->b_nmaps = 1;
		bp->b_maps = &bp->__b_map;
		bp->b_maps[0].bm_bn = bp->b_bn;
		bp->b_maps[0].bm_len = bp->b_length;
	}
}

static void
libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	__initbuf(bp, btp, bno, bytes);
}

static void
libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp,
		struct xfs_buf_map *map, int nmaps)
{
	unsigned int	bytes = 0;
	int		i;

	bytes = sizeof(struct xfs_buf_map) * nmaps;
	bp->b_maps = malloc(bytes);
	if (!bp->b_maps) {
		fprintf(stderr,
			_("%s: %s can't malloc %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	bp->b_nmaps = nmaps;

	bytes = 0;
	for (i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bytes += BBTOB(map[i].bm_len);
	}

	__initbuf(bp, btp, map[0].bm_bn, bytes);
	bp->b_flags |= LIBXFS_B_DISCONTIG;
}

static xfs_buf_t *
__libxfs_getbufr(int blen)
{
	xfs_buf_t	*bp;

	/*
	 * First look for a buffer on the free list that can be used as-is.
	 * If one of the right size cannot be found, take any free buffer,
	 * free its data area and set b_addr to NULL so that libxfs_initbuf
	 * allocates a new one of the correct size.
	 */
	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	if (!list_empty(&xfs_buf_freelist.cm_list)) {
		list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
			if (bp->b_bcount == blen) {
				list_del_init(&bp->b_node.cn_mru);
				break;
			}
		}
		if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
			bp = list_entry(xfs_buf_freelist.cm_list.next,
					xfs_buf_t, b_node.cn_mru);
			list_del_init(&bp->b_node.cn_mru);
			free(bp->b_addr);
			bp->b_addr = NULL;
			if (bp->b_maps != &bp->__b_map)
				free(bp->b_maps);
			bp->b_maps = NULL;
		}
	} else
		bp = kmem_zone_zalloc(xfs_buf_zone, 0);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
	bp->b_ops = NULL;
	if (bp->b_flags & LIBXFS_B_DIRTY)
		fprintf(stderr, "found dirty buffer on free list!\n");

	return bp;
}

xfs_buf_t *
libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	bp = __libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf(bp, btp, blkno, blen);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}

static xfs_buf_t *
libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
		struct xfs_buf_map *map, int nmaps)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	if (!map || !nmaps) {
		fprintf(stderr,
			_("%s: %s invalid map %p or nmaps %d\n"),
			progname, __FUNCTION__, map, nmaps);
		exit(1);
	}

	if (blkno != map[0].bm_bn) {
		fprintf(stderr,
			_("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"),
			progname, __FUNCTION__, (long long)map[0].bm_bn,
			(long long)blkno);
		exit(1);
	}

	bp = __libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf_map(bp, btp, map, nmaps);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}

#ifdef XFS_BUF_TRACING
struct list_head	lock_buf_list = {&lock_buf_list, &lock_buf_list};
int			lock_buf_count = 0;
#endif

static struct xfs_buf *
__cache_lookup(struct xfs_bufkey *key, unsigned int flags)
{
	struct xfs_buf	*bp;

	cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp);
	if (!bp)
		return NULL;

	if (use_xfs_buf_lock) {
		int	ret;

		ret = pthread_mutex_trylock(&bp->b_lock);
		if (ret) {
			ASSERT(ret == EAGAIN);
			if (flags & LIBXFS_GETBUF_TRYLOCK)
				goto out_put;

			if (pthread_equal(bp->b_holder, pthread_self())) {
				fprintf(stderr,
	_("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
					key->blkno);
				bp->b_recur++;
				return bp;
			} else {
				pthread_mutex_lock(&bp->b_lock);
			}
		}

		bp->b_holder = pthread_self();
	}

	cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
		cache_node_get_priority((struct cache_node *)bp) -
						CACHE_PREFETCH_PRIORITY);
#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count++;
	list_add(&bp->b_lock_list, &lock_buf_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
#ifdef IO_DEBUG
	printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n",
		pthread_self(), __FUNCTION__,
		bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno));
#endif

	return bp;
out_put:
	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
	return NULL;
}

struct xfs_buf *
libxfs_getbuf_flags(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
		unsigned int flags)
{
	struct xfs_bufkey key = {NULL};

	key.buftarg = btp;
	key.blkno = blkno;
	key.bblen = len;

	return __cache_lookup(&key, flags);
}

/*
 * Clean the buffer flags for libxfs_getbuf*(), which wants to return
 * an unused buffer with clean state. This prevents CRC errors on a
 * re-read of a corrupt block that was prefetched and freed. This
 * can happen with a massively corrupt directory that is discarded,
 * but whose blocks are then recycled into expanding lost+found.
 *
 * Note however that if the buffer's dirty (prefetch calls getbuf)
 * we'll leave the state alone because we don't want to discard blocks
 * that have been fixed.
 */
static void
reset_buf_state(
	struct xfs_buf	*bp)
{
	if (bp && !(bp->b_flags & LIBXFS_B_DIRTY))
		bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE |
				LIBXFS_B_UPTODATE);
}

struct xfs_buf *
libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
{
	struct xfs_buf	*bp;

	bp = libxfs_getbuf_flags(btp, blkno, len, 0);
	reset_buf_state(bp);
	return bp;
}

static struct xfs_buf *
__libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_bufkey key = {NULL};
	int		i;

	if (nmaps == 1)
		return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
					flags);

	key.buftarg = btp;
	key.blkno = map[0].bm_bn;
	for (i = 0; i < nmaps; i++) {
		key.bblen += map[i].bm_len;
	}
	key.map = map;
	key.nmaps = nmaps;

	return __cache_lookup(&key, flags);
}

struct xfs_buf *
libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_buf	*bp;

	bp = __libxfs_getbuf_map(btp, map, nmaps, flags);
	reset_buf_state(bp);
	return bp;
}
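
/*
 * Sketch of discontiguous buffer usage (hypothetical block numbers): a
 * two-extent buffer can be fetched as one logical buffer like so:
 *
 *	struct xfs_buf_map map[2] = {
 *		{ .bm_bn = 100, .bm_len = 4 },
 *		{ .bm_bn = 200, .bm_len = 4 },
 *	};
 *	bp = libxfs_getbuf_map(btp, map, 2, 0);
 *
 * The cache key is map[0].bm_bn with bblen as the sum of the bm_len values,
 * which is exactly how __libxfs_getbuf_map() builds the bufkey.
 */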

void
libxfs_putbuf(xfs_buf_t *bp)
{
	/*
	 * ensure that any errors on this use of the buffer don't carry
	 * over to the next user.
	 */
	bp->b_error = 0;

#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count--;
	ASSERT(lock_buf_count >= 0);
	list_del_init(&bp->b_lock_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
	if (use_xfs_buf_lock) {
		if (bp->b_recur) {
			bp->b_recur--;
		} else {
			bp->b_holder = 0;
			pthread_mutex_unlock(&bp->b_lock);
		}
	}

	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
}

void
libxfs_purgebuf(xfs_buf_t *bp)
{
	struct xfs_bufkey key = {NULL};

	key.buftarg = bp->b_target;
	key.blkno = bp->b_bn;
	key.bblen = bp->b_length;

	cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
}

static struct cache_node *
libxfs_balloc(cache_key_t key)
{
	struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;

	if (bufkey->map)
		return (struct cache_node *)
			libxfs_getbufr_map(bufkey->buftarg,
					bufkey->blkno, bufkey->bblen,
					bufkey->map, bufkey->nmaps);
	return (struct cache_node *)libxfs_getbufr(bufkey->buftarg,
					bufkey->blkno, bufkey->bblen);
}

static int
__read_buf(int fd, void *buf, int len, off64_t offset, int flags)
{
	int	sts;

	sts = pread(fd, buf, len, offset);
	if (sts < 0) {
		int error = errno;
		fprintf(stderr, _("%s: read failed: %s\n"),
			progname, strerror(error));
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -error;
	} else if (sts != len) {
		fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
			progname, sts, len);
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -EIO;
	}
	return 0;
}

int
libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp,
		int len, int flags)
{
	int	fd = libxfs_device_to_fd(btp->dev);
	int	bytes = BBTOB(len);
	int	error;

	ASSERT(BBTOB(len) <= bp->b_bcount);

	error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
	if (!error &&
	    bp->b_target->dev == btp->dev &&
	    bp->b_bn == blkno &&
	    bp->b_bcount == bytes)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, bytes, error,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif
	return error;
}

void
libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops)
{
	if (!ops)
		return;
	bp->b_ops = ops;
	bp->b_ops->verify_read(bp);
	bp->b_flags &= ~LIBXFS_B_UNCHECKED;
}

xfs_buf_t *
libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;
	int		error;

	bp = libxfs_getbuf_flags(btp, blkno, len, 0);
	if (!bp)
		return NULL;

	/*
	 * if the buffer was prefetched, it is likely that it was not validated.
	 * Hence if we are supplied an ops function and the buffer is marked as
	 * unchecked, we need to validate it now.
	 *
	 * We do this verification even if the buffer is dirty - the
	 * verification is almost certainly going to fail the CRC check in this
	 * case as a dirty buffer has not had the CRC recalculated. However, we
	 * should not be dirtying unchecked buffers and therefore failing it
	 * here because it's dirty and unchecked indicates we've screwed up
	 * somewhere else.
	 */
	bp->b_error = 0;
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}

	/*
	 * Set the ops on a cache miss (i.e. first physical read) as the
	 * verifier may change the ops to match the type of buffer it contains.
	 * A cache hit might reset the verifier to the original type if we set
	 * it again, but it won't get called again and set to match the buffer
	 * contents. *cough* xfs_da_node_buf_ops *cough*.
	 */
	error = libxfs_readbufr(btp, blkno, bp, len, flags);
	if (error)
		bp->b_error = error;
	else
		libxfs_readbuf_verify(bp, ops);
	return bp;
}
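
/*
 * Typical read-modify-write cycle against this interface (sketch only; the
 * ops and geometry are whatever the caller is working with):
 *
 *	bp = libxfs_readbuf(btp, blkno, len, 0, ops);	// read and verify
 *	if (bp && !bp->b_error) {
 *		// ... modify bp->b_addr ...
 *		libxfs_writebuf(bp, 0);			// dirty and release
 *	}
 *
 * The physical write is deferred until the cache flushes or reclaims the
 * buffer, which is why the error rules at the top of this file matter.
 */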

int
libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
{
	int	fd;
	int	error = 0;
	void	*buf;
	int	i;

	fd = libxfs_device_to_fd(btp->dev);
	buf = bp->b_addr;
	for (i = 0; i < bp->b_nmaps; i++) {
		off64_t	offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
		int len = BBTOB(bp->b_maps[i].bm_len);

		error = __read_buf(fd, buf, len, offset, flags);
		if (error) {
			bp->b_error = error;
			break;
		}
		buf += len;
	}

	if (!error)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return error;
}

struct xfs_buf *
libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags, const struct xfs_buf_ops *ops)
{
	struct xfs_buf	*bp;
	int		error = 0;

	if (nmaps == 1)
		return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
					flags, ops);

	bp = __libxfs_getbuf_map(btp, map, nmaps, 0);
	if (!bp)
		return NULL;

	bp->b_error = 0;
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}
	error = libxfs_readbufr_map(btp, bp, flags);
	if (!error)
		libxfs_readbuf_verify(bp, ops);

#ifdef IO_DEBUGX
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return bp;
}

static int
__write_buf(int fd, void *buf, int len, off64_t offset, int flags)
{
	int	sts;

	sts = pwrite(fd, buf, len, offset);
	if (sts < 0) {
		int error = errno;
		fprintf(stderr, _("%s: pwrite failed: %s\n"),
			progname, strerror(error));
		if (flags & LIBXFS_B_EXIT)
			exit(1);
		return -error;
	} else if (sts != len) {
		fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"),
			progname, sts, len);
		if (flags & LIBXFS_B_EXIT)
			exit(1);
		return -EIO;
	}
	return 0;
}
int
libxfs_writebufr(xfs_buf_t *bp)
{
	int	fd = libxfs_device_to_fd(bp->b_target->dev);

	/*
	 * we never write buffers that are marked stale. This indicates they
	 * contain data that has been invalidated, and even if the buffer is
	 * dirty it must *never* be written. Verifiers are wonderful for finding
	 * bugs like this. Make sure the error is obvious as to the cause.
	 */
	if (bp->b_flags & LIBXFS_B_STALE) {
		bp->b_error = -ESTALE;
		return bp->b_error;
	}

	/*
	 * clear any pre-existing error status on the buffer. This can occur if
	 * the buffer is corrupt on disk and the repair process doesn't clear
	 * the error before fixing and writing it back.
	 */
	bp->b_error = 0;
	if (bp->b_ops) {
		bp->b_ops->verify_write(bp);
		if (bp->b_error) {
			fprintf(stderr,
	_("%s: write verifier failed on %s bno 0x%llx/0x%x\n"),
				__func__, bp->b_ops->name,
				(long long)bp->b_bn, bp->b_bcount);
			return bp->b_error;
		}
	}

	if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
		bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount,
				    LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags);
	} else {
		int	i;
		void	*buf = bp->b_addr;

		for (i = 0; i < bp->b_nmaps; i++) {
			off64_t	offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
			int len = BBTOB(bp->b_maps[i].bm_len);

			bp->b_error = __write_buf(fd, buf, len, offset,
						  bp->b_flags);
			if (bp->b_error)
				break;
			buf += len;
		}
	}

#ifdef IO_DEBUG
	printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n",
		pthread_self(), __FUNCTION__, bp->b_bcount,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn),
		(long long)bp->b_bn, bp, bp->b_error);
#endif
	if (!bp->b_error) {
		bp->b_flags |= LIBXFS_B_UPTODATE;
		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT |
				 LIBXFS_B_UNCHECKED);
	}
	return bp->b_error;
}

int
libxfs_writebuf_int(xfs_buf_t *bp, int flags)
{
	/*
	 * Clear any error hanging over from reading the buffer. This prevents
	 * subsequent reads after this write from seeing stale errors.
	 */
	bp->b_error = 0;
	bp->b_flags &= ~LIBXFS_B_STALE;
	bp->b_flags |= (LIBXFS_B_DIRTY | flags);
	return 0;
}

int
libxfs_writebuf(xfs_buf_t *bp, int flags)
{
#ifdef IO_DEBUG
	printf("%lx: %s: dirty blkno=%llu(%llu)\n",
		pthread_self(), __FUNCTION__,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn),
		(long long)bp->b_bn);
#endif
	/*
	 * Clear any error hanging over from reading the buffer. This prevents
	 * subsequent reads after this write from seeing stale errors.
	 */
	bp->b_error = 0;
	bp->b_flags &= ~LIBXFS_B_STALE;
	bp->b_flags |= (LIBXFS_B_DIRTY | flags);
	libxfs_putbuf(bp);
	return 0;
}

void
libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
{
#ifdef IO_DEBUG
	if (boff + len > bp->b_bcount) {
		printf("Badness, iomove out of range!\n"
			"bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n",
			(long long)bp->b_bn, bp->b_bcount, boff, len);
		abort();
	}
#endif
	switch (flags) {
	case LIBXFS_BZERO:
		memset(bp->b_addr + boff, 0, len);
		break;
	case LIBXFS_BREAD:
		memcpy(data, bp->b_addr + boff, len);
		break;
	case LIBXFS_BWRITE:
		memcpy(bp->b_addr + boff, data, len);
		break;
	}
}

static void
libxfs_brelse(
	struct cache_node	*node)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;

	if (!bp)
		return;
	if (bp->b_flags & LIBXFS_B_DIRTY)
		fprintf(stderr,
			"releasing dirty buffer to free list!\n");

	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
}

static unsigned int
libxfs_bulkrelse(
	struct cache		*cache,
	struct list_head	*list)
{
	xfs_buf_t	*bp;
	int		count = 0;

	if (list_empty(list))
		return 0;

	list_for_each_entry(bp, list, b_node.cn_mru) {
		if (bp->b_flags & LIBXFS_B_DIRTY)
			fprintf(stderr,
				"releasing dirty buffer (bulk) to free list!\n");
		count++;
	}

	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	list_splice(list, &xfs_buf_freelist.cm_list);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);

	return count;
}

/*
 * Free everything from the xfs_buf_freelist MRU, used at final teardown
 */
void
libxfs_bcache_free(void)
{
	struct list_head	*cm_list;
	xfs_buf_t		*bp, *next;

	cm_list = &xfs_buf_freelist.cm_list;
	list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) {
		free(bp->b_addr);
		if (bp->b_maps != &bp->__b_map)
			free(bp->b_maps);
		kmem_zone_free(xfs_buf_zone, bp);
	}
}

/*
 * When a buffer is marked dirty, the error is cleared. Hence if we are trying
 * to flush a buffer prior to cache reclaim that has an error on it, it means
 * we've already tried to flush it and it failed. Prevent repeated corruption
 * errors from being reported by skipping such buffers - when the corruption is
 * fixed the buffer will be marked dirty again and we can write it again.
 */
static int
libxfs_bflush(
	struct cache_node	*node)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;

	if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY)
		return libxfs_writebufr(bp);
	return bp->b_error;
}

void
libxfs_putbufr(xfs_buf_t *bp)
{
	if (bp->b_flags & LIBXFS_B_DIRTY)
		libxfs_writebufr(bp);
	libxfs_brelse((struct cache_node *)bp);
}

void
libxfs_bcache_purge(void)
{
	cache_purge(libxfs_bcache);
}

void
libxfs_bcache_flush(void)
{
	cache_flush(libxfs_bcache);
}

int
libxfs_bcache_overflowed(void)
{
	return cache_overflowed(libxfs_bcache);
}

struct cache_operations libxfs_bcache_operations = {
	.hash		= libxfs_bhash,
	.alloc		= libxfs_balloc,
	.flush		= libxfs_bflush,
	.relse		= libxfs_brelse,
	.compare	= libxfs_bcompare,
	.bulkrelse	= libxfs_bulkrelse
};
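
/*
 * These operations are wired into the buffer cache at init time. The libxfs
 * init code does, in essence (sketch; exact flags and sizing elided):
 *
 *	libxfs_bcache = cache_init(0, libxfs_bhash_size,
 *				   &libxfs_bcache_operations);
 */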

/*
 * Inode cache stubs.
 */

kmem_zone_t		*xfs_inode_zone;
extern kmem_zone_t	*xfs_ili_zone;

/*
 * If there are inline format data / attr forks attached to this inode,
 * make sure they're not corrupt.
 */
bool
libxfs_inode_verify_forks(
	struct xfs_inode	*ip,
	struct xfs_ifork_ops	*ops)
{
	struct xfs_ifork	*ifp;
	xfs_failaddr_t		fa;

	if (!ops)
		return true;

	fa = xfs_ifork_verify_data(ip, ops);
	if (fa) {
		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
				ifp->if_u1.if_data, ifp->if_bytes, fa);
		return false;
	}

	fa = xfs_ifork_verify_attr(ip, ops);
	if (fa) {
		ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
				ifp ? ifp->if_u1.if_data : NULL,
				ifp ? ifp->if_bytes : 0, fa);
		return false;
	}
	return true;
}

int
libxfs_iget(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	uint			lock_flags,
	struct xfs_inode	**ipp,
	struct xfs_ifork_ops	*ifork_ops)
{
	struct xfs_inode	*ip;
	int			error = 0;

	ip = kmem_zone_zalloc(xfs_inode_zone, 0);
	if (!ip)
		return -ENOMEM;

	ip->i_ino = ino;
	ip->i_mount = mp;
	error = xfs_iread(mp, tp, ip, 0);
	if (error) {
		kmem_zone_free(xfs_inode_zone, ip);
		*ipp = NULL;
		return error;
	}

	if (!libxfs_inode_verify_forks(ip, ifork_ops)) {
		libxfs_irele(ip);
		return -EFSCORRUPTED;
	}

	/*
	 * set up the inode ops structure that the libxfs code relies on
	 */
	if (XFS_ISDIR(ip))
		ip->d_ops = mp->m_dir_inode_ops;
	else
		ip->d_ops = mp->m_nondir_inode_ops;

	*ipp = ip;
	return 0;
}
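
/*
 * Caller pattern (sketch; the ifork ops argument here is assumed and error
 * handling is trimmed). Since these are stubs with no inode cache behind
 * them, every successful libxfs_iget() must be paired with a libxfs_irele():
 *
 *	error = libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
 *	if (!error) {
 *		// ... use ip ...
 *		libxfs_irele(ip);
 *	}
 */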

static void
libxfs_idestroy(xfs_inode_t *ip)
{
	switch (VFS_I(ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		libxfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}
	if (ip->i_afp)
		libxfs_idestroy_fork(ip, XFS_ATTR_FORK);
	if (ip->i_cowfp)
		xfs_idestroy_fork(ip, XFS_COW_FORK);
}

void
libxfs_irele(
	struct xfs_inode	*ip)
{
	if (ip->i_itemp)
		kmem_zone_free(xfs_ili_zone, ip->i_itemp);
	ip->i_itemp = NULL;
	libxfs_idestroy(ip);
	kmem_zone_free(xfs_inode_zone, ip);
}