// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "libxfs_priv.h"
#include "init.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode.h"
#include "xfs_trans.h"

#include "libxfs.h"		/* for LIBXFS_EXIT_ON_FAILURE */

/*
 * Important design/architecture note:
 *
 * The userspace code that uses the buffer cache is much less constrained than
 * the kernel code. The userspace code is pretty nasty in places, especially
 * when it comes to buffer error handling. Very little of the userspace code
 * outside libxfs clears bp->b_error - very little code even checks it - so the
 * libxfs code is tripping on stale errors left by the userspace code.
 *
 * We can't clear errors or zero buffer contents in libxfs_getbuf* like we do
 * in the kernel, because those functions are used by the libxfs_readbuf_*
 * functions and hence need to leave the buffers unchanged on cache hits. This
 * is actually the only way to gather a write error from a libxfs_writebuf()
 * call - you need to get the buffer again so you can check the bp->b_error
 * field - assuming that the buffer is still in the cache when you check, that
 * is.
 *
 * This is very different to the kernel code, which does not release buffers on
 * a write so we can wait on I/O and check errors. The kernel buffer cache also
 * guarantees a buffer of a known initial state from xfs_buf_get() even on a
 * cache hit.
 *
 * IOWs, userspace is behaving quite differently to the kernel and as a result
 * it leaks errors from reads, invalidations and writes through
 * libxfs_getbuf/libxfs_readbuf.
 *
 * The result of this is that until the userspace code outside libxfs is
 * cleaned up, functions that release buffers from userspace control (i.e.
 * libxfs_writebuf/libxfs_putbuf) need to zero bp->b_error to prevent
 * propagation of stale errors into future buffer operations.
 */
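
/*
 * Illustrative sketch, not part of the build: as described above, the only
 * way for a caller to observe a libxfs_writebuf() error is to look the
 * buffer up again and check b_error before releasing it. The helper name
 * and parameters here are hypothetical.
 */
#if 0
static int check_last_write_error(struct xfs_buftarg *btp, xfs_daddr_t blkno,
				  int len)
{
	struct xfs_buf	*bp;
	int		error;

	bp = libxfs_getbuf(btp, blkno, len);	/* re-get after writebuf */
	if (!bp)
		return 0;		/* buffer evicted; the error is lost */
	error = bp->b_error;
	libxfs_putbuf(bp);		/* note: putbuf zeroes b_error */
	return error;
}
#endif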

#define BDSTRAT_SIZE	(256 * 1024)

#define IO_BCOMPARE_CHECK

/* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */
int
libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
{
	xfs_off_t	start_offset, end_offset, offset;
	ssize_t		zsize, bytes;
	char		*z;
	int		fd;

	zsize = min(BDSTRAT_SIZE, BBTOB(len));
	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
		fprintf(stderr,
			_("%s: %s can't memalign %d bytes: %s\n"),
			progname, __FUNCTION__, (int)zsize, strerror(errno));
		exit(1);
	}
	memset(z, 0, zsize);

	fd = libxfs_device_to_fd(btp->dev);
	start_offset = LIBXFS_BBTOOFF64(start);

	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
			progname, __FUNCTION__,
			(unsigned long long)start_offset, strerror(errno));
		exit(1);
	}

	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
	for (offset = 0; offset < end_offset; ) {
		bytes = min((ssize_t)(end_offset - offset), zsize);
		if ((bytes = write(fd, z, bytes)) < 0) {
			fprintf(stderr, _("%s: %s write failed: %s\n"),
				progname, __FUNCTION__, strerror(errno));
			exit(1);
		} else if (bytes == 0) {
			fprintf(stderr, _("%s: %s not progressing?\n"),
				progname, __FUNCTION__);
			exit(1);
		}
		offset += bytes;
	}
	free(z);
	return 0;
}

static void unmount_record(void *p)
{
	xlog_op_header_t *op = (xlog_op_header_t *)p;
	/* the data section must be 32 bit size aligned */
	struct {
		uint16_t magic;
		uint16_t pad1;
		uint32_t pad2; /* may as well make it 64 bits */
	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 };

	memset(p, 0, BBSIZE);
	/* dummy tid to mark this as written from userspace */
	op->oh_tid = cpu_to_be32(0xb0c0d0d0);
	op->oh_len = cpu_to_be32(sizeof(magic));
	op->oh_clientid = XFS_LOG;
	op->oh_flags = XLOG_UNMOUNT_TRANS;
	op->oh_res2 = 0;

	/* and the data for this op */
	memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
}

static char *next(
	char		*ptr,
	int		offset,
	void		*private)
{
	struct xfs_buf	*buf = (struct xfs_buf *)private;

	if (buf &&
	    (buf->b_bcount < (int)(ptr - (char *)buf->b_addr) + offset))
		abort();

	return ptr + offset;
}

/*
 * Format the log. The caller provides either a buftarg which is used to access
 * the log via buffers or a direct pointer to a buffer that encapsulates the
 * entire log.
 */
int
libxfs_log_clear(
	struct xfs_buftarg	*btp,
	char			*dptr,
	xfs_daddr_t		start,
	uint			length,		/* basic blocks */
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,		/* bytes */
	int			fmt,
	int			cycle,
	bool			max)
{
	struct xfs_buf		*bp = NULL;
	int			len;
	xfs_lsn_t		lsn;
	xfs_lsn_t		tail_lsn;
	xfs_daddr_t		blk;
	xfs_daddr_t		end_blk;
	char			*ptr;

	if (((btp && dptr) || (!btp && !dptr)) ||
	    (btp && !btp->dev) || !fs_uuid)
		return -EINVAL;

	/* first zero the log */
	if (btp)
		libxfs_device_zero(btp, start, length);
	else
		memset(dptr, 0, BBTOB(length));

	/*
	 * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a
	 * special reset case where we only write a single record where the lsn
	 * and tail_lsn match. Otherwise, the record lsn starts at block 0 of
	 * the specified cycle and points tail_lsn at the last record of the
	 * previous cycle.
	 */
	len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
	len = max(len, 2);
	lsn = xlog_assign_lsn(cycle, 0);
	if (cycle == XLOG_INIT_CYCLE)
		tail_lsn = lsn;
	else
		tail_lsn = xlog_assign_lsn(cycle - 1, length - len);

	/* write out the first log record */
	ptr = dptr;
	if (btp) {
		bp = libxfs_getbufr(btp, start, len);
		ptr = bp->b_addr;
	}
	libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn,
			  next, bp);
	if (bp) {
		bp->b_flags |= LIBXFS_B_DIRTY;
		libxfs_putbufr(bp);
	}

	/*
	 * There's nothing else to do if this is a log reset. The kernel
	 * detects the rest of the log is zeroed and starts at cycle 1.
	 */
	if (cycle == XLOG_INIT_CYCLE)
		return 0;

	/*
	 * Bump the record size for a full log format if the caller allows it.
	 * This is primarily for performance reasons and most callers don't
	 * care about record size since the log is clean after we're done.
	 */
	if (max)
		len = BTOBB(BDSTRAT_SIZE);

	/*
	 * Otherwise, fill everything beyond the initial record with records of
	 * the previous cycle so the kernel head/tail detection works
	 * correctly.
	 *
	 * We don't particularly care about the record size or content here.
	 * It's only important that the headers are in place such that the
	 * kernel finds 1.) a clean log and 2.) the correct current cycle
	 * value. Therefore, bump up the record size to the max to use larger
	 * I/Os and improve performance.
	 */
	cycle--;
	blk = start + len;
	if (dptr)
		dptr += BBTOB(len);
	end_blk = start + length;

	len = min(end_blk - blk, len);
	while (blk < end_blk) {
		lsn = xlog_assign_lsn(cycle, blk - start);
		tail_lsn = xlog_assign_lsn(cycle, blk - start - len);

		ptr = dptr;
		if (btp) {
			bp = libxfs_getbufr(btp, blk, len);
			ptr = bp->b_addr;
		}
		/*
		 * Note: pass the full buffer length as the sunit to initialize
		 * the entire buffer.
		 */
		libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn,
				  tail_lsn, next, bp);
		if (bp) {
			bp->b_flags |= LIBXFS_B_DIRTY;
			libxfs_putbufr(bp);
		}

		blk += len;
		if (dptr)
			dptr += BBTOB(len);
		len = min(end_blk - blk, len);
	}

	return 0;
}
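
/*
 * Illustrative sketch, not part of the build: a mkfs-style reset of the log
 * to the initial cycle, roughly how mkfs and repair drive this function. The
 * mount structure 'mp' is assumed to be fully set up.
 */
#if 0
static int example_reset_log(struct xfs_mount *mp)
{
	return libxfs_log_clear(mp->m_logdev_targp, NULL,
			XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
			(uint)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks),
			&mp->m_sb.sb_uuid,
			xfs_sb_version_haslogv2(&mp->m_sb) ? 2 : 1,
			mp->m_sb.sb_logsunit, XLOG_FMT, XLOG_INIT_CYCLE,
			false);
}
#endif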

int
libxfs_log_header(
	char			*caddr,
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,
	int			fmt,
	xfs_lsn_t		lsn,
	xfs_lsn_t		tail_lsn,
	libxfs_get_block_t	*nextfunc,
	void			*private)
{
	xlog_rec_header_t	*head = (xlog_rec_header_t *)caddr;
	char			*p = caddr;
	__be32			cycle_lsn;
	int			i, len;
	int			hdrs = 1;

	if (lsn == NULLCOMMITLSN)
		lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0);
	if (tail_lsn == NULLCOMMITLSN)
		tail_lsn = lsn;

	len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;

	memset(p, 0, BBSIZE);
	head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
	head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn));
	head->h_version = cpu_to_be32(version);
	head->h_crc = cpu_to_le32(0);
	head->h_prev_block = cpu_to_be32(-1);
	head->h_num_logops = cpu_to_be32(1);
	head->h_fmt = cpu_to_be32(fmt);
	head->h_size = cpu_to_be32(max(sunit, XLOG_BIG_RECORD_BSIZE));

	head->h_lsn = cpu_to_be64(lsn);
	head->h_tail_lsn = cpu_to_be64(tail_lsn);

	memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));

	/*
	 * The kernel expects to see either a log record header magic value or
	 * the LSN cycle at the top of every log block. The first word of each
	 * non-header block is copied to the record headers and replaced with
	 * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for
	 * details).
	 *
	 * Even though we only ever write an unmount record (one block), we
	 * support writing log records up to the max log buffer size of 256k to
	 * improve log format performance. This means a record can require up
	 * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle
	 * data (each header supports 32k of data).
	 */
	cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
	if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) {
		hdrs = sunit / XLOG_HEADER_CYCLE_SIZE;
		if (sunit % XLOG_HEADER_CYCLE_SIZE)
			hdrs++;
	}

	/*
	 * A fixed number of extended headers is expected based on h_size. If
	 * required, format those now so the unmount record is located
	 * correctly.
	 *
	 * Since we only write an unmount record, we only need one h_cycle_data
	 * entry for the unmount record block. The subsequent record data
	 * blocks are zeroed, which means we can stamp them directly with the
	 * cycle and zero the rest of the cycle data in the extended headers.
	 */
	if (hdrs > 1) {
		for (i = 1; i < hdrs; i++) {
			p = nextfunc(p, BBSIZE, private);
			memset(p, 0, BBSIZE);
			/* xlog_rec_ext_header.xh_cycle */
			*(__be32 *)p = cycle_lsn;
		}
	}

	/*
	 * The total length is the max of the stripe unit or the 2 basic block
	 * minimum (1 hdr blk + 1 data blk). The record length is the total
	 * minus however many header blocks are required.
	 */
	head->h_len = cpu_to_be32(max(BBTOB(2), sunit) - hdrs * BBSIZE);

	/*
	 * Write out the unmount record, pack the first word into the record
	 * header and stamp the block with the cycle.
	 */
	p = nextfunc(p, BBSIZE, private);
	unmount_record(p);

	head->h_cycle_data[0] = *(__be32 *)p;
	*(__be32 *)p = cycle_lsn;

	/*
	 * Finally, zero all remaining blocks in the record and stamp each with
	 * the cycle. We don't need to pack any of these blocks because the
	 * cycle data in the headers has already been zeroed.
	 */
	len = max(len, hdrs + 1);
	for (i = hdrs + 1; i < len; i++) {
		p = nextfunc(p, BBSIZE, private);
		memset(p, 0, BBSIZE);
		*(__be32 *)p = cycle_lsn;
	}

	return BBTOB(len);
}
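
/*
 * Worked example of the header arithmetic above: for a v2 log with a 256k
 * stripe unit, hdrs = 256k / XLOG_HEADER_CYCLE_SIZE (32k) = 8, i.e. one
 * record header plus seven extended headers, and the record length becomes
 * h_len = 256k - 8 * BBSIZE = 258048 bytes of payload.
 */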

/*
 * Simple I/O (buffer cache) interface
 */

#ifdef XFS_BUF_TRACING

#undef libxfs_readbuf
#undef libxfs_readbuf_map
#undef libxfs_writebuf
#undef libxfs_getbuf
#undef libxfs_getbuf_map
#undef libxfs_getbuf_flags
#undef libxfs_putbuf

xfs_buf_t	*libxfs_readbuf(struct xfs_buftarg *, xfs_daddr_t, int, int,
				const struct xfs_buf_ops *);
xfs_buf_t	*libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
				int, int, const struct xfs_buf_ops *);
int		libxfs_writebuf(xfs_buf_t *, int);
xfs_buf_t	*libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
xfs_buf_t	*libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
				int, int);
xfs_buf_t	*libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
				unsigned int);
void		libxfs_putbuf(xfs_buf_t *);

#define __add_trace(bp, func, file, line)	\
do {						\
	if (bp) {				\
		(bp)->b_func = (func);		\
		(bp)->b_file = (file);		\
		(bp)->b_line = (line);		\
	}					\
} while (0)

xfs_buf_t *
libxfs_trace_readbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf(btp, blkno, len, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_readbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags, const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

int
libxfs_trace_writebuf(const char *func, const char *file, int line,
		xfs_buf_t *bp, int flags)
{
	__add_trace(bp, func, file, line);
	return libxfs_writebuf(bp, flags);
}

xfs_buf_t *
libxfs_trace_getbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
{
	xfs_buf_t	*bp = libxfs_getbuf(btp, blkno, len);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_map(btp, map, nmaps, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
		unsigned int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_flags(btp, blkno, len, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

void
libxfs_trace_putbuf(const char *func, const char *file, int line,
		xfs_buf_t *bp)
{
	__add_trace(bp, func, file, line);
	libxfs_putbuf(bp);
}

#endif

xfs_buf_t *
libxfs_getsb(xfs_mount_t *mp, int flags)
{
	return libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR,
				XFS_FSS_TO_BB(mp, 1), flags, &xfs_sb_buf_ops);
}

kmem_zone_t	*xfs_buf_zone;

static struct cache_mru xfs_buf_freelist =
	{{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
	 0, PTHREAD_MUTEX_INITIALIZER };

/*
 * The bufkey is used to pass the new buffer information to the cache object
 * allocation routine. Because discontiguous buffers need to pass different
 * information, we need fields to pass that information. However, because the
 * blkno and bblen are needed for the initial cache entry lookup (i.e. for
 * bcompare), we use the fact that map/nmaps are non-null to switch to
 * discontiguous buffer initialisation instead of contiguous buffer
 * initialisation.
 */
struct xfs_bufkey {
	struct xfs_buftarg	*buftarg;
	xfs_daddr_t		blkno;
	unsigned int		bblen;
	struct xfs_buf_map	*map;
	int			nmaps;
};

/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define GOLDEN_RATIO_PRIME	0x9e37fffffffc0001UL
#define CACHE_LINE_SIZE		64
static unsigned int
libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift)
{
	uint64_t	hashval = ((struct xfs_bufkey *)key)->blkno;
	uint64_t	tmp;

	tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift);
	return tmp % hashsize;
}

static int
libxfs_bcompare(struct cache_node *node, cache_key_t key)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;
	struct xfs_bufkey	*bkey = (struct xfs_bufkey *)key;

	if (bp->b_target->dev == bkey->buftarg->dev &&
	    bp->b_bn == bkey->blkno) {
		if (bp->b_bcount == BBTOB(bkey->bblen))
			return CACHE_HIT;
#ifdef IO_BCOMPARE_CHECK
		if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
			fprintf(stderr,
	"%lx: Badness in key lookup (length)\n"
	"bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
				pthread_self(),
				(unsigned long long)bp->b_bn, (int)bp->b_bcount,
				(unsigned long long)bkey->blkno,
				BBTOB(bkey->bblen));
		}
#endif
		return CACHE_PURGE;
	}
	return CACHE_MISS;
}

static void
libxfs_bprint(xfs_buf_t *bp)
{
	fprintf(stderr, "Buffer %p blkno=%llu bytes=%u flags=0x%x count=%u\n",
		bp, (unsigned long long)bp->b_bn, (unsigned)bp->b_bcount,
		bp->b_flags, bp->b_node.cn_count);
}

static void
__initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	bp->b_flags = 0;
	bp->b_bn = bno;
	bp->b_bcount = bytes;
	bp->b_length = BTOBB(bytes);
	bp->b_target = btp;
	bp->b_error = 0;
	if (!bp->b_addr)
		bp->b_addr = memalign(libxfs_device_alignment(), bytes);
	if (!bp->b_addr) {
		fprintf(stderr,
			_("%s: %s can't memalign %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	memset(bp->b_addr, 0, bytes);
#ifdef XFS_BUF_TRACING
	list_head_init(&bp->b_lock_list);
#endif
	pthread_mutex_init(&bp->b_lock, NULL);
	bp->b_holder = 0;
	bp->b_recur = 0;
	bp->b_ops = NULL;

	if (!bp->b_maps) {
		bp->b_nmaps = 1;
		bp->b_maps = &bp->__b_map;
		bp->b_maps[0].bm_bn = bp->b_bn;
		bp->b_maps[0].bm_len = bp->b_length;
	}
}

static void
libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	__initbuf(bp, btp, bno, bytes);
}

static void
libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp,
		struct xfs_buf_map *map, int nmaps)
{
	unsigned int	bytes = 0;
	int		i;

	bytes = sizeof(struct xfs_buf_map) * nmaps;
	bp->b_maps = malloc(bytes);
	if (!bp->b_maps) {
		fprintf(stderr,
			_("%s: %s can't malloc %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	bp->b_nmaps = nmaps;

	bytes = 0;
	for (i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bytes += BBTOB(map[i].bm_len);
	}

	__initbuf(bp, btp, map[0].bm_bn, bytes);
	bp->b_flags |= LIBXFS_B_DISCONTIG;
}

static xfs_buf_t *
__libxfs_getbufr(int blen)
{
	xfs_buf_t	*bp;

	/*
	 * First look for a buffer that can be used as-is. If one cannot be
	 * found, take any buffer off the free list, free its data buffer and
	 * set b_addr to NULL before calling libxfs_initbuf.
	 */
	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	if (!list_empty(&xfs_buf_freelist.cm_list)) {
		list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
			if (bp->b_bcount == blen) {
				list_del_init(&bp->b_node.cn_mru);
				break;
			}
		}
		if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
			bp = list_entry(xfs_buf_freelist.cm_list.next,
					xfs_buf_t, b_node.cn_mru);
			list_del_init(&bp->b_node.cn_mru);
			free(bp->b_addr);
			bp->b_addr = NULL;
			if (bp->b_maps != &bp->__b_map)
				free(bp->b_maps);
			bp->b_maps = NULL;
		}
	} else
		bp = kmem_zone_zalloc(xfs_buf_zone, 0);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
	bp->b_ops = NULL;
	if (bp->b_flags & LIBXFS_B_DIRTY)
		fprintf(stderr, "found dirty buffer (bulk) on free list!\n");

	return bp;
}

xfs_buf_t *
libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	bp = __libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf(bp, btp, blkno, blen);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}

static xfs_buf_t *
libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
		struct xfs_buf_map *map, int nmaps)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	if (!map || !nmaps) {
		fprintf(stderr,
			_("%s: %s invalid map %p or nmaps %d\n"),
			progname, __FUNCTION__, map, nmaps);
		exit(1);
	}

	if (blkno != map[0].bm_bn) {
		fprintf(stderr,
			_("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"),
			progname, __FUNCTION__, (long long)map[0].bm_bn,
			(long long)blkno);
		exit(1);
	}

	bp = __libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf_map(bp, btp, map, nmaps);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}

#ifdef XFS_BUF_TRACING
struct list_head	lock_buf_list = {&lock_buf_list, &lock_buf_list};
int			lock_buf_count = 0;
#endif

extern int	use_xfs_buf_lock;

static struct xfs_buf *
__cache_lookup(struct xfs_bufkey *key, unsigned int flags)
{
	struct xfs_buf	*bp;

	cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp);
	if (!bp)
		return NULL;

	if (use_xfs_buf_lock) {
		int		ret;

		ret = pthread_mutex_trylock(&bp->b_lock);
		if (ret) {
			ASSERT(ret == EAGAIN);
			if (flags & LIBXFS_GETBUF_TRYLOCK)
				goto out_put;

			if (pthread_equal(bp->b_holder, pthread_self())) {
				fprintf(stderr,
	_("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
					key->blkno);
				bp->b_recur++;
				return bp;
			} else {
				pthread_mutex_lock(&bp->b_lock);
			}
		}

		bp->b_holder = pthread_self();
	}

	cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
		cache_node_get_priority((struct cache_node *)bp) -
						CACHE_PREFETCH_PRIORITY);
#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count++;
	list_add(&bp->b_lock_list, &lock_buf_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
#ifdef IO_DEBUG
	printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n",
		pthread_self(), __FUNCTION__,
		bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno));
#endif

	return bp;
out_put:
	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
	return NULL;
}

struct xfs_buf *
libxfs_getbuf_flags(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
		unsigned int flags)
{
	struct xfs_bufkey key = {NULL};

	key.buftarg = btp;
	key.blkno = blkno;
	key.bblen = len;

	return __cache_lookup(&key, flags);
}

/*
 * Clean the buffer flags for libxfs_getbuf*(), which wants to return
 * an unused buffer with clean state. This prevents CRC errors on a
 * re-read of a corrupt block that was prefetched and freed. This
 * can happen with a massively corrupt directory that is discarded,
 * but whose blocks are then recycled into expanding lost+found.
 *
 * Note however that if the buffer's dirty (prefetch calls getbuf)
 * we'll leave the state alone because we don't want to discard blocks
 * that have been fixed.
 */
static void
reset_buf_state(
	struct xfs_buf	*bp)
{
	if (bp && !(bp->b_flags & LIBXFS_B_DIRTY))
		bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE |
				LIBXFS_B_UPTODATE);
}

struct xfs_buf *
libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
{
	struct xfs_buf	*bp;

	bp = libxfs_getbuf_flags(btp, blkno, len, 0);
	reset_buf_state(bp);
	return bp;
}

static struct xfs_buf *
__libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_bufkey key = {NULL};
	int		i;

	if (nmaps == 1)
		return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
					flags);

	key.buftarg = btp;
	key.blkno = map[0].bm_bn;
	for (i = 0; i < nmaps; i++) {
		key.bblen += map[i].bm_len;
	}
	key.map = map;
	key.nmaps = nmaps;

	return __cache_lookup(&key, flags);
}

struct xfs_buf *
libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_buf	*bp;

	bp = __libxfs_getbuf_map(btp, map, nmaps, flags);
	reset_buf_state(bp);
	return bp;
}

void
libxfs_putbuf(xfs_buf_t *bp)
{
	/*
	 * Ensure that any errors on this use of the buffer don't carry
	 * over to the next user.
	 */
	bp->b_error = 0;

#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count--;
	ASSERT(lock_buf_count >= 0);
	list_del_init(&bp->b_lock_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
	if (use_xfs_buf_lock) {
		if (bp->b_recur) {
			bp->b_recur--;
		} else {
			bp->b_holder = 0;
			pthread_mutex_unlock(&bp->b_lock);
		}
	}

	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
}

void
libxfs_purgebuf(xfs_buf_t *bp)
{
	struct xfs_bufkey key = {NULL};

	key.buftarg = bp->b_target;
	key.blkno = bp->b_bn;
	key.bblen = bp->b_length;

	cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
}

static struct cache_node *
libxfs_balloc(cache_key_t key)
{
	struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;

	if (bufkey->map)
		return (struct cache_node *)
			libxfs_getbufr_map(bufkey->buftarg,
					bufkey->blkno, bufkey->bblen,
					bufkey->map, bufkey->nmaps);
	return (struct cache_node *)libxfs_getbufr(bufkey->buftarg,
					bufkey->blkno, bufkey->bblen);
}


static int
__read_buf(int fd, void *buf, int len, off64_t offset, int flags)
{
	int	sts;

	sts = pread(fd, buf, len, offset);
	if (sts < 0) {
		int error = errno;
		fprintf(stderr, _("%s: read failed: %s\n"),
			progname, strerror(error));
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -error;
	} else if (sts != len) {
		fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
			progname, sts, len);
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -EIO;
	}
	return 0;
}

int
libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp,
		int len, int flags)
{
	int	fd = libxfs_device_to_fd(btp->dev);
	int	bytes = BBTOB(len);
	int	error;

	ASSERT(BBTOB(len) <= bp->b_bcount);

	error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno),
			   flags);
	if (!error &&
	    bp->b_target->dev == btp->dev &&
	    bp->b_bn == blkno &&
	    bp->b_bcount == bytes)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, bytes, error,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif
	return error;
}

void
libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops)
{
	if (!ops)
		return;
	bp->b_ops = ops;
	bp->b_ops->verify_read(bp);
	bp->b_flags &= ~LIBXFS_B_UNCHECKED;
}

xfs_buf_t *
libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;
	int		error;

	bp = libxfs_getbuf_flags(btp, blkno, len, 0);
	if (!bp)
		return NULL;

	/*
	 * If the buffer was prefetched, it is likely that it was not
	 * validated. Hence if we are supplied an ops function and the buffer
	 * is marked as unchecked, we need to validate it now.
	 *
	 * We do this verification even if the buffer is dirty - the
	 * verification is almost certainly going to fail the CRC check in
	 * this case as a dirty buffer has not had the CRC recalculated.
	 * However, we should not be dirtying unchecked buffers, so a buffer
	 * failing here because it is both dirty and unchecked indicates we've
	 * screwed up somewhere else.
	 */
	bp->b_error = 0;
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}

	/*
	 * Set the ops on a cache miss (i.e. first physical read) as the
	 * verifier may change the ops to match the type of buffer it
	 * contains. A cache hit might reset the verifier to the original type
	 * if we set it again, but it won't get called again and set to match
	 * the buffer contents. *cough* xfs_da_node_buf_ops *cough*.
	 */
	error = libxfs_readbufr(btp, blkno, bp, len, flags);
	if (error)
		bp->b_error = error;
	else
		libxfs_readbuf_verify(bp, ops);
	return bp;
}
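
/*
 * Illustrative sketch, not part of the build: the typical read-modify-write
 * cycle against the cache. The superblock is used purely as an example; the
 * helper name is hypothetical.
 */
#if 0
static int example_modify_sb(struct xfs_mount *mp)
{
	struct xfs_buf	*bp;

	bp = libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR,
				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
	if (!bp)
		return -ENOMEM;
	if (bp->b_error) {
		int error = bp->b_error;

		libxfs_putbuf(bp);
		return error;
	}

	/* ... modify the contents of bp->b_addr here ... */

	/* marks the buffer dirty and releases it; I/O happens at flush */
	return libxfs_writebuf(bp, 0);
}
#endif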

int
libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
{
	int	fd;
	int	error = 0;
	void	*buf;
	int	i;

	fd = libxfs_device_to_fd(btp->dev);
	buf = bp->b_addr;
	for (i = 0; i < bp->b_nmaps; i++) {
		off64_t	offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
		int	len = BBTOB(bp->b_maps[i].bm_len);

		error = __read_buf(fd, buf, len, offset, flags);
		if (error) {
			bp->b_error = error;
			break;
		}
		buf += len;
	}

	if (!error)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return error;
}

struct xfs_buf *
libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags, const struct xfs_buf_ops *ops)
{
	struct xfs_buf	*bp;
	int		error = 0;

	if (nmaps == 1)
		return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
					flags, ops);

	bp = __libxfs_getbuf_map(btp, map, nmaps, 0);
	if (!bp)
		return NULL;

	bp->b_error = 0;
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}
	error = libxfs_readbufr_map(btp, bp, flags);
	if (!error)
		libxfs_readbuf_verify(bp, ops);

#ifdef IO_DEBUGX
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return bp;
}

static int
__write_buf(int fd, void *buf, int len, off64_t offset, int flags)
{
	int	sts;

	sts = pwrite(fd, buf, len, offset);
	if (sts < 0) {
		int error = errno;
		fprintf(stderr, _("%s: pwrite failed: %s\n"),
			progname, strerror(error));
		if (flags & LIBXFS_B_EXIT)
			exit(1);
		return -error;
	} else if (sts != len) {
		fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"),
			progname, sts, len);
		if (flags & LIBXFS_B_EXIT)
			exit(1);
		return -EIO;
	}
	return 0;
}

int
libxfs_writebufr(xfs_buf_t *bp)
{
	int	fd = libxfs_device_to_fd(bp->b_target->dev);

	/*
	 * We never write buffers that are marked stale. This indicates they
	 * contain data that has been invalidated, and even if the buffer is
	 * dirty it must *never* be written. Verifiers are wonderful for
	 * finding bugs like this. Make sure the error is obvious as to the
	 * cause.
	 */
	if (bp->b_flags & LIBXFS_B_STALE) {
		bp->b_error = -ESTALE;
		return bp->b_error;
	}

	/*
	 * Clear any pre-existing error status on the buffer. This can occur
	 * if the buffer is corrupt on disk and the repair process doesn't
	 * clear the error before fixing and writing it back.
	 */
	bp->b_error = 0;
	if (bp->b_ops) {
		bp->b_ops->verify_write(bp);
		if (bp->b_error) {
			fprintf(stderr,
	_("%s: write verifier failed on %s bno 0x%llx/0x%x\n"),
				__func__, bp->b_ops->name,
				(long long)bp->b_bn, bp->b_bcount);
			return bp->b_error;
		}
	}

	if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
		bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount,
				    LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags);
	} else {
		int	i;
		void	*buf = bp->b_addr;

		for (i = 0; i < bp->b_nmaps; i++) {
			off64_t	offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
			int	len = BBTOB(bp->b_maps[i].bm_len);

			bp->b_error = __write_buf(fd, buf, len, offset,
						  bp->b_flags);
			if (bp->b_error)
				break;
			buf += len;
		}
	}

#ifdef IO_DEBUG
	printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n",
		pthread_self(), __FUNCTION__, bp->b_bcount,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn),
		(long long)bp->b_bn, bp, bp->b_error);
#endif
	if (!bp->b_error) {
		bp->b_flags |= LIBXFS_B_UPTODATE;
		bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT |
				 LIBXFS_B_UNCHECKED);
	}
	return bp->b_error;
}

int
libxfs_writebuf_int(xfs_buf_t *bp, int flags)
{
	/*
	 * Clear any error hanging over from reading the buffer. This prevents
	 * subsequent reads after this write from seeing stale errors.
	 */
	bp->b_error = 0;
	bp->b_flags &= ~LIBXFS_B_STALE;
	bp->b_flags |= (LIBXFS_B_DIRTY | flags);
	return 0;
}

int
libxfs_writebuf(xfs_buf_t *bp, int flags)
{
#ifdef IO_DEBUG
	printf("%lx: %s: dirty blkno=%llu(%llu)\n",
		pthread_self(), __FUNCTION__,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn),
		(long long)bp->b_bn);
#endif
	/*
	 * Clear any error hanging over from reading the buffer. This prevents
	 * subsequent reads after this write from seeing stale errors.
	 */
	bp->b_error = 0;
	bp->b_flags &= ~LIBXFS_B_STALE;
	bp->b_flags |= (LIBXFS_B_DIRTY | flags);
	libxfs_putbuf(bp);
	return 0;
}

void
libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
{
#ifdef IO_DEBUG
	if (boff + len > bp->b_bcount) {
		printf("Badness, iomove out of range!\n"
			"bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n",
			(long long)bp->b_bn, bp->b_bcount, boff, len);
		abort();
	}
#endif
	switch (flags) {
	case LIBXFS_BZERO:
		memset(bp->b_addr + boff, 0, len);
		break;
	case LIBXFS_BREAD:
		memcpy(data, bp->b_addr + boff, len);
		break;
	case LIBXFS_BWRITE:
		memcpy(bp->b_addr + boff, data, len);
		break;
	}
}
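
/*
 * Illustrative sketch, not part of the build: the three iomove modes. 'bp',
 * 'data' and the lengths are hypothetical.
 */
#if 0
	libxfs_iomove(bp, 0, 512, NULL, LIBXFS_BZERO);	/* zero bytes 0..511 */
	libxfs_iomove(bp, 0, 512, data, LIBXFS_BREAD);	/* copy out of bp */
	libxfs_iomove(bp, 0, 512, data, LIBXFS_BWRITE);	/* copy into bp */
#endif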

static void
libxfs_brelse(
	struct cache_node	*node)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;

	if (!bp)
		return;
	if (bp->b_flags & LIBXFS_B_DIRTY)
		fprintf(stderr,
			"releasing dirty buffer to free list!\n");

	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
}

static unsigned int
libxfs_bulkrelse(
	struct cache		*cache,
	struct list_head	*list)
{
	xfs_buf_t		*bp;
	int			count = 0;

	if (list_empty(list))
		return 0;

	list_for_each_entry(bp, list, b_node.cn_mru) {
		if (bp->b_flags & LIBXFS_B_DIRTY)
			fprintf(stderr,
				"releasing dirty buffer (bulk) to free list!\n");
		count++;
	}

	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	list_splice(list, &xfs_buf_freelist.cm_list);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);

	return count;
}

/*
 * Free everything from the xfs_buf_freelist MRU, used at final teardown.
 */
void
libxfs_bcache_free(void)
{
	struct list_head	*cm_list;
	xfs_buf_t		*bp, *next;

	cm_list = &xfs_buf_freelist.cm_list;
	list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) {
		free(bp->b_addr);
		if (bp->b_maps != &bp->__b_map)
			free(bp->b_maps);
		kmem_zone_free(xfs_buf_zone, bp);
	}
}

/*
 * When a buffer is marked dirty, the error is cleared. Hence if we are trying
 * to flush a buffer prior to cache reclaim that has an error on it, it means
 * we've already tried to flush it and it failed. Prevent repeated corruption
 * errors from being reported by skipping such buffers - when the corruption
 * is fixed the buffer will be marked dirty again and we can write it again.
 */
static int
libxfs_bflush(
	struct cache_node	*node)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;

	if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY)
		return libxfs_writebufr(bp);
	return bp->b_error;
}

void
libxfs_putbufr(xfs_buf_t *bp)
{
	if (bp->b_flags & LIBXFS_B_DIRTY)
		libxfs_writebufr(bp);
	libxfs_brelse((struct cache_node *)bp);
}


void
libxfs_bcache_purge(void)
{
	cache_purge(libxfs_bcache);
}

void
libxfs_bcache_flush(void)
{
	cache_flush(libxfs_bcache);
}

int
libxfs_bcache_overflowed(void)
{
	return cache_overflowed(libxfs_bcache);
}

struct cache_operations libxfs_bcache_operations = {
	.hash		= libxfs_bhash,
	.alloc		= libxfs_balloc,
	.flush		= libxfs_bflush,
	.relse		= libxfs_brelse,
	.compare	= libxfs_bcompare,
	.bulkrelse	= libxfs_bulkrelse
};
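
/*
 * These operations are wired into the buffer cache when it is created during
 * libxfs initialisation; roughly (see init.c):
 *
 *	libxfs_bcache = cache_init(0, libxfs_bhash_size,
 *				   &libxfs_bcache_operations);
 */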


/*
 * Inode cache stubs.
 */

kmem_zone_t		*xfs_inode_zone;
extern kmem_zone_t	*xfs_ili_zone;

/*
 * If there are inline format data / attr forks attached to this inode,
 * make sure they're not corrupt.
 */
bool
libxfs_inode_verify_forks(
	struct xfs_inode	*ip,
	struct xfs_ifork_ops	*ops)
{
	struct xfs_ifork	*ifp;
	xfs_failaddr_t		fa;

	if (!ops)
		return true;

	fa = xfs_ifork_verify_data(ip, ops);
	if (fa) {
		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
				ifp->if_u1.if_data, ifp->if_bytes, fa);
		return false;
	}

	fa = xfs_ifork_verify_attr(ip, ops);
	if (fa) {
		ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
				ifp ? ifp->if_u1.if_data : NULL,
				ifp ? ifp->if_bytes : 0, fa);
		return false;
	}
	return true;
}

int
libxfs_iget(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	uint			lock_flags,
	struct xfs_inode	**ipp,
	struct xfs_ifork_ops	*ifork_ops)
{
	struct xfs_inode	*ip;
	int			error = 0;

	ip = kmem_zone_zalloc(xfs_inode_zone, 0);
	if (!ip)
		return -ENOMEM;

	ip->i_ino = ino;
	ip->i_mount = mp;
	error = xfs_iread(mp, tp, ip, 0);
	if (error) {
		kmem_zone_free(xfs_inode_zone, ip);
		*ipp = NULL;
		return error;
	}

	if (!libxfs_inode_verify_forks(ip, ifork_ops)) {
		libxfs_irele(ip);
		return -EFSCORRUPTED;
	}

	/*
	 * Set up the inode ops structure that the libxfs code relies on.
	 */
	if (XFS_ISDIR(ip))
		ip->d_ops = mp->m_dir_inode_ops;
	else
		ip->d_ops = mp->m_nondir_inode_ops;

	*ipp = ip;
	return 0;
}
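
/*
 * Illustrative sketch, not part of the build: looking up an inode and
 * releasing it again. The inode number and the use of the default fork
 * verifier are hypothetical.
 */
#if 0
static int example_iget(struct xfs_mount *mp, xfs_ino_t ino)
{
	struct xfs_inode	*ip;
	int			error;

	error = libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
	if (error)
		return error;

	/* ... inspect or modify the inode here ... */

	libxfs_irele(ip);
	return 0;
}
#endif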

static void
libxfs_idestroy(xfs_inode_t *ip)
{
	switch (VFS_I(ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		libxfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}
	if (ip->i_afp)
		libxfs_idestroy_fork(ip, XFS_ATTR_FORK);
	if (ip->i_cowfp)
		xfs_idestroy_fork(ip, XFS_COW_FORK);
}

void
libxfs_irele(
	struct xfs_inode	*ip)
{
	if (ip->i_itemp)
		kmem_zone_free(xfs_ili_zone, ip->i_itemp);
	ip->i_itemp = NULL;
	libxfs_idestroy(ip);
	kmem_zone_free(xfs_inode_zone, ip);
}