]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/rdwr.c
xfsprogs: make static things static
[thirdparty/xfsprogs-dev.git] / libxfs / rdwr.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
f1b058f9 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
da23017d 4 * All Rights Reserved.
2bd0ea18
NS
5 */
6
b626fb59 7
9c799827 8#include "libxfs_priv.h"
1aef52f8 9#include "init.h"
b626fb59
DC
10#include "xfs_fs.h"
11#include "xfs_shared.h"
12#include "xfs_format.h"
13#include "xfs_log_format.h"
14#include "xfs_trans_resv.h"
15#include "xfs_mount.h"
16#include "xfs_inode_buf.h"
17#include "xfs_inode_fork.h"
18#include "xfs_inode.h"
19#include "xfs_trans.h"
20
6b803e5a 21#include "libxfs.h" /* for LIBXFS_EXIT_ON_FAILURE */
2bd0ea18 22
6af7c1ea
DC
23/*
24 * Important design/architecture note:
25 *
26 * The userspace code that uses the buffer cache is much less constrained than
27 * the kernel code. The userspace code is pretty nasty in places, especially
28 * when it comes to buffer error handling. Very little of the userspace code
29 * outside libxfs clears bp->b_error - very little code even checks it - so the
30 * libxfs code is tripping on stale errors left by the userspace code.
31 *
32 * We can't clear errors or zero buffer contents in libxfs_getbuf-* like we do
33 * in the kernel, because those functions are used by the libxfs_readbuf_*
34 * functions and hence need to leave the buffers unchanged on cache hits. This
35 * is actually the only way to gather a write error from a libxfs_writebuf()
36 * call - you need to get the buffer again so you can check bp->b_error field -
37 * assuming that the buffer is still in the cache when you check, that is.
38 *
39 * This is very different to the kernel code which does not release buffers on a
40 * write so we can wait on IO and check errors. The kernel buffer cache also
41 * guarantees a buffer of a known initial state from xfs_buf_get() even on a
42 * cache hit.
43 *
44 * IOWs, userspace is behaving quite differently to the kernel and as a result
45 * it leaks errors from reads, invalidations and writes through
46 * libxfs_getbuf/libxfs_readbuf.
47 *
48 * The result of this is that until the userspace code outside libxfs is cleaned
49 * up, functions that release buffers from userspace control (i.e
50 * libxfs_writebuf/libxfs_putbuf) need to zero bp->b_error to prevent
51 * propagation of stale errors into future buffer operations.
52 */
53
5000d01d 54#define BDSTRAT_SIZE (256 * 1024)
2bd0ea18 55
2556c98b
BN
56#define IO_BCOMPARE_CHECK
57
9542ae13
DC
/* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */
/*
 * Zero 'len' basic blocks of the device underlying 'btp', starting at basic
 * block 'start'.  Writes are performed in chunks of at most BDSTRAT_SIZE
 * bytes from a single zeroed, device-aligned bounce buffer.
 *
 * Exits the process on any allocation, seek or write failure (see XXX
 * above); otherwise always returns 0.
 */
int
libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
{
	xfs_off_t	start_offset, end_offset, offset;
	ssize_t		zsize, bytes;
	char		*z;
	int		fd;

	/* bounce buffer: one chunk, aligned for direct I/O on this device */
	zsize = min(BDSTRAT_SIZE, BBTOB(len));
	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
		fprintf(stderr,
			_("%s: %s can't memalign %d bytes: %s\n"),
			progname, __FUNCTION__, (int)zsize, strerror(errno));
		exit(1);
	}
	memset(z, 0, zsize);

	fd = libxfs_device_to_fd(btp->dev);
	start_offset = LIBXFS_BBTOOFF64(start);

	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
			progname, __FUNCTION__,
			(unsigned long long)start_offset, strerror(errno));
		exit(1);
	}

	/* end_offset is the byte count remaining, not an absolute offset */
	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
	for (offset = 0; offset < end_offset; ) {
		bytes = min((ssize_t)(end_offset - offset), zsize);
		if ((bytes = write(fd, z, bytes)) < 0) {
			fprintf(stderr, _("%s: %s write failed: %s\n"),
				progname, __FUNCTION__, strerror(errno));
			exit(1);
		} else if (bytes == 0) {
			/* short-circuit a device that accepts no more data */
			fprintf(stderr, _("%s: %s not progressing?\n"),
				progname, __FUNCTION__);
			exit(1);
		}
		offset += bytes;
	}
	free(z);
	return 0;
}
103
/*
 * Build a log unmount record in the single basic block at 'p': an op header
 * followed by the XLOG_UNMOUNT_TYPE magic payload.  The caller is expected
 * to provide a BBSIZE-sized region (the whole block is zeroed here first).
 */
static void unmount_record(void *p)
{
	xlog_op_header_t	*op = (xlog_op_header_t *)p;
	/* the data section must be 32 bit size aligned */
	struct {
		uint16_t magic;
		uint16_t pad1;
		uint32_t pad2; /* may as well make it 64 bits */
	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 };

	memset(p, 0, BBSIZE);
	/* dummy tid to mark this as written from userspace */
	op->oh_tid = cpu_to_be32(0xb0c0d0d0);
	op->oh_len = cpu_to_be32(sizeof(magic));
	op->oh_clientid = XFS_LOG;
	op->oh_flags = XLOG_UNMOUNT_TRANS;
	op->oh_res2 = 0;

	/* and the data for this op */
	memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
}
125
1c12a814
BF
126static char *next(
127 char *ptr,
128 int offset,
129 void *private)
989b74bc 130{
1c12a814 131 struct xfs_buf *buf = (struct xfs_buf *)private;
989b74bc 132
1c12a814 133 if (buf &&
135e4bfe 134 (buf->b_bcount < (int)(ptr - (char *)buf->b_addr) + offset))
989b74bc 135 abort();
1c12a814 136
989b74bc
NS
137 return ptr + offset;
138}
139
1c12a814
BF
/*
 * Format the log. The caller provides either a buftarg which is used to access
 * the log via buffers or a direct pointer to a buffer that encapsulates the
 * entire log.
 *
 * 'start'/'length' describe the log in basic blocks; 'sunit' is in bytes.
 * Exactly one of 'btp' and 'dptr' must be supplied.  Returns 0 on success
 * or -EINVAL on bad arguments.
 */
int
libxfs_log_clear(
	struct xfs_buftarg	*btp,
	char			*dptr,
	xfs_daddr_t		start,
	uint			length,		/* basic blocks */
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,		/* bytes */
	int			fmt,
	int			cycle,
	bool			max)
{
	struct xfs_buf		*bp = NULL;
	int			len;
	xfs_lsn_t		lsn;
	xfs_lsn_t		tail_lsn;
	xfs_daddr_t		blk;
	xfs_daddr_t		end_blk;
	char			*ptr;

	/* exactly one of buftarg/direct pointer, and a uuid, are required */
	if (((btp && dptr) || (!btp && !dptr)) ||
	    (btp && !btp->dev) || !fs_uuid)
		return -EINVAL;

	/* first zero the log */
	if (btp)
		libxfs_device_zero(btp, start, length);
	else
		memset(dptr, 0, BBTOB(length));

	/*
	 * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a
	 * special reset case where we only write a single record where the lsn
	 * and tail_lsn match. Otherwise, the record lsn starts at block 0 of
	 * the specified cycle and points tail_lsn at the last record of the
	 * previous cycle.
	 */
	len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
	len = max(len, 2);
	lsn = xlog_assign_lsn(cycle, 0);
	if (cycle == XLOG_INIT_CYCLE)
		tail_lsn = lsn;
	else
		tail_lsn = xlog_assign_lsn(cycle - 1, length - len);

	/* write out the first log record */
	ptr = dptr;
	if (btp) {
		bp = libxfs_getbufr(btp, start, len);
		ptr = bp->b_addr;
	}
	libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn,
			  next, bp);
	if (bp) {
		bp->b_flags |= LIBXFS_B_DIRTY;
		libxfs_putbufr(bp);
	}

	/*
	 * There's nothing else to do if this is a log reset. The kernel detects
	 * the rest of the log is zeroed and starts at cycle 1.
	 */
	if (cycle == XLOG_INIT_CYCLE)
		return 0;

	/*
	 * Bump the record size for a full log format if the caller allows it.
	 * This is primarily for performance reasons and most callers don't care
	 * about record size since the log is clean after we're done.
	 */
	if (max)
		len = BTOBB(BDSTRAT_SIZE);

	/*
	 * Otherwise, fill everything beyond the initial record with records of
	 * the previous cycle so the kernel head/tail detection works correctly.
	 *
	 * We don't particularly care about the record size or content here.
	 * It's only important that the headers are in place such that the
	 * kernel finds 1.) a clean log and 2.) the correct current cycle value.
	 * Therefore, bump up the record size to the max to use larger I/Os and
	 * improve performance.
	 */
	cycle--;
	blk = start + len;
	if (dptr)
		dptr += BBTOB(len);
	end_blk = start + length;

	/* clamp the record length to what remains of the log */
	len = min(end_blk - blk, len);
	while (blk < end_blk) {
		lsn = xlog_assign_lsn(cycle, blk - start);
		tail_lsn = xlog_assign_lsn(cycle, blk - start - len);

		ptr = dptr;
		if (btp) {
			bp = libxfs_getbufr(btp, blk, len);
			ptr = bp->b_addr;
		}
		/*
		 * Note: pass the full buffer length as the sunit to initialize
		 * the entire buffer.
		 */
		libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn,
				  tail_lsn, next, bp);
		if (bp) {
			bp->b_flags |= LIBXFS_B_DIRTY;
			libxfs_putbufr(bp);
		}

		blk += len;
		if (dptr)
			dptr += BBTOB(len);
		len = min(end_blk - blk, len);
	}

	return 0;
}
5000d01d 264
989b74bc
NS
/*
 * Format one log record at 'caddr': a record header, any required extended
 * headers, an unmount record, and cycle-stamped padding blocks.  'nextfunc'
 * advances from one basic block to the next ('private' is passed through to
 * it).  Returns the number of bytes of log space the record covers.
 */
int
libxfs_log_header(
	char			*caddr,
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,
	int			fmt,
	xfs_lsn_t		lsn,
	xfs_lsn_t		tail_lsn,
	libxfs_get_block_t	*nextfunc,
	void			*private)
{
	xlog_rec_header_t	*head = (xlog_rec_header_t *)caddr;
	char			*p = caddr;
	__be32			cycle_lsn;
	int			i, len;
	int			hdrs = 1;

	/* NULLCOMMITLSN means "use the initial-cycle defaults" */
	if (lsn == NULLCOMMITLSN)
		lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0);
	if (tail_lsn == NULLCOMMITLSN)
		tail_lsn = lsn;

	len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;

	memset(p, 0, BBSIZE);
	head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
	head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn));
	head->h_version = cpu_to_be32(version);
	head->h_crc = cpu_to_le32(0);
	head->h_prev_block = cpu_to_be32(-1);
	head->h_num_logops = cpu_to_be32(1);
	head->h_fmt = cpu_to_be32(fmt);
	head->h_size = cpu_to_be32(max(sunit, XLOG_BIG_RECORD_BSIZE));

	head->h_lsn = cpu_to_be64(lsn);
	head->h_tail_lsn = cpu_to_be64(tail_lsn);

	memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));

	/*
	 * The kernel expects to see either a log record header magic value or
	 * the LSN cycle at the top of every log block. The first word of each
	 * non-header block is copied to the record headers and replaced with
	 * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for
	 * details).
	 *
	 * Even though we only ever write an unmount record (one block), we
	 * support writing log records up to the max log buffer size of 256k to
	 * improve log format performance. This means a record can require up
	 * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle
	 * data (each header supports 32k of data).
	 */
	cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
	if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) {
		hdrs = sunit / XLOG_HEADER_CYCLE_SIZE;
		if (sunit % XLOG_HEADER_CYCLE_SIZE)
			hdrs++;
	}

	/*
	 * A fixed number of extended headers is expected based on h_size. If
	 * required, format those now so the unmount record is located
	 * correctly.
	 *
	 * Since we only write an unmount record, we only need one h_cycle_data
	 * entry for the unmount record block. The subsequent record data
	 * blocks are zeroed, which means we can stamp them directly with the
	 * cycle and zero the rest of the cycle data in the extended headers.
	 */
	if (hdrs > 1) {
		for (i = 1; i < hdrs; i++) {
			p = nextfunc(p, BBSIZE, private);
			memset(p, 0, BBSIZE);
			/* xlog_rec_ext_header.xh_cycle */
			*(__be32 *)p = cycle_lsn;
		}
	}

	/*
	 * The total length is the max of the stripe unit or 2 basic block
	 * minimum (1 hdr blk + 1 data blk). The record length is the total
	 * minus however many header blocks are required.
	 */
	head->h_len = cpu_to_be32(max(BBTOB(2), sunit) - hdrs * BBSIZE);

	/*
	 * Write out the unmount record, pack the first word into the record
	 * header and stamp the block with the cycle.
	 */
	p = nextfunc(p, BBSIZE, private);
	unmount_record(p);

	head->h_cycle_data[0] = *(__be32 *)p;
	*(__be32 *)p = cycle_lsn;

	/*
	 * Finally, zero all remaining blocks in the record and stamp each with
	 * the cycle. We don't need to pack any of these blocks because the
	 * cycle data in the headers has already been zeroed.
	 */
	len = max(len, hdrs + 1);
	for (i = hdrs + 1; i < len; i++) {
		p = nextfunc(p, BBSIZE, private);
		memset(p, 0, BBSIZE);
		*(__be32 *)p = cycle_lsn;
	}

	return BBTOB(len);
}
375
2556c98b
BN
376/*
377 * Simple I/O (buffer cache) interface
378 */
379
380
#ifdef XFS_BUF_TRACING

/*
 * Tracing wrappers for the buffer interfaces.  The #undefs below suggest
 * the public names are normally macros that route callers to these
 * libxfs_trace_* variants; each wrapper records the calling function,
 * file and line in the buffer and then delegates to the real routine.
 */
#undef libxfs_readbuf
#undef libxfs_readbuf_map
#undef libxfs_writebuf
#undef libxfs_getbuf
#undef libxfs_getbuf_map
#undef libxfs_getbuf_flags
#undef libxfs_putbuf

/* prototypes for the real (un-traced) implementations below */
xfs_buf_t *libxfs_readbuf(struct xfs_buftarg *, xfs_daddr_t, int, int,
			const struct xfs_buf_ops *);
xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
			int, int, const struct xfs_buf_ops *);
int libxfs_writebuf(xfs_buf_t *, int);
xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
			int, int);
xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
			unsigned int);
void libxfs_putbuf (xfs_buf_t *);

/* stamp the caller's location into the buffer, if we got one */
#define __add_trace(bp, func, file, line)	\
do {						\
	if (bp) {				\
		(bp)->b_func = (func);		\
		(bp)->b_file = (file);		\
		(bp)->b_line = (line);		\
	}					\
} while (0)

xfs_buf_t *
libxfs_trace_readbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf(btp, blkno, len, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_readbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

int
libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags)
{
	__add_trace(bp, func, file, line);
	return libxfs_writebuf(bp, flags);
}

xfs_buf_t *
libxfs_trace_getbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
{
	xfs_buf_t	*bp = libxfs_getbuf(btp, blkno, len);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_map(btp, map, nmaps, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, unsigned int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_flags(btp, blkno, len, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

void
libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
{
	__add_trace(bp, func, file, line);
	libxfs_putbuf(bp);
}


#endif
476
477
f1b058f9
NS
478xfs_buf_t *
479libxfs_getsb(xfs_mount_t *mp, int flags)
480{
75c8b434
DC
481 return libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR,
482 XFS_FSS_TO_BB(mp, 1), flags, &xfs_sb_buf_ops);
f1b058f9
NS
483}
484
/* allocation zone for xfs_buf structures */
kmem_zone_t *xfs_buf_zone;

/* MRU list of released buffers available for reuse by __libxfs_getbufr() */
static struct cache_mru xfs_buf_freelist =
	{{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
	0, PTHREAD_MUTEX_INITIALIZER };
f1b058f9 490
a2ceac1f
DC
/*
 * The bufkey is used to pass the new buffer information to the cache object
 * allocation routine. Because discontiguous buffers need to pass different
 * information, we need fields to pass that information. However, because the
 * blkno and bblen is needed for the initial cache entry lookup (i.e. for
 * bcompare) the fact that the map/nmaps is non-null to switch to discontiguous
 * buffer initialisation instead of a contiguous buffer.
 */
struct xfs_bufkey {
	struct xfs_buftarg	*buftarg;	/* device the buffer belongs to */
	xfs_daddr_t		blkno;		/* first block of the buffer */
	unsigned int		bblen;		/* total length in basic blocks */
	struct xfs_buf_map	*map;		/* non-NULL => discontiguous */
	int			nmaps;		/* entries in 'map' */
};
f1b058f9 506
602dcc0e
DC
/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define GOLDEN_RATIO_PRIME	0x9e37fffffffc0001UL
#define CACHE_LINE_SIZE		64
/*
 * Buffer cache hash function: mix the key's block number with a golden
 * ratio prime (cache-line scaled) and reduce modulo the table size.
 */
static unsigned int
libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift)
{
	uint64_t	hashval = ((struct xfs_bufkey *)key)->blkno;
	uint64_t	tmp;

	tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift);
	return tmp % hashsize;
}
520
/*
 * Cache comparison callback: match a cached buffer against a lookup key.
 * Device and block number must match for any hit; a length mismatch on a
 * matching buffer returns CACHE_PURGE so the stale entry is replaced
 * (optionally complaining unless miscompare-purge is expected).
 */
static int
libxfs_bcompare(struct cache_node *node, cache_key_t key)
{
	struct xfs_buf	*bp = (struct xfs_buf *)node;
	struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;

	if (bp->b_target->dev == bkey->buftarg->dev &&
	    bp->b_bn == bkey->blkno) {
		if (bp->b_bcount == BBTOB(bkey->bblen))
			return CACHE_HIT;
#ifdef IO_BCOMPARE_CHECK
		if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
			fprintf(stderr,
	"%lx: Badness in key lookup (length)\n"
	"bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
				pthread_self(),
				(unsigned long long)bp->b_bn, (int)bp->b_bcount,
				(unsigned long long)bkey->blkno,
				BBTOB(bkey->bblen));
		}
#endif
		return CACHE_PURGE;
	}
	return CACHE_MISS;
}
546
/* Debug helper: dump a buffer's identity, size, flags and refcount to stderr. */
static void
libxfs_bprint(xfs_buf_t *bp)
{
	fprintf(stderr, "Buffer %p blkno=%llu bytes=%u flags=0x%x count=%u\n",
		bp, (unsigned long long)bp->b_bn, (unsigned)bp->b_bcount,
		bp->b_flags, bp->b_node.cn_count);
}
554
/*
 * (Re)initialise a buffer to a clean state for the given target/block/size.
 * Allocates a device-aligned, zeroed data area if the buffer doesn't already
 * carry one (exits on allocation failure).  Sets up the inline single-entry
 * map unless a discontiguous map was installed by the caller beforehand.
 */
static void
__initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	bp->b_flags = 0;
	bp->b_bn = bno;
	bp->b_bcount = bytes;
	bp->b_length = BTOBB(bytes);
	bp->b_target = btp;
	bp->b_error = 0;
	/* reuse an existing data area when recycling a freelist buffer */
	if (!bp->b_addr)
		bp->b_addr = memalign(libxfs_device_alignment(), bytes);
	if (!bp->b_addr) {
		fprintf(stderr,
			_("%s: %s can't memalign %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	memset(bp->b_addr, 0, bytes);
#ifdef XFS_BUF_TRACING
	list_head_init(&bp->b_lock_list);
#endif
	pthread_mutex_init(&bp->b_lock, NULL);
	bp->b_holder = 0;
	bp->b_recur = 0;
	bp->b_ops = NULL;

	/* default to a contiguous buffer using the embedded map */
	if (!bp->b_maps) {
		bp->b_nmaps = 1;
		bp->b_maps = &bp->__b_map;
		bp->b_maps[0].bm_bn = bp->b_bn;
		bp->b_maps[0].bm_len = bp->b_length;
	}
}
590
/* Initialise a contiguous buffer (thin wrapper around __initbuf()). */
static void
libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	__initbuf(bp, btp, bno, bytes);
}
597
/*
 * Initialise a discontiguous buffer from a vector of block-number/length
 * extents.  Allocates and copies the map (exits on allocation failure),
 * totals the data size across all extents, then performs the common
 * initialisation and marks the buffer LIBXFS_B_DISCONTIG.
 */
static void
libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp,
		struct xfs_buf_map *map, int nmaps)
{
	unsigned int bytes = 0;
	int i;

	/* 'bytes' is first reused for the map allocation size */
	bytes = sizeof(struct xfs_buf_map) * nmaps;
	bp->b_maps = malloc(bytes);
	if (!bp->b_maps) {
		fprintf(stderr,
			_("%s: %s can't malloc %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	bp->b_nmaps = nmaps;

	/* ... and then for the total data length of all extents */
	bytes = 0;
	for ( i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bytes += BBTOB(map[i].bm_len);
	}

	__initbuf(bp, btp, map[0].bm_bn, bytes);
	bp->b_flags |= LIBXFS_B_DISCONTIG;
}
626
00ff2b10 627static xfs_buf_t *
a2ceac1f 628__libxfs_getbufr(int blen)
e6b359b3
NS
629{
630 xfs_buf_t *bp;
69ec88b5
BN
631
632 /*
633 * first look for a buffer that can be used as-is,
634 * if one cannot be found, see if there is a buffer,
ff1f79a7 635 * and if so, free its buffer and set b_addr to NULL
69ec88b5
BN
636 * before calling libxfs_initbuf.
637 */
638 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
639 if (!list_empty(&xfs_buf_freelist.cm_list)) {
640 list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
641 if (bp->b_bcount == blen) {
642 list_del_init(&bp->b_node.cn_mru);
643 break;
644 }
645 }
646 if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
647 bp = list_entry(xfs_buf_freelist.cm_list.next,
648 xfs_buf_t, b_node.cn_mru);
649 list_del_init(&bp->b_node.cn_mru);
650 free(bp->b_addr);
651 bp->b_addr = NULL;
2c6c6328
BF
652 if (bp->b_maps != &bp->__b_map)
653 free(bp->b_maps);
85428dd2 654 bp->b_maps = NULL;
69ec88b5
BN
655 }
656 } else
5e656dbb 657 bp = kmem_zone_zalloc(xfs_buf_zone, 0);
69ec88b5 658 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
e0607266 659 bp->b_ops = NULL;
0a7942b3
DC
660 if (bp->b_flags & LIBXFS_B_DIRTY)
661 fprintf(stderr, "found dirty buffer (bulk) on free list!");
e6b359b3 662
a2ceac1f
DC
663 return bp;
664}
665
/*
 * Allocate and initialise a contiguous buffer of 'bblen' basic blocks at
 * 'blkno' on 'btp'.  The buffer comes from the free list or zone via
 * __libxfs_getbufr() and is not inserted into the buffer cache here.
 */
xfs_buf_t *
libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	bp =__libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf(bp, btp, blkno, blen);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}
683
/*
 * Allocate and initialise a discontiguous buffer described by 'map'.
 * 'blkno'/'bblen' must agree with the map (blkno must equal map[0].bm_bn);
 * inconsistent arguments are fatal.
 */
static xfs_buf_t *
libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
		struct xfs_buf_map *map, int nmaps)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	if (!map || !nmaps) {
		fprintf(stderr,
			_("%s: %s invalid map %p or nmaps %d\n"),
			progname, __FUNCTION__, map, nmaps);
		exit(1);
	}

	if (blkno != map[0].bm_bn) {
		fprintf(stderr,
			_("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"),
			progname, __FUNCTION__, (long long)map[0].bm_bn,
			(long long)blkno);
		exit(1);
	}

	bp =__libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf_map(bp, btp, map, nmaps);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}
2556c98b
BN
717
718#ifdef XFS_BUF_TRACING
719struct list_head lock_buf_list = {&lock_buf_list, &lock_buf_list};
720int lock_buf_count = 0;
721#endif
e6b359b3 722
d0572de5
BN
723extern int use_xfs_buf_lock;
724
a2ceac1f
DC
/*
 * Look up (or create) a buffer in the cache for the given key, returning it
 * with an elevated cache reference.  When buffer locking is enabled, the
 * buffer lock is also taken: LIBXFS_GETBUF_TRYLOCK callers get NULL instead
 * of blocking, and recursive locking by the same thread is tolerated with a
 * warning (tracked via b_recur so libxfs_putbuf() unwinds correctly).
 */
static struct xfs_buf *
__cache_lookup(struct xfs_bufkey *key, unsigned int flags)
{
	struct xfs_buf	*bp;

	cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp);
	if (!bp)
		return NULL;

	if (use_xfs_buf_lock) {
		int		ret;

		ret = pthread_mutex_trylock(&bp->b_lock);
		if (ret) {
			ASSERT(ret == EAGAIN);
			if (flags & LIBXFS_GETBUF_TRYLOCK)
				goto out_put;

			if (pthread_equal(bp->b_holder, pthread_self())) {
				fprintf(stderr,
	_("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
					key->blkno);
				bp->b_recur++;
				return bp;
			} else {
				pthread_mutex_lock(&bp->b_lock);
			}
		}

		bp->b_holder = pthread_self();
	}

	/* a direct lookup boosts priority relative to prefetched buffers */
	cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
		cache_node_get_priority((struct cache_node *)bp) -
						CACHE_PREFETCH_PRIORITY);
#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count++;
	list_add(&bp->b_lock_list, &lock_buf_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
#ifdef IO_DEBUG
	printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n",
		pthread_self(), __FUNCTION__,
		bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno));
#endif

	return bp;
out_put:
	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
	return NULL;
}
777
a2ceac1f 778struct xfs_buf *
75c8b434
DC
779libxfs_getbuf_flags(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
780 unsigned int flags)
a2ceac1f 781{
3dd2705a 782 struct xfs_bufkey key = {NULL};
a2ceac1f 783
75c8b434 784 key.buftarg = btp;
a2ceac1f
DC
785 key.blkno = blkno;
786 key.bblen = len;
787
788 return __cache_lookup(&key, flags);
789}
790
e8ecd760
DW
791/*
792 * Clean the buffer flags for libxfs_getbuf*(), which wants to return
793 * an unused buffer with clean state. This prevents CRC errors on a
794 * re-read of a corrupt block that was prefetched and freed. This
795 * can happen with a massively corrupt directory that is discarded,
796 * but whose blocks are then recycled into expanding lost+found.
797 *
798 * Note however that if the buffer's dirty (prefetch calls getbuf)
799 * we'll leave the state alone because we don't want to discard blocks
800 * that have been fixed.
801 */
802static void
803reset_buf_state(
804 struct xfs_buf *bp)
805{
806 if (bp && !(bp->b_flags & LIBXFS_B_DIRTY))
807 bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE |
808 LIBXFS_B_UPTODATE);
809}
810
2ae22647 811struct xfs_buf *
75c8b434 812libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
2ae22647 813{
e8ecd760
DW
814 struct xfs_buf *bp;
815
816 bp = libxfs_getbuf_flags(btp, blkno, len, 0);
817 reset_buf_state(bp);
818 return bp;
2ae22647
CH
819}
820
e8ecd760
DW
/*
 * Cache lookup for a (possibly discontiguous) buffer.  A single-extent map
 * degenerates to the plain contiguous lookup; otherwise the key carries the
 * map and the summed length so libxfs_balloc() can build a discontiguous
 * buffer on a cache miss.
 */
static struct xfs_buf *
__libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_bufkey key = {NULL};
	int i;

	if (nmaps == 1)
		return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
				flags);

	key.buftarg = btp;
	key.blkno = map[0].bm_bn;
	for (i = 0; i < nmaps; i++) {
		key.bblen += map[i].bm_len;
	}
	key.map = map;
	key.nmaps = nmaps;

	return __cache_lookup(&key, flags);
}
842
e8ecd760
DW
/*
 * Get a possibly-discontiguous buffer from the cache, clearing stale state
 * flags on clean buffers before returning it.
 */
struct xfs_buf *
libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_buf	*bp = __libxfs_getbuf_map(btp, map, nmaps, flags);

	reset_buf_state(bp);
	return bp;
}
853
f1b058f9
NS
/*
 * Release a buffer back to the cache: clear any per-use error, drop the
 * buffer lock (or just unwind one level of recursive locking), and put the
 * cache reference.
 */
void
libxfs_putbuf(xfs_buf_t *bp)
{
	/*
	 * ensure that any errors on this use of the buffer don't carry
	 * over to the next user.
	 */
	bp->b_error = 0;

#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count--;
	ASSERT(lock_buf_count >= 0);
	list_del_init(&bp->b_lock_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
	if (use_xfs_buf_lock) {
		if (bp->b_recur) {
			/* matching put for a recursive get; keep the lock */
			bp->b_recur--;
		} else {
			bp->b_holder = 0;
			pthread_mutex_unlock(&bp->b_lock);
		}
	}

	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
}
881
882void
883libxfs_purgebuf(xfs_buf_t *bp)
884{
3dd2705a 885 struct xfs_bufkey key = {NULL};
f1b058f9 886
75c8b434 887 key.buftarg = bp->b_target;
5dfa5cd2 888 key.blkno = bp->b_bn;
75c8b434 889 key.bblen = bp->b_length;
f1b058f9
NS
890
891 cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
892}
2bd0ea18 893
f1b058f9 894static struct cache_node *
2556c98b 895libxfs_balloc(cache_key_t key)
f1b058f9 896{
a2ceac1f 897 struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;
2556c98b 898
a2ceac1f
DC
899 if (bufkey->map)
900 return (struct cache_node *)
75c8b434 901 libxfs_getbufr_map(bufkey->buftarg,
a2ceac1f
DC
902 bufkey->blkno, bufkey->bblen,
903 bufkey->map, bufkey->nmaps);
75c8b434 904 return (struct cache_node *)libxfs_getbufr(bufkey->buftarg,
a2ceac1f 905 bufkey->blkno, bufkey->bblen);
2bd0ea18
NS
906}
907
a2ceac1f
DC
908
/*
 * pread() wrapper: read exactly 'len' bytes at 'offset' into 'buf'.
 * Returns 0 on success, a negative errno on failure, or -EIO on a short
 * read; exits instead when LIBXFS_EXIT_ON_FAILURE is set in 'flags'.
 */
static int
__read_buf(int fd, void *buf, int len, off64_t offset, int flags)
{
	int	sts;

	sts = pread(fd, buf, len, offset);
	if (sts < 0) {
		/* snapshot errno before fprintf/strerror can clobber it */
		int error = errno;
		fprintf(stderr, _("%s: read failed: %s\n"),
			progname, strerror(error));
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -error;
	} else if (sts != len) {
		fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
			progname, sts, len);
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -EIO;
	}
	return 0;
}
931
/*
 * Physically read 'len' basic blocks at 'blkno' into an existing buffer.
 * On success the buffer is marked LIBXFS_B_UPTODATE, but only when its
 * identity (device/block/size) still matches the requested read.  Returns
 * 0 or a negative errno from __read_buf().
 */
int
libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp,
		int len, int flags)
{
	int	fd = libxfs_device_to_fd(btp->dev);
	int	bytes = BBTOB(len);
	int	error;

	ASSERT(BBTOB(len) <= bp->b_bcount);

	error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
	if (!error &&
	    bp->b_target->dev == btp->dev &&
	    bp->b_bn == blkno &&
	    bp->b_bcount == bytes)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, bytes, error,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif
	return error;
}
955
adbb3573
DC
/*
 * Run the read verifier on a buffer and record the ops for later writes.
 * Any verification failure is left in bp->b_error by the verifier; the
 * buffer is then considered checked (LIBXFS_B_UNCHECKED cleared).
 */
void
libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops)
{
	if (!ops)
		return;
	bp->b_ops = ops;
	bp->b_ops->verify_read(bp);
	bp->b_flags &= ~LIBXFS_B_UNCHECKED;
}
965
966
/*
 * Get a buffer for 'blkno'/'len' and ensure its contents are read and
 * verified.  Cache hits that are up-to-date or dirty skip the physical
 * read; prefetched-but-unchecked buffers are verified on the spot.  The
 * read/verify status is reported through bp->b_error, not the return
 * value (NULL is returned only when no buffer could be obtained).
 */
xfs_buf_t *
libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;
	int		error;

	bp = libxfs_getbuf_flags(btp, blkno, len, 0);
	if (!bp)
		return NULL;

	/*
	 * if the buffer was prefetched, it is likely that it was not validated.
	 * Hence if we are supplied an ops function and the buffer is marked as
	 * unchecked, we need to validate it now.
	 *
	 * We do this verification even if the buffer is dirty - the
	 * verification is almost certainly going to fail the CRC check in this
	 * case as a dirty buffer has not had the CRC recalculated. However, we
	 * should not be dirtying unchecked buffers and therefore failing it
	 * here because it's dirty and unchecked indicates we've screwed up
	 * somewhere else.
	 */
	bp->b_error = 0;
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}

	/*
	 * Set the ops on a cache miss (i.e. first physical read) as the
	 * verifier may change the ops to match the type of buffer it contains.
	 * A cache hit might reset the verifier to the original type if we set
	 * it again, but it won't get called again and set to match the buffer
	 * contents. *cough* xfs_da_node_buf_ops *cough*.
	 */
	error = libxfs_readbufr(btp, blkno, bp, len, flags);
	if (error)
		bp->b_error = error;
	else
		libxfs_readbuf_verify(bp, ops);
	return bp;
}
1011
/*
 * Physically read a discontiguous (multi-mapping) buffer: each of the
 * bp->b_nmaps extents is read into consecutive regions of bp->b_addr.
 * Stops at the first failing extent, recording the error in bp->b_error.
 * Returns 0 on success (buffer marked up to date) or a negative errno.
 */
int
libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
{
	int	fd;
	int	error = 0;
	void	*buf;
	int	i;

	fd = libxfs_device_to_fd(btp->dev);
	buf = bp->b_addr;
	for (i = 0; i < bp->b_nmaps; i++) {
		off64_t	offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
		int len = BBTOB(bp->b_maps[i].bm_len);

		error = __read_buf(fd, buf, len, offset, flags);
		if (error) {
			bp->b_error = error;
			break;
		}
		/* advance to where the next mapping's data belongs */
		buf += len;
	}

	if (!error)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return error;
}
1043
/*
 * Multi-mapping analogue of libxfs_readbuf().  A single-extent request is
 * delegated to libxfs_readbuf(); otherwise the compound buffer is looked
 * up, read via libxfs_readbufr_map() if not already cached, and verified.
 * Note: unlike libxfs_readbuf(), a read error is left in bp->b_error by
 * libxfs_readbufr_map() rather than being assigned here.
 */
struct xfs_buf *
libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags, const struct xfs_buf_ops *ops)
{
	struct xfs_buf	*bp;
	int		error = 0;

	if (nmaps == 1)
		return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
					flags, ops);

	bp = __libxfs_getbuf_map(btp, map, nmaps, 0);
	if (!bp)
		return NULL;

	/* clear stale errors left behind by earlier users of this buffer */
	bp->b_error = 0;
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		/* cache hit: only verify if it was prefetched unchecked */
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}
	error = libxfs_readbufr_map(btp, bp, flags);
	if (!error)
		libxfs_readbuf_verify(bp, ops);

/* NOTE(review): deliberately disabled ("IO_DEBUGX"); references a local
 * 'buf' that does not exist in this scope and would not compile if enabled. */
#ifdef IO_DEBUGX
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return bp;
}
1076
1077static int
1078__write_buf(int fd, void *buf, int len, off64_t offset, int flags)
2bd0ea18
NS
1079{
1080 int sts;
2bd0ea18 1081
2f9a125c 1082 sts = pwrite(fd, buf, len, offset);
2bd0ea18 1083 if (sts < 0) {
11202ec2 1084 int error = errno;
2f9a125c 1085 fprintf(stderr, _("%s: pwrite failed: %s\n"),
c3928e39 1086 progname, strerror(error));
a2ceac1f 1087 if (flags & LIBXFS_B_EXIT)
2bd0ea18 1088 exit(1);
11202ec2 1089 return -error;
a2ceac1f 1090 } else if (sts != len) {
2f9a125c 1091 fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"),
a2ceac1f
DC
1092 progname, sts, len);
1093 if (flags & LIBXFS_B_EXIT)
2bd0ea18 1094 exit(1);
12b53197 1095 return -EIO;
2bd0ea18 1096 }
a2ceac1f
DC
1097 return 0;
1098}
1099
1100int
1101libxfs_writebufr(xfs_buf_t *bp)
1102{
75c8b434 1103 int fd = libxfs_device_to_fd(bp->b_target->dev);
a2ceac1f 1104
75c8b434
DC
1105 /*
1106 * we never write buffers that are marked stale. This indicates they
1107 * contain data that has been invalidated, and even if the buffer is
1108 * dirty it must *never* be written. Verifiers are wonderful for finding
1109 * bugs like this. Make sure the error is obvious as to the cause.
1110 */
1111 if (bp->b_flags & LIBXFS_B_STALE) {
12b53197 1112 bp->b_error = -ESTALE;
75c8b434
DC
1113 return bp->b_error;
1114 }
1115
1116 /*
1117 * clear any pre-existing error status on the buffer. This can occur if
1118 * the buffer is corrupt on disk and the repair process doesn't clear
1119 * the error before fixing and writing it back.
1120 */
1121 bp->b_error = 0;
1122 if (bp->b_ops) {
1123 bp->b_ops->verify_write(bp);
1124 if (bp->b_error) {
1125 fprintf(stderr,
a3fac935
ES
1126 _("%s: write verifer failed on %s bno 0x%llx/0x%x\n"),
1127 __func__, bp->b_ops->name,
1128 (long long)bp->b_bn, bp->b_bcount);
75c8b434
DC
1129 return bp->b_error;
1130 }
1131 }
1132
a2ceac1f 1133 if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
e8f1e8aa 1134 bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount,
5dfa5cd2 1135 LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags);
a2ceac1f
DC
1136 } else {
1137 int i;
04338619 1138 void *buf = bp->b_addr;
a2ceac1f
DC
1139
1140 for (i = 0; i < bp->b_nmaps; i++) {
85428dd2
DC
1141 off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
1142 int len = BBTOB(bp->b_maps[i].bm_len);
a2ceac1f 1143
e8f1e8aa
DC
1144 bp->b_error = __write_buf(fd, buf, len, offset,
1145 bp->b_flags);
1146 if (bp->b_error)
a2ceac1f 1147 break;
a2ceac1f 1148 buf += len;
a2ceac1f
DC
1149 }
1150 }
1151
f1b058f9 1152#ifdef IO_DEBUG
d085fb48 1153 printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n",
2556c98b 1154 pthread_self(), __FUNCTION__, bp->b_bcount,
5dfa5cd2 1155 (long long)LIBXFS_BBTOOFF64(bp->b_bn),
e8f1e8aa 1156 (long long)bp->b_bn, bp, bp->b_error);
f1b058f9 1157#endif
e8f1e8aa 1158 if (!bp->b_error) {
a2ceac1f 1159 bp->b_flags |= LIBXFS_B_UPTODATE;
adbb3573
DC
1160 bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT |
1161 LIBXFS_B_UNCHECKED);
a2ceac1f 1162 }
e8f1e8aa 1163 return bp->b_error;
2bd0ea18
NS
1164}
1165
1166int
f1b058f9 1167libxfs_writebuf_int(xfs_buf_t *bp, int flags)
2bd0ea18 1168{
203d38cc
DC
1169 /*
1170 * Clear any error hanging over from reading the buffer. This prevents
1171 * subsequent reads after this write from seeing stale errors.
1172 */
1173 bp->b_error = 0;
6af7c1ea 1174 bp->b_flags &= ~LIBXFS_B_STALE;
f1b058f9
NS
1175 bp->b_flags |= (LIBXFS_B_DIRTY | flags);
1176 return 0;
1177}
1178
1179int
1180libxfs_writebuf(xfs_buf_t *bp, int flags)
1181{
e0607266
DC
1182#ifdef IO_DEBUG
1183 printf("%lx: %s: dirty blkno=%llu(%llu)\n",
1184 pthread_self(), __FUNCTION__,
1185 (long long)LIBXFS_BBTOOFF64(bp->b_bn),
1186 (long long)bp->b_bn);
1187#endif
203d38cc
DC
1188 /*
1189 * Clear any error hanging over from reading the buffer. This prevents
1190 * subsequent reads after this write from seeing stale errors.
1191 */
1192 bp->b_error = 0;
6af7c1ea 1193 bp->b_flags &= ~LIBXFS_B_STALE;
f1b058f9
NS
1194 bp->b_flags |= (LIBXFS_B_DIRTY | flags);
1195 libxfs_putbuf(bp);
1196 return 0;
2bd0ea18
NS
1197}
1198
57c9fccb 1199void
f1b058f9 1200libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
57c9fccb 1201{
f1b058f9
NS
1202#ifdef IO_DEBUG
1203 if (boff + len > bp->b_bcount) {
2556c98b 1204 printf("Badness, iomove out of range!\n"
a2ceac1f 1205 "bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n",
5dfa5cd2 1206 (long long)bp->b_bn, bp->b_bcount, boff, len);
57c9fccb 1207 abort();
f1b058f9
NS
1208 }
1209#endif
57c9fccb
NS
1210 switch (flags) {
1211 case LIBXFS_BZERO:
f1b058f9 1212 memset(bp->b_addr + boff, 0, len);
57c9fccb
NS
1213 break;
1214 case LIBXFS_BREAD:
f1b058f9 1215 memcpy(data, bp->b_addr + boff, len);
57c9fccb
NS
1216 break;
1217 case LIBXFS_BWRITE:
f1b058f9 1218 memcpy(bp->b_addr + boff, data, len);
57c9fccb
NS
1219 break;
1220 }
1221}
1222
33165ec3 1223static void
0a7942b3
DC
1224libxfs_brelse(
1225 struct cache_node *node)
33165ec3 1226{
0a7942b3 1227 struct xfs_buf *bp = (struct xfs_buf *)node;
33165ec3 1228
0a7942b3
DC
1229 if (!bp)
1230 return;
1231 if (bp->b_flags & LIBXFS_B_DIRTY)
1232 fprintf(stderr,
1233 "releasing dirty buffer to free list!");
1234
1235 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
1236 list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
1237 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
33165ec3
BN
1238}
1239
e08f5594 1240static unsigned int
69ec88b5 1241libxfs_bulkrelse(
0a7942b3
DC
1242 struct cache *cache,
1243 struct list_head *list)
2556c98b 1244{
69ec88b5 1245 xfs_buf_t *bp;
e08f5594 1246 int count = 0;
2556c98b 1247
69ec88b5 1248 if (list_empty(list))
e08f5594 1249 return 0 ;
69ec88b5
BN
1250
1251 list_for_each_entry(bp, list, b_node.cn_mru) {
2556c98b 1252 if (bp->b_flags & LIBXFS_B_DIRTY)
0a7942b3
DC
1253 fprintf(stderr,
1254 "releasing dirty buffer (bulk) to free list!");
e08f5594 1255 count++;
2556c98b 1256 }
69ec88b5
BN
1257
1258 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
0b90dda6 1259 list_splice(list, &xfs_buf_freelist.cm_list);
69ec88b5 1260 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
e08f5594
BN
1261
1262 return count;
69ec88b5
BN
1263}
1264
864028ed
ES
1265/*
1266 * Free everything from the xfs_buf_freelist MRU, used at final teardown
1267 */
1268void
1269libxfs_bcache_free(void)
1270{
1271 struct list_head *cm_list;
1272 xfs_buf_t *bp, *next;
1273
1274 cm_list = &xfs_buf_freelist.cm_list;
1275 list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) {
1276 free(bp->b_addr);
1277 if (bp->b_maps != &bp->__b_map)
1278 free(bp->b_maps);
1279 kmem_zone_free(xfs_buf_zone, bp);
1280 }
1281}
1282
e8f1e8aa
DC
1283/*
1284 * When a buffer is marked dirty, the error is cleared. Hence if we are trying
1285 * to flush a buffer prior to cache reclaim that has an error on it it means
1286 * we've already tried to flush it and it failed. Prevent repeated corruption
1287 * errors from being reported by skipping such buffers - when the corruption is
1288 * fixed the buffer will be marked dirty again and we can write it again.
1289 */
0a7942b3
DC
1290static int
1291libxfs_bflush(
1292 struct cache_node *node)
69ec88b5 1293{
0a7942b3 1294 struct xfs_buf *bp = (struct xfs_buf *)node;
69ec88b5 1295
e8f1e8aa 1296 if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY)
0a7942b3 1297 return libxfs_writebufr(bp);
e8f1e8aa 1298 return bp->b_error;
2556c98b
BN
1299}
1300
/*
 * Release a buffer outside the cache: write it back if dirty, then put it
 * on the free list via libxfs_brelse().
 */
void
libxfs_putbufr(xfs_buf_t *bp)
{
	if (bp->b_flags & LIBXFS_B_DIRTY)
		libxfs_writebufr(bp);
	libxfs_brelse((struct cache_node *)bp);
}
1308
1309
f1b058f9
NS
/* Drop every buffer from the global buffer cache. */
void
libxfs_bcache_purge(void)
{
	cache_purge(libxfs_bcache);
}
1315
/* Write back all dirty buffers in the global buffer cache. */
void
libxfs_bcache_flush(void)
{
	cache_flush(libxfs_bcache);
}
1321
2556c98b
BN
/* Report whether the global buffer cache has exceeded its size target. */
int
libxfs_bcache_overflowed(void)
{
	return cache_overflowed(libxfs_bcache);
}
1327
/*
 * Callback table wiring the buffer cache into the generic cache layer;
 * referenced from outside this file, so it cannot be made static here.
 */
struct cache_operations libxfs_bcache_operations = {
	.hash = libxfs_bhash,
	.alloc = libxfs_balloc,
	.flush = libxfs_bflush,
	.relse = libxfs_brelse,
	.compare = libxfs_bcompare,
	.bulkrelse = libxfs_bulkrelse
};
1336
2bd0ea18 1337
f1b058f9 1338/*
3a19fb7d 1339 * Inode cache stubs.
f1b058f9
NS
1340 */
1341
bf0e024f 1342kmem_zone_t *xfs_inode_zone;
5e656dbb 1343extern kmem_zone_t *xfs_ili_zone;
f1b058f9 1344
20e882d4
DW
1345/*
1346 * If there are inline format data / attr forks attached to this inode,
1347 * make sure they're not corrupt.
1348 */
1349bool
1350libxfs_inode_verify_forks(
12ac6e04
DW
1351 struct xfs_inode *ip,
1352 struct xfs_ifork_ops *ops)
20e882d4 1353{
1d3bac1f 1354 struct xfs_ifork *ifp;
20e882d4
DW
1355 xfs_failaddr_t fa;
1356
12ac6e04
DW
1357 if (!ops)
1358 return true;
1359
1360 fa = xfs_ifork_verify_data(ip, ops);
20e882d4 1361 if (fa) {
1d3bac1f
DW
1362 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1363 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
1364 ifp->if_u1.if_data, ifp->if_bytes, fa);
20e882d4
DW
1365 return false;
1366 }
1367
12ac6e04 1368 fa = xfs_ifork_verify_attr(ip, ops);
20e882d4 1369 if (fa) {
1d3bac1f
DW
1370 ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
1371 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
1372 ifp ? ifp->if_u1.if_data : NULL,
1373 ifp ? ifp->if_bytes : 0, fa);
20e882d4
DW
1374 return false;
1375 }
1376 return true;
1377}
1378
/*
 * Userspace inode "get": allocate an in-core inode, read it from disk, and
 * verify its inline forks with @ifork_ops (may be NULL to skip).
 *
 * Returns 0 with *ipp set on success; -ENOMEM, a read error, or
 * -EFSCORRUPTED on failure (with *ipp untouched or NULL).  @tp and
 * @lock_flags are accepted for kernel API parity.
 */
int
libxfs_iget(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	uint			lock_flags,
	struct xfs_inode	**ipp,
	struct xfs_ifork_ops	*ifork_ops)
{
	struct xfs_inode	*ip;
	int			error = 0;

	ip = kmem_zone_zalloc(xfs_inode_zone, 0);
	if (!ip)
		return -ENOMEM;

	ip->i_ino = ino;
	ip->i_mount = mp;
	error = xfs_iread(mp, tp, ip, 0);
	if (error) {
		/* nothing to tear down yet - free the bare inode directly */
		kmem_zone_free(xfs_inode_zone, ip);
		*ipp = NULL;
		return error;
	}

	if (!libxfs_inode_verify_forks(ip, ifork_ops)) {
		/* forks are populated now, so use the full release path */
		libxfs_irele(ip);
		return -EFSCORRUPTED;
	}

	/*
	 * set up the inode ops structure that the libxfs code relies on
	 */
	if (XFS_ISDIR(ip))
		ip->d_ops = mp->m_dir_inode_ops;
	else
		ip->d_ops = mp->m_nondir_inode_ops;

	*ipp = ip;
	return 0;
}
1420
1421static void
014e5f6d
ES
1422libxfs_idestroy(xfs_inode_t *ip)
1423{
e37bf53c 1424 switch (VFS_I(ip)->i_mode & S_IFMT) {
014e5f6d
ES
1425 case S_IFREG:
1426 case S_IFDIR:
1427 case S_IFLNK:
1428 libxfs_idestroy_fork(ip, XFS_DATA_FORK);
1429 break;
1430 }
1431 if (ip->i_afp)
1432 libxfs_idestroy_fork(ip, XFS_ATTR_FORK);
a90b9ad2
DW
1433 if (ip->i_cowfp)
1434 xfs_idestroy_fork(ip, XFS_COW_FORK);
014e5f6d
ES
1435}
1436
2bd0ea18 1437void
31845e4c
DW
1438libxfs_irele(
1439 struct xfs_inode *ip)
2bd0ea18 1440{
3a19fb7d
CH
1441 if (ip->i_itemp)
1442 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
1443 ip->i_itemp = NULL;
1444 libxfs_idestroy(ip);
1445 kmem_zone_free(xfs_inode_zone, ip);
2bd0ea18 1446}