]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/rdwr.c
libxfs: Replace XFS_BUF_SET_PTR with xfs_buf_associate_memory
[thirdparty/xfsprogs-dev.git] / libxfs / rdwr.c
CommitLineData
2bd0ea18 1/*
f1b058f9 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
2bd0ea18 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18
NS
7 * published by the Free Software Foundation.
8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
2bd0ea18 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18
NS
17 */
18
b626fb59 19
9c799827 20#include "libxfs_priv.h"
1aef52f8 21#include "init.h"
b626fb59
DC
22#include "xfs_fs.h"
23#include "xfs_shared.h"
24#include "xfs_format.h"
25#include "xfs_log_format.h"
26#include "xfs_trans_resv.h"
27#include "xfs_mount.h"
28#include "xfs_inode_buf.h"
29#include "xfs_inode_fork.h"
30#include "xfs_inode.h"
31#include "xfs_trans.h"
32
6b803e5a 33#include "libxfs.h" /* for LIBXFS_EXIT_ON_FAILURE */
2bd0ea18 34
6af7c1ea
DC
35/*
36 * Important design/architecture note:
37 *
38 * The userspace code that uses the buffer cache is much less constrained than
39 * the kernel code. The userspace code is pretty nasty in places, especially
40 * when it comes to buffer error handling. Very little of the userspace code
41 * outside libxfs clears bp->b_error - very little code even checks it - so the
42 * libxfs code is tripping on stale errors left by the userspace code.
43 *
44 * We can't clear errors or zero buffer contents in libxfs_getbuf-* like we do
45 * in the kernel, because those functions are used by the libxfs_readbuf_*
46 * functions and hence need to leave the buffers unchanged on cache hits. This
47 * is actually the only way to gather a write error from a libxfs_writebuf()
48 * call - you need to get the buffer again so you can check bp->b_error field -
49 * assuming that the buffer is still in the cache when you check, that is.
50 *
51 * This is very different to the kernel code which does not release buffers on a
52 * write so we can wait on IO and check errors. The kernel buffer cache also
53 * guarantees a buffer of a known initial state from xfs_buf_get() even on a
54 * cache hit.
55 *
56 * IOWs, userspace is behaving quite differently to the kernel and as a result
57 * it leaks errors from reads, invalidations and writes through
58 * libxfs_getbuf/libxfs_readbuf.
59 *
60 * The result of this is that until the userspace code outside libxfs is cleaned
61 * up, functions that release buffers from userspace control (i.e
62 * libxfs_writebuf/libxfs_putbuf) need to zero bp->b_error to prevent
63 * propagation of stale errors into future buffer operations.
64 */
65
5000d01d 66#define BDSTRAT_SIZE (256 * 1024)
2bd0ea18 67
2556c98b
BN
68#define IO_BCOMPARE_CHECK
69
9542ae13
DC
/* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */
/*
 * Zero @len basic blocks on @btp starting at daddr @start by repeatedly
 * writing from a single zeroed, alignment-correct bounce buffer.
 * Exits the program on any allocation, seek or write failure.
 */
int
libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
{
	xfs_off_t	start_offset, end_offset, offset;
	ssize_t		zsize, bytes;
	char		*z;
	int		fd;

	/* bounce buffer: at most BDSTRAT_SIZE (256k) per write */
	zsize = min(BDSTRAT_SIZE, BBTOB(len));
	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
		fprintf(stderr,
			_("%s: %s can't memalign %d bytes: %s\n"),
			progname, __FUNCTION__, (int)zsize, strerror(errno));
		exit(1);
	}
	memset(z, 0, zsize);

	fd = libxfs_device_to_fd(btp->dev);
	start_offset = LIBXFS_BBTOOFF64(start);

	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
			progname, __FUNCTION__,
			(unsigned long long)start_offset, strerror(errno));
		exit(1);
	}

	/* end_offset is the number of bytes to zero, relative to start */
	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
	for (offset = 0; offset < end_offset; ) {
		bytes = min((ssize_t)(end_offset - offset), zsize);
		if ((bytes = write(fd, z, bytes)) < 0) {
			fprintf(stderr, _("%s: %s write failed: %s\n"),
				progname, __FUNCTION__, strerror(errno));
			exit(1);
		} else if (bytes == 0) {
			/* zero-length write: bail rather than spin forever */
			fprintf(stderr, _("%s: %s not progressing?\n"),
				progname, __FUNCTION__);
			exit(1);
		}
		offset += bytes;
	}
	free(z);
	return 0;
}
115
/*
 * Format an unmount record (an xlog_op_header followed by a small,
 * 32-bit-size-aligned magic payload) into the single BBSIZE block at @p,
 * zeroing the block first.
 */
static void unmount_record(void *p)
{
	xlog_op_header_t *op = (xlog_op_header_t *)p;
	/* the data section must be 32 bit size aligned */
	struct {
		uint16_t magic;
		uint16_t pad1;
		uint32_t pad2;	/* may as well make it 64 bits */
	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 };

	memset(p, 0, BBSIZE);
	/* dummy tid to mark this as written from userspace */
	op->oh_tid = cpu_to_be32(0xb0c0d0d0);
	op->oh_len = cpu_to_be32(sizeof(magic));
	op->oh_clientid = XFS_LOG;
	op->oh_flags = XLOG_UNMOUNT_TRANS;
	op->oh_res2 = 0;

	/* and the data for this op */
	memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
}
137
1c12a814
BF
/*
 * Log-format cursor: advance @ptr by @offset bytes.  When @private is a
 * buffer, abort if the advance would run past the end of its data area;
 * when @private is NULL (direct-pointer log), no bounds check is possible.
 */
static char *next(
	char		*ptr,
	int		offset,
	void		*private)
{
	struct xfs_buf	*buf = (struct xfs_buf *)private;

	if (buf &&
	    (XFS_BUF_COUNT(buf) < (int)(ptr - XFS_BUF_PTR(buf)) + offset))
		abort();

	return ptr + offset;
}
151
1c12a814
BF
/*
 * Format the log. The caller provides either a buftarg which is used to access
 * the log via buffers or a direct pointer to a buffer that encapsulates the
 * entire log.
 *
 * Returns 0 on success, -EINVAL on bad arguments (exactly one of @btp/@dptr
 * must be supplied, along with a uuid).  @length and @start are in basic
 * blocks; @sunit is in bytes.
 */
int
libxfs_log_clear(
	struct xfs_buftarg	*btp,
	char			*dptr,
	xfs_daddr_t		start,
	uint			length,		/* basic blocks */
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,		/* bytes */
	int			fmt,
	int			cycle,
	bool			max)
{
	struct xfs_buf	*bp = NULL;
	int		len;
	xfs_lsn_t	lsn;
	xfs_lsn_t	tail_lsn;
	xfs_daddr_t	blk;
	xfs_daddr_t	end_blk;
	char		*ptr;

	/* exactly one of buftarg / direct pointer, and a uuid, required */
	if (((btp && dptr) || (!btp && !dptr)) ||
	    (btp && !btp->dev) || !fs_uuid)
		return -EINVAL;

	/* first zero the log */
	if (btp)
		libxfs_device_zero(btp, start, length);
	else
		memset(dptr, 0, BBTOB(length));

	/*
	 * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a
	 * special reset case where we only write a single record where the lsn
	 * and tail_lsn match. Otherwise, the record lsn starts at block 0 of
	 * the specified cycle and points tail_lsn at the last record of the
	 * previous cycle.
	 */
	len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
	len = MAX(len, 2);
	lsn = xlog_assign_lsn(cycle, 0);
	if (cycle == XLOG_INIT_CYCLE)
		tail_lsn = lsn;
	else
		tail_lsn = xlog_assign_lsn(cycle - 1, length - len);

	/* write out the first log record */
	ptr = dptr;
	if (btp) {
		bp = libxfs_getbufr(btp, start, len);
		ptr = XFS_BUF_PTR(bp);
	}
	libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn,
			  next, bp);
	if (bp) {
		bp->b_flags |= LIBXFS_B_DIRTY;
		libxfs_putbufr(bp);
	}

	/*
	 * There's nothing else to do if this is a log reset. The kernel detects
	 * the rest of the log is zeroed and starts at cycle 1.
	 */
	if (cycle == XLOG_INIT_CYCLE)
		return 0;

	/*
	 * Bump the record size for a full log format if the caller allows it.
	 * This is primarily for performance reasons and most callers don't care
	 * about record size since the log is clean after we're done.
	 */
	if (max)
		len = BTOBB(BDSTRAT_SIZE);

	/*
	 * Otherwise, fill everything beyond the initial record with records of
	 * the previous cycle so the kernel head/tail detection works correctly.
	 *
	 * We don't particularly care about the record size or content here.
	 * It's only important that the headers are in place such that the
	 * kernel finds 1.) a clean log and 2.) the correct current cycle value.
	 * Therefore, bump up the record size to the max to use larger I/Os and
	 * improve performance.
	 */
	cycle--;
	blk = start + len;
	if (dptr)
		dptr += BBTOB(len);
	end_blk = start + length;

	/* clamp the record length so the last record fits in the log */
	len = min(end_blk - blk, len);
	while (blk < end_blk) {
		lsn = xlog_assign_lsn(cycle, blk - start);
		tail_lsn = xlog_assign_lsn(cycle, blk - start - len);

		ptr = dptr;
		if (btp) {
			bp = libxfs_getbufr(btp, blk, len);
			ptr = XFS_BUF_PTR(bp);
		}
		/*
		 * Note: pass the full buffer length as the sunit to initialize
		 * the entire buffer.
		 */
		libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn,
				  tail_lsn, next, bp);
		if (bp) {
			bp->b_flags |= LIBXFS_B_DIRTY;
			libxfs_putbufr(bp);
		}

		blk += len;
		if (dptr)
			dptr += BBTOB(len);
		len = min(end_blk - blk, len);
	}

	return 0;
}
5000d01d 276
989b74bc
NS
/*
 * Format a single log record at @caddr: the record header, any required
 * extended headers, an unmount record, and cycle-stamped padding blocks.
 * @nextfunc advances the cursor one basic block at a time (bounds-checked
 * against @private when that is a buffer).  Returns the number of bytes
 * formatted, i.e. BBTOB(record length in basic blocks).
 */
int
libxfs_log_header(
	char			*caddr,
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,
	int			fmt,
	xfs_lsn_t		lsn,
	xfs_lsn_t		tail_lsn,
	libxfs_get_block_t	*nextfunc,
	void			*private)
{
	xlog_rec_header_t	*head = (xlog_rec_header_t *)caddr;
	char			*p = caddr;
	__be32			cycle_lsn;
	int			i, len;
	int			hdrs = 1;

	/* default LSNs: a freshly initialised log at XLOG_INIT_CYCLE */
	if (lsn == NULLCOMMITLSN)
		lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0);
	if (tail_lsn == NULLCOMMITLSN)
		tail_lsn = lsn;

	len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;

	memset(p, 0, BBSIZE);
	head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
	head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn));
	head->h_version = cpu_to_be32(version);
	head->h_crc = cpu_to_le32(0);
	head->h_prev_block = cpu_to_be32(-1);
	head->h_num_logops = cpu_to_be32(1);
	head->h_fmt = cpu_to_be32(fmt);
	head->h_size = cpu_to_be32(MAX(sunit, XLOG_BIG_RECORD_BSIZE));

	head->h_lsn = cpu_to_be64(lsn);
	head->h_tail_lsn = cpu_to_be64(tail_lsn);

	memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));

	/*
	 * The kernel expects to see either a log record header magic value or
	 * the LSN cycle at the top of every log block. The first word of each
	 * non-header block is copied to the record headers and replaced with
	 * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for
	 * details).
	 *
	 * Even though we only ever write an unmount record (one block), we
	 * support writing log records up to the max log buffer size of 256k to
	 * improve log format performance. This means a record can require up
	 * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle
	 * data (each header supports 32k of data).
	 */
	cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
	if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) {
		hdrs = sunit / XLOG_HEADER_CYCLE_SIZE;
		if (sunit % XLOG_HEADER_CYCLE_SIZE)
			hdrs++;
	}

	/*
	 * A fixed number of extended headers is expected based on h_size. If
	 * required, format those now so the unmount record is located
	 * correctly.
	 *
	 * Since we only write an unmount record, we only need one h_cycle_data
	 * entry for the unmount record block. The subsequent record data
	 * blocks are zeroed, which means we can stamp them directly with the
	 * cycle and zero the rest of the cycle data in the extended headers.
	 */
	if (hdrs > 1) {
		for (i = 1; i < hdrs; i++) {
			p = nextfunc(p, BBSIZE, private);
			memset(p, 0, BBSIZE);
			/* xlog_rec_ext_header.xh_cycle */
			*(__be32 *)p = cycle_lsn;
		}
	}

	/*
	 * The total length is the max of the stripe unit or 2 basic block
	 * minimum (1 hdr blk + 1 data blk). The record length is the total
	 * minus however many header blocks are required.
	 */
	head->h_len = cpu_to_be32(MAX(BBTOB(2), sunit) - hdrs * BBSIZE);

	/*
	 * Write out the unmount record, pack the first word into the record
	 * header and stamp the block with the cycle.
	 */
	p = nextfunc(p, BBSIZE, private);
	unmount_record(p);

	head->h_cycle_data[0] = *(__be32 *)p;
	*(__be32 *)p = cycle_lsn;

	/*
	 * Finally, zero all remaining blocks in the record and stamp each with
	 * the cycle. We don't need to pack any of these blocks because the
	 * cycle data in the headers has already been zeroed.
	 */
	len = MAX(len, hdrs + 1);
	for (i = hdrs + 1; i < len; i++) {
		p = nextfunc(p, BBSIZE, private);
		memset(p, 0, BBSIZE);
		*(__be32 *)p = cycle_lsn;
	}

	return BBTOB(len);
}
387
2556c98b
BN
/*
 * Simple I/O (buffer cache) interface
 */


#ifdef XFS_BUF_TRACING

/*
 * With XFS_BUF_TRACING enabled, the public buffer cache entry points are
 * macro-redirected to the libxfs_trace_* wrappers below, which record the
 * calling function/file/line in the buffer before delegating to the real
 * implementation.  Undefine the redirection macros here so this file can
 * declare and call the real functions.
 */
#undef libxfs_readbuf
#undef libxfs_readbuf_map
#undef libxfs_writebuf
#undef libxfs_getbuf
#undef libxfs_getbuf_map
#undef libxfs_getbuf_flags
#undef libxfs_putbuf

xfs_buf_t	*libxfs_readbuf(struct xfs_buftarg *, xfs_daddr_t, int, int,
				const struct xfs_buf_ops *);
xfs_buf_t	*libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
				int, int, const struct xfs_buf_ops *);
int		libxfs_writebuf(xfs_buf_t *, int);
xfs_buf_t	*libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
xfs_buf_t	*libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
				int, int);
xfs_buf_t	*libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
				unsigned int);
void		libxfs_putbuf (xfs_buf_t *);

/* Stash the caller's identity in the buffer, if we got one back. */
#define __add_trace(bp, func, file, line)	\
do {						\
	if (bp) {				\
		(bp)->b_func = (func);		\
		(bp)->b_file = (file);		\
		(bp)->b_line = (line);		\
	}					\
} while (0)

xfs_buf_t *
libxfs_trace_readbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf(btp, blkno, len, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_readbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

int
libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags)
{
	__add_trace(bp, func, file, line);
	return libxfs_writebuf(bp, flags);
}

xfs_buf_t *
libxfs_trace_getbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
{
	xfs_buf_t	*bp = libxfs_getbuf(btp, blkno, len);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_map(btp, map, nmaps, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

xfs_buf_t *
libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, unsigned int flags)
{
	xfs_buf_t	*bp = libxfs_getbuf_flags(btp, blkno, len, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

void
libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
{
	__add_trace(bp, func, file, line);
	libxfs_putbuf(bp);
}


#endif
488
489
f1b058f9
NS
490xfs_buf_t *
491libxfs_getsb(xfs_mount_t *mp, int flags)
492{
75c8b434
DC
493 return libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR,
494 XFS_FSS_TO_BB(mp, 1), flags, &xfs_sb_buf_ops);
f1b058f9
NS
495}
496
5e656dbb 497kmem_zone_t *xfs_buf_zone;
69ec88b5
BN
498
499static struct cache_mru xfs_buf_freelist =
500 {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
501 0, PTHREAD_MUTEX_INITIALIZER };
f1b058f9 502
a2ceac1f
DC
503/*
504 * The bufkey is used to pass the new buffer information to the cache object
505 * allocation routine. Because discontiguous buffers need to pass different
506 * information, we need fields to pass that information. However, because the
507 * blkno and bblen is needed for the initial cache entry lookup (i.e. for
508 * bcompare) the fact that the map/nmaps is non-null to switch to discontiguous
509 * buffer initialisation instead of a contiguous buffer.
510 */
511struct xfs_bufkey {
75c8b434 512 struct xfs_buftarg *buftarg;
a2ceac1f
DC
513 xfs_daddr_t blkno;
514 unsigned int bblen;
515 struct xfs_buf_map *map;
516 int nmaps;
517};
f1b058f9 518
602dcc0e
DC
/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define GOLDEN_RATIO_PRIME	0x9e37fffffffc0001UL
#define CACHE_LINE_SIZE		64
/*
 * Cache hash callback: map a bufkey's block number into [0, hashsize)
 * by mixing it with GOLDEN_RATIO_PRIME and folding with @hashshift.
 */
static unsigned int
libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift)
{
	uint64_t	hashval = ((struct xfs_bufkey *)key)->blkno;
	uint64_t	tmp;

	tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift);
	return tmp % hashsize;
}
532
/*
 * Cache comparison callback: a node matches a key when device and block
 * number agree.  If the lengths differ on an otherwise matching key, the
 * stale entry is purged (CACHE_PURGE) so a correctly sized buffer can be
 * instantiated; otherwise the mismatch is reported when IO_BCOMPARE_CHECK
 * is enabled.
 */
static int
libxfs_bcompare(struct cache_node *node, cache_key_t key)
{
	struct xfs_buf	*bp = (struct xfs_buf *)node;
	struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;

	if (bp->b_target->dev == bkey->buftarg->dev &&
	    bp->b_bn == bkey->blkno) {
		if (bp->b_bcount == BBTOB(bkey->bblen))
			return CACHE_HIT;
#ifdef IO_BCOMPARE_CHECK
		if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
			fprintf(stderr,
	"%lx: Badness in key lookup (length)\n"
	"bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
				pthread_self(),
				(unsigned long long)bp->b_bn, (int)bp->b_bcount,
				(unsigned long long)bkey->blkno,
				BBTOB(bkey->bblen));
		}
#endif
		return CACHE_PURGE;
	}
	return CACHE_MISS;
}
558
559void
560libxfs_bprint(xfs_buf_t *bp)
561{
ef4109d1 562 fprintf(stderr, "Buffer %p blkno=%llu bytes=%u flags=0x%x count=%u\n",
5dfa5cd2 563 bp, (unsigned long long)bp->b_bn, (unsigned)bp->b_bcount,
f1b058f9
NS
564 bp->b_flags, bp->b_node.cn_count);
565}
566
/*
 * (Re)initialise buffer @bp to describe @bytes of data at block @bno on
 * @btp: resets flags/error, (re)allocates an aligned, zeroed data area if
 * the shell has none, and sets up a single-extent map unless the caller
 * already attached one (discontiguous case).  Exits on allocation failure.
 */
static void
__initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	bp->b_flags = 0;
	bp->b_bn = bno;
	bp->b_bcount = bytes;
	bp->b_length = BTOBB(bytes);
	bp->b_target = btp;
	bp->b_error = 0;
	/* reuse a recycled shell's data area when it is already allocated */
	if (!bp->b_addr)
		bp->b_addr = memalign(libxfs_device_alignment(), bytes);
	if (!bp->b_addr) {
		fprintf(stderr,
			_("%s: %s can't memalign %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	memset(bp->b_addr, 0, bytes);
#ifdef XFS_BUF_TRACING
	list_head_init(&bp->b_lock_list);
#endif
	pthread_mutex_init(&bp->b_lock, NULL);
	bp->b_holder = 0;
	bp->b_recur = 0;
	bp->b_ops = NULL;

	/* default: one contiguous extent, using the embedded map */
	if (!bp->b_maps) {
		bp->b_nmaps = 1;
		bp->b_maps = &bp->__b_map;
		bp->b_maps[0].bm_bn = bp->b_bn;
		bp->b_maps[0].bm_len = bp->b_length;
	}
}
602
/* Initialise @bp as a single contiguous buffer of @bytes at block @bno. */
static void
libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
		unsigned int bytes)
{
	__initbuf(bp, btp, bno, bytes);
}
609
/*
 * Initialise @bp as a discontiguous buffer covering the @nmaps extents in
 * @map.  The map array is copied into a private allocation, the data size
 * is the sum of the extent lengths, and the buffer starts at the first
 * extent's block number.  Exits on allocation failure.
 */
static void
libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp,
		struct xfs_buf_map *map, int nmaps)
{
	unsigned int bytes = 0;
	int i;

	/* first: size of the map array itself */
	bytes = sizeof(struct xfs_buf_map) * nmaps;
	bp->b_maps = malloc(bytes);
	if (!bp->b_maps) {
		fprintf(stderr,
			_("%s: %s can't malloc %u bytes: %s\n"),
			progname, __FUNCTION__, bytes,
			strerror(errno));
		exit(1);
	}
	bp->b_nmaps = nmaps;

	/* reuse 'bytes' to accumulate the total data length */
	bytes = 0;
	for ( i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bytes += BBTOB(map[i].bm_len);
	}

	__initbuf(bp, btp, map[0].bm_bn, bytes);
	bp->b_flags |= LIBXFS_B_DISCONTIG;
}
638
e6b359b3 639xfs_buf_t *
a2ceac1f 640__libxfs_getbufr(int blen)
e6b359b3
NS
641{
642 xfs_buf_t *bp;
69ec88b5
BN
643
644 /*
645 * first look for a buffer that can be used as-is,
646 * if one cannot be found, see if there is a buffer,
ff1f79a7 647 * and if so, free its buffer and set b_addr to NULL
69ec88b5
BN
648 * before calling libxfs_initbuf.
649 */
650 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
651 if (!list_empty(&xfs_buf_freelist.cm_list)) {
652 list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
653 if (bp->b_bcount == blen) {
654 list_del_init(&bp->b_node.cn_mru);
655 break;
656 }
657 }
658 if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
659 bp = list_entry(xfs_buf_freelist.cm_list.next,
660 xfs_buf_t, b_node.cn_mru);
661 list_del_init(&bp->b_node.cn_mru);
662 free(bp->b_addr);
663 bp->b_addr = NULL;
2c6c6328
BF
664 if (bp->b_maps != &bp->__b_map)
665 free(bp->b_maps);
85428dd2 666 bp->b_maps = NULL;
69ec88b5
BN
667 }
668 } else
5e656dbb 669 bp = kmem_zone_zalloc(xfs_buf_zone, 0);
69ec88b5 670 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
e0607266 671 bp->b_ops = NULL;
0a7942b3
DC
672 if (bp->b_flags & LIBXFS_B_DIRTY)
673 fprintf(stderr, "found dirty buffer (bulk) on free list!");
e6b359b3 674
a2ceac1f
DC
675 return bp;
676}
677
/*
 * Get an uncached buffer shell for @bblen basic blocks at @blkno and
 * initialise it as a single contiguous extent against @btp.
 */
xfs_buf_t *
libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	bp =__libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf(bp, btp, blkno, blen);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}
695
/*
 * Get an uncached discontiguous buffer of @bblen total basic blocks.
 * @blkno must equal the first extent's start block (sanity-checked);
 * exits on an invalid or mismatched map.
 */
xfs_buf_t *
libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
		struct xfs_buf_map *map, int nmaps)
{
	xfs_buf_t	*bp;
	int		blen = BBTOB(bblen);

	if (!map || !nmaps) {
		fprintf(stderr,
			_("%s: %s invalid map %p or nmaps %d\n"),
			progname, __FUNCTION__, map, nmaps);
		exit(1);
	}

	if (blkno != map[0].bm_bn) {
		fprintf(stderr,
			_("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"),
			progname, __FUNCTION__, (long long)map[0].bm_bn,
			(long long)blkno);
		exit(1);
	}

	bp =__libxfs_getbufr(blen);
	if (bp)
		libxfs_initbuf_map(bp, btp, map, nmaps);
#ifdef IO_DEBUG
	printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, blen,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif

	return bp;
}
2556c98b
BN
729
730#ifdef XFS_BUF_TRACING
731struct list_head lock_buf_list = {&lock_buf_list, &lock_buf_list};
732int lock_buf_count = 0;
733#endif
e6b359b3 734
d0572de5
BN
735extern int use_xfs_buf_lock;
736
a2ceac1f
DC
/*
 * Look up (or create, via the cache's libxfs_balloc callback) the buffer
 * described by @key, returning it with an elevated cache reference.
 *
 * When use_xfs_buf_lock is set, the per-buffer mutex is taken; a lock held
 * by the same thread is handled as a counted recursion, and
 * LIBXFS_GETBUF_TRYLOCK makes a contended lookup return NULL instead of
 * blocking.
 */
static struct xfs_buf *
__cache_lookup(struct xfs_bufkey *key, unsigned int flags)
{
	struct xfs_buf	*bp;

	cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp);
	if (!bp)
		return NULL;

	if (use_xfs_buf_lock) {
		int ret;

		ret = pthread_mutex_trylock(&bp->b_lock);
		if (ret) {
			ASSERT(ret == EAGAIN);
			if (flags & LIBXFS_GETBUF_TRYLOCK)
				goto out_put;

			if (pthread_equal(bp->b_holder, pthread_self())) {
				fprintf(stderr,
	_("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
					key->blkno);
				bp->b_recur++;
				return bp;
			} else {
				pthread_mutex_lock(&bp->b_lock);
			}
		}

		bp->b_holder = pthread_self();
	}

	/* cache hit: drop the node's priority back into the normal range */
	cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
		cache_node_get_priority((struct cache_node *)bp) -
						CACHE_PREFETCH_PRIORITY);
#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count++;
	list_add(&bp->b_lock_list, &lock_buf_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
#ifdef IO_DEBUG
	printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n",
		pthread_self(), __FUNCTION__,
		bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno));
#endif

	return bp;
out_put:
	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
	return NULL;
}
789
a2ceac1f 790struct xfs_buf *
75c8b434
DC
791libxfs_getbuf_flags(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
792 unsigned int flags)
a2ceac1f
DC
793{
794 struct xfs_bufkey key = {0};
795
75c8b434 796 key.buftarg = btp;
a2ceac1f
DC
797 key.blkno = blkno;
798 key.bblen = len;
799
800 return __cache_lookup(&key, flags);
801}
802
e8ecd760
DW
/*
 * Clean the buffer flags for libxfs_getbuf*(), which wants to return
 * an unused buffer with clean state. This prevents CRC errors on a
 * re-read of a corrupt block that was prefetched and freed. This
 * can happen with a massively corrupt directory that is discarded,
 * but whose blocks are then recycled into expanding lost+found.
 *
 * Note however that if the buffer's dirty (prefetch calls getbuf)
 * we'll leave the state alone because we don't want to discard blocks
 * that have been fixed.
 */
static void
reset_buf_state(
	struct xfs_buf	*bp)
{
	if (bp && !(bp->b_flags & LIBXFS_B_DIRTY))
		bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE |
				LIBXFS_B_UPTODATE);
}
822
2ae22647 823struct xfs_buf *
75c8b434 824libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
2ae22647 825{
e8ecd760
DW
826 struct xfs_buf *bp;
827
828 bp = libxfs_getbuf_flags(btp, blkno, len, 0);
829 reset_buf_state(bp);
830 return bp;
2ae22647
CH
831}
832
e8ecd760
DW
/*
 * Cache lookup for a possibly-discontiguous buffer.  A single-extent map
 * degenerates to the contiguous lookup; otherwise the key carries the map
 * and the summed length so a cache miss can build a discontiguous buffer.
 */
static struct xfs_buf *
__libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_bufkey key = {0};
	int i;

	if (nmaps == 1)
		return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
					flags);

	key.buftarg = btp;
	key.blkno = map[0].bm_bn;
	for (i = 0; i < nmaps; i++) {
		key.bblen += map[i].bm_len;
	}
	key.map = map;
	key.nmaps = nmaps;

	return __cache_lookup(&key, flags);
}
854
e8ecd760
DW
/*
 * Public discontiguous-buffer lookup: delegate to __libxfs_getbuf_map()
 * and scrub stale validation state off clean buffers before returning.
 */
struct xfs_buf *
libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_buf	*bp = __libxfs_getbuf_map(btp, map, nmaps, flags);

	reset_buf_state(bp);
	return bp;
}
865
f1b058f9
NS
/*
 * Release a buffer obtained from libxfs_getbuf*()/libxfs_readbuf*():
 * clears b_error, drops the per-buffer lock (or one recursion level) when
 * buffer locking is enabled, and puts the cache reference.
 */
void
libxfs_putbuf(xfs_buf_t *bp)
{
	/*
	 * ensure that any errors on this use of the buffer don't carry
	 * over to the next user.
	 */
	bp->b_error = 0;

#ifdef XFS_BUF_TRACING
	pthread_mutex_lock(&libxfs_bcache->c_mutex);
	lock_buf_count--;
	ASSERT(lock_buf_count >= 0);
	list_del_init(&bp->b_lock_list);
	pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
	if (use_xfs_buf_lock) {
		if (bp->b_recur) {
			/* unwind one level of recursive locking */
			bp->b_recur--;
		} else {
			bp->b_holder = 0;
			pthread_mutex_unlock(&bp->b_lock);
		}
	}

	cache_node_put(libxfs_bcache, (struct cache_node *)bp);
}
893
894void
895libxfs_purgebuf(xfs_buf_t *bp)
896{
a2ceac1f 897 struct xfs_bufkey key = {0};
f1b058f9 898
75c8b434 899 key.buftarg = bp->b_target;
5dfa5cd2 900 key.blkno = bp->b_bn;
75c8b434 901 key.bblen = bp->b_length;
f1b058f9
NS
902
903 cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
904}
2bd0ea18 905
/*
 * Cache miss callback: allocate and initialise a new buffer for @key.
 * A non-NULL key->map selects a discontiguous (multi-extent) buffer.
 */
static struct cache_node *
libxfs_balloc(cache_key_t key)
{
	struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;

	if (bufkey->map)
		return (struct cache_node *)
			libxfs_getbufr_map(bufkey->buftarg,
					bufkey->blkno, bufkey->bblen,
					bufkey->map, bufkey->nmaps);
	return (struct cache_node *)libxfs_getbufr(bufkey->buftarg,
			bufkey->blkno, bufkey->bblen);
}
919
a2ceac1f
DC
920
/*
 * Read exactly @len bytes at @offset from @fd into @buf.  Returns 0 on
 * success or a negative errno; a short read is reported as -EIO.  Exits
 * the program instead when LIBXFS_EXIT_ON_FAILURE is set in @flags.
 */
static int
__read_buf(int fd, void *buf, int len, off64_t offset, int flags)
{
	int	sts;

	sts = pread(fd, buf, len, offset);
	if (sts < 0) {
		/* save errno before fprintf/strerror can clobber it */
		int error = errno;
		fprintf(stderr, _("%s: read failed: %s\n"),
			progname, strerror(error));
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -error;
	} else if (sts != len) {
		fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
			progname, sts, len);
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return -EIO;
	}
	return 0;
}
943
/*
 * Read @len basic blocks at @blkno from @btp into the existing buffer
 * @bp's data area.  The buffer is marked up to date only when it maps
 * exactly the device range that was read.  Returns 0 or the negative
 * errno from __read_buf().
 */
int
libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp,
		int len, int flags)
{
	int	fd = libxfs_device_to_fd(btp->dev);
	int	bytes = BBTOB(len);
	int	error;

	ASSERT(BBTOB(len) <= bp->b_bcount);

	error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
	if (!error &&
	    bp->b_target->dev == btp->dev &&
	    bp->b_bn == blkno &&
	    bp->b_bcount == bytes)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
		pthread_self(), __FUNCTION__, bytes, error,
		(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif
	return error;
}
967
adbb3573
DC
968void
969libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops)
970{
971 if (!ops)
972 return;
973 bp->b_ops = ops;
974 bp->b_ops->verify_read(bp);
975 bp->b_flags &= ~LIBXFS_B_UNCHECKED;
976}
977
978
/*
 * Look up (or allocate) the buffer for (@btp, @blkno, @len) in the
 * cache and make its contents valid: cached up-to-date or dirty data
 * is returned as-is (verifying it if still unchecked), otherwise the
 * buffer is read from disk and verified with @ops.  Read errors are
 * left in bp->b_error for the caller to inspect; NULL is only returned
 * when no buffer could be obtained at all.
 */
xfs_buf_t *
libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;
	int		error;

	bp = libxfs_getbuf_flags(btp, blkno, len, 0);
	if (!bp)
		return NULL;

	/*
	 * if the buffer was prefetched, it is likely that it was not validated.
	 * Hence if we are supplied an ops function and the buffer is marked as
	 * unchecked, we need to validate it now.
	 *
	 * We do this verification even if the buffer is dirty - the
	 * verification is almost certainly going to fail the CRC check in this
	 * case as a dirty buffer has not had the CRC recalculated. However, we
	 * should not be dirtying unchecked buffers and therefore failing it
	 * here because it's dirty and unchecked indicates we've screwed up
	 * somewhere else.
	 */
	bp->b_error = 0;
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}

	/*
	 * Set the ops on a cache miss (i.e. first physical read) as the
	 * verifier may change the ops to match the type of buffer it contains.
	 * A cache hit might reset the verifier to the original type if we set
	 * it again, but it won't get called again and set to match the buffer
	 * contents. *cough* xfs_da_node_buf_ops *cough*.
	 */
	error = libxfs_readbufr(btp, blkno, bp, len, flags);
	if (error)
		bp->b_error = error;
	else
		libxfs_readbuf_verify(bp, ops);
	return bp;
}
1023
/*
 * Read each extent of a discontiguous buffer from disk, filling the
 * buffer's single data area back-to-back in map order.  On the first
 * failing extent the error is recorded in bp->b_error and the rest are
 * skipped; on full success the buffer is marked up-to-date.
 *
 * Returns 0 or a negative errno.
 */
int
libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
{
	int	fd;
	int	error = 0;
	char	*buf;
	int	i;

	fd = libxfs_device_to_fd(btp->dev);
	buf = bp->b_addr;
	for (i = 0; i < bp->b_nmaps; i++) {
		off64_t	offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
		int len = BBTOB(bp->b_maps[i].bm_len);

		error = __read_buf(fd, buf, len, offset, flags);
		if (error) {
			bp->b_error = error;
			break;
		}
		/* next extent lands immediately after this one in memory */
		buf += len;
	}

	if (!error)
		bp->b_flags |= LIBXFS_B_UPTODATE;
#ifdef IO_DEBUG
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return error;
}
1055
/*
 * Multi-extent analogue of libxfs_readbuf(): find or create the
 * discontiguous buffer described by @map/@nmaps, then return cached
 * contents or read and verify them from disk.  Collapses to the plain
 * single-extent path when @nmaps is 1.
 */
struct xfs_buf *
libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
		int flags, const struct xfs_buf_ops *ops)
{
	struct xfs_buf	*bp;
	int		error = 0;

	if (nmaps == 1)
		return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
					flags, ops);

	bp = __libxfs_getbuf_map(btp, map, nmaps, 0);
	if (!bp)
		return NULL;

	bp->b_error = 0;
	/* cache hit: return as-is, verifying lazily if never checked */
	if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		if (bp->b_flags & LIBXFS_B_UNCHECKED)
			libxfs_readbuf_verify(bp, ops);
		return bp;
	}
	error = libxfs_readbufr_map(btp, bp, flags);
	if (!error)
		libxfs_readbuf_verify(bp, ops);

/*
 * NOTE(review): deliberately compiled out ("IO_DEBUGX", not IO_DEBUG) -
 * the printf references a local 'buf' that does not exist here.
 */
#ifdef IO_DEBUGX
	printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
		pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
		(long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
#endif
	return bp;
}
1088
1089static int
1090__write_buf(int fd, void *buf, int len, off64_t offset, int flags)
2bd0ea18
NS
1091{
1092 int sts;
2bd0ea18 1093
2f9a125c 1094 sts = pwrite(fd, buf, len, offset);
2bd0ea18 1095 if (sts < 0) {
11202ec2 1096 int error = errno;
2f9a125c 1097 fprintf(stderr, _("%s: pwrite failed: %s\n"),
c3928e39 1098 progname, strerror(error));
a2ceac1f 1099 if (flags & LIBXFS_B_EXIT)
2bd0ea18 1100 exit(1);
11202ec2 1101 return -error;
a2ceac1f 1102 } else if (sts != len) {
2f9a125c 1103 fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"),
a2ceac1f
DC
1104 progname, sts, len);
1105 if (flags & LIBXFS_B_EXIT)
2bd0ea18 1106 exit(1);
12b53197 1107 return -EIO;
2bd0ea18 1108 }
a2ceac1f
DC
1109 return 0;
1110}
1111
1112int
1113libxfs_writebufr(xfs_buf_t *bp)
1114{
75c8b434 1115 int fd = libxfs_device_to_fd(bp->b_target->dev);
a2ceac1f 1116
75c8b434
DC
1117 /*
1118 * we never write buffers that are marked stale. This indicates they
1119 * contain data that has been invalidated, and even if the buffer is
1120 * dirty it must *never* be written. Verifiers are wonderful for finding
1121 * bugs like this. Make sure the error is obvious as to the cause.
1122 */
1123 if (bp->b_flags & LIBXFS_B_STALE) {
12b53197 1124 bp->b_error = -ESTALE;
75c8b434
DC
1125 return bp->b_error;
1126 }
1127
1128 /*
1129 * clear any pre-existing error status on the buffer. This can occur if
1130 * the buffer is corrupt on disk and the repair process doesn't clear
1131 * the error before fixing and writing it back.
1132 */
1133 bp->b_error = 0;
1134 if (bp->b_ops) {
1135 bp->b_ops->verify_write(bp);
1136 if (bp->b_error) {
1137 fprintf(stderr,
a3fac935
ES
1138 _("%s: write verifer failed on %s bno 0x%llx/0x%x\n"),
1139 __func__, bp->b_ops->name,
1140 (long long)bp->b_bn, bp->b_bcount);
75c8b434
DC
1141 return bp->b_error;
1142 }
1143 }
1144
a2ceac1f 1145 if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
e8f1e8aa 1146 bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount,
5dfa5cd2 1147 LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags);
a2ceac1f
DC
1148 } else {
1149 int i;
1150 char *buf = bp->b_addr;
1151
1152 for (i = 0; i < bp->b_nmaps; i++) {
85428dd2
DC
1153 off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
1154 int len = BBTOB(bp->b_maps[i].bm_len);
a2ceac1f 1155
e8f1e8aa
DC
1156 bp->b_error = __write_buf(fd, buf, len, offset,
1157 bp->b_flags);
1158 if (bp->b_error)
a2ceac1f 1159 break;
a2ceac1f 1160 buf += len;
a2ceac1f
DC
1161 }
1162 }
1163
f1b058f9 1164#ifdef IO_DEBUG
d085fb48 1165 printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n",
2556c98b 1166 pthread_self(), __FUNCTION__, bp->b_bcount,
5dfa5cd2 1167 (long long)LIBXFS_BBTOOFF64(bp->b_bn),
e8f1e8aa 1168 (long long)bp->b_bn, bp, bp->b_error);
f1b058f9 1169#endif
e8f1e8aa 1170 if (!bp->b_error) {
a2ceac1f 1171 bp->b_flags |= LIBXFS_B_UPTODATE;
adbb3573
DC
1172 bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT |
1173 LIBXFS_B_UNCHECKED);
a2ceac1f 1174 }
e8f1e8aa 1175 return bp->b_error;
2bd0ea18
NS
1176}
1177
1178int
f1b058f9 1179libxfs_writebuf_int(xfs_buf_t *bp, int flags)
2bd0ea18 1180{
203d38cc
DC
1181 /*
1182 * Clear any error hanging over from reading the buffer. This prevents
1183 * subsequent reads after this write from seeing stale errors.
1184 */
1185 bp->b_error = 0;
6af7c1ea 1186 bp->b_flags &= ~LIBXFS_B_STALE;
f1b058f9
NS
1187 bp->b_flags |= (LIBXFS_B_DIRTY | flags);
1188 return 0;
1189}
1190
1191int
1192libxfs_writebuf(xfs_buf_t *bp, int flags)
1193{
e0607266
DC
1194#ifdef IO_DEBUG
1195 printf("%lx: %s: dirty blkno=%llu(%llu)\n",
1196 pthread_self(), __FUNCTION__,
1197 (long long)LIBXFS_BBTOOFF64(bp->b_bn),
1198 (long long)bp->b_bn);
1199#endif
203d38cc
DC
1200 /*
1201 * Clear any error hanging over from reading the buffer. This prevents
1202 * subsequent reads after this write from seeing stale errors.
1203 */
1204 bp->b_error = 0;
6af7c1ea 1205 bp->b_flags &= ~LIBXFS_B_STALE;
f1b058f9
NS
1206 bp->b_flags |= (LIBXFS_B_DIRTY | flags);
1207 libxfs_putbuf(bp);
1208 return 0;
2bd0ea18
NS
1209}
1210
57c9fccb 1211void
f1b058f9 1212libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
57c9fccb 1213{
f1b058f9
NS
1214#ifdef IO_DEBUG
1215 if (boff + len > bp->b_bcount) {
2556c98b 1216 printf("Badness, iomove out of range!\n"
a2ceac1f 1217 "bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n",
5dfa5cd2 1218 (long long)bp->b_bn, bp->b_bcount, boff, len);
57c9fccb 1219 abort();
f1b058f9
NS
1220 }
1221#endif
57c9fccb
NS
1222 switch (flags) {
1223 case LIBXFS_BZERO:
f1b058f9 1224 memset(bp->b_addr + boff, 0, len);
57c9fccb
NS
1225 break;
1226 case LIBXFS_BREAD:
f1b058f9 1227 memcpy(data, bp->b_addr + boff, len);
57c9fccb
NS
1228 break;
1229 case LIBXFS_BWRITE:
f1b058f9 1230 memcpy(bp->b_addr + boff, data, len);
57c9fccb
NS
1231 break;
1232 }
1233}
1234
/*
 * Cache release callback: move a buffer onto the global free list so
 * its memory can be reused.  Dirty buffers should have been flushed
 * before getting here; warn if one slips through, as its data is lost.
 */
static void
libxfs_brelse(
	struct cache_node	*node)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;

	if (!bp)
		return;
	if (bp->b_flags & LIBXFS_B_DIRTY)
		fprintf(stderr,
			"releasing dirty buffer to free list!");

	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
}
1251
/*
 * Bulk variant of libxfs_brelse(): splice an entire MRU list of
 * buffers onto the free list under a single lock acquisition, warning
 * about any dirty buffer whose data is about to be lost.
 *
 * Returns the number of buffers released.
 */
static unsigned int
libxfs_bulkrelse(
	struct cache		*cache,
	struct list_head	*list)
{
	xfs_buf_t	*bp;
	int		count = 0;

	if (list_empty(list))
		return 0 ;

	list_for_each_entry(bp, list, b_node.cn_mru) {
		if (bp->b_flags & LIBXFS_B_DIRTY)
			fprintf(stderr,
				"releasing dirty buffer (bulk) to free list!");
		count++;
	}

	pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
	list_splice(list, &xfs_buf_freelist.cm_list);
	pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);

	return count;
}
1276
/*
 * When a buffer is marked dirty, the error is cleared. Hence if we are trying
 * to flush a buffer prior to cache reclaim that has an error on it it means
 * we've already tried to flush it and it failed. Prevent repeated corruption
 * errors from being reported by skipping such buffers - when the corruption is
 * fixed the buffer will be marked dirty again and we can write it again.
 */
static int
libxfs_bflush(
	struct cache_node	*node)
{
	struct xfs_buf		*bp = (struct xfs_buf *)node;

	/* only write dirty buffers that have not already failed a flush */
	if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY)
		return libxfs_writebufr(bp);
	return bp->b_error;
}
1294
/*
 * Release an uncached buffer: write it back if dirty, then put it
 * straight onto the free list.
 */
void
libxfs_putbufr(xfs_buf_t *bp)
{
	if (bp->b_flags & LIBXFS_B_DIRTY)
		libxfs_writebufr(bp);
	libxfs_brelse((struct cache_node *)bp);
}
1302
1303
/* Discard every buffer in the global buffer cache. */
void
libxfs_bcache_purge(void)
{
	cache_purge(libxfs_bcache);
}
1309
/* Write back all dirty buffers held in the global buffer cache. */
void
libxfs_bcache_flush(void)
{
	cache_flush(libxfs_bcache);
}
1315
/* Report whether the global buffer cache has exceeded its size target. */
int
libxfs_bcache_overflowed(void)
{
	return cache_overflowed(libxfs_bcache);
}
1321
/* Buffer cache callbacks wired into the generic cache implementation. */
struct cache_operations libxfs_bcache_operations = {
	.hash		= libxfs_bhash,
	.alloc		= libxfs_balloc,
	.flush		= libxfs_bflush,
	.relse		= libxfs_brelse,
	.compare	= libxfs_bcompare,
	.bulkrelse	= libxfs_bulkrelse
};
1330
2bd0ea18 1331
f1b058f9 1332/*
3a19fb7d 1333 * Inode cache stubs.
f1b058f9
NS
1334 */
1335
5e656dbb
BN
1336extern kmem_zone_t *xfs_ili_zone;
1337extern kmem_zone_t *xfs_inode_zone;
f1b058f9 1338
20e882d4
DW
1339/*
1340 * If there are inline format data / attr forks attached to this inode,
1341 * make sure they're not corrupt.
1342 */
1343bool
1344libxfs_inode_verify_forks(
1345 struct xfs_inode *ip)
1346{
1d3bac1f 1347 struct xfs_ifork *ifp;
20e882d4
DW
1348 xfs_failaddr_t fa;
1349
1350 fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
1351 if (fa) {
1d3bac1f
DW
1352 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1353 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
1354 ifp->if_u1.if_data, ifp->if_bytes, fa);
20e882d4
DW
1355 return false;
1356 }
1357
1358 fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
1359 if (fa) {
1d3bac1f
DW
1360 ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
1361 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
1362 ifp ? ifp->if_u1.if_data : NULL,
1363 ifp ? ifp->if_bytes : 0, fa);
20e882d4
DW
1364 return false;
1365 }
1366 return true;
1367}
1368
2bd0ea18
NS
1369int
1370libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
81f8132a 1371 xfs_inode_t **ipp)
2bd0ea18
NS
1372{
1373 xfs_inode_t *ip;
f1b058f9 1374 int error = 0;
2bd0ea18 1375
3a19fb7d
CH
1376 ip = kmem_zone_zalloc(xfs_inode_zone, 0);
1377 if (!ip)
12b53197 1378 return -ENOMEM;
2bd0ea18 1379
3a19fb7d
CH
1380 ip->i_ino = ino;
1381 ip->i_mount = mp;
81f8132a 1382 error = xfs_iread(mp, tp, ip, 0);
3a19fb7d
CH
1383 if (error) {
1384 kmem_zone_free(xfs_inode_zone, ip);
1385 *ipp = NULL;
1386 return error;
1387 }
f1b058f9 1388
20e882d4
DW
1389 if (!libxfs_inode_verify_forks(ip)) {
1390 libxfs_iput(ip);
1391 return -EFSCORRUPTED;
1392 }
1393
ff105f75
DC
1394 /*
1395 * set up the inode ops structure that the libxfs code relies on
1396 */
e37bf53c 1397 if (XFS_ISDIR(ip))
ff105f75
DC
1398 ip->d_ops = mp->m_dir_inode_ops;
1399 else
1400 ip->d_ops = mp->m_nondir_inode_ops;
1401
3a19fb7d
CH
1402 *ipp = ip;
1403 return 0;
f1b058f9
NS
1404}
1405
/*
 * Tear down the incore forks of an inode prior to freeing it: the data
 * fork for regular files, directories and symlinks, plus the attr and
 * CoW forks when present.
 */
static void
libxfs_idestroy(xfs_inode_t *ip)
{
	switch (VFS_I(ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		libxfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}
	if (ip->i_afp)
		libxfs_idestroy_fork(ip, XFS_ATTR_FORK);
	if (ip->i_cowfp)
		xfs_idestroy_fork(ip, XFS_COW_FORK);
}
1421
/*
 * Release an inode obtained from libxfs_iget(): free the attached log
 * item (if any), destroy the incore forks and return the inode memory
 * to its zone.
 */
void
libxfs_iput(xfs_inode_t *ip)
{
	if (ip->i_itemp)
		kmem_zone_free(xfs_ili_zone, ip->i_itemp);
	ip->i_itemp = NULL;
	libxfs_idestroy(ip);
	kmem_zone_free(xfs_inode_zone, ip);
}
2bd0ea18 1430}