]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/rdwr.c
libxfs: fix root inode handling inconsistencies
[thirdparty/xfsprogs-dev.git] / libxfs / rdwr.c
CommitLineData
2bd0ea18 1/*
f1b058f9 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
2bd0ea18 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18
NS
7 * published by the Free Software Foundation.
8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
2bd0ea18 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18
NS
17 */
18
1d7e80ee 19#include <xfs/libxfs.h>
1aef52f8 20#include "init.h"
2bd0ea18 21
5000d01d 22#define BDSTRAT_SIZE (256 * 1024)
2bd0ea18 23
2556c98b
BN
24#define IO_BCOMPARE_CHECK
25
2bd0ea18 26void
75c8b434 27libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
2bd0ea18 28{
3cc4d0db
NS
29 xfs_off_t start_offset, end_offset, offset;
30 ssize_t zsize, bytes;
2bd0ea18 31 char *z;
3cc4d0db 32 int fd;
2bd0ea18 33
3cc4d0db 34 zsize = min(BDSTRAT_SIZE, BBTOB(len));
b74a1f6a 35 if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
9440d84d
NS
36 fprintf(stderr,
37 _("%s: %s can't memalign %d bytes: %s\n"),
7dfd8291 38 progname, __FUNCTION__, (int)zsize, strerror(errno));
2bd0ea18
NS
39 exit(1);
40 }
3cc4d0db
NS
41 memset(z, 0, zsize);
42
75c8b434 43 fd = libxfs_device_to_fd(btp->dev);
cb5b3ef4 44 start_offset = LIBXFS_BBTOOFF64(start);
3cc4d0db
NS
45
46 if ((lseek64(fd, start_offset, SEEK_SET)) < 0) {
47 fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
7dfd8291
NS
48 progname, __FUNCTION__,
49 (unsigned long long)start_offset, strerror(errno));
3cc4d0db
NS
50 exit(1);
51 }
52
cb5b3ef4 53 end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
3cc4d0db
NS
54 for (offset = 0; offset < end_offset; ) {
55 bytes = min((ssize_t)(end_offset - offset), zsize);
56 if ((bytes = write(fd, z, bytes)) < 0) {
57 fprintf(stderr, _("%s: %s write failed: %s\n"),
9440d84d 58 progname, __FUNCTION__, strerror(errno));
2bd0ea18 59 exit(1);
3cc4d0db
NS
60 } else if (bytes == 0) {
61 fprintf(stderr, _("%s: %s not progressing?\n"),
62 progname, __FUNCTION__);
63 exit(1);
2bd0ea18 64 }
3cc4d0db 65 offset += bytes;
2bd0ea18
NS
66 }
67 free(z);
68}
69
989b74bc 70static void unmount_record(void *p)
2bd0ea18 71{
989b74bc 72 xlog_op_header_t *op = (xlog_op_header_t *)p;
5000d01d
SL
73 /* the data section must be 32 bit size aligned */
74 struct {
75 __uint16_t magic;
76 __uint16_t pad1;
77 __uint32_t pad2; /* may as well make it 64 bits */
78 } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
79
989b74bc 80 memset(p, 0, BBSIZE);
5e656dbb
BN
81 op->oh_tid = cpu_to_be32(1);
82 op->oh_len = cpu_to_be32(sizeof(magic));
83 op->oh_clientid = XFS_LOG;
84 op->oh_flags = XLOG_UNMOUNT_TRANS;
85 op->oh_res2 = 0;
989b74bc
NS
86
87 /* and the data for this op */
1552a820 88 memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
989b74bc
NS
89}
90
91static xfs_caddr_t next(xfs_caddr_t ptr, int offset, void *private)
92{
93 xfs_buf_t *buf = (xfs_buf_t *)private;
94
95 if (XFS_BUF_COUNT(buf) < (int)(ptr - XFS_BUF_PTR(buf)) + offset)
96 abort();
97 return ptr + offset;
98}
99
100int
101libxfs_log_clear(
75c8b434 102 struct xfs_buftarg *btp,
989b74bc
NS
103 xfs_daddr_t start,
104 uint length,
105 uuid_t *fs_uuid,
106 int version,
107 int sunit,
108 int fmt)
109{
e6b359b3 110 xfs_buf_t *bp;
989b74bc
NS
111 int len;
112
75c8b434 113 if (!btp->dev || !fs_uuid)
2bd0ea18 114 return -EINVAL;
5000d01d
SL
115
116 /* first zero the log */
75c8b434 117 libxfs_device_zero(btp, start, length);
5000d01d
SL
118
119 /* then write a log record header */
989b74bc
NS
120 len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
121 len = MAX(len, 2);
75c8b434 122 bp = libxfs_getbufr(btp, start, len);
e6b359b3
NS
123 libxfs_log_header(XFS_BUF_PTR(bp),
124 fs_uuid, version, sunit, fmt, next, bp);
125 bp->b_flags |= LIBXFS_B_DIRTY;
126 libxfs_putbufr(bp);
989b74bc
NS
127 return 0;
128}
5000d01d 129
989b74bc
NS
130int
131libxfs_log_header(
132 xfs_caddr_t caddr,
133 uuid_t *fs_uuid,
134 int version,
135 int sunit,
136 int fmt,
137 libxfs_get_block_t *nextfunc,
138 void *private)
139{
140 xlog_rec_header_t *head = (xlog_rec_header_t *)caddr;
141 xfs_caddr_t p = caddr;
5e656dbb 142 __be32 cycle_lsn;
989b74bc
NS
143 int i, len;
144
145 len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;
5000d01d
SL
146
147 /* note that oh_tid actually contains the cycle number
148 * and the tid is stored in h_cycle_data[0] - that's the
149 * way things end up on disk.
150 */
989b74bc 151 memset(p, 0, BBSIZE);
5e656dbb
BN
152 head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
153 head->h_cycle = cpu_to_be32(1);
154 head->h_version = cpu_to_be32(version);
73bf5988 155 if (len != 1)
5e656dbb 156 head->h_len = cpu_to_be32(sunit - BBSIZE);
73bf5988 157 else
5e656dbb 158 head->h_len = cpu_to_be32(20);
a2ceac1f 159 head->h_crc = cpu_to_be32(0);
5e656dbb
BN
160 head->h_prev_block = cpu_to_be32(-1);
161 head->h_num_logops = cpu_to_be32(1);
162 head->h_cycle_data[0] = cpu_to_be32(0xb0c0d0d0);
163 head->h_fmt = cpu_to_be32(fmt);
164 head->h_size = cpu_to_be32(XLOG_HEADER_CYCLE_SIZE);
5000d01d 165
5e656dbb
BN
166 head->h_lsn = cpu_to_be64(xlog_assign_lsn(1, 0));
167 head->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(1, 0));
5000d01d 168
6699422d 169 memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));
73bf5988 170
989b74bc
NS
171 len = MAX(len, 2);
172 p = nextfunc(p, BBSIZE, private);
173 unmount_record(p);
73bf5988 174
46eca962 175 cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
989b74bc
NS
176 for (i = 2; i < len; i++) {
177 p = nextfunc(p, BBSIZE, private);
178 memset(p, 0, BBSIZE);
5e656dbb 179 *(__be32 *)p = cycle_lsn;
73bf5988 180 }
5000d01d 181
989b74bc 182 return BBTOB(len);
2bd0ea18
NS
183}
184
2556c98b
BN
185/*
186 * Simple I/O (buffer cache) interface
187 */
188
189
#ifdef XFS_BUF_TRACING

/*
 * Buffer tracing wrappers.
 *
 * Remove any macro definitions of the buffer entry points so that the real
 * functions can be declared here and called by the wrappers below.  Each
 * wrapper performs the real operation and records the caller's function,
 * file and line in the buffer.
 */
#undef libxfs_readbuf
#undef libxfs_readbuf_map
#undef libxfs_writebuf
#undef libxfs_getbuf
#undef libxfs_getbuf_map
#undef libxfs_getbuf_flags
#undef libxfs_putbuf

xfs_buf_t	*libxfs_readbuf(struct xfs_buftarg *, xfs_daddr_t, int, int,
				const struct xfs_buf_ops *);
xfs_buf_t	*libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
				int, int, const struct xfs_buf_ops *);
int		libxfs_writebuf(xfs_buf_t *, int);
xfs_buf_t	*libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
xfs_buf_t	*libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int);
xfs_buf_t	*libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
				unsigned int);
void		libxfs_putbuf (xfs_buf_t *);

/* Record the call site in @bp; a NULL buffer is silently ignored. */
#define	__add_trace(bp, func, file, line)	\
do {						\
	if (bp) {				\
		(bp)->b_func = (func);		\
		(bp)->b_file = (file);		\
		(bp)->b_line = (line);		\
	}					\
} while (0)

/* Traced libxfs_readbuf(): read, then tag the result with the call site. */
xfs_buf_t *
libxfs_trace_readbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;

	bp = libxfs_readbuf(btp, blkno, len, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

/* Traced libxfs_readbuf_map(). */
xfs_buf_t *
libxfs_trace_readbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, int flags,
		const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;

	bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
	__add_trace(bp, func, file, line);
	return bp;
}

/* Traced libxfs_writebuf(): tag the buffer first, then hand it over. */
int
libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags)
{
	__add_trace(bp, func, file, line);
	return libxfs_writebuf(bp, flags);
}

/* Traced libxfs_getbuf(). */
xfs_buf_t *
libxfs_trace_getbuf(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
{
	xfs_buf_t	*bp;

	bp = libxfs_getbuf(btp, blkno, len);
	__add_trace(bp, func, file, line);
	return bp;
}

/* Traced libxfs_getbuf_map(). */
xfs_buf_t *
libxfs_trace_getbuf_map(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
{
	xfs_buf_t	*bp;

	bp = libxfs_getbuf_map(btp, map, nmaps);
	__add_trace(bp, func, file, line);
	return bp;
}

/* Traced libxfs_getbuf_flags(). */
xfs_buf_t *
libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
		struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, unsigned int flags)
{
	xfs_buf_t	*bp;

	bp = libxfs_getbuf_flags(btp, blkno, len, flags);
	__add_trace(bp, func, file, line);
	return bp;
}

/* Traced libxfs_putbuf(): tag the buffer first, then release it. */
void
libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
{
	__add_trace(bp, func, file, line);
	libxfs_putbuf(bp);
}


#endif
283
284
f1b058f9
NS
285xfs_buf_t *
286libxfs_getsb(xfs_mount_t *mp, int flags)
287{
75c8b434
DC
288 return libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR,
289 XFS_FSS_TO_BB(mp, 1), flags, &xfs_sb_buf_ops);
f1b058f9
NS
290}
291
/* Zone from which __libxfs_getbufr() allocates new xfs_buf structures. */
kmem_zone_t			*xfs_buf_zone;

/*
 * Free list of released buffers waiting to be reused by __libxfs_getbufr().
 * Statically initialised as an empty list (the head points at itself);
 * all access is serialised by cm_mutex.
 */
static struct cache_mru		xfs_buf_freelist =
	{{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
	 0, PTHREAD_MUTEX_INITIALIZER };
f1b058f9 297
a2ceac1f
DC
/*
 * The bufkey is used to pass the new buffer information to the cache object
 * allocation routine. Because discontiguous buffers need to pass different
 * information, we need extra fields to carry it. However, because blkno and
 * bblen are still needed for the initial cache entry lookup (i.e. for
 * bcompare), a non-NULL map is what switches the allocation routine to
 * discontiguous buffer initialisation instead of contiguous initialisation.
 */
struct xfs_bufkey {
	struct xfs_buftarg	*buftarg;	/* target the buffer belongs to */
	xfs_daddr_t		blkno;		/* first basic block of the buffer */
	unsigned int		bblen;		/* total length in basic blocks */
	struct xfs_buf_map	*map;		/* extent map, NULL if contiguous */
	int			nmaps;		/* number of entries in map */
};
f1b058f9
NS
313
314static unsigned int
315libxfs_bhash(cache_key_t key, unsigned int hashsize)
316{
a2ceac1f 317 return (((unsigned int)((struct xfs_bufkey *)key)->blkno) >> 5) % hashsize;
f1b058f9
NS
318}
319
320static int
321libxfs_bcompare(struct cache_node *node, cache_key_t key)
322{
a2ceac1f
DC
323 struct xfs_buf *bp = (struct xfs_buf *)node;
324 struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;
f1b058f9
NS
325
326#ifdef IO_BCOMPARE_CHECK
75c8b434 327 if (bp->b_target->dev == bkey->buftarg->dev &&
5dfa5cd2 328 bp->b_bn == bkey->blkno &&
2556c98b
BN
329 bp->b_bcount != BBTOB(bkey->bblen))
330 fprintf(stderr, "%lx: Badness in key lookup (length)\n"
a2ceac1f 331 "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
2556c98b 332 pthread_self(),
5dfa5cd2 333 (unsigned long long)bp->b_bn, (int)bp->b_bcount,
2556c98b 334 (unsigned long long)bkey->blkno, BBTOB(bkey->bblen));
f1b058f9
NS
335#endif
336
75c8b434 337 return (bp->b_target->dev == bkey->buftarg->dev &&
5dfa5cd2 338 bp->b_bn == bkey->blkno &&
2556c98b 339 bp->b_bcount == BBTOB(bkey->bblen));
f1b058f9
NS
340}
341
342void
343libxfs_bprint(xfs_buf_t *bp)
344{
345 fprintf(stderr, "Buffer 0x%p blkno=%llu bytes=%u flags=0x%x count=%u\n",
5dfa5cd2 346 bp, (unsigned long long)bp->b_bn, (unsigned)bp->b_bcount,
f1b058f9
NS
347 bp->b_flags, bp->b_node.cn_count);
348}
349
e6b359b3 350static void
75c8b434
DC
351__initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
352 unsigned int bytes)
e6b359b3
NS
353{
354 bp->b_flags = 0;
5dfa5cd2 355 bp->b_bn = bno;
e6b359b3 356 bp->b_bcount = bytes;
a2ceac1f 357 bp->b_length = BTOBB(bytes);
75c8b434 358 bp->b_target = btp;
a6a7776a 359 bp->b_error = 0;
69ec88b5
BN
360 if (!bp->b_addr)
361 bp->b_addr = memalign(libxfs_device_alignment(), bytes);
e6b359b3
NS
362 if (!bp->b_addr) {
363 fprintf(stderr,
364 _("%s: %s can't memalign %u bytes: %s\n"),
365 progname, __FUNCTION__, bytes,
366 strerror(errno));
367 exit(1);
368 }
2556c98b
BN
369#ifdef XFS_BUF_TRACING
370 list_head_init(&bp->b_lock_list);
371#endif
372 pthread_mutex_init(&bp->b_lock, NULL);
50722af1
CH
373 bp->b_holder = 0;
374 bp->b_recur = 0;
75c8b434 375 bp->b_ops = NULL;
e6b359b3
NS
376}
377
a2ceac1f 378static void
75c8b434
DC
379libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
380 unsigned int bytes)
a2ceac1f 381{
75c8b434 382 __initbuf(bp, btp, bno, bytes);
a2ceac1f
DC
383}
384
385static void
75c8b434
DC
386libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp,
387 struct xfs_buf_map *map, int nmaps)
a2ceac1f
DC
388{
389 unsigned int bytes = 0;
390 int i;
391
392 bytes = sizeof(struct xfs_buf_map) * nmaps;
393 bp->b_map = malloc(bytes);
394 if (!bp->b_map) {
395 fprintf(stderr,
396 _("%s: %s can't malloc %u bytes: %s\n"),
397 progname, __FUNCTION__, bytes,
398 strerror(errno));
399 exit(1);
400 }
401 bp->b_nmaps = nmaps;
402
403 bytes = 0;
404 for ( i = 0; i < nmaps; i++) {
405 bp->b_map[i].bm_bn = map[i].bm_bn;
406 bp->b_map[i].bm_len = map[i].bm_len;
407 bytes += BBTOB(map[i].bm_len);
408 }
409
75c8b434 410 __initbuf(bp, btp, map[0].bm_bn, bytes);
a2ceac1f
DC
411 bp->b_flags |= LIBXFS_B_DISCONTIG;
412}
413
e6b359b3 414xfs_buf_t *
a2ceac1f 415__libxfs_getbufr(int blen)
e6b359b3
NS
416{
417 xfs_buf_t *bp;
69ec88b5
BN
418
419 /*
420 * first look for a buffer that can be used as-is,
421 * if one cannot be found, see if there is a buffer,
ff1f79a7 422 * and if so, free its buffer and set b_addr to NULL
69ec88b5
BN
423 * before calling libxfs_initbuf.
424 */
425 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
426 if (!list_empty(&xfs_buf_freelist.cm_list)) {
427 list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
428 if (bp->b_bcount == blen) {
429 list_del_init(&bp->b_node.cn_mru);
430 break;
431 }
432 }
433 if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
434 bp = list_entry(xfs_buf_freelist.cm_list.next,
435 xfs_buf_t, b_node.cn_mru);
436 list_del_init(&bp->b_node.cn_mru);
437 free(bp->b_addr);
438 bp->b_addr = NULL;
a2ceac1f
DC
439 free(bp->b_map);
440 bp->b_map = NULL;
69ec88b5
BN
441 }
442 } else
5e656dbb 443 bp = kmem_zone_zalloc(xfs_buf_zone, 0);
69ec88b5 444 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
e0607266 445 bp->b_ops = NULL;
e6b359b3 446
a2ceac1f
DC
447 return bp;
448}
449
450xfs_buf_t *
75c8b434 451libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
a2ceac1f
DC
452{
453 xfs_buf_t *bp;
454 int blen = BBTOB(bblen);
455
456 bp =__libxfs_getbufr(blen);
457 if (bp)
75c8b434 458 libxfs_initbuf(bp, btp, blkno, blen);
2556c98b 459#ifdef IO_DEBUG
a2ceac1f 460 printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
f63fd268 461 pthread_self(), __FUNCTION__, blen,
2556c98b
BN
462 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
463#endif
69ec88b5 464
e6b359b3
NS
465 return bp;
466}
467
a2ceac1f 468xfs_buf_t *
75c8b434 469libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
a2ceac1f
DC
470 struct xfs_buf_map *map, int nmaps)
471{
472 xfs_buf_t *bp;
473 int blen = BBTOB(bblen);
474
475 if (!map || !nmaps) {
476 fprintf(stderr,
477 _("%s: %s invalid map %p or nmaps %d\n"),
478 progname, __FUNCTION__, map, nmaps);
479 exit(1);
480 }
481
482 if (blkno != map[0].bm_bn) {
483 fprintf(stderr,
484 _("%s: %s map blkno %lx doesn't match key %lx\n"),
485 progname, __FUNCTION__, map[0].bm_bn, blkno);
486 exit(1);
487 }
488
489 bp =__libxfs_getbufr(blen);
490 if (bp)
75c8b434 491 libxfs_initbuf_map(bp, btp, map, nmaps);
a2ceac1f
DC
492#ifdef IO_DEBUG
493 printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
494 pthread_self(), __FUNCTION__, blen,
495 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
496#endif
497
498 return bp;
499}
2556c98b
BN
500
#ifdef XFS_BUF_TRACING
/*
 * Tracing bookkeeping: buffers handed out by __cache_lookup() are added to
 * lock_buf_list and counted in lock_buf_count; libxfs_putbuf() removes them
 * again.  Both are updated under the buffer cache mutex.
 */
struct list_head	lock_buf_list = {&lock_buf_list, &lock_buf_list};
int			lock_buf_count = 0;
#endif

/* Defined elsewhere; non-zero enables per-buffer locking in this file. */
extern int	use_xfs_buf_lock;
507
a2ceac1f
DC
508static struct xfs_buf *
509__cache_lookup(struct xfs_bufkey *key, unsigned int flags)
2bd0ea18 510{
a2ceac1f 511 struct xfs_buf *bp;
2556c98b 512
a2ceac1f 513 cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp);
2ae22647
CH
514 if (!bp)
515 return NULL;
516
517 if (use_xfs_buf_lock) {
50722af1
CH
518 int ret;
519
520 ret = pthread_mutex_trylock(&bp->b_lock);
521 if (ret) {
522 ASSERT(ret == EAGAIN);
523 if (flags & LIBXFS_GETBUF_TRYLOCK)
524 goto out_put;
525
526 if (pthread_equal(bp->b_holder, pthread_self())) {
527 fprintf(stderr,
528 _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
a2ceac1f 529 key->blkno);
50722af1
CH
530 bp->b_recur++;
531 return bp;
532 } else {
533 pthread_mutex_lock(&bp->b_lock);
2ae22647 534 }
2ae22647 535 }
50722af1
CH
536
537 bp->b_holder = pthread_self();
2ae22647
CH
538 }
539
540 cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
541 cache_node_get_priority((struct cache_node *)bp) -
a040d7c9 542 CACHE_PREFETCH_PRIORITY);
2556c98b 543#ifdef XFS_BUF_TRACING
2ae22647
CH
544 pthread_mutex_lock(&libxfs_bcache->c_mutex);
545 lock_buf_count++;
546 list_add(&bp->b_lock_list, &lock_buf_list);
547 pthread_mutex_unlock(&libxfs_bcache->c_mutex);
2556c98b 548#endif
2bd0ea18 549#ifdef IO_DEBUG
a2ceac1f
DC
550 printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n",
551 pthread_self(), __FUNCTION__,
552 bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno));
2bd0ea18 553#endif
2556c98b 554
f1b058f9 555 return bp;
50722af1
CH
556out_put:
557 cache_node_put(libxfs_bcache, (struct cache_node *)bp);
558 return NULL;
f1b058f9
NS
559}
560
a2ceac1f 561struct xfs_buf *
75c8b434
DC
562libxfs_getbuf_flags(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
563 unsigned int flags)
a2ceac1f
DC
564{
565 struct xfs_bufkey key = {0};
566
75c8b434 567 key.buftarg = btp;
a2ceac1f
DC
568 key.blkno = blkno;
569 key.bblen = len;
570
571 return __cache_lookup(&key, flags);
572}
573
2ae22647 574struct xfs_buf *
75c8b434 575libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
2ae22647 576{
75c8b434 577 return libxfs_getbuf_flags(btp, blkno, len, 0);
2ae22647
CH
578}
579
a2ceac1f 580struct xfs_buf *
75c8b434 581libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
a2ceac1f
DC
582{
583 struct xfs_bufkey key = {0};
584 int i;
585
75c8b434 586 key.buftarg = btp;
a2ceac1f
DC
587 key.blkno = map[0].bm_bn;
588 for (i = 0; i < nmaps; i++) {
589 key.bblen += map[i].bm_len;
590 }
591 key.map = map;
592 key.nmaps = nmaps;
593
594 return __cache_lookup(&key, 0);
595}
596
f1b058f9
NS
597void
598libxfs_putbuf(xfs_buf_t *bp)
599{
2556c98b
BN
600#ifdef XFS_BUF_TRACING
601 pthread_mutex_lock(&libxfs_bcache->c_mutex);
602 lock_buf_count--;
603 ASSERT(lock_buf_count >= 0);
604 list_del_init(&bp->b_lock_list);
605 pthread_mutex_unlock(&libxfs_bcache->c_mutex);
606#endif
50722af1
CH
607 if (use_xfs_buf_lock) {
608 if (bp->b_recur) {
609 bp->b_recur--;
610 } else {
611 bp->b_holder = 0;
612 pthread_mutex_unlock(&bp->b_lock);
613 }
614 }
a040d7c9 615 cache_node_put(libxfs_bcache, (struct cache_node *)bp);
f1b058f9
NS
616}
617
618void
619libxfs_purgebuf(xfs_buf_t *bp)
620{
a2ceac1f 621 struct xfs_bufkey key = {0};
f1b058f9 622
75c8b434 623 key.buftarg = bp->b_target;
5dfa5cd2 624 key.blkno = bp->b_bn;
75c8b434 625 key.bblen = bp->b_length;
f1b058f9
NS
626
627 cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
628}
2bd0ea18 629
f1b058f9 630static struct cache_node *
2556c98b 631libxfs_balloc(cache_key_t key)
f1b058f9 632{
a2ceac1f 633 struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;
2556c98b 634
a2ceac1f
DC
635 if (bufkey->map)
636 return (struct cache_node *)
75c8b434 637 libxfs_getbufr_map(bufkey->buftarg,
a2ceac1f
DC
638 bufkey->blkno, bufkey->bblen,
639 bufkey->map, bufkey->nmaps);
75c8b434 640 return (struct cache_node *)libxfs_getbufr(bufkey->buftarg,
a2ceac1f 641 bufkey->blkno, bufkey->bblen);
2bd0ea18
NS
642}
643
a2ceac1f
DC
644
645static int
646__read_buf(int fd, void *buf, int len, off64_t offset, int flags)
2bd0ea18 647{
bcea58c7 648 int sts;
2bd0ea18 649
a2ceac1f 650 sts = pread64(fd, buf, len, offset);
bcea58c7 651 if (sts < 0) {
a2ceac1f 652 int error = errno;
9440d84d 653 fprintf(stderr, _("%s: read failed: %s\n"),
c3928e39 654 progname, strerror(error));
9440d84d 655 if (flags & LIBXFS_EXIT_ON_FAILURE)
2bd0ea18 656 exit(1);
c3928e39 657 return error;
a2ceac1f 658 } else if (sts != len) {
bcea58c7 659 fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
a2ceac1f 660 progname, sts, len);
bcea58c7
CH
661 if (flags & LIBXFS_EXIT_ON_FAILURE)
662 exit(1);
663 return EIO;
2bd0ea18 664 }
a2ceac1f
DC
665 return 0;
666}
667
668int
75c8b434
DC
669libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp,
670 int len, int flags)
a2ceac1f 671{
75c8b434 672 int fd = libxfs_device_to_fd(btp->dev);
a2ceac1f
DC
673 int bytes = BBTOB(len);
674 int error;
675
676 ASSERT(BBTOB(len) <= bp->b_bcount);
677
678 error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
679 if (!error &&
75c8b434 680 bp->b_target->dev == btp->dev &&
5dfa5cd2 681 bp->b_bn == blkno &&
f1b058f9
NS
682 bp->b_bcount == bytes)
683 bp->b_flags |= LIBXFS_B_UPTODATE;
a2ceac1f
DC
684#ifdef IO_DEBUG
685 printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
686 pthread_self(), __FUNCTION__, bytes, error,
687 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
688#endif
689 return error;
2bd0ea18
NS
690}
691
692xfs_buf_t *
75c8b434
DC
693libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
694 const struct xfs_buf_ops *ops)
2bd0ea18 695{
f1b058f9 696 xfs_buf_t *bp;
2bd0ea18
NS
697 int error;
698
75c8b434
DC
699 bp = libxfs_getbuf(btp, blkno, len);
700 if (!bp)
701 return NULL;
702 if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)))
703 return bp;
704
705 /*
706 * only set the ops on a cache miss (i.e. first physical read) as the
707 * verifier may change the ops to match the typ eof buffer it contains.
708 * A cache hit might reset the verifier to the original type if we set
709 * it again, but it won't get called again and set to match the buffer
710 * contents. *cough* xfs_da_node_buf_ops *cough*.
711 */
712 bp->b_error = 0;
713 bp->b_ops = ops;
714 error = libxfs_readbufr(btp, blkno, bp, len, flags);
715 if (error)
716 bp->b_error = error;
717 else if (bp->b_ops)
718 bp->b_ops->verify_read(bp);
f1b058f9 719 return bp;
2bd0ea18
NS
720}
721
a2ceac1f 722struct xfs_buf *
75c8b434
DC
723libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
724 int flags, const struct xfs_buf_ops *ops)
a2ceac1f
DC
725{
726 xfs_buf_t *bp;
727 int error = 0;
728 int fd;
729 int i;
730 char *buf;
731
732 if (nmaps == 1)
75c8b434
DC
733 return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
734 flags, ops);
a2ceac1f 735
75c8b434
DC
736 bp = libxfs_getbuf_map(btp, map, nmaps);
737 if (!bp)
738 return NULL;
739
740 bp->b_error = 0;
741 bp->b_ops = ops;
742 if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)))
a2ceac1f
DC
743 return bp;
744
745 ASSERT(bp->b_nmaps = nmaps);
746
75c8b434 747 fd = libxfs_device_to_fd(btp->dev);
a2ceac1f
DC
748 buf = bp->b_addr;
749 for (i = 0; i < bp->b_nmaps; i++) {
750 off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn);
751 int len = BBTOB(bp->b_map[i].bm_len);
752
753 ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn);
754 ASSERT(bp->b_map[i].bm_len == map[i].bm_len);
755
756 error = __read_buf(fd, buf, len, offset, flags);
757 if (error) {
758 bp->b_error = error;
759 break;
760 }
761 buf += len;
762 offset += len;
763 }
764
75c8b434 765 if (!error) {
a2ceac1f 766 bp->b_flags |= LIBXFS_B_UPTODATE;
75c8b434
DC
767 if (bp->b_ops)
768 bp->b_ops->verify_read(bp);
769 }
a2ceac1f
DC
770#ifdef IO_DEBUG
771 printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
772 pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
5dfa5cd2 773 (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
a2ceac1f
DC
774#endif
775 return bp;
776}
777
778static int
779__write_buf(int fd, void *buf, int len, off64_t offset, int flags)
2bd0ea18
NS
780{
781 int sts;
2bd0ea18 782
a2ceac1f 783 sts = pwrite64(fd, buf, len, offset);
2bd0ea18 784 if (sts < 0) {
a2ceac1f 785 int error = errno;
9440d84d 786 fprintf(stderr, _("%s: pwrite64 failed: %s\n"),
c3928e39 787 progname, strerror(error));
a2ceac1f 788 if (flags & LIBXFS_B_EXIT)
2bd0ea18 789 exit(1);
c3928e39 790 return error;
a2ceac1f
DC
791 } else if (sts != len) {
792 fprintf(stderr, _("%s: error - pwrite64 only %d of %d bytes\n"),
793 progname, sts, len);
794 if (flags & LIBXFS_B_EXIT)
2bd0ea18
NS
795 exit(1);
796 return EIO;
797 }
a2ceac1f
DC
798 return 0;
799}
800
801int
802libxfs_writebufr(xfs_buf_t *bp)
803{
75c8b434 804 int fd = libxfs_device_to_fd(bp->b_target->dev);
a2ceac1f
DC
805 int error = 0;
806
75c8b434
DC
807 /*
808 * we never write buffers that are marked stale. This indicates they
809 * contain data that has been invalidated, and even if the buffer is
810 * dirty it must *never* be written. Verifiers are wonderful for finding
811 * bugs like this. Make sure the error is obvious as to the cause.
812 */
813 if (bp->b_flags & LIBXFS_B_STALE) {
814 bp->b_error = ESTALE;
815 return bp->b_error;
816 }
817
818 /*
819 * clear any pre-existing error status on the buffer. This can occur if
820 * the buffer is corrupt on disk and the repair process doesn't clear
821 * the error before fixing and writing it back.
822 */
823 bp->b_error = 0;
824 if (bp->b_ops) {
825 bp->b_ops->verify_write(bp);
826 if (bp->b_error) {
827 fprintf(stderr,
828 _("%s: write verifer failed on bno 0x%llx/0x%x\n"),
829 __func__, (long long)bp->b_bn, bp->b_bcount);
830 return bp->b_error;
831 }
832 }
833
a2ceac1f
DC
834 if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
835 error = __write_buf(fd, bp->b_addr, bp->b_bcount,
5dfa5cd2 836 LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags);
a2ceac1f
DC
837 } else {
838 int i;
839 char *buf = bp->b_addr;
840
841 for (i = 0; i < bp->b_nmaps; i++) {
842 off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn);
843 int len = BBTOB(bp->b_map[i].bm_len);
844
845 error = __write_buf(fd, buf, len, offset, bp->b_flags);
846 if (error) {
847 bp->b_error = error;
848 break;
849 }
850 buf += len;
851 offset += len;
852 }
853 }
854
f1b058f9 855#ifdef IO_DEBUG
2556c98b
BN
856 printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p\n",
857 pthread_self(), __FUNCTION__, bp->b_bcount,
5dfa5cd2
DC
858 (long long)LIBXFS_BBTOOFF64(bp->b_bn),
859 (long long)bp->b_bn, bp);
f1b058f9 860#endif
a2ceac1f
DC
861 if (!error) {
862 bp->b_flags |= LIBXFS_B_UPTODATE;
863 bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT);
864 }
865 return error;
2bd0ea18
NS
866}
867
868int
f1b058f9 869libxfs_writebuf_int(xfs_buf_t *bp, int flags)
2bd0ea18 870{
f1b058f9
NS
871 bp->b_flags |= (LIBXFS_B_DIRTY | flags);
872 return 0;
873}
874
875int
876libxfs_writebuf(xfs_buf_t *bp, int flags)
877{
e0607266
DC
878#ifdef IO_DEBUG
879 printf("%lx: %s: dirty blkno=%llu(%llu)\n",
880 pthread_self(), __FUNCTION__,
881 (long long)LIBXFS_BBTOOFF64(bp->b_bn),
882 (long long)bp->b_bn);
883#endif
f1b058f9
NS
884 bp->b_flags |= (LIBXFS_B_DIRTY | flags);
885 libxfs_putbuf(bp);
886 return 0;
2bd0ea18
NS
887}
888
57c9fccb 889void
f1b058f9 890libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
57c9fccb 891{
f1b058f9
NS
892#ifdef IO_DEBUG
893 if (boff + len > bp->b_bcount) {
2556c98b 894 printf("Badness, iomove out of range!\n"
a2ceac1f 895 "bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n",
5dfa5cd2 896 (long long)bp->b_bn, bp->b_bcount, boff, len);
57c9fccb 897 abort();
f1b058f9
NS
898 }
899#endif
57c9fccb
NS
900 switch (flags) {
901 case LIBXFS_BZERO:
f1b058f9 902 memset(bp->b_addr + boff, 0, len);
57c9fccb
NS
903 break;
904 case LIBXFS_BREAD:
f1b058f9 905 memcpy(data, bp->b_addr + boff, len);
57c9fccb
NS
906 break;
907 case LIBXFS_BWRITE:
f1b058f9 908 memcpy(bp->b_addr + boff, data, len);
57c9fccb
NS
909 break;
910 }
911}
912
33165ec3 913static void
69ec88b5 914libxfs_brelse(struct cache_node *node)
33165ec3
BN
915{
916 xfs_buf_t *bp = (xfs_buf_t *)node;
917
69ec88b5
BN
918 if (bp != NULL) {
919 if (bp->b_flags & LIBXFS_B_DIRTY)
920 libxfs_writebufr(bp);
921 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
922 list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
923 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
924 }
33165ec3
BN
925}
926
e08f5594 927static unsigned int
69ec88b5
BN
928libxfs_bulkrelse(
929 struct cache *cache,
930 struct list_head *list)
2556c98b 931{
69ec88b5 932 xfs_buf_t *bp;
e08f5594 933 int count = 0;
2556c98b 934
69ec88b5 935 if (list_empty(list))
e08f5594 936 return 0 ;
69ec88b5
BN
937
938 list_for_each_entry(bp, list, b_node.cn_mru) {
2556c98b
BN
939 if (bp->b_flags & LIBXFS_B_DIRTY)
940 libxfs_writebufr(bp);
e08f5594 941 count++;
2556c98b 942 }
69ec88b5
BN
943
944 pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
945 __list_splice(list, &xfs_buf_freelist.cm_list);
946 pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
e08f5594
BN
947
948 return count;
69ec88b5
BN
949}
950
951static void
952libxfs_bflush(struct cache_node *node)
953{
954 xfs_buf_t *bp = (xfs_buf_t *)node;
955
956 if ((bp != NULL) && (bp->b_flags & LIBXFS_B_DIRTY))
957 libxfs_writebufr(bp);
2556c98b
BN
958}
959
960void
961libxfs_putbufr(xfs_buf_t *bp)
962{
963 libxfs_brelse((struct cache_node *)bp);
964}
965
966
f1b058f9
NS
967void
968libxfs_bcache_purge(void)
969{
970 cache_purge(libxfs_bcache);
971}
972
e8cb94ee 973void
33165ec3
BN
974libxfs_bcache_flush(void)
975{
976 cache_flush(libxfs_bcache);
977}
978
2556c98b
BN
979int
980libxfs_bcache_overflowed(void)
981{
982 return cache_overflowed(libxfs_bcache);
983}
984
f1b058f9
NS
/*
 * Callbacks that plug xfs_buf handling into the generic cache code.  The
 * initialiser is positional; the comments name the slot each entry fills.
 */
struct cache_operations libxfs_bcache_operations = {
	/* .hash */	libxfs_bhash,
	/* .alloc */	libxfs_balloc,
	/* .flush */	libxfs_bflush,
	/* .relse */	libxfs_brelse,
	/* .compare */	libxfs_bcompare,
	/* .bulkrelse */libxfs_bulkrelse
};
993
2bd0ea18 994
f1b058f9
NS
995/*
996 * Inode cache interfaces
997 */
998
5e656dbb
BN
999extern kmem_zone_t *xfs_ili_zone;
1000extern kmem_zone_t *xfs_inode_zone;
f1b058f9
NS
1001
1002static unsigned int
1003libxfs_ihash(cache_key_t key, unsigned int hashsize)
1004{
1005 return ((unsigned int)*(xfs_ino_t *)key) % hashsize;
1006}
1007
1008static int
1009libxfs_icompare(struct cache_node *node, cache_key_t key)
1010{
1011 xfs_inode_t *ip = (xfs_inode_t *)node;
1012
1013 return (ip->i_ino == *(xfs_ino_t *)key);
1014}
1015
2bd0ea18
NS
1016int
1017libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
1018 xfs_inode_t **ipp, xfs_daddr_t bno)
1019{
1020 xfs_inode_t *ip;
f1b058f9 1021 int error = 0;
2bd0ea18 1022
f1b058f9
NS
1023 if (cache_node_get(libxfs_icache, &ino, (struct cache_node **)&ip)) {
1024#ifdef INO_DEBUG
1025 fprintf(stderr, "%s: allocated inode, ino=%llu(%llu), %p\n",
1026 __FUNCTION__, (unsigned long long)ino, bno, ip);
1027#endif
a2ceac1f
DC
1028 ip->i_ino = ino;
1029 ip->i_mount = mp;
1030 error = xfs_iread(mp, tp, ip, bno);
1031 if (error) {
f1b058f9
NS
1032 cache_node_purge(libxfs_icache, &ino,
1033 (struct cache_node *)ip);
1034 ip = NULL;
1035 }
1036 }
2bd0ea18 1037 *ipp = ip;
f1b058f9 1038 return error;
2bd0ea18
NS
1039}
1040
014e5f6d 1041void
f1b058f9
NS
1042libxfs_iput(xfs_inode_t *ip, uint lock_flags)
1043{
a040d7c9 1044 cache_node_put(libxfs_icache, (struct cache_node *)ip);
f1b058f9
NS
1045}
1046
1047static struct cache_node *
2556c98b 1048libxfs_ialloc(cache_key_t key)
f1b058f9 1049{
5e656dbb 1050 return kmem_zone_zalloc(xfs_inode_zone, 0);
f1b058f9
NS
1051}
1052
1053static void
014e5f6d
ES
1054libxfs_idestroy(xfs_inode_t *ip)
1055{
1056 switch (ip->i_d.di_mode & S_IFMT) {
1057 case S_IFREG:
1058 case S_IFDIR:
1059 case S_IFLNK:
1060 libxfs_idestroy_fork(ip, XFS_DATA_FORK);
1061 break;
1062 }
1063 if (ip->i_afp)
1064 libxfs_idestroy_fork(ip, XFS_ATTR_FORK);
1065}
1066
f1b058f9
NS
1067static void
1068libxfs_irelse(struct cache_node *node)
2bd0ea18 1069{
f1b058f9 1070 xfs_inode_t *ip = (xfs_inode_t *)node;
2bd0ea18
NS
1071
1072 if (ip != NULL) {
5000d01d 1073 if (ip->i_itemp)
5e656dbb 1074 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
5000d01d 1075 ip->i_itemp = NULL;
014e5f6d 1076 libxfs_idestroy(ip);
5e656dbb 1077 kmem_zone_free(xfs_inode_zone, ip);
2bd0ea18
NS
1078 ip = NULL;
1079 }
1080}
1081
2bd0ea18 1082void
f1b058f9 1083libxfs_icache_purge(void)
2bd0ea18 1084{
f1b058f9 1085 cache_purge(libxfs_icache);
2bd0ea18 1086}
f1b058f9
NS
1087
/*
 * Callbacks that plug in-core inode handling into the generic cache code.
 * The initialiser is positional; the comments name the slot each entry
 * fills.  No flush or bulk-release callback is provided for inodes.
 */
struct cache_operations libxfs_icache_operations = {
	/* .hash */	libxfs_ihash,
	/* .alloc */	libxfs_ialloc,
	/* .flush */	NULL,
	/* .relse */	libxfs_irelse,
	/* .compare */	libxfs_icompare,
	/* .bulkrelse */ NULL
};