libxfs/rdwr.c

   1 /*
   2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18
  19
  20 #include "libxfs_priv.h"
  21 #include "init.h"
  22 #include "xfs_fs.h"
  23 #include "xfs_shared.h"
  24 #include "xfs_format.h"
  25 #include "xfs_log_format.h"
  26 #include "xfs_trans_resv.h"
  27 #include "xfs_mount.h"
  28 #include "xfs_inode_buf.h"
  29 #include "xfs_inode_fork.h"
  30 #include "xfs_inode.h"
  31 #include "xfs_trans.h"
  32
  33 #include "libxfs.h"             /* for LIBXFS_EXIT_ON_FAILURE */
  34
  35 /*
  36  * Important design/architecture note:
  37  *
  38  * The userspace code that uses the buffer cache is much less constrained than
  39  * the kernel code. The userspace code is pretty nasty in places, especially
  40  * when it comes to buffer error handling.  Very little of the userspace code
  41  * outside libxfs clears bp->b_error - very little code even checks it - so the
  42  * libxfs code is tripping on stale errors left by the userspace code.
  43  *
  44  * We can't clear errors or zero buffer contents in libxfs_getbuf-* like we do
  45  * in the kernel, because those functions are used by the libxfs_readbuf_*
  46  * functions and hence need to leave the buffers unchanged on cache hits. This
  47  * is actually the only way to gather a write error from a libxfs_writebuf()
  48  * call - you need to get the buffer again so you can check bp->b_error field -
  49  * assuming that the buffer is still in the cache when you check, that is.
  50  *
  51  * This is very different to the kernel code which does not release buffers on a
  52  * write so we can wait on IO and check errors. The kernel buffer cache also
  53  * guarantees a buffer of a known initial state from xfs_buf_get() even on a
  54  * cache hit.
  55  *
  56  * IOWs, userspace is behaving quite differently to the kernel and as a result
  57  * it leaks errors from reads, invalidations and writes through
  58  * libxfs_getbuf/libxfs_readbuf.
  59  *
  60  * The result of this is that until the userspace code outside libxfs is cleaned
  61  * up, functions that release buffers from userspace control (i.e
  62  * libxfs_writebuf/libxfs_putbuf) need to zero bp->b_error to prevent
  63  * propagation of stale errors into future buffer operations.
  64  */
  65
  66 #define BDSTRAT_SIZE    (256 * 1024)
  67
  68 #define IO_BCOMPARE_CHECK
  69
  70 /* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */
  71 int
  72 libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
  73 {
  74         xfs_off_t       start_offset, end_offset, offset;
  75         ssize_t         zsize, bytes;
  76         char            *z;
  77         int             fd;
  78
  79         zsize = min(BDSTRAT_SIZE, BBTOB(len));
  80         if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
  81                 fprintf(stderr,
  82                         _("%s: %s can't memalign %d bytes: %s\n"),
  83                         progname, __FUNCTION__, (int)zsize, strerror(errno));
  84                 exit(1);
  85         }
  86         memset(z, 0, zsize);
  87
  88         fd = libxfs_device_to_fd(btp->dev);
  89         start_offset = LIBXFS_BBTOOFF64(start);
  90
  91         if ((lseek64(fd, start_offset, SEEK_SET)) < 0) {
  92                 fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
  93                         progname, __FUNCTION__,
  94                         (unsigned long long)start_offset, strerror(errno));
  95                 exit(1);
  96         }
  97
  98         end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
  99         for (offset = 0; offset < end_offset; ) {
 100                 bytes = min((ssize_t)(end_offset - offset), zsize);
 101                 if ((bytes = write(fd, z, bytes)) < 0) {
 102                         fprintf(stderr, _("%s: %s write failed: %s\n"),
 103                                 progname, __FUNCTION__, strerror(errno));
 104                         exit(1);
 105                 } else if (bytes == 0) {
 106                         fprintf(stderr, _("%s: %s not progressing?\n"),
 107                                 progname, __FUNCTION__);
 108                         exit(1);
 109                 }
 110                 offset += bytes;
 111         }
 112         free(z);
 113         return 0;
 114 }
 115
 116 static void unmount_record(void *p)
 117 {
 118         xlog_op_header_t        *op = (xlog_op_header_t *)p;
 119         /* the data section must be 32 bit size aligned */
 120         struct {
 121             __uint16_t magic;
 122             __uint16_t pad1;
 123             __uint32_t pad2; /* may as well make it 64 bits */
 124         } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
 125
 126         memset(p, 0, BBSIZE);
 127         /* dummy tid to mark this as written from userspace */
 128         op->oh_tid = cpu_to_be32(0xb0c0d0d0);
 129         op->oh_len = cpu_to_be32(sizeof(magic));
 130         op->oh_clientid = XFS_LOG;
 131         op->oh_flags = XLOG_UNMOUNT_TRANS;
 132         op->oh_res2 = 0;
 133
 134         /* and the data for this op */
 135         memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
 136 }
 137
 138 static char *next(
 139         char            *ptr,
 140         int             offset,
 141         void            *private)
 142 {
 143         struct xfs_buf  *buf = (struct xfs_buf *)private;
 144
 145         if (buf &&
 146             (XFS_BUF_COUNT(buf) < (int)(ptr - XFS_BUF_PTR(buf)) + offset))
 147                 abort();
 148
 149         return ptr + offset;
 150 }
 151
 152 /*
 153  * Format the log. The caller provides either a buftarg which is used to access
 154  * the log via buffers or a direct pointer to a buffer that encapsulates the
 155  * entire log.
 156  */
 157 int
 158 libxfs_log_clear(
 159         struct xfs_buftarg      *btp,
 160         char                    *dptr,
 161         xfs_daddr_t             start,
 162         uint                    length,         /* basic blocks */
 163         uuid_t                  *fs_uuid,
 164         int                     version,
 165         int                     sunit,          /* bytes */
 166         int                     fmt,
 167         int                     cycle,
 168         bool                    max)
 169 {
 170         struct xfs_buf          *bp = NULL;
 171         int                     len;
 172         xfs_lsn_t               lsn;
 173         xfs_lsn_t               tail_lsn;
 174         xfs_daddr_t             blk;
 175         xfs_daddr_t             end_blk;
 176         char                    *ptr;
 177
 178         if (((btp && dptr) || (!btp && !dptr)) ||
 179             (btp && !btp->dev) || !fs_uuid)
 180                 return -EINVAL;
 181
 182         /* first zero the log */
 183         if (btp)
 184                 libxfs_device_zero(btp, start, length);
 185         else
 186                 memset(dptr, 0, BBTOB(length));
 187
 188         /*
 189          * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a
 190          * special reset case where we only write a single record where the lsn
 191          * and tail_lsn match. Otherwise, the record lsn starts at block 0 of
 192          * the specified cycle and points tail_lsn at the last record of the
 193          * previous cycle.
 194          */
 195         len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
 196         len = MAX(len, 2);
 197         lsn = xlog_assign_lsn(cycle, 0);
 198         if (cycle == XLOG_INIT_CYCLE)
 199                 tail_lsn = lsn;
 200         else
 201                 tail_lsn = xlog_assign_lsn(cycle - 1, length - len);
 202
 203         /* write out the first log record */
 204         ptr = dptr;
 205         if (btp) {
 206                 bp = libxfs_getbufr(btp, start, len);
 207                 ptr = XFS_BUF_PTR(bp);
 208         }
 209         libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn,
 210                           next, bp);
 211         if (bp) {
 212                 bp->b_flags |= LIBXFS_B_DIRTY;
 213                 libxfs_putbufr(bp);
 214         }
 215
 216         /*
 217          * There's nothing else to do if this is a log reset. The kernel detects
 218          * the rest of the log is zeroed and starts at cycle 1.
 219          */
 220         if (cycle == XLOG_INIT_CYCLE)
 221                 return 0;
 222
 223         /*
 224          * Bump the record size for a full log format if the caller allows it.
 225          * This is primarily for performance reasons and most callers don't care
 226          * about record size since the log is clean after we're done.
 227          */
 228         if (max)
 229                 len = BTOBB(BDSTRAT_SIZE);
 230
 231         /*
 232          * Otherwise, fill everything beyond the initial record with records of
 233          * the previous cycle so the kernel head/tail detection works correctly.
 234          *
 235          * We don't particularly care about the record size or content here.
 236          * It's only important that the headers are in place such that the
 237          * kernel finds 1.) a clean log and 2.) the correct current cycle value.
 238          * Therefore, bump up the record size to the max to use larger I/Os and
 239          * improve performance.
 240          */
 241         cycle--;
 242         blk = start + len;
 243         if (dptr)
 244                 dptr += BBTOB(len);
 245         end_blk = start + length;
 246
 247         len = min(end_blk - blk, len);
 248         while (blk < end_blk) {
 249                 lsn = xlog_assign_lsn(cycle, blk - start);
 250                 tail_lsn = xlog_assign_lsn(cycle, blk - start - len);
 251
 252                 ptr = dptr;
 253                 if (btp) {
 254                         bp = libxfs_getbufr(btp, blk, len);
 255                         ptr = XFS_BUF_PTR(bp);
 256                 }
 257                 /*
 258                  * Note: pass the full buffer length as the sunit to initialize
 259                  * the entire buffer.
 260                  */
 261                 libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn,
 262                                   tail_lsn, next, bp);
 263                 if (bp) {
 264                         bp->b_flags |= LIBXFS_B_DIRTY;
 265                         libxfs_putbufr(bp);
 266                 }
 267
 268                 blk += len;
 269                 if (dptr)
 270                         dptr += BBTOB(len);
 271                 len = min(end_blk - blk, len);
 272         }
 273
 274         return 0;
 275 }
 276
 277 int
 278 libxfs_log_header(
 279         char                    *caddr,
 280         uuid_t                  *fs_uuid,
 281         int                     version,
 282         int                     sunit,
 283         int                     fmt,
 284         xfs_lsn_t               lsn,
 285         xfs_lsn_t               tail_lsn,
 286         libxfs_get_block_t      *nextfunc,
 287         void                    *private)
 288 {
 289         xlog_rec_header_t       *head = (xlog_rec_header_t *)caddr;
 290         char                    *p = caddr;
 291         __be32                  cycle_lsn;
 292         int                     i, len;
 293         int                     hdrs = 1;
 294
 295         if (lsn == NULLCOMMITLSN)
 296                 lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0);
 297         if (tail_lsn == NULLCOMMITLSN)
 298                 tail_lsn = lsn;
 299
 300         len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;
 301
 302         memset(p, 0, BBSIZE);
 303         head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 304         head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn));
 305         head->h_version = cpu_to_be32(version);
 306         head->h_crc = cpu_to_le32(0);
 307         head->h_prev_block = cpu_to_be32(-1);
 308         head->h_num_logops = cpu_to_be32(1);
 309         head->h_fmt = cpu_to_be32(fmt);
 310         head->h_size = cpu_to_be32(MAX(sunit, XLOG_BIG_RECORD_BSIZE));
 311
 312         head->h_lsn = cpu_to_be64(lsn);
 313         head->h_tail_lsn = cpu_to_be64(tail_lsn);
 314
 315         memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));
 316
 317         /*
 318          * The kernel expects to see either a log record header magic value or
 319          * the LSN cycle at the top of every log block. The first word of each
 320          * non-header block is copied to the record headers and replaced with
 321          * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for
 322          * details).
 323          *
 324          * Even though we only ever write an unmount record (one block), we
 325          * support writing log records up to the max log buffer size of 256k to
 326          * improve log format performance. This means a record can require up
 327          * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle
 328          * data (each header supports 32k of data).
 329          */
 330         cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
 331         if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) {
 332                 hdrs = sunit / XLOG_HEADER_CYCLE_SIZE;
 333                 if (sunit % XLOG_HEADER_CYCLE_SIZE)
 334                         hdrs++;
 335         }
 336
 337         /*
 338          * A fixed number of extended headers is expected based on h_size. If
 339          * required, format those now so the unmount record is located
 340          * correctly.
 341          *
 342          * Since we only write an unmount record, we only need one h_cycle_data
 343          * entry for the unmount record block. The subsequent record data
 344          * blocks are zeroed, which means we can stamp them directly with the
 345          * cycle and zero the rest of the cycle data in the extended headers.
 346          */
 347         if (hdrs > 1) {
 348                 for (i = 1; i < hdrs; i++) {
 349                         p = nextfunc(p, BBSIZE, private);
 350                         memset(p, 0, BBSIZE);
 351                         /* xlog_rec_ext_header.xh_cycle */
 352                         *(__be32 *)p = cycle_lsn;
 353                 }
 354         }
 355
 356         /*
 357          * The total length is the max of the stripe unit or 2 basic block
 358          * minimum (1 hdr blk + 1 data blk). The record length is the total
 359          * minus however many header blocks are required.
 360          */
 361         head->h_len = cpu_to_be32(MAX(BBTOB(2), sunit) - hdrs * BBSIZE);
 362
 363         /*
 364          * Write out the unmount record, pack the first word into the record
 365          * header and stamp the block with the cycle.
 366          */
 367         p = nextfunc(p, BBSIZE, private);
 368         unmount_record(p);
 369
 370         head->h_cycle_data[0] = *(__be32 *)p;
 371         *(__be32 *)p = cycle_lsn;
 372
 373         /*
 374          * Finally, zero all remaining blocks in the record and stamp each with
 375          * the cycle. We don't need to pack any of these blocks because the
 376          * cycle data in the headers has already been zeroed.
 377          */
 378         len = MAX(len, hdrs + 1);
 379         for (i = hdrs + 1; i < len; i++) {
 380                 p = nextfunc(p, BBSIZE, private);
 381                 memset(p, 0, BBSIZE);
 382                 *(__be32 *)p = cycle_lsn;
 383         }
 384
 385         return BBTOB(len);
 386 }
 387
 388 /*
 389  * Simple I/O (buffer cache) interface
 390  */
 391
 392
 393 #ifdef XFS_BUF_TRACING
 394
 395 #undef libxfs_readbuf
 396 #undef libxfs_readbuf_map
 397 #undef libxfs_writebuf
 398 #undef libxfs_getbuf
 399 #undef libxfs_getbuf_map
 400 #undef libxfs_getbuf_flags
 401 #undef libxfs_putbuf
 402
 403 xfs_buf_t       *libxfs_readbuf(struct xfs_buftarg *, xfs_daddr_t, int, int,
 404                                 const struct xfs_buf_ops *);
 405 xfs_buf_t       *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
 406                                 int, int, const struct xfs_buf_ops *);
 407 int             libxfs_writebuf(xfs_buf_t *, int);
 408 xfs_buf_t       *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
 409 xfs_buf_t       *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
 410                                 int, int);
 411 xfs_buf_t       *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
 412                                 unsigned int);
 413 void            libxfs_putbuf (xfs_buf_t *);
 414
 415 #define __add_trace(bp, func, file, line)       \
 416 do {                                            \
 417         if (bp) {                               \
 418                 (bp)->b_func = (func);          \
 419                 (bp)->b_file = (file);          \
 420                 (bp)->b_line = (line);          \
 421         }                                       \
 422 } while (0)
 423
 424 xfs_buf_t *
 425 libxfs_trace_readbuf(const char *func, const char *file, int line,
 426                 struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
 427                 const struct xfs_buf_ops *ops)
 428 {
 429         xfs_buf_t       *bp = libxfs_readbuf(btp, blkno, len, flags, ops);
 430         __add_trace(bp, func, file, line);
 431         return bp;
 432 }
 433
 434 xfs_buf_t *
 435 libxfs_trace_readbuf_map(const char *func, const char *file, int line,
 436                 struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, int flags,
 437                 const struct xfs_buf_ops *ops)
 438 {
 439         xfs_buf_t       *bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops);
 440         __add_trace(bp, func, file, line);
 441         return bp;
 442 }
 443
 444 int
 445 libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags)
 446 {
 447         __add_trace(bp, func, file, line);
 448         return libxfs_writebuf(bp, flags);
 449 }
 450
 451 xfs_buf_t *
 452 libxfs_trace_getbuf(const char *func, const char *file, int line,
 453                 struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
 454 {
 455         xfs_buf_t       *bp = libxfs_getbuf(btp, blkno, len);
 456         __add_trace(bp, func, file, line);
 457         return bp;
 458 }
 459
 460 xfs_buf_t *
 461 libxfs_trace_getbuf_map(const char *func, const char *file, int line,
 462                 struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
 463                 int flags)
 464 {
 465         xfs_buf_t       *bp = libxfs_getbuf_map(btp, map, nmaps, flags);
 466         __add_trace(bp, func, file, line);
 467         return bp;
 468 }
 469
 470 xfs_buf_t *
 471 libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
 472                 struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, unsigned int flags)
 473 {
 474         xfs_buf_t       *bp = libxfs_getbuf_flags(btp, blkno, len, flags);
 475         __add_trace(bp, func, file, line);
 476         return bp;
 477 }
 478
 479 void
 480 libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
 481 {
 482         __add_trace(bp, func, file, line);
 483         libxfs_putbuf(bp);
 484 }
 485
 486
 487 #endif
 488
 489
 490 xfs_buf_t *
 491 libxfs_getsb(xfs_mount_t *mp, int flags)
 492 {
 493         return libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR,
 494                                 XFS_FSS_TO_BB(mp, 1), flags, &xfs_sb_buf_ops);
 495 }
 496
 497 kmem_zone_t                     *xfs_buf_zone;
 498
 499 static struct cache_mru         xfs_buf_freelist =
 500         {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
 501          0, PTHREAD_MUTEX_INITIALIZER };
 502
 503 /*
 504  * The bufkey is used to pass the new buffer information to the cache object
 505  * allocation routine. Because discontiguous buffers need to pass different
 506  * information, we need fields to pass that information. However, because the
 507  * blkno and bblen is needed for the initial cache entry lookup (i.e. for
 508  * bcompare) the fact that the map/nmaps is non-null to switch to discontiguous
 509  * buffer initialisation instead of a contiguous buffer.
 510  */
 511 struct xfs_bufkey {
 512         struct xfs_buftarg      *buftarg;
 513         xfs_daddr_t             blkno;
 514         unsigned int            bblen;
 515         struct xfs_buf_map      *map;
 516         int                     nmaps;
 517 };
 518
 519 /*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
 520 #define GOLDEN_RATIO_PRIME      0x9e37fffffffc0001UL
 521 #define CACHE_LINE_SIZE         64
 522 static unsigned int
 523 libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift)
 524 {
 525         uint64_t        hashval = ((struct xfs_bufkey *)key)->blkno;
 526         uint64_t        tmp;
 527
 528         tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE;
 529         tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift);
 530         return tmp % hashsize;
 531 }
 532
 533 static int
 534 libxfs_bcompare(struct cache_node *node, cache_key_t key)
 535 {
 536         struct xfs_buf  *bp = (struct xfs_buf *)node;
 537         struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;
 538
 539         if (bp->b_target->dev == bkey->buftarg->dev &&
 540             bp->b_bn == bkey->blkno) {
 541                 if (bp->b_bcount == BBTOB(bkey->bblen))
 542                         return CACHE_HIT;
 543 #ifdef IO_BCOMPARE_CHECK
 544                 if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
 545                         fprintf(stderr,
 546         "%lx: Badness in key lookup (length)\n"
 547         "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
 548                                 pthread_self(),
 549                                 (unsigned long long)bp->b_bn, (int)bp->b_bcount,
 550                                 (unsigned long long)bkey->blkno,
 551                                 BBTOB(bkey->bblen));
 552                 }
 553 #endif
 554                 return CACHE_PURGE;
 555         }
 556         return CACHE_MISS;
 557 }
 558
 559 void
 560 libxfs_bprint(xfs_buf_t *bp)
 561 {
 562         fprintf(stderr, "Buffer 0x%p blkno=%llu bytes=%u flags=0x%x count=%u\n",
 563                 bp, (unsigned long long)bp->b_bn, (unsigned)bp->b_bcount,
 564                 bp->b_flags, bp->b_node.cn_count);
 565 }
 566
 567 static void
 568 __initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
 569                 unsigned int bytes)
 570 {
 571         bp->b_flags = 0;
 572         bp->b_bn = bno;
 573         bp->b_bcount = bytes;
 574         bp->b_length = BTOBB(bytes);
 575         bp->b_target = btp;
 576         bp->b_error = 0;
 577         if (!bp->b_addr)
 578                 bp->b_addr = memalign(libxfs_device_alignment(), bytes);
 579         if (!bp->b_addr) {
 580                 fprintf(stderr,
 581                         _("%s: %s can't memalign %u bytes: %s\n"),
 582                         progname, __FUNCTION__, bytes,
 583                         strerror(errno));
 584                 exit(1);
 585         }
 586         memset(bp->b_addr, 0, bytes);
 587 #ifdef XFS_BUF_TRACING
 588         list_head_init(&bp->b_lock_list);
 589 #endif
 590         pthread_mutex_init(&bp->b_lock, NULL);
 591         bp->b_holder = 0;
 592         bp->b_recur = 0;
 593         bp->b_ops = NULL;
 594 }
 595
 596 static void
 597 libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
 598                 unsigned int bytes)
 599 {
 600         __initbuf(bp, btp, bno, bytes);
 601 }
 602
 603 static void
 604 libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp,
 605                 struct xfs_buf_map *map, int nmaps)
 606 {
 607         unsigned int bytes = 0;
 608         int i;
 609
 610         bytes = sizeof(struct xfs_buf_map) * nmaps;
 611         bp->b_maps = malloc(bytes);
 612         if (!bp->b_maps) {
 613                 fprintf(stderr,
 614                         _("%s: %s can't malloc %u bytes: %s\n"),
 615                         progname, __FUNCTION__, bytes,
 616                         strerror(errno));
 617                 exit(1);
 618         }
 619         bp->b_nmaps = nmaps;
 620
 621         bytes = 0;
 622         for ( i = 0; i < nmaps; i++) {
 623                 bp->b_maps[i].bm_bn = map[i].bm_bn;
 624                 bp->b_maps[i].bm_len = map[i].bm_len;
 625                 bytes += BBTOB(map[i].bm_len);
 626         }
 627
 628         __initbuf(bp, btp, map[0].bm_bn, bytes);
 629         bp->b_flags |= LIBXFS_B_DISCONTIG;
 630 }
 631
 632 xfs_buf_t *
 633 __libxfs_getbufr(int blen)
 634 {
 635         xfs_buf_t       *bp;
 636
 637         /*
 638          * first look for a buffer that can be used as-is,
 639          * if one cannot be found, see if there is a buffer,
 640          * and if so, free its buffer and set b_addr to NULL
 641          * before calling libxfs_initbuf.
 642          */
 643         pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
 644         if (!list_empty(&xfs_buf_freelist.cm_list)) {
 645                 list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
 646                         if (bp->b_bcount == blen) {
 647                                 list_del_init(&bp->b_node.cn_mru);
 648                                 break;
 649                         }
 650                 }
 651                 if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
 652                         bp = list_entry(xfs_buf_freelist.cm_list.next,
 653                                         xfs_buf_t, b_node.cn_mru);
 654                         list_del_init(&bp->b_node.cn_mru);
 655                         free(bp->b_addr);
 656                         bp->b_addr = NULL;
 657                         free(bp->b_maps);
 658                         bp->b_maps = NULL;
 659                 }
 660         } else
 661                 bp = kmem_zone_zalloc(xfs_buf_zone, 0);
 662         pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
 663         bp->b_ops = NULL;
 664         if (bp->b_flags & LIBXFS_B_DIRTY)
 665                 fprintf(stderr, "found dirty buffer (bulk) on free list!");
 666
 667         return bp;
 668 }
 669
 670 xfs_buf_t *
 671 libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
 672 {
 673         xfs_buf_t       *bp;
 674         int             blen = BBTOB(bblen);
 675
 676         bp =__libxfs_getbufr(blen);
 677         if (bp)
 678                 libxfs_initbuf(bp, btp, blkno, blen);
 679 #ifdef IO_DEBUG
 680         printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
 681                 pthread_self(), __FUNCTION__, blen,
 682                 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
 683 #endif
 684
 685         return bp;
 686 }
 687
 688 xfs_buf_t *
 689 libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
 690                 struct xfs_buf_map *map, int nmaps)
 691 {
 692         xfs_buf_t       *bp;
 693         int             blen = BBTOB(bblen);
 694
 695         if (!map || !nmaps) {
 696                 fprintf(stderr,
 697                         _("%s: %s invalid map %p or nmaps %d\n"),
 698                         progname, __FUNCTION__, map, nmaps);
 699                 exit(1);
 700         }
 701
 702         if (blkno != map[0].bm_bn) {
 703                 fprintf(stderr,
 704                         _("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"),
 705                         progname, __FUNCTION__, (long long)map[0].bm_bn,
 706                         (long long)blkno);
 707                 exit(1);
 708         }
 709
 710         bp =__libxfs_getbufr(blen);
 711         if (bp)
 712                 libxfs_initbuf_map(bp, btp, map, nmaps);
 713 #ifdef IO_DEBUG
 714         printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n",
 715                 pthread_self(), __FUNCTION__, blen,
 716                 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
 717 #endif
 718
 719         return bp;
 720 }
 721
 722 #ifdef XFS_BUF_TRACING
 723 struct list_head        lock_buf_list = {&lock_buf_list, &lock_buf_list};
 724 int                     lock_buf_count = 0;
 725 #endif
 726
 727 extern int     use_xfs_buf_lock;
 728
 729 static struct xfs_buf *
 730 __cache_lookup(struct xfs_bufkey *key, unsigned int flags)
 731 {
 732         struct xfs_buf  *bp;
 733
 734         cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp);
 735         if (!bp)
 736                 return NULL;
 737
 738         if (use_xfs_buf_lock) {
 739                 int ret;
 740
 741                 ret = pthread_mutex_trylock(&bp->b_lock);
 742                 if (ret) {
 743                         ASSERT(ret == EAGAIN);
 744                         if (flags & LIBXFS_GETBUF_TRYLOCK)
 745                                 goto out_put;
 746
 747                         if (pthread_equal(bp->b_holder, pthread_self())) {
 748                                 fprintf(stderr,
 749         _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
 750                                         key->blkno);
 751                                 bp->b_recur++;
 752                                 return bp;
 753                         } else {
 754                                 pthread_mutex_lock(&bp->b_lock);
 755                         }
 756                 }
 757
 758                 bp->b_holder = pthread_self();
 759         }
 760
 761         cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
 762                 cache_node_get_priority((struct cache_node *)bp) -
 763                                                 CACHE_PREFETCH_PRIORITY);
 764 #ifdef XFS_BUF_TRACING
 765         pthread_mutex_lock(&libxfs_bcache->c_mutex);
 766         lock_buf_count++;
 767         list_add(&bp->b_lock_list, &lock_buf_list);
 768         pthread_mutex_unlock(&libxfs_bcache->c_mutex);
 769 #endif
 770 #ifdef IO_DEBUG
 771         printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n",
 772                 pthread_self(), __FUNCTION__,
 773                 bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno));
 774 #endif
 775
 776         return bp;
 777 out_put:
 778         cache_node_put(libxfs_bcache, (struct cache_node *)bp);
 779         return NULL;
 780 }
 781
 782 struct xfs_buf *
 783 libxfs_getbuf_flags(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len,
 784                 unsigned int flags)
 785 {
 786         struct xfs_bufkey key = {0};
 787
 788         key.buftarg = btp;
 789         key.blkno = blkno;
 790         key.bblen = len;
 791
 792         return __cache_lookup(&key, flags);
 793 }
 794
 795 /*
 796  * Clean the buffer flags for libxfs_getbuf*(), which wants to return
 797  * an unused buffer with clean state.  This prevents CRC errors on a
 798  * re-read of a corrupt block that was prefetched and freed.  This
 799  * can happen with a massively corrupt directory that is discarded,
 800  * but whose blocks are then recycled into expanding lost+found.
 801  *
 802  * Note however that if the buffer's dirty (prefetch calls getbuf)
 803  * we'll leave the state alone because we don't want to discard blocks
 804  * that have been fixed.
 805  */
 806 static void
 807 reset_buf_state(
 808         struct xfs_buf  *bp)
 809 {
 810         if (bp && !(bp->b_flags & LIBXFS_B_DIRTY))
 811                 bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE |
 812                                 LIBXFS_B_UPTODATE);
 813 }
 814
 815 struct xfs_buf *
 816 libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
 817 {
 818         struct xfs_buf  *bp;
 819
 820         bp = libxfs_getbuf_flags(btp, blkno, len, 0);
 821         reset_buf_state(bp);
 822         return bp;
 823 }
 824
 825 static struct xfs_buf *
 826 __libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
 827                     int nmaps, int flags)
 828 {
 829         struct xfs_bufkey key = {0};
 830         int i;
 831
 832         if (nmaps == 1)
 833                 return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
 834                                            flags);
 835
 836         key.buftarg = btp;
 837         key.blkno = map[0].bm_bn;
 838         for (i = 0; i < nmaps; i++) {
 839                 key.bblen += map[i].bm_len;
 840         }
 841         key.map = map;
 842         key.nmaps = nmaps;
 843
 844         return __cache_lookup(&key, flags);
 845 }
 846
 847 struct xfs_buf *
 848 libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
 849                   int nmaps, int flags)
 850 {
 851         struct xfs_buf  *bp;
 852
 853         bp = __libxfs_getbuf_map(btp, map, nmaps, flags);
 854         reset_buf_state(bp);
 855         return bp;
 856 }
 857
 858 void
 859 libxfs_putbuf(xfs_buf_t *bp)
 860 {
 861         /*
 862          * ensure that any errors on this use of the buffer don't carry
 863          * over to the next user.
 864          */
 865         bp->b_error = 0;
 866
 867 #ifdef XFS_BUF_TRACING
 868         pthread_mutex_lock(&libxfs_bcache->c_mutex);
 869         lock_buf_count--;
 870         ASSERT(lock_buf_count >= 0);
 871         list_del_init(&bp->b_lock_list);
 872         pthread_mutex_unlock(&libxfs_bcache->c_mutex);
 873 #endif
 874         if (use_xfs_buf_lock) {
 875                 if (bp->b_recur) {
 876                         bp->b_recur--;
 877                 } else {
 878                         bp->b_holder = 0;
 879                         pthread_mutex_unlock(&bp->b_lock);
 880                 }
 881         }
 882
 883         cache_node_put(libxfs_bcache, (struct cache_node *)bp);
 884 }
 885
 886 void
 887 libxfs_purgebuf(xfs_buf_t *bp)
 888 {
 889         struct xfs_bufkey key = {0};
 890
 891         key.buftarg = bp->b_target;
 892         key.blkno = bp->b_bn;
 893         key.bblen = bp->b_length;
 894
 895         cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
 896 }
 897
 898 static struct cache_node *
 899 libxfs_balloc(cache_key_t key)
 900 {
 901         struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;
 902
 903         if (bufkey->map)
 904                 return (struct cache_node *)
 905                        libxfs_getbufr_map(bufkey->buftarg,
 906                                           bufkey->blkno, bufkey->bblen,
 907                                           bufkey->map, bufkey->nmaps);
 908         return (struct cache_node *)libxfs_getbufr(bufkey->buftarg,
 909                                           bufkey->blkno, bufkey->bblen);
 910 }
 911
 912
 913 static int
 914 __read_buf(int fd, void *buf, int len, off64_t offset, int flags)
 915 {
 916         int     sts;
 917
 918         sts = pread64(fd, buf, len, offset);
 919         if (sts < 0) {
 920                 int error = errno;
 921                 fprintf(stderr, _("%s: read failed: %s\n"),
 922                         progname, strerror(error));
 923                 if (flags & LIBXFS_EXIT_ON_FAILURE)
 924                         exit(1);
 925                 return -error;
 926         } else if (sts != len) {
 927                 fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
 928                         progname, sts, len);
 929                 if (flags & LIBXFS_EXIT_ON_FAILURE)
 930                         exit(1);
 931                 return -EIO;
 932         }
 933         return 0;
 934 }
 935
 936 int
 937 libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp,
 938                 int len, int flags)
 939 {
 940         int     fd = libxfs_device_to_fd(btp->dev);
 941         int     bytes = BBTOB(len);
 942         int     error;
 943
 944         ASSERT(BBTOB(len) <= bp->b_bcount);
 945
 946         error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
 947         if (!error &&
 948             bp->b_target->dev == btp->dev &&
 949             bp->b_bn == blkno &&
 950             bp->b_bcount == bytes)
 951                 bp->b_flags |= LIBXFS_B_UPTODATE;
 952 #ifdef IO_DEBUG
 953         printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
 954                 pthread_self(), __FUNCTION__, bytes, error,
 955                 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
 956 #endif
 957         return error;
 958 }
 959
 960 void
 961 libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops)
 962 {
 963         if (!ops)
 964                 return;
 965         bp->b_ops = ops;
 966         bp->b_ops->verify_read(bp);
 967         bp->b_flags &= ~LIBXFS_B_UNCHECKED;
 968 }
 969
 970
 971 xfs_buf_t *
 972 libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
 973                 const struct xfs_buf_ops *ops)
 974 {
 975         xfs_buf_t       *bp;
 976         int             error;
 977
 978         bp = libxfs_getbuf_flags(btp, blkno, len, 0);
 979         if (!bp)
 980                 return NULL;
 981
 982         /*
 983          * if the buffer was prefetched, it is likely that it was not validated.
 984          * Hence if we are supplied an ops function and the buffer is marked as
 985          * unchecked, we need to validate it now.
 986          *
 987          * We do this verification even if the buffer is dirty - the
 988          * verification is almost certainly going to fail the CRC check in this
 989          * case as a dirty buffer has not had the CRC recalculated. However, we
 990          * should not be dirtying unchecked buffers and therefore failing it
 991          * here because it's dirty and unchecked indicates we've screwed up
 992          * somewhere else.
 993          */
 994         bp->b_error = 0;
 995         if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
 996                 if (bp->b_flags & LIBXFS_B_UNCHECKED)
 997                         libxfs_readbuf_verify(bp, ops);
 998                 return bp;
 999         }
1000
1001         /*
1002          * Set the ops on a cache miss (i.e. first physical read) as the
1003          * verifier may change the ops to match the type of buffer it contains.
1004          * A cache hit might reset the verifier to the original type if we set
1005          * it again, but it won't get called again and set to match the buffer
1006          * contents. *cough* xfs_da_node_buf_ops *cough*.
1007          */
1008         error = libxfs_readbufr(btp, blkno, bp, len, flags);
1009         if (error)
1010                 bp->b_error = error;
1011         else
1012                 libxfs_readbuf_verify(bp, ops);
1013         return bp;
1014 }
1015
1016 int
1017 libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
1018 {
1019         int     fd;
1020         int     error = 0;
1021         char    *buf;
1022         int     i;
1023
1024         fd = libxfs_device_to_fd(btp->dev);
1025         buf = bp->b_addr;
1026         for (i = 0; i < bp->b_nmaps; i++) {
1027                 off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
1028                 int len = BBTOB(bp->b_maps[i].bm_len);
1029
1030                 error = __read_buf(fd, buf, len, offset, flags);
1031                 if (error) {
1032                         bp->b_error = error;
1033                         break;
1034                 }
1035                 buf += len;
1036         }
1037
1038         if (!error)
1039                 bp->b_flags |= LIBXFS_B_UPTODATE;
1040 #ifdef IO_DEBUG
1041         printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n",
1042                 pthread_self(), __FUNCTION__, , error,
1043                 (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
1044 #endif
1045         return error;
1046 }
1047
1048 struct xfs_buf *
1049 libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
1050                 int flags, const struct xfs_buf_ops *ops)
1051 {
1052         struct xfs_buf  *bp;
1053         int             error = 0;
1054
1055         if (nmaps == 1)
1056                 return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
1057                                         flags, ops);
1058
1059         bp = __libxfs_getbuf_map(btp, map, nmaps, 0);
1060         if (!bp)
1061                 return NULL;
1062
1063         bp->b_error = 0;
1064         if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
1065                 if (bp->b_flags & LIBXFS_B_UNCHECKED)
1066                         libxfs_readbuf_verify(bp, ops);
1067                 return bp;
1068         }
1069         error = libxfs_readbufr_map(btp, bp, flags);
1070         if (!error)
1071                 libxfs_readbuf_verify(bp, ops);
1072
1073 #ifdef IO_DEBUG
1074         printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n",
1075                 pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error,
1076                 (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp);
1077 #endif
1078         return bp;
1079 }
1080
1081 static int
1082 __write_buf(int fd, void *buf, int len, off64_t offset, int flags)
1083 {
1084         int     sts;
1085
1086         sts = pwrite64(fd, buf, len, offset);
1087         if (sts < 0) {
1088                 int error = errno;
1089                 fprintf(stderr, _("%s: pwrite64 failed: %s\n"),
1090                         progname, strerror(error));
1091                 if (flags & LIBXFS_B_EXIT)
1092                         exit(1);
1093                 return -error;
1094         } else if (sts != len) {
1095                 fprintf(stderr, _("%s: error - pwrite64 only %d of %d bytes\n"),
1096                         progname, sts, len);
1097                 if (flags & LIBXFS_B_EXIT)
1098                         exit(1);
1099                 return -EIO;
1100         }
1101         return 0;
1102 }
1103
1104 int
1105 libxfs_writebufr(xfs_buf_t *bp)
1106 {
1107         int     fd = libxfs_device_to_fd(bp->b_target->dev);
1108
1109         /*
1110          * we never write buffers that are marked stale. This indicates they
1111          * contain data that has been invalidated, and even if the buffer is
1112          * dirty it must *never* be written. Verifiers are wonderful for finding
1113          * bugs like this. Make sure the error is obvious as to the cause.
1114          */
1115         if (bp->b_flags & LIBXFS_B_STALE) {
1116                 bp->b_error = -ESTALE;
1117                 return bp->b_error;
1118         }
1119
1120         /*
1121          * clear any pre-existing error status on the buffer. This can occur if
1122          * the buffer is corrupt on disk and the repair process doesn't clear
1123          * the error before fixing and writing it back.
1124          */
1125         bp->b_error = 0;
1126         if (bp->b_ops) {
1127                 bp->b_ops->verify_write(bp);
1128                 if (bp->b_error) {
1129                         fprintf(stderr,
1130         _("%s: write verifer failed on %s bno 0x%llx/0x%x\n"),
1131                                 __func__, bp->b_ops->name,
1132                                 (long long)bp->b_bn, bp->b_bcount);
1133                         return bp->b_error;
1134                 }
1135         }
1136
1137         if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) {
1138                 bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount,
1139                                     LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags);
1140         } else {
1141                 int     i;
1142                 char    *buf = bp->b_addr;
1143
1144                 for (i = 0; i < bp->b_nmaps; i++) {
1145                         off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
1146                         int len = BBTOB(bp->b_maps[i].bm_len);
1147
1148                         bp->b_error = __write_buf(fd, buf, len, offset,
1149                                                   bp->b_flags);
1150                         if (bp->b_error)
1151                                 break;
1152                         buf += len;
1153                 }
1154         }
1155
1156 #ifdef IO_DEBUG
1157         printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n",
1158                         pthread_self(), __FUNCTION__, bp->b_bcount,
1159                         (long long)LIBXFS_BBTOOFF64(bp->b_bn),
1160                         (long long)bp->b_bn, bp, bp->b_error);
1161 #endif
1162         if (!bp->b_error) {
1163                 bp->b_flags |= LIBXFS_B_UPTODATE;
1164                 bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT |
1165                                  LIBXFS_B_UNCHECKED);
1166         }
1167         return bp->b_error;
1168 }
1169
1170 int
1171 libxfs_writebuf_int(xfs_buf_t *bp, int flags)
1172 {
1173         /*
1174          * Clear any error hanging over from reading the buffer. This prevents
1175          * subsequent reads after this write from seeing stale errors.
1176          */
1177         bp->b_error = 0;
1178         bp->b_flags &= ~LIBXFS_B_STALE;
1179         bp->b_flags |= (LIBXFS_B_DIRTY | flags);
1180         return 0;
1181 }
1182
1183 int
1184 libxfs_writebuf(xfs_buf_t *bp, int flags)
1185 {
1186 #ifdef IO_DEBUG
1187         printf("%lx: %s: dirty blkno=%llu(%llu)\n",
1188                         pthread_self(), __FUNCTION__,
1189                         (long long)LIBXFS_BBTOOFF64(bp->b_bn),
1190                         (long long)bp->b_bn);
1191 #endif
1192         /*
1193          * Clear any error hanging over from reading the buffer. This prevents
1194          * subsequent reads after this write from seeing stale errors.
1195          */
1196         bp->b_error = 0;
1197         bp->b_flags &= ~LIBXFS_B_STALE;
1198         bp->b_flags |= (LIBXFS_B_DIRTY | flags);
1199         libxfs_putbuf(bp);
1200         return 0;
1201 }
1202
1203 void
1204 libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
1205 {
1206 #ifdef IO_DEBUG
1207         if (boff + len > bp->b_bcount) {
1208                 printf("Badness, iomove out of range!\n"
1209                         "bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n",
1210                         (long long)bp->b_bn, bp->b_bcount, boff, len);
1211                 abort();
1212         }
1213 #endif
1214         switch (flags) {
1215         case LIBXFS_BZERO:
1216                 memset(bp->b_addr + boff, 0, len);
1217                 break;
1218         case LIBXFS_BREAD:
1219                 memcpy(data, bp->b_addr + boff, len);
1220                 break;
1221         case LIBXFS_BWRITE:
1222                 memcpy(bp->b_addr + boff, data, len);
1223                 break;
1224         }
1225 }
1226
1227 static void
1228 libxfs_brelse(
1229         struct cache_node       *node)
1230 {
1231         struct xfs_buf          *bp = (struct xfs_buf *)node;
1232
1233         if (!bp)
1234                 return;
1235         if (bp->b_flags & LIBXFS_B_DIRTY)
1236                 fprintf(stderr,
1237                         "releasing dirty buffer to free list!");
1238
1239         pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
1240         list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
1241         pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
1242 }
1243
1244 static unsigned int
1245 libxfs_bulkrelse(
1246         struct cache            *cache,
1247         struct list_head        *list)
1248 {
1249         xfs_buf_t               *bp;
1250         int                     count = 0;
1251
1252         if (list_empty(list))
1253                 return 0 ;
1254
1255         list_for_each_entry(bp, list, b_node.cn_mru) {
1256                 if (bp->b_flags & LIBXFS_B_DIRTY)
1257                         fprintf(stderr,
1258                                 "releasing dirty buffer (bulk) to free list!");
1259                 count++;
1260         }
1261
1262         pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
1263         list_splice(list, &xfs_buf_freelist.cm_list);
1264         pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
1265
1266         return count;
1267 }
1268
1269 /*
1270  * When a buffer is marked dirty, the error is cleared. Hence if we are trying
1271  * to flush a buffer prior to cache reclaim that has an error on it it means
1272  * we've already tried to flush it and it failed. Prevent repeated corruption
1273  * errors from being reported by skipping such buffers - when the corruption is
1274  * fixed the buffer will be marked dirty again and we can write it again.
1275  */
1276 static int
1277 libxfs_bflush(
1278         struct cache_node       *node)
1279 {
1280         struct xfs_buf          *bp = (struct xfs_buf *)node;
1281
1282         if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY)
1283                 return libxfs_writebufr(bp);
1284         return bp->b_error;
1285 }
1286
1287 void
1288 libxfs_putbufr(xfs_buf_t *bp)
1289 {
1290         if (bp->b_flags & LIBXFS_B_DIRTY)
1291                 libxfs_writebufr(bp);
1292         libxfs_brelse((struct cache_node *)bp);
1293 }
1294
1295
1296 void
1297 libxfs_bcache_purge(void)
1298 {
1299         cache_purge(libxfs_bcache);
1300 }
1301
1302 void
1303 libxfs_bcache_flush(void)
1304 {
1305         cache_flush(libxfs_bcache);
1306 }
1307
1308 int
1309 libxfs_bcache_overflowed(void)
1310 {
1311         return cache_overflowed(libxfs_bcache);
1312 }
1313
1314 struct cache_operations libxfs_bcache_operations = {
1315         .hash           = libxfs_bhash,
1316         .alloc          = libxfs_balloc,
1317         .flush          = libxfs_bflush,
1318         .relse          = libxfs_brelse,
1319         .compare        = libxfs_bcompare,
1320         .bulkrelse      = libxfs_bulkrelse
1321 };
1322
1323
1324 /*
1325  * Inode cache stubs.
1326  */
1327
1328 extern kmem_zone_t      *xfs_ili_zone;
1329 extern kmem_zone_t      *xfs_inode_zone;
1330
1331 int
1332 libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
1333                 xfs_inode_t **ipp)
1334 {
1335         xfs_inode_t     *ip;
1336         int             error = 0;
1337
1338         ip = kmem_zone_zalloc(xfs_inode_zone, 0);
1339         if (!ip)
1340                 return -ENOMEM;
1341
1342         ip->i_ino = ino;
1343         ip->i_mount = mp;
1344         error = xfs_iread(mp, tp, ip, 0);
1345         if (error) {
1346                 kmem_zone_free(xfs_inode_zone, ip);
1347                 *ipp = NULL;
1348                 return error;
1349         }
1350
1351         /*
1352          * set up the inode ops structure that the libxfs code relies on
1353          */
1354         if (XFS_ISDIR(ip))
1355                 ip->d_ops = mp->m_dir_inode_ops;
1356         else
1357                 ip->d_ops = mp->m_nondir_inode_ops;
1358
1359         *ipp = ip;
1360         return 0;
1361 }
1362
1363 static void
1364 libxfs_idestroy(xfs_inode_t *ip)
1365 {
1366         switch (VFS_I(ip)->i_mode & S_IFMT) {
1367                 case S_IFREG:
1368                 case S_IFDIR:
1369                 case S_IFLNK:
1370                         libxfs_idestroy_fork(ip, XFS_DATA_FORK);
1371                         break;
1372         }
1373         if (ip->i_afp)
1374                 libxfs_idestroy_fork(ip, XFS_ATTR_FORK);
1375 }
1376
1377 void
1378 libxfs_iput(xfs_inode_t *ip)
1379 {
1380         if (ip->i_itemp)
1381                 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
1382         ip->i_itemp = NULL;
1383         libxfs_idestroy(ip);
1384         kmem_zone_free(xfs_inode_zone, ip);
1385 }