git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
Sync up XFS user and kernel source.
author Nathan Scott <nathans@sgi.com>
Tue, 20 May 2003 05:59:27 +0000 (05:59 +0000)
committer Nathan Scott <nathans@sgi.com>
Tue, 20 May 2003 05:59:27 +0000 (05:59 +0000)
12 files changed:
include/libxfs.h
include/libxlog.h
include/xfs_ag.h
include/xfs_inode.h
include/xfs_log.h
include/xfs_log_priv.h
include/xfs_mount.h
libxfs/xfs.h
libxfs/xfs_ialloc.c
libxfs/xfs_inode.c
libxfs/xfs_mount.c
libxlog/xfs_log_recover.c

index 579cf1d9ab97926d6b7d71a212b15d3344d6517d..ad72e106883154b5f5ee45571b0d9311f995dd7e 100644 (file)
@@ -175,7 +175,6 @@ typedef struct xfs_mount {
        __uint64_t              m_maxicount;    /* maximum inode count */
        int                     m_dalign;       /* stripe unit */
        int                     m_swidth;       /* stripe width */
-       int                     m_lstripemask;  /* log stripe mask */
        int                     m_sinoalign;    /* stripe unit inode alignmnt */
        int                     m_dir_magicpct; /* 37% of the dir blocksize */
        __uint8_t               m_dirversion;   /* 1 or 2 */
index 3f6029ed269b9fe1ec4b52f87a6b56a8aadea2e4..a8e37ef21c60de079ee1ee401e9c38fe9eb08cb5 100644 (file)
@@ -58,6 +58,7 @@ typedef struct log {
        int             l_grant_reserve_bytes;  /* */
        int             l_grant_write_cycle;    /* */
        int             l_grant_write_bytes;    /* */
+       uint            l_sectbb_log;   /* log2 of sector size in bbs */
 } xlog_t;
 
 #include <xfs/xfs_log_recover.h>
@@ -65,6 +66,12 @@ typedef struct log {
 #include <xfs/xfs_inode_item.h>
 #include <xfs/xfs_extfree_item.h>
 
+typedef union {
+       xlog_rec_header_t       hic_header;
+       xlog_rec_ext_header_t   hic_xheader;
+       char                    hic_sector[XLOG_HEADER_SIZE];
+} xlog_in_core_2_t;
+
 /*
  * macros mapping kernel code to user code
  */
@@ -98,13 +105,15 @@ typedef struct log {
        xlog_exit(__VA_ARGS__)
 #endif
 
-#define xlog_get_bp(nbblks, mp)        libxfs_getbuf(x.logdev, 0, (nbblks))
+#define xlog_get_bp(log,bbs)   libxfs_getbuf(x.logdev, 0, (bbs))
 #define xlog_put_bp(bp)                libxfs_putbuf(bp)
-#define xlog_bread(log,blkno,nbblks,bp)        \
+#define xlog_bread(log,blkno,bbs,bp)   \
        (libxfs_readbufr(x.logdev,      \
-                       (log)->l_logBBstart+(blkno), bp, (nbblks), 1), 0)
+                       (log)->l_logBBstart+(blkno), bp, (bbs), 1), 0)
+#define xlog_align(log,blkno,nbblks,bp)        XFS_BUF_PTR(bp)
 
 #define kmem_zalloc(size, foo)                 calloc(size,1)
+#define kmem_alloc(size, foo)                  calloc(size,1)
 #define kmem_free(ptr, foo)                    free(ptr)
 #define kmem_realloc(ptr, len, old, foo)       realloc(ptr, len)
 
index 4820ad2eace2dada8f80e88da7957900fed8e5ed..58893e32e7c4dea145ee85b6bb72553e9438ef9f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -185,9 +185,8 @@ xfs_agblock_t xfs_agfl_block(struct xfs_mount *mp);
 #endif
 #define XFS_AGFL_SIZE(mp)      ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
 
-/* -- nathans TODO ... use of BBSIZE here - should be sector size -- */
 typedef struct xfs_agfl {
-       xfs_agblock_t   agfl_bno[BBSIZE/sizeof(xfs_agblock_t)];
+       xfs_agblock_t   agfl_bno[1];    /* actually XFS_AGFL_SIZE(mp) */
 } xfs_agfl_t;
 
 /*
index 9b68c5f1aeda2b1fec50161ec161472ba9fddfa8..1983c3e6402804668a30cb9c5397bd6605ff654b 100644 (file)
@@ -192,9 +192,7 @@ typedef struct xfs_chashlist {
        struct xfs_inode        *chl_ip;
        xfs_daddr_t             chl_blkno;      /* starting block number of
                                                 * the cluster */
-#ifdef DEBUG
-       struct xfs_buf          *chl_buf;       /* debug: the inode buffer */
-#endif
+       struct xfs_buf          *chl_buf;       /* the inode buffer */
 } xfs_chashlist_t;
 
 typedef struct xfs_chash {
@@ -243,6 +241,7 @@ typedef struct xfs_inode {
        struct xfs_inode        *i_mprev;       /* ptr to prev inode */
        struct xfs_inode        **i_prevp;      /* ptr to prev i_next */
        struct xfs_mount        *i_mount;       /* fs mount struct ptr */
+       struct list_head        i_reclaim;      /* reclaim list */
        struct bhv_desc         i_bhv_desc;     /* inode behavior descriptor*/
        struct xfs_dquot        *i_udquot;      /* user dquot */
        struct xfs_dquot        *i_gdquot;      /* group dquot */
@@ -479,7 +478,7 @@ void                xfs_iunlock_map_shared(xfs_inode_t *, uint);
 void           xfs_ifunlock(xfs_inode_t *);
 void           xfs_ireclaim(xfs_inode_t *);
 int            xfs_finish_reclaim(xfs_inode_t *, int, int);
-int            xfs_finish_reclaim_all(struct xfs_mount *);
+int            xfs_finish_reclaim_all(struct xfs_mount *, int);
 
 /*
  * xfs_inode.c prototypes.
index 7b280da83a4dc3980087656a0026f1f376bf012a..1bb2452296b5a2e6f4e6d6ae7233c9e54cd88aa4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -153,7 +153,6 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
 int      xfs_log_force(struct xfs_mount *mp,
                        xfs_lsn_t        lsn,
                        uint             flags);
-int      xfs_log_init(void);
 int      xfs_log_mount(struct xfs_mount *mp,
                        dev_t            log_dev,
                        xfs_daddr_t              start_block,
index 37bcde28d9678c953787404316c32f0694fee321..bc36172162bf363eccc9948087484b40c84081ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -73,6 +73,9 @@ int xlog_btolrbb(int b);
 
 #define XLOG_HEADER_SIZE       512
 
+#define XLOG_REC_SHIFT(log) \
+       BTOBB(1 << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \
+        XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 #define XLOG_TOTAL_REC_SHIFT(log) \
        BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \
         XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
@@ -202,9 +205,9 @@ void xlog_grant_add_space(struct log *log, int bytes, int type);
 #define LOG_LOCK(log)          mutex_spinlock(&(log)->l_icloglock)
 #define LOG_UNLOCK(log, s)     mutex_spinunlock(&(log)->l_icloglock, s)
 
-#define xlog_panic(s)          {cmn_err(CE_PANIC, s); }
-#define xlog_exit(s)           {cmn_err(CE_PANIC, s); }
-#define xlog_warn(s)           {cmn_err(CE_WARN, s); }
+#define xlog_panic(args...)    cmn_err(CE_PANIC, ## args)
+#define xlog_exit(args...)     cmn_err(CE_PANIC, ## args)
+#define xlog_warn(args...)     cmn_err(CE_WARN, ## args)
 
 /*
  * In core log state
@@ -403,6 +406,7 @@ typedef struct xlog_rec_ext_header {
        uint      xh_cycle;     /* write cycle of log                   : 4 */
        uint      xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*    : 256 */
 } xlog_rec_ext_header_t;
+
 #ifdef __KERNEL__
 /*
  * - A log record header is 512 bytes.  There is plenty of room to grow the
@@ -441,12 +445,10 @@ typedef struct xlog_iclog_fields {
        char                    *ic_datap;      /* pointer to iclog data */
 } xlog_iclog_fields_t;
 
-typedef struct xlog_in_core2 {
-       union {
-               xlog_rec_header_t hic_header;
-               xlog_rec_ext_header_t hic_xheader;
-               char              hic_sector[XLOG_HEADER_SIZE];
-       } ic_h;
+typedef union xlog_in_core2 {
+       xlog_rec_header_t       hic_header;
+       xlog_rec_ext_header_t   hic_xheader;
+       char                    hic_sector[XLOG_HEADER_SIZE];
 } xlog_in_core_2_t;
 
 typedef struct xlog_in_core {
@@ -473,7 +475,7 @@ typedef struct xlog_in_core {
 #define        ic_bwritecnt    hic_fields.ic_bwritecnt
 #define        ic_state        hic_fields.ic_state
 #define ic_datap       hic_fields.ic_datap
-#define ic_header      hic_data->ic_h.hic_header
+#define ic_header      hic_data->hic_header
 
 /*
  * The reservation head lsn is not made up of a cycle number and block number.
@@ -530,8 +532,11 @@ typedef struct log {
     uint               l_flags;
     uint               l_quotaoffs_flag;/* XFS_DQ_*, if QUOTAOFFs found */
     struct xfs_buf_cancel **l_buf_cancel_table;
+    int                        l_stripemask;   /* log stripe mask */
     int                        l_iclog_hsize;  /* size of iclog header */
     int                        l_iclog_heads;  /* number of iclog header sectors */
+    uint               l_sectbb_log;   /* log2 of sector size in bbs */
+    uint               l_sectbb_mask;  /* sector size in bbs alignment mask */
 } xlog_t;
 
 
@@ -546,11 +551,13 @@ extern int         xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk);
 extern int      xlog_recover(xlog_t *log, int readonly);
 extern int      xlog_recover_finish(xlog_t *log, int mfsi_flags);
 extern void     xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog);
-extern struct xfs_buf *xlog_get_bp(int,xfs_mount_t *);
-extern void     xlog_put_bp(struct xfs_buf *);
-extern int      xlog_bread(xlog_t *, xfs_daddr_t blkno, int bblks, struct xfs_buf *bp);
 extern void     xlog_recover_process_iunlinks(xlog_t *log);
 
+extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
+extern void     xlog_put_bp(struct xfs_buf *);
+extern int      xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
+extern xfs_caddr_t xlog_align(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
+
 #define XLOG_TRACE_GRAB_FLUSH  1
 #define XLOG_TRACE_REL_FLUSH   2
 #define XLOG_TRACE_SLEEP_FLUSH 3
index 0d3c9338ede3a1bd32044af23a8b96d8703514b6..27c5df0b70b81be18b2afccdf5324658afeed866 100644 (file)
@@ -68,6 +68,7 @@ typedef struct xfs_trans_reservations {
        ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
 #else
 struct cred;
+struct log;
 struct vfs;
 struct vnode;
 struct xfs_mount_args;
@@ -296,13 +297,14 @@ typedef struct xfs_mount {
        int                     m_ihsize;       /* size of next field */
        struct xfs_ihash        *m_ihash;       /* fs private inode hash table*/
        struct xfs_inode        *m_inodes;      /* active inode list */
+       struct list_head        m_del_inodes;   /* inodes to reclaim */
        mutex_t                 m_ilock;        /* inode list mutex */
        uint                    m_ireclaims;    /* count of calls to reclaim*/
        uint                    m_readio_log;   /* min read size log bytes */
        uint                    m_readio_blocks; /* min read size blocks */
        uint                    m_writeio_log;  /* min write size log bytes */
        uint                    m_writeio_blocks; /* min write size blocks */
-       void                    *m_log;         /* log specific stuff */
+       struct log              *m_log;         /* log specific stuff */
        int                     m_logbufs;      /* number of log buffers */
        int                     m_logbsize;     /* size of each log buffer */
        uint                    m_rsumlevels;   /* rt summary levels */
@@ -357,7 +359,6 @@ typedef struct xfs_mount {
 #endif
        int                     m_dalign;       /* stripe unit */
        int                     m_swidth;       /* stripe width */
-       int                     m_lstripemask;  /* log stripe mask */
        int                     m_sinoalign;    /* stripe unit inode alignmnt */
        int                     m_attr_magicpct;/* 37% of the blocksize */
        int                     m_dir_magicpct; /* 37% of the dir blocksize */
@@ -383,8 +384,6 @@ typedef struct xfs_mount {
                                                 * snapshot */
        sv_t                    m_wait_unfreeze;/* waiting to unfreeze */
        atomic_t                m_active_trans; /* number trans frozen */
-       struct timer_list       m_sbdirty_timer;/* superblock dirty timer
-                                                * for nfs refcache */
 } xfs_mount_t;
 
 /*
index 52c768e4e964ec18899f5ccd9df0ee84586208c4..5c3ca99d0895cbf056c3194586b613cf0f03a49c 100644 (file)
@@ -246,6 +246,7 @@ typedef struct { dev_t dev; } xfs_buftarg_t;
 #define m_ddev_targp   m_dev
 #define unlikely(x)    (x)
 #define kdev_none(x)   (!(x))
+#define INIT_LIST_HEAD(x)
 #define KERN_WARNING
 #define XFS_ERROR(e)   (e)
 #define XFS_ERRLEVEL_LOW               1
index 7e476a6382c55fb4218923d0f23569f0139fbc5b..62978f1e2bee769619e6df1ec8de8981ed4df7b2 100644 (file)
@@ -129,7 +129,6 @@ xfs_ialloc_ag_alloc(
        int             ninodes;        /* num inodes per buf */
        xfs_agino_t     thisino;        /* current inode number, for loop */
        int             version;        /* inode version number to use */
-       static xfs_timestamp_t ztime;   /* zero xfs timestamp */
        int             isaligned;      /* inode allocation at stripe unit */
                                        /* boundary */
        xfs_dinode_core_t dic;          /* a dinode_core to copy to new */
@@ -243,6 +242,11 @@ xfs_ialloc_ag_alloc(
                version = XFS_DINODE_VERSION_2;
        else
                version = XFS_DINODE_VERSION_1;
+
+       memset(&dic, 0, sizeof(xfs_dinode_core_t));
+       INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+       INT_SET(dic.di_version, ARCH_CONVERT, version);
+
        for (j = 0; j < nbufs; j++) {
                /*
                 * Get the block.
@@ -257,36 +261,6 @@ xfs_ialloc_ag_alloc(
                /*
                 * Loop over the inodes in this buffer.
                 */
-               INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
-               INT_ZERO(dic.di_mode, ARCH_CONVERT);
-               INT_SET(dic.di_version, ARCH_CONVERT, version);
-               INT_ZERO(dic.di_format, ARCH_CONVERT);
-               INT_ZERO(dic.di_onlink, ARCH_CONVERT);
-               INT_ZERO(dic.di_uid, ARCH_CONVERT);
-               INT_ZERO(dic.di_gid, ARCH_CONVERT);
-               INT_ZERO(dic.di_nlink, ARCH_CONVERT);
-               INT_ZERO(dic.di_projid, ARCH_CONVERT);
-               memset(&(dic.di_pad[0]), 0, sizeof(dic.di_pad));
-               INT_SET(dic.di_atime.t_sec, ARCH_CONVERT, ztime.t_sec);
-               INT_SET(dic.di_atime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
-
-               INT_SET(dic.di_mtime.t_sec, ARCH_CONVERT, ztime.t_sec);
-               INT_SET(dic.di_mtime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
-
-               INT_SET(dic.di_ctime.t_sec, ARCH_CONVERT, ztime.t_sec);
-               INT_SET(dic.di_ctime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
-
-               INT_ZERO(dic.di_size, ARCH_CONVERT);
-               INT_ZERO(dic.di_nblocks, ARCH_CONVERT);
-               INT_ZERO(dic.di_extsize, ARCH_CONVERT);
-               INT_ZERO(dic.di_nextents, ARCH_CONVERT);
-               INT_ZERO(dic.di_anextents, ARCH_CONVERT);
-               INT_ZERO(dic.di_forkoff, ARCH_CONVERT);
-               INT_ZERO(dic.di_aformat, ARCH_CONVERT);
-               INT_ZERO(dic.di_dmevmask, ARCH_CONVERT);
-               INT_ZERO(dic.di_dmstate, ARCH_CONVERT);
-               INT_ZERO(dic.di_flags, ARCH_CONVERT);
-               INT_ZERO(dic.di_gen, ARCH_CONVERT);
 
                for (i = 0; i < ninodes; i++) {
                        free = XFS_MAKE_IPTR(args.mp, fbuf, i);
index b1317d1f112bd7fd767fee051cee35c7014f5601..25d5a0753733499e1ef763bed34ad528f5047b08 100644 (file)
@@ -750,6 +750,8 @@ xfs_iread(
                        XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
        }
 
+       INIT_LIST_HEAD(&ip->i_reclaim);
+
        /*
         * The inode format changed when we moved the link count and
         * made it 32 bits long.  If this is an old format inode,
index b08248f174cfe14bbf7fab5d516cd39149f4234d..3c1258f8a4e7d58b046d6129cc0d2b113c2c9103 100644 (file)
@@ -55,16 +55,7 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
        mp->m_blockmask = sbp->sb_blocksize - 1;
        mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
        mp->m_blockwmask = mp->m_blockwsize - 1;
-
-
-       if (XFS_SB_VERSION_HASLOGV2(sbp)) {
-               if (sbp->sb_logsunit <= 1) {
-                       mp->m_lstripemask = 1;
-               } else {
-                       mp->m_lstripemask =
-                               1 << xfs_highbit32(sbp->sb_logsunit >> BBSHIFT);
-               }
-       }
+       INIT_LIST_HEAD(&mp->m_del_inodes);
 
        /*
         * Setup for attributes, in case they get created.
index a43a99404cf59e3c07328f1ecc79a1819c283498..2ce5a5845d78c281d1d07528cf70a8afd22d326b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
 
 #include <xfs/libxlog.h>
 
+#define xlog_unpack_data_checksum(rhead, dp, log)      ((void)0)
+#define xlog_clear_stale_blocks(log, tail_lsn)         (0)
+#define xfs_readonly_buftarg(buftarg)                  (0)
+
 /*
  * This routine finds (to an approximation) the first block in the physical
  * log which contains the given cycle.  It uses a binary search algorithm.
  * necessarily be perfect.
  */
 int
-xlog_find_cycle_start(xlog_t           *log,
-                     xfs_buf_t         *bp,
-                     xfs_daddr_t       first_blk,
-                     xfs_daddr_t       *last_blk,
-                     uint              cycle)
+xlog_find_cycle_start(
+       xlog_t          *log,
+       xfs_buf_t       *bp,
+       xfs_daddr_t     first_blk,
+       xfs_daddr_t     *last_blk,
+       uint            cycle)
 {
+       xfs_caddr_t     offset;
        xfs_daddr_t     mid_blk;
        uint            mid_cycle;
        int             error;
@@ -53,7 +59,8 @@ xlog_find_cycle_start(xlog_t          *log,
        while (mid_blk != first_blk && mid_blk != *last_blk) {
                if ((error = xlog_bread(log, mid_blk, 1, bp)))
                        return error;
-               mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
+               offset = xlog_align(log, mid_blk, 1, bp);
+               mid_cycle = GET_CYCLE(offset, ARCH_CONVERT);
                if (mid_cycle == cycle) {
                        *last_blk = mid_blk;
                        /* last_half_cycle == mid_cycle */
@@ -67,8 +74,7 @@ xlog_find_cycle_start(xlog_t          *log,
               (mid_blk == *last_blk && mid_blk-1 == first_blk));
 
        return 0;
-}      /* xlog_find_cycle_start */
-
+}
 
 /*
  * Check that the range of blocks does not contain the cycle number
@@ -80,27 +86,27 @@ xlog_find_cycle_start(xlog_t                *log,
  * Set blkno to -1 if we encounter no errors.  This is an invalid block number
  * since we don't ever expect logs to get this large.
  */
-
 STATIC int
-xlog_find_verify_cycle( xlog_t         *log,
-                       xfs_daddr_t     start_blk,
-                       int             nbblks,
-                       uint            stop_on_cycle_no,
-                       xfs_daddr_t     *new_blk)
+xlog_find_verify_cycle(
+       xlog_t          *log,
+       xfs_daddr_t     start_blk,
+       int             nbblks,
+       uint            stop_on_cycle_no,
+       xfs_daddr_t     *new_blk)
 {
-       xfs_daddr_t             i, j;
-       uint                    cycle;
-       xfs_buf_t               *bp;
-       char                    *buf        = NULL;
-       int                     error       = 0;
-       xfs_daddr_t             bufblks;
+       xfs_daddr_t     i, j;
+       uint            cycle;
+       xfs_buf_t       *bp;
+       xfs_daddr_t     bufblks;
+       xfs_caddr_t     buf = NULL;
+       int             error = 0;
 
        bufblks = 1 << ffs(nbblks);
 
-       while (!(bp = xlog_get_bp(bufblks, log->l_mp))) {
+       while (!(bp = xlog_get_bp(log, bufblks))) {
                /* can't get enough memory to do everything in one big buffer */
                bufblks >>= 1;
-               if (!bufblks)
+               if (bufblks <= log->l_sectbb_log)
                        return ENOMEM;
        }
 
@@ -112,7 +118,7 @@ xlog_find_verify_cycle( xlog_t              *log,
                if ((error = xlog_bread(log, i, bcount, bp)))
                        goto out;
 
-               buf = XFS_BUF_PTR(bp);
+               buf = xlog_align(log, i, bcount, bp);
                for (j = 0; j < bcount; j++) {
                        cycle = GET_CYCLE(buf, ARCH_CONVERT);
                        if (cycle == stop_on_cycle_no) {
@@ -128,10 +134,8 @@ xlog_find_verify_cycle( xlog_t             *log,
 
 out:
        xlog_put_bp(bp);
-
        return error;
-}      /* xlog_find_verify_cycle */
-
+}
 
 /*
  * Potentially backup over partial log record write.
@@ -145,98 +149,103 @@ out:
  * extra_bblks is the number of blocks potentially verified on a previous
  * call to this routine.
  */
-
 STATIC int
-xlog_find_verify_log_record(xlog_t     *log,
-                           xfs_daddr_t start_blk,
-                           xfs_daddr_t *last_blk,
-                           int         extra_bblks)
+xlog_find_verify_log_record(
+       xlog_t                  *log,
+       xfs_daddr_t             start_blk,
+       xfs_daddr_t             *last_blk,
+       int                     extra_bblks)
 {
-    xfs_daddr_t         i;
-    xfs_buf_t          *bp;
-    char                *buf        = NULL;
-    xlog_rec_header_t  *head       = NULL;
-    int                        error       = 0;
-    int                 smallmem    = 0;
-    int                 num_blks    = *last_blk - start_blk;
-    int                        xhdrs;
-
-    ASSERT(start_blk != 0 || *last_blk != start_blk);
-
-    if (!(bp = xlog_get_bp(num_blks, log->l_mp))) {
-       if (!(bp = xlog_get_bp(1, log->l_mp)))
-           return ENOMEM;
-       smallmem = 1;
-       buf = XFS_BUF_PTR(bp);
-    } else {
-       if ((error = xlog_bread(log, start_blk, num_blks, bp)))
-           goto out;
-       buf = XFS_BUF_PTR(bp) + ((num_blks - 1) << BBSHIFT);
-    }
-
-    for (i = (*last_blk) - 1; i >= 0; i--) {
-       if (i < start_blk) {
-           /* legal log record not found */
-           xlog_warn("XFS: Log inconsistent (didn't find previous header)");
-           ASSERT(0);
-           error = XFS_ERROR(EIO);
-           goto out;
+       xfs_daddr_t             i;
+       xfs_buf_t               *bp;
+       xfs_caddr_t             offset = NULL;
+       xlog_rec_header_t       *head = NULL;
+       int                     error = 0;
+       int                     smallmem = 0;
+       int                     num_blks = *last_blk - start_blk;
+       int                     xhdrs;
+
+       ASSERT(start_blk != 0 || *last_blk != start_blk);
+
+       if (!(bp = xlog_get_bp(log, num_blks))) {
+               if (!(bp = xlog_get_bp(log, 1)))
+                       return ENOMEM;
+               smallmem = 1;
+       } else {
+               if ((error = xlog_bread(log, start_blk, num_blks, bp)))
+                       goto out;
+               offset = xlog_align(log, start_blk, num_blks, bp);
+               offset += ((num_blks - 1) << BBSHIFT);
        }
 
-       if (smallmem && (error = xlog_bread(log, i, 1, bp)))
-           goto out;
-       head = (xlog_rec_header_t*)buf;
-
-       if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM)
-           break;
-
-       if (!smallmem)
-           buf -= BBSIZE;
-    }
-
-    /*
-     * We hit the beginning of the physical log & still no header.  Return
-     * to caller.  If caller can handle a return of -1, then this routine
-     * will be called again for the end of the physical log.
-     */
-    if (i == -1) {
-       error = -1;
-       goto out;
-    }
-
-    /* we have the final block of the good log (the first block
-     * of the log record _before_ the head. So we check the uuid.
-     */
-
-    if ((error = xlog_header_check_mount(log->l_mp, head)))
-       goto out;
-
-    /*
-     * We may have found a log record header before we expected one.
-     * last_blk will be the 1st block # with a given cycle #.  We may end
-     * up reading an entire log record.  In this case, we don't want to
-     * reset last_blk.  Only when last_blk points in the middle of a log
-     * record do we update last_blk.
-     */
-    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-       uint    h_size = INT_GET(head->h_size, ARCH_CONVERT);
-
-       xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
-       if (h_size % XLOG_HEADER_CYCLE_SIZE)
-               xhdrs++;
-    } else {
-       xhdrs = 1;
-    }
-
-    if (*last_blk - i + extra_bblks
-               != BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+xhdrs)
-           *last_blk = i;
+       for (i = (*last_blk) - 1; i >= 0; i--) {
+               if (i < start_blk) {
+                       /* legal log record not found */
+                       xlog_warn(
+               "XFS: Log inconsistent (didn't find previous header)");
+                       ASSERT(0);
+                       error = XFS_ERROR(EIO);
+                       goto out;
+               }
 
-out:
-    xlog_put_bp(bp);
+               if (smallmem) {
+                       if ((error = xlog_bread(log, i, 1, bp)))
+                               goto out;
+                       offset = xlog_align(log, i, 1, bp);
+               }
+
+               head = (xlog_rec_header_t *)offset;
 
-    return error;
-}      /* xlog_find_verify_log_record */
+               if (XLOG_HEADER_MAGIC_NUM ==
+                   INT_GET(head->h_magicno, ARCH_CONVERT))
+                       break;
+
+               if (!smallmem)
+                       offset -= BBSIZE;
+       }
+
+       /*
+        * We hit the beginning of the physical log & still no header.  Return
+        * to caller.  If caller can handle a return of -1, then this routine
+        * will be called again for the end of the physical log.
+        */
+       if (i == -1) {
+               error = -1;
+               goto out;
+       }
+
+       /*
+        * We have the final block of the good log (the first block
+        * of the log record _before_ the head. So we check the uuid.
+        */
+       if ((error = xlog_header_check_mount(log->l_mp, head)))
+               goto out;
+
+       /*
+        * We may have found a log record header before we expected one.
+        * last_blk will be the 1st block # with a given cycle #.  We may end
+        * up reading an entire log record.  In this case, we don't want to
+        * reset last_blk.  Only when last_blk points in the middle of a log
+        * record do we update last_blk.
+        */
+       if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+               uint    h_size = INT_GET(head->h_size, ARCH_CONVERT);
+
+               xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
+               if (h_size % XLOG_HEADER_CYCLE_SIZE)
+                       xhdrs++;
+       } else {
+               xhdrs = 1;
+       }
+
+       if (*last_blk - i + extra_bblks
+                       != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs)
+               *last_blk = i;
+
+out:
+       xlog_put_bp(bp);
+       return error;
+}
 
 /*
  * Head is defined to be the point of the log where the next log write
@@ -249,252 +258,257 @@ out:
  * last_blk contains the block number of the first block with a given
  * cycle number.
  *
- * Also called from xfs_log_print.c
- *
  * Return: zero if normal, non-zero if error.
  */
 int
-xlog_find_head(xlog_t  *log,
-              xfs_daddr_t *return_head_blk)
+xlog_find_head(
+       xlog_t          *log,
+       xfs_daddr_t     *return_head_blk)
 {
-    xfs_buf_t   *bp;
-    xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
-    int     num_scan_bblks;
-    uint    first_half_cycle, last_half_cycle;
-    uint    stop_on_cycle;
-    int     error, log_bbnum = log->l_logBBsize;
-
-    /* Is the end of the log device zeroed? */
-    if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
-       *return_head_blk = first_blk;
-
-       /* is the whole lot zeroed? */
-       if (!first_blk) {
-           /* Linux XFS shouldn't generate totally zeroed logs -
-            * mkfs etc write a dummy unmount record to a fresh
-            * log so we can store the uuid in there
-            */
-           xlog_warn("XFS: totally zeroed log");
+       xfs_buf_t       *bp;
+       xfs_caddr_t     offset;
+       xfs_daddr_t     new_blk, first_blk, start_blk, last_blk, head_blk;
+       int             num_scan_bblks;
+       uint            first_half_cycle, last_half_cycle;
+       uint            stop_on_cycle;
+       int             error, log_bbnum = log->l_logBBsize;
+
+       /* Is the end of the log device zeroed? */
+       if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
+               *return_head_blk = first_blk;
+
+               /* Is the whole lot zeroed? */
+               if (!first_blk) {
+                       /* Linux XFS shouldn't generate totally zeroed logs -
+                        * mkfs etc write a dummy unmount record to a fresh
+                        * log so we can store the uuid in there
+                        */
+                       xlog_warn("XFS: totally zeroed log");
+               }
+
+               return 0;
+       } else if (error) {
+               xlog_warn("XFS: empty log check failed");
+               return error;
        }
 
-       return 0;
-    } else if (error) {
-       xlog_warn("XFS: empty log check failed");
-       return error;
-    }
-
-    first_blk = 0;                             /* get cycle # of 1st block */
-    bp = xlog_get_bp(1,log->l_mp);
-    if (!bp)
-       return ENOMEM;
-    if ((error = xlog_bread(log, 0, 1, bp)))
-       goto bp_err;
-    first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
-
-    last_blk = head_blk = log_bbnum-1;         /* get cycle # of last block */
-    if ((error = xlog_bread(log, last_blk, 1, bp)))
-       goto bp_err;
-    last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
-    ASSERT(last_half_cycle != 0);
-
-    /*
-     * If the 1st half cycle number is equal to the last half cycle number,
-     * then the entire log is stamped with the same cycle number.  In this
-     * case, head_blk can't be set to zero (which makes sense).  The below
-     * math doesn't work out properly with head_blk equal to zero.  Instead,
-     * we set it to log_bbnum which is an illegal block number, but this
-     * value makes the math correct.  If head_blk doesn't changed through
-     * all the tests below, *head_blk is set to zero at the very end rather
-     * than log_bbnum.  In a sense, log_bbnum and zero are the same block
-     * in a circular file.
-     */
-    if (first_half_cycle == last_half_cycle) {
-       /*
-        * In this case we believe that the entire log should have cycle
-        * number last_half_cycle.  We need to scan backwards from the
-        * end verifying that there are no holes still containing
-        * last_half_cycle - 1.  If we find such a hole, then the start
-        * of that hole will be the new head.  The simple case looks like
-        *        x | x ... | x - 1 | x
-        * Another case that fits this picture would be
-        *        x | x + 1 | x ... | x
-        * In this case the head really is somwhere at the end of the
-        * log, as one of the latest writes at the beginning was incomplete.
-        * One more case is
-        *        x | x + 1 | x ... | x - 1 | x
-        * This is really the combination of the above two cases, and the
-        * head has to end up at the start of the x-1 hole at the end of
-        * the log.
-        *
-        * In the 256k log case, we will read from the beginning to the
-        * end of the log and search for cycle numbers equal to x-1.  We
-        * don't worry about the x+1 blocks that we encounter, because
-        * we know that they cannot be the head since the log started with
-        * x.
-        */
-       head_blk = log_bbnum;
-       stop_on_cycle = last_half_cycle - 1;
-    } else {
+       first_blk = 0;                  /* get cycle # of 1st block */
+       bp = xlog_get_bp(log, 1);
+       if (!bp)
+               return ENOMEM;
+       if ((error = xlog_bread(log, 0, 1, bp)))
+               goto bp_err;
+       offset = xlog_align(log, 0, 1, bp);
+       first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+
+       last_blk = head_blk = log_bbnum - 1;    /* get cycle # of last block */
+       if ((error = xlog_bread(log, last_blk, 1, bp)))
+               goto bp_err;
+       offset = xlog_align(log, last_blk, 1, bp);
+       last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+       ASSERT(last_half_cycle != 0);
+
        /*
-        * In this case we want to find the first block with cycle number
-        * matching last_half_cycle.  We expect the log to be some
-        * variation on
-        *        x + 1 ... | x ...
-        * The first block with cycle number x (last_half_cycle) will be
-        * where the new head belongs.  First we do a binary search for
-        * the first occurrence of last_half_cycle.  The binary search
-        * may not be totally accurate, so then we scan back from there
-        * looking for occurrences of last_half_cycle before us.  If
-        * that backwards scan wraps around the beginning of the log,
-        * then we look for occurrences of last_half_cycle - 1 at the
-        * end of the log.  The cases we're looking for look like
-        *        x + 1 ... | x | x + 1 | x ...
-        *                               ^ binary search stopped here
-        * or
-        *        x + 1 ... | x ... | x - 1 | x
-        *        <---------> less than scan distance
+        * If the 1st half cycle number is equal to the last half cycle number,
+        * then the entire log is stamped with the same cycle number.  In this
+        * case, head_blk can't be set to zero (which makes sense).  The below
+        * math doesn't work out properly with head_blk equal to zero.  Instead,
+        * we set it to log_bbnum which is an illegal block number, but this
+        * value makes the math correct.  If head_blk doesn't change through
+        * all the tests below, *head_blk is set to zero at the very end rather
+        * than log_bbnum.  In a sense, log_bbnum and zero are the same block
+        * in a circular file.
         */
-       stop_on_cycle = last_half_cycle;
-       if ((error = xlog_find_cycle_start(log, bp, first_blk,
-                                         &head_blk, last_half_cycle)))
-           goto bp_err;
-    }
-
-    /*
-     * Now validate the answer.  Scan back some number of maximum possible
-     * blocks and make sure each one has the expected cycle number.  The
-     * maximum is determined by the total possible amount of buffering
-     * in the in-core log.  The following number can be made tighter if
-     * we actually look at the block size of the filesystem.
-     */
-    num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
-    if (head_blk >= num_scan_bblks) {
+       if (first_half_cycle == last_half_cycle) {
+               /*
+                * In this case we believe that the entire log should have
+                * cycle number last_half_cycle.  We need to scan backwards
+                * from the end verifying that there are no holes still
+                * containing last_half_cycle - 1.  If we find such a hole,
+                * then the start of that hole will be the new head.  The
+                * simple case looks like
+                *        x | x ... | x - 1 | x
+                * Another case that fits this picture would be
+                *        x | x + 1 | x ... | x
+                * In this case the head really is somewhere at the end of the
+                * log, as one of the latest writes at the beginning was
+                * incomplete.
+                * One more case is
+                *        x | x + 1 | x ... | x - 1 | x
+                * This is really the combination of the above two cases, and
+                * the head has to end up at the start of the x-1 hole at the
+                * end of the log.
+                *
+                * In the 256k log case, we will read from the beginning to the
+                * end of the log and search for cycle numbers equal to x-1.
+                * We don't worry about the x+1 blocks that we encounter,
+                * because we know that they cannot be the head since the log
+                * started with x.
+                */
+               head_blk = log_bbnum;
+               stop_on_cycle = last_half_cycle - 1;
+       } else {
+               /*
+                * In this case we want to find the first block with cycle
+                * number matching last_half_cycle.  We expect the log to be
+                * some variation on
+                *        x + 1 ... | x ...
+                * The first block with cycle number x (last_half_cycle) will
+                * be where the new head belongs.  First we do a binary search
+                * for the first occurrence of last_half_cycle.  The binary
+                * search may not be totally accurate, so then we scan back
+                * from there looking for occurrences of last_half_cycle before
+                * us.  If that backwards scan wraps around the beginning of
+                * the log, then we look for occurrences of last_half_cycle - 1
+                * at the end of the log.  The cases we're looking for look
+                * like
+                *        x + 1 ... | x | x + 1 | x ...
+                *                               ^ binary search stopped here
+                * or
+                *        x + 1 ... | x ... | x - 1 | x
+                *        <---------> less than scan distance
+                */
+               stop_on_cycle = last_half_cycle;
+               if ((error = xlog_find_cycle_start(log, bp, first_blk,
+                                               &head_blk, last_half_cycle)))
+                       goto bp_err;
+       }
+
        /*
-        * We are guaranteed that the entire check can be performed
-        * in one buffer.
+        * Now validate the answer.  Scan back some number of maximum possible
+        * blocks and make sure each one has the expected cycle number.  The
+        * maximum is determined by the total possible amount of buffering
+        * in the in-core log.  The following number can be made tighter if
+        * we actually look at the block size of the filesystem.
         */
-       start_blk = head_blk - num_scan_bblks;
-       if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks,
-                                        stop_on_cycle, &new_blk)))
-           goto bp_err;
-       if (new_blk != -1)
-           head_blk = new_blk;
-    } else {                   /* need to read 2 parts of log */
+       num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
+       if (head_blk >= num_scan_bblks) {
+               /*
+                * We are guaranteed that the entire check can be performed
+                * in one buffer.
+                */
+               start_blk = head_blk - num_scan_bblks;
+               if ((error = xlog_find_verify_cycle(log,
+                                               start_blk, num_scan_bblks,
+                                               stop_on_cycle, &new_blk)))
+                       goto bp_err;
+               if (new_blk != -1)
+                       head_blk = new_blk;
+       } else {                /* need to read 2 parts of log */
+               /*
+                * We are going to scan backwards in the log in two parts.
+                * First we scan the physical end of the log.  In this part
+                * of the log, we are looking for blocks with cycle number
+                * last_half_cycle - 1.
+                * If we find one, then we know that the log starts there, as
+                * we've found a hole that didn't get written in going around
+                * the end of the physical log.  The simple case for this is
+                *        x + 1 ... | x ... | x - 1 | x
+                *        <---------> less than scan distance
+                * If all of the blocks at the end of the log have cycle number
+                * last_half_cycle, then we check the blocks at the start of
+                * the log looking for occurrences of last_half_cycle.  If we
+                * find one, then our current estimate for the location of the
+                * first occurrence of last_half_cycle is wrong and we move
+                * back to the hole we've found.  This case looks like
+                *        x + 1 ... | x | x + 1 | x ...
+                *                               ^ binary search stopped here
+                * Another case we need to handle that only occurs in 256k
+                * logs is
+                *        x + 1 ... | x ... | x+1 | x ...
+                *                   ^ binary search stops here
+                * In a 256k log, the scan at the end of the log will see the
+                * x + 1 blocks.  We need to skip past those since that is
+                * certainly not the head of the log.  By searching for
+                * last_half_cycle-1 we accomplish that.
+                */
+               start_blk = log_bbnum - num_scan_bblks + head_blk;
+               ASSERT(head_blk <= INT_MAX &&
+                       (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
+               if ((error = xlog_find_verify_cycle(log, start_blk,
+                                       num_scan_bblks - (int)head_blk,
+                                       (stop_on_cycle - 1), &new_blk)))
+                       goto bp_err;
+               if (new_blk != -1) {
+                       head_blk = new_blk;
+                       goto bad_blk;
+               }
+
+               /*
+                * Scan beginning of log now.  The last part of the physical
+                * log is good.  This scan needs to verify that it doesn't find
+                * the last_half_cycle.
+                */
+               start_blk = 0;
+               ASSERT(head_blk <= INT_MAX);
+               if ((error = xlog_find_verify_cycle(log,
+                                       start_blk, (int)head_blk,
+                                       stop_on_cycle, &new_blk)))
+                       goto bp_err;
+               if (new_blk != -1)
+                       head_blk = new_blk;
+       }
+
+ bad_blk:
        /*
-        * We are going to scan backwards in the log in two parts.  First
-        * we scan the physical end of the log.  In this part of the log,
-        * we are looking for blocks with cycle number last_half_cycle - 1.
-        * If we find one, then we know that the log starts there, as we've
-        * found a hole that didn't get written in going around the end
-        * of the physical log.  The simple case for this is
-        *        x + 1 ... | x ... | x - 1 | x
-        *        <---------> less than scan distance
-        * If all of the blocks at the end of the log have cycle number
-        * last_half_cycle, then we check the blocks at the start of the
-        * log looking for occurrences of last_half_cycle.  If we find one,
-        * then our current estimate for the location of the first
-        * occurrence of last_half_cycle is wrong and we move back to the
-        * hole we've found.  This case looks like
-        *        x + 1 ... | x | x + 1 | x ...
-        *                               ^ binary search stopped here
-        * Another case we need to handle that only occurs in 256k logs is
-        *        x + 1 ... | x ... | x+1 | x ...
-        *                   ^ binary search stops here
-        * In a 256k log, the scan at the end of the log will see the x+1
-        * blocks.  We need to skip past those since that is certainly not
-        * the head of the log.  By searching for last_half_cycle-1 we
-        * accomplish that.
+        * Now we need to make sure head_blk is not pointing to a block in
+        * the middle of a log record.
         */
-       start_blk = log_bbnum - num_scan_bblks + head_blk;
-       ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0);
-       if ((error = xlog_find_verify_cycle(log, start_blk,
-                       num_scan_bblks-(int)head_blk, (stop_on_cycle - 1),
-                       &new_blk)))
-               goto bp_err;
-       if (new_blk != -1) {
-           head_blk = new_blk;
-           goto bad_blk;
+       num_scan_bblks = XLOG_REC_SHIFT(log);
+       if (head_blk >= num_scan_bblks) {
+               start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
+
+               /* start ptr at last block ptr before head_blk */
+               if ((error = xlog_find_verify_log_record(log, start_blk,
+                                                       &head_blk, 0)) == -1) {
+                       error = XFS_ERROR(EIO);
+                       goto bp_err;
+               } else if (error)
+                       goto bp_err;
+       } else {
+               start_blk = 0;
+               ASSERT(head_blk <= INT_MAX);
+               if ((error = xlog_find_verify_log_record(log, start_blk,
+                                                       &head_blk, 0)) == -1) {
+                       /* We hit the beginning of the log during our search */
+                       start_blk = log_bbnum - num_scan_bblks + head_blk;
+                       new_blk = log_bbnum;
+                       ASSERT(start_blk <= INT_MAX &&
+                               (xfs_daddr_t) log_bbnum-start_blk >= 0);
+                       ASSERT(head_blk <= INT_MAX);
+                       if ((error = xlog_find_verify_log_record(log,
+                                                       start_blk, &new_blk,
+                                                       (int)head_blk)) == -1) {
+                               error = XFS_ERROR(EIO);
+                               goto bp_err;
+                       } else if (error)
+                               goto bp_err;
+                       if (new_blk != log_bbnum)
+                               head_blk = new_blk;
+               } else if (error)
+                       goto bp_err;
        }
 
+       xlog_put_bp(bp);
+       if (head_blk == log_bbnum)
+               *return_head_blk = 0;
+       else
+               *return_head_blk = head_blk;
        /*
-        * Scan beginning of log now.  The last part of the physical log
-        * is good.  This scan needs to verify that it doesn't find the
-        * last_half_cycle.
+        * When returning here, we have a good block number.  Bad block
+        * means that during a previous crash, we didn't have a clean break
+        * from cycle number N to cycle number N-1.  In this case, we need
+        * to find the first block with cycle number N-1.
         */
-       start_blk = 0;
-       ASSERT(head_blk <= INT_MAX);
-       if ((error = xlog_find_verify_cycle(log, start_blk, (int) head_blk,
-                                        stop_on_cycle, &new_blk)))
-           goto bp_err;
-       if (new_blk != -1)
-           head_blk = new_blk;
-    }
-
-bad_blk:
-    /*
-     * Now we need to make sure head_blk is not pointing to a block in
-     * the middle of a log record.
-     */
-    num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE);
-    if (head_blk >= num_scan_bblks) {
-       start_blk = head_blk - num_scan_bblks;  /* don't read head_blk */
-
-       /* start ptr at last block ptr before head_blk */
-       if ((error = xlog_find_verify_log_record(log,
-                                                start_blk,
-                                                &head_blk,
-                                                0)) == -1) {
-           error = XFS_ERROR(EIO);
-           goto bp_err;
-       } else if (error)
-           goto bp_err;
-    } else {
-       start_blk = 0;
-       ASSERT(head_blk <= INT_MAX);
-       if ((error = xlog_find_verify_log_record(log,
-                                                start_blk,
-                                                &head_blk,
-                                                0)) == -1) {
-           /* We hit the beginning of the log during our search */
-           start_blk = log_bbnum - num_scan_bblks + head_blk;
-           new_blk = log_bbnum;
-           ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0);
-           ASSERT(head_blk <= INT_MAX);
-           if ((error = xlog_find_verify_log_record(log,
-                                                    start_blk,
-                                                    &new_blk,
-                                                    (int)head_blk)) == -1) {
-               error = XFS_ERROR(EIO);
-               goto bp_err;
-           } else if (error)
-               goto bp_err;
-           if (new_blk != log_bbnum)
-               head_blk = new_blk;
-       } else if (error)
-           goto bp_err;
-    }
-
-    xlog_put_bp(bp);
-    if (head_blk == log_bbnum)
-           *return_head_blk = 0;
-    else
-           *return_head_blk = head_blk;
-    /*
-     * When returning here, we have a good block number.  Bad block
-     * means that during a previous crash, we didn't have a clean break
-     * from cycle number N to cycle number N-1.  In this case, we need
-     * to find the first block with cycle number N-1.
-     */
-    return 0;
+       return 0;
 
-bp_err:
+ bp_err:
        xlog_put_bp(bp);
 
        if (error)
            xlog_warn("XFS: failed to find log head");
-
        return error;
-}      /* xlog_find_head */
+}
 
 /*
  * Find the sync block number or the tail of the log.
@@ -513,13 +527,15 @@ bp_err:
  * available.
  */
 int
-xlog_find_tail(xlog_t          *log,
-              xfs_daddr_t      *head_blk,
-              xfs_daddr_t      *tail_blk,
-              int              readonly)
+xlog_find_tail(
+       xlog_t                  *log,
+       xfs_daddr_t             *head_blk,
+       xfs_daddr_t             *tail_blk,
+       int                     readonly)
 {
        xlog_rec_header_t       *rhead;
        xlog_op_header_t        *op_head;
+       xfs_caddr_t             offset = NULL;
        xfs_buf_t               *bp;
        int                     error, i, found;
        xfs_daddr_t             umount_data_blk;
@@ -535,13 +551,14 @@ xlog_find_tail(xlog_t             *log,
        if ((error = xlog_find_head(log, head_blk)))
                return error;
 
-       bp = xlog_get_bp(1,log->l_mp);
+       bp = xlog_get_bp(log, 1);
        if (!bp)
                return ENOMEM;
        if (*head_blk == 0) {                           /* special case */
                if ((error = xlog_bread(log, 0, 1, bp)))
                        goto bread_err;
-               if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) {
+               offset = xlog_align(log, 0, 1, bp);
+               if (GET_CYCLE(offset, ARCH_CONVERT) == 0) {
                        *tail_blk = 0;
                        /* leave all other log inited values alone */
                        goto exit;
@@ -555,8 +572,9 @@ xlog_find_tail(xlog_t               *log,
        for (i = (int)(*head_blk) - 1; i >= 0; i--) {
                if ((error = xlog_bread(log, i, 1, bp)))
                        goto bread_err;
+               offset = xlog_align(log, i, 1, bp);
                if (XLOG_HEADER_MAGIC_NUM ==
-                   INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT)) {
+                   INT_GET(*(uint *)offset, ARCH_CONVERT)) {
                        found = 1;
                        break;
                }
@@ -571,8 +589,9 @@ xlog_find_tail(xlog_t               *log,
                for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
                        if ((error = xlog_bread(log, i, 1, bp)))
                                goto bread_err;
+                       offset = xlog_align(log, i, 1, bp);
                        if (XLOG_HEADER_MAGIC_NUM ==
-                           INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT)) {
+                           INT_GET(*(uint*)offset, ARCH_CONVERT)) {
                                found = 2;
                                break;
                        }
@@ -585,7 +604,7 @@ xlog_find_tail(xlog_t               *log,
        }
 
        /* find blk_no of tail of log */
-       rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp);
+       rhead = (xlog_rec_header_t *)offset;
        *tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT);
 
        /*
@@ -645,7 +664,8 @@ xlog_find_tail(xlog_t               *log,
                if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
                        goto bread_err;
                }
-               op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp);
+               offset = xlog_align(log, umount_data_blk, 1, bp);
+               op_head = (xlog_op_header_t *)offset;
                if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
                        /*
                         * Set tail and last sync so that newly written
@@ -660,7 +680,6 @@ xlog_find_tail(xlog_t               *log,
                }
        }
 
-#ifdef __KERNEL__
        /*
         * Make sure that there are no blocks in front of the head
         * with the same cycle number as the head.  This can happen
@@ -680,11 +699,9 @@ xlog_find_tail(xlog_t              *log,
         * But... if the -device- itself is readonly, just skip this.
         * We can't recover this device anyway, so it won't matter.
         */
-
-       if (!is_read_only(log->l_mp->m_logdev_targp->pbr_kdev)) {
+       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
                error = xlog_clear_stale_blocks(log, tail_lsn);
        }
-#endif
 
 bread_err:
 exit:
@@ -692,10 +709,8 @@ exit:
 
        if (error)
                xlog_warn("XFS: failed to locate log tail");
-
        return error;
-}      /* xlog_find_tail */
-
+}
 
 /*
  * Is the log zeroed at all?
@@ -714,22 +729,25 @@ exit:
  *     >0 => error has occurred
  */
 int
-xlog_find_zeroed(struct log    *log,
-                xfs_daddr_t    *blk_no)
+xlog_find_zeroed(
+       xlog_t          *log,
+       xfs_daddr_t     *blk_no)
 {
        xfs_buf_t       *bp;
+       xfs_caddr_t     offset;
        uint            first_cycle, last_cycle;
        xfs_daddr_t     new_blk, last_blk, start_blk;
        xfs_daddr_t     num_scan_bblks;
        int             error, log_bbnum = log->l_logBBsize;
 
        /* check totally zeroed log */
-       bp = xlog_get_bp(1,log->l_mp);
+       bp = xlog_get_bp(log, 1);
        if (!bp)
                return ENOMEM;
        if ((error = xlog_bread(log, 0, 1, bp)))
                goto bp_err;
-       first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
+       offset = xlog_align(log, 0, 1, bp);
+       first_cycle = GET_CYCLE(offset, ARCH_CONVERT);
        if (first_cycle == 0) {         /* completely zeroed log */
                *blk_no = 0;
                xlog_put_bp(bp);
@@ -739,7 +757,8 @@ xlog_find_zeroed(struct log *log,
        /* check partially zeroed log */
        if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
                goto bp_err;
-       last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
+       offset = xlog_align(log, log_bbnum-1, 1, bp);
+       last_cycle = GET_CYCLE(offset, ARCH_CONVERT);
        if (last_cycle != 0) {          /* log completely written to */
                xlog_put_bp(bp);
                return 0;
@@ -800,34 +819,25 @@ bp_err:
        if (error)
                return error;
        return -1;
-}      /* xlog_find_zeroed */
+}
 
-/* stuff for transactional view */
 STATIC void
-xlog_unpack_data(xlog_rec_header_t *rhead,
-                xfs_caddr_t       dp,
-                xlog_t            *log)
+xlog_unpack_data(
+       xlog_rec_header_t       *rhead,
+       xfs_caddr_t             dp,
+       xlog_t                  *log)
 {
-       int i, j, k;
-       union ich {
-               xlog_rec_header_t       hic_header;
-               xlog_rec_ext_header_t   hic_xheader;
-               char                    hic_sector[XLOG_HEADER_SIZE];
-       } *xhdr;
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-       uint *up = (uint *)dp;
-       uint chksum = 0;
-#endif
-
-       for (i=0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
+       int                     i, j, k;
+       xlog_in_core_2_t        *xhdr;
+
+       for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
                  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
                *(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
                dp += BBSIZE;
        }
 
        if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-               xhdr = (union ich*)rhead;
+               xhdr = (xlog_in_core_2_t *)rhead;
                for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
                        j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
                        k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -836,35 +846,15 @@ xlog_unpack_data(xlog_rec_header_t *rhead,
                }
        }
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-       /* divide length by 4 to get # words */
-       for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
-               chksum ^= INT_GET(*up, ARCH_CONVERT);
-               up++;
-       }
-       if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
-           if (!INT_ISZERO(rhead->h_chksum, ARCH_CONVERT) ||
-               ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
-                   cmn_err(CE_DEBUG,
-                       "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)",
-                           INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
-                   cmn_err(CE_DEBUG,
-"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
-                   if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-                           cmn_err(CE_DEBUG,
-                               "XFS: LogR this is a LogV2 filesystem");
-                   }
-                   log->l_flags |= XLOG_CHKSUM_MISMATCH;
-           }
-       }
-#endif /* DEBUG && XFS_LOUD_RECOVERY */
-}      /* xlog_unpack_data */
+       xlog_unpack_data_checksum(rhead, dp, log);
+}
 
 STATIC xlog_recover_t *
-xlog_recover_find_tid(xlog_recover_t *q,
-                     xlog_tid_t     tid)
+xlog_recover_find_tid(
+       xlog_recover_t          *q,
+       xlog_tid_t              tid)
 {
-       xlog_recover_t *p = q;
+       xlog_recover_t          *p = q;
 
        while (p != NULL) {
                if (p->r_log_tid == tid)
@@ -872,38 +862,38 @@ xlog_recover_find_tid(xlog_recover_t *q,
                p = p->r_next;
        }
        return p;
-}      /* xlog_recover_find_tid */
-
+}
 
 STATIC void
-xlog_recover_put_hashq(xlog_recover_t **q,
-                      xlog_recover_t *trans)
+xlog_recover_put_hashq(
+       xlog_recover_t  **q,            /* in/out: head pointer of the list */
+       xlog_recover_t  *trans)         /* entry linked in at the front */
 {
        trans->r_next = *q;
        *q = trans;
-}      /* xlog_recover_put_hashq */
-
+}
 
 STATIC void
-xlog_recover_new_tid(xlog_recover_t    **q,
-                    xlog_tid_t         tid,
-                    xfs_lsn_t          lsn)
+xlog_recover_new_tid(
+       xlog_recover_t          **q,    /* in/out: list head the new entry is pushed onto */
+       xlog_tid_t              tid,    /* stored in r_log_tid */
+       xfs_lsn_t               lsn)    /* stored in r_lsn */
 {
-       xlog_recover_t  *trans;
+       xlog_recover_t          *trans; /* the entry created here */
 
-       trans = kmem_zalloc(sizeof(xlog_recover_t), 0);
+       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);  /* result is used without a NULL check */
        trans->r_log_tid   = tid;
        trans->r_lsn       = lsn;
        xlog_recover_put_hashq(q, trans);
-}      /* xlog_recover_new_tid */
-
+}
 
 STATIC int
-xlog_recover_unlink_tid(xlog_recover_t **q,
-                       xlog_recover_t  *trans)
+xlog_recover_unlink_tid(
+       xlog_recover_t          **q,
+       xlog_recover_t          *trans)
 {
-       xlog_recover_t  *tp;
-       int             found = 0;
+       xlog_recover_t          *tp;
+       int                     found = 0;
 
        ASSERT(trans != 0);
        if (trans == *q) {
@@ -926,7 +916,7 @@ xlog_recover_unlink_tid(xlog_recover_t      **q,
                tp->r_next = tp->r_next->r_next;
        }
        return 0;
-}      /* xlog_recover_unlink_tid */
+}
 
 /*
  * Free up any resources allocated by the transaction
@@ -934,10 +924,11 @@ xlog_recover_unlink_tid(xlog_recover_t    **q,
  * Remember that EFIs, EFDs, and IUNLINKs are handled later.
  */
 STATIC void
-xlog_recover_free_trans(xlog_recover_t      *trans)
+xlog_recover_free_trans(
+       xlog_recover_t          *trans)
 {
-       xlog_recover_item_t *first_item, *item, *free_item;
-       int i;
+       xlog_recover_item_t     *first_item, *item, *free_item;
+       int                     i;
 
        item = first_item = trans->r_itemq;
        do {
@@ -955,16 +946,16 @@ xlog_recover_free_trans(xlog_recover_t      *trans)
        } while (first_item != item);
        /* Free the transaction recover structure */
        kmem_free(trans, sizeof(xlog_recover_t));
-}      /* xlog_recover_free_trans */
-
+}
 
 STATIC int
-xlog_recover_commit_trans(xlog_t        *log,
-                         xlog_recover_t **q,
-                         xlog_recover_t *trans,
-                         int            pass)
+xlog_recover_commit_trans(
+       xlog_t                  *log,
+       xlog_recover_t          **q,
+       xlog_recover_t          *trans,
+       int                     pass)
 {
-       int error;
+       int                     error;
 
        if ((error = xlog_recover_unlink_tid(q, trans)))
                return error;
@@ -972,11 +963,12 @@ xlog_recover_commit_trans(xlog_t   *log,
                return error;
        xlog_recover_free_trans(trans);                 /* no error */
        return 0;
-}      /* xlog_recover_commit_trans */
+}
 
 STATIC void
-xlog_recover_insert_item_backq(xlog_recover_item_t **q,
-                              xlog_recover_item_t *item)
+xlog_recover_insert_item_backq(
+       xlog_recover_item_t     **q,
+       xlog_recover_item_t     *item)
 {
        if (*q == 0) {
                item->ri_prev = item->ri_next = item;
@@ -987,18 +979,51 @@ xlog_recover_insert_item_backq(xlog_recover_item_t **q,
                (*q)->ri_prev           = item;
                item->ri_prev->ri_next  = item;
        }
-}      /* xlog_recover_insert_item_backq */
+}
 
 STATIC void
-xlog_recover_add_item(xlog_recover_item_t **itemq)
+xlog_recover_add_item(
+       xlog_recover_item_t     **itemq)        /* queue passed on to xlog_recover_insert_item_backq() */
 {
-       xlog_recover_item_t *item;
+       xlog_recover_item_t     *item;  /* newly allocated queue entry */
 
        item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
        xlog_recover_insert_item_backq(itemq, item);
-}      /* xlog_recover_add_item */
+}
 
-/* The next region to add is the start of a new region.         It could be
+STATIC int
+xlog_recover_add_to_cont_trans(
+       xlog_recover_t          *trans,         /* transaction being continued */
+       xfs_caddr_t             dp,             /* source bytes of this continuation */
+       int                     len)            /* number of bytes at dp */
+{
+       xlog_recover_item_t     *item;
+       xfs_caddr_t             ptr, old_ptr;
+       int                     old_len;
+
+       item = trans->r_itemq;
+       if (item == 0) {        /* no items queued yet */
+               /* finish copying rest of trans header */
+               xlog_recover_add_item(&trans->r_itemq);
+               ptr = (xfs_caddr_t) &trans->r_theader +
+                               sizeof(xfs_trans_header_t) - len;       /* last len bytes of r_theader; NOTE(review): len is not range-checked here */
+               memcpy(ptr, dp, len); /* d, s, l */
+               return 0;
+       }
+       item = item->ri_prev;   /* move to the queue head's ri_prev entry */
+
+       old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;  /* last buffer recorded in that item */
+       old_len = item->ri_buf[item->ri_cnt-1].i_len;
+
+       ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);   /* presumably resizes to old_len + len bytes -- confirm; result is not NULL-checked */
+       memcpy(&ptr[old_len], dp, len); /* d, s, l */
+       item->ri_buf[item->ri_cnt-1].i_len += len;
+       item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+       return 0;       /* no failure path: always returns 0 */
+}
+
+/*
+ * The next region to add is the start of a new region.  It could be
  * a whole region or it could be the first part of a new region.  Because
  * of this, the assumption here is that the type and size fields of all
  * format structures fit into the first 32 bits of the structure.
@@ -1011,20 +1036,17 @@ xlog_recover_add_item(xlog_recover_item_t **itemq)
  * will appear in the current log item.
  */
 STATIC int
- xlog_recover_add_to_trans(xlog_recover_t      *trans,
-                         xfs_caddr_t           dp,
-                         int                   len)
+xlog_recover_add_to_trans(
+       xlog_recover_t          *trans,
+       xfs_caddr_t             dp,
+       int                     len)
 {
-       xfs_inode_log_format_t   *in_f;                 /* any will do */
-       xlog_recover_item_t      *item;
-       xfs_caddr_t              ptr;
+       xfs_inode_log_format_t  *in_f;                  /* any will do */
+       xlog_recover_item_t     *item;
+       xfs_caddr_t             ptr;
 
        if (!len)
                return 0;
-       ptr = kmem_zalloc(len, 0);
-       memcpy(ptr, dp, len);
-
-       in_f = (xfs_inode_log_format_t *)ptr;
        item = trans->r_itemq;
        if (item == 0) {
                ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
@@ -1033,6 +1055,11 @@ STATIC int
                memcpy(&trans->r_theader, dp, len); /* d, s, l */
                return 0;
        }
+
+       ptr = kmem_alloc(len, KM_SLEEP);
+       memcpy(ptr, dp, len);
+       in_f = (xfs_inode_log_format_t *)ptr;
+
        if (item->ri_prev->ri_total != 0 &&
             item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
                xlog_recover_add_item(&trans->r_itemq);
@@ -1052,129 +1079,113 @@ STATIC int
        item->ri_buf[item->ri_cnt].i_len  = len;
        item->ri_cnt++;
        return 0;
-}      /* xlog_recover_add_to_trans */
-
-STATIC int
-xlog_recover_add_to_cont_trans(xlog_recover_t  *trans,
-                              xfs_caddr_t              dp,
-                              int              len)
-{
-       xlog_recover_item_t     *item;
-       xfs_caddr_t                     ptr, old_ptr;
-       int                     old_len;
-
-       item = trans->r_itemq;
-       if (item == 0) {
-               /* finish copying rest of trans header */
-               xlog_recover_add_item(&trans->r_itemq);
-               ptr = (xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len;
-               memcpy(ptr, dp, len); /* d, s, l */
-               return 0;
-       }
-       item = item->ri_prev;
-
-       old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
-       old_len = item->ri_buf[item->ri_cnt-1].i_len;
-
-       ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
-       memcpy(&ptr[old_len], dp, len); /* d, s, l */
-       item->ri_buf[item->ri_cnt-1].i_len += len;
-       item->ri_buf[item->ri_cnt-1].i_addr = ptr;
-       return 0;
-}      /* xlog_recover_add_to_cont_trans */
+}
 
 STATIC int
-xlog_recover_unmount_trans(xlog_recover_t *trans)
+xlog_recover_unmount_trans(
+       xlog_recover_t  *trans)         /* not referenced in the body */
 {
        /* Do nothing now */
        xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR");
-       return( 0 );
-}      /* xlog_recover_unmount_trans */
-
+       return 0;       /* only warns; always reports success */
+}
 
+/*
+ * There are two valid states of the r_state field.  0 indicates that the
+ * transaction structure is in a normal state.  We have either seen the
+ * start of the transaction or the last operation we added was not a partial
+ * operation.  If the last operation we added to the transaction was a
+ * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
+ *
+ * NOTE: skip LRs with 0 data length.
+ */
 STATIC int
-xlog_recover_process_data(xlog_t           *log,
-                         xlog_recover_t    *rhash[],
-                         xlog_rec_header_t *rhead,
-                         xfs_caddr_t       dp,
-                         int               pass)
+xlog_recover_process_data(
+       xlog_t                  *log,
+       xlog_recover_t          *rhash[],       /* per-bucket lists, indexed by XLOG_RHASH(tid) */
+       xlog_rec_header_t       *rhead,         /* record header: supplies h_len, h_num_logops, h_lsn */
+       xfs_caddr_t             dp,             /* start of the record's data */
+       int                     pass)           /* forwarded to xlog_recover_commit_trans() */
 {
-    xfs_caddr_t                lp         = dp+INT_GET(rhead->h_len, ARCH_CONVERT);
-    int                        num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
-    xlog_op_header_t   *ohead;
-    xlog_recover_t     *trans;
-    xlog_tid_t         tid;
-    int                        error;
-    unsigned long      hash;
-    uint               flags;
-
-    /* check the log format matches our own - else we can't recover */
-    if (xlog_header_check_recover(log->l_mp, rhead))
-           return (XFS_ERROR(EIO));
-
-    while ((dp < lp) && num_logops) {
-       ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
-       ohead = (xlog_op_header_t *)dp;
-       dp += sizeof(xlog_op_header_t);
-       if (ohead->oh_clientid != XFS_TRANSACTION &&
-           ohead->oh_clientid != XFS_LOG) {
-           xlog_warn("XFS: xlog_recover_process_data: bad clientid");
-           ASSERT(0);
-           return (XFS_ERROR(EIO));
-       }
-       tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
-       hash = XLOG_RHASH(tid);
-       trans = xlog_recover_find_tid(rhash[hash], tid);
-       if (trans == NULL) {                       /* not found; add new tid */
-           if (ohead->oh_flags & XLOG_START_TRANS)
-               xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT));
-       } else {
-           ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
-           flags = ohead->oh_flags & ~XLOG_END_TRANS;
-           if (flags & XLOG_WAS_CONT_TRANS)
-               flags &= ~XLOG_CONTINUE_TRANS;
-           switch (flags) {
-               case XLOG_COMMIT_TRANS: {
-                   error = xlog_recover_commit_trans(log, &rhash[hash],
-                                                     trans, pass);
-                   break;
-               }
-               case XLOG_UNMOUNT_TRANS: {
-                   error = xlog_recover_unmount_trans(trans);
-                   break;
-               }
-               case XLOG_WAS_CONT_TRANS: {
-                   error = xlog_recover_add_to_cont_trans(trans, dp,
-                                 INT_GET(ohead->oh_len, ARCH_CONVERT));
-                   break;
-               }
-               case XLOG_START_TRANS : {
-                   xlog_warn("XFS: xlog_recover_process_data: bad transaction");
-                   ASSERT(0);
-                   error = XFS_ERROR(EIO);
-                   break;
-               }
-               case 0:
-               case XLOG_CONTINUE_TRANS: {
-                   error = xlog_recover_add_to_trans(trans, dp,
-                                  INT_GET(ohead->oh_len, ARCH_CONVERT));
-                   break;
+       xfs_caddr_t             lp;             /* end of the record's data: dp + h_len */
+       int                     num_logops;     /* op headers still to process */
+       xlog_op_header_t        *ohead;
+       xlog_recover_t          *trans;
+       xlog_tid_t              tid;
+       int                     error;
+       unsigned long           hash;
+       uint                    flags;
+
+       lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT);
+       num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
+
+       /* check the log format matches our own - else we can't recover */
+       if (xlog_header_check_recover(log->l_mp, rhead))
+               return (XFS_ERROR(EIO));
+
+       while ((dp < lp) && num_logops) {
+               ASSERT(dp + sizeof(xlog_op_header_t) <= lp);    /* only an ASSERT guards this bound */
+               ohead = (xlog_op_header_t *)dp;
+               dp += sizeof(xlog_op_header_t); /* dp now points just past the op header */
+               if (ohead->oh_clientid != XFS_TRANSACTION &&
+                   ohead->oh_clientid != XFS_LOG) {
+                       xlog_warn(
+               "XFS: xlog_recover_process_data: bad clientid");
+                       ASSERT(0);
+                       return (XFS_ERROR(EIO));
                }
-               default: {
-                   xlog_warn("XFS: xlog_recover_process_data: bad flag");
-                   ASSERT(0);
-                   error = XFS_ERROR(EIO);
-                   break;
+               tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
+               hash = XLOG_RHASH(tid);
+               trans = xlog_recover_find_tid(rhash[hash], tid);
+               if (trans == NULL) {               /* not found; add new tid */
+                       if (ohead->oh_flags & XLOG_START_TRANS) /* any other op for an unknown tid is skipped */
+                               xlog_recover_new_tid(&rhash[hash], tid,
+                                       INT_GET(rhead->h_lsn, ARCH_CONVERT));
+               } else {
+                       ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
+                       flags = ohead->oh_flags & ~XLOG_END_TRANS;      /* XLOG_END_TRANS does not affect dispatch */
+                       if (flags & XLOG_WAS_CONT_TRANS)        /* WAS_CONT takes precedence over CONTINUE */
+                               flags &= ~XLOG_CONTINUE_TRANS;
+                       switch (flags) {
+                       case XLOG_COMMIT_TRANS:
+                               error = xlog_recover_commit_trans(log,
+                                               &rhash[hash], trans, pass);
+                               break;
+                       case XLOG_UNMOUNT_TRANS:
+                               error = xlog_recover_unmount_trans(trans);
+                               break;
+                       case XLOG_WAS_CONT_TRANS:
+                               error = xlog_recover_add_to_cont_trans(trans,
+                                               dp, INT_GET(ohead->oh_len,
+                                                       ARCH_CONVERT));
+                               break;
+                       case XLOG_START_TRANS:  /* tid already has an entry */
+                               xlog_warn(
+                       "XFS: xlog_recover_process_data: bad transaction");
+                               ASSERT(0);
+                               error = XFS_ERROR(EIO);
+                               break;
+                       case 0:
+                       case XLOG_CONTINUE_TRANS:
+                               error = xlog_recover_add_to_trans(trans,
+                                               dp, INT_GET(ohead->oh_len,
+                                                       ARCH_CONVERT));
+                               break;
+                       default:        /* any other flag combination */
+                               xlog_warn(
+                       "XFS: xlog_recover_process_data: bad flag");
+                               ASSERT(0);
+                               error = XFS_ERROR(EIO);
+                               break;
+                       }
+                       if (error)      /* stop at the first failing op */
+                               return error;
                }
-           } /* switch */
-           if (error)
-               return error;
-       } /* if */
-       dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
-       num_logops--;
-    }
-    return( 0 );
-}      /* xlog_recover_process_data */
+               dp += INT_GET(ohead->oh_len, ARCH_CONVERT);     /* advance past this op's payload */
+               num_logops--;
+       }
+       return 0;
+}
 
 /*
  * Read the log from tail to head and process the log records found.
@@ -1185,221 +1196,292 @@ xlog_recover_process_data(xlog_t          *log,
  * here.
  */
 int
-xlog_do_recovery_pass(xlog_t   *log,
-                     xfs_daddr_t       head_blk,
-                     xfs_daddr_t       tail_blk,
-                     int       pass)
+xlog_do_recovery_pass(
+       xlog_t                  *log,
+       xfs_daddr_t             head_blk,
+       xfs_daddr_t             tail_blk,
+       int                     pass)
 {
-    xlog_rec_header_t  *rhead;
-    xfs_daddr_t                blk_no;
-    xfs_caddr_t                bufaddr;
-    xfs_buf_t          *hbp, *dbp;
-    int                        error, h_size;
-    int                        bblks, split_bblks;
-    int                        hblks, split_hblks, wrapped_hblks;
-    xlog_recover_t     *rhash[XLOG_RHASH_SIZE];
-
-    error = 0;
-
-
-    /*
-     * Read the header of the tail block and get the iclog buffer size from
-     * h_size.  Use this to tell how many sectors make up the log header.
-     */
-    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+       xlog_rec_header_t       *rhead;
+       xfs_daddr_t             blk_no;
+       xfs_caddr_t             bufaddr, offset;
+       xfs_buf_t               *hbp, *dbp;
+       int                     error = 0, h_size;
+       int                     bblks, split_bblks;
+       int                     hblks, split_hblks, wrapped_hblks;
+       xlog_recover_t          *rhash[XLOG_RHASH_SIZE];
+
        /*
-        * When using variable length iclogs, read first sector of iclog
-        * header and extract the header size from it.  Get a new hbp that
-        * is the correct size.
+        * Read the header of the tail block and get the iclog buffer size from
+        * h_size.  Use this to tell how many sectors make up the log header.
         */
-       hbp = xlog_get_bp(1, log->l_mp);
-       if (!hbp)
-           return ENOMEM;
-       if ((error = xlog_bread(log, tail_blk, 1, hbp)))
-           goto bread_err1;
-       rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
-       ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
+       if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+               /*
+                * When using variable length iclogs, read first sector of
+                * iclog header and extract the header size from it.  Get a
+                * new hbp that is the correct size.
+                */
+               hbp = xlog_get_bp(log, 1);
+               if (!hbp)
+                       return ENOMEM;
+               if ((error = xlog_bread(log, tail_blk, 1, hbp)))
+                       goto bread_err1;
+               offset = xlog_align(log, tail_blk, 1, hbp);
+               rhead = (xlog_rec_header_t *)offset;
+               ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
                                                XLOG_HEADER_MAGIC_NUM);
-       if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
-           xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
-           error = XFS_ERROR(EIO);
-           goto bread_err1;
-       }
-       h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
-
-       if ((INT_GET(rhead->h_version, ARCH_CONVERT) & XLOG_VERSION_2) &&
-           (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-           hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-           if (h_size % XLOG_HEADER_CYCLE_SIZE)
-               hblks++;
-           xlog_put_bp(hbp);
-           hbp = xlog_get_bp(hblks, log->l_mp);
+               if ((INT_GET(rhead->h_version, ARCH_CONVERT) &
+                               (~XLOG_VERSION_OKBITS)) != 0) {
+                       xlog_warn(
+       "XFS: xlog_do_recovery_pass: unrecognised log version number.");
+                       error = XFS_ERROR(EIO);
+                       goto bread_err1;
+               }
+               h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
+
+               if ((INT_GET(rhead->h_version, ARCH_CONVERT)
+                               & XLOG_VERSION_2) &&
+                   (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+                       hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+                       if (h_size % XLOG_HEADER_CYCLE_SIZE)
+                               hblks++;
+                       xlog_put_bp(hbp);
+                       hbp = xlog_get_bp(log, hblks);
+               } else {
+                       hblks = 1;
+               }
        } else {
-           hblks=1;
+               ASSERT(log->l_sectbb_log == 0);
+               hblks = 1;
+               hbp = xlog_get_bp(log, 1);
+               h_size = XLOG_BIG_RECORD_BSIZE;
        }
-    } else {
-       hblks=1;
-       hbp = xlog_get_bp(1, log->l_mp);
-       h_size = XLOG_BIG_RECORD_BSIZE;
-    }
-
-    if (!hbp)
-       return ENOMEM;
-    dbp = xlog_get_bp(BTOBB(h_size),log->l_mp);
-    if (!dbp) {
-       xlog_put_bp(hbp);
-       return ENOMEM;
-    }
-
-    memset(rhash, 0, sizeof(rhash));
-    if (tail_blk <= head_blk) {
-       for (blk_no = tail_blk; blk_no < head_blk; ) {
-           if ((error = xlog_bread(log, blk_no, hblks, hbp)))
-               goto bread_err2;
-           rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
-           ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
-           ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
-           bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));   /* blocks in data section */
-
-           if (unlikely((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
-               (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
-               (bblks <= 0) ||
-               (blk_no > log->l_logBBsize))) {
-                   XFS_ERROR_REPORT("xlog_do_recovery_pass(1)",
-                                    XFS_ERRLEVEL_LOW, log->l_mp);
-                   error = EFSCORRUPTED;
-                   goto bread_err2;
-           }
-
-           if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) {
-               xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number.");
-               error = XFS_ERROR(EIO);
-               goto bread_err2;
-           }
-           bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));   /* blocks in data section */
-           if (bblks > 0) {
-               if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
-                   goto bread_err2;
-               xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
-               if ((error = xlog_recover_process_data(log, rhash,
-                                                     rhead, XFS_BUF_PTR(dbp),
-                                                     pass)))
-                       goto bread_err2;
-           }
-           blk_no += (bblks+hblks);
+
+       if (!hbp)
+               return ENOMEM;
+       dbp = xlog_get_bp(log, BTOBB(h_size));
+       if (!dbp) {
+               xlog_put_bp(hbp);
+               return ENOMEM;
        }
-    } else {
-       /*
-        * Perform recovery around the end of the physical log.  When the head
-        * is not on the same cycle number as the tail, we can't do a sequential
-        * recovery as above.
-        */
-       blk_no = tail_blk;
-       while (blk_no < log->l_logBBsize) {
-           /*
-            * Check for header wrapping around physical end-of-log
-            */
-           wrapped_hblks = 0;
-           if (blk_no+hblks <= log->l_logBBsize) {
-               /* Read header in one read */
-               if ((error = xlog_bread(log, blk_no, hblks, hbp)))
-                   goto bread_err2;
-           } else {
-               /* This log record is split across physical end of log */
-               split_hblks = 0;
-               if (blk_no != log->l_logBBsize) {
-                   /* some data is before physical end of log */
-                   ASSERT(blk_no <= INT_MAX);
-                   split_hblks = log->l_logBBsize - (int)blk_no;
-                   ASSERT(split_hblks > 0);
-                   if ((error = xlog_bread(log, blk_no, split_hblks, hbp)))
-                       goto bread_err2;
+
+       memset(rhash, 0, sizeof(rhash));
+       if (tail_blk <= head_blk) {
+               for (blk_no = tail_blk; blk_no < head_blk; ) {
+                       if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+                               goto bread_err2;
+                       offset = xlog_align(log, blk_no, hblks, hbp);
+                       rhead = (xlog_rec_header_t *)offset;
+                       ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
+                               XLOG_HEADER_MAGIC_NUM);
+                       ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <=
+                               INT_MAX));
+                       /* blocks in data section */
+                       bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+
+                       if (unlikely(
+                           (INT_GET(rhead->h_magicno, ARCH_CONVERT) !=
+                                       XLOG_HEADER_MAGIC_NUM) ||
+                           (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) >
+                                       INT_MAX)) ||
+                           (bblks <= 0) ||
+                           (blk_no > log->l_logBBsize))) {
+                               XFS_ERROR_REPORT("xlog_do_recovery_pass(1)",
+                                               XFS_ERRLEVEL_LOW, log->l_mp);
+                               error = EFSCORRUPTED;
+                               goto bread_err2;
+                       }
+
+                       if ((INT_GET(rhead->h_version, ARCH_CONVERT) &
+                                       (~XLOG_VERSION_OKBITS)) != 0) {
+                               xlog_warn(
+               "XFS: xlog_do_recovery_pass: unrecognised log version number.");
+                               error = XFS_ERROR(EIO);
+                               goto bread_err2;
+                       }
+                       /* blocks in data section */
+                       bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+                       if (bblks > 0) {
+                               if ((error = xlog_bread(log, blk_no + hblks,
+                                                       bblks, dbp)))
+                                       goto bread_err2;
+                               offset = xlog_align(log, blk_no + hblks,
+                                                       bblks, dbp);
+                               xlog_unpack_data(rhead, offset, log);
+                               if ((error = xlog_recover_process_data(log,
+                                               rhash, rhead, offset, pass)))
+                                       goto bread_err2;
+                       }
+                       blk_no += (bblks+hblks);
                }
-               bufaddr = XFS_BUF_PTR(hbp);
-               XFS_BUF_SET_PTR(hbp, bufaddr + BBTOB(split_hblks),
-                       BBTOB(hblks - split_hblks));
-               wrapped_hblks = hblks - split_hblks;
-               if ((error = xlog_bread(log, 0, wrapped_hblks, hbp)))
-                   goto bread_err2;
-               XFS_BUF_SET_PTR(hbp, bufaddr, hblks);
-           }
-           rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
-           ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
-           ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
-           bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
-
-           /* LR body must have data or it wouldn't have been written */
-           ASSERT(bblks > 0);
-           blk_no += hblks;                    /* successfully read header */
-
-           if (unlikely((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) ||
-               (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) ||
-               (bblks <= 0))) {
-                   XFS_ERROR_REPORT("xlog_do_recovery_pass(2)",
-                                    XFS_ERRLEVEL_LOW, log->l_mp);
-                   error = EFSCORRUPTED;
-                   goto bread_err2;
-           }
-
-           /* Read in data for log record */
-           if (blk_no+bblks <= log->l_logBBsize) {
-               if ((error = xlog_bread(log, blk_no, bblks, dbp)))
-                   goto bread_err2;
-           } else {
-               /* This log record is split across physical end of log */
-               split_bblks = 0;
-               if (blk_no != log->l_logBBsize) {
-
-                   /* some data is before physical end of log */
-                   ASSERT(blk_no <= INT_MAX);
-                   split_bblks = log->l_logBBsize - (int)blk_no;
-                   ASSERT(split_bblks > 0);
-                   if ((error = xlog_bread(log, blk_no, split_bblks, dbp)))
-                       goto bread_err2;
+       } else {
+               /*
+                * Perform recovery around the end of the physical log.
+                * When the head is not on the same cycle number as the tail,
+                * we can't do a sequential recovery as above.
+                */
+               blk_no = tail_blk;
+               while (blk_no < log->l_logBBsize) {
+                       /*
+                        * Check for header wrapping around physical end-of-log
+                        */
+                       wrapped_hblks = 0;
+                       if (blk_no+hblks <= log->l_logBBsize) {
+                               /* Read header in one read */
+                               if ((error = xlog_bread(log, blk_no,
+                                                       hblks, hbp)))
+                                       goto bread_err2;
+                               offset = xlog_align(log, blk_no, hblks, hbp);
+                       } else {
+                               /* This LR is split across physical log end */
+                               offset = NULL;
+                               split_hblks = 0;
+                               if (blk_no != log->l_logBBsize) {
+                                       /* some data before physical log end */
+                                       ASSERT(blk_no <= INT_MAX);
+                                       split_hblks = log->l_logBBsize - (int)blk_no;
+                                       ASSERT(split_hblks > 0);
+                                       if ((error = xlog_bread(log, blk_no,
+                                                       split_hblks, hbp)))
+                                               goto bread_err2;
+                                       offset = xlog_align(log, blk_no,
+                                                       split_hblks, hbp);
+                               }
+                               /*
+                                * Note: this black magic still works with
+                                * large sector sizes (non-512) only because:
+                                * - we increased the buffer size originally
+                                *   by 1 sector giving us enough extra space
+                                *   for the second read;
+                                * - the log start is guaranteed to be sector
+                                *   aligned;
+                                * - we read the log end (LR header start)
+                                *   _first_, then the log start (LR header end)
+                                *   - order is important.
+                                */
+                               bufaddr = XFS_BUF_PTR(hbp);
+                               XFS_BUF_SET_PTR(hbp,
+                                               bufaddr + BBTOB(split_hblks),
+                                               BBTOB(hblks - split_hblks));
+                               wrapped_hblks = hblks - split_hblks;
+                               if ((error = xlog_bread(log, 0,
+                                                       wrapped_hblks, hbp)))
+                                       goto bread_err2;
+                               XFS_BUF_SET_PTR(hbp, bufaddr, hblks);
+                               if (!offset)
+                                       offset = xlog_align(log, 0,
+                                                       wrapped_hblks, hbp);
+                       }
+                       rhead = (xlog_rec_header_t *)offset;
+                       ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
+                               XLOG_HEADER_MAGIC_NUM);
+                       ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <=
+                               INT_MAX));
+                       bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+
+                       /* LR body must have data or it wouldn't have been
+                        * written */
+                       ASSERT(bblks > 0);
+                       blk_no += hblks;        /* successfully read header */
+
+                       if (unlikely(
+                           (INT_GET(rhead->h_magicno, ARCH_CONVERT) !=
+                                       XLOG_HEADER_MAGIC_NUM) ||
+                           (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) >
+                                       INT_MAX)) ||
+                           (bblks <= 0))) {
+                               XFS_ERROR_REPORT("xlog_do_recovery_pass(2)",
+                                            XFS_ERRLEVEL_LOW, log->l_mp);
+                               error = EFSCORRUPTED;
+                               goto bread_err2;
+                       }
+
+                       /* Read in data for log record */
+                       if (blk_no+bblks <= log->l_logBBsize) {
+                               if ((error = xlog_bread(log, blk_no,
+                                                       bblks, dbp)))
+                                       goto bread_err2;
+                               offset = xlog_align(log, blk_no, bblks, dbp);
+                       } else {
+                               /* This log record is split across the
+                                * physical end of log */
+                               offset = NULL;
+                               split_bblks = 0;
+                               if (blk_no != log->l_logBBsize) {
+                                       /* some data is before the physical
+                                        * end of log */
+                                       ASSERT(!wrapped_hblks);
+                                       ASSERT(blk_no <= INT_MAX);
+                                       split_bblks =
+                                               log->l_logBBsize - (int)blk_no;
+                                       ASSERT(split_bblks > 0);
+                                       if ((error = xlog_bread(log, blk_no,
+                                                       split_bblks, dbp)))
+                                               goto bread_err2;
+                                       offset = xlog_align(log, blk_no,
+                                                       split_bblks, dbp);
+                               }
+                               /*
+                                * Note: this black magic still works with
+                                * large sector sizes (non-512) only because:
+                                * - we increased the buffer size originally
+                                *   by 1 sector giving us enough extra space
+                                *   for the second read;
+                                * - the log start is guaranteed to be sector
+                                *   aligned;
+                                * - we read the log end (LR header start)
+                                *   _first_, then the log start (LR header end)
+                                *   - order is important.
+                                */
+                               bufaddr = XFS_BUF_PTR(dbp);
+                               XFS_BUF_SET_PTR(dbp,
+                                               bufaddr + BBTOB(split_bblks),
+                                               BBTOB(bblks - split_bblks));
+                               if ((error = xlog_bread(log, wrapped_hblks,
+                                               bblks - split_bblks, dbp)))
+                                       goto bread_err2;
+                               XFS_BUF_SET_PTR(dbp, bufaddr,
+                                               XLOG_BIG_RECORD_BSIZE);
+                               if (!offset)
+                                       offset = xlog_align(log, wrapped_hblks,
+                                               bblks - split_bblks, dbp);
+                       }
+                       xlog_unpack_data(rhead, offset, log);
+                       if ((error = xlog_recover_process_data(log, rhash,
+                                                 rhead, offset, pass)))
+                               goto bread_err2;
+                       blk_no += bblks;
                }
-               bufaddr = XFS_BUF_PTR(dbp);
-               XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks),
-                       BBTOB(bblks - split_bblks));
-               if ((error = xlog_bread(log, wrapped_hblks,
-                                       bblks - split_bblks, dbp)))
-                   goto bread_err2;
-               XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_BIG_RECORD_BSIZE);
-           }
-           xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
-           if ((error = xlog_recover_process_data(log, rhash,
-                                                 rhead, XFS_BUF_PTR(dbp),
-                                                 pass)))
-               goto bread_err2;
-           blk_no += bblks;
-       }
 
-       ASSERT(blk_no >= log->l_logBBsize);
-       blk_no -= log->l_logBBsize;
-
-       /* read first part of physical log */
-       while (blk_no < head_blk) {
-           if ((error = xlog_bread(log, blk_no, hblks, hbp)))
-               goto bread_err2;
-           rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp);
-           ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
-           ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX));
-           bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
-           ASSERT(bblks > 0);
-           if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
-               goto bread_err2;
-           xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log);
-           if ((error = xlog_recover_process_data(log, rhash,
-                                                 rhead, XFS_BUF_PTR(dbp),
-                                                 pass)))
-               goto bread_err2;
-           blk_no += (bblks+hblks);
+               ASSERT(blk_no >= log->l_logBBsize);
+               blk_no -= log->l_logBBsize;
+
+               /* read first part of physical log */
+               while (blk_no < head_blk) {
+                       if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+                               goto bread_err2;
+                       offset = xlog_align(log, blk_no, hblks, hbp);
+                       rhead = (xlog_rec_header_t *)offset;
+                       ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) ==
+                               XLOG_HEADER_MAGIC_NUM);
+                       ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <=
+                               INT_MAX));
+                       bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+                       ASSERT(bblks > 0);
+                       if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
+                               goto bread_err2;
+                       offset = xlog_align(log, blk_no+hblks, bblks, dbp);
+                       xlog_unpack_data(rhead, offset, log);
+                       if ((error = xlog_recover_process_data(log, rhash,
+                                                         rhead, offset, pass)))
+                               goto bread_err2;
+                       blk_no += (bblks+hblks);
+               }
        }
-    }
-
-bread_err2:
-    xlog_put_bp(dbp);
-bread_err1:
-    xlog_put_bp(hbp);
 
-    return error;
+ bread_err2:
+       xlog_put_bp(dbp);
+ bread_err1:
+       xlog_put_bp(hbp);
+       return error;
 }