git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
libxlog: sync up with 2.6.38 kernel code
authorDave Chinner <dchinner@redhat.com>
Mon, 25 Jul 2011 20:45:18 +0000 (06:45 +1000)
committerDave Chinner <david@fromorbit.com>
Mon, 25 Jul 2011 20:45:18 +0000 (06:45 +1000)
Update libxlog with the current 2.6.38 kernel code, as well as
updating the necessary parts of libxfs and various header files to
ensure that it compiles correctly.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
24 files changed:
include/Makefile
include/atomic.h [new file with mode: 0644]
include/hlist.h [new file with mode: 0644]
include/libxfs.h
include/libxlog.h
include/list.h
include/xfs_buf_item.h
include/xfs_extfree_item.h
include/xfs_inode_item.h
include/xfs_log.h
include/xfs_log_priv.h
include/xfs_log_recover.h
include/xfs_trace.h [new file with mode: 0644]
include/xfs_trans.h
include/xfs_types.h
libxfs/logitem.c
libxfs/trans.c
libxfs/xfs.h
libxfs/xfs_mount.c
libxfs/xfs_trans.c
libxlog/xfs_log_recover.c
logprint/log_misc.c
logprint/log_print_all.c
logprint/log_print_trans.c

index 63709e6d72fe74c51e8ba9058a047a69d468ea45..a84963c149d955ef3a64ea070ffc497ec01a2585 100644 (file)
@@ -19,7 +19,8 @@ TOPDIR = ..
 include $(TOPDIR)/include/builddefs
 
 QAHFILES = libxfs.h libxlog.h \
-       bitops.h cache.h kmem.h list.h parent.h radix-tree.h swab.h \
+       atomic.h bitops.h cache.h kmem.h list.h hlist.h parent.h radix-tree.h \
+       swab.h \
        xfs_ag.h xfs_alloc.h xfs_alloc_btree.h xfs_arch.h xfs_attr_leaf.h \
        xfs_attr_sf.h xfs_bit.h xfs_bmap.h xfs_bmap_btree.h xfs_btree.h \
        xfs_btree_trace.h xfs_buf_item.h xfs_da_btree.h xfs_dinode.h \
@@ -28,7 +29,7 @@ QAHFILES = libxfs.h libxlog.h \
        xfs_extfree_item.h xfs_ialloc.h xfs_ialloc_btree.h \
        xfs_imap.h xfs_inode.h xfs_inode_item.h xfs_inum.h \
        xfs_log.h xfs_log_priv.h xfs_log_recover.h xfs_metadump.h \
-       xfs_mount.h xfs_quota.h xfs_rtalloc.h xfs_sb.h \
+       xfs_mount.h xfs_quota.h xfs_rtalloc.h xfs_sb.h xfs_trace.h \
        xfs_trans.h xfs_trans_space.h xfs_types.h xfs_dfrag.h
 
 HFILES = handle.h jdm.h xqm.h xfs.h xfs_fs.h
diff --git a/include/atomic.h b/include/atomic.h
new file mode 100644 (file)
index 0000000..151c5bf
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 RedHat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __ATOMIC_H__
+#define __ATOMIC_H__
+
+/*
+ * Warning: These are not really atomic at all. They are wrappers around the
+ * kernel atomic variable interface. If we do need these variables to be atomic
+ * (due to multithreading of the code that uses them) we need to add some
+ * pthreads magic here.
+ */
+typedef        int32_t atomic_t;
+typedef        int64_t atomic64_t;
+
+#define atomic_inc_return(x)   (++(*(x)))
+#define atomic_dec_return(x)   (--(*(x)))
+
+#define atomic64_read(x)       *(x)
+#define atomic64_set(x, v)     (*(x) = v)
+
+#endif /* __ATOMIC_H__ */
+
diff --git a/include/hlist.h b/include/hlist.h
new file mode 100644 (file)
index 0000000..9ee096c
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * double-linked hash list with single head implementation taken from linux
+ * kernel headers as of 2.6.38-rc1.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __HLIST_H__
+#define __HLIST_H__
+
+struct hlist_node {
+       struct hlist_node *next;
+       struct hlist_node **pprev;
+};
+struct hlist_head {
+       struct hlist_node *first;
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+       h->next = NULL;
+       h->pprev = NULL;
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+        struct hlist_node *first = h->first;
+       n->next = first;
+       if (first)
+               first->pprev = &n->next;
+       h->first = n;
+       n->pprev = &h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+       struct hlist_node *next = n->next;
+       struct hlist_node **pprev = n->pprev;
+       *pprev = next;
+       if (next)
+               next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+       __hlist_del(n);
+}
+
+#define hlist_entry(ptr, type, member)  ({ \
+               const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+               (type *)( (char *)__mptr - offsetof(type,member) );})
+
+
+#define hlist_for_each(pos, head) \
+       for (pos = (head)->first; pos; pos = pos->next)
+
+#define hlist_for_each_entry(tpos, pos, head, member)                    \
+        for (pos = (head)->first;                                        \
+            pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+            pos = pos->next)
+
+
+#endif /* __LIST_H__ */
index ff59ca4e6ade8813b37c8a7feccc4696b948542b..eed79cf72305bed85eb864c3057abe87d2041b11 100644 (file)
 #include <xfs/platform_defs.h>
 
 #include <xfs/list.h>
+#include <xfs/hlist.h>
 #include <xfs/cache.h>
 #include <xfs/bitops.h>
 #include <xfs/kmem.h>
 #include <xfs/radix-tree.h>
 #include <xfs/swab.h>
+#include <xfs/atomic.h>
 
 #include <xfs/xfs_fs.h>
 #include <xfs/xfs_types.h>
@@ -55,6 +57,7 @@
 #include <xfs/xfs_btree.h>
 #include <xfs/xfs_btree_trace.h>
 #include <xfs/xfs_bmap.h>
+#include <xfs/xfs_trace.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
 #define xfs_isset(a,i) ((a)[(i)/(sizeof((a))*NBBY)] & (1<<((i)%(sizeof((a))*NBBY))))
 
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
 /*
  * Argument structure for libxfs_init().
  */
@@ -244,8 +251,11 @@ enum xfs_buf_flags_t {     /* b_flags bits */
 #define XFS_BUF_SIZE(bp)               ((bp)->b_bcount)
 #define XFS_BUF_COUNT(bp)              ((bp)->b_bcount)
 #define XFS_BUF_TARGET(bp)             ((bp)->b_dev)
-#define XFS_BUF_SET_PTR(bp,p,cnt)      ((bp)->b_addr = (char *)(p)); \
-                                               XFS_BUF_SET_COUNT(bp,cnt)
+#define XFS_BUF_SET_PTR(bp,p,cnt)      ({      \
+       (bp)->b_addr = (char *)(p);             \
+       XFS_BUF_SET_COUNT(bp,cnt);              \
+})
+
 #define XFS_BUF_SET_ADDR(bp,blk)       ((bp)->b_blkno = (blk))
 #define XFS_BUF_SET_COUNT(bp,cnt)      ((bp)->b_bcount = (cnt))
 
@@ -333,6 +343,7 @@ typedef struct xfs_inode_log_item {
        unsigned short          ili_flags;              /* misc flags */
        unsigned int            ili_last_fields;        /* fields when flushed*/
        xfs_inode_log_format_t  ili_format;             /* logged structure */
+       int                     ili_lock_flags;
 } xfs_inode_log_item_t;
 
 typedef struct xfs_buf_log_item {
@@ -355,8 +366,7 @@ typedef struct xfs_trans {
        long            t_ifree_delta;          /* superblock ifree change */
        long            t_fdblocks_delta;       /* superblock fdblocks chg */
        long            t_frextents_delta;      /* superblock freextents chg */
-       unsigned int    t_items_free;           /* log item descs free */
-       xfs_log_item_chunk_t    t_items;        /* first log item desc chunk */
+       struct list_head        t_items;        /* first log item desc chunk */
 } xfs_trans_t;
 
 extern xfs_trans_t     *libxfs_trans_alloc (xfs_mount_t *, int);
index 2a8a25111af6e54c852be598732998c6f137bbee..d1142ab010b9714af96509dcba0d24ef0a6067a3 100644 (file)
@@ -39,13 +39,12 @@ typedef struct log {
        int             l_iclog_size;    /* size of log in bytes */
        int             l_iclog_size_log;/* log power size of log */
        int             l_iclog_bufs;    /* number of iclog buffers */
-       int             l_grant_reserve_cycle;  /* */
-       int             l_grant_reserve_bytes;  /* */
-       int             l_grant_write_cycle;    /* */
-       int             l_grant_write_bytes;    /* */
+       atomic64_t      l_grant_reserve_head;
+       atomic64_t      l_grant_write_head;
        uint            l_sectbb_log;   /* log2 of sector size in bbs */
        uint            l_sectbb_mask;  /* sector size (in BBs)
                                         * alignment mask */
+       int             l_sectBBsize;   /* size of log sector in 512 byte chunks */
 } xlog_t;
 
 #include <xfs/xfs_log_recover.h>
@@ -91,7 +90,10 @@ extern libxfs_init_t x;
 
 extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
 extern void    xlog_put_bp(struct xfs_buf *);
-extern int     xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
+extern int     xlog_bread(xlog_t *log, xfs_daddr_t blk_no, int nbblks,
+                               xfs_buf_t *bp, xfs_caddr_t *offset);
+extern int     xlog_bread_noalign(xlog_t *log, xfs_daddr_t blk_no, int nbblks,
+                               xfs_buf_t *bp);
 
 extern int     xlog_find_zeroed(xlog_t *log, xfs_daddr_t *blk_no);
 extern int     xlog_find_cycle_start(xlog_t *log, xfs_buf_t *bp,
@@ -110,7 +112,7 @@ extern int  xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk);
 /* for transactional view */
 extern void    xlog_recover_print_trans_head(xlog_recover_t *tr);
 extern void    xlog_recover_print_trans(xlog_recover_t *trans,
-                               xlog_recover_item_t *itemq, int print);
+                               struct list_head *itemq, int print);
 extern int     xlog_do_recovery_pass(xlog_t *log, xfs_daddr_t head_blk,
                                xfs_daddr_t tail_blk, int pass);
 extern int     xlog_recover_do_trans(xlog_t *log, xlog_recover_t *trans,
@@ -120,4 +122,6 @@ extern int  xlog_header_check_recover(xfs_mount_t *mp,
 extern int     xlog_header_check_mount(xfs_mount_t *mp,
                                xlog_rec_header_t *head);
 
+#define xlog_assign_atomic_lsn(l,a,b) ((void) 0)
+#define xlog_assign_grant_head(l,a,b) ((void) 0)
 #endif /* LIBXLOG_H */
index 2389a6c0eb6be3eec3c6ba4eef8900d1464521a5..3f087a47c917821172fffbb872a8864d36fcf821 100644 (file)
@@ -27,6 +27,12 @@ struct list_head {
        struct list_head *prev;
 };
 
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+               struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(list) list_head_init(list)
 static inline void list_head_init(struct list_head *list)
 {
        list->next = list->prev = list;
@@ -68,6 +74,11 @@ static inline void list_del_init(struct list_head *entry)
        list_head_init(entry);
 }
 
+static inline void list_del(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+}
+
 static inline void list_move(struct list_head *list, struct list_head *head)
 {
        __list_del(list->prev, list->next);
index 5a41c348bb1ce7f6b2d7e55a7d096ae17ebc4ba8..a5efba911a42315543511da9c9a4325b8bdabad2 100644 (file)
@@ -26,7 +26,7 @@ extern kmem_zone_t    *xfs_buf_item_zone;
  * have been logged.
  * For 6.2 and beyond, this is XFS_LI_BUF.  We use this to log everything.
  */
-typedef struct xfs_buf_log_format_t {
+typedef struct xfs_buf_log_format {
        unsigned short  blf_type;       /* buf log item type indicator */
        unsigned short  blf_size;       /* size of this item */
        ushort          blf_flags;      /* misc state */
@@ -41,22 +41,22 @@ typedef struct xfs_buf_log_format_t {
  * This flag indicates that the buffer contains on disk inodes
  * and requires special recovery handling.
  */
-#define        XFS_BLI_INODE_BUF       0x1
+#define        XFS_BLF_INODE_BUF       0x1
 /*
  * This flag indicates that the buffer should not be replayed
  * during recovery because its blocks are being freed.
  */
-#define        XFS_BLI_CANCEL          0x2
+#define        XFS_BLF_CANCEL          0x2
 /*
  * This flag indicates that the buffer contains on disk
  * user or group dquots and may require special recovery handling.
  */
-#define        XFS_BLI_UDQUOT_BUF      0x4
-#define XFS_BLI_PDQUOT_BUF     0x8
-#define        XFS_BLI_GDQUOT_BUF      0x10
+#define        XFS_BLF_UDQUOT_BUF      0x4
+#define XFS_BLF_PDQUOT_BUF     0x8
+#define        XFS_BLF_GDQUOT_BUF      0x10
 
-#define        XFS_BLI_CHUNK           128
-#define        XFS_BLI_SHIFT           7
+#define        XFS_BLF_CHUNK           128
+#define        XFS_BLF_SHIFT           7
 #define        BIT_TO_WORD_SHIFT       5
 #define        NBWORD                  (NBBY * sizeof(unsigned int))
 
@@ -69,23 +69,23 @@ typedef struct xfs_buf_log_format_t {
 #define        XFS_BLI_LOGGED          0x08
 #define        XFS_BLI_INODE_ALLOC_BUF 0x10
 #define XFS_BLI_STALE_INODE    0x20
+#define XFS_BLI_INODE_BUF      0x40
 
+#define XFS_BLI_FLAGS \
+       { XFS_BLI_HOLD,         "HOLD" }, \
+       { XFS_BLI_DIRTY,        "DIRTY" }, \
+       { XFS_BLI_STALE,        "STALE" }, \
+       { XFS_BLI_LOGGED,       "LOGGED" }, \
+       { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
+       { XFS_BLI_STALE_INODE,  "STALE_INODE" }, \
+       { XFS_BLI_INODE_BUF,    "INODE_BUF" }
 
 #ifdef __KERNEL__
 
 struct xfs_buf;
-struct ktrace;
 struct xfs_mount;
 struct xfs_buf_log_item;
 
-#if defined(XFS_BLI_TRACE)
-#define        XFS_BLI_TRACE_SIZE      32
-
-void   xfs_buf_item_trace(char *, struct xfs_buf_log_item *);
-#else
-#define        xfs_buf_item_trace(id, bip)
-#endif
-
 /*
  * This is the in core log item structure used to track information
  * needed to log buffers.  It tracks how many times the lock has been
@@ -97,9 +97,6 @@ typedef struct xfs_buf_log_item {
        unsigned int            bli_flags;      /* misc flags */
        unsigned int            bli_recur;      /* lock recursion count */
        atomic_t                bli_refcount;   /* cnt of tp refs */
-#ifdef XFS_BLI_TRACE
-       struct ktrace           *bli_trace;     /* event trace buf */
-#endif
 #ifdef XFS_TRANS_DEBUG
        char                    *bli_orig;      /* original buffer copy */
        char                    *bli_logged;    /* bytes logged (bitmap) */
@@ -107,17 +104,6 @@ typedef struct xfs_buf_log_item {
        xfs_buf_log_format_t    bli_format;     /* in-log header */
 } xfs_buf_log_item_t;
 
-/*
- * This structure is used during recovery to record the buf log
- * items which have been canceled and should not be replayed.
- */
-typedef struct xfs_buf_cancel {
-       xfs_daddr_t             bc_blkno;
-       uint                    bc_len;
-       int                     bc_refcount;
-       struct xfs_buf_cancel   *bc_next;
-} xfs_buf_cancel_t;
-
 void   xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
 void   xfs_buf_item_relse(struct xfs_buf *);
 void   xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
@@ -126,7 +112,7 @@ void        xfs_buf_attach_iodone(struct xfs_buf *,
                              void(*)(struct xfs_buf *, xfs_log_item_t *),
                              xfs_log_item_t *);
 void   xfs_buf_iodone_callbacks(struct xfs_buf *);
-void   xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *);
+void   xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
 
 #ifdef XFS_TRANS_DEBUG
 void
index 2f049f63e85f73ea3ae1763c5eb66e6798368635..375f68e42531a614138fa7a204e73c0e61baa14a 100644 (file)
@@ -33,12 +33,10 @@ typedef struct xfs_extent {
  * conversion routine.
  */
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_extent_32 {
        __uint64_t      ext_start;
        __uint32_t      ext_len;
 } __attribute__((packed)) xfs_extent_32_t;
-#endif
 
 typedef struct xfs_extent_64 {
        __uint64_t      ext_start;
@@ -59,7 +57,6 @@ typedef struct xfs_efi_log_format {
        xfs_extent_t            efi_extents[1]; /* array of extents to free */
 } xfs_efi_log_format_t;
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_efi_log_format_32 {
        __uint16_t              efi_type;       /* efi log item type */
        __uint16_t              efi_size;       /* size of this item */
@@ -67,7 +64,6 @@ typedef struct xfs_efi_log_format_32 {
        __uint64_t              efi_id;         /* efi identifier */
        xfs_extent_32_t         efi_extents[1]; /* array of extents to free */
 } __attribute__((packed)) xfs_efi_log_format_32_t;
-#endif
 
 typedef struct xfs_efi_log_format_64 {
        __uint16_t              efi_type;       /* efi log item type */
@@ -90,7 +86,6 @@ typedef struct xfs_efd_log_format {
        xfs_extent_t            efd_extents[1]; /* array of extents freed */
 } xfs_efd_log_format_t;
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_efd_log_format_32 {
        __uint16_t              efd_type;       /* efd log item type */
        __uint16_t              efd_size;       /* size of this item */
@@ -98,7 +93,6 @@ typedef struct xfs_efd_log_format_32 {
        __uint64_t              efd_efi_id;     /* id of corresponding efi */
        xfs_extent_32_t         efd_extents[1]; /* array of extents freed */
 } __attribute__((packed)) xfs_efd_log_format_32_t;
-#endif
 
 typedef struct xfs_efd_log_format_64 {
        __uint16_t              efd_type;       /* efd log item type */
@@ -117,11 +111,10 @@ typedef struct xfs_efd_log_format_64 {
 #define        XFS_EFI_MAX_FAST_EXTENTS        16
 
 /*
- * Define EFI flags.
+ * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
  */
-#define        XFS_EFI_RECOVERED       0x1
-#define        XFS_EFI_COMMITTED       0x2
-#define        XFS_EFI_CANCELED        0x4
+#define        XFS_EFI_RECOVERED       1
+#define        XFS_EFI_COMMITTED       2
 
 /*
  * This is the "extent free intention" log item.  It is used
@@ -131,8 +124,8 @@ typedef struct xfs_efd_log_format_64 {
  */
 typedef struct xfs_efi_log_item {
        xfs_log_item_t          efi_item;
-       uint                    efi_flags;      /* misc flags */
-       uint                    efi_next_extent;
+       atomic_t                efi_next_extent;
+       unsigned long           efi_flags;      /* misc flags */
        xfs_efi_log_format_t    efi_format;
 } xfs_efi_log_item_t;
 
index 1ff04cc323ad98c5b5faddd5e760ac4c212aba2e..81dcf9448f9df0576d87996a0a753f7be1946cc3 100644 (file)
@@ -40,7 +40,6 @@ typedef struct xfs_inode_log_format {
        __int32_t               ilf_boffset;    /* off of inode in buffer */
 } xfs_inode_log_format_t;
 
-#ifndef HAVE_FORMAT32
 typedef struct xfs_inode_log_format_32 {
        __uint16_t              ilf_type;       /* inode log item type */
        __uint16_t              ilf_size;       /* size of this item */
@@ -56,7 +55,6 @@ typedef struct xfs_inode_log_format_32 {
        __int32_t               ilf_len;        /* len of inode buffer */
        __int32_t               ilf_boffset;    /* off of inode in buffer */
 } __attribute__((packed)) xfs_inode_log_format_32_t;
-#endif
 
 typedef struct xfs_inode_log_format_64 {
        __uint16_t              ilf_type;       /* inode log item type */
index d47b91f10822b98400a6d11961a8a12d4e930d76..916eb7db14d9a09ff541882bf12905d9fd5068ea 100644 (file)
@@ -19,7 +19,6 @@
 #define __XFS_LOG_H__
 
 /* get lsn fields */
-
 #define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
 #define BLOCK_LSN(lsn) ((uint)(lsn))
 
@@ -56,28 +55,18 @@ static inline xfs_lsn_t     _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 /*
  * Flags to xfs_log_reserve()
  *
- *     XFS_LOG_SLEEP:   If space is not available, sleep (default)
- *     XFS_LOG_NOSLEEP: If space is not available, return error
  *     XFS_LOG_PERM_RESERV: Permanent reservation.  When writes are
  *             performed against this type of reservation, the reservation
  *             is not decreased.  Long running transactions should use this.
  */
-#define XFS_LOG_SLEEP          0x0
-#define XFS_LOG_NOSLEEP                0x1
 #define XFS_LOG_PERM_RESERV    0x2
 
 /*
  * Flags to xfs_log_force()
  *
  *     XFS_LOG_SYNC:   Synchronous force in-core log to disk
- *     XFS_LOG_FORCE:  Start in-core log write now.
- *     XFS_LOG_URGE:   Start write within some window of time.
- *
- * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set.
  */
 #define XFS_LOG_SYNC           0x1
-#define XFS_LOG_FORCE          0x2
-#define XFS_LOG_URGE           0x4
 
 #endif /* __KERNEL__ */
 
@@ -110,15 +99,20 @@ static inline xfs_lsn_t    _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 #define XLOG_REG_TYPE_TRANSHDR         19
 #define XLOG_REG_TYPE_MAX              19
 
-#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
-
 typedef struct xfs_log_iovec {
-       xfs_caddr_t             i_addr;         /* beginning address of region */
+       void            *i_addr;        /* beginning address of region */
        int             i_len;          /* length in bytes of region */
        uint            i_type;         /* type of region */
 } xfs_log_iovec_t;
 
-typedef void* xfs_log_ticket_t;
+struct xfs_log_vec {
+       struct xfs_log_vec      *lv_next;       /* next lv in build list */
+       int                     lv_niovecs;     /* number of iovecs in lv */
+       struct xfs_log_iovec    *lv_iovecp;     /* iovec array */
+       struct xfs_log_item     *lv_item;       /* owner */
+       char                    *lv_buf;        /* formatted buffer */
+       int                     lv_buf_len;     /* size of formatted buffer */
+};
 
 /*
  * Structure used to pass callback function and the function's argument
@@ -134,17 +128,33 @@ typedef struct xfs_log_callback {
 #ifdef __KERNEL__
 /* Log manager interfaces */
 struct xfs_mount;
+struct xlog_in_core;
+struct xlog_ticket;
+struct xfs_log_item;
+struct xfs_item_ops;
+struct xfs_trans;
+
+void   xfs_log_item_init(struct xfs_mount      *mp,
+                       struct xfs_log_item     *item,
+                       int                     type,
+                       struct xfs_item_ops     *ops);
+
 xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
-                      xfs_log_ticket_t ticket,
-                      void             **iclog,
+                      struct xlog_ticket *ticket,
+                      struct xlog_in_core **iclog,
                       uint             flags);
 int      _xfs_log_force(struct xfs_mount *mp,
-                        xfs_lsn_t      lsn,
                         uint           flags,
                         int            *log_forced);
 void     xfs_log_force(struct xfs_mount        *mp,
-                       xfs_lsn_t               lsn,
                        uint                    flags);
+int      _xfs_log_force_lsn(struct xfs_mount *mp,
+                            xfs_lsn_t          lsn,
+                            uint               flags,
+                            int                *log_forced);
+void     xfs_log_force_lsn(struct xfs_mount    *mp,
+                           xfs_lsn_t           lsn,
+                           uint                flags);
 int      xfs_log_mount(struct xfs_mount        *mp,
                        struct xfs_buftarg      *log_target,
                        xfs_daddr_t             start_block,
@@ -153,34 +163,38 @@ int         xfs_log_mount_finish(struct xfs_mount *mp);
 void     xfs_log_move_tail(struct xfs_mount    *mp,
                            xfs_lsn_t           tail_lsn);
 int      xfs_log_notify(struct xfs_mount       *mp,
-                        void                   *iclog,
+                        struct xlog_in_core    *iclog,
                         xfs_log_callback_t     *callback_entry);
 int      xfs_log_release_iclog(struct xfs_mount *mp,
-                        void                    *iclog_hndl);
+                        struct xlog_in_core     *iclog);
 int      xfs_log_reserve(struct xfs_mount *mp,
                          int              length,
                          int              count,
-                         xfs_log_ticket_t *ticket,
+                         struct xlog_ticket **ticket,
                          __uint8_t        clientid,
                          uint             flags,
                          uint             t_type);
 int      xfs_log_write(struct xfs_mount *mp,
                        xfs_log_iovec_t  region[],
                        int              nentries,
-                       xfs_log_ticket_t ticket,
+                       struct xlog_ticket *ticket,
                        xfs_lsn_t        *start_lsn);
-int      xfs_log_unmount(struct xfs_mount *mp);
 int      xfs_log_unmount_write(struct xfs_mount *mp);
-void      xfs_log_unmount_dealloc(struct xfs_mount *mp);
+void      xfs_log_unmount(struct xfs_mount *mp);
 int      xfs_log_force_umount(struct xfs_mount *mp, int logerror);
 int      xfs_log_need_covered(struct xfs_mount *mp);
 
 void     xlog_iodone(struct xfs_buf *);
 
-#endif
-
+struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
+void     xfs_log_ticket_put(struct xlog_ticket *ticket);
 
-extern int xlog_debug;         /* set to 1 to enable real log */
+xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
 
+int    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+                               struct xfs_log_vec *log_vector,
+                               xfs_lsn_t *commit_lsn, int flags);
+bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 
+#endif
 #endif /* __XFS_LOG_H__ */
index e7d8f84443fab87ceee83561fb6b1f04685400ff..d5f8be8f4bf603cac8c3f0394759e5fefdd8d42e 100644 (file)
 #define __XFS_LOG_PRIV_H__
 
 struct xfs_buf;
-struct ktrace;
 struct log;
 struct xlog_ticket;
-struct xfs_buf_cancel;
 struct xfs_mount;
 
 /*
@@ -55,7 +53,6 @@ struct xfs_mount;
        BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
         XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 
-
 static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
 {
        return ((xfs_lsn_t)cycle << 32) | block;
@@ -134,7 +131,11 @@ static inline uint xlog_get_client_id(__be32 i)
  */
 #define XLOG_TIC_INITED                0x1     /* has been initialized */
 #define XLOG_TIC_PERM_RESERV   0x2     /* permanent reservation */
-#define XLOG_TIC_IN_Q          0x4
+
+#define XLOG_TIC_FLAGS \
+       { XLOG_TIC_INITED,      "XLOG_TIC_INITED" }, \
+       { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }
+
 #endif /* __KERNEL__ */
 
 #define XLOG_UNMOUNT_TYPE      0x556e  /* Un for Unmount */
@@ -147,8 +148,6 @@ static inline uint xlog_get_client_id(__be32 i)
 #define        XLOG_RECOVERY_NEEDED    0x4     /* log was recovered */
 #define XLOG_IO_ERROR          0x8     /* log hit an I/O error, and being
                                           shutdown */
-typedef __uint32_t xlog_tid_t;
-
 
 #ifdef __KERNEL__
 /*
@@ -241,10 +240,10 @@ typedef struct xlog_res {
 } xlog_res_t;
 
 typedef struct xlog_ticket {
-       sv_t               t_wait;       /* ticket wait queue            : 20 */
-       struct xlog_ticket *t_next;      /*                              :4|8 */
-       struct xlog_ticket *t_prev;      /*                              :4|8 */
+       wait_queue_head_t  t_wait;       /* ticket wait queue */
+       struct list_head   t_queue;      /* reserve/write queue */
        xlog_tid_t         t_tid;        /* transaction identifier       : 4  */
+       atomic_t           t_ref;        /* ticket reference count       : 4  */
        int                t_curr_res;   /* current reservation in bytes : 4  */
        int                t_unit_res;   /* unit reservation in bytes    : 4  */
        char               t_ocnt;       /* original count               : 1  */
@@ -309,6 +308,16 @@ typedef struct xlog_rec_ext_header {
 } xlog_rec_ext_header_t;
 
 #ifdef __KERNEL__
+
+/*
+ * Quite misnamed, because this union lays out the actual on-disk log buffer.
+ */
+typedef union xlog_in_core2 {
+       xlog_rec_header_t       hic_header;
+       xlog_rec_ext_header_t   hic_xheader;
+       char                    hic_sector[XLOG_HEADER_SIZE];
+} xlog_in_core_2_t;
+
 /*
  * - A log record header is 512 bytes.  There is plenty of room to grow the
  *     xlog_rec_header_t into the reserved space.
@@ -338,9 +347,9 @@ typedef struct xlog_rec_ext_header {
  * We'll put all the read-only and l_icloglock fields in the first cacheline,
  * and move everything else out to subsequent cachelines.
  */
-typedef struct xlog_iclog_fields {
-       sv_t                    ic_force_wait;
-       sv_t                    ic_write_wait;
+typedef struct xlog_in_core {
+       wait_queue_head_t       ic_force_wait;
+       wait_queue_head_t       ic_write_wait;
        struct xlog_in_core     *ic_next;
        struct xlog_in_core     *ic_prev;
        struct xfs_buf          *ic_bp;
@@ -348,11 +357,8 @@ typedef struct xlog_iclog_fields {
        int                     ic_size;
        int                     ic_offset;
        int                     ic_bwritecnt;
-       ushort_t                ic_state;
+       unsigned short          ic_state;
        char                    *ic_datap;      /* pointer to iclog data */
-#ifdef XFS_LOG_TRACE
-       struct ktrace           *ic_trace;
-#endif
 
        /* Callback structures need their own cacheline */
        spinlock_t              ic_callback_lock ____cacheline_aligned_in_smp;
@@ -361,39 +367,108 @@ typedef struct xlog_iclog_fields {
 
        /* reference counts need their own cacheline */
        atomic_t                ic_refcnt ____cacheline_aligned_in_smp;
-} xlog_iclog_fields_t;
+       xlog_in_core_2_t        *ic_data;
+#define ic_header      ic_data->hic_header
+} xlog_in_core_t;
 
-typedef union xlog_in_core2 {
-       xlog_rec_header_t       hic_header;
-       xlog_rec_ext_header_t   hic_xheader;
-       char                    hic_sector[XLOG_HEADER_SIZE];
-} xlog_in_core_2_t;
+/*
+ * The CIL context is used to aggregate per-transaction details as well be
+ * passed to the iclog for checkpoint post-commit processing.  After being
+ * passed to the iclog, another context needs to be allocated for tracking the
+ * next set of transactions to be aggregated into a checkpoint.
+ */
+struct xfs_cil;
+
+struct xfs_cil_ctx {
+       struct xfs_cil          *cil;
+       xfs_lsn_t               sequence;       /* chkpt sequence # */
+       xfs_lsn_t               start_lsn;      /* first LSN of chkpt commit */
+       xfs_lsn_t               commit_lsn;     /* chkpt commit record lsn */
+       struct xlog_ticket      *ticket;        /* chkpt ticket */
+       int                     nvecs;          /* number of regions */
+       int                     space_used;     /* aggregate size of regions */
+       struct list_head        busy_extents;   /* busy extents in chkpt */
+       struct xfs_log_vec      *lv_chain;      /* logvecs being pushed */
+       xfs_log_callback_t      log_cb;         /* completion callback hook. */
+       struct list_head        committing;     /* ctx committing list */
+};
 
-typedef struct xlog_in_core {
-       xlog_iclog_fields_t     hic_fields;
-       xlog_in_core_2_t        *hic_data;
-} xlog_in_core_t;
+/*
+ * Committed Item List structure
+ *
+ * This structure is used to track log items that have been committed but not
+ * yet written into the log. It is used only when the delayed logging mount
+ * option is enabled.
+ *
+ * This structure tracks the list of committing checkpoint contexts so
+ * we can avoid the problem of having to hold out new transactions during a
+ * flush until we have the commit record LSN of the checkpoint. We can
+ * traverse the list of committing contexts in xlog_cil_force_lsn() to find a
+ * sequence match and extract the commit LSN directly from there. If the
+ * checkpoint is still in the process of committing, we can block waiting for
+ * the commit LSN to be determined as well. This should make synchronous
+ * operations almost as efficient as the old logging methods.
+ */
+struct xfs_cil {
+       struct log              *xc_log;
+       struct list_head        xc_cil;
+       spinlock_t              xc_cil_lock;
+       struct xfs_cil_ctx      *xc_ctx;
+       struct rw_semaphore     xc_ctx_lock;
+       struct list_head        xc_committing;
+       wait_queue_head_t       xc_commit_wait;
+       xfs_lsn_t               xc_current_sequence;
+};
 
 /*
- * Defines to save our code from this glop.
+ * The amount of log space we allow the CIL to aggregate is difficult to size.
+ * Whatever we choose, we have to make sure we can get a reservation for the
+ * log space effectively, that it is large enough to capture sufficient
+ * relogging to reduce log buffer IO significantly, but it is not too large for
+ * the log or induces too much latency when writing out through the iclogs. We
+ * track both space consumed and the number of vectors in the checkpoint
+ * context, so we need to decide which to use for limiting.
+ *
+ * Every log buffer we write out during a push needs a header reserved, which
+ * is at least one sector and more for v2 logs. Hence we need a reservation of
+ * at least 512 bytes per 32k of log space just for the LR headers. That means
+ * 16KB of reservation per megabyte of delayed logging space we will consume,
+ * plus various headers.  The number of headers will vary based on the num of
+ * io vectors, so limiting on a specific number of vectors is going to result
+ * in transactions of varying size. IOWs, it is more consistent to track and
+ * limit space consumed in the log rather than by the number of objects being
+ * logged in order to prevent checkpoint ticket overruns.
+ *
+ * Further, use of static reservations through the log grant mechanism is
+ * problematic. It introduces a lot of complexity (e.g. reserve grant vs write
+ * grant) and a significant deadlock potential because regranting write space
+ * can block on log pushes. Hence if we have to regrant log space during a log
+ * push, we can deadlock.
+ *
+ * However, we can avoid this by use of a dynamic "reservation stealing"
+ * technique during transaction commit whereby unused reservation space in the
+ * transaction ticket is transferred to the CIL ctx commit ticket to cover the
+ * space needed by the checkpoint transaction. This means that we never need to
+ * specifically reserve space for the CIL checkpoint transaction, nor do we
+ * need to regrant space once the checkpoint completes. This also means the
+ * checkpoint transaction ticket is specific to the checkpoint context, rather
+ * than the CIL itself.
+ *
+ * With dynamic reservations, we can effectively make up arbitrary limits for
+ * the checkpoint size so long as they don't violate any other size rules.
+ * Recovery imposes a rule that no transaction exceed half the log, so we are
+ * limited by that.  Furthermore, the log transaction reservation subsystem
+ * tries to keep 25% of the log free, so we need to keep below that limit or we
+ * risk running out of free log space to start any new transactions.
+ *
+ * In order to keep background CIL push efficient, we will set a lower
+ * threshold at which background pushing is attempted without blocking current
+ * transaction commits.  A separate, higher bound defines when CIL pushes are
+ * enforced to ensure we stay within our maximum checkpoint size bounds,
+ * yet give us plenty of space for aggregation on large logs.
  */
-#define        ic_force_wait   hic_fields.ic_force_wait
-#define ic_write_wait  hic_fields.ic_write_wait
-#define        ic_next         hic_fields.ic_next
-#define        ic_prev         hic_fields.ic_prev
-#define        ic_bp           hic_fields.ic_bp
-#define        ic_log          hic_fields.ic_log
-#define        ic_callback     hic_fields.ic_callback
-#define        ic_callback_lock hic_fields.ic_callback_lock
-#define        ic_callback_tail hic_fields.ic_callback_tail
-#define        ic_trace        hic_fields.ic_trace
-#define        ic_size         hic_fields.ic_size
-#define        ic_offset       hic_fields.ic_offset
-#define        ic_refcnt       hic_fields.ic_refcnt
-#define        ic_bwritecnt    hic_fields.ic_bwritecnt
-#define        ic_state        hic_fields.ic_state
-#define ic_datap       hic_fields.ic_datap
-#define ic_header      hic_data->hic_header
+#define XLOG_CIL_SPACE_LIMIT(log)      (log->l_logsize >> 3)
+#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
 
 /*
  * The reservation head lsn is not made up of a cycle number and block number.
@@ -404,17 +479,17 @@ typedef struct xlog_in_core {
 typedef struct log {
        /* The following fields don't need locking */
        struct xfs_mount        *l_mp;          /* mount point */
+       struct xfs_ail          *l_ailp;        /* AIL log is working with */
+       struct xfs_cil          *l_cilp;        /* CIL log is working with */
        struct xfs_buf          *l_xbuf;        /* extra buffer for log
                                                 * wrapping */
        struct xfs_buftarg      *l_targ;        /* buftarg of log */
        uint                    l_flags;
        uint                    l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
-       struct xfs_buf_cancel   **l_buf_cancel_table;
+       struct list_head        *l_buf_cancel_table;
        int                     l_iclog_hsize;  /* size of iclog header */
        int                     l_iclog_heads;  /* # of iclog header sectors */
-       uint                    l_sectbb_log;   /* log2 of sector size in BBs */
-       uint                    l_sectbb_mask;  /* sector size (in BBs)
-                                                * alignment mask */
+       uint                    l_sectBBsize;   /* sector size in BBs (2^n) */
        int                     l_iclog_size;   /* size of log in bytes */
        int                     l_iclog_size_log; /* log power size of log */
        int                     l_iclog_bufs;   /* number of iclog buffers */
@@ -423,33 +498,40 @@ typedef struct log {
        int                     l_logBBsize;    /* size of log in BB chunks */
 
        /* The following block of fields are changed while holding icloglock */
-       sv_t                    l_flush_wait ____cacheline_aligned_in_smp;
+       wait_queue_head_t       l_flush_wait ____cacheline_aligned_in_smp;
                                                /* waiting for iclog flush */
        int                     l_covered_state;/* state of "covering disk
                                                 * log entries" */
        xlog_in_core_t          *l_iclog;       /* head log queue       */
        spinlock_t              l_icloglock;    /* grab to change iclog state */
-       xfs_lsn_t               l_tail_lsn;     /* lsn of 1st LR with unflushed
-                                                * buffers */
-       xfs_lsn_t               l_last_sync_lsn;/* lsn of last LR on disk */
        int                     l_curr_cycle;   /* Cycle number of log writes */
        int                     l_prev_cycle;   /* Cycle number before last
                                                 * block increment */
        int                     l_curr_block;   /* current logical log block */
        int                     l_prev_block;   /* previous logical log block */
 
-       /* The following block of fields are changed while holding grant_lock */
-       spinlock_t              l_grant_lock ____cacheline_aligned_in_smp;
-       xlog_ticket_t           *l_reserve_headq;
-       xlog_ticket_t           *l_write_headq;
-       int                     l_grant_reserve_cycle;
-       int                     l_grant_reserve_bytes;
-       int                     l_grant_write_cycle;
-       int                     l_grant_write_bytes;
-
-#ifdef XFS_LOG_TRACE
-       struct ktrace           *l_grant_trace;
-#endif
+       /*
+        * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
+        * read without needing to hold specific locks. To avoid operations
+        * contending with other hot objects, place each of them on a separate
+        * cacheline.
+        */
+       /* lsn of last LR on disk */
+       atomic64_t              l_last_sync_lsn ____cacheline_aligned_in_smp;
+       /* lsn of 1st LR with unflushed buffers */
+       atomic64_t              l_tail_lsn ____cacheline_aligned_in_smp;
+
+       /*
+        * ticket grant locks, queues and accounting have their own cachelines
+        * as these are quite hot and can be operated on concurrently.
+        */
+       spinlock_t              l_grant_reserve_lock ____cacheline_aligned_in_smp;
+       struct list_head        l_reserveq;
+       atomic64_t              l_grant_reserve_head;
+
+       spinlock_t              l_grant_write_lock ____cacheline_aligned_in_smp;
+       struct list_head        l_writeq;
+       atomic64_t              l_grant_write_head;
 
        /* The following field are used for debugging; need to hold icloglock */
 #ifdef DEBUG
@@ -458,30 +540,108 @@ typedef struct log {
 
 } xlog_t;
 
-#define XLOG_FORCED_SHUTDOWN(log)      ((log)->l_flags & XLOG_IO_ERROR)
+#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
+       ((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
 
+#define XLOG_FORCED_SHUTDOWN(log)      ((log)->l_flags & XLOG_IO_ERROR)
 
 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
-extern int      xlog_find_tail(xlog_t  *log,
-                               xfs_daddr_t *head_blk,
-                               xfs_daddr_t *tail_blk);
 extern int      xlog_recover(xlog_t *log);
 extern int      xlog_recover_finish(xlog_t *log);
 extern void     xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
-extern void     xlog_recover_process_iunlinks(xlog_t *log);
 
-extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
-extern void     xlog_put_bp(struct xfs_buf *);
-extern int      xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
+extern kmem_zone_t *xfs_log_ticket_zone;
+struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
+                               int count, char client, uint xflags,
+                               int alloc_flags);
+
 
-extern kmem_zone_t     *xfs_log_ticket_zone;
+static inline void
+xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
+{
+       *ptr += bytes;
+       *len -= bytes;
+       *off += bytes;
+}
 
-/* iclog tracing */
-#define XLOG_TRACE_GRAB_FLUSH  1
-#define XLOG_TRACE_REL_FLUSH   2
-#define XLOG_TRACE_SLEEP_FLUSH 3
-#define XLOG_TRACE_WAKE_FLUSH  4
+void   xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+int    xlog_write(struct log *log, struct xfs_log_vec *log_vector,
+                               struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
+                               xlog_in_core_t **commit_iclog, uint flags);
+
+/*
+ * When we crack an atomic LSN, we sample it first so that the value will not
+ * change while we are cracking it into the component values. This means we
+ * will always get consistent component values to work from. This should always
+ * be used to sample and crack LSNs that are stored and updated in atomic
+ * variables.
+ */
+static inline void
+xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
+{
+       xfs_lsn_t val = atomic64_read(lsn);
+
+       *cycle = CYCLE_LSN(val);
+       *block = BLOCK_LSN(val);
+}
+
+/*
+ * Calculate and assign a value to an atomic LSN variable from component pieces.
+ */
+static inline void
+xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
+{
+       atomic64_set(lsn, xlog_assign_lsn(cycle, block));
+}
+
+/*
+ * When we crack the grant head, we sample it first so that the value will not
+ * change while we are cracking it into the component values. This means we
+ * will always get consistent component values to work from.
+ */
+static inline void
+xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
+{
+       *cycle = val >> 32;
+       *space = val & 0xffffffff;
+}
+
+static inline void
+xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
+{
+       xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
+}
+
+static inline int64_t
+xlog_assign_grant_head_val(int cycle, int space)
+{
+       return ((int64_t)cycle << 32) | space;
+}
+
+static inline void
+xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
+{
+       atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
+}
+
+/*
+ * Committed Item List interfaces
+ */
+int    xlog_cil_init(struct log *log);
+void   xlog_cil_init_post_recovery(struct log *log);
+void   xlog_cil_destroy(struct log *log);
+
+/*
+ * CIL force routines
+ */
+xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+
+static inline void
+xlog_cil_force(struct log *log)
+{
+       xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
+}
 
 /*
  * Unmount record type is used as a pseudo transaction type for the ticket.
@@ -489,6 +649,21 @@ extern kmem_zone_t *xfs_log_ticket_zone;
  */
 #define XLOG_UNMOUNT_REC_TYPE  (-1U)
 
+/*
+ * Wrapper function for waiting on a wait queue serialised against wakeups
+ * by a spinlock. This matches the semantics of all the wait queues used in the
+ * log code.
+ */
+static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
+{
+       DECLARE_WAITQUEUE(wait, current);
+
+       add_wait_queue_exclusive(wq, &wait);
+       __set_current_state(TASK_UNINTERRUPTIBLE);
+       spin_unlock(lock);
+       schedule();
+       remove_wait_queue(wq, &wait);
+}
 #endif /* __KERNEL__ */
 
 #endif /* __XFS_LOG_PRIV_H__ */
index b2254555530106dc907f58cecf3099bcbae56619..1c55ccbb379d0b2a7dc744cb3c869ce51ed5b14f 100644 (file)
 #define XLOG_RHASH(tid)        \
        ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
 
-#define XLOG_MAX_REGIONS_IN_ITEM   (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1)
+#define XLOG_MAX_REGIONS_IN_ITEM   (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1)
 
 
 /*
  * item headers are in ri_buf[0].  Additional buffers follow.
  */
 typedef struct xlog_recover_item {
-       struct xlog_recover_item *ri_next;
-       struct xlog_recover_item *ri_prev;
-       int                      ri_type;
-       int                      ri_cnt;        /* count of regions found */
-       int                      ri_total;      /* total regions */
-       xfs_log_iovec_t          *ri_buf;       /* ptr to regions buffer */
+       struct list_head        ri_list;
+       int                     ri_type;
+       int                     ri_cnt; /* count of regions found */
+       int                     ri_total;       /* total regions */
+       xfs_log_iovec_t         *ri_buf;        /* ptr to regions buffer */
 } xlog_recover_item_t;
 
 struct xlog_tid;
 typedef struct xlog_recover {
-       struct xlog_recover *r_next;
-       xlog_tid_t          r_log_tid;          /* log's transaction id */
-       xfs_trans_header_t  r_theader;          /* trans header for partial */
-       int                 r_state;            /* not needed */
-       xfs_lsn_t           r_lsn;              /* xact lsn */
-       xlog_recover_item_t *r_itemq;           /* q for items */
+       struct hlist_node       r_list;
+       xlog_tid_t              r_log_tid;      /* log's transaction id */
+       xfs_trans_header_t      r_theader;      /* trans header for partial */
+       int                     r_state;        /* not needed */
+       xfs_lsn_t               r_lsn;          /* xact lsn */
+       struct list_head        r_itemq;        /* q for items */
 } xlog_recover_t;
 
 #define ITEM_TYPE(i)   (*(ushort *)(i)->ri_buf[0].i_addr)
diff --git a/include/xfs_trace.h b/include/xfs_trace.h
new file mode 100644 (file)
index 0000000..e73a4d4
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2011 RedHat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __TRACE_H__
+#define __TRACE_H__
+
+#define trace_xfs_log_recover_item_reorder_head(a,b,c,d)       ((void) 0)
+#define trace_xfs_log_recover_item_reorder_tail(a,b,c,d)       ((void) 0)
+#define trace_xfs_log_recover_item_add_cont(a,b,c,d)   ((void) 0)
+#define trace_xfs_log_recover_item_add(a,b,c,d)        ((void) 0)
+
+#endif /* __TRACE_H__ */
index 1d89d50a5b99e64052143398ca910e679a254085..c2042b736b81131a780703d8a5907c848793eebb 100644 (file)
@@ -49,6 +49,15 @@ typedef struct xfs_trans_header {
 #define        XFS_LI_DQUOT            0x123d
 #define        XFS_LI_QUOTAOFF         0x123e
 
+#define XFS_LI_TYPE_DESC \
+       { XFS_LI_EFI,           "XFS_LI_EFI" }, \
+       { XFS_LI_EFD,           "XFS_LI_EFD" }, \
+       { XFS_LI_IUNLINK,       "XFS_LI_IUNLINK" }, \
+       { XFS_LI_INODE,         "XFS_LI_INODE" }, \
+       { XFS_LI_BUF,           "XFS_LI_BUF" }, \
+       { XFS_LI_DQUOT,         "XFS_LI_DQUOT" }, \
+       { XFS_LI_QUOTAOFF,      "XFS_LI_QUOTAOFF" }
+
 /*
  * Transaction types.  Used to distinguish types of buffers.
  */
@@ -68,7 +77,7 @@ typedef struct xfs_trans_header {
 #define XFS_TRANS_GROWFS               14
 #define XFS_TRANS_STRAT_WRITE          15
 #define XFS_TRANS_DIOSTRAT             16
-#define        XFS_TRANS_WRITE_SYNC            17
+/* 17 was XFS_TRANS_WRITE_SYNC */
 #define        XFS_TRANS_WRITEID               18
 #define        XFS_TRANS_ADDAFORK              19
 #define        XFS_TRANS_ATTRINVAL             20
@@ -97,9 +106,54 @@ typedef struct xfs_trans_header {
 #define        XFS_TRANS_GROWFSRT_FREE         39
 #define        XFS_TRANS_SWAPEXT               40
 #define        XFS_TRANS_SB_COUNT              41
-#define        XFS_TRANS_TYPE_MAX              41
+#define        XFS_TRANS_CHECKPOINT            42
+#define        XFS_TRANS_TYPE_MAX              42
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
+#define XFS_TRANS_TYPES \
+       { XFS_TRANS_SETATTR_NOT_SIZE,   "SETATTR_NOT_SIZE" }, \
+       { XFS_TRANS_SETATTR_SIZE,       "SETATTR_SIZE" }, \
+       { XFS_TRANS_INACTIVE,           "INACTIVE" }, \
+       { XFS_TRANS_CREATE,             "CREATE" }, \
+       { XFS_TRANS_CREATE_TRUNC,       "CREATE_TRUNC" }, \
+       { XFS_TRANS_TRUNCATE_FILE,      "TRUNCATE_FILE" }, \
+       { XFS_TRANS_REMOVE,             "REMOVE" }, \
+       { XFS_TRANS_LINK,               "LINK" }, \
+       { XFS_TRANS_RENAME,             "RENAME" }, \
+       { XFS_TRANS_MKDIR,              "MKDIR" }, \
+       { XFS_TRANS_RMDIR,              "RMDIR" }, \
+       { XFS_TRANS_SYMLINK,            "SYMLINK" }, \
+       { XFS_TRANS_SET_DMATTRS,        "SET_DMATTRS" }, \
+       { XFS_TRANS_GROWFS,             "GROWFS" }, \
+       { XFS_TRANS_STRAT_WRITE,        "STRAT_WRITE" }, \
+       { XFS_TRANS_DIOSTRAT,           "DIOSTRAT" }, \
+       { XFS_TRANS_WRITEID,            "WRITEID" }, \
+       { XFS_TRANS_ADDAFORK,           "ADDAFORK" }, \
+       { XFS_TRANS_ATTRINVAL,          "ATTRINVAL" }, \
+       { XFS_TRANS_ATRUNCATE,          "ATRUNCATE" }, \
+       { XFS_TRANS_ATTR_SET,           "ATTR_SET" }, \
+       { XFS_TRANS_ATTR_RM,            "ATTR_RM" }, \
+       { XFS_TRANS_ATTR_FLAG,          "ATTR_FLAG" }, \
+       { XFS_TRANS_CLEAR_AGI_BUCKET,   "CLEAR_AGI_BUCKET" }, \
+       { XFS_TRANS_QM_SBCHANGE,        "QM_SBCHANGE" }, \
+       { XFS_TRANS_QM_QUOTAOFF,        "QM_QUOTAOFF" }, \
+       { XFS_TRANS_QM_DQALLOC,         "QM_DQALLOC" }, \
+       { XFS_TRANS_QM_SETQLIM,         "QM_SETQLIM" }, \
+       { XFS_TRANS_QM_DQCLUSTER,       "QM_DQCLUSTER" }, \
+       { XFS_TRANS_QM_QINOCREATE,      "QM_QINOCREATE" }, \
+       { XFS_TRANS_QM_QUOTAOFF_END,    "QM_QOFF_END" }, \
+       { XFS_TRANS_SB_UNIT,            "SB_UNIT" }, \
+       { XFS_TRANS_FSYNC_TS,           "FSYNC_TS" }, \
+       { XFS_TRANS_GROWFSRT_ALLOC,     "GROWFSRT_ALLOC" }, \
+       { XFS_TRANS_GROWFSRT_ZERO,      "GROWFSRT_ZERO" }, \
+       { XFS_TRANS_GROWFSRT_FREE,      "GROWFSRT_FREE" }, \
+       { XFS_TRANS_SWAPEXT,            "SWAPEXT" }, \
+       { XFS_TRANS_SB_COUNT,           "SB_COUNT" }, \
+       { XFS_TRANS_CHECKPOINT,         "CHECKPOINT" }, \
+       { XFS_TRANS_DUMMY1,             "DUMMY1" }, \
+       { XFS_TRANS_DUMMY2,             "DUMMY2" }, \
+       { XLOG_UNMOUNT_REC_TYPE,        "UNMOUNT" }
+
 /*
  * This structure is used to track log items associated with
  * a transaction.  It points to the log item and keeps some
@@ -107,106 +161,14 @@ typedef struct xfs_trans_header {
  * the amount of space needed to log the item it describes
  * once we get to commit processing (see xfs_trans_commit()).
  */
-typedef struct xfs_log_item_desc {
+struct xfs_log_item_desc {
        struct xfs_log_item     *lid_item;
-       ushort          lid_size;
-       unsigned char   lid_flags;
-       unsigned char   lid_index;
-} xfs_log_item_desc_t;
+       ushort                  lid_size;
+       unsigned char           lid_flags;
+       struct list_head        lid_trans;
+};
 
 #define XFS_LID_DIRTY          0x1
-#define XFS_LID_PINNED         0x2
-#define XFS_LID_BUF_STALE      0x8
-
-/*
- * This structure is used to maintain a chunk list of log_item_desc
- * structures. The free field is a bitmask indicating which descriptors
- * in this chunk's array are free.  The unused field is the first value
- * not used since this chunk was allocated.
- */
-#define        XFS_LIC_NUM_SLOTS       15
-typedef struct xfs_log_item_chunk {
-       struct xfs_log_item_chunk       *lic_next;
-       ushort                          lic_free;
-       ushort                          lic_unused;
-       xfs_log_item_desc_t             lic_descs[XFS_LIC_NUM_SLOTS];
-} xfs_log_item_chunk_t;
-
-#define        XFS_LIC_MAX_SLOT        (XFS_LIC_NUM_SLOTS - 1)
-#define        XFS_LIC_FREEMASK        ((1 << XFS_LIC_NUM_SLOTS) - 1)
-
-
-/*
- * Initialize the given chunk.  Set the chunk's free descriptor mask
- * to indicate that all descriptors are free.  The caller gets to set
- * lic_unused to the right value (0 matches all free).  The
- * lic_descs.lid_index values are set up as each desc is allocated.
- */
-static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
-{
-       cp->lic_free = XFS_LIC_FREEMASK;
-}
-
-static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
-{
-       cp->lic_descs[slot].lid_index = (unsigned char)(slot);
-}
-
-static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
-{
-       return cp->lic_free & XFS_LIC_FREEMASK;
-}
-
-static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
-{
-       cp->lic_free = XFS_LIC_FREEMASK;
-}
-
-static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
-{
-       return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
-}
-
-static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
-{
-       return (cp->lic_free & (1 << slot));
-}
-
-static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
-{
-       cp->lic_free &= ~(1 << slot);
-}
-
-static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
-{
-       cp->lic_free |= 1 << slot;
-}
-
-static inline xfs_log_item_desc_t *
-xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
-{
-       return &(cp->lic_descs[slot]);
-}
-
-static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
-{
-       return (uint)dp->lid_index;
-}
-
-/*
- * Calculate the address of a chunk given a descriptor pointer:
- * dp - dp->lid_index give the address of the start of the lic_descs array.
- * From this we subtract the offset of the lic_descs field in a chunk.
- * All of this yields the address of the chunk, which is
- * cast to a chunk pointer.
- */
-static inline xfs_log_item_chunk_t *
-xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
-{
-       return (xfs_log_item_chunk_t*) \
-               (((xfs_caddr_t)((dp) - (dp)->lid_index)) - \
-               (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
-}
 
 #define        XFS_TRANS_MAGIC         0x5452414E      /* 'TRAN' */
 /*
@@ -222,8 +184,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 /*
  * Values for call flags parameter.
  */
-#define        XFS_TRANS_NOSLEEP               0x1
-#define        XFS_TRANS_WAIT                  0x2
 #define        XFS_TRANS_RELEASE_LOG_RES       0x4
 #define        XFS_TRANS_ABORT                 0x8
 
@@ -246,24 +206,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 #define        XFS_TRANS_SB_REXTSLOG           0x00002000
 
 
-/*
- * Various log reservation values.
- * These are based on the size of the file system block
- * because that is what most transactions manipulate.
- * Each adds in an additional 128 bytes per item logged to
- * try to account for the overhead of the transaction mechanism.
- *
- * Note:
- * Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish()
- * call.  This is because the number in the worst case is quite high
- * and quite unusual.  In order to fix this we need to change
- * xfs_bmap_finish() to free extents in only a single AG at a time.
- * This will require changes to the EFI code as well, however, so that
- * the EFI for the extents not freed is logged again in each transaction.
- * See bug 261917.
- */
-
 /*
  * Per-extent log reservation for the allocation btree changes
  * involved in freeing or allocating an extent.
@@ -288,429 +230,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
        (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
         XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
 
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents.  This gives:
- *    the inode getting the new extents: inode size
- *    the inode\'s bmap btree: max depth * block size
- *    the agfs of the ags from which the extents are allocated: 2 * sector
- *    the superblock free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- *    the agfs of the ags containing the blocks: 2 * sector size
- *    the agfls of the ags containing the blocks: 2 * sector size
- *    the super block free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_WRITE_LOG_RES(mp) \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
-        ((2 * (mp)->m_sb.sb_sectsize) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
 
 #define        XFS_WRITE_LOG_RES(mp)   ((mp)->m_reservations.tr_write)
-
-/*
- * In truncating a file we free up to two extents at once.  We can modify:
- *    the inode being truncated: inode size
- *    the inode\'s bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *             4 exts * 2 trees * (2 * max depth - 1) * block size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define        XFS_CALC_ITRUNCATE_LOG_RES(mp) \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
-         (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
-        ((4 * (mp)->m_sb.sb_sectsize) + \
-         (4 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 4) + \
-         (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
-         (128 * 5) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-          (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
-           XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_ITRUNCATE_LOG_RES(mp)   ((mp)->m_reservations.tr_itruncate)
-
-/*
- * In renaming a files we can modify:
- *    the four inodes involved: 4 * inode size
- *    the two directory btrees: 2 * (max depth + v2) * dir block size
- *    the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- *     of bmap blocks) giving:
- *    the agf for the ags in which the blocks live: 3 * sector size
- *    the agfl for the ags in which the blocks live: 3 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_RENAME_LOG_RES(mp) \
-       (MAX( \
-        ((4 * (mp)->m_sb.sb_inodesize) + \
-         (2 * XFS_DIROP_LOG_RES(mp)) + \
-         (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
-        ((3 * (mp)->m_sb.sb_sectsize) + \
-         (3 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 3) + \
-         (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
-
 #define        XFS_RENAME_LOG_RES(mp)  ((mp)->m_reservations.tr_rename)
-
-/*
- * For creating a link to an inode:
- *    the parent directory inode: inode size
- *    the linked inode: inode size
- *    the directory btree could split: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- *    the agf for the ag in which the blocks live: sector size
- *    the agfl for the ag in which the blocks live: sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_LINK_LOG_RES(mp) \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         XFS_DIROP_LOG_RES(mp) + \
-         (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
-        ((mp)->m_sb.sb_sectsize + \
-         (mp)->m_sb.sb_sectsize + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_LINK_LOG_RES(mp)    ((mp)->m_reservations.tr_link)
-
-/*
- * For removing a directory entry we can modify:
- *    the parent directory inode: inode size
- *    the removed inode: inode size
- *    the directory btree could join: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_REMOVE_LOG_RES(mp)     \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         XFS_DIROP_LOG_RES(mp) + \
-         (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
-        ((2 * (mp)->m_sb.sb_sectsize) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
 #define        XFS_REMOVE_LOG_RES(mp)  ((mp)->m_reservations.tr_remove)
-
-/*
- * For symlink we can modify:
- *    the parent directory inode: inode size
- *    the new inode: inode size
- *    the inode btree entry: 1 block
- *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode\'s bmap btree: (max depth + v2) * block size
- *    the blocks for the symlink: 1 KB
- * Or in the first xact we allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_SYMLINK_LOG_RES(mp)            \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B(mp, 1) + \
-         XFS_DIROP_LOG_RES(mp) + \
-         1024 + \
-         (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
-        (2 * (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-         XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
-
-/*
- * For create we can modify:
- *    the parent directory inode: inode size
- *    the new inode: inode size
- *    the inode btree entry: block size
- *    the superblock for the nlink flag: sector size
- *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode\'s bmap btree: (max depth + v2) * block size
- * Or in the first xact we allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the superblock for the nlink flag: sector size
- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define        XFS_CALC_CREATE_LOG_RES(mp)             \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B(mp, 1) + \
-         XFS_DIROP_LOG_RES(mp) + \
-         (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
-        (3 * (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-         XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_CREATE_LOG_RES(mp)  ((mp)->m_reservations.tr_create)
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-#define        XFS_CALC_MKDIR_LOG_RES(mp)      XFS_CALC_CREATE_LOG_RES(mp)
-
 #define        XFS_MKDIR_LOG_RES(mp)   ((mp)->m_reservations.tr_mkdir)
-
-/*
- * In freeing an inode we can modify:
- *    the inode being freed: inode size
- *    the super block free inode counter: sector size
- *    the agi hash list and counters: sector size
- *    the inode btree entry: block size
- *    the on disk inode before ours in the agi hash list: inode cluster size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define        XFS_CALC_IFREE_LOG_RES(mp) \
-       ((mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_sectsize + \
-        (mp)->m_sb.sb_sectsize + \
-        XFS_FSB_TO_B((mp), 1) + \
-        MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
-        (128 * 5) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
-
 #define        XFS_IFREE_LOG_RES(mp)   ((mp)->m_reservations.tr_ifree)
-
-/*
- * When only changing the inode we log the inode and possibly the superblock
- * We also add a bit of slop for the transaction stuff.
- */
-#define        XFS_CALC_ICHANGE_LOG_RES(mp)    ((mp)->m_sb.sb_inodesize + \
-                                        (mp)->m_sb.sb_sectsize + 512)
-
 #define        XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
-
-/*
- * Growing the data section of the filesystem.
- *     superblock
- *     agi and agf
- *     allocation btrees
- */
-#define        XFS_CALC_GROWDATA_LOG_RES(mp) \
-       ((mp)->m_sb.sb_sectsize * 3 + \
-        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-        (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define        XFS_GROWDATA_LOG_RES(mp)    ((mp)->m_reservations.tr_growdata)
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- *     superblock: sector size
- *     agf of the ag from which the extent is allocated: sector size
- *     bmap btree for bitmap/summary inode: max depth * blocksize
- *     bitmap/summary inode: inode size
- *     allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-#define        XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
-       (2 * (mp)->m_sb.sb_sectsize + \
-        XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
-        (mp)->m_sb.sb_inodesize + \
-        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-        (128 * \
-         (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define        XFS_GROWRTALLOC_LOG_RES(mp)     ((mp)->m_reservations.tr_growrtalloc)
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- *     one bitmap/summary block: blocksize
- */
-#define        XFS_CALC_GROWRTZERO_LOG_RES(mp) \
-       ((mp)->m_sb.sb_blocksize + 128)
-
 #define        XFS_GROWRTZERO_LOG_RES(mp)      ((mp)->m_reservations.tr_growrtzero)
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- *     superblock: sector size
- *     bitmap inode: inode size
- *     summary inode: inode size
- *     one bitmap block: blocksize
- *     summary blocks: new summary size
- */
-#define        XFS_CALC_GROWRTFREE_LOG_RES(mp) \
-       ((mp)->m_sb.sb_sectsize + \
-        2 * (mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_blocksize + \
-        (mp)->m_rsumsize + \
-        (128 * 5))
-
 #define        XFS_GROWRTFREE_LOG_RES(mp)      ((mp)->m_reservations.tr_growrtfree)
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- *     inode
- */
-#define        XFS_CALC_SWRITE_LOG_RES(mp) \
-       ((mp)->m_sb.sb_inodesize + 128)
-
 #define        XFS_SWRITE_LOG_RES(mp)  ((mp)->m_reservations.tr_swrite)
-
 /*
  * Logging the inode timestamps on an fsync -- same as SWRITE
  * as long as SWRITE logs the entire inode core
  */
 #define XFS_FSYNC_TS_LOG_RES(mp)        ((mp)->m_reservations.tr_swrite)
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- *     inode
- */
-#define        XFS_CALC_WRITEID_LOG_RES(mp) \
-       ((mp)->m_sb.sb_inodesize + 128)
-
 #define        XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Converting the inode from non-attributed to attributed.
- *     the inode being converted: inode size
- *     agf block and superblock (for block allocation)
- *     the new block (directory sized)
- *     bmap blocks for the new directory block
- *     allocation btrees
- */
-#define        XFS_CALC_ADDAFORK_LOG_RES(mp)   \
-       ((mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_sectsize * 2 + \
-        (mp)->m_dirblksize + \
-        XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
-        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-        (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
-                XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define        XFS_ADDAFORK_LOG_RES(mp)        ((mp)->m_reservations.tr_addafork)
-
-/*
- * Removing the attribute fork of a file
- *    the inode being truncated: inode size
- *    the inode\'s bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *             4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_ATTRINVAL_LOG_RES(mp)  \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
-         (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
-        ((4 * (mp)->m_sb.sb_sectsize) + \
-         (4 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 4) + \
-         (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
-
 #define        XFS_ATTRINVAL_LOG_RES(mp)       ((mp)->m_reservations.tr_attrinval)
-
-/*
- * Setting an attribute.
- *     the inode getting the attribute
- *     the superblock for allocations
- *     the agfs extents are allocated from
- *     the attribute btree * max depth
- *     the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime.
- */
-#define        XFS_CALC_ATTRSET_LOG_RES(mp)    \
-       ((mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
-         (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
-
 #define        XFS_ATTRSET_LOG_RES(mp, ext)    \
        ((mp)->m_reservations.tr_attrset + \
         (ext * (mp)->m_sb.sb_sectsize) + \
         (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
         (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
-
-/*
- * Removing an attribute.
- *    the inode: inode size
- *    the attribute btree could join: max depth * block size
- *    the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_ATTRRM_LOG_RES(mp)     \
-       (MAX( \
-         ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
-         (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
-        ((2 * (mp)->m_sb.sb_sectsize) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
 #define        XFS_ATTRRM_LOG_RES(mp)  ((mp)->m_reservations.tr_attrrm)
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-#define        XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
-       ((mp)->m_sb.sb_sectsize + 128)
-
 #define        XFS_CLEAR_AGI_BUCKET_LOG_RES(mp)  ((mp)->m_reservations.tr_clearagi)
 
 
@@ -745,8 +294,8 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 #define        XFS_ALLOC_BTREE_REF     2
 #define        XFS_BMAP_BTREE_REF      2
 #define        XFS_DIR_BTREE_REF       2
+#define        XFS_INO_REF             2
 #define        XFS_ATTR_BTREE_REF      1
-#define        XFS_INO_REF             1
 #define        XFS_DQUOT_REF           1
 
 #ifdef __KERNEL__
@@ -762,12 +311,14 @@ struct xfs_log_item_desc;
 struct xfs_mount;
 struct xfs_trans;
 struct xfs_dquot_acct;
+struct xfs_busy_extent;
 
 typedef struct xfs_log_item {
        struct list_head                li_ail;         /* AIL pointers */
        xfs_lsn_t                       li_lsn;         /* last on-disk lsn */
        struct xfs_log_item_desc        *li_desc;       /* ptr to current desc*/
        struct xfs_mount                *li_mountp;     /* ptr to fs mount */
+       struct xfs_ail                  *li_ailp;       /* ptr to AIL */
        uint                            li_type;        /* item type */
        uint                            li_flags;       /* misc flags */
        struct xfs_log_item             *li_bio_list;   /* buffer item list */
@@ -776,17 +327,25 @@ typedef struct xfs_log_item {
                                                        /* buffer item iodone */
                                                        /* callback func */
        struct xfs_item_ops             *li_ops;        /* function list */
+
+       /* delayed logging */
+       struct list_head                li_cil;         /* CIL pointers */
+       struct xfs_log_vec              *li_lv;         /* active log vector */
+       xfs_lsn_t                       li_seq;         /* CIL commit seq */
 } xfs_log_item_t;
 
 #define        XFS_LI_IN_AIL   0x1
 #define XFS_LI_ABORTED 0x2
 
+#define XFS_LI_FLAGS \
+       { XFS_LI_IN_AIL,        "IN_AIL" }, \
+       { XFS_LI_ABORTED,       "ABORTED" }
+
 typedef struct xfs_item_ops {
        uint (*iop_size)(xfs_log_item_t *);
        void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
        void (*iop_pin)(xfs_log_item_t *);
-       void (*iop_unpin)(xfs_log_item_t *, int);
-       void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
+       void (*iop_unpin)(xfs_log_item_t *, int remove);
        uint (*iop_trylock)(xfs_log_item_t *);
        void (*iop_unlock)(xfs_log_item_t *);
        xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
@@ -798,8 +357,7 @@ typedef struct xfs_item_ops {
 #define IOP_SIZE(ip)           (*(ip)->li_ops->iop_size)(ip)
 #define IOP_FORMAT(ip,vp)      (*(ip)->li_ops->iop_format)(ip, vp)
 #define IOP_PIN(ip)            (*(ip)->li_ops->iop_pin)(ip)
-#define IOP_UNPIN(ip, flags)   (*(ip)->li_ops->iop_unpin)(ip, flags)
-#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
+#define IOP_UNPIN(ip, remove)  (*(ip)->li_ops->iop_unpin)(ip, remove)
 #define IOP_TRYLOCK(ip)                (*(ip)->li_ops->iop_trylock)(ip)
 #define IOP_UNLOCK(ip)         (*(ip)->li_ops->iop_unlock)(ip)
 #define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
@@ -813,36 +371,7 @@ typedef struct xfs_item_ops {
 #define        XFS_ITEM_SUCCESS        0
 #define        XFS_ITEM_PINNED         1
 #define        XFS_ITEM_LOCKED         2
-#define        XFS_ITEM_FLUSHING       3
-#define XFS_ITEM_PUSHBUF       4
-
-/*
- * This structure is used to maintain a list of block ranges that have been
- * freed in the transaction.  The ranges are listed in the perag[] busy list
- * between when they're freed and the transaction is committed to disk.
- */
-
-typedef struct xfs_log_busy_slot {
-       xfs_agnumber_t          lbc_ag;
-       ushort                  lbc_idx;        /* index in perag.busy[] */
-} xfs_log_busy_slot_t;
-
-#define XFS_LBC_NUM_SLOTS      31
-typedef struct xfs_log_busy_chunk {
-       struct xfs_log_busy_chunk       *lbc_next;
-       uint                            lbc_free;       /* free slots bitmask */
-       ushort                          lbc_unused;     /* first unused */
-       xfs_log_busy_slot_t             lbc_busy[XFS_LBC_NUM_SLOTS];
-} xfs_log_busy_chunk_t;
-
-#define        XFS_LBC_MAX_SLOT        (XFS_LBC_NUM_SLOTS - 1)
-#define        XFS_LBC_FREEMASK        ((1U << XFS_LBC_NUM_SLOTS) - 1)
-
-#define        XFS_LBC_INIT(cp)        ((cp)->lbc_free = XFS_LBC_FREEMASK)
-#define        XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
-#define        XFS_LBC_SLOT(cp, slot)  (&((cp)->lbc_busy[(slot)]))
-#define        XFS_LBC_VACANCY(cp)     (((cp)->lbc_free) & XFS_LBC_FREEMASK)
-#define        XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
+#define XFS_ITEM_PUSHBUF       3
 
 /*
  * This is the type of function which can be given to xfs_trans_callback()
@@ -863,15 +392,13 @@ typedef struct xfs_trans {
        unsigned int            t_blk_res_used; /* # of resvd blocks used */
        unsigned int            t_rtx_res;      /* # of rt extents resvd */
        unsigned int            t_rtx_res_used; /* # of resvd rt extents used */
-       xfs_log_ticket_t        t_ticket;       /* log mgr ticket */
+       struct xlog_ticket      *t_ticket;      /* log mgr ticket */
        xfs_lsn_t               t_lsn;          /* log seq num of start of
                                                 * transaction. */
        xfs_lsn_t               t_commit_lsn;   /* log seq num of end of
                                                 * transaction. */
        struct xfs_mount        *t_mountp;      /* ptr to fs mount struct */
        struct xfs_dquot_acct   *t_dqinfo;      /* acctg info for dquots */
-       xfs_trans_callback_t    t_callback;     /* transaction callback */
-       void                    *t_callarg;     /* callback arg */
        unsigned int            t_flags;        /* misc flags */
        int64_t                 t_icount_delta; /* superblock icount change */
        int64_t                 t_ifree_delta;  /* superblock ifree change */
@@ -892,11 +419,9 @@ typedef struct xfs_trans {
        int64_t                 t_rblocks_delta;/* superblock rblocks change */
        int64_t                 t_rextents_delta;/* superblocks rextents chg */
        int64_t                 t_rextslog_delta;/* superblocks rextslog chg */
-       unsigned int            t_items_free;   /* log item descs free */
-       xfs_log_item_chunk_t    t_items;        /* first log item desc chunk */
+       struct list_head        t_items;        /* log item descriptors */
        xfs_trans_header_t      t_header;       /* header for in-log trans */
-       unsigned int            t_busy_free;    /* busy descs free */
-       xfs_log_busy_chunk_t    t_busy;         /* busy/async free blocks */
+       struct list_head        t_busy;         /* list of busy extents */
        unsigned long           t_pflags;       /* saved process flags state */
 } xfs_trans_t;
 
@@ -923,7 +448,7 @@ typedef struct xfs_trans {
  * XFS transaction mechanism exported interfaces.
  */
 xfs_trans_t    *xfs_trans_alloc(struct xfs_mount *, uint);
-xfs_trans_t    *_xfs_trans_alloc(struct xfs_mount *, uint);
+xfs_trans_t    *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
 xfs_trans_t    *xfs_trans_dup(xfs_trans_t *);
 int            xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
                                  uint, uint);
@@ -946,8 +471,9 @@ void                xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
 void           xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
 int            xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
                               xfs_ino_t , uint, uint, struct xfs_inode **);
-void           xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint);
-void           xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *);
+void           xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
+void           xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
+void           xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
 void           xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
 void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 struct xfs_efi_log_item        *xfs_trans_get_efi(xfs_trans_t *, uint);
@@ -970,15 +496,9 @@ int                _xfs_trans_commit(xfs_trans_t *,
 void           xfs_trans_cancel(xfs_trans_t *, int);
 int            xfs_trans_ail_init(struct xfs_mount *);
 void           xfs_trans_ail_destroy(struct xfs_mount *);
-void           xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
-xfs_lsn_t      xfs_trans_tail_ail(struct xfs_mount *);
-void           xfs_trans_unlocked_item(struct xfs_mount *,
-                                       xfs_log_item_t *);
-xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
-                                       xfs_agnumber_t ag,
-                                       xfs_extlen_t idx);
 
 extern kmem_zone_t     *xfs_trans_zone;
+extern kmem_zone_t     *xfs_log_item_desc_zone;
 
 #endif /* __KERNEL__ */
 
index 228b94886821ad6779138c2fa2d18ea0dd59416f..901d4acc3a218744301a28d9003510eef3842f8c 100644 (file)
@@ -81,6 +81,8 @@ typedef       __int32_t       xfs_tid_t;      /* transaction identifier */
 typedef        __uint32_t      xfs_dablk_t;    /* dir/attr block number (in file) */
 typedef        __uint32_t      xfs_dahash_t;   /* dir/attr hash value */
 
+typedef __uint32_t     xlog_tid_t;     /* transaction ID type */
+
 /*
  * These types are 64 bits on disk but are either 32 or 64 bits in memory.
  * Disk based types:
index d6ef10beafa08b81e7d458b8b08a2f092dca40d0..9d32f5b6e79a0141ce330d47b97fe85d9288edc0 100644 (file)
 kmem_zone_t    *xfs_buf_item_zone;
 kmem_zone_t    *xfs_ili_zone;          /* inode log item zone */
 
-/*
- * Following functions from fs/xfs/xfs_trans_item.c
- */
-
-/*
- * This is called to add the given log item to the transaction's
- * list of log items.  It must find a free log item descriptor
- * or allocate a new one and add the item to that descriptor.
- * The function returns a pointer to item descriptor used to point
- * to the new item.  The log item will now point to its new descriptor
- * with its li_desc field.
- */
-xfs_log_item_desc_t *
-xfs_trans_add_item(
-       xfs_trans_t             *tp,
-       xfs_log_item_t          *lip)
-{
-       xfs_log_item_desc_t     *lidp;
-       xfs_log_item_chunk_t    *licp;
-       int                     i = 0;
-
-       /*
-        * If there are no free descriptors, allocate a new chunk
-        * of them and put it at the front of the chunk list.
-        */
-       if (tp->t_items_free == 0) {
-               licp = (xfs_log_item_chunk_t*)
-                      kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP);
-               ASSERT(licp != NULL);
-               /*
-                * Initialize the chunk, and then
-                * claim the first slot in the newly allocated chunk.
-                */
-               xfs_lic_init(licp);
-               xfs_lic_claim(licp, 0);
-               licp->lic_unused = 1;
-               xfs_lic_init_slot(licp, 0);
-               lidp = xfs_lic_slot(licp, 0);
-
-               /*
-                * Link in the new chunk and update the free count.
-                */
-               licp->lic_next = tp->t_items.lic_next;
-               tp->t_items.lic_next = licp;
-               tp->t_items_free = XFS_LIC_NUM_SLOTS - 1;
-
-               /*
-                * Initialize the descriptor and the generic portion
-                * of the log item.
-                *
-                * Point the new slot at this item and return it.
-                * Also point the log item at its currently active
-                * descriptor and set the item's mount pointer.
-                */
-               lidp->lid_item = lip;
-               lidp->lid_flags = 0;
-               lidp->lid_size = 0;
-               lip->li_desc = lidp;
-               lip->li_mountp = tp->t_mountp;
-               return lidp;
-       }
-
-       /*
-        * Find the free descriptor. It is somewhere in the chunklist
-        * of descriptors.
-        */
-       licp = &tp->t_items;
-       while (licp != NULL) {
-               if (xfs_lic_vacancy(licp)) {
-                       if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
-                               i = licp->lic_unused;
-                               ASSERT(xfs_lic_isfree(licp, i));
-                               break;
-                       }
-                       for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
-                               if (xfs_lic_isfree(licp, i))
-                                       break;
-                       }
-                       ASSERT(i <= XFS_LIC_MAX_SLOT);
-                       break;
-               }
-               licp = licp->lic_next;
-       }
-       ASSERT(licp != NULL);
-       /*
-        * If we find a free descriptor, claim it,
-        * initialize it, and return it.
-        */
-       xfs_lic_claim(licp, i);
-       if (licp->lic_unused <= i) {
-               licp->lic_unused = i + 1;
-               xfs_lic_init_slot(licp, i);
-       }
-       lidp = xfs_lic_slot(licp, i);
-       tp->t_items_free--;
-       lidp->lid_item = lip;
-       lidp->lid_flags = 0;
-       lidp->lid_size = 0;
-       lip->li_desc = lidp;
-       lip->li_mountp = tp->t_mountp;
-       return lidp;
-}
-
-/*
- * Free the given descriptor.
- *
- * This requires setting the bit in the chunk's free mask corresponding
- * to the given slot.
- */
-void
-xfs_trans_free_item(
-       xfs_trans_t             *tp,
-       xfs_log_item_desc_t     *lidp)
-{
-       uint                    slot;
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_chunk_t    **licpp;
-
-       slot = xfs_lic_desc_to_slot(lidp);
-       licp = xfs_lic_desc_to_chunk(lidp);
-       xfs_lic_relse(licp, slot);
-       lidp->lid_item->li_desc = NULL;
-       tp->t_items_free++;
-
-       /*
-        * If there are no more used items in the chunk and this is not
-        * the chunk embedded in the transaction structure, then free
-        * the chunk. First pull it from the chunk list and then
-        * free it back to the heap.  We didn't bother with a doubly
-        * linked list here because the lists should be very short
-        * and this is not a performance path.  It's better to save
-        * the memory of the extra pointer.
-        *
-        * Also decrement the transaction structure's count of free items
-        * by the number in a chunk since we are freeing an empty chunk.
-        */
-       if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
-               licpp = &(tp->t_items.lic_next);
-               while (*licpp != licp) {
-                       ASSERT(*licpp != NULL);
-                       licpp = &((*licpp)->lic_next);
-               }
-               *licpp = licp->lic_next;
-               kmem_free(licp);
-               tp->t_items_free -= XFS_LIC_NUM_SLOTS;
-       }
-}
-
-/*
- * This is called to find the descriptor corresponding to the given
- * log item.  It returns a pointer to the descriptor.
- * The log item MUST have a corresponding descriptor in the given
- * transaction.         This routine does not return NULL, it panics.
- *
- * The descriptor pointer is kept in the log item's li_desc field.
- * Just return it.
- */
-xfs_log_item_desc_t *
-xfs_trans_find_item(
-       xfs_trans_t             *tp,
-       xfs_log_item_t          *lip)
-{
-       ASSERT(lip->li_desc != NULL);
-
-       return lip->li_desc;
-}
-
-/*
- * This is called to unlock all of the items of a transaction and to free
- * all the descriptors of that transaction.
- *
- * It walks the list of descriptors and unlocks each item.  It frees
- * each chunk except that embedded in the transaction as it goes along.
- */
-void
-xfs_trans_free_items(
-       xfs_trans_t             *tp,
-       int                     flags)
-{
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_chunk_t    *next_licp;
-       int                     abort;
-
-       abort = flags & XFS_TRANS_ABORT;
-       licp = &tp->t_items;
-       /*
-        * Special case the embedded chunk so we don't free it below.
-        */
-       if (!xfs_lic_are_all_free(licp)) {
-               (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
-               xfs_lic_all_free(licp);
-               licp->lic_unused = 0;
-       }
-       licp = licp->lic_next;
-
-       /*
-        * Unlock each item in each chunk and free the chunks.
-        */
-       while (licp != NULL) {
-               ASSERT(!xfs_lic_are_all_free(licp));
-               (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
-               next_licp = licp->lic_next;
-               kmem_free(licp);
-               licp = next_licp;
-       }
-
-       /*
-        * Reset the transaction structure's free item count.
-        */
-       tp->t_items_free = XFS_LIC_NUM_SLOTS;
-       tp->t_items.lic_next = NULL;
-}
-
 /*
  * Following functions from fs/xfs/xfs_trans_buf.c
  */
 
 /*
  * Check to see if a buffer matching the given parameters is already
- * a part of the given transaction.  Only check the first, embedded
- * chunk, since we don't want to spend all day scanning large transactions.
+ * a part of the given transaction.
  */
 xfs_buf_t *
 xfs_trans_buf_item_match(
@@ -250,149 +36,21 @@ xfs_trans_buf_item_match(
        xfs_daddr_t             blkno,
        int                     len)
 {
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_desc_t     *lidp;
-       xfs_buf_log_item_t      *blip;
-       xfs_buf_t               *bp;
-       int                     i;
-
-#ifdef LI_DEBUG
-       fprintf(stderr, "buf_item_match (fast) log items for xact %p\n", tp);
-#endif
-
-       bp = NULL;
-       len = BBTOB(len);
-       licp = &tp->t_items;
-       if (!xfs_lic_are_all_free(licp)) {
-               for (i = 0; i < licp->lic_unused; i++) {
-                       /*
-                        * Skip unoccupied slots.
-                        */
-                       if (xfs_lic_isfree(licp, i)) {
-                               continue;
-                       }
-
-                       lidp = xfs_lic_slot(licp, i);
-                       blip = (xfs_buf_log_item_t *)lidp->lid_item;
-#ifdef LI_DEBUG
-                       fprintf(stderr,
-                               "\tfound log item, xact %p, blip=%p (%d/%d)\n",
-                               tp, blip, i, licp->lic_unused);
-#endif
-                       if (blip->bli_item.li_type != XFS_LI_BUF) {
-                               continue;
-                       }
-
-                       bp = blip->bli_buf;
-#ifdef LI_DEBUG
-                       fprintf(stderr,
-                       "\tfound buf %p log item, xact %p, blip=%p (%d)\n",
-                               bp, tp, blip, i);
-#endif
-                       if ((XFS_BUF_TARGET(bp) == target->dev) &&
-                           (XFS_BUF_ADDR(bp) == blkno) &&
-                           (XFS_BUF_COUNT(bp) == len)) {
-                               /*
-                                * We found it.  Break out and
-                                * return the pointer to the buffer.
-                                */
-#ifdef LI_DEBUG
-                               fprintf(stderr,
-                                       "\tfound REAL buf log item, bp=%p\n",
-                                       bp);
-#endif
-                               break;
-                       } else {
-                               bp = NULL;
-                       }
-               }
-       }
-#ifdef LI_DEBUG
-       if (!bp) fprintf(stderr, "\tfast search - got nothing\n");
-#endif
-       return bp;
-}
-
-/*
- * Check to see if a buffer matching the given parameters is already
- * a part of the given transaction.  Check all the chunks, we
- * want to be thorough.
- */
-xfs_buf_t *
-xfs_trans_buf_item_match_all(
-       xfs_trans_t             *tp,
-       xfs_buftarg_t           *target,
-       xfs_daddr_t             blkno,
-       int                     len)
-{
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_desc_t     *lidp;
-       xfs_buf_log_item_t      *blip;
-       xfs_buf_t               *bp;
-       int                     i;
-
-#ifdef LI_DEBUG
-       fprintf(stderr, "buf_item_match_all (slow) log items for xact %p\n",
-               tp);
-#endif
-
-       bp = NULL;
-       len = BBTOB(len);
-       for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
-               if (xfs_lic_are_all_free(licp)) {
-                       ASSERT(licp == &tp->t_items);
-                       ASSERT(licp->lic_next == NULL);
-                       return NULL;
-               }
-               for (i = 0; i < licp->lic_unused; i++) {
-                       /*
-                        * Skip unoccupied slots.
-                        */
-                       if (xfs_lic_isfree(licp, i)) {
-                               continue;
-                       }
-
-                       lidp = xfs_lic_slot(licp, i);
-                       blip = (xfs_buf_log_item_t *)lidp->lid_item;
-#ifdef LI_DEBUG
-                       fprintf(stderr,
-                               "\tfound log item, xact %p, blip=%p (%d/%d)\n",
-                               tp, blip, i, licp->lic_unused);
-#endif
-                       if (blip->bli_item.li_type != XFS_LI_BUF) {
-                               continue;
-                       }
-
-                       bp = blip->bli_buf;
-                       ASSERT(bp);
-                       ASSERT(XFS_BUF_ADDR(bp));
-#ifdef LI_DEBUG
-                       fprintf(stderr,
-                       "\tfound buf %p log item, xact %p, blip=%p (%d)\n",
-                               bp, tp, blip, i);
-#endif
-                       if ((XFS_BUF_TARGET(bp) == target->dev) &&
-                           (XFS_BUF_ADDR(bp) == blkno) &&
-                           (XFS_BUF_COUNT(bp) == len)) {
-                               /*
-                                * We found it.  Break out and
-                                * return the pointer to the buffer.
-                                */
-#ifdef LI_DEBUG
-                               fprintf(stderr,
-                                       "\tfound REAL buf log item, bp=%p\n",
-                                       bp);
-#endif
-                               return bp;
-                       }
-               }
-       }
-#ifdef LI_DEBUG
-       if (!bp) fprintf(stderr, "slow search - got nothing\n");
-#endif
-       return NULL;
+        struct xfs_log_item_desc *lidp;
+        struct xfs_buf_log_item *blip;
+
+        len = BBTOB(len);
+        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+                blip = (struct xfs_buf_log_item *)lidp->lid_item;
+                if (blip->bli_item.li_type == XFS_LI_BUF &&
+                    XFS_BUF_TARGET(blip->bli_buf) == target->dev &&
+                    XFS_BUF_ADDR(blip->bli_buf) == blkno &&
+                    XFS_BUF_COUNT(blip->bli_buf) == len)
+                        return blip->bli_buf;
+        }
+
+        return NULL;
 }
-
 /*
  * The following are from fs/xfs/xfs_buf_item.c
  */
index 3ed2795f459637cf9215db397e8c127a68bfcdbe..e1a96b1d6686088623ea9d7ec85de0e3ca9a60d1 100644 (file)
@@ -36,8 +36,7 @@ libxfs_trans_alloc(
        }
        ptr->t_mountp = mp;
        ptr->t_type = type;
-       ptr->t_items_free = XFS_LIC_NUM_SLOTS;
-       xfs_lic_init(&ptr->t_items);
+       INIT_LIST_HEAD(&ptr->t_items);
 #ifdef XACT_DEBUG
        fprintf(stderr, "allocated new transaction %p\n", ptr);
 #endif
@@ -139,7 +138,6 @@ libxfs_trans_iput(
        uint                    lock_flags)
 {
        xfs_inode_log_item_t    *iip;
-       xfs_log_item_desc_t     *lidp;
 
        if (tp == NULL) {
                libxfs_iput(ip, lock_flags);
@@ -149,12 +147,7 @@ libxfs_trans_iput(
        ASSERT(ip->i_transp == tp);
        iip = ip->i_itemp;
        ASSERT(iip != NULL);
-
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)iip);
-       ASSERT(lidp != NULL);
-       ASSERT(lidp->lid_item == (xfs_log_item_t *)iip);
-       ASSERT(!(lidp->lid_flags & XFS_LID_DIRTY));
-       xfs_trans_free_item(tp, lidp);
+       xfs_trans_del_item(&iip->ili_item);
 
        libxfs_iput(ip, lock_flags);
 }
@@ -182,6 +175,23 @@ libxfs_trans_ijoin(
 #endif
 }
 
+void
+libxfs_trans_ijoin_ref(
+       xfs_trans_t             *tp,
+       xfs_inode_t             *ip,
+       int                     lock_flags)
+{
+       ASSERT(ip->i_transp == tp);
+       ASSERT(ip->i_itemp != NULL);
+
+       xfs_trans_ijoin(tp, ip, lock_flags);
+       ip->i_itemp->ili_lock_flags = lock_flags;
+
+#ifdef XACT_DEBUG
+       fprintf(stderr, "ijoin_ref'd inode %llu, transaction %p\n", ip->i_ino, tp);
+#endif
+}
+
 void
 libxfs_trans_ihold(
        xfs_trans_t             *tp,
@@ -190,7 +200,8 @@ libxfs_trans_ihold(
        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_itemp != NULL);
 
-       ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
+       ip->i_itemp->ili_lock_flags = 1;
+
 #ifdef XACT_DEBUG
        fprintf(stderr, "ihold'd inode %llu, transaction %p\n", ip->i_ino, tp);
 #endif
@@ -224,19 +235,14 @@ xfs_trans_log_inode(
        xfs_inode_t             *ip,
        uint                    flags)
 {
-       xfs_log_item_desc_t     *lidp;
-
        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_itemp != NULL);
 #ifdef XACT_DEBUG
        fprintf(stderr, "dirtied inode %llu, transaction %p\n", ip->i_ino, tp);
 #endif
 
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
-       ASSERT(lidp != NULL);
-
        tp->t_flags |= XFS_TRANS_DIRTY;
-       lidp->lid_flags |= XFS_LID_DIRTY;
+       ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY;
 
        /*
         * Always OR in the bits from the ili_last_fields field.
@@ -266,7 +272,6 @@ libxfs_trans_log_buf(
        uint                    last)
 {
        xfs_buf_log_item_t      *bip;
-       xfs_log_item_desc_t     *lidp;
 
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
@@ -277,11 +282,8 @@ libxfs_trans_log_buf(
 
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
 
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
-       ASSERT(lidp != NULL);
-
        tp->t_flags |= XFS_TRANS_DIRTY;
-       lidp->lid_flags |= XFS_LID_DIRTY;
+       bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
        xfs_buf_item_log(bip, first, last);
 }
 
@@ -291,7 +293,6 @@ libxfs_trans_brelse(
        xfs_buf_t               *bp)
 {
        xfs_buf_log_item_t      *bip;
-       xfs_log_item_desc_t     *lidp;
 #ifdef XACT_DEBUG
        fprintf(stderr, "released buffer %p, transaction %p\n", bp, tp);
 #endif
@@ -304,8 +305,6 @@ libxfs_trans_brelse(
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
-       ASSERT(lidp != NULL);
        if (bip->bli_recur > 0) {
                bip->bli_recur--;
                return;
@@ -313,9 +312,9 @@ libxfs_trans_brelse(
        /* If dirty/stale, can't release till transaction committed */
        if (bip->bli_flags & XFS_BLI_STALE)
                return;
-       if (lidp->lid_flags & XFS_LID_DIRTY)
+       if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY)
                return;
-       xfs_trans_free_item(tp, lidp);
+       xfs_trans_del_item(&bip->bli_item);
        if (bip->bli_flags & XFS_BLI_HOLD)
                bip->bli_flags &= ~XFS_BLI_HOLD;
        XFS_BUF_SET_FSPRIVATE2(bp, NULL);
@@ -327,7 +326,6 @@ libxfs_trans_binval(
        xfs_trans_t             *tp,
        xfs_buf_t               *bp)
 {
-       xfs_log_item_desc_t     *lidp;
        xfs_buf_log_item_t      *bip;
 #ifdef XACT_DEBUG
        fprintf(stderr, "binval'd buffer %p, transaction %p\n", bp, tp);
@@ -337,17 +335,15 @@ libxfs_trans_binval(
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
-       lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
-       ASSERT(lidp != NULL);
        if (bip->bli_flags & XFS_BLI_STALE)
                return;
        XFS_BUF_UNDELAYWRITE(bp);
        XFS_BUF_STALE(bp);
        bip->bli_flags |= XFS_BLI_STALE;
        bip->bli_flags &= ~XFS_BLI_DIRTY;
-       bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
-       bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
-       lidp->lid_flags |= XFS_LID_DIRTY;
+       bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
+       bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
+       bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
        tp->t_flags |= XFS_TRANS_DIRTY;
 }
 
@@ -402,10 +398,7 @@ libxfs_trans_get_buf(
                return libxfs_getbuf(dev, d, len);
 
        bdev.dev = dev;
-       if (tp->t_items.lic_next == NULL)
-               bp = xfs_trans_buf_item_match(tp, &bdev, d, len);
-       else
-               bp = xfs_trans_buf_item_match_all(tp, &bdev, d, len);
+       bp = xfs_trans_buf_item_match(tp, &bdev, d, len);
        if (bp != NULL) {
                ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
@@ -447,10 +440,7 @@ libxfs_trans_getsb(
 
        bdev.dev = mp->m_dev;
        len = XFS_FSS_TO_BB(mp, 1);
-       if (tp->t_items.lic_next == NULL)
-               bp = xfs_trans_buf_item_match(tp, &bdev, XFS_SB_DADDR, len);
-       else
-               bp = xfs_trans_buf_item_match_all(tp, &bdev, XFS_SB_DADDR, len);
+       bp = xfs_trans_buf_item_match(tp, &bdev, XFS_SB_DADDR, len);
        if (bp != NULL) {
                ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
@@ -494,10 +484,7 @@ libxfs_trans_read_buf(
        }
 
        bdev.dev = dev;
-       if (tp->t_items.lic_next == NULL)
-               bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len);
-       else
-               bp = xfs_trans_buf_item_match_all(tp, &bdev, blkno, len);
+       bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len);
        if (bp != NULL) {
                ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
@@ -578,13 +565,11 @@ inode_item_done(
        xfs_inode_t             *ip;
        xfs_mount_t             *mp;
        xfs_buf_t               *bp;
-       int                     hold;
        int                     error;
        extern kmem_zone_t      *xfs_ili_zone;
 
        ip = iip->ili_inode;
        mp = iip->ili_item.li_mountp;
-       hold = iip->ili_flags & XFS_ILI_HOLD;
        ASSERT(ip != NULL);
 
        if (!(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) {
@@ -617,11 +602,11 @@ inode_item_done(
        libxfs_writebuf(bp, 0);
 #ifdef XACT_DEBUG
        fprintf(stderr, "flushing dirty inode %llu, buffer %p (hold=%u)\n",
-                       ip->i_ino, bp, hold);
+                       ip->i_ino, bp, iip->ili_lock_flags);
 #endif
 ili_done:
-       if (hold) {
-               iip->ili_flags &= ~XFS_ILI_HOLD;
+       if (iip->ili_lock_flags) {
+               iip->ili_lock_flags = 0;
                return;
        } else {
                libxfs_iput(ip, 0);
@@ -663,63 +648,26 @@ buf_item_done(
        kmem_zone_free(xfs_buf_item_zone, bip);
 }
 
-/*
- * This is called to perform the commit processing for each
- * item described by the given chunk.
- */
 static void
-trans_chunk_committed(
-       xfs_log_item_chunk_t    *licp)
+trans_committed(
+       xfs_trans_t             *tp)
 {
-       xfs_log_item_desc_t     *lidp;
-       xfs_log_item_t          *lip;
-       int                     i;
-
-       lidp = licp->lic_descs;
-       for (i = 0; i < licp->lic_unused; i++, lidp++) {
-               if (xfs_lic_isfree(licp, i))
-                       continue;
-               lip = lidp->lid_item;
+        struct xfs_log_item_desc *lidp, *next;
+
+        list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
+               struct xfs_log_item *lip = lidp->lid_item;
+
+                xfs_trans_del_item(lip);
                if (lip->li_type == XFS_LI_BUF)
-                       buf_item_done((xfs_buf_log_item_t *)lidp->lid_item);
+                       buf_item_done((xfs_buf_log_item_t *)lip);
                else if (lip->li_type == XFS_LI_INODE)
-                       inode_item_done((xfs_inode_log_item_t *)lidp->lid_item);
+                       inode_item_done((xfs_inode_log_item_t *)lip);
                else {
                        fprintf(stderr, _("%s: unrecognised log item type\n"),
                                progname);
                        ASSERT(0);
                }
-       }
-}
-
-/*
- * Calls trans_chunk_committed() to process the items in each chunk.
- */
-static void
-trans_committed(
-       xfs_trans_t             *tp)
-{
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_chunk_t    *next_licp;
-
-       /*
-        * Special case the chunk embedded in the transaction.
-        */
-       licp = &(tp->t_items);
-       if (!(xfs_lic_are_all_free(licp))) {
-               trans_chunk_committed(licp);
-       }
-
-       /*
-        * Process the items in each chunk in turn.
-        */
-       licp = licp->lic_next;
-       while (licp != NULL) {
-               trans_chunk_committed(licp);
-               next_licp = licp->lic_next;
-               kmem_free(licp);
-               licp = next_licp;
-       }
+        }
 }
 
 static void
@@ -733,9 +681,9 @@ buf_item_unlock(
        XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
 
        hold = bip->bli_flags & XFS_BLI_HOLD;
+       bip->bli_flags &= ~XFS_BLI_HOLD;
        if (!hold)
                libxfs_putbuf(bp);
-       bip->bli_flags &= ~XFS_BLI_HOLD;
 }
 
 static void
@@ -743,75 +691,44 @@ inode_item_unlock(
        xfs_inode_log_item_t    *iip)
 {
        xfs_inode_t             *ip = iip->ili_inode;
-       uint                    hold;
 
        /* Clear the transaction pointer in the inode. */
        ip->i_transp = NULL;
 
-       hold = iip->ili_flags & XFS_ILI_HOLD;
-       if (!hold)
-               libxfs_iput(ip, 0);
        iip->ili_flags = 0;
+       if (!iip->ili_lock_flags)
+               libxfs_iput(ip, 0);
+       else
+               iip->ili_lock_flags = 0;
 }
 
 /*
- * Unlock each item pointed to by a descriptor in the given chunk.
- * Free descriptors pointing to items which are not dirty if freeing_chunk
- * is zero. If freeing_chunk is non-zero, then we need to unlock all
- * items in the chunk. Return the number of descriptors freed.
- * Originally based on xfs_trans_unlock_chunk() - adapted for libxfs
- * transactions though.
+ * Unlock all of the items of a transaction and free all the descriptors
+ * of that transaction.
  */
-int
-xfs_trans_unlock_chunk(
-       xfs_log_item_chunk_t    *licp,
-       int                     freeing_chunk,
-       int                     abort,
-       xfs_lsn_t               commit_lsn)     /* nb: unused */
+void
+xfs_trans_free_items(
+       struct xfs_trans        *tp,
+       int                     flags)
 {
-       xfs_log_item_desc_t     *lidp;
-       xfs_log_item_t          *lip;
-       int                     i;
-       int                     freed;
-
-       freed = 0;
-       lidp = licp->lic_descs;
-       for (i = 0; i < licp->lic_unused; i++, lidp++) {
-               if (xfs_lic_isfree(licp, i)) {
-                       continue;
-               }
-               lip = lidp->lid_item;
-               lip->li_desc = NULL;
+       struct xfs_log_item_desc *lidp, *next;
 
-               /*
-                * Disassociate the logged item from this transaction
-                */
+       list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
+               struct xfs_log_item     *lip = lidp->lid_item;
+
+                xfs_trans_del_item(lip);
                if (lip->li_type == XFS_LI_BUF)
-                       buf_item_unlock((xfs_buf_log_item_t *)lidp->lid_item);
+                       buf_item_unlock((xfs_buf_log_item_t *)lip);
                else if (lip->li_type == XFS_LI_INODE)
-                       inode_item_unlock((xfs_inode_log_item_t *)lidp->lid_item);
+                       inode_item_unlock((xfs_inode_log_item_t *)lip);
                else {
                        fprintf(stderr, _("%s: unrecognised log item type\n"),
                                progname);
                        ASSERT(0);
                }
-
-               /*
-                * Free the descriptor if the item is not dirty
-                * within this transaction and the caller is not
-                * going to just free the entire thing regardless.
-                */
-               if (!(freeing_chunk) &&
-                   (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
-                       xfs_lic_relse(licp, i);
-                       freed++;
-               }
        }
-
-       return (freed);
 }
 
-
 /*
  * Commit the changes represented by this transaction
  */
index 8e94dad719897713614f5ed2024ff711e12db770..a9e2bf141ec8564fc7668a165568748f71195eb3 100644 (file)
@@ -108,8 +108,6 @@ typedef __uint32_t          inst_t;         /* an instruction */
 
 #define PAGE_CACHE_SIZE        getpagesize()
 
-#define INIT_LIST_HEAD(x)
-
 static inline int __do_div(unsigned long long *n, unsigned base)
 {
        int __res;
@@ -329,10 +327,9 @@ void xfs_mount_common(xfs_mount_t *, xfs_sb_t *);
  */
 
 /* xfs_trans_item.c */
-xfs_log_item_desc_t *xfs_trans_add_item (xfs_trans_t *, xfs_log_item_t *);
-xfs_log_item_desc_t *xfs_trans_find_item (xfs_trans_t *, xfs_log_item_t *);
-void xfs_trans_free_item (xfs_trans_t *, xfs_log_item_desc_t *);
-void xfs_trans_free_items (xfs_trans_t *, int);
+void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
+void xfs_trans_del_item(struct xfs_log_item *);
+void xfs_trans_free_items(struct xfs_trans *, int);
 
 /* xfs_inode_item.c */
 void xfs_inode_item_init (xfs_inode_t *, xfs_mount_t *);
@@ -344,10 +341,7 @@ void xfs_buf_item_log (xfs_buf_log_item_t *, uint, uint);
 /* xfs_trans_buf.c */
 xfs_buf_t *xfs_trans_buf_item_match (xfs_trans_t *, xfs_buftarg_t *,
                        xfs_daddr_t, int);
-xfs_buf_t *xfs_trans_buf_item_match_all (xfs_trans_t *, xfs_buftarg_t *,
-                       xfs_daddr_t, int);
 
 /* local source files */
 int  xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
 void xfs_trans_mod_sb(xfs_trans_t *, uint, long);
-int  xfs_trans_unlock_chunk (xfs_log_item_chunk_t *, int, int, xfs_lsn_t);
index 02bff42cbe899a2d8aa999acd3e2e92e7825165d..dde967847d12937eee6bf875b42b7dc6f6f6c479 100644 (file)
@@ -270,7 +270,6 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
        mp->m_blockmask = sbp->sb_blocksize - 1;
        mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
        mp->m_blockwmask = mp->m_blockwsize - 1;
-       INIT_LIST_HEAD(&mp->m_del_inodes);
 
        /*
         * Setup for attributes, in case they get created.
index 9036995605fce5fe085d367ca249f3c9beaa0032..635de8f28c59856d6f568155d28c9fe09a1bcfef 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (C) 2010 Red Hat, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
 
 #include <xfs.h>
 
+kmem_zone_t    *xfs_trans_zone;
+kmem_zone_t    *xfs_log_item_desc_zone;
+
 /*
- * Reservation functions here avoid a huge stack in xfs_trans_init
- * due to register overflow from temporaries in the calculations.
+ * Various log reservation values.
+ *
+ * These are based on the size of the file system block because that is what
+ * most transactions manipulate.  Each adds in an additional 128 bytes per
+ * item logged to try to account for the overhead of the transaction mechanism.
+ *
+ * Note:  Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish() call.
+ * This is because the number in the worst case is quite high and quite
+ * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
+ * extents in only a single AG at a time.  This will require changes to the
+ * EFI code as well, however, so that the EFI for the extents not freed is
+ * logged again in each transaction.  See SGI PV #261917.
+ *
+ * Reservation functions here avoid a huge stack in xfs_trans_init due to
+ * register overflow from temporaries in the calculations.
  */
 
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents.  This gives:
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ *    the agfs of the ags containing the blocks: 2 * sector size
+ *    the agfls of the ags containing the blocks: 2 * sector size
+ *    the super block free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_write_reservation(xfs_mount_t *mp)
+xfs_calc_write_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * In truncating a file we free up to two extents at once.  We can modify:
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_itruncate_reservation(xfs_mount_t *mp)
+xfs_calc_itruncate_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
+                    128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+                   (4 * mp->m_sb.sb_sectsize +
+                    4 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 4) +
+                    128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
+                    128 * 5 +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * In renaming a files we can modify:
+ *    the four inodes involved: 4 * inode size
+ *    the two directory btrees: 2 * (max depth + v2) * dir block size
+ *    the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ *     of bmap blocks) giving:
+ *    the agf for the ags in which the blocks live: 3 * sector size
+ *    the agfl for the ags in which the blocks live: 3 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_rename_reservation(xfs_mount_t *mp)
+xfs_calc_rename_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((4 * mp->m_sb.sb_inodesize +
+                    2 * XFS_DIROP_LOG_RES(mp) +
+                    128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
+                   (3 * mp->m_sb.sb_sectsize +
+                    3 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 3) +
+                    128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
 }
 
+/*
+ * For creating a link to an inode:
+ *    the parent directory inode: inode size
+ *    the linked inode: inode size
+ *    the directory btree could split: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ *    the agf for the ag in which the blocks live: sector size
+ *    the agfl for the ag in which the blocks live: sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_link_reservation(xfs_mount_t *mp)
+xfs_calc_link_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    XFS_DIROP_LOG_RES(mp) +
+                    128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+                   (mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * For removing a directory entry we can modify:
+ *    the parent directory inode: inode size
+ *    the removed inode: inode size
+ *    the directory btree could join: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_remove_reservation(xfs_mount_t *mp)
+xfs_calc_remove_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    XFS_DIROP_LOG_RES(mp) +
+                    128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * For symlink we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: 1 block
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the blocks for the symlink: 1 kB
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_symlink_reservation(xfs_mount_t *mp)
+xfs_calc_symlink_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, 1) +
+                    XFS_DIROP_LOG_RES(mp) +
+                    1024 +
+                    128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+                    XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * For create we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: block size
+ *    the superblock for the nlink flag: sector size
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_create_reservation(xfs_mount_t *mp)
+xfs_calc_create_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_FSB_TO_B(mp, 1) +
+                    XFS_DIROP_LOG_RES(mp) +
+                    128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
+                   (3 * mp->m_sb.sb_sectsize +
+                    XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+                    XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * Making a new directory is the same as creating a new file.
+ */
 STATIC uint
-xfs_calc_mkdir_reservation(xfs_mount_t *mp)
+xfs_calc_mkdir_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return xfs_calc_create_reservation(mp);
 }
 
+/*
+ * In freeing an inode we can modify:
+ *    the inode being freed: inode size
+ *    the super block free inode counter: sector size
+ *    the agi hash list and counters: sector size
+ *    the inode btree entry: block size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_ifree_reservation(xfs_mount_t *mp)
+xfs_calc_ifree_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize +
+               mp->m_sb.sb_sectsize +
+               XFS_FSB_TO_B(mp, 1) +
+               MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+                   XFS_INODE_CLUSTER_SIZE(mp)) +
+               128 * 5 +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                      XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * When only changing the inode we log the inode and possibly the superblock
+ * We also add a bit of slop for the transaction stuff.
+ */
 STATIC uint
-xfs_calc_ichange_reservation(xfs_mount_t *mp)
+xfs_calc_ichange_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize +
+               512;
+
 }
 
+/*
+ * Growing the data section of the filesystem.
+ *     superblock
+ *     agi and agf
+ *     allocation btrees
+ */
 STATIC uint
-xfs_calc_growdata_reservation(xfs_mount_t *mp)
+xfs_calc_growdata_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWDATA_LOG_RES(mp);
+       return mp->m_sb.sb_sectsize * 3 +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ *     superblock: sector size
+ *     agf of the ag from which the extent is allocated: sector size
+ *     bmap btree for bitmap/summary inode: max depth * blocksize
+ *     bitmap/summary inode: inode size
+ *     allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
 STATIC uint
-xfs_calc_growrtalloc_reservation(xfs_mount_t *mp)
+xfs_calc_growrtalloc_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+       return 2 * mp->m_sb.sb_sectsize +
+               XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+               mp->m_sb.sb_inodesize +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+                      XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ *     one bitmap/summary block: blocksize
+ */
 STATIC uint
-xfs_calc_growrtzero_reservation(xfs_mount_t *mp)
+xfs_calc_growrtzero_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWRTZERO_LOG_RES(mp);
+       return mp->m_sb.sb_blocksize + 128;
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ *     superblock: sector size
+ *     bitmap inode: inode size
+ *     summary inode: inode size
+ *     one bitmap block: blocksize
+ *     summary blocks: new summary size
+ */
 STATIC uint
-xfs_calc_growrtfree_reservation(xfs_mount_t *mp)
+xfs_calc_growrtfree_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWRTFREE_LOG_RES(mp);
+       return mp->m_sb.sb_sectsize +
+               2 * mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_blocksize +
+               mp->m_rsumsize +
+               128 * 5;
 }
 
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ *     inode
+ */
 STATIC uint
-xfs_calc_swrite_reservation(xfs_mount_t *mp)
+xfs_calc_swrite_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_SWRITE_LOG_RES(mp);
+       return mp->m_sb.sb_inodesize + 128;
 }
 
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ *     inode
+ */
 STATIC uint
 xfs_calc_writeid_reservation(xfs_mount_t *mp)
 {
-       return XFS_CALC_WRITEID_LOG_RES(mp);
+       return mp->m_sb.sb_inodesize + 128;
 }
 
+/*
+ * Converting the inode from non-attributed to attributed.
+ *     the inode being converted: inode size
+ *     agf block and superblock (for block allocation)
+ *     the new block (directory sized)
+ *     bmap blocks for the new directory block
+ *     allocation btrees
+ */
 STATIC uint
-xfs_calc_addafork_reservation(xfs_mount_t *mp)
+xfs_calc_addafork_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize * 2 +
+               mp->m_dirblksize +
+               XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
+                      XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Removing the attribute fork of a file
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_attrinval_reservation(xfs_mount_t *mp)
+xfs_calc_attrinval_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ATTRINVAL_LOG_RES(mp);
+       return MAX((mp->m_sb.sb_inodesize +
+                   XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+                   128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
+                  (4 * mp->m_sb.sb_sectsize +
+                   4 * mp->m_sb.sb_sectsize +
+                   mp->m_sb.sb_sectsize +
+                   XFS_ALLOCFREE_LOG_RES(mp, 4) +
+                   128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
 }
 
+/*
+ * Setting an attribute.
+ *     the inode getting the attribute
+ *     the superblock for allocations
+ *     the agfs extents are allocated from
+ *     the attribute btree * max depth
+ *     the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ */
 STATIC uint
-xfs_calc_attrset_reservation(xfs_mount_t *mp)
+xfs_calc_attrset_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize +
+               XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+               128 * (2 + XFS_DA_NODE_MAXDEPTH);
 }
 
+/*
+ * Removing an attribute.
+ *    the inode: inode size
+ *    the attribute btree could join: max depth * block size
+ *    the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_attrrm_reservation(xfs_mount_t *mp)
+xfs_calc_attrrm_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+                    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+                    128 * (1 + XFS_DA_NODE_MAXDEPTH +
+                           XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
 STATIC uint
-xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
+xfs_calc_clear_agi_bucket_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+       return mp->m_sb.sb_sectsize + 128;
 }
 
 /*
@@ -155,11 +512,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
  */
 void
 xfs_trans_init(
-       xfs_mount_t     *mp)
+       struct xfs_mount        *mp)
 {
-       xfs_trans_reservations_t        *resp;
+       struct xfs_trans_reservations *resp = &mp->m_reservations;
 
-       resp = &(mp->m_reservations);
        resp->tr_write = xfs_calc_write_reservation(mp);
        resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
        resp->tr_rename = xfs_calc_rename_reservation(mp);
@@ -183,6 +539,50 @@ xfs_trans_init(
        resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
 }
 
+/*
+ * Add the given log item to the transaction's list of log items.
+ *
+ * The log item will now point to its new descriptor with its li_desc field.
+ */
+void
+xfs_trans_add_item(
+       struct xfs_trans        *tp,
+       struct xfs_log_item     *lip)
+{
+       struct xfs_log_item_desc *lidp;
+
+       ASSERT(lip->li_mountp == tp->t_mountp);
+       ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
+
+       lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
+
+       lidp->lid_item = lip;
+       lidp->lid_flags = 0;
+       lidp->lid_size = 0;
+       list_add_tail(&lidp->lid_trans, &tp->t_items);
+
+       lip->li_desc = lidp;
+}
+
+STATIC void
+xfs_trans_free_item_desc(
+       struct xfs_log_item_desc *lidp)
+{
+       list_del_init(&lidp->lid_trans);
+       kmem_zone_free(xfs_log_item_desc_zone, lidp);
+}
+
+/*
+ * Unlink and free the given descriptor.
+ */
+void
+xfs_trans_del_item(
+       struct xfs_log_item     *lip)
+{
+       xfs_trans_free_item_desc(lip->li_desc);
+       lip->li_desc = NULL;
+}
+
 /*
  * Roll from one trans in the sequence of PERMANENT transactions to
  * the next: permanent transactions are only flushed out when
index 9e0e5678df8c17fa106dedb9524b34f8bb466874..23fe6fd544afc62e9804d669adfca3df66a42d79 100644 (file)
 #define xlog_clear_stale_blocks(log, tail_lsn)         (0)
 #define xfs_readonly_buftarg(buftarg)                  (0)
 
-STATIC void    xlog_recover_insert_item_backq(xlog_recover_item_t **q,
-                                              xlog_recover_item_t *item);
 
 /*
- * Sector aligned buffer routines for buffer create/read/write/access
+ * Verify the given count of basic blocks is valid number of blocks
+ * to specify for an operation involving the given XFS log buffer.
+ * Returns nonzero if the count is valid, 0 otherwise.
  */
 
-#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs)  \
-       ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \
-       ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
-#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno)  ((bno) & ~(log)->l_sectbb_mask)
+static inline int
+xlog_buf_bbcount_valid(
+       xlog_t          *log,
+       int             bbcount)
+{
+       return bbcount > 0 && bbcount <= log->l_logBBsize;
+}
 
+/*
+ * Allocate a buffer to hold log data.  The buffer needs to be able
+ * to map to a range of nbblks basic blocks at any valid (basic
+ * block) offset within the log.
+ */
 xfs_buf_t *
 xlog_get_bp(
        xlog_t          *log,
-       int             num_bblks)
+       int             nbblks)
 {
-       ASSERT(num_bblks > 0);
-
-       if (log->l_sectbb_log) {
-               if (num_bblks > 1)
-                       num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
-               num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+               return NULL;
        }
-       return libxfs_getbufr(log->l_dev, (xfs_daddr_t)-1, num_bblks);
+
+       /*
+        * We do log I/O in units of log sectors (a power-of-2
+        * multiple of the basic block size), so we round up the
+        * requested size to accommodate the basic blocks required
+        * for complete log sectors.
+        *
+        * In addition, the buffer may be used for a non-sector-
+        * aligned block offset, in which case an I/O of the
+        * requested size could extend beyond the end of the
+        * buffer.  If the requested size is only 1 basic block it
+        * will never straddle a sector boundary, so this won't be
+        * an issue.  Nor will this be a problem if the log I/O is
+        * done in basic blocks (sector size 1).  But otherwise we
+        * extend the buffer by one extra log sector to ensure
+        * there's space to accommodate this possibility.
+        */
+       if (nbblks > 1 && log->l_sectBBsize > 1)
+               nbblks += log->l_sectBBsize;
+       if (log->l_sectBBsize)
+               nbblks = round_up(nbblks, log->l_sectBBsize);
+
+       return libxfs_getbufr(log->l_dev, (xfs_daddr_t)-1, nbblks);
 }
 
 void
@@ -56,25 +85,50 @@ xlog_put_bp(
        libxfs_putbufr(bp);
 }
 
+/*
+ * Return the address of the start of the given block number's data
+ * in a log buffer.  The buffer covers a log sector-aligned region.
+ */
+STATIC xfs_caddr_t
+xlog_align(
+       xlog_t          *log,
+       xfs_daddr_t     blk_no,
+       int             nbblks,
+       xfs_buf_t       *bp)
+{
+       xfs_daddr_t     offset = 0;
+
+       if (log->l_sectBBsize)
+               offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
+
+       ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
+       return XFS_BUF_PTR(bp) + BBTOB(offset);
+}
 
 /*
  * nbblks should be uint, but oh well.  Just want to catch that 32-bit length.
  */
 int
-xlog_bread(
+xlog_bread_noalign(
        xlog_t          *log,
        xfs_daddr_t     blk_no,
        int             nbblks,
        xfs_buf_t       *bp)
 {
-       if (log->l_sectbb_log) {
-               blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
+               return EFSCORRUPTED;
+       }
+
+       if (log->l_sectBBsize > 1) {
+               blk_no = round_down(blk_no, log->l_sectBBsize);
+               nbblks = round_up(nbblks, log->l_sectBBsize);
        }
 
        ASSERT(nbblks > 0);
        ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
-       ASSERT(bp);
 
        XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
        XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
@@ -82,26 +136,24 @@ xlog_bread(
        return libxfs_readbufr(log->l_dev, XFS_BUF_ADDR(bp), bp, nbblks, 0);
 }
 
-
-static xfs_caddr_t
-xlog_align(
+int
+xlog_bread(
        xlog_t          *log,
        xfs_daddr_t     blk_no,
        int             nbblks,
-       xfs_buf_t       *bp)
+       xfs_buf_t       *bp,
+       xfs_caddr_t     *offset)
 {
-       xfs_caddr_t     ptr;
+       int             error;
 
-       if (!log->l_sectbb_log)
-               return XFS_BUF_PTR(bp);
+       error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+       if (error)
+               return error;
 
-       ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
-       ASSERT(XFS_BUF_SIZE(bp) >=
-               BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
-       return ptr;
+       *offset = xlog_align(log, blk_no, nbblks, bp);
+       return 0;
 }
 
-
 /*
  * This routine finds (to an approximation) the first block in the physical
  * log which contains the given cycle.  It uses a binary search algorithm.
@@ -118,39 +170,38 @@ xlog_find_cycle_start(
 {
        xfs_caddr_t     offset;
        xfs_daddr_t     mid_blk;
+       xfs_daddr_t     end_blk;
        uint            mid_cycle;
        int             error;
 
-       mid_blk = BLK_AVG(first_blk, *last_blk);
-       while (mid_blk != first_blk && mid_blk != *last_blk) {
-               if ((error = xlog_bread(log, mid_blk, 1, bp)))
+       end_blk = *last_blk;
+       mid_blk = BLK_AVG(first_blk, end_blk);
+       while (mid_blk != first_blk && mid_blk != end_blk) {
+               error = xlog_bread(log, mid_blk, 1, bp, &offset);
+               if (error)
                        return error;
-               offset = xlog_align(log, mid_blk, 1, bp);
                mid_cycle = xlog_get_cycle(offset);
-               if (mid_cycle == cycle) {
-                       *last_blk = mid_blk;
-                       /* last_half_cycle == mid_cycle */
-               } else {
-                       first_blk = mid_blk;
-                       /* first_half_cycle == mid_cycle */
-               }
-               mid_blk = BLK_AVG(first_blk, *last_blk);
+               if (mid_cycle == cycle)
+                       end_blk = mid_blk;   /* last_half_cycle == mid_cycle */
+               else
+                       first_blk = mid_blk; /* first_half_cycle == mid_cycle */
+               mid_blk = BLK_AVG(first_blk, end_blk);
        }
-       ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
-              (mid_blk == *last_blk && mid_blk-1 == first_blk));
+       ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
+              (mid_blk == end_blk && mid_blk-1 == first_blk));
+
+       *last_blk = end_blk;
 
        return 0;
 }
 
 /*
- * Check that the range of blocks does not contain the cycle number
- * given.  The scan needs to occur from front to back and the ptr into the
- * region must be updated since a later routine will need to perform another
- * test.  If the region is completely good, we end up returning the same
- * last block number.
- *
- * Set blkno to -1 if we encounter no errors.  This is an invalid block number
- * since we don't ever expect logs to get this large.
+ * Check that a range of blocks does not contain stop_on_cycle_no.
+ * Fill in *new_blk with the block offset where such a block is
+ * found, or with -1 (an invalid block number) if there is no such
+ * block in the range.  The scan needs to occur from front to back
+ * and the pointer into the region must be updated since a later
+ * routine will need to perform another test.
  */
 STATIC int
 xlog_find_verify_cycle(
@@ -167,12 +218,16 @@ xlog_find_verify_cycle(
        xfs_caddr_t     buf = NULL;
        int             error = 0;
 
+       /*
+        * Greedily allocate a buffer big enough to handle the full
+        * range of basic blocks we'll be examining.  If that fails,
+        * try a smaller size.  We need to be able to read at least
+        * a log sector, or we're out of luck.
+        */
        bufblks = 1 << ffs(nbblks);
-
        while (!(bp = xlog_get_bp(log, bufblks))) {
-               /* can't get enough memory to do everything in one big buffer */
                bufblks >>= 1;
-               if (bufblks <= log->l_sectbb_log)
+               if (bufblks < MAX(log->l_sectBBsize, 1))
                        return ENOMEM;
        }
 
@@ -181,10 +236,10 @@ xlog_find_verify_cycle(
 
                bcount = min(bufblks, (start_blk + nbblks - i));
 
-               if ((error = xlog_bread(log, i, bcount, bp)))
+               error = xlog_bread(log, i, bcount, bp, &buf);
+               if (error)
                        goto out;
 
-               buf = xlog_align(log, i, bcount, bp);
                for (j = 0; j < bcount; j++) {
                        cycle = xlog_get_cycle(buf);
                        if (cycle == stop_on_cycle_no) {
@@ -238,9 +293,9 @@ xlog_find_verify_log_record(
                        return ENOMEM;
                smallmem = 1;
        } else {
-               if ((error = xlog_bread(log, start_blk, num_blks, bp)))
+               error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+               if (error)
                        goto out;
-               offset = xlog_align(log, start_blk, num_blks, bp);
                offset += ((num_blks - 1) << BBSHIFT);
        }
 
@@ -255,9 +310,9 @@ xlog_find_verify_log_record(
                }
 
                if (smallmem) {
-                       if ((error = xlog_bread(log, i, 1, bp)))
+                       error = xlog_bread(log, i, 1, bp, &offset);
+                       if (error)
                                goto out;
-                       offset = xlog_align(log, i, 1, bp);
                }
 
                head = (xlog_rec_header_t *)offset;
@@ -325,7 +380,7 @@ out:
  *
  * Return: zero if normal, non-zero if error.
  */
-int
+STATIC int
 xlog_find_head(
        xlog_t          *log,
        xfs_daddr_t     *return_head_blk)
@@ -337,14 +392,13 @@ xlog_find_head(
        uint            first_half_cycle, last_half_cycle;
        uint            stop_on_cycle;
        int             error, log_bbnum = log->l_logBBsize;
-       extern int      platform_has_uuid;
 
        /* Is the end of the log device zeroed? */
        if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
                *return_head_blk = first_blk;
 
                /* Is the whole lot zeroed? */
-               if (!first_blk && platform_has_uuid) {
+               if (!first_blk) {
                        /* Linux XFS shouldn't generate totally zeroed logs -
                         * mkfs etc write a dummy unmount record to a fresh
                         * log so we can store the uuid in there
@@ -362,15 +416,18 @@ xlog_find_head(
        bp = xlog_get_bp(log, 1);
        if (!bp)
                return ENOMEM;
-       if ((error = xlog_bread(log, 0, 1, bp)))
+
+       error = xlog_bread(log, 0, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, 0, 1, bp);
+
        first_half_cycle = xlog_get_cycle(offset);
 
        last_blk = head_blk = log_bbnum - 1;    /* get cycle # of last block */
-       if ((error = xlog_bread(log, last_blk, 1, bp)))
+       error = xlog_bread(log, last_blk, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, last_blk, 1, bp);
+
        last_half_cycle = xlog_get_cycle(offset);
        ASSERT(last_half_cycle != 0);
 
@@ -418,7 +475,7 @@ xlog_find_head(
                 * In this case we want to find the first block with cycle
                 * number matching last_half_cycle.  We expect the log to be
                 * some variation on
-                *        x + 1 ... | x ...
+                *        x + 1 ... | x ... | x
                 * The first block with cycle number x (last_half_cycle) will
                 * be where the new head belongs.  First we do a binary search
                 * for the first occurrence of last_half_cycle.  The binary
@@ -428,11 +485,13 @@ xlog_find_head(
                 * the log, then we look for occurrences of last_half_cycle - 1
                 * at the end of the log.  The cases we're looking for look
                 * like
-                *        x + 1 ... | x | x + 1 | x ...
-                *                               ^ binary search stopped here
+                *                               v binary search stopped here
+                *        x + 1 ... | x | x + 1 | x ... | x
+                *                   ^ but we want to locate this spot
                 * or
-                *        x + 1 ... | x ... | x - 1 | x
                 *        <---------> less than scan distance
+                *        x + 1 ... | x ... | x - 1 | x
+                *                           ^ we want to locate this spot
                 */
                stop_on_cycle = last_half_cycle;
                if ((error = xlog_find_cycle_start(log, bp, first_blk,
@@ -488,16 +547,16 @@ xlog_find_head(
                 * certainly not the head of the log.  By searching for
                 * last_half_cycle-1 we accomplish that.
                 */
-               start_blk = log_bbnum - num_scan_bblks + head_blk;
                ASSERT(head_blk <= INT_MAX &&
-                       (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
+                       (xfs_daddr_t) num_scan_bblks >= head_blk);
+               start_blk = log_bbnum - (num_scan_bblks - head_blk);
                if ((error = xlog_find_verify_cycle(log, start_blk,
                                        num_scan_bblks - (int)head_blk,
                                        (stop_on_cycle - 1), &new_blk)))
                        goto bp_err;
                if (new_blk != -1) {
                        head_blk = new_blk;
-                       goto bad_blk;
+                       goto validate_head;
                }
 
                /*
@@ -515,7 +574,7 @@ xlog_find_head(
                        head_blk = new_blk;
        }
 
- bad_blk:
+validate_head:
        /*
         * Now we need to make sure head_blk is not pointing to a block in
         * the middle of a log record.
@@ -537,7 +596,7 @@ xlog_find_head(
                if ((error = xlog_find_verify_log_record(log, start_blk,
                                                        &head_blk, 0)) == -1) {
                        /* We hit the beginning of the log during our search */
-                       start_blk = log_bbnum - num_scan_bblks + head_blk;
+                       start_blk = log_bbnum - (num_scan_bblks - head_blk);
                        new_blk = log_bbnum;
                        ASSERT(start_blk <= INT_MAX &&
                                (xfs_daddr_t) log_bbnum-start_blk >= 0);
@@ -620,13 +679,14 @@ xlog_find_tail(
        if (!bp)
                return ENOMEM;
        if (*head_blk == 0) {                           /* special case */
-               if ((error = xlog_bread(log, 0, 1, bp)))
-                       goto bread_err;
-               offset = xlog_align(log, 0, 1, bp);
+               error = xlog_bread(log, 0, 1, bp, &offset);
+               if (error)
+                       goto done;
+
                if (xlog_get_cycle(offset) == 0) {
                        *tail_blk = 0;
                        /* leave all other log inited values alone */
-                       goto exit;
+                       goto done;
                }
        }
 
@@ -635,9 +695,10 @@ xlog_find_tail(
         */
        ASSERT(*head_blk < INT_MAX);
        for (i = (int)(*head_blk) - 1; i >= 0; i--) {
-               if ((error = xlog_bread(log, i, 1, bp)))
-                       goto bread_err;
-               offset = xlog_align(log, i, 1, bp);
+               error = xlog_bread(log, i, 1, bp, &offset);
+               if (error)
+                       goto done;
+
                if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
                        found = 1;
                        break;
@@ -651,9 +712,10 @@ xlog_find_tail(
         */
        if (!found) {
                for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
-                       if ((error = xlog_bread(log, i, 1, bp)))
-                               goto bread_err;
-                       offset = xlog_align(log, i, 1, bp);
+                       error = xlog_bread(log, i, 1, bp, &offset);
+                       if (error)
+                               goto done;
+
                        if (XLOG_HEADER_MAGIC_NUM ==
                            be32_to_cpu(*(__be32 *)offset)) {
                                found = 2;
@@ -686,12 +748,12 @@ xlog_find_tail(
        log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
        if (found == 2)
                log->l_curr_cycle++;
-       log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
-       log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
-       log->l_grant_reserve_cycle = log->l_curr_cycle;
-       log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
-       log->l_grant_write_cycle = log->l_curr_cycle;
-       log->l_grant_write_bytes = BBTOB(log->l_curr_block);
+       atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+       atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+       xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
+                                       BBTOB(log->l_curr_block));
+       xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
+                                       BBTOB(log->l_curr_block));
 
        /*
         * Look for unmount record.  If we find it, then we know there
@@ -721,14 +783,14 @@ xlog_find_tail(
        }
        after_umount_blk = (i + hblks + (int)
                BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
-       tail_lsn = log->l_tail_lsn;
+       tail_lsn = atomic64_read(&log->l_tail_lsn);
        if (*head_blk == after_umount_blk &&
            be32_to_cpu(rhead->h_num_logops) == 1) {
                umount_data_blk = (i + hblks) % log->l_logBBsize;
-               if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
-                       goto bread_err;
-               }
-               offset = xlog_align(log, umount_data_blk, 1, bp);
+               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               if (error)
+                       goto done;
+
                op_head = (xlog_op_header_t *)offset;
                if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
                        /*
@@ -736,12 +798,10 @@ xlog_find_tail(
                         * log records will point recovery to after the
                         * current unmount record.
                         */
-                       log->l_tail_lsn =
-                               xlog_assign_lsn(log->l_curr_cycle,
-                                               after_umount_blk);
-                       log->l_last_sync_lsn =
-                               xlog_assign_lsn(log->l_curr_cycle,
-                                               after_umount_blk);
+                       xlog_assign_atomic_lsn(&log->l_tail_lsn,
+                                       log->l_curr_cycle, after_umount_blk);
+                       xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+                                       log->l_curr_cycle, after_umount_blk);
                        *tail_blk = after_umount_blk;
 
                        /*
@@ -773,12 +833,10 @@ xlog_find_tail(
         * But... if the -device- itself is readonly, just skip this.
         * We can't recover this device anyway, so it won't matter.
         */
-       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
                error = xlog_clear_stale_blocks(log, tail_lsn);
-       }
 
-bread_err:
-exit:
+done:
        xlog_put_bp(bp);
 
        if (error)
@@ -820,9 +878,10 @@ xlog_find_zeroed(
        bp = xlog_get_bp(log, 1);
        if (!bp)
                return ENOMEM;
-       if ((error = xlog_bread(log, 0, 1, bp)))
+       error = xlog_bread(log, 0, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, 0, 1, bp);
+
        first_cycle = xlog_get_cycle(offset);
        if (first_cycle == 0) {         /* completely zeroed log */
                *blk_no = 0;
@@ -831,9 +890,10 @@ xlog_find_zeroed(
        }
 
        /* check partially zeroed log */
-       if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
+       error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+       if (error)
                goto bp_err;
-       offset = xlog_align(log, log_bbnum-1, 1, bp);
+
        last_cycle = xlog_get_cycle(offset);
        if (last_cycle != 0) {          /* log completely written to */
                xlog_put_bp(bp);
@@ -899,40 +959,50 @@ bp_err:
 
 STATIC xlog_recover_t *
 xlog_recover_find_tid(
-       xlog_recover_t          *q,
+       struct hlist_head       *head,
        xlog_tid_t              tid)
 {
-       xlog_recover_t          *p = q;
+       xlog_recover_t          *trans;
+       struct hlist_node       *n;
 
-       while (p != NULL) {
-               if (p->r_log_tid == tid)
-                   break;
-               p = p->r_next;
+       hlist_for_each_entry(trans, n, head, r_list) {
+               if (trans->r_log_tid == tid)
+                       return trans;
        }
-       return p;
+       return NULL;
 }
 
 STATIC void
-xlog_recover_put_hashq(
-       xlog_recover_t          **q,
-       xlog_recover_t          *trans)
+xlog_recover_new_tid(
+       struct hlist_head       *head,
+       xlog_tid_t              tid,
+       xfs_lsn_t               lsn)
 {
-       trans->r_next = *q;
-       *q = trans;
+       xlog_recover_t          *trans;
+
+       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
+       trans->r_log_tid   = tid;
+       trans->r_lsn       = lsn;
+       INIT_LIST_HEAD(&trans->r_itemq);
+
+       INIT_HLIST_NODE(&trans->r_list);
+       hlist_add_head(&trans->r_list, head);
 }
 
 STATIC void
 xlog_recover_add_item(
-       xlog_recover_item_t     **itemq)
+       struct list_head        *head)
 {
        xlog_recover_item_t     *item;
 
        item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
-       xlog_recover_insert_item_backq(itemq, item);
+       INIT_LIST_HEAD(&item->ri_list);
+       list_add_tail(&item->ri_list, head);
 }
 
 STATIC int
 xlog_recover_add_to_cont_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -941,8 +1011,7 @@ xlog_recover_add_to_cont_trans(
        xfs_caddr_t             ptr, old_ptr;
        int                     old_len;
 
-       item = trans->r_itemq;
-       if (item == NULL) {
+       if (list_empty(&trans->r_itemq)) {
                /* finish copying rest of trans header */
                xlog_recover_add_item(&trans->r_itemq);
                ptr = (xfs_caddr_t) &trans->r_theader +
@@ -950,7 +1019,8 @@ xlog_recover_add_to_cont_trans(
                memcpy(ptr, dp, len); /* d, s, l */
                return 0;
        }
-       item = item->ri_prev;
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
 
        old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
        old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -959,6 +1029,7 @@ xlog_recover_add_to_cont_trans(
        memcpy(&ptr[old_len], dp, len); /* d, s, l */
        item->ri_buf[item->ri_cnt-1].i_len += len;
        item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+       trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
        return 0;
 }
 
@@ -977,6 +1048,7 @@ xlog_recover_add_to_cont_trans(
  */
 STATIC int
 xlog_recover_add_to_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -987,9 +1059,14 @@ xlog_recover_add_to_trans(
 
        if (!len)
                return 0;
-       item = trans->r_itemq;
-       if (item == NULL) {
-               ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
+       if (list_empty(&trans->r_itemq)) {
+               /* we need to catch log corruptions here */
+               if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
+                       xlog_warn("XFS: xlog_recover_add_to_trans: "
+                                 "bad header magic number");
+                       ASSERT(0);
+                       return XFS_ERROR(EIO);
+               }
                if (len == sizeof(xfs_trans_header_t))
                        xlog_recover_add_item(&trans->r_itemq);
                memcpy(&trans->r_theader, dp, len); /* d, s, l */
@@ -1000,88 +1077,40 @@ xlog_recover_add_to_trans(
        memcpy(ptr, dp, len);
        in_f = (xfs_inode_log_format_t *)ptr;
 
-       if (item->ri_prev->ri_total != 0 &&
-            item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
+       /* take the tail entry */
+       item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
+       if (item->ri_total != 0 &&
+            item->ri_total == item->ri_cnt) {
+               /* tail item is in use, get a new one */
                xlog_recover_add_item(&trans->r_itemq);
+               item = list_entry(trans->r_itemq.prev,
+                                       xlog_recover_item_t, ri_list);
        }
-       item = trans->r_itemq;
-       item = item->ri_prev;
 
        if (item->ri_total == 0) {              /* first region to be added */
-               item->ri_total  = in_f->ilf_size;
-               ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
-               item->ri_buf = kmem_zalloc((item->ri_total *
-                                           sizeof(xfs_log_iovec_t)), KM_SLEEP);
+               if (in_f->ilf_size == 0 ||
+                   in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+                       xlog_warn(
+       "XFS: bad number of regions (%d) in inode log format",
+                                 in_f->ilf_size);
+                       ASSERT(0);
+                       return XFS_ERROR(EIO);
+               }
+
+               item->ri_total = in_f->ilf_size;
+               item->ri_buf =
+                       kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+                                   KM_SLEEP);
        }
        ASSERT(item->ri_total > item->ri_cnt);
        /* Description region is ri_buf[0] */
        item->ri_buf[item->ri_cnt].i_addr = ptr;
        item->ri_buf[item->ri_cnt].i_len  = len;
        item->ri_cnt++;
+       trace_xfs_log_recover_item_add(log, trans, item, 0);
        return 0;
 }
 
-STATIC void
-xlog_recover_new_tid(
-       xlog_recover_t          **q,
-       xlog_tid_t              tid,
-       xfs_lsn_t               lsn)
-{
-       xlog_recover_t          *trans;
-
-       trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
-       trans->r_log_tid   = tid;
-       trans->r_lsn       = lsn;
-       xlog_recover_put_hashq(q, trans);
-}
-
-STATIC int
-xlog_recover_unlink_tid(
-       xlog_recover_t          **q,
-       xlog_recover_t          *trans)
-{
-       xlog_recover_t          *tp;
-       int                     found = 0;
-
-       ASSERT(trans != NULL);
-       if (trans == *q) {
-               *q = (*q)->r_next;
-       } else {
-               tp = *q;
-               while (tp) {
-                       if (tp->r_next == trans) {
-                               found = 1;
-                               break;
-                       }
-                       tp = tp->r_next;
-               }
-               if (!found) {
-                       xlog_warn(
-                            "XFS: xlog_recover_unlink_tid: trans not found");
-                       ASSERT(0);
-                       return XFS_ERROR(EIO);
-               }
-               tp->r_next = tp->r_next->r_next;
-       }
-       return 0;
-}
-
-STATIC void
-xlog_recover_insert_item_backq(
-       xlog_recover_item_t     **q,
-       xlog_recover_item_t     *item)
-{
-       if (*q == NULL) {
-               item->ri_prev = item->ri_next = item;
-               *q = item;
-       } else {
-               item->ri_next           = *q;
-               item->ri_prev           = (*q)->ri_prev;
-               (*q)->ri_prev           = item;
-               item->ri_prev->ri_next  = item;
-       }
-}
-
 /*
  * Free up any resources allocated by the transaction
  *
@@ -1089,41 +1118,43 @@ xlog_recover_insert_item_backq(
  */
 STATIC void
 xlog_recover_free_trans(
-       xlog_recover_t          *trans)
+       struct xlog_recover     *trans)
 {
-       xlog_recover_item_t     *first_item, *item, *free_item;
+       xlog_recover_item_t     *item, *n;
        int                     i;
 
-       item = first_item = trans->r_itemq;
-       do {
-               free_item = item;
-               item = item->ri_next;
-                /* Free the regions in the item. */
-               for (i = 0; i < free_item->ri_cnt; i++) {
-                       kmem_free(free_item->ri_buf[i].i_addr);
-               }
+       list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
+               /* Free the regions in the item. */
+               list_del(&item->ri_list);
+               for (i = 0; i < item->ri_cnt; i++)
+                       kmem_free(item->ri_buf[i].i_addr);
                /* Free the item itself */
-               kmem_free(free_item->ri_buf);
-               kmem_free(free_item);
-       } while (first_item != item);
+               kmem_free(item->ri_buf);
+               kmem_free(item);
+       }
        /* Free the transaction recover structure */
        kmem_free(trans);
 }
 
+/*
+ * Perform the transaction.
+ *
+ * If the transaction modifies a buffer or inode, do it now.  Otherwise,
+ * EFIs and EFDs get queued up by adding entries into the AIL for them.
+ */
 STATIC int
 xlog_recover_commit_trans(
-       xlog_t                  *log,
-       xlog_recover_t          **q,
-       xlog_recover_t          *trans,
+       struct log              *log,
+       struct xlog_recover     *trans,
        int                     pass)
 {
-       int                     error;
+       int                     error = 0;
 
-       if ((error = xlog_recover_unlink_tid(q, trans)))
-               return error;
+       hlist_del(&trans->r_list);
        if ((error = xlog_recover_do_trans(log, trans, pass)))
                return error;
-       xlog_recover_free_trans(trans);                 /* no error */
+
+       xlog_recover_free_trans(trans);
        return 0;
 }
 
@@ -1148,7 +1179,7 @@ xlog_recover_unmount_trans(
 STATIC int
 xlog_recover_process_data(
        xlog_t                  *log,
-       xlog_recover_t          *rhash[],
+       struct hlist_head       rhash[],
        xlog_rec_header_t       *rhead,
        xfs_caddr_t             dp,
        int                     pass)
@@ -1182,27 +1213,32 @@ xlog_recover_process_data(
                }
                tid = be32_to_cpu(ohead->oh_tid);
                hash = XLOG_RHASH(tid);
-               trans = xlog_recover_find_tid(rhash[hash], tid);
+               trans = xlog_recover_find_tid(&rhash[hash], tid);
                if (trans == NULL) {               /* not found; add new tid */
                        if (ohead->oh_flags & XLOG_START_TRANS)
                                xlog_recover_new_tid(&rhash[hash], tid,
                                        be64_to_cpu(rhead->h_lsn));
                } else {
-                       ASSERT(dp + be32_to_cpu(ohead->oh_len) <= lp);
+                       if (dp + be32_to_cpu(ohead->oh_len) > lp) {
+                               xlog_warn(
+                       "XFS: xlog_recover_process_data: bad length");
+                               return (XFS_ERROR(EIO));
+                       }
                        flags = ohead->oh_flags & ~XLOG_END_TRANS;
                        if (flags & XLOG_WAS_CONT_TRANS)
                                flags &= ~XLOG_CONTINUE_TRANS;
                        switch (flags) {
                        case XLOG_COMMIT_TRANS:
                                error = xlog_recover_commit_trans(log,
-                                               &rhash[hash], trans, pass);
+                                                               trans, pass);
                                break;
                        case XLOG_UNMOUNT_TRANS:
                                error = xlog_recover_unmount_trans(trans);
                                break;
                        case XLOG_WAS_CONT_TRANS:
-                               error = xlog_recover_add_to_cont_trans(trans,
-                                               dp, be32_to_cpu(ohead->oh_len));
+                               error = xlog_recover_add_to_cont_trans(log,
+                                               trans, dp,
+                                               be32_to_cpu(ohead->oh_len));
                                break;
                        case XLOG_START_TRANS:
                                xlog_warn(
@@ -1212,7 +1248,7 @@ xlog_recover_process_data(
                                break;
                        case 0:
                        case XLOG_CONTINUE_TRANS:
-                               error = xlog_recover_add_to_trans(trans,
+                               error = xlog_recover_add_to_trans(log, trans,
                                                dp, be32_to_cpu(ohead->oh_len));
                                break;
                        default:
@@ -1238,7 +1274,6 @@ xlog_unpack_data(
        xlog_t                  *log)
 {
        int                     i, j, k;
-       xlog_in_core_2_t        *xhdr;
 
        for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
                  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
@@ -1247,7 +1282,7 @@ xlog_unpack_data(
        }
 
        if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-               xhdr = (xlog_in_core_2_t *)rhead;
+               xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
                for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
                        j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
                        k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -1255,8 +1290,6 @@ xlog_unpack_data(
                        dp += BBSIZE;
                }
        }
-
-       xlog_unpack_data_checksum(rhead, dp, log);
 }
 
 STATIC int
@@ -1312,12 +1345,12 @@ xlog_do_recovery_pass(
 {
        xlog_rec_header_t       *rhead;
        xfs_daddr_t             blk_no;
-       xfs_caddr_t             bufaddr, offset;
+       xfs_caddr_t             offset;
        xfs_buf_t               *hbp, *dbp;
        int                     error = 0, h_size;
        int                     bblks, split_bblks;
        int                     hblks, split_hblks, wrapped_hblks;
-       xlog_recover_t          *rhash[XLOG_RHASH_SIZE];
+       struct hlist_head       rhash[XLOG_RHASH_SIZE];
 
        ASSERT(head_blk != tail_blk);
 
@@ -1334,9 +1367,11 @@ xlog_do_recovery_pass(
                hbp = xlog_get_bp(log, 1);
                if (!hbp)
                        return ENOMEM;
-               if ((error = xlog_bread(log, tail_blk, 1, hbp)))
+
+               error = xlog_bread(log, tail_blk, 1, hbp, &offset);
+               if (error)
                        goto bread_err1;
-               offset = xlog_align(log, tail_blk, 1, hbp);
+
                rhead = (xlog_rec_header_t *)offset;
                error = xlog_valid_rec_header(log, rhead, tail_blk);
                if (error)
@@ -1353,7 +1388,7 @@ xlog_do_recovery_pass(
                        hblks = 1;
                }
        } else {
-               ASSERT(log->l_sectbb_log == 0);
+               ASSERT(log->l_sectBBsize == 1);
                hblks = 1;
                hbp = xlog_get_bp(log, 1);
                h_size = XLOG_BIG_RECORD_BSIZE;
@@ -1370,9 +1405,10 @@ xlog_do_recovery_pass(
        memset(rhash, 0, sizeof(rhash));
        if (tail_blk <= head_blk) {
                for (blk_no = tail_blk; blk_no < head_blk; ) {
-                       if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+                       error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+                       if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no, hblks, hbp);
+
                        rhead = (xlog_rec_header_t *)offset;
                        error = xlog_valid_rec_header(log, rhead, blk_no);
                        if (error)
@@ -1380,10 +1416,11 @@ xlog_do_recovery_pass(
 
                        /* blocks in data section */
                        bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-                       error = xlog_bread(log, blk_no + hblks, bblks, dbp);
+                       error = xlog_bread(log, blk_no + hblks, bblks, dbp,
+                                          &offset);
                        if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no + hblks, bblks, dbp);
+
                        xlog_unpack_data(rhead, offset, log);
                        if ((error = xlog_recover_process_data(log,
                                                rhash, rhead, offset, pass)))
@@ -1401,15 +1438,15 @@ xlog_do_recovery_pass(
                        /*
                         * Check for header wrapping around physical end-of-log
                         */
-                       offset = NULL;
+                       offset = XFS_BUF_PTR(hbp);
                        split_hblks = 0;
                        wrapped_hblks = 0;
                        if (blk_no + hblks <= log->l_logBBsize) {
                                /* Read header in one read */
-                               error = xlog_bread(log, blk_no, hblks, hbp);
+                               error = xlog_bread(log, blk_no, hblks, hbp,
+                                                  &offset);
                                if (error)
                                        goto bread_err2;
-                               offset = xlog_align(log, blk_no, hblks, hbp);
                        } else {
                                /* This LR is split across physical log end */
                                if (blk_no != log->l_logBBsize) {
@@ -1417,12 +1454,13 @@ xlog_do_recovery_pass(
                                        ASSERT(blk_no <= INT_MAX);
                                        split_hblks = log->l_logBBsize - (int)blk_no;
                                        ASSERT(split_hblks > 0);
-                                       if ((error = xlog_bread(log, blk_no,
-                                                       split_hblks, hbp)))
+                                       error = xlog_bread(log, blk_no,
+                                                          split_hblks, hbp,
+                                                          &offset);
+                                       if (error)
                                                goto bread_err2;
-                                       offset = xlog_align(log, blk_no,
-                                                       split_hblks, hbp);
                                }
+
                                /*
                                 * Note: this black magic still works with
                                 * large sector sizes (non-512) only because:
@@ -1436,17 +1474,21 @@ xlog_do_recovery_pass(
                                 *   - order is important.
                                 */
                                wrapped_hblks = hblks - split_hblks;
-                               bufaddr = XFS_BUF_PTR(hbp);
-                               XFS_BUF_SET_PTR(hbp,
-                                               bufaddr + BBTOB(split_hblks),
+                               error = XFS_BUF_SET_PTR(hbp,
+                                               offset + BBTOB(split_hblks),
                                                BBTOB(hblks - split_hblks));
-                               error = xlog_bread(log, 0, wrapped_hblks, hbp);
                                if (error)
                                        goto bread_err2;
-                               XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks));
-                               if (!offset)
-                                       offset = xlog_align(log, 0,
-                                                       wrapped_hblks, hbp);
+
+                               error = xlog_bread_noalign(log, 0,
+                                                          wrapped_hblks, hbp);
+                               if (error)
+                                       goto bread_err2;
+
+                               error = XFS_BUF_SET_PTR(hbp, offset,
+                                                       BBTOB(hblks));
+                               if (error)
+                                       goto bread_err2;
                        }
                        rhead = (xlog_rec_header_t *)offset;
                        error = xlog_valid_rec_header(log, rhead,
@@ -1459,14 +1501,14 @@ xlog_do_recovery_pass(
 
                        /* Read in data for log record */
                        if (blk_no + bblks <= log->l_logBBsize) {
-                               error = xlog_bread(log, blk_no, bblks, dbp);
+                               error = xlog_bread(log, blk_no, bblks, dbp,
+                                                  &offset);
                                if (error)
                                        goto bread_err2;
-                               offset = xlog_align(log, blk_no, bblks, dbp);
                        } else {
                                /* This log record is split across the
                                 * physical end of log */
-                               offset = NULL;
+                               offset = XFS_BUF_PTR(dbp);
                                split_bblks = 0;
                                if (blk_no != log->l_logBBsize) {
                                        /* some data is before the physical
@@ -1476,12 +1518,13 @@ xlog_do_recovery_pass(
                                        split_bblks =
                                                log->l_logBBsize - (int)blk_no;
                                        ASSERT(split_bblks > 0);
-                                       if ((error = xlog_bread(log, blk_no,
-                                                       split_bblks, dbp)))
+                                       error = xlog_bread(log, blk_no,
+                                                       split_bblks, dbp,
+                                                       &offset);
+                                       if (error)
                                                goto bread_err2;
-                                       offset = xlog_align(log, blk_no,
-                                                       split_bblks, dbp);
                                }
+
                                /*
                                 * Note: this black magic still works with
                                 * large sector sizes (non-512) only because:
@@ -1494,18 +1537,21 @@ xlog_do_recovery_pass(
                                 *   _first_, then the log start (LR header end)
                                 *   - order is important.
                                 */
-                               bufaddr = XFS_BUF_PTR(dbp);
-                               XFS_BUF_SET_PTR(dbp,
-                                               bufaddr + BBTOB(split_bblks),
+                               error = XFS_BUF_SET_PTR(dbp,
+                                               offset + BBTOB(split_bblks),
                                                BBTOB(bblks - split_bblks));
-                               error = xlog_bread(log, wrapped_hblks,
-                                               bblks - split_bblks, dbp);
                                if (error)
                                        goto bread_err2;
-                               XFS_BUF_SET_PTR(dbp, bufaddr, h_size);
-                               if (!offset)
-                                       offset = xlog_align(log, wrapped_hblks,
-                                               bblks - split_bblks, dbp);
+
+                               error = xlog_bread_noalign(log, wrapped_hblks,
+                                               bblks - split_bblks,
+                                               dbp);
+                               if (error)
+                                       goto bread_err2;
+
+                               error = XFS_BUF_SET_PTR(dbp, offset, h_size);
+                               if (error)
+                                       goto bread_err2;
                        }
                        xlog_unpack_data(rhead, offset, log);
                        if ((error = xlog_recover_process_data(log, rhash,
@@ -1519,17 +1565,21 @@ xlog_do_recovery_pass(
 
                /* read first part of physical log */
                while (blk_no < head_blk) {
-                       if ((error = xlog_bread(log, blk_no, hblks, hbp)))
+                       error = xlog_bread(log, blk_no, hblks, hbp, &offset);
+                       if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no, hblks, hbp);
+
                        rhead = (xlog_rec_header_t *)offset;
                        error = xlog_valid_rec_header(log, rhead, blk_no);
                        if (error)
                                goto bread_err2;
+
                        bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
-                       if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
+                       error = xlog_bread(log, blk_no+hblks, bblks, dbp,
+                                          &offset);
+                       if (error)
                                goto bread_err2;
-                       offset = xlog_align(log, blk_no+hblks, bblks, dbp);
+
                        xlog_unpack_data(rhead, offset, log);
                        if ((error = xlog_recover_process_data(log, rhash,
                                                        rhead, offset, pass)))
index 253e146a29779782b6db84353acd67add72586dc..279d9f3762008e8ad698d937ced2cb2fdf39b079 100644 (file)
@@ -71,6 +71,7 @@ char *trans_type[] = {
        "GROWFSRT_FREE",
        "SWAPEXT",
        "SB_COUNT",
+       "CHECKPOINT",
 };
 
 typedef struct xlog_split_item {
index 7bd46172d12731e1d0a6dfde4ee8506523b089ad..62727bff5df0a58127e8ad7838abf2ac996216c4 100644 (file)
@@ -36,10 +36,10 @@ xlog_print_find_oldest(
 
        first_blk = 0;          /* read first block */
        bp = xlog_get_bp(log, 1);
-       xlog_bread(log, 0, 1, bp);
+       xlog_bread_noalign(log, 0, 1, bp);
        first_half_cycle = xlog_get_cycle(XFS_BUF_PTR(bp));
        *last_blk = log->l_logBBsize-1; /* read last block */
-       xlog_bread(log, *last_blk, 1, bp);
+       xlog_bread_noalign(log, *last_blk, 1, bp);
        last_half_cycle = xlog_get_cycle(XFS_BUF_PTR(bp));
        ASSERT(last_half_cycle != 0);
 
@@ -486,19 +486,16 @@ xlog_recover_print_item(
 void
 xlog_recover_print_trans(
        xlog_recover_t          *trans,
-       xlog_recover_item_t     *itemq,
+       struct list_head        *itemq,
        int                     print)
 {
-       xlog_recover_item_t     *first_item, *item;
+       xlog_recover_item_t     *item;
 
        if (print < 3)
                return;
 
        print_xlog_record_line();
        xlog_recover_print_trans_head(trans);
-       item = first_item = itemq;
-       do {
+       list_for_each_entry(item, itemq, ri_list)
                xlog_recover_print_item(item);
-               item = item->ri_next;
-       } while (first_item != item);
 }
index 8b212573208d269bc1f63e01d8541abb9f7ab79f..7405772f950621e2e6baaf5da16715b2127fa29c 100644 (file)
@@ -25,7 +25,7 @@ xlog_recover_print_trans_head(
        printf(_("TRANS: tid:0x%x  type:%s  #items:%d  trans:0x%x  q:0x%lx\n"),
               tr->r_log_tid, trans_type[tr->r_theader.th_type],
               tr->r_theader.th_num_items,
-              tr->r_theader.th_tid, (long)tr->r_itemq);
+              tr->r_theader.th_tid, (long)&tr->r_itemq);
 }
 
 int
@@ -34,7 +34,7 @@ xlog_recover_do_trans(
        xlog_recover_t  *trans,
        int             pass)
 {
-       xlog_recover_print_trans(trans, trans->r_itemq, 3);
+       xlog_recover_print_trans(trans, &trans->r_itemq, 3);
        return 0;
 }