Sync up with recent kernel changes; this shouldn't affect us in userspace.
struct xfs_buf;
struct xfs_mount;
+struct xfs_trans;
#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */
#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */
xfs_agblock_t agfl_bno[XFS_AGFL_SIZE];
} xfs_agfl_t;
+/*
+ * Busy block/extent entry. Used in perag to mark blocks that have been freed
+ * but whose transactions aren't committed to disk yet.
+ */
+typedef struct xfs_perag_busy {
+ xfs_agblock_t busy_start; /* first block of the busy range */
+ xfs_extlen_t busy_length; /* number of blocks in the range */
+ struct xfs_trans *busy_tp; /* transaction that did the free */
+} xfs_perag_busy_t;
+
/*
 * Per-ag incore structure, copies of information in agf and agi,
 * to improve the performance of allocation group selection.
+ *
+ * Pick pagb_list sizes which fit in memory allocation buckets well.
 */
+#if (BITS_PER_LONG == 32)
+#define XFS_PAGB_NUM_SLOTS 84
+#elif (BITS_PER_LONG == 64)
+#define XFS_PAGB_NUM_SLOTS 128
+#endif
+
typedef struct xfs_perag
{
 char pagf_init; /* this agf's entry is initialized */
 xfs_extlen_t pagf_freeblks; /* total free blocks */
 xfs_extlen_t pagf_longest; /* longest free space */
 xfs_agino_t pagi_freecount; /* number of free inodes */
+#ifdef __KERNEL__
+ lock_t pagb_lock; /* lock for pagb_list */
+ int pagb_count; /* pagb slots in use */
+ xfs_perag_busy_t *pagb_list; /* unstable blocks */
+#endif
} xfs_perag_t;
#define XFS_AG_MIN_BYTES (1LL << 24) /* 16 MB */
#define XFS_ALLOC_KTRACE_ALLOC 1
#define XFS_ALLOC_KTRACE_FREE 2
#define XFS_ALLOC_KTRACE_MODAGF 3
+#define XFS_ALLOC_KTRACE_BUSY 4
+#define XFS_ALLOC_KTRACE_UNBUSY 5
+#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
+
+
/*
* Allocation tracing buffer size.
*/
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len); /* length of extent */
+void
+xfs_alloc_mark_busy(xfs_trans_t *tp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ xfs_extlen_t len);
+
+void
+xfs_alloc_clear_busy(xfs_trans_t *tp,
+ xfs_agnumber_t ag,
+ int idx);
+
+
#endif /* __KERNEL__ */
#endif /* __XFS_ALLOC_H__ */
#define XFS_CFORK_Q(dcp) xfs_cfork_q(dcp)
#else
#define XFS_CFORK_Q_ARCH(dcp,arch) (INT_GET((dcp)->di_forkoff, arch) != 0)
-#define XFS_CFORK_Q(dcp) XFS_CFORK_Q_ARCH(dcp,ARCH_NOCONVERT)
+#define XFS_CFORK_Q(dcp) ((dcp)->di_forkoff != 0)
#endif
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_BOFF)
#define XFS_CFORK_BOFF(dcp) xfs_cfork_boff(dcp)
#else
#define XFS_CFORK_BOFF_ARCH(dcp,arch) ((int)(INT_GET((dcp)->di_forkoff, arch) << 3))
-#define XFS_CFORK_BOFF(dcp) XFS_CFORK_BOFF_ARCH(dcp,ARCH_NOCONVERT)
+#define XFS_CFORK_BOFF(dcp) ((int)((dcp)->di_forkoff << 3))
#endif
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_DSIZE)
#else
#define XFS_CFORK_DSIZE_ARCH(dcp,mp,arch) \
(XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_CFORK_BOFF_ARCH(dcp, arch) : XFS_LITINO(mp))
-#define XFS_CFORK_DSIZE(dcp,mp) XFS_CFORK_DSIZE_ARCH(dcp,mp,ARCH_NOCONVERT)
+#define XFS_CFORK_DSIZE(dcp,mp) \
+ (XFS_CFORK_Q(dcp) ? XFS_CFORK_BOFF(dcp) : XFS_LITINO(mp))
#endif
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_ASIZE)
#else
#define XFS_CFORK_ASIZE_ARCH(dcp,mp,arch) \
(XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_LITINO(mp) - XFS_CFORK_BOFF_ARCH(dcp, arch) : 0)
-#define XFS_CFORK_ASIZE(dcp,mp) XFS_CFORK_ASIZE_ARCH(dcp,mp,ARCH_NOCONVERT)
+#define XFS_CFORK_ASIZE(dcp,mp) \
+ (XFS_CFORK_Q(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF(dcp) : 0)
#endif
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_SIZE)
#define XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch) \
((w) == XFS_DATA_FORK ? \
XFS_CFORK_DSIZE_ARCH(dcp, mp, arch) : XFS_CFORK_ASIZE_ARCH(dcp, mp, arch))
-#define XFS_CFORK_SIZE(dcp,mp,w) XFS_CFORK_SIZE_ARCH(dcp,mp,w,ARCH_NOCONVERT)
+#define XFS_CFORK_SIZE(dcp,mp,w) \
+ ((w) == XFS_DATA_FORK ? \
+ XFS_CFORK_DSIZE(dcp, mp) : XFS_CFORK_ASIZE(dcp, mp))
#endif
struct xfs_bmap_free;
struct xfs_bmbt_irec;
struct xfs_bmbt_block;
-struct xfs_ext_attr;
struct xfs_inode;
struct xfs_inode_log_item;
struct xfs_mount;
/* Miscellaneous state. */
unsigned short i_flags; /* see defined flags below */
- unsigned short i_update_core; /* timestamps/size is dirty */
- unsigned short i_update_size; /* di_size field is dirty */
+ unsigned char i_update_core; /* timestamps/size is dirty */
+ unsigned char i_update_size; /* di_size field is dirty */
unsigned int i_gen; /* generation count */
unsigned int i_delayed_blks; /* count of delay alloc blks */
- struct xfs_ext_attr *i_ext_attr; /* Critical ext attributes */
- void *i_ilock_ra; /* current ilock ret addr */
xfs_dinode_core_t i_d; /* most of ondisk inode */
xfs_chashlist_t *i_chash; /* cluster hash list header */
#define XFS_ILOCK_EXCL 0x004
#define XFS_ILOCK_SHARED 0x008
#define XFS_IUNLOCK_NONOTIFY 0x010
-#define XFS_IOLOCK_NESTED 0x020
#define XFS_EXTENT_TOKEN_RD 0x040
#define XFS_SIZE_TOKEN_RD 0x080
#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
#define XFS_LOCK_MASK \
(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \
- XFS_IOLOCK_NESTED | \
XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \
XFS_WILLLEND)
#endif
#ifdef __KERNEL__
+/*
+ * This structure is used to maintain a list of block ranges that have been
+ * freed in a transaction. The ranges are listed in the perag[] busy list
+ * between when they're freed and the transaction is committed to disk.
+ */
+
+typedef struct xfs_log_busy_slot {
+ xfs_agnumber_t lbc_ag; /* allocation group of the busy range */
+ ushort lbc_idx; /* index in perag.busy[] */
+} xfs_log_busy_slot_t;
+
+#define XFS_LBC_NUM_SLOTS 31
+typedef struct xfs_log_busy_chunk {
+ struct xfs_log_busy_chunk *lbc_next; /* next chunk in the chain */
+ uint lbc_free; /* bitmask of free slots */
+ ushort lbc_unused; /* first unused */
+ xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
+} xfs_log_busy_chunk_t;
+
+#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
+#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
+
+/* NOTE(review): CLAIM/ISFREE use a signed 1 << (slot); safe while slot <= 30,
+ * but 1U would match FREEMASK — confirm slot never reaches bit 31. */
+#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) /* all slots free */
+#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
+#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
+#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) /* nonzero if any slot free */
+#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
+
/*
* This is the type of function which can be given to xfs_trans_callback()
* to be called upon the transaction's commit to disk.
unsigned int t_items_free; /* log item descs free */
xfs_log_item_chunk_t t_items; /* first log item desc chunk */
xfs_trans_header_t t_header; /* header for in-log trans */
+ unsigned int t_busy_free; /* busy descs free */
+ xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
} xfs_trans_t;
#endif /* __KERNEL__ */
xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *);
void xfs_trans_unlocked_item(struct xfs_mount *,
xfs_log_item_t *);
+xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
+ xfs_agnumber_t ag,
+ xfs_extlen_t idx);
/*
* Not necessarily exported, but used outside a single file.
#define xfs_read_buf(mp,devp,blkno,len,f,bpp) \
( *(bpp) = libxfs_readbuf( *(dev_t*)devp, (blkno), (len), 1), 0 )
-
/* transaction management */
#define xfs_trans_set_sync(tp) ((void) 0)
#define xfs_trans_agblocks_delta(tp, d) ((void) 0) /* debug only */
#define xfs_trans_unreserve_blkquota(tp,i,n) ((void) 0)
#define xfs_trans_unreserve_rtblkquota(tp,i,n) ((void) 0)
-
/* memory management */
#define KM_SLEEP 1
#define KM_SLEEP_IO 2
#define xfs_dir2_trace_args_sb(where, args, s, bp) ((void) 0)
#define xfs_dir_shortform_validate_ondisk(a,b) ((void) 0)
-
/* block management */
#define xfs_bmap_check_extents(ip,w) ((void) 0)
#define xfs_bmap_trace_delete(f,d,ip,i,c,w) ((void) 0)
#define XFS_BMBT_TRACE_ARGIK(c,i,k) ((void) 0)
#define XFS_BMBT_TRACE_CURSOR(c,s) ((void) 0)
+/* space allocation */
+#define xfs_alloc_search_busy(tp,ag,b,len) ((void) 0)
+#define xfs_alloc_mark_busy(tp,ag,b,len) ((void) 0)
/* anything else */
typedef __uint32_t uint_t;
#define mrunlock(a) ((void) 0)
#define mraccess(a) ((void) 0)
#define ismrlocked(a,b) 1
+#define spinlock_init(a,b) ((void) 0)
#define ovbcopy(from,to,count) memmove(to,from,count)
#define __return_address __builtin_return_address(0)
#define xfs_btree_reada_bufl(m,fsb,c) ((void) 0)
TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
xfs_alloc_log_agf(args->tp, args->agbp,
XFS_AGF_FREEBLKS);
+ /* search the busylist for these blocks */
+ xfs_alloc_search_busy(args->tp, args->agno,
+ args->agbno, args->len);
}
if (!args->isfl)
xfs_trans_mod_sb(args->tp,
bp = xfs_btree_get_bufs(args->mp, args->tp,
args->agno, fbno, 0);
xfs_trans_binval(args->tp, bp);
- /*
- * Since blocks move to the free list without
- * the coordination used in xfs_bmap_finish,
- * we can't allow the user to write to the
- * block until we know that the transaction
- * that moved it to the free list is
- * permanently on disk. The only way to
- * ensure that is to make this transaction
- * synchronous.
- */
- xfs_trans_set_sync(args->tp);
}
args->len = 1;
args->agbno = fbno;
(haveright ? "both" : "left") :
(haveright ? "right" : "none"),
agno, bno, len, isfl);
+
+
+ /*
+ * Since blocks move to the free list without the coordination
+ * used in xfs_bmap_finish, we can't allow the block to be available
+ * for reallocation and non-transaction writing (user data)
+ * until we know that the transaction that moved it to the free
+ * list is permanently on disk. We track the blocks by declaring
+ * these blocks as "busy"; the busy list is maintained on a per-ag
+ * basis and each transaction records which entries should be removed
+ * when the iclog commits to disk. If a busy block is allocated,
+ * the iclog is pushed up to the LSN that freed the block.
+ xfs_alloc_mark_busy(tp, agno, bno, len);
return 0;
error0:
return error;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
xfs_trans_binval(tp, bp);
- /*
- * Since blocks move to the free list without
- * the coordination used in xfs_bmap_finish,
- * we can't allow block to be available for reallocation
- * and non-transaction writing (user data)
- * until we know that the transaction
- * that moved it to the free list is
- * permanently on disk. The only way to
- * ensure that is to make this transaction
- * synchronous. The one exception to this
- * is in the case of wsync-mounted filesystem
- * where we know that any block that made it
- * onto the freelist won't be seen again in
- * the file from which it came since the transactions
- * that free metadata blocks or shrink inodes in
- * wsync filesystems are all themselves synchronous.
- */
- if (!(mp->m_flags & XFS_MOUNT_WSYNC))
- xfs_trans_set_sync(tp);
}
/*
* Initialize the args structure.
TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
*bnop = bno;
+
+ /*
+ * As blocks are freed, they are added to the per-ag busy list
+ * and remain there until the freeing transaction is committed to
+ * disk. Now that we have allocated blocks, this list must be
+ * searched to see if a block is being reused. If one is, then
+ * the freeing transaction must be pushed to disk NOW by forcing
+ * to disk all iclogs up to that transaction's LSN.
+ */
+ xfs_alloc_search_busy(tp, INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);
return 0;
}
(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
sizeof(xfs_agblock_t) - 1));
+ /*
+ * Since blocks move to the free list without the coordination
+ * used in xfs_bmap_finish, we can't allow the block to be available
+ * for reallocation and non-transaction writing (user data)
+ * until we know that the transaction that moved it to the free
+ * list is permanently on disk. We track the blocks by declaring
+ * these blocks as "busy"; the busy list is maintained on a per-ag
+ * basis and each transaction records which entries should be removed
+ * when the iclog commits to disk. If a busy block is allocated,
+ * the iclog is pushed up to the LSN that freed the block.
+ */
+ xfs_alloc_mark_busy(tp, INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);
+
return 0;
}
INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT);
pag->pagf_levels[XFS_BTNUM_CNTi] =
INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT);
+ spinlock_init(&pag->pagb_lock, "xfspagb");
pag->pagf_init = 1;
}
#ifdef DEBUG
ip->i_ino != mp->m_sb.sb_uquotino &&
ip->i_ino != mp->m_sb.sb_gquotino)
qfield = XFS_TRANS_DQ_BCOUNT;
- /*
- * If we're freeing meta-data, then the transaction
- * that frees the blocks must be synchronous. This
- * ensures that noone can reuse the blocks before
- * they are permanently free. For regular data
- * it is the callers responsibility to make the
- * data permanently inaccessible before calling
- * here to free it.
- */
- if (iflags & XFS_BMAPI_METADATA)
- xfs_trans_set_sync(tp);
}
/*
* Set up del_endblock and cur for later.