From: Nathan Scott Date: Mon, 25 Feb 2002 22:35:36 +0000 (+0000) Subject: Merge of xfs-cmds-2.4.18:slinx:112141a by nathans. X-Git-Tag: v2.1.0~59 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3e535bba4f908ac8e9c7936a5b6c5499883bec0d;p=thirdparty%2Fxfsprogs-dev.git Merge of xfs-cmds-2.4.18:slinx:112141a by nathans. sync up with recent kernel changes, shouldn't affect us in userspace. --- diff --git a/include/xfs_ag.h b/include/xfs_ag.h index 40ee6b07a..6b803af43 100644 --- a/include/xfs_ag.h +++ b/include/xfs_ag.h @@ -40,6 +40,7 @@ struct xfs_buf; struct xfs_mount; +struct xfs_trans; #define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ #define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ @@ -188,10 +189,28 @@ typedef struct xfs_agfl xfs_agblock_t agfl_bno[XFS_AGFL_SIZE]; } xfs_agfl_t; +/* + * Busy block/extent entry. Used in perag to mark blocks that have been freed + * but whose transactions aren't committed to disk yet. + */ +typedef struct xfs_perag_busy { + xfs_agblock_t busy_start; + xfs_extlen_t busy_length; + struct xfs_trans *busy_tp; /* transaction that did the free */ +} xfs_perag_busy_t; + /* * Per-ag incore structure, copies of information in agf and agi, * to improve the performance of allocation group selection. 
+ * + * pick sizes which fit in allocation buckets well */ +#if (BITS_PER_LONG == 32) +#define XFS_PAGB_NUM_SLOTS 84 +#elif (BITS_PER_LONG == 64) +#define XFS_PAGB_NUM_SLOTS 128 +#endif + typedef struct xfs_perag { char pagf_init; /* this agf's entry is initialized */ @@ -204,6 +223,11 @@ typedef struct xfs_perag xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ xfs_agino_t pagi_freecount; /* number of free inodes */ +#ifdef __KERNEL__ + lock_t pagb_lock; /* lock for pagb_list */ + int pagb_count; /* pagb slots in use */ + xfs_perag_busy_t *pagb_list; /* unstable blocks */ +#endif } xfs_perag_t; #define XFS_AG_MIN_BYTES (1LL << 24) /* 16 MB */ diff --git a/include/xfs_alloc.h b/include/xfs_alloc.h index 2a0ccf01c..0093e3872 100644 --- a/include/xfs_alloc.h +++ b/include/xfs_alloc.h @@ -101,6 +101,11 @@ typedef struct xfs_alloc_arg { #define XFS_ALLOC_KTRACE_ALLOC 1 #define XFS_ALLOC_KTRACE_FREE 2 #define XFS_ALLOC_KTRACE_MODAGF 3 +#define XFS_ALLOC_KTRACE_BUSY 4 +#define XFS_ALLOC_KTRACE_UNBUSY 5 +#define XFS_ALLOC_KTRACE_BUSYSEARCH 6 + + /* * Allocation tracing buffer size. 
*/ @@ -201,6 +206,18 @@ xfs_free_extent( xfs_fsblock_t bno, /* starting block number of extent */ xfs_extlen_t len); /* length of extent */ +void +xfs_alloc_mark_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len); + +void +xfs_alloc_clear_busy(xfs_trans_t *tp, + xfs_agnumber_t ag, + int idx); + + #endif /* __KERNEL__ */ #endif /* __XFS_ALLOC_H__ */ diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h index 7bda53ebe..27641928a 100644 --- a/include/xfs_dinode.h +++ b/include/xfs_dinode.h @@ -207,7 +207,7 @@ int xfs_cfork_q(xfs_dinode_core_t *dcp); #define XFS_CFORK_Q(dcp) xfs_cfork_q(dcp) #else #define XFS_CFORK_Q_ARCH(dcp,arch) (INT_GET((dcp)->di_forkoff, arch) != 0) -#define XFS_CFORK_Q(dcp) XFS_CFORK_Q_ARCH(dcp,ARCH_NOCONVERT) +#define XFS_CFORK_Q(dcp) ((dcp)->di_forkoff != 0) #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_BOFF) @@ -217,7 +217,7 @@ int xfs_cfork_boff(xfs_dinode_core_t *dcp); #define XFS_CFORK_BOFF(dcp) xfs_cfork_boff(dcp) #else #define XFS_CFORK_BOFF_ARCH(dcp,arch) ((int)(INT_GET((dcp)->di_forkoff, arch) << 3)) -#define XFS_CFORK_BOFF(dcp) XFS_CFORK_BOFF_ARCH(dcp,ARCH_NOCONVERT) +#define XFS_CFORK_BOFF(dcp) ((int)((dcp)->di_forkoff << 3)) #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_DSIZE) @@ -228,7 +228,8 @@ int xfs_cfork_dsize(xfs_dinode_core_t *dcp, struct xfs_mount *mp); #else #define XFS_CFORK_DSIZE_ARCH(dcp,mp,arch) \ (XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_CFORK_BOFF_ARCH(dcp, arch) : XFS_LITINO(mp)) -#define XFS_CFORK_DSIZE(dcp,mp) XFS_CFORK_DSIZE_ARCH(dcp,mp,ARCH_NOCONVERT) +#define XFS_CFORK_DSIZE(dcp,mp) \ + (XFS_CFORK_Q(dcp) ? XFS_CFORK_BOFF(dcp) : XFS_LITINO(mp)) #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_ASIZE) @@ -239,7 +240,8 @@ int xfs_cfork_asize(xfs_dinode_core_t *dcp, struct xfs_mount *mp); #else #define XFS_CFORK_ASIZE_ARCH(dcp,mp,arch) \ (XFS_CFORK_Q_ARCH(dcp, arch) ? 
XFS_LITINO(mp) - XFS_CFORK_BOFF_ARCH(dcp, arch) : 0) -#define XFS_CFORK_ASIZE(dcp,mp) XFS_CFORK_ASIZE_ARCH(dcp,mp,ARCH_NOCONVERT) +#define XFS_CFORK_ASIZE(dcp,mp) \ + (XFS_CFORK_Q(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF(dcp) : 0) #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_SIZE) @@ -251,7 +253,9 @@ int xfs_cfork_size(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w); #define XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch) \ ((w) == XFS_DATA_FORK ? \ XFS_CFORK_DSIZE_ARCH(dcp, mp, arch) : XFS_CFORK_ASIZE_ARCH(dcp, mp, arch)) -#define XFS_CFORK_SIZE(dcp,mp,w) XFS_CFORK_SIZE_ARCH(dcp,mp,w,ARCH_NOCONVERT) +#define XFS_CFORK_SIZE(dcp,mp,w) \ + ((w) == XFS_DATA_FORK ? \ + XFS_CFORK_DSIZE(dcp, mp) : XFS_CFORK_ASIZE(dcp, mp)) #endif diff --git a/include/xfs_inode.h b/include/xfs_inode.h index e27f03b8d..8a75f34b7 100644 --- a/include/xfs_inode.h +++ b/include/xfs_inode.h @@ -93,7 +93,6 @@ struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; struct xfs_bmbt_block; -struct xfs_ext_attr; struct xfs_inode; struct xfs_inode_log_item; struct xfs_mount; @@ -317,12 +316,10 @@ typedef struct xfs_inode { /* Miscellaneous state. 
*/ unsigned short i_flags; /* see defined flags below */ - unsigned short i_update_core; /* timestamps/size is dirty */ - unsigned short i_update_size; /* di_size field is dirty */ + unsigned char i_update_core; /* timestamps/size is dirty */ + unsigned char i_update_size; /* di_size field is dirty */ unsigned int i_gen; /* generation count */ unsigned int i_delayed_blks; /* count of delay alloc blks */ - struct xfs_ext_attr *i_ext_attr; /* Critical ext attributes */ - void *i_ilock_ra; /* current ilock ret addr */ xfs_dinode_core_t i_d; /* most of ondisk inode */ xfs_chashlist_t *i_chash; /* cluster hash list header */ @@ -420,7 +417,6 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n); #define XFS_ILOCK_EXCL 0x004 #define XFS_ILOCK_SHARED 0x008 #define XFS_IUNLOCK_NONOTIFY 0x010 -#define XFS_IOLOCK_NESTED 0x020 #define XFS_EXTENT_TOKEN_RD 0x040 #define XFS_SIZE_TOKEN_RD 0x080 #define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD) @@ -432,7 +428,6 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n); #define XFS_LOCK_MASK \ (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \ - XFS_IOLOCK_NESTED | \ XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \ XFS_WILLLEND) diff --git a/include/xfs_trans.h b/include/xfs_trans.h index 7db2821f5..430d9f649 100644 --- a/include/xfs_trans.h +++ b/include/xfs_trans.h @@ -327,6 +327,34 @@ xfs_log_item_chunk_t *xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp); #endif #ifdef __KERNEL__ +/* + * This structure is used to maintain a list of block ranges that have been + * freed in the transaction. The ranges are listed in the perag[] busy list + * between when they're freed and the transaction is committed to disk. 
+ */ + +typedef struct xfs_log_busy_slot { + xfs_agnumber_t lbc_ag; + ushort lbc_idx; /* index in perag.busy[] */ +} xfs_log_busy_slot_t; + +#define XFS_LBC_NUM_SLOTS 31 +typedef struct xfs_log_busy_chunk { + struct xfs_log_busy_chunk *lbc_next; + uint lbc_free; /* bitmask of free slots */ + ushort lbc_unused; /* first unused */ + xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; +} xfs_log_busy_chunk_t; + +#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) +#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) + +#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) +#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) +#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) +#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) +#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) + /* * This is the type of function which can be given to xfs_trans_callback() * to be called upon the transaction's commit to disk. @@ -376,6 +404,8 @@ typedef struct xfs_trans { unsigned int t_items_free; /* log item descs free */ xfs_log_item_chunk_t t_items; /* first log item desc chunk */ xfs_trans_header_t t_header; /* header for in-log trans */ + unsigned int t_busy_free; /* busy descs free */ + xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ } xfs_trans_t; #endif /* __KERNEL__ */ @@ -990,6 +1020,9 @@ xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *); void xfs_trans_unlocked_item(struct xfs_mount *, xfs_log_item_t *); +xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, + xfs_agnumber_t ag, + xfs_extlen_t idx); /* * Not necessarily exported, but used outside a single file. 
diff --git a/libxfs/xfs.h b/libxfs/xfs.h index 94cb4174b..a1ecf1415 100644 --- a/libxfs/xfs.h +++ b/libxfs/xfs.h @@ -173,7 +173,6 @@ #define xfs_read_buf(mp,devp,blkno,len,f,bpp) \ ( *(bpp) = libxfs_readbuf( *(dev_t*)devp, (blkno), (len), 1), 0 ) - /* transaction management */ #define xfs_trans_set_sync(tp) ((void) 0) #define xfs_trans_agblocks_delta(tp, d) ((void) 0) /* debug only */ @@ -185,7 +184,6 @@ #define xfs_trans_unreserve_blkquota(tp,i,n) ((void) 0) #define xfs_trans_unreserve_rtblkquota(tp,i,n) ((void) 0) - /* memory management */ #define KM_SLEEP 1 #define KM_SLEEP_IO 2 @@ -208,7 +206,6 @@ #define xfs_dir2_trace_args_sb(where, args, s, bp) ((void) 0) #define xfs_dir_shortform_validate_ondisk(a,b) ((void) 0) - /* block management */ #define xfs_bmap_check_extents(ip,w) ((void) 0) #define xfs_bmap_trace_delete(f,d,ip,i,c,w) ((void) 0) @@ -227,6 +224,9 @@ #define XFS_BMBT_TRACE_ARGIK(c,i,k) ((void) 0) #define XFS_BMBT_TRACE_CURSOR(c,s) ((void) 0) +/* space allocation */ +#define xfs_alloc_search_busy(tp,ag,b,len) ((void) 0) +#define xfs_alloc_mark_busy(tp,ag,b,len) ((void) 0) /* anything else */ typedef __uint32_t uint_t; @@ -256,6 +256,7 @@ typedef struct { dev_t dev; } buftarg_t; #define mrunlock(a) ((void) 0) #define mraccess(a) ((void) 0) #define ismrlocked(a,b) 1 +#define spinlock_init(a,b) ((void) 0) #define ovbcopy(from,to,count) memmove(to,from,count) #define __return_address __builtin_return_address(0) #define xfs_btree_reada_bufl(m,fsb,c) ((void) 0) diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c index 148f22901..28f3cab4d 100644 --- a/libxfs/xfs_alloc.c +++ b/libxfs/xfs_alloc.c @@ -526,6 +526,9 @@ xfs_alloc_ag_vextent( TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); xfs_alloc_log_agf(args->tp, args->agbp, XFS_AGF_FREEBLKS); + /* search the busylist for these blocks */ + xfs_alloc_search_busy(args->tp, args->agno, + args->agbno, args->len); } if (!args->isfl) xfs_trans_mod_sb(args->tp, @@ -1385,17 +1388,6 @@ xfs_alloc_ag_vextent_small( bp = 
xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno, 0); xfs_trans_binval(args->tp, bp); - /* - * Since blocks move to the free list without - * the coordination used in xfs_bmap_finish, - * we can't allow the user to write to the - * block until we know that the transaction - * that moved it to the free list is - * permanently on disk. The only way to - * ensure that is to make this transaction - * synchronous. - */ - xfs_trans_set_sync(args->tp); } args->len = 1; args->agbno = fbno; @@ -1694,6 +1686,19 @@ xfs_free_ag_extent( (haveright ? "both" : "left") : (haveright ? "right" : "none"), agno, bno, len, isfl); + + /* + * Since blocks move to the free list without the coordination + * used in xfs_bmap_finish, we can't allow block to be available + * for reallocation and non-transaction writing (user data) + * until we know that the transaction that moved it to the free + * list is permanently on disk. We track the blocks by declaring + * these blocks as "busy"; the busy list is maintained on a per-ag + * basis and each transaction records which entries should be removed + * when the iclog commits to disk. If a busy block is allocated, + * the iclog is pushed up to the LSN that freed the block. + */ + xfs_alloc_mark_busy(tp, agno, bno, len); return 0; error0: @@ -1845,25 +1850,6 @@ xfs_alloc_fix_freelist( return error; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); xfs_trans_binval(tp, bp); - /* - * Since blocks move to the free list without - * the coordination used in xfs_bmap_finish, - * we can't allow block to be available for reallocation - * and non-transaction writing (user data) - * until we know that the transaction - * that moved it to the free list is - * permanently on disk. The only way to - * ensure that is to make this transaction - * synchronous. 
The one exception to this - * is in the case of wsync-mounted filesystem - * where we know that any block that made it - * onto the freelist won't be seen again in - * the file from which it came since the transactions - * that free metadata blocks or shrink inodes in - * wsync filesystems are all themselves synchronous. - */ - if (!(mp->m_flags & XFS_MOUNT_WSYNC)) - xfs_trans_set_sync(tp); } /* * Initialize the args structure. @@ -1962,6 +1948,16 @@ xfs_alloc_get_freelist( TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); *bnop = bno; + + /* + * As blocks are freed, they are added to the per-ag busy list + * and remain there until the freeing transaction is committed to + * disk. Now that we have allocated blocks, this list must be + * searched to see if a block is being reused. If one is, then + * the freeing transaction must be pushed to disk NOW by forcing + * to disk all iclogs up to that transaction's LSN. + */ + xfs_alloc_search_busy(tp, INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1); return 0; } @@ -2058,6 +2054,19 @@ xfs_alloc_put_freelist( (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + sizeof(xfs_agblock_t) - 1)); + /* + * Since blocks move to the free list without the coordination + * used in xfs_bmap_finish, we can't allow block to be available + * for reallocation and non-transaction writing (user data) + * until we know that the transaction that moved it to the free + * list is permanently on disk. We track the blocks by declaring + * these blocks as "busy"; the busy list is maintained on a per-ag + * basis and each transaction records which entries should be removed + * when the iclog commits to disk. If a busy block is allocated, + * the iclog is pushed up to the LSN that freed the block.
+ */ + xfs_alloc_mark_busy(tp, INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1); + return 0; } @@ -2141,6 +2150,7 @@ xfs_alloc_read_agf( INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT); pag->pagf_levels[XFS_BTNUM_CNTi] = INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT); + spinlock_init(&pag->pagb_lock, "xfspagb"); pag->pagf_init = 1; } #ifdef DEBUG diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c index b21555f39..71bbaeaa1 100644 --- a/libxfs/xfs_bmap.c +++ b/libxfs/xfs_bmap.c @@ -2406,17 +2406,6 @@ xfs_bmap_del_extent( ip->i_ino != mp->m_sb.sb_uquotino && ip->i_ino != mp->m_sb.sb_gquotino) qfield = XFS_TRANS_DQ_BCOUNT; - /* - * If we're freeing meta-data, then the transaction - * that frees the blocks must be synchronous. This - * ensures that noone can reuse the blocks before - * they are permanently free. For regular data - * it is the callers responsibility to make the - * data permanently inaccessible before calling - * here to free it. - */ - if (iflags & XFS_BMAPI_METADATA) - xfs_trans_set_sync(tp); } /* * Set up del_endblock and cur for later.