Merge of master-melb:xfs-cmds:29150a by kenmcd.
Update to 2.9.2
#
PKG_MAJOR=2
PKG_MINOR=9
-PKG_REVISION=1
+PKG_REVISION=2
PKG_BUILD=1
+xfsprogs-2.9.2 (16 July 2007)
+ - Next major round of xfs_repair performance improvements:
+ - Cache disk nlink values in Phase 3 for Phase 7.
+ - Do multithreaded prefetch/processing based on AG stride
+	    option (i.e. for concats).
+	  - Don't trash lost+found at the start of Phase 4; eliminates
+	    repeated "moving disconnected inode to lost+found" messages
+	    on successive xfs_repair runs.
+ - Do multi-threaded sequential metadata prefetch.
+ Method based on Agami patches posted for 2.7.18 xfsprogs.
+ - Improve the libxfs cache with priority tagging to keep
+ blocks around that have unfavourable I/O characteristics.
+ - Make mkfs.xfs -f zero the old secondary superblocks before writing
+ the new superblocks.
+ - Fix up xfs_info and xfs_quota's -c handling with global commands.
+ - Improve xfs_bmap -vp output to always show the FLAGS column.
+
xfsprogs-2.9.1 (28 June 2007)
- Added filestreams support to xfs_io.
- Fix up libattr Makefile to append to LTLDFLAGS. Thanks to
#define HASH_CACHE_RATIO 8
+#define CACHE_MAX_PRIORITY 15
+
/*
* Simple, generic implementation of a cache (arbitrary data).
* Provides a hash table with a capped number of cache entries.
*/
struct cache;
-struct cache_hash;
struct cache_node;
typedef void *cache_key_t;
cache_bulk_relse_t bulkrelse; /* optional */
};
+struct cache_hash {
+ struct list_head ch_list; /* hash chain head */
+ unsigned int ch_count; /* hash chain length */
+ pthread_mutex_t ch_mutex; /* hash chain mutex */
+};
+
+struct cache_mru {
+ struct list_head cm_list; /* MRU head */
+ unsigned int cm_count; /* MRU length */
+ pthread_mutex_t cm_mutex; /* MRU lock */
+};
+
+struct cache_node {
+ struct list_head cn_hash; /* hash chain */
+ struct list_head cn_mru; /* MRU chain */
+ unsigned int cn_count; /* reference count */
+ unsigned int cn_hashidx; /* hash chain index */
+ int cn_priority; /* priority, -1 = free list */
+ pthread_mutex_t cn_mutex; /* node mutex */
+};
+
struct cache {
unsigned int c_maxcount; /* max cache nodes */
unsigned int c_count; /* count of nodes */
cache_bulk_relse_t bulkrelse; /* bulk release routine */
unsigned int c_hashsize; /* hash bucket count */
struct cache_hash *c_hash; /* hash table buckets */
+ struct cache_mru c_mrus[CACHE_MAX_PRIORITY + 1];
unsigned long long c_misses; /* cache misses */
unsigned long long c_hits; /* cache hits */
unsigned int c_max; /* max nodes ever used */
};
-struct cache_hash {
- struct list_head ch_list; /* hash chain head */
- unsigned int ch_count; /* hash chain length */
- pthread_mutex_t ch_mutex; /* hash chain mutex */
-};
-
-struct cache_node {
- struct list_head cn_list; /* hash chain */
- unsigned int cn_count; /* reference count */
- pthread_mutex_t cn_mutex; /* refcount mutex */
-};
-
struct cache *cache_init(unsigned int, struct cache_operations *);
void cache_destroy(struct cache *);
void cache_walk(struct cache *, cache_walk_t);
int cache_node_get(struct cache *, cache_key_t, struct cache_node **);
void cache_node_put(struct cache_node *);
+void cache_node_set_priority(struct cache *, struct cache_node *, int);
+int cache_node_get_priority(struct cache_node *);
int cache_node_purge(struct cache *, cache_key_t, struct cache_node *);
void cache_report(FILE *fp, const char *, struct cache *);
int cache_overflowed(struct cache *);
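
The priority-aware cache API above is easiest to follow with a small client. The sketch below is illustrative only and not part of the patch: struct my_obj, example_lookup() and the "+ 1" promotion are hypothetical, and the cache is assumed to have been created with cache_init() and a filled-in cache_operations table (hash/alloc/compare/flush/relse callbacks), as libxfs does for its buffer cache later in this changeset. It shows the embedding convention (a struct cache_node as the first member, so the cache can cast between node and object), the 0-on-hit/1-on-allocation return from cache_node_get(), and how cache_node_set_priority() moves a node onto a different MRU list.

struct my_obj {
	struct cache_node	m_node;		/* must be the first member */
	int			m_payload;
};

static void
example_lookup(struct cache *cache, cache_key_t key)
{
	struct my_obj	*obj;
	int		miss;

	/* returns 0 on a cache hit, 1 when a new node was allocated */
	miss = cache_node_get(cache, key, (struct cache_node **)&obj);
	if (!obj)
		return;

	if (!miss)	/* hit: promote onto a higher-priority MRU list */
		cache_node_set_priority(cache, &obj->m_node,
				cache_node_get_priority(&obj->m_node) + 1);

	/* ... use obj->m_payload ... */

	cache_node_put(&obj->m_node);		/* drop the reference */
}
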
#define XFS_BUF_FSPRIVATE3(bp,type) ((type)(bp)->b_fsprivate3)
#define XFS_BUF_SET_FSPRIVATE3(bp,val) (bp)->b_fsprivate3 = (void *)(val)
+#define XFS_BUF_SET_PRIORITY(bp,pri) cache_node_set_priority( \
+ libxfs_bcache, \
+ (struct cache_node *)(bp), \
+ (pri))
+#define XFS_BUF_PRIORITY(bp) (cache_node_get_priority( \
+ (struct cache_node *)(bp)))
+
/* Buffer Cache Interfaces */
extern struct cache *libxfs_bcache;
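
The two macros above expose the same priority controls to buffer cache users. A minimal, hypothetical sketch of how a caller might pin a buffer it expects to touch again follows; hold_hot_buffer() is not a real libxfs routine, and the "+ 8" bump simply mirrors what the repair prefetch code later in this patch applies to already-uptodate buffers (cache_node_set_priority() clamps the result to CACHE_MAX_PRIORITY).

static void
hold_hot_buffer(xfs_mount_t *mp, xfs_daddr_t blkno, int bblen)
{
	xfs_buf_t	*bp;

	bp = libxfs_getbuf(mp->m_dev, blkno, bblen);
	if (!bp)
		return;
	/* raise the MRU priority so the cache shaker reclaims this last */
	XFS_BUF_SET_PRIORITY(bp, XFS_BUF_PRIORITY(bp) + 8);
	/* ... read or modify the buffer contents ... */
	libxfs_putbuf(bp);
}
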
return head->next == head;
}
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+}
+
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head);
+}
+
+static inline void list_splice_init(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list)) {
+ __list_splice(list, head);
+ list_head_init(list);
+ }
+}
+
+#define list_entry(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
#endif /* __LIST_H__ */
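
The new list iterators are used throughout the cache and prefetch changes below (cache_shake(), libxfs_getbufr(), libxfs_bulkrelse()). As a quick illustration only, assuming the cache_mru/cache_node layout from cache.h above, a walk over one MRU list looks like this; count_mru_nodes() itself is hypothetical:

static unsigned int
count_mru_nodes(struct cache_mru *mru)
{
	struct cache_node	*node;
	unsigned int		count = 0;

	/* list_for_each_entry() maps each cn_mru link back to its node */
	list_for_each_entry(node, &mru->cm_list, cn_mru)
		count++;
	return count;
}
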
#undef CACHE_ABORT
/* #define CACHE_ABORT 1 */
+#define CACHE_SHAKE_COUNT 64
+
static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *);
struct cache *
cache->c_hash[i].ch_count = 0;
pthread_mutex_init(&cache->c_hash[i].ch_mutex, NULL);
}
+
+ for (i = 0; i <= CACHE_MAX_PRIORITY; i++) {
+ list_head_init(&cache->c_mrus[i].cm_list);
+ cache->c_mrus[i].cm_count = 0;
+ pthread_mutex_init(&cache->c_mrus[i].cm_mutex, NULL);
+ }
return cache;
}
list_head_destroy(&cache->c_hash[i].ch_list);
pthread_mutex_destroy(&cache->c_hash[i].ch_mutex);
}
+ for (i = 0; i <= CACHE_MAX_PRIORITY; i++) {
+ list_head_destroy(&cache->c_mrus[i].cm_list);
+ pthread_mutex_destroy(&cache->c_mrus[i].cm_mutex);
+ }
pthread_mutex_destroy(&cache->c_mutex);
free(cache->c_hash);
free(cache);
}
-static int
-cache_shake_node(
+static unsigned int
+cache_generic_bulkrelse(
struct cache * cache,
- cache_key_t key,
- struct cache_node * node)
+ struct list_head * list)
{
- struct list_head * head;
- struct list_head * pos;
- struct list_head * n;
- struct cache_hash * hash;
- int count = -1;
+ struct cache_node * node;
+ unsigned int count = 0;
- hash = cache->c_hash + cache->hash(key, cache->c_hashsize);
- head = &hash->ch_list;
- pthread_mutex_lock(&hash->ch_mutex);
- for (pos = head->next, n = pos->next;
- pos != head;
- pos = n, n = pos->next) {
- if ((struct cache_node *)pos != node)
- continue;
- pthread_mutex_lock(&node->cn_mutex);
- count = node->cn_count;
- pthread_mutex_unlock(&node->cn_mutex);
- if (count != 0)
- break;
+ while (!list_empty(list)) {
+ node = list_entry(list->next, struct cache_node, cn_mru);
pthread_mutex_destroy(&node->cn_mutex);
- list_del_init(&node->cn_list);
- hash->ch_count--;
+ list_del_init(&node->cn_mru);
cache->relse(node);
- break;
+ count++;
}
- pthread_mutex_unlock(&hash->ch_mutex);
+
return count;
}
/*
* We've hit the limit on cache size, so we need to start reclaiming
- * nodes we've used. This reclaims from the one given hash bucket
- * only. Returns the number of freed up nodes, its left to the
- * caller to updates the global counter of used nodes for the cache.
- * The hash chain lock is held for the hash list argument, must be
- * dropped before returning.
- * We walk backwards through the hash (remembering we keep recently
- * used nodes toward the front) until we hit an in-use node. We'll
- * stop there if its a low priority call but keep going if its not.
+ * nodes we've used. The MRU specified by the priority is shaken.
+ * Returns new priority at end of the call (in case we call again).
*/
static unsigned int
-cache_shake_hash(
+cache_shake(
struct cache * cache,
- struct cache_hash * hash,
- unsigned int priority)
+ unsigned int priority,
+ int all)
{
+ struct cache_mru *mru;
+ struct cache_hash * hash;
struct list_head temp;
struct list_head * head;
struct list_head * pos;
struct list_head * n;
struct cache_node * node;
- unsigned int inuse = 0;
+ unsigned int count;
+ ASSERT(priority <= CACHE_MAX_PRIORITY);
+ if (priority > CACHE_MAX_PRIORITY)
+ priority = 0;
+
+ mru = &cache->c_mrus[priority];
+ count = 0;
list_head_init(&temp);
- head = &hash->ch_list;
- for (pos = head->prev, n = pos->prev;
- pos != head;
- pos = n, n = pos->prev) {
- node = (struct cache_node *)pos;
- pthread_mutex_lock(&node->cn_mutex);
- if (!(inuse = (node->cn_count > 0))) {
- hash->ch_count--;
- list_move_tail(&node->cn_list, &temp);
+ head = &mru->cm_list;
+
+ pthread_mutex_lock(&mru->cm_mutex);
+ for (pos = head->prev, n = pos->prev; pos != head;
+ pos = n, n = pos->prev) {
+ node = list_entry(pos, struct cache_node, cn_mru);
+
+ if (pthread_mutex_trylock(&node->cn_mutex) != 0)
+ continue;
+
+ hash = cache->c_hash + node->cn_hashidx;
+ if (node->cn_count > 0 ||
+ pthread_mutex_trylock(&hash->ch_mutex) != 0) {
+ pthread_mutex_unlock(&node->cn_mutex);
+ continue;
}
- pthread_mutex_unlock(&node->cn_mutex);
- if (inuse && !priority)
- break;
- }
- pthread_mutex_unlock(&hash->ch_mutex);
- return cache->bulkrelse(cache, &temp);
-}
+ ASSERT(node->cn_priority == priority);
+ node->cn_priority = -1;
-/*
- * Generic implementation of bulk release, which just iterates over
- * the list calling the single node relse routine for each node.
- */
-static unsigned int
-cache_generic_bulkrelse(
- struct cache * cache,
- struct list_head * list)
-{
- struct cache_node * node;
- unsigned int count = 0;
+ list_move(&node->cn_mru, &temp);
+ list_del_init(&node->cn_hash);
+ hash->ch_count--;
+ mru->cm_count--;
+ pthread_mutex_unlock(&hash->ch_mutex);
+ pthread_mutex_unlock(&node->cn_mutex);
- while (!list_empty(list)) {
- node = (struct cache_node *)list->next;
- pthread_mutex_destroy(&node->cn_mutex);
- list_del_init(&node->cn_list);
- cache->relse(node);
count++;
+ if (!all && count == CACHE_SHAKE_COUNT)
+ break;
}
- return count;
-}
+ pthread_mutex_unlock(&mru->cm_mutex);
-/*
- * We've hit the limit on cache size, so we need to start reclaiming
- * nodes we've used. Start by shaking this hash chain only, unless
- * the shake priority has been increased already.
- * The hash chain lock is held for the hash list argument, must be
- * dropped before returning.
- * Returns new priority at end of the call (in case we call again).
- */
-static unsigned int
-cache_shake(
- struct cache * cache,
- struct cache_hash * hash,
- unsigned int priority)
-{
- unsigned int count;
- unsigned int i;
+ if (count > 0) {
+ cache->bulkrelse(cache, &temp);
- if (!priority) { /* do just one */
- count = cache_shake_hash(cache, hash, priority);
- } else { /* use a bigger hammer */
- pthread_mutex_unlock(&hash->ch_mutex);
- for (count = 0, i = 0; i < cache->c_hashsize; i++) {
- hash = &cache->c_hash[i];
- pthread_mutex_lock(&hash->ch_mutex);
- count += cache_shake_hash(cache, hash, priority - 1);
- }
- }
- if (count) {
pthread_mutex_lock(&cache->c_mutex);
cache->c_count -= count;
pthread_mutex_unlock(&cache->c_mutex);
}
- return ++priority;
+
+ return (count == CACHE_SHAKE_COUNT) ? priority : ++priority;
}
/*
* Allocate a new hash node (updating atomic counter in the process),
* unless doing so will push us over the maximum cache size.
*/
-struct cache_node *
+static struct cache_node *
cache_node_allocate(
struct cache * cache,
- struct cache_hash * hashlist,
cache_key_t key)
{
unsigned int nodesfree;
struct cache_node * node;
pthread_mutex_lock(&cache->c_mutex);
- if ((nodesfree = (cache->c_count < cache->c_maxcount))) {
+ nodesfree = (cache->c_count < cache->c_maxcount);
+ if (nodesfree) {
cache->c_count++;
if (cache->c_count > cache->c_max)
cache->c_max = cache->c_count;
pthread_mutex_unlock(&cache->c_mutex);
if (!nodesfree)
return NULL;
- if (!(node = cache->alloc(key))) { /* uh-oh */
+ node = cache->alloc(key);
+ if (node == NULL) { /* uh-oh */
pthread_mutex_lock(&cache->c_mutex);
cache->c_count--;
pthread_mutex_unlock(&cache->c_mutex);
return NULL;
}
pthread_mutex_init(&node->cn_mutex, NULL);
- list_head_init(&node->cn_list);
node->cn_count = 1;
+ node->cn_priority = 0;
return node;
}
{
struct cache_node * node = NULL;
struct cache_hash * hash;
+ struct cache_mru * mru;
struct list_head * head;
struct list_head * pos;
+ unsigned int hashidx;
int priority = 0;
- int allocated = 0;
- hash = cache->c_hash + cache->hash(key, cache->c_hashsize);
+ hashidx = cache->hash(key, cache->c_hashsize);
+ hash = cache->c_hash + hashidx;
head = &hash->ch_list;
- restart:
- pthread_mutex_lock(&hash->ch_mutex);
- for (pos = head->next; pos != head; pos = pos->next) {
- node = (struct cache_node *)pos;
- if (cache->compare(node, key) == 0)
- continue;
- pthread_mutex_lock(&node->cn_mutex);
- node->cn_count++;
- pthread_mutex_unlock(&node->cn_mutex);
- pthread_mutex_lock(&cache->c_mutex);
- cache->c_hits++;
- pthread_mutex_unlock(&cache->c_mutex);
- break;
- }
- if (pos == head) {
- node = cache_node_allocate(cache, hash, key);
- if (!node) {
- priority = cache_shake(cache, hash, priority);
- goto restart;
+ for (;;) {
+ pthread_mutex_lock(&hash->ch_mutex);
+ for (pos = head->next; pos != head; pos = pos->next) {
+ node = list_entry(pos, struct cache_node, cn_hash);
+ if (!cache->compare(node, key))
+ continue;
+ /*
+ * node found, bump node's reference count, move it to the
+ * top of its MRU list, and update stats.
+ */
+ pthread_mutex_lock(&node->cn_mutex);
+ node->cn_count++;
+
+ mru = &cache->c_mrus[node->cn_priority];
+ pthread_mutex_lock(&mru->cm_mutex);
+ list_move(&node->cn_mru, &mru->cm_list);
+ pthread_mutex_unlock(&mru->cm_mutex);
+
+ pthread_mutex_unlock(&node->cn_mutex);
+ pthread_mutex_unlock(&hash->ch_mutex);
+
+ pthread_mutex_lock(&cache->c_mutex);
+ cache->c_hits++;
+ pthread_mutex_unlock(&cache->c_mutex);
+
+ *nodep = node;
+ return 0;
}
- allocated = 1;
- hash->ch_count++; /* new entry */
+ pthread_mutex_unlock(&hash->ch_mutex);
+ /*
+ * not found, allocate a new entry
+ */
+ node = cache_node_allocate(cache, key);
+ if (node)
+ break;
+ priority = cache_shake(cache, priority, 0);
}
- /* looked at it, move to hash list head */
- list_move(&node->cn_list, &hash->ch_list);
+
+ node->cn_hashidx = hashidx;
+
+ /* add new node to appropriate hash and lowest priority MRU */
+ mru = &cache->c_mrus[0];
+ pthread_mutex_lock(&mru->cm_mutex);
+ pthread_mutex_lock(&hash->ch_mutex);
+ hash->ch_count++;
+ mru->cm_count++;
+ list_add(&node->cn_hash, &hash->ch_list);
+ list_add(&node->cn_mru, &mru->cm_list);
pthread_mutex_unlock(&hash->ch_mutex);
+ pthread_mutex_unlock(&mru->cm_mutex);
+
*nodep = node;
- return allocated;
+ return 1;
}
void
pthread_mutex_unlock(&node->cn_mutex);
}
+void
+cache_node_set_priority(
+ struct cache * cache,
+ struct cache_node * node,
+ int priority)
+{
+ struct cache_mru * mru;
+
+ if (priority < 0)
+ priority = 0;
+ else if (priority > CACHE_MAX_PRIORITY)
+ priority = CACHE_MAX_PRIORITY;
+
+ pthread_mutex_lock(&node->cn_mutex);
+
+ ASSERT(node->cn_count > 0);
+ if (priority == node->cn_priority) {
+ pthread_mutex_unlock(&node->cn_mutex);
+ return;
+ }
+ mru = &cache->c_mrus[node->cn_priority];
+ pthread_mutex_lock(&mru->cm_mutex);
+ list_del_init(&node->cn_mru);
+ mru->cm_count--;
+ pthread_mutex_unlock(&mru->cm_mutex);
+
+ mru = &cache->c_mrus[priority];
+ pthread_mutex_lock(&mru->cm_mutex);
+ list_add(&node->cn_mru, &mru->cm_list);
+ node->cn_priority = priority;
+ mru->cm_count++;
+ pthread_mutex_unlock(&mru->cm_mutex);
+
+ pthread_mutex_unlock(&node->cn_mutex);
+}
+
+int
+cache_node_get_priority(
+ struct cache_node * node)
+{
+ int priority;
+
+ pthread_mutex_lock(&node->cn_mutex);
+ priority = node->cn_priority;
+ pthread_mutex_unlock(&node->cn_mutex);
+
+ return priority;
+}
+
+
/*
* Purge a specific node from the cache. Reference count must be zero.
*/
cache_key_t key,
struct cache_node * node)
{
- int refcount;
+ struct list_head * head;
+ struct list_head * pos;
+ struct list_head * n;
+ struct cache_hash * hash;
+ struct cache_mru * mru;
+ int count = -1;
+
+ hash = cache->c_hash + cache->hash(key, cache->c_hashsize);
+ head = &hash->ch_list;
+ pthread_mutex_lock(&hash->ch_mutex);
+ for (pos = head->next, n = pos->next; pos != head;
+ pos = n, n = pos->next) {
+ if ((struct cache_node *)pos != node)
+ continue;
- refcount = cache_shake_node(cache, key, node);
- if (refcount == 0) {
+ pthread_mutex_lock(&node->cn_mutex);
+ count = node->cn_count;
+ if (count != 0) {
+ pthread_mutex_unlock(&node->cn_mutex);
+ break;
+ }
+ mru = &cache->c_mrus[node->cn_priority];
+ pthread_mutex_lock(&mru->cm_mutex);
+ list_del_init(&node->cn_mru);
+ mru->cm_count--;
+ pthread_mutex_unlock(&mru->cm_mutex);
+
+ pthread_mutex_unlock(&node->cn_mutex);
+ pthread_mutex_destroy(&node->cn_mutex);
+ list_del_init(&node->cn_hash);
+ hash->ch_count--;
+ cache->relse(node);
+ break;
+ }
+ pthread_mutex_unlock(&hash->ch_mutex);
+
+ if (count == 0) {
pthread_mutex_lock(&cache->c_mutex);
cache->c_count--;
pthread_mutex_unlock(&cache->c_mutex);
}
#ifdef CACHE_DEBUG
- if (refcount >= 1) {
+ if (count >= 1) {
fprintf(stderr, "%s: refcount was %u, not zero (node=%p)\n",
- __FUNCTION__, refcount, node);
+ __FUNCTION__, count, node);
cache_abort();
}
- if (refcount == -1) {
+ if (count == -1) {
fprintf(stderr, "%s: purge node not found! (node=%p)\n",
__FUNCTION__, node);
cache_abort();
}
#endif
- return (refcount == 0);
+ return (count == 0);
}
/*
cache_purge(
struct cache * cache)
{
- struct cache_hash * hash;
+ int i;
+
+ for (i = 0; i <= CACHE_MAX_PRIORITY; i++)
+ cache_shake(cache, i, 1);
- hash = &cache->c_hash[0];
- pthread_mutex_lock(&hash->ch_mutex);
- cache_shake(cache, hash, (unsigned int)-1);
#ifdef CACHE_DEBUG
if (cache->c_count != 0) {
+ /* flush referenced nodes to disk */
+ cache_flush(cache);
fprintf(stderr, "%s: shake on cache %p left %u nodes!?\n",
__FUNCTION__, cache, cache->c_count);
cache_abort();
}
#endif
- /* flush any remaining nodes to disk */
- cache_flush(cache);
}
/*
}
}
-#define HASH_REPORT (3*HASH_CACHE_RATIO)
+#define HASH_REPORT (3 * HASH_CACHE_RATIO)
void
-cache_report(FILE *fp, const char *name, struct cache * cache)
+cache_report(
+ FILE *fp,
+ const char *name,
+ struct cache *cache)
{
- int i;
- unsigned long count, index, total;
- unsigned long hash_bucket_lengths[HASH_REPORT+2];
+ int i;
+ unsigned long count, index, total;
+ unsigned long hash_bucket_lengths[HASH_REPORT + 2];
- if ((cache->c_hits+cache->c_misses) == 0)
+ if ((cache->c_hits + cache->c_misses) == 0)
return;
/* report cache summary */
cache->c_hashsize,
cache->c_hits,
cache->c_misses,
- (double) (cache->c_hits*100/(cache->c_hits+cache->c_misses))
+ (double)cache->c_hits * 100 /
+ (cache->c_hits + cache->c_misses)
);
+ for (i = 0; i <= CACHE_MAX_PRIORITY; i++)
+ fprintf(fp, "MRU %d entries = %6u (%3u%%)\n",
+ i, cache->c_mrus[i].cm_count,
+ cache->c_mrus[i].cm_count * 100 / cache->c_count);
+
/* report hash bucket lengths */
bzero(hash_bucket_lengths, sizeof(hash_bucket_lengths));
}
total = 0;
- for (i = 0; i < HASH_REPORT+1; i++) {
- total += i*hash_bucket_lengths[i];
+ for (i = 0; i < HASH_REPORT + 1; i++) {
+ total += i * hash_bucket_lengths[i];
if (hash_bucket_lengths[i] == 0)
continue;
- fprintf(fp, "Hash buckets with %2d entries %5ld (%3ld%%)\n",
- i, hash_bucket_lengths[i], (i*hash_bucket_lengths[i]*100)/cache->c_count);
+ fprintf(fp, "Hash buckets with %2d entries %6ld (%3ld%%)\n",
+ i, hash_bucket_lengths[i],
+ (i * hash_bucket_lengths[i] * 100) / cache->c_count);
}
if (hash_bucket_lengths[i]) /* last report bucket is the overflow bucket */
- fprintf(fp, "Hash buckets with >%2d entries %5ld (%3ld%%)\n",
- i-1, hash_bucket_lengths[i], ((cache->c_count-total)*100)/cache->c_count);
+ fprintf(fp, "Hash buckets with >%2d entries %6ld (%3ld%%)\n",
+ i - 1, hash_bucket_lengths[i],
+ ((cache->c_count - total) * 100) / cache->c_count);
}
XFS_FSS_TO_BB(mp, 1), flags);
}
-xfs_zone_t *xfs_buf_zone;
+xfs_zone_t *xfs_buf_zone;
+
+/* buffers released by the cache, kept for reuse by libxfs_getbufr() */
+static struct cache_mru xfs_buf_freelist =
+	{{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
+	  0, PTHREAD_MUTEX_INITIALIZER };
typedef struct {
dev_t device;
bp->b_blkno = bno;
bp->b_bcount = bytes;
bp->b_dev = device;
- bp->b_addr = memalign(libxfs_device_alignment(), bytes);
+ if (!bp->b_addr)
+ bp->b_addr = memalign(libxfs_device_alignment(), bytes);
if (!bp->b_addr) {
fprintf(stderr,
_("%s: %s can't memalign %u bytes: %s\n"),
}
xfs_buf_t *
-libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int len)
+libxfs_getbufr(dev_t device, xfs_daddr_t blkno, int bblen)
{
xfs_buf_t *bp;
+ int blen = BBTOB(bblen);
+
+	/*
+	 * First look for a free-list buffer that can be used as-is
+	 * (same size).  If none matches, take any free buffer, free
+	 * its data area and set b_addr to NULL so that libxfs_initbuf
+	 * allocates a fresh one.
+	 */
+ pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
+ if (!list_empty(&xfs_buf_freelist.cm_list)) {
+ list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
+ if (bp->b_bcount == blen) {
+ list_del_init(&bp->b_node.cn_mru);
+ break;
+ }
+ }
+ if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
+ bp = list_entry(xfs_buf_freelist.cm_list.next,
+ xfs_buf_t, b_node.cn_mru);
+ list_del_init(&bp->b_node.cn_mru);
+ free(bp->b_addr);
+ bp->b_addr = NULL;
+ }
+ } else
+ bp = libxfs_zone_zalloc(xfs_buf_zone);
+ pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
- bp = libxfs_zone_zalloc(xfs_buf_zone);
if (bp != NULL)
- libxfs_initbuf(bp, device, blkno, BBTOB(len));
+ libxfs_initbuf(bp, device, blkno, blen);
#ifdef IO_DEBUG
printf("%lx: %s: allocated %u bytes buffer, key=%llu(%llu), %p\n",
-		pthread_self(), __FUNCTION__, BBTOB(len),
+		pthread_self(), __FUNCTION__, blen,
(long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif
+
return bp;
}
miss = cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp);
if (bp) {
pthread_mutex_lock(&bp->b_lock);
+		/* bias the priority back down now the buffer is being reused */
+		cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
+			cache_node_get_priority((struct cache_node *)bp) - 4);
#ifdef XFS_BUF_TRACING
pthread_mutex_lock(&libxfs_bcache->c_mutex);
lock_buf_count++;
}
static void
-libxfs_bflush(struct cache_node *node)
+libxfs_brelse(struct cache_node *node)
{
xfs_buf_t *bp = (xfs_buf_t *)node;
- if ((bp != NULL) && (bp->b_flags & LIBXFS_B_DIRTY))
- libxfs_writebufr(bp);
+ if (bp != NULL) {
+ if (bp->b_flags & LIBXFS_B_DIRTY)
+ libxfs_writebufr(bp);
+ pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
+ list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
+ pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
+ }
}
static void
-libxfs_brelse(struct cache_node *node)
+libxfs_bulkrelse(
+ struct cache *cache,
+ struct list_head *list)
{
- xfs_buf_t *bp = (xfs_buf_t *)node;
- xfs_buf_log_item_t *bip;
- extern xfs_zone_t *xfs_buf_item_zone;
+ xfs_buf_t *bp;
- if (bp != NULL) {
+ if (list_empty(list))
+ return;
+
+ list_for_each_entry(bp, list, b_node.cn_mru) {
if (bp->b_flags & LIBXFS_B_DIRTY)
libxfs_writebufr(bp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
- if (bip)
- libxfs_zone_free(xfs_buf_item_zone, bip);
- free(bp->b_addr);
- pthread_mutex_destroy(&bp->b_lock);
- bp->b_addr = NULL;
- bp->b_flags = 0;
- free(bp);
- bp = NULL;
}
+
+ pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
+ __list_splice(list, &xfs_buf_freelist.cm_list);
+ pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
+}
+
+static void
+libxfs_bflush(struct cache_node *node)
+{
+ xfs_buf_t *bp = (xfs_buf_t *)node;
+
+ if ((bp != NULL) && (bp->b_flags & LIBXFS_B_DIRTY))
+ libxfs_writebufr(bp);
}
void
/* .flush */ libxfs_bflush,
/* .relse */ libxfs_brelse,
/* .compare */ libxfs_bcompare,
- /* .bulkrelse */ NULL /* TODO: lio_listio64 interface? */
+ /* .bulkrelse */libxfs_bulkrelse
};
int i;
int error;
const mode_t mode = 0755;
+ ino_tree_node_t *irec;
tp = libxfs_trans_alloc(mp, 0);
ip = NULL;
dir_init(mp, tp, ip, ip);
libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+ irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
+ XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
+ set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
+ irec->ino_startnum);
}
/*
xfs_mount_t *mp)
{
int i;
- work_queue_t *queues;
+ work_queue_t queue;
prefetch_args_t *pf_args[2];
- queues = malloc(thread_count * sizeof(work_queue_t));
- queues[0].mp = mp;
-
- if (!libxfs_bcache_overflowed()) {
- /*create_work_queue(&queues[0], mp, libxfs_nproc());
- for (i = 0; i < glob_agcount; i++)
- queue_work(&queues[0], traverse_function, i, NULL);
- destroy_work_queue(&queues[0]);*/
- for (i = 0; i < glob_agcount; i++)
- traverse_function(&queues[0], i, NULL);
- } else {
- /* TODO: AG stride support */
- pf_args[0] = start_inode_prefetch(0, 1, NULL);
- for (i = 0; i < glob_agcount; i++) {
- pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 1,
- pf_args[i & 1]);
- traverse_function(&queues[0], i, pf_args[i & 1]);
- }
+ /*
+ * we always do prefetch for phase 6 as it will fill in the gaps
+ * not read during phase 3 prefetch.
+ */
+ queue.mp = mp;
+ pf_args[0] = start_inode_prefetch(0, 1, NULL);
+ for (i = 0; i < glob_agcount; i++) {
+ pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 1,
+ pf_args[i & 1]);
+ traverse_function(&queue, i, pf_args[i & 1]);
}
- free(queues);
}
void
mark_standalone_inodes(mp);
- do_log(_(" - traversing filesystem ... \n"));
+ do_log(_(" - traversing filesystem ...\n"));
irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
*/
traverse_ags(mp);
- do_log(_(" - traversals finished ... \n"));
- do_log(_(" - moving disconnected inodes to %s ... \n"),
+ do_log(_(" - traversal finished ...\n"));
+ do_log(_(" - moving disconnected inodes to %s ...\n"),
ORPHANAGE);
/*
static int pf_batch_bytes;
static int pf_batch_fsbs;
-#define B_INODE 0x1000000
-#define B_META 0x2000000
+static void pf_read_inode_dirs(prefetch_args_t *, xfs_buf_t *);
+
+/* buffer priorities for the libxfs cache */
+
+#define B_DIR_BMAP	15	/* directory bmap btree blocks */
+#define B_DIR_META_2	13	/* metadata in secondary queue */
+#define B_DIR_META_H	11	/* metadata fetched for PF_META_ONLY */
+#define B_DIR_META_S	9	/* single block of metadata */
+#define B_DIR_META	7	/* directory metadata blocks */
+#define B_DIR_INODE	6	/* inode clusters containing directories */
+#define B_BMAP		5	/* non-directory bmap btree blocks */
+#define B_INODE		4	/* non-directory inode clusters */
+
+/* even priorities are inode buffers, odd priorities are metadata */
+#define B_IS_INODE(b)	(((b) & 1) == 0)
+#define B_IS_META(b)	(((b) & 1) != 0)
#define DEF_BATCH_BYTES 0x10000
#define MAX_BUFS 128
-#define IO_THRESHOLD (MAX_BUFS * PF_THREAD_COUNT)
+#define IO_THRESHOLD (MAX_BUFS * 2)
typedef enum pf_which {
PF_PRIMARY,
bp = libxfs_getbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
XFS_FSB_TO_BB(mp, blen));
if (bp->b_flags & LIBXFS_B_UPTODATE) {
+ if (B_IS_INODE(flag))
+ pf_read_inode_dirs(args, bp);
+ XFS_BUF_SET_PRIORITY(bp, XFS_BUF_PRIORITY(bp) + 8);
libxfs_putbuf(bp);
return;
}
- bp->b_flags |= flag;
+ XFS_BUF_SET_PRIORITY(bp, flag);
pthread_mutex_lock(&args->lock);
if (fsbno > args->last_bno_read) {
radix_tree_insert(&args->primary_io_queue, fsbno, bp);
- if (flag == B_META)
+ if (B_IS_META(flag))
radix_tree_tag_set(&args->primary_io_queue, fsbno, 0);
else {
args->inode_bufs_queued++;
#ifdef XR_PF_TRACE
pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
"primary queue (inode_bufs_queued = %d, last_bno = %lu)",
- flag == B_INODE ? 'I' : 'M', bp,
+ B_IS_INODE(flag) ? 'I' : 'M', bp,
(long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
args->inode_bufs_queued, args->last_bno_read);
#endif
#ifdef XR_PF_TRACE
pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
"secondary queue (last_bno = %lu)",
- flag == B_INODE ? 'I' : 'M', bp,
+ B_IS_INODE(flag) ? 'I' : 'M', bp,
(long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
args->last_bno_read);
#endif
- ASSERT(flag == B_META);
+ ASSERT(B_IS_META(flag));
+ XFS_BUF_SET_PRIORITY(bp, B_DIR_META_2);
radix_tree_insert(&args->secondary_io_queue, fsbno, bp);
}
#ifdef XR_PF_TRACE
pftrace("queuing dir extent in AG %d", args->agno);
#endif
- pf_queue_io(args, s, 1, B_META);
+ pf_queue_io(args, s, 1, B_DIR_META);
c--;
s++;
}
if (!bp)
return 0;
+ XFS_BUF_SET_PRIORITY(bp, isadir ? B_DIR_BMAP : B_BMAP);
+
rc = (*func)((xfs_btree_lblock_t *)XFS_BUF_PTR(bp), level - 1, isadir, args);
libxfs_putbuf(bp);
{
xfs_dinode_t *dino;
int icnt = 0;
+ int hasdir = 0;
+ int isadir;
xfs_dinode_core_t *dinoc;
for (icnt = 0; icnt < (XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog); icnt++) {
* We are only prefetching directory contents in extents
* and btree nodes for other inodes
*/
- if (dinoc->di_format <= XFS_DINODE_FMT_LOCAL ||
- (dinoc->di_format == XFS_DINODE_FMT_EXTENTS &&
- (be16_to_cpu(dinoc->di_mode) & S_IFMT) != S_IFDIR))
+ isadir = (be16_to_cpu(dinoc->di_mode) & S_IFMT) == S_IFDIR;
+ hasdir |= isadir;
+
+ if (dinoc->di_format <= XFS_DINODE_FMT_LOCAL)
+ continue;
+
+ if (!isadir && (dinoc->di_format == XFS_DINODE_FMT_EXTENTS ||
+ args->dirs_only))
continue;
/*
pf_read_exinode(args, dino);
break;
case XFS_DINODE_FMT_BTREE:
- pf_read_btinode(args, dino, (be16_to_cpu(
- dinoc->di_mode) & S_IFMT) == S_IFDIR);
+ pf_read_btinode(args, dino, isadir);
break;
}
}
+ if (hasdir)
+ XFS_BUF_SET_PRIORITY(bp, B_DIR_INODE);
}
/*
if (which == PF_PRIMARY) {
for (inode_bufs = 0, i = 0; i < num; i++) {
- if (bplist[i]->b_flags & B_INODE)
+ if (B_IS_INODE(XFS_BUF_PRIORITY(bplist[i])))
inode_bufs++;
}
args->inode_bufs_queued -= inode_bufs;
memcpy(XFS_BUF_PTR(bplist[i]), pbuf, size);
bplist[i]->b_flags |= LIBXFS_B_UPTODATE;
len -= size;
- if (bplist[i]->b_flags & B_INODE)
+ if (B_IS_INODE(XFS_BUF_PRIORITY(bplist[i])))
pf_read_inode_dirs(args, bplist[i]);
+ else if (which == PF_META_ONLY)
+ XFS_BUF_SET_PRIORITY(bplist[i],
+ B_DIR_META_H);
+ else if (which == PF_PRIMARY && num == 1)
+ XFS_BUF_SET_PRIORITY(bplist[i],
+ B_DIR_META_S);
}
}
for (i = 0; i < num; i++) {
#ifdef XR_PF_TRACE
pftrace("putbuf %c %p (%llu) in AG %d",
- bplist[i]->b_flags & B_INODE ? 'I' : 'M',
+ B_IS_INODE(XFS_BUF_PRIORITY(bplist[i])) ? 'I' : 'M',
bplist[i], (long long)XFS_BUF_ADDR(bplist[i]),
args->agno);
#endif
do {
pf_queue_io(args, XFS_AGB_TO_FSB(mp, args->agno, bno),
- blks_per_cluster, B_INODE);
+ blks_per_cluster,
+ (cur_irec->ino_isa_dir != 0) ?
+ B_DIR_INODE : B_INODE);
bno += blks_per_cluster;
num_inos += inos_per_cluster;
} while (num_inos < XFS_IALLOC_INODES(mp));