From 50722af108c849c2512badadda1331c29fc4ee70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jan 2012 18:52:06 +0000 Subject: [PATCH] repair: use recursive buffer locking MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit On a sufficiently corrupt filesystem walking the btree nodes might hit the same node again, which currently will deadlock. Use a recursion counter to avoid the direct deadlock and let the normal loop detection (two bad nodes and out) do its work. This is how repair behaved before we added the lock when implementing buffer prefetching. Reviewed-by: Dave Chinner Reported-by: Arkadiusz Miśkiewicz Tested-by: Arkadiusz Miśkiewicz Signed-off-by: Christoph Hellwig --- include/libxfs.h | 2 ++ libxfs/rdwr.c | 43 +++++++++++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/libxfs.h b/include/libxfs.h index 893173279..1ecb14156 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -226,6 +226,8 @@ typedef struct xfs_buf { unsigned b_bcount; dev_t b_dev; pthread_mutex_t b_lock; + pthread_t b_holder; + unsigned int b_recur; void *b_fsprivate; void *b_fsprivate2; void *b_fsprivate3; diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index dbd94c50f..432a1af5f 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -342,6 +342,8 @@ libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes) list_head_init(&bp->b_lock_list); #endif pthread_mutex_init(&bp->b_lock, NULL); + bp->b_holder = 0; + bp->b_recur = 0; } xfs_buf_t * @@ -410,18 +412,26 @@ libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags return NULL; if (use_xfs_buf_lock) { - if (flags & LIBXFS_GETBUF_TRYLOCK) { - int ret; - - ret = pthread_mutex_trylock(&bp->b_lock); - if (ret) { - ASSERT(ret == EAGAIN); - cache_node_put(libxfs_bcache, (struct cache_node *)bp); - return NULL; + int ret; + + ret = pthread_mutex_trylock(&bp->b_lock); + if (ret) { + 
ASSERT(ret == EAGAIN); + if (flags & LIBXFS_GETBUF_TRYLOCK) + goto out_put; + + if (pthread_equal(bp->b_holder, pthread_self())) { + fprintf(stderr, + _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"), + blkno); + bp->b_recur++; + return bp; + } else { + pthread_mutex_lock(&bp->b_lock); } - } else { - pthread_mutex_lock(&bp->b_lock); } + + bp->b_holder = pthread_self(); } cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp, @@ -440,6 +450,9 @@ libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags #endif return bp; +out_put: + cache_node_put(libxfs_bcache, (struct cache_node *)bp); + return NULL; } struct xfs_buf * @@ -458,8 +471,14 @@ libxfs_putbuf(xfs_buf_t *bp) list_del_init(&bp->b_lock_list); pthread_mutex_unlock(&libxfs_bcache->c_mutex); #endif - if (use_xfs_buf_lock) - pthread_mutex_unlock(&bp->b_lock); + if (use_xfs_buf_lock) { + if (bp->b_recur) { + bp->b_recur--; + } else { + bp->b_holder = 0; + pthread_mutex_unlock(&bp->b_lock); + } + } cache_node_put(libxfs_bcache, (struct cache_node *)bp); } -- 2.47.2