]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 8 Aug 2014 21:30:49 +0000 (14:30 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 8 Aug 2014 21:30:49 +0000 (14:30 -0700)
added patches:
xfs-log-vector-rounding-leaks-log-space.patch

queue-3.14/series
queue-3.14/xfs-log-vector-rounding-leaks-log-space.patch [new file with mode: 0644]

index 32718503b46349bcfa2f5a21b090a2cd86053ef1..b274d41c33c464a48bd2ace5d88906d375a0861f 100644 (file)
@@ -36,3 +36,4 @@ bbc-i2c-fix-bbc-i2c-envctrl-on-sunblade-2000.patch
 sunsab-fix-detection-of-break-on-sunsab-serial-console.patch
 sparc64-ldc_connect-should-not-return-einval-when-handshake-is-in-progress.patch
 arch-sparc-math-emu-math_32.c-drop-stray-break-operator.patch
+xfs-log-vector-rounding-leaks-log-space.patch
diff --git a/queue-3.14/xfs-log-vector-rounding-leaks-log-space.patch b/queue-3.14/xfs-log-vector-rounding-leaks-log-space.patch
new file mode 100644 (file)
index 0000000..30998c3
--- /dev/null
@@ -0,0 +1,144 @@
+From 110dc24ad2ae4e9b94b08632fe1eb2fcdff83045 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 20 May 2014 08:18:09 +1000
+Subject: xfs: log vector rounding leaks log space
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 110dc24ad2ae4e9b94b08632fe1eb2fcdff83045 upstream.
+
+The addition of direct formatting of log items into the CIL
+linear buffer added alignment restrictions that the start of each
+vector needed to be 64 bit aligned. Hence padding was added in
+xlog_finish_iovec() to round up the vector length to ensure the next
+vector started with the correct alignment.
+
+This adds a small number of bytes to the size of
+the linear buffer that is otherwise unused. The issue is that we
+then use the linear buffer size to determine the log space used by
+the log item, and this includes the unused space. Hence when we
+account for space used by the log item, it's more than is actually
+written into the iclogs, and hence we slowly leak this space.
+
+This results on log hangs when reserving space, with threads getting
+stuck with these stack traces:
+
+Call Trace:
+[<ffffffff81d15989>] schedule+0x29/0x70
+[<ffffffff8150d3a2>] xlog_grant_head_wait+0xa2/0x1a0
+[<ffffffff8150d55d>] xlog_grant_head_check+0xbd/0x140
+[<ffffffff8150ee33>] xfs_log_reserve+0x103/0x220
+[<ffffffff814b7f05>] xfs_trans_reserve+0x2f5/0x310
+.....
+
+The 4 bytes is significant. Brain Foster did all the hard work in
+tracking down a reproducable leak to inode chunk allocation (it went
+away with the ikeep mount option). His rough numbers were that
+creating 50,000 inodes leaked 11 log blocks. This turns out to be
+roughly 800 inode chunks or 1600 inode cluster buffers. That
+works out at roughly 4 bytes per cluster buffer logged, and at that
+I started looking for a 4 byte leak in the buffer logging code.
+
+What I found was that a struct xfs_buf_log_format structure for an
+inode cluster buffer is 28 bytes in length. This gets rounded up to
+32 bytes, but the vector length remains 28 bytes. Hence the CIL
+ticket reservation is decremented by 32 bytes (via lv->lv_buf_len)
+for that vector rather than 28 bytes which are written into the log.
+
+The fix for this problem is to separately track the bytes used by
+the log vectors in the item and use that instead of the buffer
+length when accounting for the log space that will be used by the
+formatted log item.
+
+Again, thanks to Brian Foster for doing all the hard work and long
+hours to isolate this leak and make finding the bug relatively
+simple.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Bill <billstuff2001@sbcglobal.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_log.h     |   19 +++++++++++++------
+ fs/xfs/xfs_log_cil.c |    7 ++++---
+ 2 files changed, 17 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -24,7 +24,8 @@ struct xfs_log_vec {
+       struct xfs_log_iovec    *lv_iovecp;     /* iovec array */
+       struct xfs_log_item     *lv_item;       /* owner */
+       char                    *lv_buf;        /* formatted buffer */
+-      int                     lv_buf_len;     /* size of formatted buffer */
++      int                     lv_bytes;       /* accounted space in buffer */
++      int                     lv_buf_len;     /* aligned size of buffer */
+       int                     lv_size;        /* size of allocated lv */
+ };
+@@ -52,15 +53,21 @@ xlog_prepare_iovec(struct xfs_log_vec *l
+       return vec->i_addr;
+ }
++/*
++ * We need to make sure the next buffer is naturally aligned for the biggest
++ * basic data type we put into it.  We already accounted for this padding when
++ * sizing the buffer.
++ *
++ * However, this padding does not get written into the log, and hence we have to
++ * track the space used by the log vectors separately to prevent log space hangs
++ * due to inaccurate accounting (i.e. a leak) of the used log space through the
++ * CIL context ticket.
++ */
+ static inline void
+ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
+ {
+-      /*
+-       * We need to make sure the next buffer is naturally aligned for the
+-       * biggest basic data type we put into it.  We already accounted for
+-       * this when sizing the buffer.
+-       */
+       lv->lv_buf_len += round_up(len, sizeof(uint64_t));
++      lv->lv_bytes += len;
+       vec->i_len = len;
+ }
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -97,7 +97,7 @@ xfs_cil_prepare_item(
+ {
+       /* Account for the new LV being passed in */
+       if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+-              *diff_len += lv->lv_buf_len;
++              *diff_len += lv->lv_bytes;
+               *diff_iovecs += lv->lv_niovecs;
+       }
+@@ -111,7 +111,7 @@ xfs_cil_prepare_item(
+       else if (old_lv != lv) {
+               ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+-              *diff_len -= old_lv->lv_buf_len;
++              *diff_len -= old_lv->lv_bytes;
+               *diff_iovecs -= old_lv->lv_niovecs;
+               kmem_free(old_lv);
+       }
+@@ -239,7 +239,7 @@ xlog_cil_insert_format_items(
+                        * that the space reservation accounting is correct.
+                        */
+                       *diff_iovecs -= lv->lv_niovecs;
+-                      *diff_len -= lv->lv_buf_len;
++                      *diff_len -= lv->lv_bytes;
+               } else {
+                       /* allocate new data chunk */
+                       lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+@@ -259,6 +259,7 @@ xlog_cil_insert_format_items(
+               /* The allocated data region lies beyond the iovec region */
+               lv->lv_buf_len = 0;
++              lv->lv_bytes = 0;
+               lv->lv_buf = (char *)lv + buf_size - nbytes;
+               ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));