From 72f10e63302c43016e65501e08d06aceb0cf7ae0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 8 Aug 2014 14:30:49 -0700 Subject: [PATCH] 3.14-stable patches added patches: xfs-log-vector-rounding-leaks-log-space.patch --- queue-3.14/series | 1 + ...-log-vector-rounding-leaks-log-space.patch | 144 ++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 queue-3.14/xfs-log-vector-rounding-leaks-log-space.patch diff --git a/queue-3.14/series b/queue-3.14/series index 32718503b46..b274d41c33c 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -36,3 +36,4 @@ bbc-i2c-fix-bbc-i2c-envctrl-on-sunblade-2000.patch sunsab-fix-detection-of-break-on-sunsab-serial-console.patch sparc64-ldc_connect-should-not-return-einval-when-handshake-is-in-progress.patch arch-sparc-math-emu-math_32.c-drop-stray-break-operator.patch +xfs-log-vector-rounding-leaks-log-space.patch diff --git a/queue-3.14/xfs-log-vector-rounding-leaks-log-space.patch b/queue-3.14/xfs-log-vector-rounding-leaks-log-space.patch new file mode 100644 index 00000000000..30998c3defc --- /dev/null +++ b/queue-3.14/xfs-log-vector-rounding-leaks-log-space.patch @@ -0,0 +1,144 @@ +From 110dc24ad2ae4e9b94b08632fe1eb2fcdff83045 Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 20 May 2014 08:18:09 +1000 +Subject: xfs: log vector rounding leaks log space + +From: Dave Chinner + +commit 110dc24ad2ae4e9b94b08632fe1eb2fcdff83045 upstream. + +The addition of direct formatting of log items into the CIL +linear buffer added alignment restrictions that the start of each +vector needed to be 64 bit aligned. Hence padding was added in +xlog_finish_iovec() to round up the vector length to ensure the next +vector started with the correct alignment. + +This adds a small number of bytes to the size of +the linear buffer that is otherwise unused. The issue is that we +then use the linear buffer size to determine the log space used by +the log item, and this includes the unused space. Hence when we +account for space used by the log item, it's more than is actually +written into the iclogs, and hence we slowly leak this space. + +This results on log hangs when reserving space, with threads getting +stuck with these stack traces: + +Call Trace: +[] schedule+0x29/0x70 +[] xlog_grant_head_wait+0xa2/0x1a0 +[] xlog_grant_head_check+0xbd/0x140 +[] xfs_log_reserve+0x103/0x220 +[] xfs_trans_reserve+0x2f5/0x310 +..... + +The 4 bytes is significant. Brain Foster did all the hard work in +tracking down a reproducable leak to inode chunk allocation (it went +away with the ikeep mount option). His rough numbers were that +creating 50,000 inodes leaked 11 log blocks. This turns out to be +roughly 800 inode chunks or 1600 inode cluster buffers. That +works out at roughly 4 bytes per cluster buffer logged, and at that +I started looking for a 4 byte leak in the buffer logging code. + +What I found was that a struct xfs_buf_log_format structure for an +inode cluster buffer is 28 bytes in length. This gets rounded up to +32 bytes, but the vector length remains 28 bytes. Hence the CIL +ticket reservation is decremented by 32 bytes (via lv->lv_buf_len) +for that vector rather than 28 bytes which are written into the log. + +The fix for this problem is to separately track the bytes used by +the log vectors in the item and use that instead of the buffer +length when accounting for the log space that will be used by the +formatted log item. + +Again, thanks to Brian Foster for doing all the hard work and long +hours to isolate this leak and make finding the bug relatively +simple. + +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Reviewed-by: Brian Foster +Signed-off-by: Dave Chinner +Cc: Bill +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_log.h | 19 +++++++++++++------ + fs/xfs/xfs_log_cil.c | 7 ++++--- + 2 files changed, 17 insertions(+), 9 deletions(-) + +--- a/fs/xfs/xfs_log.h ++++ b/fs/xfs/xfs_log.h +@@ -24,7 +24,8 @@ struct xfs_log_vec { + struct xfs_log_iovec *lv_iovecp; /* iovec array */ + struct xfs_log_item *lv_item; /* owner */ + char *lv_buf; /* formatted buffer */ +- int lv_buf_len; /* size of formatted buffer */ ++ int lv_bytes; /* accounted space in buffer */ ++ int lv_buf_len; /* aligned size of buffer */ + int lv_size; /* size of allocated lv */ + }; + +@@ -52,15 +53,21 @@ xlog_prepare_iovec(struct xfs_log_vec *l + return vec->i_addr; + } + ++/* ++ * We need to make sure the next buffer is naturally aligned for the biggest ++ * basic data type we put into it. We already accounted for this padding when ++ * sizing the buffer. ++ * ++ * However, this padding does not get written into the log, and hence we have to ++ * track the space used by the log vectors separately to prevent log space hangs ++ * due to inaccurate accounting (i.e. a leak) of the used log space through the ++ * CIL context ticket. ++ */ + static inline void + xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) + { +- /* +- * We need to make sure the next buffer is naturally aligned for the +- * biggest basic data type we put into it. We already accounted for +- * this when sizing the buffer. +- */ + lv->lv_buf_len += round_up(len, sizeof(uint64_t)); ++ lv->lv_bytes += len; + vec->i_len = len; + } + +--- a/fs/xfs/xfs_log_cil.c ++++ b/fs/xfs/xfs_log_cil.c +@@ -97,7 +97,7 @@ xfs_cil_prepare_item( + { + /* Account for the new LV being passed in */ + if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { +- *diff_len += lv->lv_buf_len; ++ *diff_len += lv->lv_bytes; + *diff_iovecs += lv->lv_niovecs; + } + +@@ -111,7 +111,7 @@ xfs_cil_prepare_item( + else if (old_lv != lv) { + ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); + +- *diff_len -= old_lv->lv_buf_len; ++ *diff_len -= old_lv->lv_bytes; + *diff_iovecs -= old_lv->lv_niovecs; + kmem_free(old_lv); + } +@@ -239,7 +239,7 @@ xlog_cil_insert_format_items( + * that the space reservation accounting is correct. + */ + *diff_iovecs -= lv->lv_niovecs; +- *diff_len -= lv->lv_buf_len; ++ *diff_len -= lv->lv_bytes; + } else { + /* allocate new data chunk */ + lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); +@@ -259,6 +259,7 @@ xlog_cil_insert_format_items( + + /* The allocated data region lies beyond the iovec region */ + lv->lv_buf_len = 0; ++ lv->lv_bytes = 0; + lv->lv_buf = (char *)lv + buf_size - nbytes; + ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); + -- 2.47.3