]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 5 Jun 2016 20:56:08 +0000 (13:56 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 5 Jun 2016 20:56:08 +0000 (13:56 -0700)
added patches:
dma-debug-avoid-spinlock-recursion-when-disabling-dma-debug.patch
ext4-address-ubsan-warning-in-mb_find_order_for_block.patch
ext4-clean-up-error-handling-when-orphan-list-is-corrupted.patch
ext4-fix-hang-when-processing-corrupted-orphaned-inode-list.patch
ext4-fix-oops-on-corrupted-filesystem.patch
ext4-silence-ubsan-in-ext4_mb_init.patch
gcov-disable-tree-loop-im-to-reduce-stack-usage.patch
gpio-davinci-fix-missed-parent-conversion.patch
pm-sleep-handle-failures-in-device_suspend_late-consistently.patch
scripts-package-makefile-rpmbuild-add-support-of-rpmopts.patch
xfs-disallow-rw-remount-on-fs-with-unknown-ro-compat-features.patch
xfs-don-t-wrap-growfs-agfl-indexes.patch
xfs-fix-inode-validity-check-in-xfs_iflush_cluster.patch
xfs-handle-dquot-buffer-readahead-in-log-recovery-correctly.patch
xfs-print-name-of-verifier-if-it-fails.patch
xfs-skip-stale-inodes-in-xfs_iflush_cluster.patch
xfs-xfs_iflush_cluster-fails-to-abort-on-error.patch

18 files changed:
queue-4.4/dma-debug-avoid-spinlock-recursion-when-disabling-dma-debug.patch [new file with mode: 0644]
queue-4.4/ext4-address-ubsan-warning-in-mb_find_order_for_block.patch [new file with mode: 0644]
queue-4.4/ext4-clean-up-error-handling-when-orphan-list-is-corrupted.patch [new file with mode: 0644]
queue-4.4/ext4-fix-hang-when-processing-corrupted-orphaned-inode-list.patch [new file with mode: 0644]
queue-4.4/ext4-fix-oops-on-corrupted-filesystem.patch [new file with mode: 0644]
queue-4.4/ext4-silence-ubsan-in-ext4_mb_init.patch [new file with mode: 0644]
queue-4.4/gcov-disable-tree-loop-im-to-reduce-stack-usage.patch [new file with mode: 0644]
queue-4.4/gpio-davinci-fix-missed-parent-conversion.patch [new file with mode: 0644]
queue-4.4/pm-sleep-handle-failures-in-device_suspend_late-consistently.patch [new file with mode: 0644]
queue-4.4/scripts-package-makefile-rpmbuild-add-support-of-rpmopts.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/xfs-disallow-rw-remount-on-fs-with-unknown-ro-compat-features.patch [new file with mode: 0644]
queue-4.4/xfs-don-t-wrap-growfs-agfl-indexes.patch [new file with mode: 0644]
queue-4.4/xfs-fix-inode-validity-check-in-xfs_iflush_cluster.patch [new file with mode: 0644]
queue-4.4/xfs-handle-dquot-buffer-readahead-in-log-recovery-correctly.patch [new file with mode: 0644]
queue-4.4/xfs-print-name-of-verifier-if-it-fails.patch [new file with mode: 0644]
queue-4.4/xfs-skip-stale-inodes-in-xfs_iflush_cluster.patch [new file with mode: 0644]
queue-4.4/xfs-xfs_iflush_cluster-fails-to-abort-on-error.patch [new file with mode: 0644]

diff --git a/queue-4.4/dma-debug-avoid-spinlock-recursion-when-disabling-dma-debug.patch b/queue-4.4/dma-debug-avoid-spinlock-recursion-when-disabling-dma-debug.patch
new file mode 100644 (file)
index 0000000..931d147
--- /dev/null
@@ -0,0 +1,40 @@
+From 3017cd63f26fc655d56875aaf497153ba60e9edf Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
+Date: Thu, 26 May 2016 15:16:25 -0700
+Subject: dma-debug: avoid spinlock recursion when disabling dma-debug
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+commit 3017cd63f26fc655d56875aaf497153ba60e9edf upstream.
+
+With netconsole (at least) the pr_err("...  disabling\n") call can
+recurse back into the dma-debug code, where it'll try to grab
+free_entries_lock again.  Avoid the problem by doing the printk after
+dropping the lock.
+
+Link: http://lkml.kernel.org/r/1463678421-18683-1-git-send-email-ville.syrjala@linux.intel.com
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/dma-debug.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/dma-debug.c
++++ b/lib/dma-debug.c
+@@ -657,9 +657,9 @@ static struct dma_debug_entry *dma_entry
+       spin_lock_irqsave(&free_entries_lock, flags);
+       if (list_empty(&free_entries)) {
+-              pr_err("DMA-API: debugging out of memory - disabling\n");
+               global_disable = true;
+               spin_unlock_irqrestore(&free_entries_lock, flags);
++              pr_err("DMA-API: debugging out of memory - disabling\n");
+               return NULL;
+       }
diff --git a/queue-4.4/ext4-address-ubsan-warning-in-mb_find_order_for_block.patch b/queue-4.4/ext4-address-ubsan-warning-in-mb_find_order_for_block.patch
new file mode 100644 (file)
index 0000000..28540c4
--- /dev/null
@@ -0,0 +1,74 @@
+From b5cb316cdf3a3f5f6125412b0f6065185240cfdc Mon Sep 17 00:00:00 2001
+From: Nicolai Stange <nicstange@gmail.com>
+Date: Thu, 5 May 2016 17:38:03 -0400
+Subject: ext4: address UBSAN warning in mb_find_order_for_block()
+
+From: Nicolai Stange <nicstange@gmail.com>
+
+commit b5cb316cdf3a3f5f6125412b0f6065185240cfdc upstream.
+
+Currently, in mb_find_order_for_block(), there's a loop like the following:
+
+  while (order <= e4b->bd_blkbits + 1) {
+    ...
+    bb += 1 << (e4b->bd_blkbits - order);
+  }
+
+Note that the updated bb is used in the loop's next iteration only.
+
+However, at the last iteration, that is at order == e4b->bd_blkbits + 1,
+the shift count becomes negative (c.f. C99 6.5.7(3)) and UBSAN reports
+
+  UBSAN: Undefined behaviour in fs/ext4/mballoc.c:1281:11
+  shift exponent -1 is negative
+  [...]
+  Call Trace:
+   [<ffffffff818c4d35>] dump_stack+0xbc/0x117
+   [<ffffffff818c4c79>] ? _atomic_dec_and_lock+0x169/0x169
+   [<ffffffff819411bb>] ubsan_epilogue+0xd/0x4e
+   [<ffffffff81941cbc>] __ubsan_handle_shift_out_of_bounds+0x1fb/0x254
+   [<ffffffff81941ac1>] ? __ubsan_handle_load_invalid_value+0x158/0x158
+   [<ffffffff816e93a0>] ? ext4_mb_generate_from_pa+0x590/0x590
+   [<ffffffff816502c8>] ? ext4_read_block_bitmap_nowait+0x598/0xe80
+   [<ffffffff816e7b7e>] mb_find_order_for_block+0x1ce/0x240
+   [...]
+
+Unless compilers start to do some fancy transformations (which at least
+GCC 6.0.0 doesn't currently do), the issue is of cosmetic nature only: the
+value of bb calculated this way is never used again.
+
+Silence UBSAN by introducing another variable, bb_incr, holding the next
+increment to apply to bb and adjust that one by right shifting it by one
+position per loop iteration.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=114701
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112161
+
+Signed-off-by: Nicolai Stange <nicstange@gmail.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/mballoc.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -1259,6 +1259,7 @@ static void ext4_mb_unload_buddy(struct
+ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
+ {
+       int order = 1;
++      int bb_incr = 1 << (e4b->bd_blkbits - 1);
+       void *bb;
+       BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
+@@ -1271,7 +1272,8 @@ static int mb_find_order_for_block(struc
+                       /* this block is part of buddy of order 'order' */
+                       return order;
+               }
+-              bb += 1 << (e4b->bd_blkbits - order);
++              bb += bb_incr;
++              bb_incr >>= 1;
+               order++;
+       }
+       return 0;
diff --git a/queue-4.4/ext4-clean-up-error-handling-when-orphan-list-is-corrupted.patch b/queue-4.4/ext4-clean-up-error-handling-when-orphan-list-is-corrupted.patch
new file mode 100644 (file)
index 0000000..bac6090
--- /dev/null
@@ -0,0 +1,111 @@
+From 7827a7f6ebfcb7f388dc47fddd48567a314701ba Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 30 Apr 2016 00:49:54 -0400
+Subject: ext4: clean up error handling when orphan list is corrupted
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 7827a7f6ebfcb7f388dc47fddd48567a314701ba upstream.
+
+Instead of just printing warning messages, if the orphan list is
+corrupted, declare the file system is corrupted.  If there are any
+reserved inodes in the orphaned inode list, declare the file system
+corrupted and stop right away to avoid doing more potential damage to
+the file system.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ialloc.c |   49 ++++++++++++++++++++++---------------------------
+ 1 file changed, 22 insertions(+), 27 deletions(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -1143,25 +1143,20 @@ struct inode *ext4_orphan_get(struct sup
+       unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
+       ext4_group_t block_group;
+       int bit;
+-      struct buffer_head *bitmap_bh;
++      struct buffer_head *bitmap_bh = NULL;
+       struct inode *inode = NULL;
+-      long err = -EIO;
++      int err = -EFSCORRUPTED;
+-      /* Error cases - e2fsck has already cleaned up for us */
+-      if (ino > max_ino) {
+-              ext4_warning(sb, "bad orphan ino %lu!  e2fsck was run?", ino);
+-              err = -EFSCORRUPTED;
+-              goto error;
+-      }
++      if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
++              goto bad_orphan;
+       block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
+       bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
+       bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
+       if (IS_ERR(bitmap_bh)) {
+-              err = PTR_ERR(bitmap_bh);
+-              ext4_warning(sb, "inode bitmap error %ld for orphan %lu",
+-                           ino, err);
+-              goto error;
++              ext4_error(sb, "inode bitmap error %ld for orphan %lu",
++                         ino, PTR_ERR(bitmap_bh));
++              return (struct inode *) bitmap_bh;
+       }
+       /* Having the inode bit set should be a 100% indicator that this
+@@ -1172,8 +1167,12 @@ struct inode *ext4_orphan_get(struct sup
+               goto bad_orphan;
+       inode = ext4_iget(sb, ino);
+-      if (IS_ERR(inode))
+-              goto iget_failed;
++      if (IS_ERR(inode)) {
++              err = PTR_ERR(inode);
++              ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
++                         ino, err);
++              return inode;
++      }
+       /*
+        * If the orphans has i_nlinks > 0 then it should be able to
+@@ -1190,29 +1189,25 @@ struct inode *ext4_orphan_get(struct sup
+       brelse(bitmap_bh);
+       return inode;
+-iget_failed:
+-      err = PTR_ERR(inode);
+-      inode = NULL;
+ bad_orphan:
+-      ext4_warning(sb, "bad orphan inode %lu!  e2fsck was run?", ino);
+-      printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
+-             bit, (unsigned long long)bitmap_bh->b_blocknr,
+-             ext4_test_bit(bit, bitmap_bh->b_data));
+-      printk(KERN_WARNING "inode=%p\n", inode);
++      ext4_error(sb, "bad orphan inode %lu", ino);
++      if (bitmap_bh)
++              printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n",
++                     bit, (unsigned long long)bitmap_bh->b_blocknr,
++                     ext4_test_bit(bit, bitmap_bh->b_data));
+       if (inode) {
+-              printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
++              printk(KERN_ERR "is_bad_inode(inode)=%d\n",
+                      is_bad_inode(inode));
+-              printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
++              printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n",
+                      NEXT_ORPHAN(inode));
+-              printk(KERN_WARNING "max_ino=%lu\n", max_ino);
+-              printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
++              printk(KERN_ERR "max_ino=%lu\n", max_ino);
++              printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink);
+               /* Avoid freeing blocks if we got a bad deleted inode */
+               if (inode->i_nlink == 0)
+                       inode->i_blocks = 0;
+               iput(inode);
+       }
+       brelse(bitmap_bh);
+-error:
+       return ERR_PTR(err);
+ }
diff --git a/queue-4.4/ext4-fix-hang-when-processing-corrupted-orphaned-inode-list.patch b/queue-4.4/ext4-fix-hang-when-processing-corrupted-orphaned-inode-list.patch
new file mode 100644 (file)
index 0000000..6dcf51b
--- /dev/null
@@ -0,0 +1,58 @@
+From c9eb13a9105e2e418f72e46a2b6da3f49e696902 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 30 Apr 2016 00:48:54 -0400
+Subject: ext4: fix hang when processing corrupted orphaned inode list
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit c9eb13a9105e2e418f72e46a2b6da3f49e696902 upstream.
+
+If the orphaned inode list contains inode #5, ext4_iget() returns a
+bad inode (since the bootloader inode should never be referenced
+directly).  Because of the bad inode, we end up processing the inode
+repeatedly and this hangs the machine.
+
+This can be reproduced via:
+
+   mke2fs -t ext4 /tmp/foo.img 100
+   debugfs -w -R "ssv last_orphan 5" /tmp/foo.img
+   mount -o loop /tmp/foo.img /mnt
+
+(But don't do this if you are using an unpatched kernel if you care
+about the system staying functional.  :-)
+
+This bug was found by the port of American Fuzzy Lop into the kernel
+to find file system problems[1].  (Since it *only* happens if inode #5
+shows up on the orphan list --- 3, 7, 8, etc. won't do it, it's not
+surprising that AFL needed two hours before it found it.)
+
+[1] http://events.linuxfoundation.org/sites/events/files/slides/AFL%20filesystem%20fuzzing%2C%20Vault%202016_0.pdf
+
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ialloc.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -1176,11 +1176,13 @@ struct inode *ext4_orphan_get(struct sup
+               goto iget_failed;
+       /*
+-       * If the orphans has i_nlinks > 0 then it should be able to be
+-       * truncated, otherwise it won't be removed from the orphan list
+-       * during processing and an infinite loop will result.
++       * If the orphans has i_nlinks > 0 then it should be able to
++       * be truncated, otherwise it won't be removed from the orphan
++       * list during processing and an infinite loop will result.
++       * Similarly, it must not be a bad inode.
+        */
+-      if (inode->i_nlink && !ext4_can_truncate(inode))
++      if ((inode->i_nlink && !ext4_can_truncate(inode)) ||
++          is_bad_inode(inode))
+               goto bad_orphan;
+       if (NEXT_ORPHAN(inode) > max_ino)
diff --git a/queue-4.4/ext4-fix-oops-on-corrupted-filesystem.patch b/queue-4.4/ext4-fix-oops-on-corrupted-filesystem.patch
new file mode 100644 (file)
index 0000000..c829723
--- /dev/null
@@ -0,0 +1,87 @@
+From 74177f55b70e2f2be770dd28684dd6d17106a4ba Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 5 May 2016 11:10:15 -0400
+Subject: ext4: fix oops on corrupted filesystem
+
+From: Jan Kara <jack@suse.cz>
+
+commit 74177f55b70e2f2be770dd28684dd6d17106a4ba upstream.
+
+When a filesystem is corrupted in the right way, it can happen that
+ext4_mark_iloc_dirty() in ext4_orphan_add() returns an error and we
+subsequently remove the inode from the in-memory orphan list. However this
+deletion is done with list_del(&EXT4_I(inode)->i_orphan) and thus we
+leave the i_orphan list_head with stale content. Later we can look at this
+content, causing list corruption, an oops, or other issues. The reported
+trace looked like:
+
+WARNING: CPU: 0 PID: 46 at lib/list_debug.c:53 __list_del_entry+0x6b/0x100()
+list_del corruption, 0000000061c1d6e0->next is LIST_POISON1
+(0000000000100100)
+CPU: 0 PID: 46 Comm: ext4.exe Not tainted 4.1.0-rc4+ #250
+Stack:
+ 60462947 62219960 602ede24 62219960
+ 602ede24 603ca293 622198f0 602f02eb
+ 62219950 6002c12c 62219900 601b4d6b
+Call Trace:
+ [<6005769c>] ? vprintk_emit+0x2dc/0x5c0
+ [<602ede24>] ? printk+0x0/0x94
+ [<600190bc>] show_stack+0xdc/0x1a0
+ [<602ede24>] ? printk+0x0/0x94
+ [<602ede24>] ? printk+0x0/0x94
+ [<602f02eb>] dump_stack+0x2a/0x2c
+ [<6002c12c>] warn_slowpath_common+0x9c/0xf0
+ [<601b4d6b>] ? __list_del_entry+0x6b/0x100
+ [<6002c254>] warn_slowpath_fmt+0x94/0xa0
+ [<602f4d09>] ? __mutex_lock_slowpath+0x239/0x3a0
+ [<6002c1c0>] ? warn_slowpath_fmt+0x0/0xa0
+ [<60023ebf>] ? set_signals+0x3f/0x50
+ [<600a205a>] ? kmem_cache_free+0x10a/0x180
+ [<602f4e88>] ? mutex_lock+0x18/0x30
+ [<601b4d6b>] __list_del_entry+0x6b/0x100
+ [<601177ec>] ext4_orphan_del+0x22c/0x2f0
+ [<6012f27c>] ? __ext4_journal_start_sb+0x2c/0xa0
+ [<6010b973>] ? ext4_truncate+0x383/0x390
+ [<6010bc8b>] ext4_write_begin+0x30b/0x4b0
+ [<6001bb50>] ? copy_from_user+0x0/0xb0
+ [<601aa840>] ? iov_iter_fault_in_readable+0xa0/0xc0
+ [<60072c4f>] generic_perform_write+0xaf/0x1e0
+ [<600c4166>] ? file_update_time+0x46/0x110
+ [<60072f0f>] __generic_file_write_iter+0x18f/0x1b0
+ [<6010030f>] ext4_file_write_iter+0x15f/0x470
+ [<60094e10>] ? unlink_file_vma+0x0/0x70
+ [<6009b180>] ? unlink_anon_vmas+0x0/0x260
+ [<6008f169>] ? free_pgtables+0xb9/0x100
+ [<600a6030>] __vfs_write+0xb0/0x130
+ [<600a61d5>] vfs_write+0xa5/0x170
+ [<600a63d6>] SyS_write+0x56/0xe0
+ [<6029fcb0>] ? __libc_waitpid+0x0/0xa0
+ [<6001b698>] handle_syscall+0x68/0x90
+ [<6002633d>] userspace+0x4fd/0x600
+ [<6002274f>] ? save_registers+0x1f/0x40
+ [<60028bd7>] ? arch_prctl+0x177/0x1b0
+ [<60017bd5>] fork_handler+0x85/0x90
+
+Fix the problem by using list_del_init() as we always should with
+i_orphan list.
+
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/namei.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -2809,7 +2809,7 @@ int ext4_orphan_add(handle_t *handle, st
+                        * list entries can cause panics at unmount time.
+                        */
+                       mutex_lock(&sbi->s_orphan_lock);
+-                      list_del(&EXT4_I(inode)->i_orphan);
++                      list_del_init(&EXT4_I(inode)->i_orphan);
+                       mutex_unlock(&sbi->s_orphan_lock);
+               }
+       }
diff --git a/queue-4.4/ext4-silence-ubsan-in-ext4_mb_init.patch b/queue-4.4/ext4-silence-ubsan-in-ext4_mb_init.patch
new file mode 100644 (file)
index 0000000..e94b3e8
--- /dev/null
@@ -0,0 +1,90 @@
+From 935244cd54b86ca46e69bc6604d2adfb1aec2d42 Mon Sep 17 00:00:00 2001
+From: Nicolai Stange <nicstange@gmail.com>
+Date: Thu, 5 May 2016 19:46:19 -0400
+Subject: ext4: silence UBSAN in ext4_mb_init()
+
+From: Nicolai Stange <nicstange@gmail.com>
+
+commit 935244cd54b86ca46e69bc6604d2adfb1aec2d42 upstream.
+
+Currently, in ext4_mb_init(), there's a loop like the following:
+
+  do {
+    ...
+    offset += 1 << (sb->s_blocksize_bits - i);
+    i++;
+  } while (i <= sb->s_blocksize_bits + 1);
+
+Note that the updated offset is used in the loop's next iteration only.
+
+However, at the last iteration, that is at i == sb->s_blocksize_bits + 1,
+the shift count becomes equal to (unsigned)-1 > 31 (c.f. C99 6.5.7(3))
+and UBSAN reports
+
+  UBSAN: Undefined behaviour in fs/ext4/mballoc.c:2621:15
+  shift exponent 4294967295 is too large for 32-bit type 'int'
+  [...]
+  Call Trace:
+   [<ffffffff818c4d25>] dump_stack+0xbc/0x117
+   [<ffffffff818c4c69>] ? _atomic_dec_and_lock+0x169/0x169
+   [<ffffffff819411ab>] ubsan_epilogue+0xd/0x4e
+   [<ffffffff81941cac>] __ubsan_handle_shift_out_of_bounds+0x1fb/0x254
+   [<ffffffff81941ab1>] ? __ubsan_handle_load_invalid_value+0x158/0x158
+   [<ffffffff814b6dc1>] ? kmem_cache_alloc+0x101/0x390
+   [<ffffffff816fc13b>] ? ext4_mb_init+0x13b/0xfd0
+   [<ffffffff814293c7>] ? create_cache+0x57/0x1f0
+   [<ffffffff8142948a>] ? create_cache+0x11a/0x1f0
+   [<ffffffff821c2168>] ? mutex_lock+0x38/0x60
+   [<ffffffff821c23ab>] ? mutex_unlock+0x1b/0x50
+   [<ffffffff814c26ab>] ? put_online_mems+0x5b/0xc0
+   [<ffffffff81429677>] ? kmem_cache_create+0x117/0x2c0
+   [<ffffffff816fcc49>] ext4_mb_init+0xc49/0xfd0
+   [...]
+
+Observe that the mentioned shift exponent, 4294967295, equals (unsigned)-1.
+
+Unless compilers start to do some fancy transformations (which at least
+GCC 6.0.0 doesn't currently do), the issue is of cosmetic nature only: the
+value of offset calculated this way is never used again.
+
+Silence UBSAN by introducing another variable, offset_incr, holding the
+next increment to apply to offset and adjust that one by right shifting it
+by one position per loop iteration.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=114701
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112161
+
+Signed-off-by: Nicolai Stange <nicstange@gmail.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/mballoc.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2578,7 +2578,7 @@ int ext4_mb_init(struct super_block *sb)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       unsigned i, j;
+-      unsigned offset;
++      unsigned offset, offset_incr;
+       unsigned max;
+       int ret;
+@@ -2607,11 +2607,13 @@ int ext4_mb_init(struct super_block *sb)
+       i = 1;
+       offset = 0;
++      offset_incr = 1 << (sb->s_blocksize_bits - 1);
+       max = sb->s_blocksize << 2;
+       do {
+               sbi->s_mb_offsets[i] = offset;
+               sbi->s_mb_maxs[i] = max;
+-              offset += 1 << (sb->s_blocksize_bits - i);
++              offset += offset_incr;
++              offset_incr = offset_incr >> 1;
+               max = max >> 1;
+               i++;
+       } while (i <= sb->s_blocksize_bits + 1);
diff --git a/queue-4.4/gcov-disable-tree-loop-im-to-reduce-stack-usage.patch b/queue-4.4/gcov-disable-tree-loop-im-to-reduce-stack-usage.patch
new file mode 100644 (file)
index 0000000..79c3c4c
--- /dev/null
@@ -0,0 +1,50 @@
+From c87bf431448b404a6ef5fbabd74c0e3e42157a7f Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Mon, 25 Apr 2016 17:35:30 +0200
+Subject: gcov: disable tree-loop-im to reduce stack usage
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit c87bf431448b404a6ef5fbabd74c0e3e42157a7f upstream.
+
+Enabling CONFIG_GCOV_PROFILE_ALL produces a lot of warnings like
+
+lib/lz4/lz4hc_compress.c: In function 'lz4_compresshcctx':
+lib/lz4/lz4hc_compress.c:514:1: warning: the frame size of 1504 bytes is larger than 1024 bytes [-Wframe-larger-than=]
+
+After some investigation, I found that this behavior started with gcc-4.9,
+and opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69702.
+A suggested workaround for it is to use the -fno-tree-loop-im
+flag that turns off one of the optimization stages in gcc, so the
+code runs a little slower but does not use excessive amounts
+of stack.
+
+We could make this conditional on the gcc version, but I could not
+find an easy way to do this in Kbuild and the benefit would be
+fairly small, given that most of the gcc versions in production are
+affected now.
+
+I'm marking this for 'stable' backports because it addresses a bug
+with code generation in gcc that exists in all kernel versions
+with the affected gcc releases.
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+Signed-off-by: Michal Marek <mmarek@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Makefile |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -364,7 +364,7 @@ AFLAGS_MODULE   =
+ LDFLAGS_MODULE  =
+ CFLAGS_KERNEL =
+ AFLAGS_KERNEL =
+-CFLAGS_GCOV   = -fprofile-arcs -ftest-coverage
++CFLAGS_GCOV   = -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+ # Use USERINCLUDE when you must reference the UAPI directories only.
diff --git a/queue-4.4/gpio-davinci-fix-missed-parent-conversion.patch b/queue-4.4/gpio-davinci-fix-missed-parent-conversion.patch
new file mode 100644 (file)
index 0000000..09230f5
--- /dev/null
@@ -0,0 +1,31 @@
+From 6ddbaed3eff9f60d29805413404251670d2e8f0c Mon Sep 17 00:00:00 2001
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Fri, 4 Dec 2015 14:13:59 +0100
+Subject: gpio: davinci: fix missed parent conversion
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+commit 6ddbaed3eff9f60d29805413404251670d2e8f0c upstream.
+
+I failed to convert this driver properly to use .parent to
+point to the parent device, so ARMv7 multiplatform would not
+compile.
+
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpio/gpio-davinci.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpio/gpio-davinci.c
++++ b/drivers/gpio/gpio-davinci.c
+@@ -254,7 +254,7 @@ static int davinci_gpio_probe(struct pla
+ #ifdef CONFIG_OF_GPIO
+               chips[i].chip.of_gpio_n_cells = 2;
+               chips[i].chip.of_xlate = davinci_gpio_of_xlate;
+-              chips[i].chip.dev = dev;
++              chips[i].chip.parent = dev;
+               chips[i].chip.of_node = dev->of_node;
+ #endif
+               spin_lock_init(&chips[i].lock);
diff --git a/queue-4.4/pm-sleep-handle-failures-in-device_suspend_late-consistently.patch b/queue-4.4/pm-sleep-handle-failures-in-device_suspend_late-consistently.patch
new file mode 100644 (file)
index 0000000..c60ecec
--- /dev/null
@@ -0,0 +1,54 @@
+From 3a17fb329da68cb00558721aff876a80bba2fdb9 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Fri, 20 May 2016 23:09:49 +0200
+Subject: PM / sleep: Handle failures in device_suspend_late() consistently
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 3a17fb329da68cb00558721aff876a80bba2fdb9 upstream.
+
+Grygorii Strashko reports:
+
+ The PM runtime will be left disabled for the device if its
+ .suspend_late() callback fails and async suspend is not allowed
+ for this device. In this case the device will not be added to
+ dpm_late_early_list and dpm_resume_early() will ignore this
+ device; as a result PM runtime will be disabled for it forever
+ (side effect: after 8 subsequent failures for the same device
+ the PM runtime will be reenabled due to disable_depth overflow).
+
+To fix this problem, add devices to dpm_late_early_list regardless
+of whether or not device_suspend_late() returns errors for them.
+
+That will ensure failures in there to be handled consistently for
+all devices regardless of their async suspend/resume status.
+
+Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/power/main.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/base/power/main.c
++++ b/drivers/base/power/main.c
+@@ -1262,14 +1262,15 @@ int dpm_suspend_late(pm_message_t state)
+               error = device_suspend_late(dev);
+               mutex_lock(&dpm_list_mtx);
++              if (!list_empty(&dev->power.entry))
++                      list_move(&dev->power.entry, &dpm_late_early_list);
++
+               if (error) {
+                       pm_dev_err(dev, state, " late", error);
+                       dpm_save_failed_dev(dev_name(dev));
+                       put_device(dev);
+                       break;
+               }
+-              if (!list_empty(&dev->power.entry))
+-                      list_move(&dev->power.entry, &dpm_late_early_list);
+               put_device(dev);
+               if (async_error)
diff --git a/queue-4.4/scripts-package-makefile-rpmbuild-add-support-of-rpmopts.patch b/queue-4.4/scripts-package-makefile-rpmbuild-add-support-of-rpmopts.patch
new file mode 100644 (file)
index 0000000..0af23e6
--- /dev/null
@@ -0,0 +1,44 @@
+From 65a9f31c5042e5bb50d30ed8ae374044be561054 Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Sat, 14 May 2016 20:09:52 -0700
+Subject: scripts/package/Makefile: rpmbuild add support of RPMOPTS
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit 65a9f31c5042e5bb50d30ed8ae374044be561054 upstream.
+
+After commit 21a59991ce0c ("scripts/package/Makefile: rpmbuild is needed
+for rpm targets"), it is no longer possible to specify RPMOPTS.
+For example, we are no longer able to control _topdir using the following
+make command.
+make RPMOPTS="--define '_topdir /home/xyz/workspace/'" binrpm-pkg
+
+Fixes: 21a59991ce0c ("scripts/package/Makefile: rpmbuild is needed for rpm targets")
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Signed-off-by: Michal Marek <mmarek@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ scripts/package/Makefile |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/scripts/package/Makefile
++++ b/scripts/package/Makefile
+@@ -52,7 +52,7 @@ rpm-pkg rpm: FORCE
+       $(call cmd,src_tar,$(KERNELPATH),kernel.spec)
+       $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
+       mv -f $(objtree)/.tmp_version $(objtree)/.version
+-      rpmbuild --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz
++      rpmbuild $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz
+       rm $(KERNELPATH).tar.gz kernel.spec
+ # binrpm-pkg
+@@ -63,7 +63,7 @@ binrpm-pkg: FORCE
+       $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
+       mv -f $(objtree)/.tmp_version $(objtree)/.version
+-      rpmbuild --define "_builddir $(objtree)" --target \
++      rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
+               $(UTS_MACHINE) -bb $(objtree)/binkernel.spec
+       rm binkernel.spec
index 35bccfad3fcbd9925b93dd8a9f07a56e3e126cbc..8217437f660d77b2397d4a854eb158f44a507797 100644 (file)
@@ -80,3 +80,20 @@ drm-fb_helper-fix-references-to-dev-mode_config.num_connector.patch
 drm-atomic-verify-connector-funcs-null-when-clearing-states.patch
 drm-i915-don-t-leave-old-junk-in-ilk-active-watermarks-on-readout.patch
 drm-imx-match-imx-ipuv3-crtc-components-using-device-node-in-platform-data.patch
+ext4-fix-hang-when-processing-corrupted-orphaned-inode-list.patch
+ext4-clean-up-error-handling-when-orphan-list-is-corrupted.patch
+ext4-fix-oops-on-corrupted-filesystem.patch
+ext4-address-ubsan-warning-in-mb_find_order_for_block.patch
+ext4-silence-ubsan-in-ext4_mb_init.patch
+pm-sleep-handle-failures-in-device_suspend_late-consistently.patch
+dma-debug-avoid-spinlock-recursion-when-disabling-dma-debug.patch
+scripts-package-makefile-rpmbuild-add-support-of-rpmopts.patch
+gcov-disable-tree-loop-im-to-reduce-stack-usage.patch
+xfs-disallow-rw-remount-on-fs-with-unknown-ro-compat-features.patch
+xfs-don-t-wrap-growfs-agfl-indexes.patch
+xfs-xfs_iflush_cluster-fails-to-abort-on-error.patch
+xfs-fix-inode-validity-check-in-xfs_iflush_cluster.patch
+xfs-skip-stale-inodes-in-xfs_iflush_cluster.patch
+xfs-print-name-of-verifier-if-it-fails.patch
+xfs-handle-dquot-buffer-readahead-in-log-recovery-correctly.patch
+gpio-davinci-fix-missed-parent-conversion.patch
diff --git a/queue-4.4/xfs-disallow-rw-remount-on-fs-with-unknown-ro-compat-features.patch b/queue-4.4/xfs-disallow-rw-remount-on-fs-with-unknown-ro-compat-features.patch
new file mode 100644 (file)
index 0000000..a3a63f3
--- /dev/null
@@ -0,0 +1,52 @@
+From d0a58e833931234c44e515b5b8bede32bd4e6eed Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Wed, 6 Apr 2016 07:05:41 +1000
+Subject: xfs: disallow rw remount on fs with unknown ro-compat features
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit d0a58e833931234c44e515b5b8bede32bd4e6eed upstream.
+
+Today, a kernel which refuses to mount a filesystem read-write
+due to unknown ro-compat features can still transition to read-write
+via the remount path.  The old kernel is most likely none the wiser,
+because it's unaware of the new feature, and isn't using it.  However,
+writing to the filesystem may well corrupt metadata related to that
+new feature, and moving to a newer kernel which understands the feature
+will have problems.
+
+Right now the only ro-compat feature we have is the free inode btree,
+which showed up in v3.16.  It would be good to push this back to
+all the active stable kernels, I think, so that if anyone is using
+newer mkfs (which enables the finobt feature) with older kernel
+releases, they'll be protected.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Bill O'Donnell <billodo@redhat.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_super.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -1233,6 +1233,16 @@ xfs_fs_remount(
+                       return -EINVAL;
+               }
++              if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
++                  xfs_sb_has_ro_compat_feature(sbp,
++                                      XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
++                      xfs_warn(mp,
++"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
++                              (sbp->sb_features_ro_compat &
++                                      XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
++                      return -EINVAL;
++              }
++
+               mp->m_flags &= ~XFS_MOUNT_RDONLY;
+               /*
diff --git a/queue-4.4/xfs-don-t-wrap-growfs-agfl-indexes.patch b/queue-4.4/xfs-don-t-wrap-growfs-agfl-indexes.patch
new file mode 100644 (file)
index 0000000..0d6e1bc
--- /dev/null
@@ -0,0 +1,46 @@
+From ad747e3b299671e1a53db74963cc6c5f6cdb9f6d Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Wed, 6 Apr 2016 07:06:20 +1000
+Subject: xfs: Don't wrap growfs AGFL indexes
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit ad747e3b299671e1a53db74963cc6c5f6cdb9f6d upstream.
+
+Commit 96f859d ("libxfs: pack the agfl header structure so
+XFS_AGFL_SIZE is correct") allowed the freelist to use the empty
+slot at the end of the freelist on 64 bit systems that was not
+being used due to sizeof() rounding up the structure size.
+
+This has caused versions of xfs_repair prior to 4.5.0 (which also
+has the fix) to report this as a corruption once the filesystem has
+been grown. Older kernels can also have problems (seen from a whacky
+container/vm management environment) mounting filesystems grown on a
+system with a newer kernel than the vm/container it is deployed on.
+
+To avoid this problem, change the initial free list indexes not to
+wrap across the end of the AGFL, hence avoiding the initialisation
+of agf_fllast to the last index in the AGFL.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_fsops.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -243,8 +243,8 @@ xfs_growfs_data_private(
+               agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
+               agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
+               agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+-              agf->agf_flfirst = 0;
+-              agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
++              agf->agf_flfirst = cpu_to_be32(1);
++              agf->agf_fllast = 0;
+               agf->agf_flcount = 0;
+               tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
+               agf->agf_freeblks = cpu_to_be32(tmpsize);
diff --git a/queue-4.4/xfs-fix-inode-validity-check-in-xfs_iflush_cluster.patch b/queue-4.4/xfs-fix-inode-validity-check-in-xfs_iflush_cluster.patch
new file mode 100644 (file)
index 0000000..3e46b80
--- /dev/null
@@ -0,0 +1,47 @@
+From 51b07f30a71c27405259a0248206ed4e22adbee2 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Wed, 18 May 2016 13:54:22 +1000
+Subject: xfs: fix inode validity check in xfs_iflush_cluster
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 51b07f30a71c27405259a0248206ed4e22adbee2 upstream.
+
+Some careless idiot(*) wrote crap code in commit 1a3e8f3 ("xfs:
+convert inode cache lookups to use RCU locking") back in late 2010,
+and so xfs_iflush_cluster checks the wrong inode for whether it is
+still valid under RCU protection. Fix it to lock and check the
+correct inode.
+
+(*) Careless-idiot: Dave Chinner <dchinner@redhat.com>
+
+Discovered-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_inode.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3220,13 +3220,13 @@ xfs_iflush_cluster(
+                * We need to check under the i_flags_lock for a valid inode
+                * here. Skip it if it is not valid or the wrong inode.
+                */
+-              spin_lock(&ip->i_flags_lock);
+-              if (!ip->i_ino ||
++              spin_lock(&iq->i_flags_lock);
++              if (!iq->i_ino ||
+                   (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
+-                      spin_unlock(&ip->i_flags_lock);
++                      spin_unlock(&iq->i_flags_lock);
+                       continue;
+               }
+-              spin_unlock(&ip->i_flags_lock);
++              spin_unlock(&iq->i_flags_lock);
+               /*
+                * Do an un-protected check to see if the inode is dirty and
diff --git a/queue-4.4/xfs-handle-dquot-buffer-readahead-in-log-recovery-correctly.patch b/queue-4.4/xfs-handle-dquot-buffer-readahead-in-log-recovery-correctly.patch
new file mode 100644 (file)
index 0000000..619d07f
--- /dev/null
@@ -0,0 +1,199 @@
+From 7d6a13f023567d573ac362502bb702eda716e654 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 12 Jan 2016 07:04:01 +1100
+Subject: xfs: handle dquot buffer readahead in log recovery correctly
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 7d6a13f023567d573ac362502bb702eda716e654 upstream.
+
+When we do dquot readahead in log recovery, we do not use a verifier
+as the underlying buffer may not have dquots in it. e.g. the
+allocation operation hasn't yet been replayed. Hence we do not want
+to fail recovery because we detect an operation to be replayed has
+not been run yet. This problem was addressed for inodes in commit
+d891400 ("xfs: inode buffers may not be valid during recovery
+readahead") but the problem was not recognised to exist for dquots
+and their buffers as the dquot readahead did not have a verifier.
+
+The result of not using a verifier is that when the buffer is then
+next read to replay a dquot modification, the dquot buffer verifier
+will only be attached to the buffer if *readahead is not complete*.
+Hence we can read the buffer, replay the dquot changes and then add
+it to the delwri submission list without it having a verifier
+attached to it. This then generates warnings in xfs_buf_ioapply(),
+which catches and warns about this case.
+
+Fix this and make it handle the same readahead verifier error cases
+as for inode buffers by adding a new readahead verifier that has a
+write operation as well as a read operation that marks the buffer as
+not done if any corruption is detected.  Also make sure we don't run
+readahead if the dquot buffer has been marked as cancelled by
+recovery.
+
+This will result in readahead either succeeding and the buffer
+having a valid write verifier, or readahead failing and the buffer
+state requiring the subsequent read to resubmit the IO with the new
+verifier.  In either case, this will result in the buffer always
+ending up with a valid write verifier on it.
+
+Note: we also need to fix the inode buffer readahead error handling
+to mark the buffer with EIO. Brian noticed the code I copied from
+there was wrong during review, so fix it at the same time. Add comments
+linking the two functions that handle readahead verifier errors
+together so we don't forget this behavioural link in future.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_dquot_buf.c  |   36 ++++++++++++++++++++++++++++++------
+ fs/xfs/libxfs/xfs_inode_buf.c  |    2 ++
+ fs/xfs/libxfs/xfs_quota_defs.h |    2 +-
+ fs/xfs/libxfs/xfs_shared.h     |    1 +
+ fs/xfs/xfs_log_recover.c       |    9 +++++++--
+ 5 files changed, 41 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dquot_buf.c
++++ b/fs/xfs/libxfs/xfs_dquot_buf.c
+@@ -54,7 +54,7 @@ xfs_dqcheck(
+       xfs_dqid_t       id,
+       uint             type,    /* used only when IO_dorepair is true */
+       uint             flags,
+-      char             *str)
++      const char       *str)
+ {
+       xfs_dqblk_t      *d = (xfs_dqblk_t *)ddq;
+       int             errs = 0;
+@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc(
+ STATIC bool
+ xfs_dquot_buf_verify(
+       struct xfs_mount        *mp,
+-      struct xfs_buf          *bp)
++      struct xfs_buf          *bp,
++      int                     warn)
+ {
+       struct xfs_dqblk        *d = (struct xfs_dqblk *)bp->b_addr;
+       xfs_dqid_t              id = 0;
+@@ -240,8 +241,7 @@ xfs_dquot_buf_verify(
+               if (i == 0)
+                       id = be32_to_cpu(ddq->d_id);
+-              error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
+-                                     "xfs_dquot_buf_verify");
++              error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
+               if (error)
+                       return false;
+       }
+@@ -256,7 +256,7 @@ xfs_dquot_buf_read_verify(
+       if (!xfs_dquot_buf_verify_crc(mp, bp))
+               xfs_buf_ioerror(bp, -EFSBADCRC);
+-      else if (!xfs_dquot_buf_verify(mp, bp))
++      else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
+               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+       if (bp->b_error)
+@@ -264,6 +264,25 @@ xfs_dquot_buf_read_verify(
+ }
+ /*
++ * readahead errors are silent and simply leave the buffer as !done so a real
++ * read will then be run with the xfs_dquot_buf_ops verifier. See
++ * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
++ * reporting the failure.
++ */
++static void
++xfs_dquot_buf_readahead_verify(
++      struct xfs_buf  *bp)
++{
++      struct xfs_mount        *mp = bp->b_target->bt_mount;
++
++      if (!xfs_dquot_buf_verify_crc(mp, bp) ||
++          !xfs_dquot_buf_verify(mp, bp, 0)) {
++              xfs_buf_ioerror(bp, -EIO);
++              bp->b_flags &= ~XBF_DONE;
++      }
++}
++
++/*
+  * we don't calculate the CRC here as that is done when the dquot is flushed to
+  * the buffer after the update is done. This ensures that the dquot in the
+  * buffer always has an up-to-date CRC value.
+@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify(
+ {
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+-      if (!xfs_dquot_buf_verify(mp, bp)) {
++      if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
+               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+@@ -287,3 +306,8 @@ const struct xfs_buf_ops xfs_dquot_buf_o
+       .verify_write = xfs_dquot_buf_write_verify,
+ };
++const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
++      .name = "xfs_dquot_ra",
++      .verify_read = xfs_dquot_buf_readahead_verify,
++      .verify_write = xfs_dquot_buf_write_verify,
++};
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -68,6 +68,8 @@ xfs_inobp_check(
+  * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
+  * because all we want to do is say readahead failed; there is no-one to report
+  * the error to, so this will distinguish it from a non-ra verifier failure.
++ * Changes to this readahead error behaviour also need to be reflected in
++ * xfs_dquot_buf_readahead_verify().
+  */
+ static void
+ xfs_inode_buf_verify(
+--- a/fs/xfs/libxfs/xfs_quota_defs.h
++++ b/fs/xfs/libxfs/xfs_quota_defs.h
+@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t;
+ #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
+ extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
+-                     xfs_dqid_t id, uint type, uint flags, char *str);
++                     xfs_dqid_t id, uint type, uint flags, const char *str);
+ extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
+ #endif        /* __XFS_QUOTA_H__ */
+--- a/fs/xfs/libxfs/xfs_shared.h
++++ b/fs/xfs/libxfs/xfs_shared.h
+@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inob
+ extern const struct xfs_buf_ops xfs_inode_buf_ops;
+ extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
+ extern const struct xfs_buf_ops xfs_dquot_buf_ops;
++extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
+ extern const struct xfs_buf_ops xfs_sb_buf_ops;
+ extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
+ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -3204,6 +3204,7 @@ xlog_recover_dquot_ra_pass2(
+       struct xfs_disk_dquot   *recddq;
+       struct xfs_dq_logformat *dq_f;
+       uint                    type;
++      int                     len;
+       if (mp->m_qflags == 0)
+@@ -3224,8 +3225,12 @@ xlog_recover_dquot_ra_pass2(
+       ASSERT(dq_f);
+       ASSERT(dq_f->qlf_len == 1);
+-      xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
+-                        XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
++      len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
++      if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
++              return;
++
++      xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
++                        &xfs_dquot_buf_ra_ops);
+ }
+ STATIC void
diff --git a/queue-4.4/xfs-print-name-of-verifier-if-it-fails.patch b/queue-4.4/xfs-print-name-of-verifier-if-it-fails.patch
new file mode 100644 (file)
index 0000000..c667120
--- /dev/null
@@ -0,0 +1,256 @@
+From 233135b763db7c64d07b728a9c66745fb0376275 Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Mon, 4 Jan 2016 16:10:19 +1100
+Subject: xfs: print name of verifier if it fails
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 233135b763db7c64d07b728a9c66745fb0376275 upstream.
+
+This adds a name to each buf_ops structure, so that if
+a verifier fails we can print the type of verifier that
+failed it.  Should be a slight debugging aid, I hope.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Cc: Holger Hoffstätte <holger@applied-asynchrony.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/libxfs/xfs_alloc.c          |    2 ++
+ fs/xfs/libxfs/xfs_alloc_btree.c    |    1 +
+ fs/xfs/libxfs/xfs_attr_leaf.c      |    1 +
+ fs/xfs/libxfs/xfs_attr_remote.c    |    1 +
+ fs/xfs/libxfs/xfs_bmap_btree.c     |    1 +
+ fs/xfs/libxfs/xfs_da_btree.c       |    1 +
+ fs/xfs/libxfs/xfs_dir2_block.c     |    1 +
+ fs/xfs/libxfs/xfs_dir2_data.c      |    2 ++
+ fs/xfs/libxfs/xfs_dir2_leaf.c      |    2 ++
+ fs/xfs/libxfs/xfs_dir2_node.c      |    1 +
+ fs/xfs/libxfs/xfs_dquot_buf.c      |    1 +
+ fs/xfs/libxfs/xfs_ialloc.c         |    1 +
+ fs/xfs/libxfs/xfs_ialloc_btree.c   |    1 +
+ fs/xfs/libxfs/xfs_inode_buf.c      |    2 ++
+ fs/xfs/libxfs/xfs_sb.c             |    2 ++
+ fs/xfs/libxfs/xfs_symlink_remote.c |    1 +
+ fs/xfs/xfs_buf.h                   |    1 +
+ fs/xfs/xfs_error.c                 |    4 ++--
+ 18 files changed, 24 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -535,6 +535,7 @@ xfs_agfl_write_verify(
+ }
+ const struct xfs_buf_ops xfs_agfl_buf_ops = {
++      .name = "xfs_agfl",
+       .verify_read = xfs_agfl_read_verify,
+       .verify_write = xfs_agfl_write_verify,
+ };
+@@ -2339,6 +2340,7 @@ xfs_agf_write_verify(
+ }
+ const struct xfs_buf_ops xfs_agf_buf_ops = {
++      .name = "xfs_agf",
+       .verify_read = xfs_agf_read_verify,
+       .verify_write = xfs_agf_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_alloc_btree.c
++++ b/fs/xfs/libxfs/xfs_alloc_btree.c
+@@ -379,6 +379,7 @@ xfs_allocbt_write_verify(
+ }
+ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
++      .name = "xfs_allocbt",
+       .verify_read = xfs_allocbt_read_verify,
+       .verify_write = xfs_allocbt_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -328,6 +328,7 @@ xfs_attr3_leaf_read_verify(
+ }
+ const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
++      .name = "xfs_attr3_leaf",
+       .verify_read = xfs_attr3_leaf_read_verify,
+       .verify_write = xfs_attr3_leaf_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_attr_remote.c
++++ b/fs/xfs/libxfs/xfs_attr_remote.c
+@@ -201,6 +201,7 @@ xfs_attr3_rmt_write_verify(
+ }
+ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
++      .name = "xfs_attr3_rmt",
+       .verify_read = xfs_attr3_rmt_read_verify,
+       .verify_write = xfs_attr3_rmt_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_bmap_btree.c
++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
+@@ -720,6 +720,7 @@ xfs_bmbt_write_verify(
+ }
+ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
++      .name = "xfs_bmbt",
+       .verify_read = xfs_bmbt_read_verify,
+       .verify_write = xfs_bmbt_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -245,6 +245,7 @@ xfs_da3_node_read_verify(
+ }
+ const struct xfs_buf_ops xfs_da3_node_buf_ops = {
++      .name = "xfs_da3_node",
+       .verify_read = xfs_da3_node_read_verify,
+       .verify_write = xfs_da3_node_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_dir2_block.c
++++ b/fs/xfs/libxfs/xfs_dir2_block.c
+@@ -123,6 +123,7 @@ xfs_dir3_block_write_verify(
+ }
+ const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
++      .name = "xfs_dir3_block",
+       .verify_read = xfs_dir3_block_read_verify,
+       .verify_write = xfs_dir3_block_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_dir2_data.c
++++ b/fs/xfs/libxfs/xfs_dir2_data.c
+@@ -305,11 +305,13 @@ xfs_dir3_data_write_verify(
+ }
+ const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
++      .name = "xfs_dir3_data",
+       .verify_read = xfs_dir3_data_read_verify,
+       .verify_write = xfs_dir3_data_write_verify,
+ };
+ static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
++      .name = "xfs_dir3_data_reada",
+       .verify_read = xfs_dir3_data_reada_verify,
+       .verify_write = xfs_dir3_data_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
++++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
+@@ -245,11 +245,13 @@ xfs_dir3_leafn_write_verify(
+ }
+ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
++      .name = "xfs_dir3_leaf1",
+       .verify_read = xfs_dir3_leaf1_read_verify,
+       .verify_write = xfs_dir3_leaf1_write_verify,
+ };
+ const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
++      .name = "xfs_dir3_leafn",
+       .verify_read = xfs_dir3_leafn_read_verify,
+       .verify_write = xfs_dir3_leafn_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_dir2_node.c
++++ b/fs/xfs/libxfs/xfs_dir2_node.c
+@@ -150,6 +150,7 @@ xfs_dir3_free_write_verify(
+ }
+ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
++      .name = "xfs_dir3_free",
+       .verify_read = xfs_dir3_free_read_verify,
+       .verify_write = xfs_dir3_free_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_dquot_buf.c
++++ b/fs/xfs/libxfs/xfs_dquot_buf.c
+@@ -282,6 +282,7 @@ xfs_dquot_buf_write_verify(
+ }
+ const struct xfs_buf_ops xfs_dquot_buf_ops = {
++      .name = "xfs_dquot",
+       .verify_read = xfs_dquot_buf_read_verify,
+       .verify_write = xfs_dquot_buf_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -2572,6 +2572,7 @@ xfs_agi_write_verify(
+ }
+ const struct xfs_buf_ops xfs_agi_buf_ops = {
++      .name = "xfs_agi",
+       .verify_read = xfs_agi_read_verify,
+       .verify_write = xfs_agi_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
+@@ -304,6 +304,7 @@ xfs_inobt_write_verify(
+ }
+ const struct xfs_buf_ops xfs_inobt_buf_ops = {
++      .name = "xfs_inobt",
+       .verify_read = xfs_inobt_read_verify,
+       .verify_write = xfs_inobt_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -134,11 +134,13 @@ xfs_inode_buf_write_verify(
+ }
+ const struct xfs_buf_ops xfs_inode_buf_ops = {
++      .name = "xfs_inode",
+       .verify_read = xfs_inode_buf_read_verify,
+       .verify_write = xfs_inode_buf_write_verify,
+ };
+ const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
++      .name = "xfs_inode_ra",
+       .verify_read = xfs_inode_buf_readahead_verify,
+       .verify_write = xfs_inode_buf_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -679,11 +679,13 @@ xfs_sb_write_verify(
+ }
+ const struct xfs_buf_ops xfs_sb_buf_ops = {
++      .name = "xfs_sb",
+       .verify_read = xfs_sb_read_verify,
+       .verify_write = xfs_sb_write_verify,
+ };
+ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
++      .name = "xfs_sb_quiet",
+       .verify_read = xfs_sb_quiet_read_verify,
+       .verify_write = xfs_sb_write_verify,
+ };
+--- a/fs/xfs/libxfs/xfs_symlink_remote.c
++++ b/fs/xfs/libxfs/xfs_symlink_remote.c
+@@ -168,6 +168,7 @@ xfs_symlink_write_verify(
+ }
+ const struct xfs_buf_ops xfs_symlink_buf_ops = {
++      .name = "xfs_symlink",
+       .verify_read = xfs_symlink_read_verify,
+       .verify_write = xfs_symlink_write_verify,
+ };
+--- a/fs/xfs/xfs_buf.h
++++ b/fs/xfs/xfs_buf.h
+@@ -132,6 +132,7 @@ struct xfs_buf_map {
+       struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
+ struct xfs_buf_ops {
++      char *name;
+       void (*verify_read)(struct xfs_buf *);
+       void (*verify_write)(struct xfs_buf *);
+ };
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -164,9 +164,9 @@ xfs_verifier_error(
+ {
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+-      xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
++      xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
+                 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
+-                __return_address, bp->b_bn);
++                __return_address, bp->b_ops->name, bp->b_bn);
+       xfs_alert(mp, "Unmount and run xfs_repair");
diff --git a/queue-4.4/xfs-skip-stale-inodes-in-xfs_iflush_cluster.patch b/queue-4.4/xfs-skip-stale-inodes-in-xfs_iflush_cluster.patch
new file mode 100644 (file)
index 0000000..b7aecc4
--- /dev/null
@@ -0,0 +1,32 @@
+From 7d3aa7fe970791f1a674b14572a411accf2f4d4e Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Wed, 18 May 2016 13:54:23 +1000
+Subject: xfs: skip stale inodes in xfs_iflush_cluster
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 7d3aa7fe970791f1a674b14572a411accf2f4d4e upstream.
+
+We don't write back stale inodes so we should skip them in
+xfs_iflush_cluster, too.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_inode.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3222,6 +3222,7 @@ xfs_iflush_cluster(
+                */
+               spin_lock(&iq->i_flags_lock);
+               if (!iq->i_ino ||
++                  __xfs_iflags_test(iq, XFS_ISTALE) ||
+                   (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
+                       spin_unlock(&iq->i_flags_lock);
+                       continue;
diff --git a/queue-4.4/xfs-xfs_iflush_cluster-fails-to-abort-on-error.patch b/queue-4.4/xfs-xfs_iflush_cluster-fails-to-abort-on-error.patch
new file mode 100644 (file)
index 0000000..2e6dfba
--- /dev/null
@@ -0,0 +1,76 @@
+From b1438f477934f5a4d5a44df26f3079a7575d5946 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Wed, 18 May 2016 13:53:42 +1000
+Subject: xfs: xfs_iflush_cluster fails to abort on error
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit b1438f477934f5a4d5a44df26f3079a7575d5946 upstream.
+
+When a failure due to an inode buffer occurs, the error handling
+fails to abort the inode writeback correctly. This can result in the
+inode being reclaimed whilst still in the AIL, leading to
+use-after-free situations as well as filesystems that cannot be
+unmounted as the inode log items left in the AIL never get removed.
+
+Fix this by ensuring fatal errors from xfs_imap_to_bp() result in
+the inode flush being aborted correctly.
+
+Reported-by: Shyam Kaushik <shyam@zadarastorage.com>
+Diagnosed-by: Shyam Kaushik <shyam@zadarastorage.com>
+Tested-by: Shyam Kaushik <shyam@zadarastorage.com>
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_inode.c |   17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3342,7 +3342,7 @@ xfs_iflush(
+       struct xfs_buf          **bpp)
+ {
+       struct xfs_mount        *mp = ip->i_mount;
+-      struct xfs_buf          *bp;
++      struct xfs_buf          *bp = NULL;
+       struct xfs_dinode       *dip;
+       int                     error;
+@@ -3384,14 +3384,22 @@ xfs_iflush(
+       }
+       /*
+-       * Get the buffer containing the on-disk inode.
++       * Get the buffer containing the on-disk inode. We are doing a try-lock
++       * operation here, so we may get an EAGAIN error. In that case, we
++       * simply want to return with the inode still dirty.
++       *
++       * If we get any other error, we effectively have a corruption situation
++       * and we cannot flush the inode, so we treat it the same as failing
++       * xfs_iflush_int().
+        */
+       error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
+                              0);
+-      if (error || !bp) {
++      if (error == -EAGAIN) {
+               xfs_ifunlock(ip);
+               return error;
+       }
++      if (error)
++              goto corrupt_out;
+       /*
+        * First flush out the inode that xfs_iflush was called with.
+@@ -3419,7 +3427,8 @@ xfs_iflush(
+       return 0;
+ corrupt_out:
+-      xfs_buf_relse(bp);
++      if (bp)
++              xfs_buf_relse(bp);
+       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ cluster_corrupt_out:
+       error = -EFSCORRUPTED;