3.14-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sat, 3 May 2014 18:46:54 +0000 (14:46 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sat, 3 May 2014 18:46:54 +0000 (14:46 -0400)
added patches:
block-fix-for_each_bvec.patch
clk-s2mps11-fix-possible-null-pointer-dereference.patch
ext4-fibmap-ioctl-causes-bug_on-due-to-handle-ext_max_blocks.patch
ext4-fix-jbd2-warning-under-heavy-xattr-load.patch
ext4-move-ext4_update_i_disksize-into-mpage_map_and_submit_extent.patch
ext4-note-the-error-in-ext4_end_bio.patch
ext4-use-i_size_read-in-ext4_unaligned_aio.patch
ocfs2-dlm-fix-lock-migration-crash.patch
ocfs2-dlm-fix-recovery-hung.patch
ocfs2-do-not-put-bh-when-buffer_uptodate-failed.patch
ocfs2-fix-panic-on-kfree-xattr-name.patch
smarter-propagate_mnt.patch
xattr-guard-against-simultaneous-glibc-header-inclusion.patch

14 files changed:
queue-3.14/block-fix-for_each_bvec.patch [new file with mode: 0644]
queue-3.14/clk-s2mps11-fix-possible-null-pointer-dereference.patch [new file with mode: 0644]
queue-3.14/ext4-fibmap-ioctl-causes-bug_on-due-to-handle-ext_max_blocks.patch [new file with mode: 0644]
queue-3.14/ext4-fix-jbd2-warning-under-heavy-xattr-load.patch [new file with mode: 0644]
queue-3.14/ext4-move-ext4_update_i_disksize-into-mpage_map_and_submit_extent.patch [new file with mode: 0644]
queue-3.14/ext4-note-the-error-in-ext4_end_bio.patch [new file with mode: 0644]
queue-3.14/ext4-use-i_size_read-in-ext4_unaligned_aio.patch [new file with mode: 0644]
queue-3.14/ocfs2-dlm-fix-lock-migration-crash.patch [new file with mode: 0644]
queue-3.14/ocfs2-dlm-fix-recovery-hung.patch [new file with mode: 0644]
queue-3.14/ocfs2-do-not-put-bh-when-buffer_uptodate-failed.patch [new file with mode: 0644]
queue-3.14/ocfs2-fix-panic-on-kfree-xattr-name.patch [new file with mode: 0644]
queue-3.14/series
queue-3.14/smarter-propagate_mnt.patch [new file with mode: 0644]
queue-3.14/xattr-guard-against-simultaneous-glibc-header-inclusion.patch [new file with mode: 0644]

diff --git a/queue-3.14/block-fix-for_each_bvec.patch b/queue-3.14/block-fix-for_each_bvec.patch
new file mode 100644
index 0000000..3795c7e
--- /dev/null
+++ b/queue-3.14/block-fix-for_each_bvec.patch
@@ -0,0 +1,39 @@
+From b7aa84d9cb9f26da1a9312c3e39dbd1a3c25a426 Mon Sep 17 00:00:00 2001
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+Date: Tue, 8 Apr 2014 22:43:43 -0400
+Subject: block: Fix for_each_bvec()
+
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+
+commit b7aa84d9cb9f26da1a9312c3e39dbd1a3c25a426 upstream.
+
+Commit 4550dd6c6b062 introduced for_each_bvec(), which iterates over each
+bvec attached to a bio or bip.  However, the macro fails to check bi_size
+before dereferencing, which can lead to crashes while counting/mapping
+integrity scatterlist segments.
+
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Cc: Kent Overstreet <kmo@daterainc.com>
+Cc: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/bio.h |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -216,9 +216,9 @@ static inline void bvec_iter_advance(str
+ }
+ #define for_each_bvec(bvl, bio_vec, iter, start)                      \
+-      for ((iter) = start;                                            \
+-           (bvl) = bvec_iter_bvec((bio_vec), (iter)),                 \
+-              (iter).bi_size;                                         \
++      for (iter = (start);                                            \
++           (iter).bi_size &&                                          \
++              ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
+            bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
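
For readers skimming the hunk above, the whole fix is evaluation order. Below is a minimal stand-alone sketch (plain user-space C; the iterator type and macros are invented for illustration, not kernel code) of why an iteration macro must test its termination condition before fetching the current element:

#include <stdio.h>

struct iter { const int *cur; int remaining; };

/* Broken shape (pre-fix): fetches *cur first, tests remaining after. */
#define FOR_EACH_BAD(v, it) \
        for (; (v) = *(it).cur, (it).remaining; (it).cur++, (it).remaining--)

/* Fixed shape: tests remaining first, fetches only when nonzero. */
#define FOR_EACH_OK(v, it) \
        for (; (it).remaining && (((v) = *(it).cur), 1); \
             (it).cur++, (it).remaining--)

int main(void)
{
        int v;
        struct iter empty = { NULL, 0 };  /* like a bio with bi_size == 0 */

        /* FOR_EACH_BAD(v, empty) would dereference NULL before testing. */
        FOR_EACH_OK(v, empty)             /* safely iterates zero times */
                printf("%d\n", v);
        return 0;
}
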
diff --git a/queue-3.14/clk-s2mps11-fix-possible-null-pointer-dereference.patch b/queue-3.14/clk-s2mps11-fix-possible-null-pointer-dereference.patch
new file mode 100644
index 0000000..0d8b317
--- /dev/null
+++ b/queue-3.14/clk-s2mps11-fix-possible-null-pointer-dereference.patch
@@ -0,0 +1,32 @@
+From 238e14055da87d0d012257788e39fe0df3a82226 Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Date: Fri, 21 Mar 2014 13:18:17 +0100
+Subject: clk: s2mps11: Fix possible NULL pointer dereference
+
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+
+commit 238e14055da87d0d012257788e39fe0df3a82226 upstream.
+
+If the parent device does not have of_node set, s2mps11_clk_parse_dt()
+returned NULL.  This NULL was later passed to of_clk_add_provider(),
+which dereferenced it in a pr_debug() call.
+
+Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Signed-off-by: Mike Turquette <mturquette@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/clk-s2mps11.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/clk/clk-s2mps11.c
++++ b/drivers/clk/clk-s2mps11.c
+@@ -130,7 +130,7 @@ static struct device_node *s2mps11_clk_p
+       int i;
+       if (!iodev->dev->of_node)
+-              return NULL;
++              return ERR_PTR(-EINVAL);
+       clk_np = of_find_node_by_name(iodev->dev->of_node, "clocks");
+       if (!clk_np) {
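
The fix swaps a bare NULL for an ERR_PTR() encoding, which the caller's IS_ERR() check actually catches. A hedged user-space sketch of the kernel's convention (simplified re-implementations of helpers that live in include/linux/err.h; the -22/-EINVAL value follows the patch, everything else is illustrative):

#include <stdio.h>

#define MAX_ERRNO 4095
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *parse_dt(int have_of_node)
{
        if (!have_of_node)
                return ERR_PTR(-22);  /* -EINVAL: caught by IS_ERR() */
        /* a bare NULL here would sail straight past IS_ERR() ... */
        return "clocks-node";
}

int main(void)
{
        void *np = parse_dt(0);
        if (IS_ERR(np))
                printf("parse failed: %ld\n", PTR_ERR(np));
        return 0;
}
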
diff --git a/queue-3.14/ext4-fibmap-ioctl-causes-bug_on-due-to-handle-ext_max_blocks.patch b/queue-3.14/ext4-fibmap-ioctl-causes-bug_on-due-to-handle-ext_max_blocks.patch
new file mode 100644
index 0000000..1f8b0af
--- /dev/null
+++ b/queue-3.14/ext4-fibmap-ioctl-causes-bug_on-due-to-handle-ext_max_blocks.patch
@@ -0,0 +1,41 @@
+From 4adb6ab3e0fa71363a5ef229544b2d17de6600d7 Mon Sep 17 00:00:00 2001
+From: Kazuya Mio <k-mio@sx.jp.nec.com>
+Date: Mon, 7 Apr 2014 10:53:28 -0400
+Subject: ext4: FIBMAP ioctl causes BUG_ON due to handle EXT_MAX_BLOCKS
+
+From: Kazuya Mio <k-mio@sx.jp.nec.com>
+
+commit 4adb6ab3e0fa71363a5ef229544b2d17de6600d7 upstream.
+
+When we try to get block 2^32-1 of a file which has the extent
+(ee_block=2^32-2, ee_len=1) with the FIBMAP ioctl, it causes a BUG_ON
+in ext4_ext_put_gap_in_cache().
+
+To avoid the problem, ext4_map_blocks() needs to check the file logical block
+number. ext4_ext_put_gap_in_cache() called via ext4_map_blocks() cannot
+handle 2^32-1 because the maximum file logical block number is 2^32-2.
+
+Note that ext4_ind_map_blocks() returns -EIO when the block number is invalid.
+So ext4_map_blocks() should also return the same errno.
+
+Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -515,6 +515,10 @@ int ext4_map_blocks(handle_t *handle, st
+                 "logical block %lu\n", inode->i_ino, flags, map->m_len,
+                 (unsigned long) map->m_lblk);
++      /* We can handle the block number less than EXT_MAX_BLOCKS */
++      if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
++              return -EIO;
++
+       /* Lookup extent status tree firstly */
+       if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+               ext4_es_lru_add(inode);
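
The added hunk is a plain range guard executed before any lookup machinery runs. A stand-alone sketch of the same check (constants per the commit message; map_blocks() is an invented stand-in for ext4_map_blocks()):

#include <stdint.h>
#include <stdio.h>

#define EXT_MAX_BLOCKS 0xffffffffU   /* 2^32-1 */
#define EIO 5

static int map_blocks(uint32_t lblk)
{
        /* Only block numbers below EXT_MAX_BLOCKS can be handled;
         * the maximum valid logical block is 2^32-2. */
        if (lblk >= EXT_MAX_BLOCKS)
                return -EIO;
        /* ... extent lookup would proceed here ... */
        return 0;
}

int main(void)
{
        printf("%d\n", map_blocks(EXT_MAX_BLOCKS));      /* -5 (-EIO) */
        printf("%d\n", map_blocks(EXT_MAX_BLOCKS - 1));  /* 0 */
        return 0;
}
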
diff --git a/queue-3.14/ext4-fix-jbd2-warning-under-heavy-xattr-load.patch b/queue-3.14/ext4-fix-jbd2-warning-under-heavy-xattr-load.patch
new file mode 100644
index 0000000..1975a32
--- /dev/null
+++ b/queue-3.14/ext4-fix-jbd2-warning-under-heavy-xattr-load.patch
@@ -0,0 +1,131 @@
+From ec4cb1aa2b7bae18dd8164f2e9c7c51abcf61280 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 7 Apr 2014 10:54:21 -0400
+Subject: ext4: fix jbd2 warning under heavy xattr load
+
+From: Jan Kara <jack@suse.cz>
+
+commit ec4cb1aa2b7bae18dd8164f2e9c7c51abcf61280 upstream.
+
+When heavily exercising the xattr code, the assertion that
+jbd2_journal_dirty_metadata() shouldn't return error was triggered:
+
+WARNING: at /srv/autobuild-ceph/gitbuilder.git/build/fs/jbd2/transaction.c:1237
+jbd2_journal_dirty_metadata+0x1ba/0x260()
+
+CPU: 0 PID: 8877 Comm: ceph-osd Tainted: G    W 3.10.0-ceph-00049-g68d04c9 #1
+Hardware name: Dell Inc. PowerEdge R410/01V648, BIOS 1.6.3 02/07/2011
+ ffffffff81a1d3c8 ffff880214469928 ffffffff816311b0 ffff880214469968
+ ffffffff8103fae0 ffff880214469958 ffff880170a9dc30 ffff8802240fbe80
+ 0000000000000000 ffff88020b366000 ffff8802256e7510 ffff880214469978
+Call Trace:
+ [<ffffffff816311b0>] dump_stack+0x19/0x1b
+ [<ffffffff8103fae0>] warn_slowpath_common+0x70/0xa0
+ [<ffffffff8103fb2a>] warn_slowpath_null+0x1a/0x20
+ [<ffffffff81267c2a>] jbd2_journal_dirty_metadata+0x1ba/0x260
+ [<ffffffff81245093>] __ext4_handle_dirty_metadata+0xa3/0x140
+ [<ffffffff812561f3>] ext4_xattr_release_block+0x103/0x1f0
+ [<ffffffff81256680>] ext4_xattr_block_set+0x1e0/0x910
+ [<ffffffff8125795b>] ext4_xattr_set_handle+0x38b/0x4a0
+ [<ffffffff810a319d>] ? trace_hardirqs_on+0xd/0x10
+ [<ffffffff81257b32>] ext4_xattr_set+0xc2/0x140
+ [<ffffffff81258547>] ext4_xattr_user_set+0x47/0x50
+ [<ffffffff811935ce>] generic_setxattr+0x6e/0x90
+ [<ffffffff81193ecb>] __vfs_setxattr_noperm+0x7b/0x1c0
+ [<ffffffff811940d4>] vfs_setxattr+0xc4/0xd0
+ [<ffffffff8119421e>] setxattr+0x13e/0x1e0
+ [<ffffffff811719c7>] ? __sb_start_write+0xe7/0x1b0
+ [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
+ [<ffffffff8118c65c>] ? fget_light+0x3c/0x130
+ [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
+ [<ffffffff8118f1f8>] ? __mnt_want_write+0x58/0x70
+ [<ffffffff811946be>] SyS_fsetxattr+0xbe/0x100
+ [<ffffffff816407c2>] system_call_fastpath+0x16/0x1b
+
+The reason for the warning is that buffer_head passed into
+jbd2_journal_dirty_metadata() didn't have journal_head attached. This is
+caused by the following race of two ext4_xattr_release_block() calls:
+
+CPU1                                CPU2
+ext4_xattr_release_block()          ext4_xattr_release_block()
+lock_buffer(bh);
+/* False */
+if (BHDR(bh)->h_refcount == cpu_to_le32(1))
+} else {
+  le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+  unlock_buffer(bh);
+                                    lock_buffer(bh);
+                                    /* True */
+                                    if (BHDR(bh)->h_refcount == cpu_to_le32(1))
+                                      get_bh(bh);
+                                      ext4_free_blocks()
+                                        ...
+                                        jbd2_journal_forget()
+                                          jbd2_journal_unfile_buffer()
+                                          -> JH is gone
+  error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+  -> triggers the warning
+
+We fix the problem by moving ext4_handle_dirty_xattr_block() under the
+buffer lock. Sadly this cannot be done in nojournal mode as that
+function can call sync_dirty_buffer() which would deadlock. Luckily in
+nojournal mode the race is harmless (we only dirty an already freed buffer)
+and thus for nojournal mode we leave the dirtying outside of the buffer
+lock.
+
+Reported-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/xattr.c |   23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -517,8 +517,8 @@ static void ext4_xattr_update_super_bloc
+ }
+ /*
+- * Release the xattr block BH: If the reference count is > 1, decrement
+- * it; otherwise free the block.
++ * Release the xattr block BH: If the reference count is > 1, decrement it;
++ * otherwise free the block.
+  */
+ static void
+ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+@@ -538,16 +538,31 @@ ext4_xattr_release_block(handle_t *handl
+               if (ce)
+                       mb_cache_entry_free(ce);
+               get_bh(bh);
++              unlock_buffer(bh);
+               ext4_free_blocks(handle, inode, bh, 0, 1,
+                                EXT4_FREE_BLOCKS_METADATA |
+                                EXT4_FREE_BLOCKS_FORGET);
+-              unlock_buffer(bh);
+       } else {
+               le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+               if (ce)
+                       mb_cache_entry_release(ce);
++              /*
++               * Beware of this ugliness: Releasing of xattr block references
++               * from different inodes can race and so we have to protect
++               * from a race where someone else frees the block (and releases
++               * its journal_head) before we are done dirtying the buffer. In
++               * nojournal mode this race is harmless and we actually cannot
++               * call ext4_handle_dirty_xattr_block() with locked buffer as
++               * that function can call sync_dirty_buffer() so for that case
++               * we handle the dirtying after unlocking the buffer.
++               */
++              if (ext4_handle_valid(handle))
++                      error = ext4_handle_dirty_xattr_block(handle, inode,
++                                                            bh);
+               unlock_buffer(bh);
+-              error = ext4_handle_dirty_xattr_block(handle, inode, bh);
++              if (!ext4_handle_valid(handle))
++                      error = ext4_handle_dirty_xattr_block(handle, inode,
++                                                            bh);
+               if (IS_SYNC(inode))
+                       ext4_handle_sync(handle);
+               dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
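
The essence of the fix is that, in journal mode, the refcount check and the dirtying now form one critical section under the buffer lock. A user-space pthread sketch of that shape (a mutex stands in for lock_buffer(); all names and the printed outcomes are illustrative, not kernel API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t buffer_lock = PTHREAD_MUTEX_INITIALIZER;
static int refcount = 2;
static int journal_head_alive = 1;

static void release_block(const char *who)
{
        pthread_mutex_lock(&buffer_lock);
        if (refcount == 1) {
                refcount = 0;
                journal_head_alive = 0;  /* last ref: free journal head */
                printf("%s: freed the block\n", who);
        } else {
                refcount--;
                /* Fixed ordering: dirty while still holding the lock,
                 * so the other CPU cannot free the journal head between
                 * our refcount check and the dirtying. */
                if (journal_head_alive)
                        printf("%s: dirtied metadata safely\n", who);
        }
        pthread_mutex_unlock(&buffer_lock);
}

static void *worker(void *arg)
{
        release_block(arg);
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, worker, "cpu2");
        release_block("cpu1");
        pthread_join(t, NULL);
        return 0;
}
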
diff --git a/queue-3.14/ext4-move-ext4_update_i_disksize-into-mpage_map_and_submit_extent.patch b/queue-3.14/ext4-move-ext4_update_i_disksize-into-mpage_map_and_submit_extent.patch
new file mode 100644
index 0000000..5c245cb
--- /dev/null
+++ b/queue-3.14/ext4-move-ext4_update_i_disksize-into-mpage_map_and_submit_extent.patch
@@ -0,0 +1,90 @@
+From 622cad1325e404598fe3b148c3fa640dbaabc235 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Fri, 11 Apr 2014 10:35:17 -0400
+Subject: ext4: move ext4_update_i_disksize() into mpage_map_and_submit_extent()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 622cad1325e404598fe3b148c3fa640dbaabc235 upstream.
+
+The function ext4_update_i_disksize() is used in only one place, in
+the function mpage_map_and_submit_extent().  Move its code to simplify
+the code paths, and also move the call to ext4_mark_inode_dirty() into
+the i_data_sem's critical region, to be consistent with all of the
+other places where we update i_disksize.  That way, we also keep the
+raw_inode's i_disksize protected, to avoid the following race:
+
+      CPU #1                                 CPU #2
+
+   down_write(&i_data_sem)
+   Modify i_disk_size
+   up_write(&i_data_sem)
+                                        down_write(&i_data_sem)
+                                        Modify i_disk_size
+                                        Copy i_disk_size to on-disk inode
+                                        up_write(&i_data_sem)
+   Copy i_disk_size to on-disk inode
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h  |   17 -----------------
+ fs/ext4/inode.c |   14 ++++++++++++--
+ 2 files changed, 12 insertions(+), 19 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -2462,23 +2462,6 @@ static inline void ext4_update_i_disksiz
+       up_write(&EXT4_I(inode)->i_data_sem);
+ }
+-/*
+- * Update i_disksize after writeback has been started. Races with truncate
+- * are avoided by checking i_size under i_data_sem.
+- */
+-static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
+-{
+-      loff_t i_size;
+-
+-      down_write(&EXT4_I(inode)->i_data_sem);
+-      i_size = i_size_read(inode);
+-      if (newsize > i_size)
+-              newsize = i_size;
+-      if (newsize > EXT4_I(inode)->i_disksize)
+-              EXT4_I(inode)->i_disksize = newsize;
+-      up_write(&EXT4_I(inode)->i_data_sem);
+-}
+-
+ struct ext4_group_info {
+       unsigned long   bb_state;
+       struct rb_root  bb_free_root;
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2236,13 +2236,23 @@ static int mpage_map_and_submit_extent(h
+                       return err;
+       } while (map->m_len);
+-      /* Update on-disk size after IO is submitted */
++      /*
++       * Update on-disk size after IO is submitted.  Races with
++       * truncate are avoided by checking i_size under i_data_sem.
++       */
+       disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
+       if (disksize > EXT4_I(inode)->i_disksize) {
+               int err2;
++              loff_t i_size;
+-              ext4_wb_update_i_disksize(inode, disksize);
++              down_write(&EXT4_I(inode)->i_data_sem);
++              i_size = i_size_read(inode);
++              if (disksize > i_size)
++                      disksize = i_size;
++              if (disksize > EXT4_I(inode)->i_disksize)
++                      EXT4_I(inode)->i_disksize = disksize;
+               err2 = ext4_mark_inode_dirty(handle, inode);
++              up_write(&EXT4_I(inode)->i_data_sem);
+               if (err2)
+                       ext4_error(inode->i_sb,
+                                  "Failed to mark inode %lu dirty",
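
The moved code keeps both the clamp-and-update of i_disksize and the marking of the inode dirty inside one i_data_sem write section, which is exactly what defeats the interleaving shown in the commit message. A simplified user-space sketch of the invariant (an rwlock stands in for i_data_sem; the fields are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t i_data_sem = PTHREAD_RWLOCK_INITIALIZER;
static long long i_size = 4096;          /* in-memory file size */
static long long i_disksize = 0;         /* size safe to expose on disk */
static long long raw_inode_disksize = 0; /* the on-disk copy */

static void update_disksize(long long newsize)
{
        pthread_rwlock_wrlock(&i_data_sem);
        if (newsize > i_size)       /* clamp: never publish past i_size */
                newsize = i_size;
        if (newsize > i_disksize)
                i_disksize = newsize;
        /* "mark inode dirty": copy to the on-disk inode while still
         * inside the critical section, so no stale copy can win. */
        raw_inode_disksize = i_disksize;
        pthread_rwlock_unlock(&i_data_sem);
}

int main(void)
{
        update_disksize(8192);                 /* clamped to i_size */
        printf("%lld\n", raw_inode_disksize);  /* 4096 */
        return 0;
}
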
diff --git a/queue-3.14/ext4-note-the-error-in-ext4_end_bio.patch b/queue-3.14/ext4-note-the-error-in-ext4_end_bio.patch
new file mode 100644
index 0000000..3724c60
--- /dev/null
+++ b/queue-3.14/ext4-note-the-error-in-ext4_end_bio.patch
@@ -0,0 +1,42 @@
+From 9503c67c93ed0b95ba62d12d1fd09da6245dbdd6 Mon Sep 17 00:00:00 2001
+From: Matthew Wilcox <willy@linux.intel.com>
+Date: Mon, 7 Apr 2014 10:54:20 -0400
+Subject: ext4: note the error in ext4_end_bio()
+
+From: Matthew Wilcox <willy@linux.intel.com>
+
+commit 9503c67c93ed0b95ba62d12d1fd09da6245dbdd6 upstream.
+
+ext4_end_bio() currently throws away the error that it receives.  Chances
+are this is part of a spate of errors, one of which will end up getting
+the error returned to userspace somehow, but we shouldn't take that risk.
+Also print out the errno to aid in debug.
+
+Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/page-io.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio
+       if (error) {
+               struct inode *inode = io_end->inode;
+-              ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
++              ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
+                            "(offset %llu size %ld starting block %llu)",
+-                           inode->i_ino,
++                           error, inode->i_ino,
+                            (unsigned long long) io_end->offset,
+                            (long) io_end->size,
+                            (unsigned long long)
+                            bi_sector >> (inode->i_blkbits - 9));
++              mapping_set_error(inode->i_mapping, error);
+       }
+       if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
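
mapping_set_error() is what makes the failure "sticky" so a later fsync()/close() can report it to userspace instead of the error being dropped. A much-simplified sketch of the idea (the real kernel records AS_EIO/AS_ENOSPC bits on the address_space; this struct and these helpers are illustrative):

#include <stdio.h>

struct mapping { int error; };

static void mapping_set_error(struct mapping *m, int error)
{
        if (error && !m->error)  /* remember the first error seen */
                m->error = error;
}

static int filemap_check_errors(struct mapping *m)
{
        int err = m->error;
        m->error = 0;            /* consumed by this fsync() */
        return err;
}

int main(void)
{
        struct mapping m = { 0 };

        mapping_set_error(&m, -5);  /* end_bio notes -EIO for later */
        printf("fsync -> %d\n", filemap_check_errors(&m));  /* -5 */
        printf("fsync -> %d\n", filemap_check_errors(&m));  /* 0 */
        return 0;
}
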
diff --git a/queue-3.14/ext4-use-i_size_read-in-ext4_unaligned_aio.patch b/queue-3.14/ext4-use-i_size_read-in-ext4_unaligned_aio.patch
new file mode 100644
index 0000000..aef179d
--- /dev/null
+++ b/queue-3.14/ext4-use-i_size_read-in-ext4_unaligned_aio.patch
@@ -0,0 +1,29 @@
+From 6e6358fc3c3c862bfe9a5bc029d3f8ce43dc9765 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 12 Apr 2014 12:45:25 -0400
+Subject: ext4: use i_size_read in ext4_unaligned_aio()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 6e6358fc3c3c862bfe9a5bc029d3f8ce43dc9765 upstream.
+
+We haven't taken i_mutex yet, so we need to use i_size_read().
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode,
+       size_t count = iov_length(iov, nr_segs);
+       loff_t final_size = pos + count;
+-      if (pos >= inode->i_size)
++      if (pos >= i_size_read(inode))
+               return 0;
+       if ((pos & blockmask) || (final_size & blockmask))
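
For context, i_size_read() exists because a 64-bit i_size cannot be read atomically on 32-bit SMP, so without i_mutex held a raw read of inode->i_size can be torn; the kernel wraps the field in a seqcount. A heavily simplified sketch of that retry loop (no memory barriers, purely illustrative):

#include <stdio.h>

struct inode_sketch {
        unsigned seq;       /* odd while a writer is mid-update */
        long long i_size;
};

static long long i_size_read(const struct inode_sketch *inode)
{
        unsigned seq;
        long long size;

        do {
                seq = inode->seq;     /* read_seqcount_begin() */
                size = inode->i_size;
        } while ((seq & 1) || seq != inode->seq);  /* retry torn reads */
        return size;
}

int main(void)
{
        struct inode_sketch inode = { 0, 1 << 20 };
        printf("%lld\n", i_size_read(&inode));
        return 0;
}
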
diff --git a/queue-3.14/ocfs2-dlm-fix-lock-migration-crash.patch b/queue-3.14/ocfs2-dlm-fix-lock-migration-crash.patch
new file mode 100644
index 0000000..b1b1bae
--- /dev/null
+++ b/queue-3.14/ocfs2-dlm-fix-lock-migration-crash.patch
@@ -0,0 +1,119 @@
+From 34aa8dac482f1358d59110d5e3a12f4351f6acaa Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 3 Apr 2014 14:46:49 -0700
+Subject: ocfs2: dlm: fix lock migration crash
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 34aa8dac482f1358d59110d5e3a12f4351f6acaa upstream.
+
+This issue was introduced by commit 800deef3f6f8 ("ocfs2: use
+list_for_each_entry where benefical") in 2007, which replaced
+list_for_each with list_for_each_entry.  The variable "lock" will point
+to invalid data if the "tmpq" list is empty, and a panic will be
+triggered because of this.  Sunil advised reverting the change, but the
+old version was not right either: at the end of the outer for loop,
+that list_for_each_entry will also leave "lock" pointing at invalid
+data, and in the next iteration, if the "tmpq" list is empty, "lock"
+will be stale invalid data and cause the panic.  So revert to
+list_for_each and reset "lock" to NULL to fix this issue.
+
+Another concern is that this seems like it cannot happen, because the
+"tmpq" list should not be empty.  Let me describe how.
+
+old lock resource owner (node 1):                                 migration target (node 2):
+imagine there's a lockres with an EX lock from node 2 in
+the granted list, and an NR lock from node x with convert_type
+EX in the converting list.
+dlm_empty_lockres() {
+ dlm_pick_migration_target() {
+   pick node 2 as target as its lock is the first one
+   in granted list.
+ }
+ dlm_migrate_lockres() {
+   dlm_mark_lockres_migrating() {
+     res->state |= DLM_LOCK_RES_BLOCK_DIRTY;
+     wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
+        //after the above code, we can not dirty lockres any more,
+     // so dlm_thread shuffle list will not run
+                                                                   downconvert lock from EX to NR
+                                                                   upconvert lock from NR to EX
+<<< migration may schedule out here, then
+<<< node 2 send down convert request to convert type from EX to
+<<< NR, then send up convert request to convert type from NR to
+<<< EX, at this time, lockres granted list is empty, and two locks
+<<< in the converting list, node x up convert lock followed by
+<<< node 2 up convert lock.
+
+        // will set lockres RES_MIGRATING flag, the following
+        // lock/unlock can not run
+     dlm_lockres_release_ast(dlm, res);
+   }
+
+   dlm_send_one_lockres()
+                                                                 dlm_process_recovery_data()
+                                                                   for (i=0; i<mres->num_locks; i++)
+                                                                     if (ml->node == dlm->node_num)
+                                                                       for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+                                                                        list_for_each_entry(lock, tmpq, list)
+                                                                        if (lock) break; <<< lock is invalid as grant list is empty.
+                                                                       }
+                                                                       if (lock->ml.node != ml->node)
+                                                                         BUG() >>> crash here
+ }
+
+I saw the above lock status in a vmcore from one of our internal bugs.
+
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Wengang Wang <wen.gang.wang@oracle.com>
+Cc: Sunil Mushran <sunil.mushran@gmail.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -1750,13 +1750,13 @@ static int dlm_process_recovery_data(str
+                                    struct dlm_migratable_lockres *mres)
+ {
+       struct dlm_migratable_lock *ml;
+-      struct list_head *queue;
++      struct list_head *queue, *iter;
+       struct list_head *tmpq = NULL;
+       struct dlm_lock *newlock = NULL;
+       struct dlm_lockstatus *lksb = NULL;
+       int ret = 0;
+       int i, j, bad;
+-      struct dlm_lock *lock = NULL;
++      struct dlm_lock *lock;
+       u8 from = O2NM_MAX_NODES;
+       unsigned int added = 0;
+       __be64 c;
+@@ -1791,14 +1791,16 @@ static int dlm_process_recovery_data(str
+                       /* MIGRATION ONLY! */
+                       BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
++                      lock = NULL;
+                       spin_lock(&res->spinlock);
+                       for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+                               tmpq = dlm_list_idx_to_ptr(res, j);
+-                              list_for_each_entry(lock, tmpq, list) {
+-                                      if (lock->ml.cookie != ml->cookie)
+-                                              lock = NULL;
+-                                      else
++                              list_for_each(iter, tmpq) {
++                                      lock = list_entry(iter,
++                                                struct dlm_lock, list);
++                                      if (lock->ml.cookie == ml->cookie)
+                                               break;
++                                      lock = NULL;
+                               }
+                               if (lock)
+                                       break;
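
The pitfall this fixes is worth stating generally: when list_for_each_entry() finishes without a break, the cursor is not NULL — it is the list head reinterpreted as an entry, i.e. garbage. A stand-alone sketch of the fixed search shape, using a simplified intrusive list rather than <linux/list.h>:

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
struct lock { int cookie; struct list_head list; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static struct lock *find_lock(struct list_head *head, int cookie)
{
        struct list_head *iter;
        struct lock *lock = NULL;   /* reset before each search */

        for (iter = head->next; iter != head; iter = iter->next) {
                lock = container_of(iter, struct lock, list);
                if (lock->cookie == cookie)
                        return lock;
                lock = NULL;        /* not the one: don't keep garbage */
        }
        return lock;                /* NULL on empty list / no match */
}

int main(void)
{
        struct list_head head = { &head, &head };      /* empty list */
        printf("%p\n", (void *)find_lock(&head, 42));  /* (nil) */
        return 0;
}
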
diff --git a/queue-3.14/ocfs2-dlm-fix-recovery-hung.patch b/queue-3.14/ocfs2-dlm-fix-recovery-hung.patch
new file mode 100644
index 0000000..5e8d70f
--- /dev/null
+++ b/queue-3.14/ocfs2-dlm-fix-recovery-hung.patch
@@ -0,0 +1,103 @@
+From ded2cf71419b9353060e633b59e446c42a6a2a09 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 3 Apr 2014 14:46:51 -0700
+Subject: ocfs2: dlm: fix recovery hung
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit ded2cf71419b9353060e633b59e446c42a6a2a09 upstream.
+
+There is a race window in dlm_do_recovery() between dlm_remaster_locks()
+and dlm_reset_recovery() when the recovery master has nearly finished
+the recovery process for a dead node.  After the master sends the
+FINALIZE_RECO message in dlm_remaster_locks(), another node may become
+the recovery master for another dead node and send the BEGIN_RECO
+message to all nodes, including the old master.  In the old master's
+handler for this message, dlm_begin_reco_handler(), dlm->reco.dead_node
+and dlm->reco.new_master will be set to the second dead node and the
+new master; then in dlm_reset_recovery() these two variables will be
+reset to their default values.  This prevents the new recovery master
+from finishing the recovery process, and in the end the whole cluster
+hangs waiting for recovery.
+
+old recovery master:                                 new recovery master:
+dlm_remaster_locks()
+                                                  become recovery master for
+                                                  another dead node.
+                                                  dlm_send_begin_reco_message()
+dlm_begin_reco_handler()
+{
+ if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
+  return -EAGAIN;
+ }
+ dlm_set_reco_master(dlm, br->node_idx);
+ dlm_set_reco_dead_node(dlm, br->dead_node);
+}
+dlm_reset_recovery()
+{
+ dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
+ dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
+}
+                                                  will hang in dlm_remaster_locks() for
+                                                  request dlm locks info
+
+Before sending the FINALIZE_RECO message, the recovery master should
+set DLM_RECO_STATE_FINALIZE for itself and clear it after the recovery
+is done; this closes the race window, as BEGIN_RECO messages will not
+be handled until the DLM_RECO_STATE_FINALIZE flag is cleared.
+
+A similar race may happen between the new recovery master and a normal
+node that is in dlm_finalize_reco_handler(); fix that as well.
+
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Reviewed-by: Wengang Wang <wen.gang.wang@oracle.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c |   15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -537,7 +537,10 @@ master_here:
+               /* success!  see if any other nodes need recovery */
+               mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
+                    dlm->name, dlm->reco.dead_node, dlm->node_num);
+-              dlm_reset_recovery(dlm);
++              spin_lock(&dlm->spinlock);
++              __dlm_reset_recovery(dlm);
++              dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
++              spin_unlock(&dlm->spinlock);
+       }
+       dlm_end_recovery(dlm);
+@@ -695,6 +698,14 @@ static int dlm_remaster_locks(struct dlm
+               if (all_nodes_done) {
+                       int ret;
++                      /* Set this flag on recovery master to avoid
++                       * a new recovery for another dead node start
++                       * before the recovery is not done. That may
++                       * cause recovery hung.*/
++                      spin_lock(&dlm->spinlock);
++                      dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
++                      spin_unlock(&dlm->spinlock);
++
+                       /* all nodes are now in DLM_RECO_NODE_DATA_DONE state
+                        * just send a finalize message to everyone and
+                        * clean up */
+@@ -2884,8 +2895,8 @@ int dlm_finalize_reco_handler(struct o2n
+                               BUG();
+                       }
+                       dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
++                      __dlm_reset_recovery(dlm);
+                       spin_unlock(&dlm->spinlock);
+-                      dlm_reset_recovery(dlm);
+                       dlm_kick_recovery_thread(dlm);
+                       break;
+               default:
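
A compact sketch of the guard this patch adds: while DLM_RECO_STATE_FINALIZE is set on the old master, an incoming begin-recovery request is bounced with -EAGAIN instead of overwriting the recovery bookkeeping (the flag name matches the patch; the toy handler and values are illustrative):

#include <stdio.h>

#define DLM_RECO_STATE_FINALIZE 0x2
#define EAGAIN 11

static int reco_state;
static int reco_new_master = -1, reco_dead_node = -1;

static int begin_reco_handler(int new_master, int dead_node)
{
        if (reco_state & DLM_RECO_STATE_FINALIZE)
                return -EAGAIN;          /* sender retries later */
        reco_new_master = new_master;
        reco_dead_node = dead_node;
        return 0;
}

int main(void)
{
        reco_state |= DLM_RECO_STATE_FINALIZE;    /* before FINALIZE_RECO */
        printf("%d\n", begin_reco_handler(3, 7)); /* -11: state intact */

        reco_state &= ~DLM_RECO_STATE_FINALIZE;   /* recovery done */
        printf("%d\n", begin_reco_handler(3, 7)); /* 0 */
        return 0;
}
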
diff --git a/queue-3.14/ocfs2-do-not-put-bh-when-buffer_uptodate-failed.patch b/queue-3.14/ocfs2-do-not-put-bh-when-buffer_uptodate-failed.patch
new file mode 100644
index 0000000..4c58562
--- /dev/null
+++ b/queue-3.14/ocfs2-do-not-put-bh-when-buffer_uptodate-failed.patch
@@ -0,0 +1,45 @@
+From f7cf4f5bfe073ad792ab49c04f247626b3e38db6 Mon Sep 17 00:00:00 2001
+From: alex chen <alex.chen@huawei.com>
+Date: Thu, 3 Apr 2014 14:47:05 -0700
+Subject: ocfs2: do not put bh when buffer_uptodate failed
+
+From: alex chen <alex.chen@huawei.com>
+
+commit f7cf4f5bfe073ad792ab49c04f247626b3e38db6 upstream.
+
+Do not put the bh when buffer_uptodate fails in ocfs2_write_block and
+ocfs2_write_super_or_backup, because the bh has already been put in
+b_end_io.  Otherwise we hit the warning "VFS: brelse: Trying to free
+free buffer".
+
+Signed-off-by: Alex Chen <alex.chen@huawei.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Acked-by: Joel Becker <jlbec@evilplan.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/buffer_head_io.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ocfs2/buffer_head_io.c
++++ b/fs/ocfs2/buffer_head_io.c
+@@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super
+                * information for this bh as it's not marked locally
+                * uptodate. */
+               ret = -EIO;
+-              put_bh(bh);
+               mlog_errno(ret);
+       }
+@@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct o
+       if (!buffer_uptodate(bh)) {
+               ret = -EIO;
+-              put_bh(bh);
+               mlog_errno(ret);
+       }
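
A toy model of the double release being removed: the I/O completion callback already drops the reference taken for the I/O, so a second put on the error path underflows the count — which is what the brelse warning detects (struct and helpers invented for illustration, not fs/buffer.c):

#include <stdio.h>

struct bh { int count; };

static void put_bh(struct bh *bh)
{
        if (--bh->count < 0)
                printf("VFS: brelse: Trying to free free buffer\n");
}

static void end_io(struct bh *bh)
{
        put_bh(bh);  /* completion drops the I/O reference itself */
}

int main(void)
{
        struct bh bh = { .count = 1 };

        end_io(&bh);  /* I/O completes (with error): count is now 0 */
        put_bh(&bh);  /* the removed, buggy extra put: underflow */
        return 0;
}
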
diff --git a/queue-3.14/ocfs2-fix-panic-on-kfree-xattr-name.patch b/queue-3.14/ocfs2-fix-panic-on-kfree-xattr-name.patch
new file mode 100644
index 0000000..e739e9d
--- /dev/null
+++ b/queue-3.14/ocfs2-fix-panic-on-kfree-xattr-name.patch
@@ -0,0 +1,47 @@
+From f81c20158f8d5f7938d5eb86ecc42ecc09273ce6 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Thu, 3 Apr 2014 14:47:07 -0700
+Subject: ocfs2: fix panic on kfree(xattr->name)
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit f81c20158f8d5f7938d5eb86ecc42ecc09273ce6 upstream.
+
+Commit 9548906b2bb7 ('xattr: Constify ->name member of "struct xattr"')
+missed that ocfs2 is calling kfree(xattr->name).  As a result, kernel
+panic occurs upon calling kfree(xattr->name), because xattr->name now
+refers to static constant names.  This patch removes kfree(xattr->name) from
+ocfs2_mknod() and ocfs2_symlink().
+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reported-by: Tariq Saeed <tariq.x.saeed@oracle.com>
+Tested-by: Tariq Saeed <tariq.x.saeed@oracle.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/namei.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ocfs2/namei.c
++++ b/fs/ocfs2/namei.c
+@@ -450,7 +450,6 @@ leave:
+       brelse(new_fe_bh);
+       brelse(parent_fe_bh);
+-      kfree(si.name);
+       kfree(si.value);
+       ocfs2_free_dir_lookup_result(&lookup);
+@@ -1855,7 +1854,6 @@ bail:
+       brelse(new_fe_bh);
+       brelse(parent_fe_bh);
+-      kfree(si.name);
+       kfree(si.value);
+       ocfs2_free_dir_lookup_result(&lookup);
+       if (inode_ac)
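
The bug class, sketched in user-space C: after the constification, si.name points at a static string, and handing anything not obtained from the matching allocator to free()/kfree() is undefined behaviour — the kernel panics on it. Field names follow the ocfs2 code; the values are made up:

#include <stdlib.h>
#include <string.h>

struct xattr_si {
        const char *name;  /* now a static constant: never free this */
        char *value;       /* heap-allocated: this one must be freed */
};

int main(void)
{
        struct xattr_si si = {
                .name  = "security.selinux",
                .value = strdup("system_u:object_r:etc_t"),
        };

        /* free((void *)si.name);   <- the removed, crashing call */
        free(si.value);
        return 0;
}
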
diff --git a/queue-3.14/series b/queue-3.14/series
index 2a9ecc0c009673e95a366d08797d7e2eb48dec66..94e7f4201a1128fa8849783095602dd548609eee 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -138,3 +138,16 @@ mm-try_to_unmap_cluster-should-lock_page-before-mlocking.patch
 mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
 mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
 hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
+xattr-guard-against-simultaneous-glibc-header-inclusion.patch
+ocfs2-dlm-fix-lock-migration-crash.patch
+ocfs2-dlm-fix-recovery-hung.patch
+ocfs2-do-not-put-bh-when-buffer_uptodate-failed.patch
+ocfs2-fix-panic-on-kfree-xattr-name.patch
+clk-s2mps11-fix-possible-null-pointer-dereference.patch
+smarter-propagate_mnt.patch
+block-fix-for_each_bvec.patch
+ext4-fibmap-ioctl-causes-bug_on-due-to-handle-ext_max_blocks.patch
+ext4-note-the-error-in-ext4_end_bio.patch
+ext4-fix-jbd2-warning-under-heavy-xattr-load.patch
+ext4-move-ext4_update_i_disksize-into-mpage_map_and_submit_extent.patch
+ext4-use-i_size_read-in-ext4_unaligned_aio.patch
diff --git a/queue-3.14/smarter-propagate_mnt.patch b/queue-3.14/smarter-propagate_mnt.patch
new file mode 100644
index 0000000..c6eded8
--- /dev/null
+++ b/queue-3.14/smarter-propagate_mnt.patch
@@ -0,0 +1,370 @@
+From f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 27 Feb 2014 09:35:45 -0500
+Subject: smarter propagate_mnt()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 upstream.
+
+The current mainline has copies propagated to *all* nodes, then
+tears down the copies we made for nodes that do not contain
+counterparts of the desired mountpoint.  That sets the right
+propagation graph for the copies (at teardown time we move
+the slaves of removed node to a surviving peer or directly
+to master), but we end up paying a fairly steep price in
+useless allocations.  It's fairly easy to create a situation
+where N calls of mount(2) create exactly N bindings, with
+O(N^2) vfsmounts allocated and freed in the process.
+
+Fortunately, it is possible to avoid those allocations/freeings.
+The trick is to create copies in the right order and find which
+one would've eventually become a master with the current algorithm.
+It turns out to be possible in O(nodes getting propagation) time
+and with no extra allocations at all.
+
+One part is that we need to make sure that eventual master will be
+created before its slaves, so we need to walk the propagation
+tree in a different order - by peer groups.  And iterate through
+the peers before dealing with the next group.
+
+Another thing is finding the (earlier) copy that will be a master
+of the one we are about to create; to do that we are (temporarily)
+marking the masters of the mountpoints we are attaching the copies to.
+
+Either we are in a peer of the last mountpoint we'd dealt with,
+or we have the following situation: we are attaching to mountpoint M,
+the last copy S_0 had been attached to M_0 and there are sequences
+S_0...S_n, M_0...M_n such that S_{i+1} is a master of S_{i},
+S_{i} mounted on M{i} and we need to create a slave of the first S_{k}
+such that M is getting propagation from M_{k}.  It means that the master
+of M_{k} will be among the sequence of masters of M.  On the
+other hand, the nearest marked node in that sequence will either
+be the master of M_{k} or the master of M_{k-1} (the latter -
+in the case if M_{k-1} is a slave of something M gets propagation
+from, but in a wrong peer group).
+
+So we go through the sequence of masters of M until we find
+a marked one (P).  Let N be the one before it.  Then we go through
+the sequence of masters of S_0 until we find one (say, S) mounted
+on a node D that has P as master and check if D is a peer of N.
+If it is, S will be the master of new copy, if not - the master of S
+will be.
+
+That's it for the hard part; the rest is fairly simple.  Iterator
+is in next_group(), handling of one prospective mountpoint is
+propagate_one().
+
+It seems to survive all tests and gives noticeably better performance
+than the current mainline for setups that are seriously using shared
+subtrees.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c        |   11 ++
+ fs/pnode.c            |  198 ++++++++++++++++++++++++++++++--------------------
+ fs/pnode.h            |    3 
+ include/linux/mount.h |    3 
+ 4 files changed, 133 insertions(+), 82 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -885,7 +885,7 @@ static struct mount *clone_mnt(struct mo
+                       goto out_free;
+       }
+-      mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
++      mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+       /* Don't allow unprivileged users to change mount flags */
+       if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
+               mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+@@ -1661,9 +1661,9 @@ static int attach_recursive_mnt(struct m
+               if (err)
+                       goto out;
+               err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
++              lock_mount_hash();
+               if (err)
+                       goto out_cleanup_ids;
+-              lock_mount_hash();
+               for (p = source_mnt; p; p = next_mnt(p, source_mnt))
+                       set_mnt_shared(p);
+       } else {
+@@ -1690,6 +1690,11 @@ static int attach_recursive_mnt(struct m
+       return 0;
+  out_cleanup_ids:
++      while (!hlist_empty(&tree_list)) {
++              child = hlist_entry(tree_list.first, struct mount, mnt_hash);
++              umount_tree(child, 0);
++      }
++      unlock_mount_hash();
+       cleanup_group_ids(source_mnt, NULL);
+  out:
+       return err;
+@@ -2044,7 +2049,7 @@ static int do_add_mount(struct mount *ne
+       struct mount *parent;
+       int err;
+-      mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
++      mnt_flags &= ~MNT_INTERNAL_FLAGS;
+       mp = lock_mount(path);
+       if (IS_ERR(mp))
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -164,46 +164,94 @@ static struct mount *propagation_next(st
+       }
+ }
+-/*
+- * return the source mount to be used for cloning
+- *
+- * @dest      the current destination mount
+- * @last_dest         the last seen destination mount
+- * @last_src          the last seen source mount
+- * @type      return CL_SLAVE if the new mount has to be
+- *            cloned as a slave.
+- */
+-static struct mount *get_source(struct mount *dest,
+-                              struct mount *last_dest,
+-                              struct mount *last_src,
+-                              int *type)
++static struct mount *next_group(struct mount *m, struct mount *origin)
+ {
+-      struct mount *p_last_src = NULL;
+-      struct mount *p_last_dest = NULL;
+-
+-      while (last_dest != dest->mnt_master) {
+-              p_last_dest = last_dest;
+-              p_last_src = last_src;
+-              last_dest = last_dest->mnt_master;
+-              last_src = last_src->mnt_master;
++      while (1) {
++              while (1) {
++                      struct mount *next;
++                      if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
++                              return first_slave(m);
++                      next = next_peer(m);
++                      if (m->mnt_group_id == origin->mnt_group_id) {
++                              if (next == origin)
++                                      return NULL;
++                      } else if (m->mnt_slave.next != &next->mnt_slave)
++                              break;
++                      m = next;
++              }
++              /* m is the last peer */
++              while (1) {
++                      struct mount *master = m->mnt_master;
++                      if (m->mnt_slave.next != &master->mnt_slave_list)
++                              return next_slave(m);
++                      m = next_peer(master);
++                      if (master->mnt_group_id == origin->mnt_group_id)
++                              break;
++                      if (master->mnt_slave.next == &m->mnt_slave)
++                              break;
++                      m = master;
++              }
++              if (m == origin)
++                      return NULL;
+       }
++}
+-      if (p_last_dest) {
+-              do {
+-                      p_last_dest = next_peer(p_last_dest);
+-              } while (IS_MNT_NEW(p_last_dest));
+-              /* is that a peer of the earlier? */
+-              if (dest == p_last_dest) {
+-                      *type = CL_MAKE_SHARED;
+-                      return p_last_src;
++/* all accesses are serialized by namespace_sem */
++static struct user_namespace *user_ns;
++static struct mount *last_dest, *last_source, *dest_master;
++static struct mountpoint *mp;
++static struct hlist_head *list;
++
++static int propagate_one(struct mount *m)
++{
++      struct mount *child;
++      int type;
++      /* skip ones added by this propagate_mnt() */
++      if (IS_MNT_NEW(m))
++              return 0;
++      /* skip if mountpoint isn't covered by it */
++      if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
++              return 0;
++      if (m->mnt_group_id == last_dest->mnt_group_id) {
++              type = CL_MAKE_SHARED;
++      } else {
++              struct mount *n, *p;
++              for (n = m; ; n = p) {
++                      p = n->mnt_master;
++                      if (p == dest_master || IS_MNT_MARKED(p)) {
++                              while (last_dest->mnt_master != p) {
++                                      last_source = last_source->mnt_master;
++                                      last_dest = last_source->mnt_parent;
++                              }
++                              if (n->mnt_group_id != last_dest->mnt_group_id) {
++                                      last_source = last_source->mnt_master;
++                                      last_dest = last_source->mnt_parent;
++                              }
++                              break;
++                      }
+               }
++              type = CL_SLAVE;
++              /* beginning of peer group among the slaves? */
++              if (IS_MNT_SHARED(m))
++                      type |= CL_MAKE_SHARED;
+       }
+-      /* slave of the earlier, then */
+-      *type = CL_SLAVE;
+-      /* beginning of peer group among the slaves? */
+-      if (IS_MNT_SHARED(dest))
+-              *type |= CL_MAKE_SHARED;
+-      return last_src;
++
++      /* Notice when we are propagating across user namespaces */
++      if (m->mnt_ns->user_ns != user_ns)
++              type |= CL_UNPRIVILEGED;
++      child = copy_tree(last_source, last_source->mnt.mnt_root, type);
++      if (IS_ERR(child))
++              return PTR_ERR(child);
++      mnt_set_mountpoint(m, mp, child);
++      last_dest = m;
++      last_source = child;
++      if (m->mnt_master != dest_master) {
++              read_seqlock_excl(&mount_lock);
++              SET_MNT_MARK(m->mnt_master);
++              read_sequnlock_excl(&mount_lock);
++      }
++      hlist_add_head(&child->mnt_hash, list);
++      return 0;
+ }
+ /*
+@@ -222,56 +270,48 @@ static struct mount *get_source(struct m
+ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
+                   struct mount *source_mnt, struct hlist_head *tree_list)
+ {
+-      struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+-      struct mount *m, *child;
++      struct mount *m, *n;
+       int ret = 0;
+-      struct mount *prev_dest_mnt = dest_mnt;
+-      struct mount *prev_src_mnt  = source_mnt;
+-      HLIST_HEAD(tmp_list);
+-
+-      for (m = propagation_next(dest_mnt, dest_mnt); m;
+-                      m = propagation_next(m, dest_mnt)) {
+-              int type;
+-              struct mount *source;
+-
+-              if (IS_MNT_NEW(m))
+-                      continue;
+-
+-              source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+-
+-              /* Notice when we are propagating across user namespaces */
+-              if (m->mnt_ns->user_ns != user_ns)
+-                      type |= CL_UNPRIVILEGED;
+-
+-              child = copy_tree(source, source->mnt.mnt_root, type);
+-              if (IS_ERR(child)) {
+-                      ret = PTR_ERR(child);
+-                      tmp_list = *tree_list;
+-                      tmp_list.first->pprev = &tmp_list.first;
+-                      INIT_HLIST_HEAD(tree_list);
++
++      /*
++       * we don't want to bother passing tons of arguments to
++       * propagate_one(); everything is serialized by namespace_sem,
++       * so globals will do just fine.
++       */
++      user_ns = current->nsproxy->mnt_ns->user_ns;
++      last_dest = dest_mnt;
++      last_source = source_mnt;
++      mp = dest_mp;
++      list = tree_list;
++      dest_master = dest_mnt->mnt_master;
++
++      /* all peers of dest_mnt, except dest_mnt itself */
++      for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
++              ret = propagate_one(n);
++              if (ret)
+                       goto out;
+-              }
++      }
+-              if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
+-                      mnt_set_mountpoint(m, dest_mp, child);
+-                      hlist_add_head(&child->mnt_hash, tree_list);
+-              } else {
+-                      /*
+-                       * This can happen if the parent mount was bind mounted
+-                       * on some subdirectory of a shared/slave mount.
+-                       */
+-                      hlist_add_head(&child->mnt_hash, &tmp_list);
+-              }
+-              prev_dest_mnt = m;
+-              prev_src_mnt  = child;
++      /* all slave groups */
++      for (m = next_group(dest_mnt, dest_mnt); m;
++                      m = next_group(m, dest_mnt)) {
++              /* everything in that slave group */
++              n = m;
++              do {
++                      ret = propagate_one(n);
++                      if (ret)
++                              goto out;
++                      n = next_peer(n);
++              } while (n != m);
+       }
+ out:
+-      lock_mount_hash();
+-      while (!hlist_empty(&tmp_list)) {
+-              child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
+-              umount_tree(child, 0);
++      read_seqlock_excl(&mount_lock);
++      hlist_for_each_entry(n, tree_list, mnt_hash) {
++              m = n->mnt_parent;
++              if (m->mnt_master != dest_mnt->mnt_master)
++                      CLEAR_MNT_MARK(m->mnt_master);
+       }
+-      unlock_mount_hash();
++      read_sequnlock_excl(&mount_lock);
+       return ret;
+ }
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -16,6 +16,9 @@
+ #define IS_MNT_NEW(m)  (!(m)->mnt_ns)
+ #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
+ #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
++#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
++#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
++#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
+ #define CL_EXPIRE             0x01
+ #define CL_SLAVE              0x02
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -44,6 +44,8 @@ struct mnt_namespace;
+ #define MNT_SHARED_MASK       (MNT_UNBINDABLE)
+ #define MNT_PROPAGATION_MASK  (MNT_SHARED | MNT_UNBINDABLE)
++#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
++                          MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
+ #define MNT_INTERNAL  0x4000
+@@ -51,6 +53,7 @@ struct mnt_namespace;
+ #define MNT_LOCKED            0x800000
+ #define MNT_DOOMED            0x1000000
+ #define MNT_SYNC_UMOUNT               0x2000000
++#define MNT_MARKED            0x4000000
+ struct vfsmount {
+       struct dentry *mnt_root;        /* root of the mounted tree */
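
Of the machinery above, the only brand-new state is the temporary MNT_MARKED flag: masters of mountpoints that have already received a copy are marked during propagation and unmarked in one pass at the end. A trivial sketch of the flag helpers the patch adds to fs/pnode.h (the mount struct is reduced to the single field needed):

#include <stdio.h>

#define MNT_MARKED 0x4000000   /* value from the patch */

struct mount_sketch { int mnt_flags; };

#define IS_MNT_MARKED(m)  ((m)->mnt_flags & MNT_MARKED)
#define SET_MNT_MARK(m)   ((m)->mnt_flags |= MNT_MARKED)
#define CLEAR_MNT_MARK(m) ((m)->mnt_flags &= ~MNT_MARKED)

int main(void)
{
        struct mount_sketch master = { 0 };

        SET_MNT_MARK(&master);    /* while attaching copies to it */
        printf("marked: %d\n", !!IS_MNT_MARKED(&master));
        CLEAR_MNT_MARK(&master);  /* final pass over tree_list */
        printf("marked: %d\n", !!IS_MNT_MARKED(&master));
        return 0;
}
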
diff --git a/queue-3.14/xattr-guard-against-simultaneous-glibc-header-inclusion.patch b/queue-3.14/xattr-guard-against-simultaneous-glibc-header-inclusion.patch
new file mode 100644
index 0000000..668b954
--- /dev/null
+++ b/queue-3.14/xattr-guard-against-simultaneous-glibc-header-inclusion.patch
@@ -0,0 +1,78 @@
+From ea1a8217b06b41b31a2b60b0b83f75c77ef9c873 Mon Sep 17 00:00:00 2001
+From: Serge Hallyn <serge.hallyn@ubuntu.com>
+Date: Thu, 3 Apr 2014 14:48:33 -0700
+Subject: xattr: guard against simultaneous glibc header inclusion
+
+From: Serge Hallyn <serge.hallyn@ubuntu.com>
+
+commit ea1a8217b06b41b31a2b60b0b83f75c77ef9c873 upstream.
+
+If the glibc xattr.h header is included after the uapi header,
+compilation fails due to an enum re-using a #define from the uapi
+header.
+
+Protect against this by guarding the define and enum inclusions against
+each other.
+
+(See https://lists.debian.org/debian-glibc/2014/03/msg00029.html
+and https://sourceware.org/glibc/wiki/Synchronizing_Headers
+for more information.)
+
+Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Allan McRae <allan@archlinux.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/uapi/linux/libc-compat.h |    9 +++++++++
+ include/uapi/linux/xattr.h       |    7 +++++++
+ 2 files changed, 16 insertions(+)
+
+--- a/include/uapi/linux/libc-compat.h
++++ b/include/uapi/linux/libc-compat.h
+@@ -85,6 +85,12 @@
+ #endif /* _NETINET_IN_H */
++/* Definitions for xattr.h */
++#if defined(_SYS_XATTR_H)
++#define __UAPI_DEF_XATTR              0
++#else
++#define __UAPI_DEF_XATTR              1
++#endif
+ /* If we did not see any headers from any supported C libraries,
+  * or we are being included in the kernel, then define everything
+@@ -98,6 +104,9 @@
+ #define __UAPI_DEF_IPV6_MREQ          1
+ #define __UAPI_DEF_IPPROTO_V6         1
++/* Definitions for xattr.h */
++#define __UAPI_DEF_XATTR              1
++
+ #endif /* __GLIBC__ */
+ #endif /* _UAPI_LIBC_COMPAT_H */
+--- a/include/uapi/linux/xattr.h
++++ b/include/uapi/linux/xattr.h
+@@ -7,11 +7,18 @@
+   Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ */
++
++#include <linux/libc-compat.h>
++
+ #ifndef _UAPI_LINUX_XATTR_H
+ #define _UAPI_LINUX_XATTR_H
++#ifdef __UAPI_DEF_XATTR
++#define __USE_KERNEL_XATTR_DEFS
++
+ #define XATTR_CREATE  0x1     /* set value, fail if attr already exists */
+ #define XATTR_REPLACE 0x2     /* set value, fail if attr does not exist */
++#endif
+ /* Namespaces */
+ #define XATTR_OS2_PREFIX "os2."
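
The coordination pattern, compressed into one illustrative translation unit (in reality the two sides live in glibc's <sys/xattr.h> and this uapi header; here we simulate glibc having been included first):

#include <stdio.h>

/* pretend <sys/xattr.h> was included before the uapi header: */
#define _SYS_XATTR_H 1

/* uapi side, keyed off glibc's own include guard: */
#if defined(_SYS_XATTR_H)
#define __UAPI_DEF_XATTR 0   /* glibc got here first; stand down */
#else
#define __UAPI_DEF_XATTR 1
#endif

#if __UAPI_DEF_XATTR
#define XATTR_CREATE 0x1     /* would clash with glibc's enum */
#endif

int main(void)
{
        printf("uapi defines active: %d\n", __UAPI_DEF_XATTR);
        return 0;
}
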