--- /dev/null
+From b7aa84d9cb9f26da1a9312c3e39dbd1a3c25a426 Mon Sep 17 00:00:00 2001
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+Date: Tue, 8 Apr 2014 22:43:43 -0400
+Subject: block: Fix for_each_bvec()
+
+From: "Martin K. Petersen" <martin.petersen@oracle.com>
+
+commit b7aa84d9cb9f26da1a9312c3e39dbd1a3c25a426 upstream.
+
+Commit 4550dd6c6b062 introduced for_each_bvec(), which iterates over each
+bvec attached to a bio or bip. However, the macro fails to check bi_size
+before dereferencing, which can lead to crashes while counting/mapping
+integrity scatterlist segments.
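+
+As an illustrative sketch (a hypothetical caller, not code from this
+patch), an integrity-segment counting loop looks roughly like this; with
+the old expansion the bvec was computed before bi_size was tested, so an
+already-exhausted iterator still indexed the bvec array:
+
+	struct bvec_iter iter;
+	struct bio_vec bv;
+	unsigned segments = 0;
+
+	for_each_bvec(bv, bip->bip_vec, iter, bip->bip_iter) {
+		/* with the fixed macro this body is never entered, and bv
+		 * is never computed, once iter.bi_size reaches zero */
+		segments++;
+	}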
+
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Cc: Kent Overstreet <kmo@daterainc.com>
+Cc: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/bio.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -216,9 +216,9 @@ static inline void bvec_iter_advance(str
+ }
+
+ #define for_each_bvec(bvl, bio_vec, iter, start) \
+- for ((iter) = start; \
+- (bvl) = bvec_iter_bvec((bio_vec), (iter)), \
+- (iter).bi_size; \
++ for (iter = (start); \
++ (iter).bi_size && \
++ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
+ bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
+
+
--- /dev/null
+From 238e14055da87d0d012257788e39fe0df3a82226 Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Date: Fri, 21 Mar 2014 13:18:17 +0100
+Subject: clk: s2mps11: Fix possible NULL pointer dereference
+
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+
+commit 238e14055da87d0d012257788e39fe0df3a82226 upstream.
+
+If the parent device does not have of_node set, s2mps11_clk_parse_dt()
+returned NULL. This NULL was later passed to of_clk_add_provider(), which
+dereferenced it in a pr_debug() call.
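+
+For readers unfamiliar with the idiom, a generic sketch (with a
+hypothetical helper, not the actual probe code) of why ERR_PTR() is
+preferred here -- of_clk_add_provider() dereferences the node it is
+given, so the caller must be able to tell an error apart from a pointer
+it may pass on:
+
+	np = parse_helper(dev);			/* hypothetical helper */
+	if (IS_ERR(np))
+		return PTR_ERR(np);		/* never hand an error on */
+	ret = of_clk_add_provider(np, of_clk_src_simple_get, clk);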
+
+Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Signed-off-by: Mike Turquette <mturquette@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/clk-s2mps11.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/clk/clk-s2mps11.c
++++ b/drivers/clk/clk-s2mps11.c
+@@ -130,7 +130,7 @@ static struct device_node *s2mps11_clk_p
+ int i;
+
+ if (!iodev->dev->of_node)
+- return NULL;
++ return ERR_PTR(-EINVAL);
+
+ clk_np = of_find_node_by_name(iodev->dev->of_node, "clocks");
+ if (!clk_np) {
--- /dev/null
+From 4adb6ab3e0fa71363a5ef229544b2d17de6600d7 Mon Sep 17 00:00:00 2001
+From: Kazuya Mio <k-mio@sx.jp.nec.com>
+Date: Mon, 7 Apr 2014 10:53:28 -0400
+Subject: ext4: FIBMAP ioctl causes BUG_ON due to handle EXT_MAX_BLOCKS
+
+From: Kazuya Mio <k-mio@sx.jp.nec.com>
+
+commit 4adb6ab3e0fa71363a5ef229544b2d17de6600d7 upstream.
+
+When we try to get block 2^32-1 of a file that has the extent
+(ee_block=2^32-2, ee_len=1) with the FIBMAP ioctl, it causes a BUG_ON
+in ext4_ext_put_gap_in_cache().
+
+To avoid the problem, ext4_map_blocks() needs to check the file logical
+block number. ext4_ext_put_gap_in_cache(), called via ext4_map_blocks(),
+cannot handle 2^32-1 because the maximum file logical block number is 2^32-2.
+
+Note that ext4_ind_map_blocks() returns -EIO when the block number is invalid.
+So ext4_map_blocks() should also return the same errno.
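+
+A rough reproducer sketch (hypothetical and simplified; FIBMAP needs
+CAP_SYS_RAWIO, and the file's last extent must be (ee_block=2^32-2,
+ee_len=1) as described above):
+
+	unsigned int blk = 0xFFFFFFFF;		/* logical block 2^32-1 */
+	if (ioctl(fd, FIBMAP, &blk) < 0)
+		perror("FIBMAP");
+	/* before this fix the kernel could hit the BUG_ON; with it,
+	 * ext4_map_blocks() fails the lookup with -EIO instead */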
+
+Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -515,6 +515,10 @@ int ext4_map_blocks(handle_t *handle, st
+ "logical block %lu\n", inode->i_ino, flags, map->m_len,
+ (unsigned long) map->m_lblk);
+
++ /* We can handle the block number less than EXT_MAX_BLOCKS */
++ if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
++ return -EIO;
++
+ /* Lookup extent status tree firstly */
+ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+ ext4_es_lru_add(inode);
--- /dev/null
+From ec4cb1aa2b7bae18dd8164f2e9c7c51abcf61280 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 7 Apr 2014 10:54:21 -0400
+Subject: ext4: fix jbd2 warning under heavy xattr load
+
+From: Jan Kara <jack@suse.cz>
+
+commit ec4cb1aa2b7bae18dd8164f2e9c7c51abcf61280 upstream.
+
+When heavily exercising xattr code the assertion that
+jbd2_journal_dirty_metadata() shouldn't return error was triggered:
+
+WARNING: at /srv/autobuild-ceph/gitbuilder.git/build/fs/jbd2/transaction.c:1237
+jbd2_journal_dirty_metadata+0x1ba/0x260()
+
+CPU: 0 PID: 8877 Comm: ceph-osd Tainted: G W 3.10.0-ceph-00049-g68d04c9 #1
+Hardware name: Dell Inc. PowerEdge R410/01V648, BIOS 1.6.3 02/07/2011
+ ffffffff81a1d3c8 ffff880214469928 ffffffff816311b0 ffff880214469968
+ ffffffff8103fae0 ffff880214469958 ffff880170a9dc30 ffff8802240fbe80
+ 0000000000000000 ffff88020b366000 ffff8802256e7510 ffff880214469978
+Call Trace:
+ [<ffffffff816311b0>] dump_stack+0x19/0x1b
+ [<ffffffff8103fae0>] warn_slowpath_common+0x70/0xa0
+ [<ffffffff8103fb2a>] warn_slowpath_null+0x1a/0x20
+ [<ffffffff81267c2a>] jbd2_journal_dirty_metadata+0x1ba/0x260
+ [<ffffffff81245093>] __ext4_handle_dirty_metadata+0xa3/0x140
+ [<ffffffff812561f3>] ext4_xattr_release_block+0x103/0x1f0
+ [<ffffffff81256680>] ext4_xattr_block_set+0x1e0/0x910
+ [<ffffffff8125795b>] ext4_xattr_set_handle+0x38b/0x4a0
+ [<ffffffff810a319d>] ? trace_hardirqs_on+0xd/0x10
+ [<ffffffff81257b32>] ext4_xattr_set+0xc2/0x140
+ [<ffffffff81258547>] ext4_xattr_user_set+0x47/0x50
+ [<ffffffff811935ce>] generic_setxattr+0x6e/0x90
+ [<ffffffff81193ecb>] __vfs_setxattr_noperm+0x7b/0x1c0
+ [<ffffffff811940d4>] vfs_setxattr+0xc4/0xd0
+ [<ffffffff8119421e>] setxattr+0x13e/0x1e0
+ [<ffffffff811719c7>] ? __sb_start_write+0xe7/0x1b0
+ [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
+ [<ffffffff8118c65c>] ? fget_light+0x3c/0x130
+ [<ffffffff8118f2e8>] ? mnt_want_write_file+0x28/0x60
+ [<ffffffff8118f1f8>] ? __mnt_want_write+0x58/0x70
+ [<ffffffff811946be>] SyS_fsetxattr+0xbe/0x100
+ [<ffffffff816407c2>] system_call_fastpath+0x16/0x1b
+
+The reason for the warning is that the buffer_head passed into
+jbd2_journal_dirty_metadata() didn't have a journal_head attached. This is
+caused by the following race between two ext4_xattr_release_block() calls:
+
+CPU1 CPU2
+ext4_xattr_release_block() ext4_xattr_release_block()
+lock_buffer(bh);
+/* False */
+if (BHDR(bh)->h_refcount == cpu_to_le32(1))
+} else {
+ le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+ unlock_buffer(bh);
+ lock_buffer(bh);
+ /* True */
+ if (BHDR(bh)->h_refcount == cpu_to_le32(1))
+ get_bh(bh);
+ ext4_free_blocks()
+ ...
+ jbd2_journal_forget()
+ jbd2_journal_unfile_buffer()
+ -> JH is gone
+ error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+ -> triggers the warning
+
+We fix the problem by moving ext4_handle_dirty_xattr_block() under the
+buffer lock. Sadly this cannot be done in nojournal mode, as that
+function can call sync_dirty_buffer(), which would deadlock. Luckily, in
+nojournal mode the race is harmless (we only dirty an already freed
+buffer), and thus for nojournal mode we leave the dirtying outside of the
+buffer lock.
+
+Reported-by: Sage Weil <sage@inktank.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/xattr.c | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -517,8 +517,8 @@ static void ext4_xattr_update_super_bloc
+ }
+
+ /*
+- * Release the xattr block BH: If the reference count is > 1, decrement
+- * it; otherwise free the block.
++ * Release the xattr block BH: If the reference count is > 1, decrement it;
++ * otherwise free the block.
+ */
+ static void
+ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+@@ -538,16 +538,31 @@ ext4_xattr_release_block(handle_t *handl
+ if (ce)
+ mb_cache_entry_free(ce);
+ get_bh(bh);
++ unlock_buffer(bh);
+ ext4_free_blocks(handle, inode, bh, 0, 1,
+ EXT4_FREE_BLOCKS_METADATA |
+ EXT4_FREE_BLOCKS_FORGET);
+- unlock_buffer(bh);
+ } else {
+ le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+ if (ce)
+ mb_cache_entry_release(ce);
++ /*
++ * Beware of this ugliness: Releasing of xattr block references
++ * from different inodes can race and so we have to protect
++ * from a race where someone else frees the block (and releases
++ * its journal_head) before we are done dirtying the buffer. In
++ * nojournal mode this race is harmless and we actually cannot
++ * call ext4_handle_dirty_xattr_block() with locked buffer as
++ * that function can call sync_dirty_buffer() so for that case
++ * we handle the dirtying after unlocking the buffer.
++ */
++ if (ext4_handle_valid(handle))
++ error = ext4_handle_dirty_xattr_block(handle, inode,
++ bh);
+ unlock_buffer(bh);
+- error = ext4_handle_dirty_xattr_block(handle, inode, bh);
++ if (!ext4_handle_valid(handle))
++ error = ext4_handle_dirty_xattr_block(handle, inode,
++ bh);
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
--- /dev/null
+From 622cad1325e404598fe3b148c3fa640dbaabc235 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Fri, 11 Apr 2014 10:35:17 -0400
+Subject: ext4: move ext4_update_i_disksize() into mpage_map_and_submit_extent()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 622cad1325e404598fe3b148c3fa640dbaabc235 upstream.
+
+The function ext4_update_i_disksize() is used in only one place, in
+the function mpage_map_and_submit_extent(). Move its code to simplify
+the code paths, and also move the call to ext4_mark_inode_dirty() into
+the i_data_sem's critical region, to be consistent with all of the
+other places where we update i_disksize. That way, we also keep the
+raw_inode's i_disksize protected, to avoid the following race:
+
+ CPU #1 CPU #2
+
+ down_write(&i_data_sem)
+ Modify i_disk_size
+ up_write(&i_data_sem)
+ down_write(&i_data_sem)
+ Modify i_disk_size
+ Copy i_disk_size to on-disk inode
+ up_write(&i_data_sem)
+ Copy i_disk_size to on-disk inode
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h | 17 -----------------
+ fs/ext4/inode.c | 14 ++++++++++++--
+ 2 files changed, 12 insertions(+), 19 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -2462,23 +2462,6 @@ static inline void ext4_update_i_disksiz
+ up_write(&EXT4_I(inode)->i_data_sem);
+ }
+
+-/*
+- * Update i_disksize after writeback has been started. Races with truncate
+- * are avoided by checking i_size under i_data_sem.
+- */
+-static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
+-{
+- loff_t i_size;
+-
+- down_write(&EXT4_I(inode)->i_data_sem);
+- i_size = i_size_read(inode);
+- if (newsize > i_size)
+- newsize = i_size;
+- if (newsize > EXT4_I(inode)->i_disksize)
+- EXT4_I(inode)->i_disksize = newsize;
+- up_write(&EXT4_I(inode)->i_data_sem);
+-}
+-
+ struct ext4_group_info {
+ unsigned long bb_state;
+ struct rb_root bb_free_root;
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2236,13 +2236,23 @@ static int mpage_map_and_submit_extent(h
+ return err;
+ } while (map->m_len);
+
+- /* Update on-disk size after IO is submitted */
++ /*
++ * Update on-disk size after IO is submitted. Races with
++ * truncate are avoided by checking i_size under i_data_sem.
++ */
+ disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
+ if (disksize > EXT4_I(inode)->i_disksize) {
+ int err2;
++ loff_t i_size;
+
+- ext4_wb_update_i_disksize(inode, disksize);
++ down_write(&EXT4_I(inode)->i_data_sem);
++ i_size = i_size_read(inode);
++ if (disksize > i_size)
++ disksize = i_size;
++ if (disksize > EXT4_I(inode)->i_disksize)
++ EXT4_I(inode)->i_disksize = disksize;
+ err2 = ext4_mark_inode_dirty(handle, inode);
++ up_write(&EXT4_I(inode)->i_data_sem);
+ if (err2)
+ ext4_error(inode->i_sb,
+ "Failed to mark inode %lu dirty",
--- /dev/null
+From 9503c67c93ed0b95ba62d12d1fd09da6245dbdd6 Mon Sep 17 00:00:00 2001
+From: Matthew Wilcox <willy@linux.intel.com>
+Date: Mon, 7 Apr 2014 10:54:20 -0400
+Subject: ext4: note the error in ext4_end_bio()
+
+From: Matthew Wilcox <willy@linux.intel.com>
+
+commit 9503c67c93ed0b95ba62d12d1fd09da6245dbdd6 upstream.
+
+ext4_end_bio() currently throws away the error that it receives. Chances
+are this is part of a spate of errors, one of which will end up getting
+the error returned to userspace somehow, but we shouldn't take that risk.
+Also print out the errno to aid in debugging.
+
+Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/page-io.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio
+ if (error) {
+ struct inode *inode = io_end->inode;
+
+- ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
++ ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
+ "(offset %llu size %ld starting block %llu)",
+- inode->i_ino,
++ error, inode->i_ino,
+ (unsigned long long) io_end->offset,
+ (long) io_end->size,
+ (unsigned long long)
+ bi_sector >> (inode->i_blkbits - 9));
++ mapping_set_error(inode->i_mapping, error);
+ }
+
+ if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
--- /dev/null
+From 6e6358fc3c3c862bfe9a5bc029d3f8ce43dc9765 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 12 Apr 2014 12:45:25 -0400
+Subject: ext4: use i_size_read in ext4_unaligned_aio()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 6e6358fc3c3c862bfe9a5bc029d3f8ce43dc9765 upstream.
+
+We haven't taken i_mutex yet, so we need to use i_size_read().
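+
+For reference, a simplified sketch of the difference (not a quote of the
+header): i_size_read() protects the read with a seqcount on 32-bit SMP
+kernels, so a reader that does not hold i_mutex cannot observe a torn
+64-bit i_size:
+
+	loff_t size = i_size_read(inode);	/* safe without i_mutex */
+	loff_t raw  = inode->i_size;		/* may be torn on 32-bit without i_mutex */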
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/file.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode,
+ size_t count = iov_length(iov, nr_segs);
+ loff_t final_size = pos + count;
+
+- if (pos >= inode->i_size)
++ if (pos >= i_size_read(inode))
+ return 0;
+
+ if ((pos & blockmask) || (final_size & blockmask))
--- /dev/null
+From 34aa8dac482f1358d59110d5e3a12f4351f6acaa Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 3 Apr 2014 14:46:49 -0700
+Subject: ocfs2: dlm: fix lock migration crash
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 34aa8dac482f1358d59110d5e3a12f4351f6acaa upstream.
+
+This issue was introduced by commit 800deef3f6f8 ("ocfs2: use
+list_for_each_entry where benefical") in 2007, where it replaced
+list_for_each with list_for_each_entry. The variable "lock" will point
+to invalid data if the "tmpq" list is empty, and a panic will be triggered
+because of this. Sunil advised reverting it, but the old version was
+also not right: at the end of the outer for loop, that
+list_for_each_entry also leaves "lock" set to invalid data, and then in the
+next loop, if the "tmpq" list is empty, "lock" will be stale invalid
+data and cause the panic. So revert back to list_for_each and reset
+"lock" to NULL to fix this issue.
+
+Another concern is that this seemingly cannot happen, because the "tmpq"
+list should not be empty. Let me describe how it can.
+
+old lock resource owner(node 1): migration target(node 2):
+imagine there's a lockres with an EX lock from node 2 in
+the granted list, and an NR lock from node x with convert_type
+EX in the converting list.
+dlm_empty_lockres() {
+ dlm_pick_migration_target() {
+ pick node 2 as target as its lock is the first one
+ in granted list.
+ }
+ dlm_migrate_lockres() {
+ dlm_mark_lockres_migrating() {
+ res->state |= DLM_LOCK_RES_BLOCK_DIRTY;
+ wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
+ //after the above code, we can not dirty lockres any more,
+ // so dlm_thread shuffle list will not run
+ downconvert lock from EX to NR
+ upconvert lock from NR to EX
+<<< migration may schedule out here, then
+<<< node 2 send down convert request to convert type from EX to
+<<< NR, then send up convert request to convert type from NR to
+<<< EX, at this time, lockres granted list is empty, and two locks
+<<< in the converting list, node x up convert lock followed by
+<<< node 2 up convert lock.
+
+ // will set lockres RES_MIGRATING flag, the following
+ // lock/unlock can not run
+ dlm_lockres_release_ast(dlm, res);
+ }
+
+ dlm_send_one_lockres()
+ dlm_process_recovery_data()
+ for (i=0; i<mres->num_locks; i++)
+ if (ml->node == dlm->node_num)
+ for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+ list_for_each_entry(lock, tmpq, list)
+ if (lock) break; <<< lock is invalid as grant list is empty.
+ }
+ if (lock->ml.node != ml->node)
+ BUG() >>> crash here
+ }
+
+I observed the above lock status in a vmcore from one of our internal
+bug reports.
+
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Wengang Wang <wen.gang.wang@oracle.com>
+Cc: Sunil Mushran <sunil.mushran@gmail.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -1750,13 +1750,13 @@ static int dlm_process_recovery_data(str
+ struct dlm_migratable_lockres *mres)
+ {
+ struct dlm_migratable_lock *ml;
+- struct list_head *queue;
++ struct list_head *queue, *iter;
+ struct list_head *tmpq = NULL;
+ struct dlm_lock *newlock = NULL;
+ struct dlm_lockstatus *lksb = NULL;
+ int ret = 0;
+ int i, j, bad;
+- struct dlm_lock *lock = NULL;
++ struct dlm_lock *lock;
+ u8 from = O2NM_MAX_NODES;
+ unsigned int added = 0;
+ __be64 c;
+@@ -1791,14 +1791,16 @@ static int dlm_process_recovery_data(str
+ /* MIGRATION ONLY! */
+ BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
+
++ lock = NULL;
+ spin_lock(&res->spinlock);
+ for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+ tmpq = dlm_list_idx_to_ptr(res, j);
+- list_for_each_entry(lock, tmpq, list) {
+- if (lock->ml.cookie != ml->cookie)
+- lock = NULL;
+- else
++ list_for_each(iter, tmpq) {
++ lock = list_entry(iter,
++ struct dlm_lock, list);
++ if (lock->ml.cookie == ml->cookie)
+ break;
++ lock = NULL;
+ }
+ if (lock)
+ break;
--- /dev/null
+From ded2cf71419b9353060e633b59e446c42a6a2a09 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Thu, 3 Apr 2014 14:46:51 -0700
+Subject: ocfs2: dlm: fix recovery hung
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit ded2cf71419b9353060e633b59e446c42a6a2a09 upstream.
+
+There is a race window in dlm_do_recovery() between dlm_remaster_locks()
+and dlm_reset_recovery() when the recovery master has nearly finished the
+recovery process for a dead node. After the master sends the FINALIZE_RECO
+message in dlm_remaster_locks(), another node may become the recovery
+master for another dead node and then send the BEGIN_RECO message to
+all the nodes, including the old master. In the old master's handler for
+this message, dlm_begin_reco_handler(), dlm->reco.dead_node and
+dlm->reco.new_master will be set to the second dead node and the new
+master; then in dlm_reset_recovery() these two variables will be reset
+to their default values. This leaves the new recovery master unable to
+finish the recovery process, and eventually the whole cluster hangs
+waiting for recovery.
+
+old recovery master: new recovery master:
+dlm_remaster_locks()
+ become recovery master for
+ another dead node.
+ dlm_send_begin_reco_message()
+dlm_begin_reco_handler()
+{
+ if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
+ return -EAGAIN;
+ }
+ dlm_set_reco_master(dlm, br->node_idx);
+ dlm_set_reco_dead_node(dlm, br->dead_node);
+}
+dlm_reset_recovery()
+{
+ dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
+ dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
+}
+ will hang in dlm_remaster_locks() for
+ request dlm locks info
+
+Before sending the FINALIZE_RECO message, the recovery master should set
+DLM_RECO_STATE_FINALIZE for itself and clear it after the recovery is
+done. This closes the race window, as BEGIN_RECO messages will not be
+handled before the DLM_RECO_STATE_FINALIZE flag is cleared.
+
+A similar race may happen between the new recovery master and a normal
+node that is in dlm_finalize_reco_handler(); fix that as well.
+
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Reviewed-by: Wengang Wang <wen.gang.wang@oracle.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlm/dlmrecovery.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/fs/ocfs2/dlm/dlmrecovery.c
++++ b/fs/ocfs2/dlm/dlmrecovery.c
+@@ -537,7 +537,10 @@ master_here:
+ /* success! see if any other nodes need recovery */
+ mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
+ dlm->name, dlm->reco.dead_node, dlm->node_num);
+- dlm_reset_recovery(dlm);
++ spin_lock(&dlm->spinlock);
++ __dlm_reset_recovery(dlm);
++ dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
++ spin_unlock(&dlm->spinlock);
+ }
+ dlm_end_recovery(dlm);
+
+@@ -695,6 +698,14 @@ static int dlm_remaster_locks(struct dlm
+ if (all_nodes_done) {
+ int ret;
+
++ /* Set this flag on recovery master to avoid
++ * a new recovery for another dead node start
++ * before the recovery is not done. That may
++ * cause recovery hung.*/
++ spin_lock(&dlm->spinlock);
++ dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
++ spin_unlock(&dlm->spinlock);
++
+ /* all nodes are now in DLM_RECO_NODE_DATA_DONE state
+ * just send a finalize message to everyone and
+ * clean up */
+@@ -2884,8 +2895,8 @@ int dlm_finalize_reco_handler(struct o2n
+ BUG();
+ }
+ dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
++ __dlm_reset_recovery(dlm);
+ spin_unlock(&dlm->spinlock);
+- dlm_reset_recovery(dlm);
+ dlm_kick_recovery_thread(dlm);
+ break;
+ default:
--- /dev/null
+From f7cf4f5bfe073ad792ab49c04f247626b3e38db6 Mon Sep 17 00:00:00 2001
+From: alex chen <alex.chen@huawei.com>
+Date: Thu, 3 Apr 2014 14:47:05 -0700
+Subject: ocfs2: do not put bh when buffer_uptodate failed
+
+From: alex chen <alex.chen@huawei.com>
+
+commit f7cf4f5bfe073ad792ab49c04f247626b3e38db6 upstream.
+
+Do not put the bh when buffer_uptodate() fails in ocfs2_write_block() and
+ocfs2_write_super_or_backup(), because the bh is already put in b_end_io.
+Otherwise we hit the warning "VFS: brelse: Trying to free free
+buffer".
+
+Signed-off-by: Alex Chen <alex.chen@huawei.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Acked-by: Joel Becker <jlbec@evilplan.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/buffer_head_io.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ocfs2/buffer_head_io.c
++++ b/fs/ocfs2/buffer_head_io.c
+@@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super
+ * information for this bh as it's not marked locally
+ * uptodate. */
+ ret = -EIO;
+- put_bh(bh);
+ mlog_errno(ret);
+ }
+
+@@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct o
+
+ if (!buffer_uptodate(bh)) {
+ ret = -EIO;
+- put_bh(bh);
+ mlog_errno(ret);
+ }
+
--- /dev/null
+From f81c20158f8d5f7938d5eb86ecc42ecc09273ce6 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Thu, 3 Apr 2014 14:47:07 -0700
+Subject: ocfs2: fix panic on kfree(xattr->name)
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit f81c20158f8d5f7938d5eb86ecc42ecc09273ce6 upstream.
+
+Commit 9548906b2bb7 ('xattr: Constify ->name member of "struct xattr"')
+missed that ocfs2 is calling kfree(xattr->name). As a result, a kernel
+panic occurs upon calling kfree(xattr->name) because xattr->name now
+refers to static constant names. This patch removes kfree(xattr->name)
+from ocfs2_mknod() and ocfs2_symlink().
+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reported-by: Tariq Saeed <tariq.x.saeed@oracle.com>
+Tested-by: Tariq Saeed <tariq.x.saeed@oracle.com>
+Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/namei.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ocfs2/namei.c
++++ b/fs/ocfs2/namei.c
+@@ -450,7 +450,6 @@ leave:
+
+ brelse(new_fe_bh);
+ brelse(parent_fe_bh);
+- kfree(si.name);
+ kfree(si.value);
+
+ ocfs2_free_dir_lookup_result(&lookup);
+@@ -1855,7 +1854,6 @@ bail:
+
+ brelse(new_fe_bh);
+ brelse(parent_fe_bh);
+- kfree(si.name);
+ kfree(si.value);
+ ocfs2_free_dir_lookup_result(&lookup);
+ if (inode_ac)
mm-hugetlb-fix-softlockup-when-a-large-number-of-hugepages-are-freed.patch
mm-vmscan-do-not-swap-anon-pages-just-because-free-file-is-low.patch
hung_task-check-the-value-of-sysctl_hung_task_timeout_sec.patch
+xattr-guard-against-simultaneous-glibc-header-inclusion.patch
+ocfs2-dlm-fix-lock-migration-crash.patch
+ocfs2-dlm-fix-recovery-hung.patch
+ocfs2-do-not-put-bh-when-buffer_uptodate-failed.patch
+ocfs2-fix-panic-on-kfree-xattr-name.patch
+clk-s2mps11-fix-possible-null-pointer-dereference.patch
+smarter-propagate_mnt.patch
+block-fix-for_each_bvec.patch
+ext4-fibmap-ioctl-causes-bug_on-due-to-handle-ext_max_blocks.patch
+ext4-note-the-error-in-ext4_end_bio.patch
+ext4-fix-jbd2-warning-under-heavy-xattr-load.patch
+ext4-move-ext4_update_i_disksize-into-mpage_map_and_submit_extent.patch
+ext4-use-i_size_read-in-ext4_unaligned_aio.patch
--- /dev/null
+From f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 27 Feb 2014 09:35:45 -0500
+Subject: smarter propagate_mnt()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 upstream.
+
+The current mainline has copies propagated to *all* nodes, then
+tears down the copies we made for nodes that do not contain
+counterparts of the desired mountpoint. That sets the right
+propagation graph for the copies (at teardown time we move
+the slaves of removed node to a surviving peer or directly
+to master), but we end up paying a fairly steep price in
+useless allocations. It's fairly easy to create a situation
+where N calls of mount(2) create exactly N bindings, with
+O(N^2) vfsmounts allocated and freed in the process.
+
+Fortunately, it is possible to avoid those allocations/freeings.
+The trick is to create copies in the right order and find which
+one would've eventually become a master with the current algorithm.
+It turns out to be possible in O(nodes getting propagation) time
+and with no extra allocations at all.
+
+One part is that we need to make sure that the eventual master will be
+created before its slaves, so we need to walk the propagation
+tree in a different order - by peer groups - and iterate through
+the peers before dealing with the next group.
+
+Another thing is finding the (earlier) copy that will be a master
+of the one we are about to create; to do that we are (temporarily) marking
+the masters of the mountpoints we are attaching the copies to.
+
+Either we are in a peer of the last mountpoint we'd dealt with,
+or we have the following situation: we are attaching to mountpoint M,
+the last copy S_0 had been attached to M_0 and there are sequences
+S_0...S_n, M_0...M_n such that S_{i+1} is a master of S_{i},
+S_{i} mounted on M{i} and we need to create a slave of the first S_{k}
+such that M is getting propagation from M_{k}. It means that the master
+of M_{k} will be among the sequence of masters of M. On the
+other hand, the nearest marked node in that sequence will either
+be the master of M_{k} or the master of M_{k-1} (the latter -
+in the case if M_{k-1} is a slave of something M gets propagation
+from, but in a wrong peer group).
+
+So we go through the sequence of masters of M until we find
+a marked one (P). Let N be the one before it. Then we go through
+the sequence of masters of S_0 until we find one (say, S) mounted
+on a node D that has P as master and check if D is a peer of N.
+If it is, S will be the master of new copy, if not - the master of S
+will be.
+
+That's it for the hard part; the rest is fairly simple. Iterator
+is in next_group(), handling of one prospective mountpoint is
+propagate_one().
+
+It seems to survive all tests and gives a noticeably better performance
+than the current mainline for setups that are seriously using shared
+subtrees.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 11 ++
+ fs/pnode.c | 198 ++++++++++++++++++++++++++++++--------------------
+ fs/pnode.h | 3
+ include/linux/mount.h | 3
+ 4 files changed, 133 insertions(+), 82 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -885,7 +885,7 @@ static struct mount *clone_mnt(struct mo
+ goto out_free;
+ }
+
+- mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
++ mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+ /* Don't allow unprivileged users to change mount flags */
+ if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
+ mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+@@ -1661,9 +1661,9 @@ static int attach_recursive_mnt(struct m
+ if (err)
+ goto out;
+ err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
++ lock_mount_hash();
+ if (err)
+ goto out_cleanup_ids;
+- lock_mount_hash();
+ for (p = source_mnt; p; p = next_mnt(p, source_mnt))
+ set_mnt_shared(p);
+ } else {
+@@ -1690,6 +1690,11 @@ static int attach_recursive_mnt(struct m
+ return 0;
+
+ out_cleanup_ids:
++ while (!hlist_empty(&tree_list)) {
++ child = hlist_entry(tree_list.first, struct mount, mnt_hash);
++ umount_tree(child, 0);
++ }
++ unlock_mount_hash();
+ cleanup_group_ids(source_mnt, NULL);
+ out:
+ return err;
+@@ -2044,7 +2049,7 @@ static int do_add_mount(struct mount *ne
+ struct mount *parent;
+ int err;
+
+- mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
++ mnt_flags &= ~MNT_INTERNAL_FLAGS;
+
+ mp = lock_mount(path);
+ if (IS_ERR(mp))
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -164,46 +164,94 @@ static struct mount *propagation_next(st
+ }
+ }
+
+-/*
+- * return the source mount to be used for cloning
+- *
+- * @dest the current destination mount
+- * @last_dest the last seen destination mount
+- * @last_src the last seen source mount
+- * @type return CL_SLAVE if the new mount has to be
+- * cloned as a slave.
+- */
+-static struct mount *get_source(struct mount *dest,
+- struct mount *last_dest,
+- struct mount *last_src,
+- int *type)
++static struct mount *next_group(struct mount *m, struct mount *origin)
+ {
+- struct mount *p_last_src = NULL;
+- struct mount *p_last_dest = NULL;
+-
+- while (last_dest != dest->mnt_master) {
+- p_last_dest = last_dest;
+- p_last_src = last_src;
+- last_dest = last_dest->mnt_master;
+- last_src = last_src->mnt_master;
++ while (1) {
++ while (1) {
++ struct mount *next;
++ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
++ return first_slave(m);
++ next = next_peer(m);
++ if (m->mnt_group_id == origin->mnt_group_id) {
++ if (next == origin)
++ return NULL;
++ } else if (m->mnt_slave.next != &next->mnt_slave)
++ break;
++ m = next;
++ }
++ /* m is the last peer */
++ while (1) {
++ struct mount *master = m->mnt_master;
++ if (m->mnt_slave.next != &master->mnt_slave_list)
++ return next_slave(m);
++ m = next_peer(master);
++ if (master->mnt_group_id == origin->mnt_group_id)
++ break;
++ if (master->mnt_slave.next == &m->mnt_slave)
++ break;
++ m = master;
++ }
++ if (m == origin)
++ return NULL;
+ }
++}
+
+- if (p_last_dest) {
+- do {
+- p_last_dest = next_peer(p_last_dest);
+- } while (IS_MNT_NEW(p_last_dest));
+- /* is that a peer of the earlier? */
+- if (dest == p_last_dest) {
+- *type = CL_MAKE_SHARED;
+- return p_last_src;
++/* all accesses are serialized by namespace_sem */
++static struct user_namespace *user_ns;
++static struct mount *last_dest, *last_source, *dest_master;
++static struct mountpoint *mp;
++static struct hlist_head *list;
++
++static int propagate_one(struct mount *m)
++{
++ struct mount *child;
++ int type;
++ /* skip ones added by this propagate_mnt() */
++ if (IS_MNT_NEW(m))
++ return 0;
++ /* skip if mountpoint isn't covered by it */
++ if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
++ return 0;
++ if (m->mnt_group_id == last_dest->mnt_group_id) {
++ type = CL_MAKE_SHARED;
++ } else {
++ struct mount *n, *p;
++ for (n = m; ; n = p) {
++ p = n->mnt_master;
++ if (p == dest_master || IS_MNT_MARKED(p)) {
++ while (last_dest->mnt_master != p) {
++ last_source = last_source->mnt_master;
++ last_dest = last_source->mnt_parent;
++ }
++ if (n->mnt_group_id != last_dest->mnt_group_id) {
++ last_source = last_source->mnt_master;
++ last_dest = last_source->mnt_parent;
++ }
++ break;
++ }
+ }
++ type = CL_SLAVE;
++ /* beginning of peer group among the slaves? */
++ if (IS_MNT_SHARED(m))
++ type |= CL_MAKE_SHARED;
+ }
+- /* slave of the earlier, then */
+- *type = CL_SLAVE;
+- /* beginning of peer group among the slaves? */
+- if (IS_MNT_SHARED(dest))
+- *type |= CL_MAKE_SHARED;
+- return last_src;
++
++ /* Notice when we are propagating across user namespaces */
++ if (m->mnt_ns->user_ns != user_ns)
++ type |= CL_UNPRIVILEGED;
++ child = copy_tree(last_source, last_source->mnt.mnt_root, type);
++ if (IS_ERR(child))
++ return PTR_ERR(child);
++ mnt_set_mountpoint(m, mp, child);
++ last_dest = m;
++ last_source = child;
++ if (m->mnt_master != dest_master) {
++ read_seqlock_excl(&mount_lock);
++ SET_MNT_MARK(m->mnt_master);
++ read_sequnlock_excl(&mount_lock);
++ }
++ hlist_add_head(&child->mnt_hash, list);
++ return 0;
+ }
+
+ /*
+@@ -222,56 +270,48 @@ static struct mount *get_source(struct m
+ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
+ struct mount *source_mnt, struct hlist_head *tree_list)
+ {
+- struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+- struct mount *m, *child;
++ struct mount *m, *n;
+ int ret = 0;
+- struct mount *prev_dest_mnt = dest_mnt;
+- struct mount *prev_src_mnt = source_mnt;
+- HLIST_HEAD(tmp_list);
+-
+- for (m = propagation_next(dest_mnt, dest_mnt); m;
+- m = propagation_next(m, dest_mnt)) {
+- int type;
+- struct mount *source;
+-
+- if (IS_MNT_NEW(m))
+- continue;
+-
+- source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+-
+- /* Notice when we are propagating across user namespaces */
+- if (m->mnt_ns->user_ns != user_ns)
+- type |= CL_UNPRIVILEGED;
+-
+- child = copy_tree(source, source->mnt.mnt_root, type);
+- if (IS_ERR(child)) {
+- ret = PTR_ERR(child);
+- tmp_list = *tree_list;
+- tmp_list.first->pprev = &tmp_list.first;
+- INIT_HLIST_HEAD(tree_list);
++
++ /*
++ * we don't want to bother passing tons of arguments to
++ * propagate_one(); everything is serialized by namespace_sem,
++ * so globals will do just fine.
++ */
++ user_ns = current->nsproxy->mnt_ns->user_ns;
++ last_dest = dest_mnt;
++ last_source = source_mnt;
++ mp = dest_mp;
++ list = tree_list;
++ dest_master = dest_mnt->mnt_master;
++
++ /* all peers of dest_mnt, except dest_mnt itself */
++ for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
++ ret = propagate_one(n);
++ if (ret)
+ goto out;
+- }
++ }
+
+- if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
+- mnt_set_mountpoint(m, dest_mp, child);
+- hlist_add_head(&child->mnt_hash, tree_list);
+- } else {
+- /*
+- * This can happen if the parent mount was bind mounted
+- * on some subdirectory of a shared/slave mount.
+- */
+- hlist_add_head(&child->mnt_hash, &tmp_list);
+- }
+- prev_dest_mnt = m;
+- prev_src_mnt = child;
++ /* all slave groups */
++ for (m = next_group(dest_mnt, dest_mnt); m;
++ m = next_group(m, dest_mnt)) {
++ /* everything in that slave group */
++ n = m;
++ do {
++ ret = propagate_one(n);
++ if (ret)
++ goto out;
++ n = next_peer(n);
++ } while (n != m);
+ }
+ out:
+- lock_mount_hash();
+- while (!hlist_empty(&tmp_list)) {
+- child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
+- umount_tree(child, 0);
++ read_seqlock_excl(&mount_lock);
++ hlist_for_each_entry(n, tree_list, mnt_hash) {
++ m = n->mnt_parent;
++ if (m->mnt_master != dest_mnt->mnt_master)
++ CLEAR_MNT_MARK(m->mnt_master);
+ }
+- unlock_mount_hash();
++ read_sequnlock_excl(&mount_lock);
+ return ret;
+ }
+
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -16,6 +16,9 @@
+ #define IS_MNT_NEW(m) (!(m)->mnt_ns)
+ #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
+ #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
++#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
++#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
++#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
+
+ #define CL_EXPIRE 0x01
+ #define CL_SLAVE 0x02
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -44,6 +44,8 @@ struct mnt_namespace;
+ #define MNT_SHARED_MASK (MNT_UNBINDABLE)
+ #define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE)
+
++#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
++ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
+
+ #define MNT_INTERNAL 0x4000
+
+@@ -51,6 +53,7 @@ struct mnt_namespace;
+ #define MNT_LOCKED 0x800000
+ #define MNT_DOOMED 0x1000000
+ #define MNT_SYNC_UMOUNT 0x2000000
++#define MNT_MARKED 0x4000000
+
+ struct vfsmount {
+ struct dentry *mnt_root; /* root of the mounted tree */
--- /dev/null
+From ea1a8217b06b41b31a2b60b0b83f75c77ef9c873 Mon Sep 17 00:00:00 2001
+From: Serge Hallyn <serge.hallyn@ubuntu.com>
+Date: Thu, 3 Apr 2014 14:48:33 -0700
+Subject: xattr: guard against simultaneous glibc header inclusion
+
+From: Serge Hallyn <serge.hallyn@ubuntu.com>
+
+commit ea1a8217b06b41b31a2b60b0b83f75c77ef9c873 upstream.
+
+If the glibc xattr.h header is included after the uapi header,
+compilation fails due to an enum re-using a #define from the uapi
+header.
+
+Protect against this by guarding the define and enum inclusions against
+each other.
+
+(See https://lists.debian.org/debian-glibc/2014/03/msg00029.html
+and https://sourceware.org/glibc/wiki/Synchronizing_Headers
+for more information.)
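+
+A minimal sketch of the collision being guarded against (simplified, not
+the exact glibc source): glibc declares the flags as enumerators, so if
+the kernel #defines are seen first the enumerator names get macro-expanded
+into numbers and the enum no longer compiles:
+
+	/* from the kernel uapi header */
+	#define XATTR_CREATE 0x1
+
+	/* later, from glibc's <sys/xattr.h> (simplified) */
+	enum {
+		XATTR_CREATE = 1,	/* preprocesses to "1 = 1": error */
+		XATTR_REPLACE = 2
+	};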
+
+Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Allan McRae <allan@archlinux.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/uapi/linux/libc-compat.h | 9 +++++++++
+ include/uapi/linux/xattr.h | 7 +++++++
+ 2 files changed, 16 insertions(+)
+
+--- a/include/uapi/linux/libc-compat.h
++++ b/include/uapi/linux/libc-compat.h
+@@ -85,6 +85,12 @@
+
+ #endif /* _NETINET_IN_H */
+
++/* Definitions for xattr.h */
++#if defined(_SYS_XATTR_H)
++#define __UAPI_DEF_XATTR 0
++#else
++#define __UAPI_DEF_XATTR 1
++#endif
+
+ /* If we did not see any headers from any supported C libraries,
+ * or we are being included in the kernel, then define everything
+@@ -98,6 +104,9 @@
+ #define __UAPI_DEF_IPV6_MREQ 1
+ #define __UAPI_DEF_IPPROTO_V6 1
+
++/* Definitions for xattr.h */
++#define __UAPI_DEF_XATTR 1
++
+ #endif /* __GLIBC__ */
+
+ #endif /* _UAPI_LIBC_COMPAT_H */
+--- a/include/uapi/linux/xattr.h
++++ b/include/uapi/linux/xattr.h
+@@ -7,11 +7,18 @@
+ Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
+ Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ */
++
++#include <linux/libc-compat.h>
++
+ #ifndef _UAPI_LINUX_XATTR_H
+ #define _UAPI_LINUX_XATTR_H
+
++#ifdef __UAPI_DEF_XATTR
++#define __USE_KERNEL_XATTR_DEFS
++
+ #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */
+ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */
++#endif
+
+ /* Namespaces */
+ #define XATTR_OS2_PREFIX "os2."