From: Greg Kroah-Hartman
Date: Wed, 4 Jan 2017 10:07:33 +0000 (+0100)
Subject: 4.9-stable patches
X-Git-Tag: v4.9.1~16
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=81051d92849496ef8b6956cdd95e4be5a0f26e8b;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch
	crypto-caam-fix-aead-givenc-descriptors.patch
	exec-ensure-mm-user_ns-contains-the-execed-files.patch
	ext4-add-sanity-checking-to-count_overhead.patch
	ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch
	ext4-don-t-lock-buffer-in-ext4_commit_super-if-holding-spinlock.patch
	ext4-fix-in-superblock-mount-options-processing.patch
	ext4-fix-mballoc-breakage-with-64k-block-size.patch
	ext4-fix-stack-memory-corruption-with-64k-block-size.patch
	ext4-reject-inodes-with-negative-size.patch
	ext4-return-enomem-instead-of-success.patch
	ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch
	f2fs-fix-overflow-due-to-condition-check-order.patch
	f2fs-fix-to-determine-start_cp_addr-by-sbi-cur_cp_pack.patch
	f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch
	fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch
	loop-return-proper-error-from-loop_queue_rq.patch
	mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch
	mm-page_alloc-keep-pcp-count-and-list-contents-in-sync-if-struct-page-is-corrupted.patch
	mm-vmscan.c-set-correct-defer-count-for-shrinker.patch
	nvmet-fix-possible-infinite-loop-triggered-on-hot-namespace-removal.patch
	ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch
	ptrace-don-t-allow-accessing-an-undumpable-mm.patch
	revert-f2fs-use-percpu_counter-for-of-dirty-pages-in-inode.patch
	splice-reinstate-sigpipe-epipe-handling.patch
	vfs-mm-fix-return-value-of-read-at-s_maxbytes.patch
---
diff --git a/queue-4.9/block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch b/queue-4.9/block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch
new file mode 100644
index 00000000000..0d4c42fd648
--- /dev/null
+++ b/queue-4.9/block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch
@@ -0,0 +1,68 @@
+From bcc7f5b4bee8e327689a4d994022765855c807ff Mon Sep 17 00:00:00 2001
+From: NeilBrown
+Date: Mon, 12 Dec 2016 08:21:51 -0700
+Subject: block_dev: don't test bdev->bd_contains when it is not stable
+
+From: NeilBrown
+
+commit bcc7f5b4bee8e327689a4d994022765855c807ff upstream.
+
+bdev->bd_contains is not stable before calling __blkdev_get().
+When __blkdev_get() is called on a parition with ->bd_openers == 0
+it sets
+  bdev->bd_contains = bdev;
+which is not correct for a partition.
+After a call to __blkdev_get() succeeds, ->bd_openers will be > 0
+and then ->bd_contains is stable.
+
+When FMODE_EXCL is used, blkdev_get() calls
+ bd_start_claiming() -> bd_prepare_to_claim() -> bd_may_claim()
+
+This call happens before __blkdev_get() is called, so ->bd_contains
+is not stable. So bd_may_claim() cannot safely use ->bd_contains.
+It currently tries to use it, and this can lead to a BUG_ON().
+
+This happens when a whole device is already open with a bd_holder (in
+use by dm in my particular example) and two threads race to open a
+partition of that device for the first time, one opening with O_EXCL and
+one without.
+
+The thread that doesn't use O_EXCL gets through blkdev_get() to
+__blkdev_get(), gains the ->bd_mutex, and sets bdev->bd_contains = bdev;
+
+Immediately thereafter the other thread, using FMODE_EXCL, calls
+bd_start_claiming() from blkdev_get(). This should fail because the
+whole device has a holder, but because bdev->bd_contains == bdev
+bd_may_claim() incorrectly reports success.
+This thread continues and blocks on bd_mutex.
+
+The first thread then sets bdev->bd_contains correctly and drops the mutex.
+The thread using FMODE_EXCL then continues and when it calls bd_may_claim()
+again in:
+   BUG_ON(!bd_may_claim(bdev, whole, holder));
+The BUG_ON fires.
+
+Fix this by removing the dependency on ->bd_contains in
+bd_may_claim(). As bd_may_claim() has direct access to the whole
+device, it can simply test if the target bdev is the whole device.
+
+Fixes: 6b4517a7913a ("block: implement bd_claiming and claiming block")
+Signed-off-by: NeilBrown
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/block_dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -832,7 +832,7 @@ static bool bd_may_claim(struct block_de
+ 		return true;	 /* already a holder */
+ 	else if (bdev->bd_holder != NULL)
+ 		return false;	 /* held by someone else */
+-	else if (bdev->bd_contains == bdev)
++	else if (whole == bdev)
+ 		return true;	 /* is a whole device which isn't held */
+
+ 	else if (whole->bd_holder == bd_may_claim)
diff --git a/queue-4.9/crypto-caam-fix-aead-givenc-descriptors.patch b/queue-4.9/crypto-caam-fix-aead-givenc-descriptors.patch
new file mode 100644
index 00000000000..52afcef4648
--- /dev/null
+++ b/queue-4.9/crypto-caam-fix-aead-givenc-descriptors.patch
@@ -0,0 +1,48 @@
+From d128af17876d79b87edf048303f98b35f6a53dbc Mon Sep 17 00:00:00 2001
+From: Alex Porosanu
+Date: Wed, 9 Nov 2016 10:46:11 +0200
+Subject: crypto: caam - fix AEAD givenc descriptors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Porosanu
+
+commit d128af17876d79b87edf048303f98b35f6a53dbc upstream.
+
+The AEAD givenc descriptor relies on moving the IV through the
+output FIFO and then back to the CTX2 for authentication. The
+SEQ FIFO STORE could be scheduled before the data can be
+read from OFIFO, especially since the SEQ FIFO LOAD needs
+to wait for the SEQ FIFO LOAD SKIP to finish first. The
+SKIP takes more time when the input is SG than when it's
+a contiguous buffer. If the SEQ FIFO LOAD is not scheduled
+before the STORE, the DECO will hang waiting for data
+to be available in the OFIFO so it can be transferred to C2.
+In order to overcome this, first force transfer of IV to C2
+by starting the "cryptlen" transfer first and then starting to
+store data from OFIFO to the output buffer.
+
+Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation")
+Signed-off-by: Alex Porosanu
+Signed-off-by: Horia Geantă
+Signed-off-by: Herbert Xu
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/crypto/caam/caamalg.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/caam/caamalg.c
++++ b/drivers/crypto/caam/caamalg.c
+@@ -736,7 +736,9 @@ copy_iv:
+
+ 	/* Will read cryptlen */
+ 	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+-	aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2);
++	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF |
++			     FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH);
++	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
+
+ 	/* Write ICV */
+ 	append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB |
diff --git a/queue-4.9/exec-ensure-mm-user_ns-contains-the-execed-files.patch b/queue-4.9/exec-ensure-mm-user_ns-contains-the-execed-files.patch
new file mode 100644
index 00000000000..291704a9d56
--- /dev/null
+++ b/queue-4.9/exec-ensure-mm-user_ns-contains-the-execed-files.patch
@@ -0,0 +1,117 @@
+From f84df2a6f268de584a201e8911384a2d244876e3 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman"
+Date: Wed, 16 Nov 2016 22:06:51 -0600
+Subject: exec: Ensure mm->user_ns contains the execed files
+
+From: Eric W. Biederman
+
+commit f84df2a6f268de584a201e8911384a2d244876e3 upstream.
+
+When the user namespace support was merged the need to prevent
+ptrace from revealing the contents of an unreadable executable
+was overlooked.
+
+Correct this oversight by ensuring that the executed file
+or files are in mm->user_ns, by adjusting mm->user_ns.
+
+Use the new function privileged_wrt_inode_uidgid to see if
+the executable is a member of the user namespace, and as such
+if having CAP_SYS_PTRACE in the user namespace should allow
+tracing the executable. If not update mm->user_ns to
+the parent user namespace until an appropriate parent is found.
+
+Reported-by: Jann Horn
+Fixes: 9e4a36ece652 ("userns: Fail exec for suid and sgid binaries with ids outside our user namespace.")
+Signed-off-by: "Eric W. Biederman"
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/exec.c                  |   19 +++++++++++++++++--
+ include/linux/capability.h |    1 +
+ kernel/capability.c        |   16 ++++++++++++++--
+ 3 files changed, 32 insertions(+), 4 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1275,8 +1275,22 @@ EXPORT_SYMBOL(flush_old_exec);
+
+ void would_dump(struct linux_binprm *bprm, struct file *file)
+ {
+-	if (inode_permission(file_inode(file), MAY_READ) < 0)
++	struct inode *inode = file_inode(file);
++	if (inode_permission(inode, MAY_READ) < 0) {
++		struct user_namespace *old, *user_ns;
+ 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
++
++		/* Ensure mm->user_ns contains the executable */
++		user_ns = old = bprm->mm->user_ns;
++		while ((user_ns != &init_user_ns) &&
++		       !privileged_wrt_inode_uidgid(user_ns, inode))
++			user_ns = user_ns->parent;
++
++		if (old != user_ns) {
++			bprm->mm->user_ns = get_user_ns(user_ns);
++			put_user_ns(old);
++		}
++	}
+ }
+ EXPORT_SYMBOL(would_dump);
+
+@@ -1306,7 +1320,6 @@ void setup_new_exec(struct linux_binprm
+ 	    !gid_eq(bprm->cred->gid, current_egid())) {
+ 		current->pdeath_signal = 0;
+ 	} else {
+-		would_dump(bprm, bprm->file);
+ 		if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
+ 			set_dumpable(current->mm, suid_dumpable);
+ 	}
+@@ -1741,6 +1754,8 @@ static int do_execveat_common(int fd, st
+ 	if (retval < 0)
+ 		goto out;
+
++	would_dump(bprm, bprm->file);
++
+ 	retval = exec_binprm(bprm);
+ 	if (retval < 0)
+ 		goto out;
+--- a/include/linux/capability.h
++++ b/include/linux/capability.h
+@@ -240,6 +240,7 @@ static inline bool ns_capable_noaudit(st
+ 	return true;
+ }
+ #endif /* CONFIG_MULTIUSER */
++extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
+ extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
+ extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
+
+--- a/kernel/capability.c
++++ b/kernel/capability.c
+@@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *
+ EXPORT_SYMBOL(file_ns_capable);
+
+ /**
++ * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
++ * @ns: The user namespace in question
++ * @inode: The inode in question
++ *
++ * Return true if the inode uid and gid are within the namespace.
++ */
++bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
++{
++	return kuid_has_mapping(ns, inode->i_uid) &&
++	       kgid_has_mapping(ns, inode->i_gid);
++}
++
++/**
+  * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
+  * @inode: The inode in question
+  * @cap: The capability in question
+@@ -469,7 +482,6 @@ bool capable_wrt_inode_uidgid(const stru
+ {
+ 	struct user_namespace *ns = current_user_ns();
+
+-	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
+-	       kgid_has_mapping(ns, inode->i_gid);
++	return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
+ }
+ EXPORT_SYMBOL(capable_wrt_inode_uidgid);
diff --git a/queue-4.9/ext4-add-sanity-checking-to-count_overhead.patch b/queue-4.9/ext4-add-sanity-checking-to-count_overhead.patch
new file mode 100644
index 00000000000..4d50dbce6b0
--- /dev/null
+++ b/queue-4.9/ext4-add-sanity-checking-to-count_overhead.patch
@@ -0,0 +1,42 @@
+From c48ae41bafe31e9a66d8be2ced4e42a6b57fa814 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o
+Date: Fri, 18 Nov 2016 13:37:47 -0500
+Subject: ext4: add sanity checking to count_overhead()
+
+From: Theodore Ts'o
+
+commit c48ae41bafe31e9a66d8be2ced4e42a6b57fa814 upstream.
+
+The commit "ext4: sanity check the block and cluster size at mount
+time" should prevent any problems, but in case the superblock is
+modified while the file system is mounted, add an extra safety check
+to make sure we won't overrun the allocated buffer.
+
+Signed-off-by: Theodore Ts'o
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/super.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3193,10 +3193,15 @@ static int count_overhead(struct super_b
+ 			ext4_set_bit(s++, buf);
+ 			count++;
+ 		}
+-		for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
+-			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+-			count++;
++		j = ext4_bg_num_gdb(sb, grp);
++		if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
++			ext4_error(sb, "Invalid number of block group "
++				   "descriptor blocks: %d", j);
++			j = EXT4_BLOCKS_PER_GROUP(sb) - s;
+ 		}
++		count += j;
++		for (; j > 0; j--)
++			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+ 	}
+ 	if (!count)
+ 		return 0;
diff --git a/queue-4.9/ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch b/queue-4.9/ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch
new file mode 100644
index 00000000000..603c2a2e22b
--- /dev/null
+++ b/queue-4.9/ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch
@@ -0,0 +1,93 @@
+From 73b92a2a5e97d17cc4d5c4fe9d724d3273fb6fd2 Mon Sep 17 00:00:00 2001
+From: Sergey Karamov
+Date: Sat, 10 Dec 2016 17:54:58 -0500
+Subject: ext4: do not perform data journaling when data is encrypted
+
+From: Sergey Karamov
+
+commit 73b92a2a5e97d17cc4d5c4fe9d724d3273fb6fd2 upstream.
+
+Currently data journalling is incompatible with encryption: enabling both
+at the same time has never been supported by design, and would result in
+unpredictable behavior. However, users are not precluded from turning on
+both features simultaneously. This change programmatically replaces data
+journaling for encrypted regular files with ordered data journaling mode.
+
+Background:
+Journaling encrypted data has not been supported because it operates on
+buffer heads of the page in the page cache. Namely, when the commit
+happens, which could be up to five seconds after caching, the commit
+thread uses the buffer heads attached to the page to copy the contents of
+the page to the journal. With encryption, it would have been required to
+keep the bounce buffer with ciphertext for up to the aforementioned five
+seconds, since the page cache can only hold plaintext and could not be
+used for journaling. Alternatively, it would be required to setup the
+journal to initiate a callback at the commit time to perform deferred
+encryption - in this case, not only would the data have to be written
+twice, but it would also have to be encrypted twice. This level of
+complexity was not justified for a mode that in practice is very rarely
+used because of the overhead from the data journalling.
+
+Solution:
+If data=journaled has been set as a mount option for a filesystem, or if
+journaling is enabled on a regular file, do not perform journaling if the
+file is also encrypted, instead fall back to the data=ordered mode for the
+file.
+
+Rationale:
+The intent is to allow seamless and proper filesystem operation when
+journaling and encryption have both been enabled, and have these two
+conflicting features gracefully resolved by the filesystem.
+
+Fixes: 4461471107b7
+Signed-off-by: Sergey Karamov
+Signed-off-by: Theodore Ts'o
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/ext4_jbd2.h |   14 ++++++++------
+ fs/ext4/super.c     |    5 +++++
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -414,17 +414,19 @@ static inline int ext4_inode_journal_mod
+ 		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
+ 	/* We do not support data journalling with delayed allocation */
+ 	if (!S_ISREG(inode->i_mode) ||
+-	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+-		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */
+-	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
+-	    !test_opt(inode->i_sb, DELALLOC))
++	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
++	    (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
++	    !test_opt(inode->i_sb, DELALLOC))) {
++		/* We do not support data journalling for encrypted data */
++		if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode))
++			return EXT4_INODE_ORDERED_DATA_MODE;	/* ordered */
+ 		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */
++	}
+ 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ 		return EXT4_INODE_ORDERED_DATA_MODE;	/* ordered */
+ 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+ 		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
+-	else
+-		BUG();
++	BUG();
+ }
+
+ static inline int ext4_should_journal_data(struct inode *inode)
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3516,6 +3516,11 @@ static int ext4_fill_super(struct super_
+ 				 "both data=journal and dax");
+ 			goto failed_mount;
+ 		}
++		if (ext4_has_feature_encrypt(sb)) {
++			ext4_msg(sb, KERN_WARNING,
++				 "encrypted files will use data=ordered "
++				 "instead of data journaling mode");
++		}
+ 		if (test_opt(sb, DELALLOC))
+ 			clear_opt(sb, DELALLOC);
+ 	} else {
diff --git a/queue-4.9/ext4-don-t-lock-buffer-in-ext4_commit_super-if-holding-spinlock.patch b/queue-4.9/ext4-don-t-lock-buffer-in-ext4_commit_super-if-holding-spinlock.patch
new file mode 100644
index 00000000000..861956d825d
--- /dev/null
+++ b/queue-4.9/ext4-don-t-lock-buffer-in-ext4_commit_super-if-holding-spinlock.patch
@@ -0,0 +1,69 @@
+From 1566a48aaa10c6bb29b9a69dd8279f9a4fc41e35 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o
+Date: Sun, 13 Nov 2016 22:02:29 -0500
+Subject: ext4: don't lock buffer in ext4_commit_super if holding spinlock
+
+From: Theodore Ts'o
+
+commit 1566a48aaa10c6bb29b9a69dd8279f9a4fc41e35 upstream.
+
+If there is an error reported in mballoc via ext4_grp_locked_error(),
+the code is holding a spinlock, so ext4_commit_super() must not try to
+lock the buffer head, or else it will trigger a BUG:
+
+  BUG: sleeping function called from invalid context at ./include/linux/buffer_head.h:358
+  in_atomic(): 1, irqs_disabled(): 0, pid: 993, name: mount
+  CPU: 0 PID: 993 Comm: mount Not tainted 4.9.0-rc1-clouder1 #62
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014
+   ffff880006423548 ffffffff81318c89 ffffffff819ecdd0 0000000000000166
+   ffff880006423558 ffffffff810810b0 ffff880006423580 ffffffff81081153
+   ffff880006e5a1a0 ffff88000690e400 0000000000000000 ffff8800064235c0
+  Call Trace:
+   [] dump_stack+0x67/0x9e
+   [] ___might_sleep+0xf0/0x140
+   [] __might_sleep+0x53/0xb0
+   [] ext4_commit_super+0x19c/0x290
+   [] __ext4_grp_locked_error+0x14a/0x230
+   [] ? __might_sleep+0x53/0xb0
+   [] ext4_mb_generate_buddy+0x1de/0x320
+
+Since ext4_grp_locked_error() calls ext4_commit_super with sync == 0
+(and it is the only caller which does so), avoid locking and unlocking
+the buffer in this case.
+
+This can result in races with ext4_commit_super() if there are other
+problems (which is what commit 4743f83990614 was trying to address),
+but a Warning is better than BUG.
+
+Fixes: 4743f83990614
+Reported-by: Nikolay Borisov
+Signed-off-by: Theodore Ts'o
+Reviewed-by: Jan Kara
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/super.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4550,7 +4550,8 @@ static int ext4_commit_super(struct supe
+ 				&EXT4_SB(sb)->s_freeinodes_counter));
+ 	BUFFER_TRACE(sbh, "marking dirty");
+ 	ext4_superblock_csum_set(sb);
+-	lock_buffer(sbh);
++	if (sync)
++		lock_buffer(sbh);
+ 	if (buffer_write_io_error(sbh)) {
+ 		/*
+ 		 * Oh, dear. A previous attempt to write the
+@@ -4566,8 +4567,8 @@ static int ext4_commit_super(struct supe
+ 		set_buffer_uptodate(sbh);
+ 	}
+ 	mark_buffer_dirty(sbh);
+-	unlock_buffer(sbh);
+ 	if (sync) {
++		unlock_buffer(sbh);
+ 		error = __sync_dirty_buffer(sbh,
+ 			test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
+ 		if (error)
diff --git a/queue-4.9/ext4-fix-in-superblock-mount-options-processing.patch b/queue-4.9/ext4-fix-in-superblock-mount-options-processing.patch
new file mode 100644
index 00000000000..1d09fc8d7eb
--- /dev/null
+++ b/queue-4.9/ext4-fix-in-superblock-mount-options-processing.patch
@@ -0,0 +1,105 @@
+From 5aee0f8a3f42c94c5012f1673420aee96315925a Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o
+Date: Fri, 18 Nov 2016 13:24:26 -0500
+Subject: ext4: fix in-superblock mount options processing
+
+From: Theodore Ts'o
+
+commit 5aee0f8a3f42c94c5012f1673420aee96315925a upstream.
+
+Fix a large number of problems with how we handle mount options in the
+superblock. For one, if the string in the superblock is long enough
+that it is not null terminated, we could run off the end of the string
+and try to interpret superblocks fields as characters. It's unlikely
+this will cause a security problem, but it could result in an invalid
+parse. Also, parse_options is destructive to the string, so in some
+cases if there is a comma-separated string, it would be modified in
+the superblock. (Fortunately it only happens on file systems with a
+1k block size.)
+
+Signed-off-by: Theodore Ts'o
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/super.c |   38 +++++++++++++++++++++++---------------
+ 1 file changed, 23 insertions(+), 15 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3301,7 +3301,7 @@ static int ext4_fill_super(struct super_
+ 	char *orig_data = kstrdup(data, GFP_KERNEL);
+ 	struct buffer_head *bh;
+ 	struct ext4_super_block *es = NULL;
+-	struct ext4_sb_info *sbi;
++	struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ 	ext4_fsblk_t block;
+ 	ext4_fsblk_t sb_block = get_sb_block(&data);
+ 	ext4_fsblk_t logical_sb_block;
+@@ -3320,16 +3320,14 @@ static int ext4_fill_super(struct super_
+ 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+ 	ext4_group_t first_not_zeroed;
+
+-	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+-	if (!sbi)
+-		goto out_free_orig;
++	if ((data && !orig_data) || !sbi)
++		goto out_free_base;
+
+ 	sbi->s_blockgroup_lock =
+ 		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
+-	if (!sbi->s_blockgroup_lock) {
+-		kfree(sbi);
+-		goto out_free_orig;
+-	}
++	if (!sbi->s_blockgroup_lock)
++		goto out_free_base;
++
+ 	sb->s_fs_info = sbi;
+ 	sbi->s_sb = sb;
+ 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
+@@ -3475,11 +3473,19 @@ static int ext4_fill_super(struct super_
+ 	 */
+ 	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+
+-	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
+-			   &journal_devnum, &journal_ioprio, 0)) {
+-		ext4_msg(sb, KERN_WARNING,
+-			 "failed to parse options in superblock: %s",
+-			 sbi->s_es->s_mount_opts);
++	if (sbi->s_es->s_mount_opts[0]) {
++		char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
++					      sizeof(sbi->s_es->s_mount_opts),
++					      GFP_KERNEL);
++		if (!s_mount_opts)
++			goto failed_mount;
++		if (!parse_options(s_mount_opts, sb, &journal_devnum,
++				   &journal_ioprio, 0)) {
++			ext4_msg(sb, KERN_WARNING,
++				 "failed to parse options in superblock: %s",
++				 s_mount_opts);
++		}
++		kfree(s_mount_opts);
+ 	}
+ 	sbi->s_def_mount_opt = sbi->s_mount_opt;
+ 	if (!parse_options((char *) data, sb, &journal_devnum,
+@@ -4157,7 +4163,9 @@ no_journal:
+
+ 	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
+ 		ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+-			 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
++			 "Opts: %.*s%s%s", descr,
++			 (int) sizeof(sbi->s_es->s_mount_opts),
++			 sbi->s_es->s_mount_opts,
+ 			 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+
+ 	if (es->s_error_count)
+@@ -4236,8 +4244,8 @@ failed_mount:
+ out_fail:
+ 	sb->s_fs_info = NULL;
+ 	kfree(sbi->s_blockgroup_lock);
++out_free_base:
+ 	kfree(sbi);
+-out_free_orig:
+ 	kfree(orig_data);
+ 	return err ? err : ret;
+ }
diff --git a/queue-4.9/ext4-fix-mballoc-breakage-with-64k-block-size.patch b/queue-4.9/ext4-fix-mballoc-breakage-with-64k-block-size.patch
new file mode 100644
index 00000000000..74c0e3e6ca3
--- /dev/null
+++ b/queue-4.9/ext4-fix-mballoc-breakage-with-64k-block-size.patch
@@ -0,0 +1,35 @@
+From 69e43e8cc971a79dd1ee5d4343d8e63f82725123 Mon Sep 17 00:00:00 2001
+From: Chandan Rajendra
+Date: Mon, 14 Nov 2016 21:04:37 -0500
+Subject: ext4: fix mballoc breakage with 64k block size
+
+From: Chandan Rajendra
+
+commit 69e43e8cc971a79dd1ee5d4343d8e63f82725123 upstream.
+
+'border' variable is set to a value of 2 times the block size of the
+underlying filesystem. With 64k block size, the resulting value won't
+fit into a 16-bit variable. Hence this commit changes the data type of
+'border' to 'unsigned int'.
+
+Fixes: c9de560ded61f
+Signed-off-by: Chandan Rajendra
+Signed-off-by: Theodore Ts'o
+Reviewed-by: Andreas Dilger
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/mballoc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -669,7 +669,7 @@ static void ext4_mb_mark_free_simple(str
+ 	ext4_grpblk_t min;
+ 	ext4_grpblk_t max;
+ 	ext4_grpblk_t chunk;
+-	unsigned short border;
++	unsigned int border;
+
+ 	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
+
diff --git a/queue-4.9/ext4-fix-stack-memory-corruption-with-64k-block-size.patch b/queue-4.9/ext4-fix-stack-memory-corruption-with-64k-block-size.patch
new file mode 100644
index 00000000000..1c30ffe046b
--- /dev/null
+++ b/queue-4.9/ext4-fix-stack-memory-corruption-with-64k-block-size.patch
@@ -0,0 +1,36 @@
+From 30a9d7afe70ed6bd9191d3000e2ef1a34fb58493 Mon Sep 17 00:00:00 2001
+From: Chandan Rajendra
+Date: Mon, 14 Nov 2016 21:26:26 -0500
+Subject: ext4: fix stack memory corruption with 64k block size
+
+From: Chandan Rajendra
+
+commit 30a9d7afe70ed6bd9191d3000e2ef1a34fb58493 upstream.
+
+The number of 'counters' elements needed in 'struct sg' is
+super_block->s_blocksize_bits + 2. Presently we have 16 'counters'
+elements in the array. This is insufficient for block sizes >= 32k. In
+such cases the memcpy operation performed in ext4_mb_seq_groups_show()
+would cause stack memory corruption.
+
+Fixes: c9de560ded61f
+Signed-off-by: Chandan Rajendra
+Signed-off-by: Theodore Ts'o
+Reviewed-by: Jan Kara
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/mballoc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2287,7 +2287,7 @@ static int ext4_mb_seq_groups_show(struc
+ 	struct ext4_group_info *grinfo;
+ 	struct sg {
+ 		struct ext4_group_info info;
+-		ext4_grpblk_t counters[16];
++		ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
+ 	} sg;
+
+ 	group--;
diff --git a/queue-4.9/ext4-reject-inodes-with-negative-size.patch b/queue-4.9/ext4-reject-inodes-with-negative-size.patch
new file mode 100644
index 00000000000..c8bad8e71a8
--- /dev/null
+++ b/queue-4.9/ext4-reject-inodes-with-negative-size.patch
@@ -0,0 +1,45 @@
+From 7e6e1ef48fc02f3ac5d0edecbb0c6087cd758d58 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong"
+Date: Sat, 10 Dec 2016 09:55:01 -0500
+Subject: ext4: reject inodes with negative size
+
+From: Darrick J. Wong
+
+commit 7e6e1ef48fc02f3ac5d0edecbb0c6087cd758d58 upstream.
+
+Don't load an inode with a negative size; this causes integer overflow
+problems in the VFS.
+
+[ Added EXT4_ERROR_INODE() to mark file system as corrupted. -TYT]
+
+Fixes: a48380f769df (ext4: rename i_dir_acl to i_size_high)
+Signed-off-by: Darrick J. Wong
+Signed-off-by: Theodore Ts'o
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/inode.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4434,6 +4434,7 @@ struct inode *ext4_iget(struct super_blo
+ 	struct inode *inode;
+ 	journal_t *journal = EXT4_SB(sb)->s_journal;
+ 	long ret;
++	loff_t size;
+ 	int block;
+ 	uid_t i_uid;
+ 	gid_t i_gid;
+@@ -4534,6 +4535,11 @@ struct inode *ext4_iget(struct super_blo
+ 		ei->i_file_acl |=
+ 			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+ 	inode->i_size = ext4_isize(raw_inode);
++	if ((size = i_size_read(inode)) < 0) {
++		EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
++		ret = -EFSCORRUPTED;
++		goto bad_inode;
++	}
+ 	ei->i_disksize = inode->i_size;
+ #ifdef CONFIG_QUOTA
+ 	ei->i_reserved_quota = 0;
diff --git a/queue-4.9/ext4-return-enomem-instead-of-success.patch b/queue-4.9/ext4-return-enomem-instead-of-success.patch
new file mode 100644
index 00000000000..6d8f344b5b9
--- /dev/null
+++ b/queue-4.9/ext4-return-enomem-instead-of-success.patch
@@ -0,0 +1,34 @@
+From 578620f451f836389424833f1454eeeb2ffc9e9f Mon Sep 17 00:00:00 2001
+From: Dan Carpenter
+Date: Sat, 10 Dec 2016 09:56:01 -0500
+Subject: ext4: return -ENOMEM instead of success
+
+From: Dan Carpenter
+
+commit 578620f451f836389424833f1454eeeb2ffc9e9f upstream.
+
+We should set the error code if kzalloc() fails.
+
+Fixes: 67cf5b09a46f ("ext4: add the basic function for inline data support")
+Signed-off-by: Dan Carpenter
+Signed-off-by: Theodore Ts'o
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/inline.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -336,8 +336,10 @@ static int ext4_update_inline_data(handl
+
+ 	len -= EXT4_MIN_INLINE_DATA_SIZE;
+ 	value = kzalloc(len, GFP_NOFS);
+-	if (!value)
++	if (!value) {
++		error = -ENOMEM;
+ 		goto out;
++	}
+
+ 	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
+ 				     value, len);
diff --git a/queue-4.9/ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch b/queue-4.9/ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch
new file mode 100644
index 00000000000..dbde9a45a27
--- /dev/null
+++ b/queue-4.9/ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch
@@ -0,0 +1,55 @@
+From cd6bb35bf7f6d7d922509bf50265383a0ceabe96 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o
+Date: Fri, 18 Nov 2016 13:28:30 -0500
+Subject: ext4: use more strict checks for inodes_per_block on mount
+
+From: Theodore Ts'o
+
+commit cd6bb35bf7f6d7d922509bf50265383a0ceabe96 upstream.
+
+Centralize the checks for inodes_per_block and be more strict to make
+sure the inodes_per_block_group can't end up being zero.
+
+Signed-off-by: Theodore Ts'o
+Reviewed-by: Andreas Dilger
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/ext4/super.c |   15 ++++++---------
+ 1 file changed, 6 insertions(+), 9 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3660,12 +3660,16 @@ static int ext4_fill_super(struct super_
+
+ 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
+ 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
+-	if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
+-		goto cantfind_ext4;
+
+ 	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
+ 	if (sbi->s_inodes_per_block == 0)
+ 		goto cantfind_ext4;
++	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
++	    sbi->s_inodes_per_group > blocksize * 8) {
++		ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
++			 sbi->s_blocks_per_group);
++		goto failed_mount;
++	}
+ 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
+ 					sbi->s_inodes_per_block;
+ 	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
+@@ -3748,13 +3752,6 @@ static int ext4_fill_super(struct super_
+ 	}
+ 	sbi->s_cluster_ratio = clustersize / blocksize;
+
+-	if (sbi->s_inodes_per_group > blocksize * 8) {
+-		ext4_msg(sb, KERN_ERR,
+-		       "#inodes per group too big: %lu",
+-		       sbi->s_inodes_per_group);
+-		goto failed_mount;
+-	}
+-
+ 	/* Do we have standard group size of clustersize * 8 blocks ? */
+ 	if (sbi->s_blocks_per_group == clustersize << 3)
+ 		set_opt2(sb, STD_GROUP_SIZE);
diff --git a/queue-4.9/f2fs-fix-overflow-due-to-condition-check-order.patch b/queue-4.9/f2fs-fix-overflow-due-to-condition-check-order.patch
new file mode 100644
index 00000000000..9f6a4466adc
--- /dev/null
+++ b/queue-4.9/f2fs-fix-overflow-due-to-condition-check-order.patch
@@ -0,0 +1,32 @@
+From e87f7329bbd6760c2acc4f1eb423362b08851a71 Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim
+Date: Wed, 23 Nov 2016 10:51:17 -0800
+Subject: f2fs: fix overflow due to condition check order
+
+From: Jaegeuk Kim
+
+commit e87f7329bbd6760c2acc4f1eb423362b08851a71 upstream.
+
+In the last ilen case, i was already increased, resulting in accessing out-
+of-boundary entry of do_replace and blkaddr.
+Fix to check ilen first to exit the loop.
+
+Fixes: 2aa8fbb9693020 ("f2fs: refactor __exchange_data_block for speed up")
+Signed-off-by: Jaegeuk Kim
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/f2fs/file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -967,7 +967,7 @@ static int __clone_blkaddrs(struct inode
+ 				new_size = (dst + i) << PAGE_SHIFT;
+ 				if (dst_inode->i_size < new_size)
+ 					f2fs_i_size_write(dst_inode, new_size);
+-			} while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen);
++			} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
+
+ 			f2fs_put_dnode(&dn);
+ 		} else {
diff --git a/queue-4.9/f2fs-fix-to-determine-start_cp_addr-by-sbi-cur_cp_pack.patch b/queue-4.9/f2fs-fix-to-determine-start_cp_addr-by-sbi-cur_cp_pack.patch
new file mode 100644
index 00000000000..d0dcdead763
--- /dev/null
+++ b/queue-4.9/f2fs-fix-to-determine-start_cp_addr-by-sbi-cur_cp_pack.patch
@@ -0,0 +1,99 @@
+From 8508e44ae98622f841f5ef29d0bf3d5db4e0c1cc Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim
+Date: Thu, 24 Nov 2016 12:45:15 -0800
+Subject: f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack
+
+From: Jaegeuk Kim
+
+commit 8508e44ae98622f841f5ef29d0bf3d5db4e0c1cc upstream.
+
+We don't guarantee cp_addr is fixed by cp_version.
+This is to sync with f2fs-tools.
+
+Signed-off-by: Jaegeuk Kim
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/f2fs/checkpoint.c |    8 +++++++-
+ fs/f2fs/f2fs.h       |   26 ++++++++++++++++----------
+ 2 files changed, 23 insertions(+), 11 deletions(-)
+
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -772,6 +772,11 @@ int get_valid_checkpoint(struct f2fs_sb_
+ 	if (sanity_check_ckpt(sbi))
+ 		goto fail_no_cp;
+
++	if (cur_page == cp1)
++		sbi->cur_cp_pack = 1;
++	else
++		sbi->cur_cp_pack = 2;
++
+ 	if (cp_blks <= 1)
+ 		goto done;
+
+@@ -1123,7 +1128,7 @@ static int do_checkpoint(struct f2fs_sb_
+ 				le32_to_cpu(ckpt->checksum_offset)))
+ 				= cpu_to_le32(crc32);
+
+-	start_blk = __start_cp_addr(sbi);
++	start_blk = __start_cp_next_addr(sbi);
+
+ 	/* need to wait for end_io results */
+ 	wait_on_all_pages_writeback(sbi);
+@@ -1187,6 +1192,7 @@ static int do_checkpoint(struct f2fs_sb_
+ 	clear_prefree_segments(sbi, cpc);
+ 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
+ 	clear_sbi_flag(sbi, SBI_NEED_CP);
++	__set_cp_next_pack(sbi);
+
+ 	/*
+ 	 * redirty superblock if metadata like node page or inode cache is
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -764,6 +764,7 @@ struct f2fs_sb_info {
+
+ 	/* for checkpoint */
+ 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
++	int cur_cp_pack;			/* remain current cp pack */
+ 	spinlock_t cp_lock;			/* for flag in ckpt */
+ 	struct inode *meta_inode;		/* cache meta blocks */
+ 	struct mutex cp_mutex;			/* checkpoint procedure lock */
+@@ -1329,22 +1330,27 @@ static inline void *__bitmap_ptr(struct
+
+ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
+ {
+-	block_t start_addr;
+-	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+-	unsigned long long ckpt_version = cur_cp_version(ckpt);
+-
+-	start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
++	block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+
+-	/*
+-	 * odd
numbered checkpoint should at cp segment 0 +- * and even segment must be at cp segment 1 +- */ +- if (!(ckpt_version & 1)) ++ if (sbi->cur_cp_pack == 2) + start_addr += sbi->blocks_per_seg; ++ return start_addr; ++} + ++static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi) ++{ ++ block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); ++ ++ if (sbi->cur_cp_pack == 1) ++ start_addr += sbi->blocks_per_seg; + return start_addr; + } + ++static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi) ++{ ++ sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1; ++} ++ + static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) + { + return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); diff --git a/queue-4.9/f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch b/queue-4.9/f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch new file mode 100644 index 00000000000..f18a10f3976 --- /dev/null +++ b/queue-4.9/f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch @@ -0,0 +1,63 @@ +From 05e6ea2685c964db1e675a24a4f4e2adc22d2388 Mon Sep 17 00:00:00 2001 +From: Nicolai Stange +Date: Sun, 20 Nov 2016 19:57:23 +0100 +Subject: f2fs: set ->owner for debugfs status file's file_operations + +From: Nicolai Stange + +commit 05e6ea2685c964db1e675a24a4f4e2adc22d2388 upstream. + +The struct file_operations instance serving the f2fs/status debugfs file +lacks an initialization of its ->owner. + +This means that although that file might have been opened, the f2fs module +can still get removed. Any further operation on that opened file, releasing +included, will cause accesses to unmapped memory. + +Indeed, Mike Marshall reported the following: + + BUG: unable to handle kernel paging request at ffffffffa0307430 + IP: [] full_proxy_release+0x24/0x90 + <...> + Call Trace: + [] __fput+0xdf/0x1d0 + [] ____fput+0xe/0x10 + [] task_work_run+0x8e/0xc0 + [] do_exit+0x2ae/0xae0 + [] ? __audit_syscall_entry+0xae/0x100 + [] ? 
syscall_trace_enter+0x1ca/0x310 + [] do_group_exit+0x44/0xc0 + [] SyS_exit_group+0x14/0x20 + [] do_syscall_64+0x61/0x150 + [] entry_SYSCALL64_slow_path+0x25/0x25 + <...> + ---[ end trace f22ae883fa3ea6b8 ]--- + Fixing recursive fault but reboot is needed! + +Fix this by initializing the f2fs/status file_operations' ->owner with +THIS_MODULE. + +This will allow debugfs to grab a reference to the f2fs module upon any +open on that file, thus preventing it from getting removed. + +Fixes: 902829aa0b72 ("f2fs: move proc files to debugfs") +Reported-by: Mike Marshall +Reported-by: Martin Brandenburg +Signed-off-by: Nicolai Stange +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman + +--- + fs/f2fs/debug.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/f2fs/debug.c ++++ b/fs/f2fs/debug.c +@@ -373,6 +373,7 @@ static int stat_open(struct inode *inode + } + + static const struct file_operations stat_fops = { ++ .owner = THIS_MODULE, + .open = stat_open, + .read = seq_read, + .llseek = seq_lseek, diff --git a/queue-4.9/fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch b/queue-4.9/fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch new file mode 100644 index 00000000000..00c7af4b324 --- /dev/null +++ b/queue-4.9/fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch @@ -0,0 +1,79 @@ +From 613cc2b6f272c1a8ad33aefa21cad77af23139f7 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Wed, 21 Dec 2016 16:26:24 +1100 +Subject: fs: exec: apply CLOEXEC before changing dumpable task flags + +From: Aleksa Sarai + +commit 613cc2b6f272c1a8ad33aefa21cad77af23139f7 upstream. + +If you have a process that has set itself to be non-dumpable, and it +then undergoes exec(2), any CLOEXEC file descriptors it has open are +"exposed" during a race window between the dumpable flags of the process +being reset for exec(2) and CLOEXEC being applied to the file +descriptors. This can be exploited by a process by attempting to access +/proc//fd/... 
during this window, without requiring CAP_SYS_PTRACE. + +The race in question is after set_dumpable has been (for get_link, +though the trace is basically the same for readlink): + +[vfs] +-> proc_pid_link_inode_operations.get_link + -> proc_pid_get_link + -> proc_fd_access_allowed + -> ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); + +Which will return 0, during the race window and CLOEXEC file descriptors +will still be open during this window because do_close_on_exec has not +been called yet. As a result, the ordering of these calls should be +reversed to avoid this race window. + +This is of particular concern to container runtimes, where joining a +PID namespace with file descriptors referring to the host filesystem +can result in security issues (since PRCTL_SET_DUMPABLE doesn't protect +against access of CLOEXEC file descriptors -- file descriptors which may +reference filesystem objects the container shouldn't have access to). + +Cc: dev@opencontainers.org +Reported-by: Michael Crosby +Signed-off-by: Aleksa Sarai +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -19,7 +19,7 @@ + * current->executable is only used by the procfs. This allows a dispatch + * table to check for several different types of binary formats. We keep + * trying until we recognize the file or we run out of supported binary +- * formats. ++ * formats. + */ + + #include +@@ -1266,6 +1266,13 @@ int flush_old_exec(struct linux_binprm * + flush_thread(); + current->personality &= ~bprm->per_clear; + ++ /* ++ * We have to apply CLOEXEC before we change whether the process is ++ * dumpable (in setup_new_exec) to avoid a race with a process in userspace ++ * trying to access the should-be-closed file descriptors of a process ++ * undergoing exec(2). 
++ */ ++ do_close_on_exec(current->files); + return 0; + + out: +@@ -1328,7 +1335,6 @@ void setup_new_exec(struct linux_binprm + group */ + current->self_exec_id++; + flush_signal_handlers(current, 0); +- do_close_on_exec(current->files); + } + EXPORT_SYMBOL(setup_new_exec); + diff --git a/queue-4.9/loop-return-proper-error-from-loop_queue_rq.patch b/queue-4.9/loop-return-proper-error-from-loop_queue_rq.patch new file mode 100644 index 00000000000..7274d5b026c --- /dev/null +++ b/queue-4.9/loop-return-proper-error-from-loop_queue_rq.patch @@ -0,0 +1,32 @@ +From b4a567e8114327518c09f5632339a5954ab975a3 Mon Sep 17 00:00:00 2001 +From: Omar Sandoval +Date: Mon, 14 Nov 2016 14:56:17 -0800 +Subject: loop: return proper error from loop_queue_rq() + +From: Omar Sandoval + +commit b4a567e8114327518c09f5632339a5954ab975a3 upstream. + +->queue_rq() should return one of the BLK_MQ_RQ_QUEUE_* constants, not +an errno. + +Fixes: f4aa4c7bbac6 ("block: loop: convert to per-device workqueue") +Signed-off-by: Omar Sandoval +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/loop.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -1646,7 +1646,7 @@ static int loop_queue_rq(struct blk_mq_h + blk_mq_start_request(bd->rq); + + if (lo->lo_state != Lo_bound) +- return -EIO; ++ return BLK_MQ_RQ_QUEUE_ERROR; + + switch (req_op(cmd->rq)) { + case REQ_OP_FLUSH: diff --git a/queue-4.9/mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch b/queue-4.9/mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch new file mode 100644 index 00000000000..352bd80d4a5 --- /dev/null +++ b/queue-4.9/mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch @@ -0,0 +1,181 @@ +From bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 Mon Sep 17 00:00:00 2001 +From: "Eric W. 
Biederman" +Date: Thu, 13 Oct 2016 21:23:16 -0500 +Subject: mm: Add a user_ns owner to mm_struct and fix ptrace permission checks + +From: Eric W. Biederman + +commit bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 upstream. + +During exec dumpable is cleared if the file that is being executed is +not readable by the user executing the file. A bug in +ptrace_may_access allows reading the file if the executable happens to +enter into a subordinate user namespace (aka clone(CLONE_NEWUSER), +unshare(CLONE_NEWUSER), or setns(fd, CLONE_NEWUSER). + +This problem is fixed with only necessary userspace breakage by adding +a user namespace owner to mm_struct, captured at the time of exec, so +it is clear in which user namespace CAP_SYS_PTRACE must be present in +to be able to safely give read permission to the executable. + +The function ptrace_may_access is modified to verify that the ptracer +has CAP_SYS_ADMIN in task->mm->user_ns instead of task->cred->user_ns. +This ensures that if the task changes it's cred into a subordinate +user namespace it does not become ptraceable. + +The function ptrace_attach is modified to only set PT_PTRACE_CAP when +CAP_SYS_PTRACE is held over task->mm->user_ns. The intent of +PT_PTRACE_CAP is to be a flag to note that whatever permission changes +the task might go through the tracer has sufficient permissions for +it not to be an issue. task->cred->user_ns is always the same +as or descendent of mm->user_ns. Which guarantees that having +CAP_SYS_PTRACE over mm->user_ns is the worst case for the tasks +credentials. + +To prevent regressions mm->dumpable and mm->user_ns are not considered +when a task has no mm. As simply failing ptrace_may_attach causes +regressions in privileged applications attempting to read things +such as /proc//stat + +Acked-by: Kees Cook +Tested-by: Cyrill Gorcunov +Fixes: 8409cca70561 ("userns: allow ptrace from non-init user namespaces") +Signed-off-by: "Eric W. 
Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mm_types.h | 1 + + kernel/fork.c | 9 ++++++--- + kernel/ptrace.c | 26 +++++++++++--------------- + mm/init-mm.c | 2 ++ + 4 files changed, 20 insertions(+), 18 deletions(-) + +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -473,6 +473,7 @@ struct mm_struct { + */ + struct task_struct __rcu *owner; + #endif ++ struct user_namespace *user_ns; + + /* store ref to file /proc//exe symlink points to */ + struct file __rcu *exe_file; +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -745,7 +745,8 @@ static void mm_init_owner(struct mm_stru + #endif + } + +-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) ++static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, ++ struct user_namespace *user_ns) + { + mm->mmap = NULL; + mm->mm_rb = RB_ROOT; +@@ -785,6 +786,7 @@ static struct mm_struct *mm_init(struct + if (init_new_context(p, mm)) + goto fail_nocontext; + ++ mm->user_ns = get_user_ns(user_ns); + return mm; + + fail_nocontext: +@@ -830,7 +832,7 @@ struct mm_struct *mm_alloc(void) + return NULL; + + memset(mm, 0, sizeof(*mm)); +- return mm_init(mm, current); ++ return mm_init(mm, current, current_user_ns()); + } + + /* +@@ -845,6 +847,7 @@ void __mmdrop(struct mm_struct *mm) + destroy_context(mm); + mmu_notifier_mm_destroy(mm); + check_mm(mm); ++ put_user_ns(mm->user_ns); + free_mm(mm); + } + EXPORT_SYMBOL_GPL(__mmdrop); +@@ -1126,7 +1129,7 @@ static struct mm_struct *dup_mm(struct t + + memcpy(mm, oldmm, sizeof(*mm)); + +- if (!mm_init(mm, tsk)) ++ if (!mm_init(mm, tsk, mm->user_ns)) + goto fail_nomem; + + err = dup_mmap(mm, oldmm); +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -220,7 +220,7 @@ static int ptrace_has_cap(struct user_na + static int __ptrace_may_access(struct task_struct *task, unsigned int mode) + { + const struct cred *cred = current_cred(), *tcred; +- int dumpable = 0; ++ struct mm_struct *mm; + kuid_t caller_uid; + 
kgid_t caller_gid; + +@@ -271,16 +271,11 @@ static int __ptrace_may_access(struct ta + return -EPERM; + ok: + rcu_read_unlock(); +- smp_rmb(); +- if (task->mm) +- dumpable = get_dumpable(task->mm); +- rcu_read_lock(); +- if (dumpable != SUID_DUMP_USER && +- !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { +- rcu_read_unlock(); +- return -EPERM; +- } +- rcu_read_unlock(); ++ mm = task->mm; ++ if (mm && ++ ((get_dumpable(mm) != SUID_DUMP_USER) && ++ !ptrace_has_cap(mm->user_ns, mode))) ++ return -EPERM; + + return security_ptrace_access_check(task, mode); + } +@@ -331,6 +326,11 @@ static int ptrace_attach(struct task_str + + task_lock(task); + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); ++ if (!retval) { ++ struct mm_struct *mm = task->mm; ++ if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE)) ++ flags |= PT_PTRACE_CAP; ++ } + task_unlock(task); + if (retval) + goto unlock_creds; +@@ -344,10 +344,6 @@ static int ptrace_attach(struct task_str + + if (seize) + flags |= PT_SEIZED; +- rcu_read_lock(); +- if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) +- flags |= PT_PTRACE_CAP; +- rcu_read_unlock(); + task->ptrace = flags; + + __ptrace_link(task, current); +--- a/mm/init-mm.c ++++ b/mm/init-mm.c +@@ -6,6 +6,7 @@ + #include + + #include ++#include + #include + #include + +@@ -21,5 +22,6 @@ struct mm_struct init_mm = { + .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(init_mm.mmlist), ++ .user_ns = &init_user_ns, + INIT_MM_CONTEXT(init_mm) + }; diff --git a/queue-4.9/mm-page_alloc-keep-pcp-count-and-list-contents-in-sync-if-struct-page-is-corrupted.patch b/queue-4.9/mm-page_alloc-keep-pcp-count-and-list-contents-in-sync-if-struct-page-is-corrupted.patch new file mode 100644 index 00000000000..6fa4466fc2a --- /dev/null +++ b/queue-4.9/mm-page_alloc-keep-pcp-count-and-list-contents-in-sync-if-struct-page-is-corrupted.patch @@ -0,0 +1,82 @@ 
+From a6de734bc002fe2027ccc074fbbd87d72957b7a4 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Mon, 12 Dec 2016 16:44:41 -0800 +Subject: mm, page_alloc: keep pcp count and list contents in sync if struct page is corrupted + +From: Mel Gorman + +commit a6de734bc002fe2027ccc074fbbd87d72957b7a4 upstream. + +Vlastimil Babka pointed out that commit 479f854a207c ("mm, page_alloc: +defer debugging checks of pages allocated from the PCP") will allow the +per-cpu list counter to be out of sync with the per-cpu list contents if +a struct page is corrupted. + +The consequence is an infinite loop if the per-cpu lists get fully +drained by free_pcppages_bulk because all the lists are empty but the +count is positive. The infinite loop occurs here + + do { + batch_free++; + if (++migratetype == MIGRATE_PCPTYPES) + migratetype = 0; + list = &pcp->lists[migratetype]; + } while (list_empty(list)); + +What the user sees is a bad page warning followed by a soft lockup with +interrupts disabled in free_pcppages_bulk(). + +This patch keeps the accounting in sync. 
+ +Fixes: 479f854a207c ("mm, page_alloc: defer debugging checks of pages allocated from the PCP") +Link: http://lkml.kernel.org/r/20161202112951.23346-2-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: Vlastimil Babka +Acked-by: Michal Hocko +Acked-by: Hillf Danton +Cc: Christoph Lameter +Cc: Johannes Weiner +Cc: Jesper Dangaard Brouer +Cc: Joonsoo Kim +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2192,7 +2192,7 @@ static int rmqueue_bulk(struct zone *zon + unsigned long count, struct list_head *list, + int migratetype, bool cold) + { +- int i; ++ int i, alloced = 0; + + spin_lock(&zone->lock); + for (i = 0; i < count; ++i) { +@@ -2217,13 +2217,21 @@ static int rmqueue_bulk(struct zone *zon + else + list_add_tail(&page->lru, list); + list = &page->lru; ++ alloced++; + if (is_migrate_cma(get_pcppage_migratetype(page))) + __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, + -(1 << order)); + } ++ ++ /* ++ * i pages were removed from the buddy list even if some leak due ++ * to check_pcp_refill failing so adjust NR_FREE_PAGES based ++ * on i. Do not confuse with 'alloced' which is the number of ++ * pages added to the pcp list. 
++ */ + __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); + spin_unlock(&zone->lock); +- return i; ++ return alloced; + } + + #ifdef CONFIG_NUMA diff --git a/queue-4.9/mm-vmscan.c-set-correct-defer-count-for-shrinker.patch b/queue-4.9/mm-vmscan.c-set-correct-defer-count-for-shrinker.patch new file mode 100644 index 00000000000..021fcecf4c7 --- /dev/null +++ b/queue-4.9/mm-vmscan.c-set-correct-defer-count-for-shrinker.patch @@ -0,0 +1,85 @@ +From 5f33a0803bbd781de916f5c7448cbbbbc763d911 Mon Sep 17 00:00:00 2001 +From: Shaohua Li +Date: Mon, 12 Dec 2016 16:41:50 -0800 +Subject: mm/vmscan.c: set correct defer count for shrinker + +From: Shaohua Li + +commit 5f33a0803bbd781de916f5c7448cbbbbc763d911 upstream. + +Our system uses significantly more slab memory with memcg enabled with +the latest kernel. With 3.10 kernel, slab uses 2G memory, while with +4.6 kernel, 6G memory is used. The shrinker has problem. Let's see we +have two memcg for one shrinker. In do_shrink_slab: + +1. Check cg1. nr_deferred = 0, assume total_scan = 700. batch size + is 1024, then no memory is freed. nr_deferred = 700 + +2. Check cg2. nr_deferred = 700. Assume freeable = 20, then + total_scan = 10 or 40. Let's assume it's 10. No memory is freed. + nr_deferred = 10. + +The deferred share of cg1 is lost in this case. kswapd will free no +memory even run above steps again and again. + +The fix makes sure one memcg's deferred share isn't lost. + +Link: http://lkml.kernel.org/r/2414be961b5d25892060315fbb56bb19d81d0c07.1476227351.git.shli@fb.com +Signed-off-by: Shaohua Li +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Vladimir Davydov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmscan.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -291,6 +291,7 @@ static unsigned long do_shrink_slab(stru + int nid = shrinkctl->nid; + long batch_size = shrinker->batch ? 
shrinker->batch + : SHRINK_BATCH; ++ long scanned = 0, next_deferred; + + freeable = shrinker->count_objects(shrinker, shrinkctl); + if (freeable == 0) +@@ -312,7 +313,9 @@ static unsigned long do_shrink_slab(stru + pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", + shrinker->scan_objects, total_scan); + total_scan = freeable; +- } ++ next_deferred = nr; ++ } else ++ next_deferred = total_scan; + + /* + * We need to avoid excessive windup on filesystem shrinkers +@@ -369,17 +372,22 @@ static unsigned long do_shrink_slab(stru + + count_vm_events(SLABS_SCANNED, nr_to_scan); + total_scan -= nr_to_scan; ++ scanned += nr_to_scan; + + cond_resched(); + } + ++ if (next_deferred >= scanned) ++ next_deferred -= scanned; ++ else ++ next_deferred = 0; + /* + * move the unused scan count back into the shrinker in a + * manner that handles concurrent updates. If we exhausted the + * scan, there is no need to do an update. + */ +- if (total_scan > 0) +- new_nr = atomic_long_add_return(total_scan, ++ if (next_deferred > 0) ++ new_nr = atomic_long_add_return(next_deferred, + &shrinker->nr_deferred[nid]); + else + new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); diff --git a/queue-4.9/nvmet-fix-possible-infinite-loop-triggered-on-hot-namespace-removal.patch b/queue-4.9/nvmet-fix-possible-infinite-loop-triggered-on-hot-namespace-removal.patch new file mode 100644 index 00000000000..2fd5c3ab7fd --- /dev/null +++ b/queue-4.9/nvmet-fix-possible-infinite-loop-triggered-on-hot-namespace-removal.patch @@ -0,0 +1,129 @@ +From e4fcf07cca6a3b6c4be00df16f08be894325eaa3 Mon Sep 17 00:00:00 2001 +From: Solganik Alexander +Date: Sun, 30 Oct 2016 10:35:15 +0200 +Subject: nvmet: Fix possible infinite loop triggered on hot namespace removal + +From: Solganik Alexander + +commit e4fcf07cca6a3b6c4be00df16f08be894325eaa3 upstream. + +When removing a namespace we delete it from the subsystem namespaces +list with list_del_init which allows us to know if it is enabled or +not. 
+ +The problem is that list_del_init initialize the list next and does +not respect the RCU list-traversal we do on the IO path for locating +a namespace. Instead we need to use list_del_rcu which is allowed to +run concurrently with the _rcu list-traversal primitives (keeps list +next intact) and guarantees concurrent nvmet_find_naespace forward +progress. + +By changing that, we cannot rely on ns->dev_link for knowing if the +namspace is enabled, so add enabled indicator entry to nvmet_ns for +that. + +Signed-off-by: Sagi Grimberg +Signed-off-by: Solganik Alexander +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/nvme/target/configfs.c | 6 +++--- + drivers/nvme/target/core.c | 14 ++++++++------ + drivers/nvme/target/nvmet.h | 6 +----- + 3 files changed, 12 insertions(+), 14 deletions(-) + +--- a/drivers/nvme/target/configfs.c ++++ b/drivers/nvme/target/configfs.c +@@ -271,7 +271,7 @@ static ssize_t nvmet_ns_device_path_stor + + mutex_lock(&subsys->lock); + ret = -EBUSY; +- if (nvmet_ns_enabled(ns)) ++ if (ns->enabled) + goto out_unlock; + + kfree(ns->device_path); +@@ -307,7 +307,7 @@ static ssize_t nvmet_ns_device_nguid_sto + int ret = 0; + + mutex_lock(&subsys->lock); +- if (nvmet_ns_enabled(ns)) { ++ if (ns->enabled) { + ret = -EBUSY; + goto out_unlock; + } +@@ -339,7 +339,7 @@ CONFIGFS_ATTR(nvmet_ns_, device_nguid); + + static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page) + { +- return sprintf(page, "%d\n", nvmet_ns_enabled(to_nvmet_ns(item))); ++ return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled); + } + + static ssize_t nvmet_ns_enable_store(struct config_item *item, +--- a/drivers/nvme/target/core.c ++++ b/drivers/nvme/target/core.c +@@ -264,7 +264,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) + int ret = 0; + + mutex_lock(&subsys->lock); +- if (!list_empty(&ns->dev_link)) ++ if (ns->enabled) + goto out_unlock; + + ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, +@@ -309,6 +309,7 @@ int 
nvmet_ns_enable(struct nvmet_ns *ns) + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) + nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); + ++ ns->enabled = true; + ret = 0; + out_unlock: + mutex_unlock(&subsys->lock); +@@ -325,11 +326,11 @@ void nvmet_ns_disable(struct nvmet_ns *n + struct nvmet_ctrl *ctrl; + + mutex_lock(&subsys->lock); +- if (list_empty(&ns->dev_link)) { +- mutex_unlock(&subsys->lock); +- return; +- } +- list_del_init(&ns->dev_link); ++ if (!ns->enabled) ++ goto out_unlock; ++ ++ ns->enabled = false; ++ list_del_rcu(&ns->dev_link); + mutex_unlock(&subsys->lock); + + /* +@@ -351,6 +352,7 @@ void nvmet_ns_disable(struct nvmet_ns *n + + if (ns->bdev) + blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); ++out_unlock: + mutex_unlock(&subsys->lock); + } + +--- a/drivers/nvme/target/nvmet.h ++++ b/drivers/nvme/target/nvmet.h +@@ -47,6 +47,7 @@ struct nvmet_ns { + loff_t size; + u8 nguid[16]; + ++ bool enabled; + struct nvmet_subsys *subsys; + const char *device_path; + +@@ -61,11 +62,6 @@ static inline struct nvmet_ns *to_nvmet_ + return container_of(to_config_group(item), struct nvmet_ns, group); + } + +-static inline bool nvmet_ns_enabled(struct nvmet_ns *ns) +-{ +- return !list_empty_careful(&ns->dev_link); +-} +- + struct nvmet_cq { + u16 qid; + u16 size; diff --git a/queue-4.9/ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch b/queue-4.9/ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch new file mode 100644 index 00000000000..2cff45b3e3b --- /dev/null +++ b/queue-4.9/ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch @@ -0,0 +1,148 @@ +From 64b875f7ac8a5d60a4e191479299e931ee949b67 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 14 Nov 2016 18:48:07 -0600 +Subject: ptrace: Capture the ptracer's creds not PT_PTRACE_CAP + +From: Eric W. Biederman + +commit 64b875f7ac8a5d60a4e191479299e931ee949b67 upstream. + +When the flag PT_PTRACE_CAP was added the PTRACE_TRACEME path was +overlooked. 
This can result in incorrect behavior when an application +like strace traces an exec of a setuid executable. + +Further PT_PTRACE_CAP does not have enough information for making good +security decisions as it does not report which user namespace the +capability is in. This has already allowed one mistake through +insufficient granulariy. + +I found this issue when I was testing another corner case of exec and +discovered that I could not get strace to set PT_PTRACE_CAP even when +running strace as root with a full set of caps. + +This change fixes the above issue with strace allowing stracing as +root a setuid executable without disabling setuid. More fundamentaly +this change allows what is allowable at all times, by using the correct +information in it's decision. + +Fixes: 4214e42f96d4 ("v2.4.9.11 -> v2.4.9.12") +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 2 +- + include/linux/capability.h | 1 + + include/linux/ptrace.h | 1 - + include/linux/sched.h | 1 + + kernel/capability.c | 20 ++++++++++++++++++++ + kernel/ptrace.c | 12 +++++++----- + 6 files changed, 30 insertions(+), 7 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1425,7 +1425,7 @@ static void check_unsafe_exec(struct lin + unsigned n_fs; + + if (p->ptrace) { +- if (p->ptrace & PT_PTRACE_CAP) ++ if (ptracer_capable(p, current_user_ns())) + bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; + else + bprm->unsafe |= LSM_UNSAFE_PTRACE; +--- a/include/linux/capability.h ++++ b/include/linux/capability.h +@@ -243,6 +243,7 @@ static inline bool ns_capable_noaudit(st + extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); + extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); + extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); ++extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); + + /* audit system wants to get cap info from files as well 
*/ + extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -19,7 +19,6 @@ + #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ + #define PT_PTRACED 0x00000001 + #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ +-#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ + + #define PT_OPT_FLAG_SHIFT 3 + /* PT_TRACE_* event enable flags */ +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1656,6 +1656,7 @@ struct task_struct { + struct list_head cpu_timers[3]; + + /* process credentials */ ++ const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ + const struct cred __rcu *real_cred; /* objective and real subjective task + * credentials (COW) */ + const struct cred __rcu *cred; /* effective (overridable) subjective task +--- a/kernel/capability.c ++++ b/kernel/capability.c +@@ -485,3 +485,23 @@ bool capable_wrt_inode_uidgid(const stru + return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); + } + EXPORT_SYMBOL(capable_wrt_inode_uidgid); ++ ++/** ++ * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace ++ * @tsk: The task that may be ptraced ++ * @ns: The user namespace to search for CAP_SYS_PTRACE in ++ * ++ * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE ++ * in the specified user namespace. 
++ */ ++bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) ++{ ++ int ret = 0; /* An absent tracer adds no restrictions */ ++ const struct cred *cred; ++ rcu_read_lock(); ++ cred = rcu_dereference(tsk->ptracer_cred); ++ if (cred) ++ ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); ++ rcu_read_unlock(); ++ return (ret == 0); ++} +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -39,6 +39,9 @@ void __ptrace_link(struct task_struct *c + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); + child->parent = new_parent; ++ rcu_read_lock(); ++ child->ptracer_cred = get_cred(__task_cred(new_parent)); ++ rcu_read_unlock(); + } + + /** +@@ -71,12 +74,16 @@ void __ptrace_link(struct task_struct *c + */ + void __ptrace_unlink(struct task_struct *child) + { ++ const struct cred *old_cred; + BUG_ON(!child->ptrace); + + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + + child->parent = child->real_parent; + list_del_init(&child->ptrace_entry); ++ old_cred = child->ptracer_cred; ++ child->ptracer_cred = NULL; ++ put_cred(old_cred); + + spin_lock(&child->sighand->siglock); + child->ptrace = 0; +@@ -326,11 +333,6 @@ static int ptrace_attach(struct task_str + + task_lock(task); + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); +- if (!retval) { +- struct mm_struct *mm = task->mm; +- if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE)) +- flags |= PT_PTRACE_CAP; +- } + task_unlock(task); + if (retval) + goto unlock_creds; diff --git a/queue-4.9/ptrace-don-t-allow-accessing-an-undumpable-mm.patch b/queue-4.9/ptrace-don-t-allow-accessing-an-undumpable-mm.patch new file mode 100644 index 00000000000..7e10707987b --- /dev/null +++ b/queue-4.9/ptrace-don-t-allow-accessing-an-undumpable-mm.patch @@ -0,0 +1,279 @@ +From 84d77d3f06e7e8dea057d10e8ec77ad71f721be3 Mon Sep 17 00:00:00 2001 +From: "Eric W. 
Biederman" +Date: Tue, 22 Nov 2016 12:06:50 -0600 +Subject: ptrace: Don't allow accessing an undumpable mm + +From: Eric W. Biederman + +commit 84d77d3f06e7e8dea057d10e8ec77ad71f721be3 upstream. + +It is the reasonable expectation that if an executable file is not +readable there will be no way for a user without special privileges to +read the file. This is enforced in ptrace_attach but if ptrace +is already attached before exec there is no enforcement for read-only +executables. + +As the only way to read such an mm is through access_process_vm +spin a variant called ptrace_access_vm that will fail if the +target process is not being ptraced by the current process, or +the current process did not have sufficient privileges when ptracing +began to read the target processes mm. + +In the ptrace implementations replace access_process_vm by +ptrace_access_vm. There remain several ptrace sites that still use +access_process_vm as they are reading the target executables +instructions (for kernel consumption) or register stacks. As such it +does not appear necessary to add a permission check to those calls. + +This bug has always existed in Linux. + +Fixes: v1.0 +Reported-by: Andy Lutomirski +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + arch/alpha/kernel/ptrace.c | 2 - + arch/blackfin/kernel/ptrace.c | 4 +-- + arch/cris/arch-v32/kernel/ptrace.c | 2 - + arch/ia64/kernel/ptrace.c | 2 - + arch/mips/kernel/ptrace32.c | 4 +-- + arch/powerpc/kernel/ptrace32.c | 4 +-- + include/linux/mm.h | 2 + + include/linux/ptrace.h | 3 ++ + kernel/ptrace.c | 42 +++++++++++++++++++++++++++++++------ + mm/memory.c | 2 - + mm/nommu.c | 2 - + 11 files changed, 52 insertions(+), 17 deletions(-) + +--- a/arch/alpha/kernel/ptrace.c ++++ b/arch/alpha/kernel/ptrace.c +@@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *chi + /* When I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ + case PTRACE_PEEKDATA: +- copied = access_process_vm(child, addr, &tmp, sizeof(tmp), ++ copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), + FOLL_FORCE); + ret = -EIO; + if (copied != sizeof(tmp)) +--- a/arch/blackfin/kernel/ptrace.c ++++ b/arch/blackfin/kernel/ptrace.c +@@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *chi + switch (bfin_mem_access_type(addr, to_copy)) { + case BFIN_MEM_ACCESS_CORE: + case BFIN_MEM_ACCESS_CORE_ONLY: +- copied = access_process_vm(child, addr, &tmp, ++ copied = ptrace_access_vm(child, addr, &tmp, + to_copy, FOLL_FORCE); + if (copied) + break; +@@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *chi + switch (bfin_mem_access_type(addr, to_copy)) { + case BFIN_MEM_ACCESS_CORE: + case BFIN_MEM_ACCESS_CORE_ONLY: +- copied = access_process_vm(child, addr, &data, ++ copied = ptrace_access_vm(child, addr, &data, + to_copy, + FOLL_FORCE | FOLL_WRITE); + break; +--- a/arch/cris/arch-v32/kernel/ptrace.c ++++ b/arch/cris/arch-v32/kernel/ptrace.c +@@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *chi + /* The trampoline page is globally mapped, no page table to traverse.*/ + tmp = *(unsigned long*)addr; + } else { +- copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); ++ copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); + + if (copied != sizeof(tmp)) + break; +--- a/arch/ia64/kernel/ptrace.c ++++ b/arch/ia64/kernel/ptrace.c +@@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + /* read word at location addr */ +- if (access_process_vm(child, addr, &data, sizeof(data), ++ if (ptrace_access_vm(child, addr, &data, sizeof(data), + FOLL_FORCE) + != sizeof(data)) + return -EIO; +--- a/arch/mips/kernel/ptrace32.c ++++ b/arch/mips/kernel/ptrace32.c +@@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_stru + if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) + break; + +- copied = 
access_process_vm(child, (u64)addrOthers, &tmp, ++ copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), FOLL_FORCE); + if (copied != sizeof(tmp)) + break; +@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_stru + if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) + break; + ret = 0; +- if (access_process_vm(child, (u64)addrOthers, &data, ++ if (ptrace_access_vm(child, (u64)addrOthers, &data, + sizeof(data), + FOLL_FORCE | FOLL_WRITE) == sizeof(data)) + break; +--- a/arch/powerpc/kernel/ptrace32.c ++++ b/arch/powerpc/kernel/ptrace32.c +@@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_stru + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + +- copied = access_process_vm(child, (u64)addrOthers, &tmp, ++ copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), FOLL_FORCE); + if (copied != sizeof(tmp)) + break; +@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_stru + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + ret = 0; +- if (access_process_vm(child, (u64)addrOthers, &tmp, ++ if (ptrace_access_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), + FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) + break; +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1270,6 +1270,8 @@ extern int access_process_vm(struct task + unsigned int gup_flags); + extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, unsigned int gup_flags); ++extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, ++ unsigned long addr, void *buf, int len, unsigned int gup_flags); + + long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -8,6 +8,9 @@ + #include /* For task_active_pid_ns. 
*/
+ #include
+
++extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
++ void *buf, int len, unsigned int gup_flags);
++
+ /*
+ * Ptrace flags
+ *
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -27,6 +27,35 @@
+ #include
+ #include
+
++/*
++ * Access another process' address space via ptrace.
++ * Source/target buffer must be kernel space,
++ * Do not walk the page table directly, use get_user_pages
++ */
++int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
++ void *buf, int len, unsigned int gup_flags)
++{
++ struct mm_struct *mm;
++ int ret;
++
++ mm = get_task_mm(tsk);
++ if (!mm)
++ return 0;
++
++ if (!tsk->ptrace ||
++ (current != tsk->parent) ||
++ ((get_dumpable(mm) != SUID_DUMP_USER) &&
++ !ptracer_capable(tsk, mm->user_ns))) {
++ mmput(mm);
++ return 0;
++ }
++
++ ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
++ mmput(mm);
++
++ return ret;
++}
++
+
+ /*
+ * ptrace a task: make the debugger its new parent and
+@@ -535,7 +564,8 @@ int ptrace_readdata(struct task_struct *
+ int this_len, retval;
+
+ this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
+- retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
++ retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);
++
+ if (!retval) {
+ if (copied)
+ break;
+@@ -562,7 +592,7 @@ int ptrace_writedata(struct task_struct
+ this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
+ if (copy_from_user(buf, src, this_len))
+ return -EFAULT;
+- retval = access_process_vm(tsk, dst, buf, this_len,
++ retval = ptrace_access_vm(tsk, dst, buf, this_len,
+ FOLL_FORCE | FOLL_WRITE);
+ if (!retval) {
+ if (copied)
+@@ -1126,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_
+ unsigned long tmp;
+ int copied;
+
+- copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
++ copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
+ if (copied != sizeof(tmp))
+ return -EIO;
+ return put_user(tmp, (unsigned long __user *)data);
+@@ -1137,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_
+ {
+ int copied;
+
+- copied = access_process_vm(tsk, addr, &data, sizeof(data),
++ copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
+ FOLL_FORCE | FOLL_WRITE);
+ return (copied == sizeof(data)) ? 0 : -EIO;
+ }
+@@ -1155,7 +1185,7 @@ int compat_ptrace_request(struct task_st
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+- ret = access_process_vm(child, addr, &word, sizeof(word),
++ ret = ptrace_access_vm(child, addr, &word, sizeof(word),
+ FOLL_FORCE);
+ if (ret != sizeof(word))
+ ret = -EIO;
+@@ -1165,7 +1195,7 @@ int compat_ptrace_request(struct task_st
+
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+- ret = access_process_vm(child, addr, &data, sizeof(data),
++ ret = ptrace_access_vm(child, addr, &data, sizeof(data),
+ FOLL_FORCE | FOLL_WRITE);
+ ret = (ret != sizeof(data) ? -EIO : 0);
+ break;
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3868,7 +3868,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
+ * Access another process' address space as given in mm. If non-NULL, use the
+ * given task for page fault accounting.
+ */
+-static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
++int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long addr, void *buf, int len, unsigned int gup_flags)
+ {
+ struct vm_area_struct *vma;
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env
+ }
+ EXPORT_SYMBOL(filemap_map_pages);
+
+-static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
++int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long addr, void *buf, int len, unsigned int gup_flags)
+ {
+ struct vm_area_struct *vma;
diff --git a/queue-4.9/revert-f2fs-use-percpu_counter-for-of-dirty-pages-in-inode.patch b/queue-4.9/revert-f2fs-use-percpu_counter-for-of-dirty-pages-in-inode.patch
new file mode 100644
index 00000000000..9073f922599
--- /dev/null
+++ b/queue-4.9/revert-f2fs-use-percpu_counter-for-of-dirty-pages-in-inode.patch
@@ -0,0 +1,100 @@
+From 204706c7accfabb67b97eef9f9a28361b6201199 Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim
+Date: Fri, 2 Dec 2016 15:11:32 -0800
+Subject: Revert "f2fs: use percpu_counter for # of dirty pages in inode"
+
+From: Jaegeuk Kim
+
+commit 204706c7accfabb67b97eef9f9a28361b6201199 upstream.
+
+This reverts commit 1beba1b3a953107c3ff5448ab4e4297db4619c76.
+
+The perpcu_counter doesn't provide atomicity in single core and consume more
+DRAM. That incurs fs_mark test failure due to ENOMEM.
+
+Signed-off-by: Jaegeuk Kim
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/f2fs/f2fs.h | 10 +++++-----
+ fs/f2fs/file.c | 2 +-
+ fs/f2fs/super.c | 7 +------
+ 3 files changed, 7 insertions(+), 12 deletions(-)
+
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -428,7 +428,7 @@ struct f2fs_inode_info {
+ /* Use below internally in f2fs*/
+ unsigned long flags; /* use to pass per-file flags */
+ struct rw_semaphore i_sem; /* protect fi info */
+- struct percpu_counter dirty_pages; /* # of dirty pages */
++ atomic_t dirty_pages; /* # of dirty pages */
+ f2fs_hash_t chash; /* hash value of given file name */
+ unsigned int clevel; /* maximum level of given file name */
+ nid_t i_xattr_nid; /* node id that contains xattrs */
+@@ -1242,7 +1242,7 @@ static inline void inc_page_count(struct
+
+ static inline void inode_inc_dirty_pages(struct inode *inode)
+ {
+- percpu_counter_inc(&F2FS_I(inode)->dirty_pages);
++ atomic_inc(&F2FS_I(inode)->dirty_pages);
+ inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ }
+@@ -1258,7 +1258,7 @@ static inline void inode_dec_dirty_pages
+ !S_ISLNK(inode->i_mode))
+ return;
+
+- percpu_counter_dec(&F2FS_I(inode)->dirty_pages);
++ atomic_dec(&F2FS_I(inode)->dirty_pages);
+ dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ }
+@@ -1268,9 +1268,9 @@ static inline s64 get_pages(struct f2fs_
+ return percpu_counter_sum_positive(&sbi->nr_pages[count_type]);
+ }
+
+-static inline s64 get_dirty_pages(struct inode *inode)
++static inline int get_dirty_pages(struct inode *inode)
+ {
+- return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages);
++ return atomic_read(&F2FS_I(inode)->dirty_pages);
+ }
+
+ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
+--- a/fs/f2fs/file.c
++++ b/fs/f2fs/file.c
+@@ -1526,7 +1526,7 @@ static int f2fs_ioc_start_atomic_write(s
+ goto out;
+
+ f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
+- "Unexpected flush for atomic writes: ino=%lu, npages=%lld",
++ "Unexpected flush for atomic writes: ino=%lu, npages=%u",
+ inode->i_ino, get_dirty_pages(inode));
+ ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+ if (ret)
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -558,13 +558,9 @@ static struct inode *f2fs_alloc_inode(st
+
+ init_once((void *) fi);
+
+- if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) {
+- kmem_cache_free(f2fs_inode_cachep, fi);
+- return NULL;
+- }
+-
+ /* Initialize f2fs-specific inode info */
+ fi->vfs_inode.i_version = 1;
++ atomic_set(&fi->dirty_pages, 0);
+ fi->i_current_depth = 1;
+ fi->i_advise = 0;
+ init_rwsem(&fi->i_sem);
+@@ -687,7 +683,6 @@ static void f2fs_i_callback(struct rcu_h
+
+ static void f2fs_destroy_inode(struct inode *inode)
+ {
+- percpu_counter_destroy(&F2FS_I(inode)->dirty_pages);
+ call_rcu(&inode->i_rcu, f2fs_i_callback);
+ }
+
diff --git a/queue-4.9/series b/queue-4.9/series
index e158e8848eb..aae541eab2e 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -26,3 +26,29 @@ alsa-hda-fix-headset-mic-problem-on-a-dell-laptop.patch
 alsa-hda-gate-the-mic-jack-on-hp-z1-gen3-aio.patch
 alsa-hda-when-comparing-pin-configurations-ignore-assoc-in-addition-to-seq.patch
 clk-ti-omap36xx-work-around-sprz319-advisory-2.1.patch
+exec-ensure-mm-user_ns-contains-the-execed-files.patch
+fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch
+splice-reinstate-sigpipe-epipe-handling.patch
+block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch
+mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch
+vfs-mm-fix-return-value-of-read-at-s_maxbytes.patch
+ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch
+ptrace-don-t-allow-accessing-an-undumpable-mm.patch
+crypto-caam-fix-aead-givenc-descriptors.patch
+ext4-don-t-lock-buffer-in-ext4_commit_super-if-holding-spinlock.patch
+ext4-fix-mballoc-breakage-with-64k-block-size.patch
+ext4-fix-stack-memory-corruption-with-64k-block-size.patch
+ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch
+ext4-fix-in-superblock-mount-options-processing.patch
+ext4-add-sanity-checking-to-count_overhead.patch
+ext4-reject-inodes-with-negative-size.patch
+ext4-return-enomem-instead-of-success.patch
+ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch
+revert-f2fs-use-percpu_counter-for-of-dirty-pages-in-inode.patch
+f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch
+f2fs-fix-overflow-due-to-condition-check-order.patch
+f2fs-fix-to-determine-start_cp_addr-by-sbi-cur_cp_pack.patch
+loop-return-proper-error-from-loop_queue_rq.patch
+nvmet-fix-possible-infinite-loop-triggered-on-hot-namespace-removal.patch
+mm-vmscan.c-set-correct-defer-count-for-shrinker.patch
+mm-page_alloc-keep-pcp-count-and-list-contents-in-sync-if-struct-page-is-corrupted.patch
diff --git a/queue-4.9/splice-reinstate-sigpipe-epipe-handling.patch b/queue-4.9/splice-reinstate-sigpipe-epipe-handling.patch
new file mode 100644
index 00000000000..e954b4e8fce
--- /dev/null
+++ b/queue-4.9/splice-reinstate-sigpipe-epipe-handling.patch
@@ -0,0 +1,50 @@
+From 52bce91165e5f2db422b2b972e83d389e5e4725c Mon Sep 17 00:00:00 2001
+From: Linus Torvalds
+Date: Wed, 21 Dec 2016 10:59:34 -0800
+Subject: splice: reinstate SIGPIPE/EPIPE handling
+
+From: Linus Torvalds
+
+commit 52bce91165e5f2db422b2b972e83d389e5e4725c upstream.
+
+Commit 8924feff66f3 ("splice: lift pipe_lock out of splice_to_pipe()")
+caused a regression when there were no more readers left on a pipe that
+was being spliced into: rather than the expected SIGPIPE and -EPIPE
+return value, the writer would end up waiting forever for space to free
+up (which obviously was not going to happen with no readers around).
+
+Fixes: 8924feff66f3 ("splice: lift pipe_lock out of splice_to_pipe()")
+Reported-and-tested-by: Andreas Schwab
+Debugged-by: Al Viro
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/splice.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -1086,7 +1086,13 @@ EXPORT_SYMBOL(do_splice_direct);
+
+ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
+ {
+- while (pipe->nrbufs == pipe->buffers) {
++ for (;;) {
++ if (unlikely(!pipe->readers)) {
++ send_sig(SIGPIPE, current, 0);
++ return -EPIPE;
++ }
++ if (pipe->nrbufs != pipe->buffers)
++ return 0;
+ if (flags & SPLICE_F_NONBLOCK)
+ return -EAGAIN;
+ if (signal_pending(current))
+@@ -1095,7 +1101,6 @@ static int wait_for_space(struct pipe_in
+ pipe_wait(pipe);
+ pipe->waiting_writers--;
+ }
+- return 0;
+ }
+
+ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
diff --git a/queue-4.9/vfs-mm-fix-return-value-of-read-at-s_maxbytes.patch b/queue-4.9/vfs-mm-fix-return-value-of-read-at-s_maxbytes.patch
new file mode 100644
index 00000000000..950f4048cb9
--- /dev/null
+++ b/queue-4.9/vfs-mm-fix-return-value-of-read-at-s_maxbytes.patch
@@ -0,0 +1,49 @@
+From d05c5f7ba164aed3db02fb188c26d0dd94f5455b Mon Sep 17 00:00:00 2001
+From: Linus Torvalds
+Date: Wed, 14 Dec 2016 12:45:25 -0800
+Subject: vfs,mm: fix return value of read() at s_maxbytes
+
+From: Linus Torvalds
+
+commit d05c5f7ba164aed3db02fb188c26d0dd94f5455b upstream.
+
+We truncated the possible read iterator to s_maxbytes in commit
+c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"),
+but our end condition handling was wrong: it's not an error to try to
+read at the end of the file.
+
+Reading past the end should return EOF (0), not EINVAL.
+
+See for example
+
+ https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1649342
+ http://lists.gnu.org/archive/html/bug-coreutils/2016-12/msg00008.html
+
+where a md5sum of a maximally sized file fails because the final read is
+exactly at s_maxbytes.
+
+Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()")
+Reported-by: Joseph Salisbury
+Cc: Wei Fang
+Cc: Christoph Hellwig
+Cc: Dave Chinner
+Cc: Al Viro
+Cc: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/filemap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1686,7 +1686,7 @@ static ssize_t do_generic_file_read(stru
+ int error = 0;
+
+ if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
+- return -EINVAL;
++ return 0;
+ iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
+
+ index = *ppos >> PAGE_SHIFT;