git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2017 10:05:46 +0000 (11:05 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jan 2017 10:05:46 +0000 (11:05 +0100)
added patches:
block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch
crypto-caam-fix-aead-givenc-descriptors.patch
exec-ensure-mm-user_ns-contains-the-execed-files.patch
ext4-add-sanity-checking-to-count_overhead.patch
ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch
ext4-fix-in-superblock-mount-options-processing.patch
ext4-fix-mballoc-breakage-with-64k-block-size.patch
ext4-fix-stack-memory-corruption-with-64k-block-size.patch
ext4-reject-inodes-with-negative-size.patch
ext4-return-enomem-instead-of-success.patch
ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch
f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch
fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch
loop-return-proper-error-from-loop_queue_rq.patch
mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch
mm-vmscan.c-set-correct-defer-count-for-shrinker.patch
ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch

18 files changed:
queue-4.4/block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch [new file with mode: 0644]
queue-4.4/crypto-caam-fix-aead-givenc-descriptors.patch [new file with mode: 0644]
queue-4.4/exec-ensure-mm-user_ns-contains-the-execed-files.patch [new file with mode: 0644]
queue-4.4/ext4-add-sanity-checking-to-count_overhead.patch [new file with mode: 0644]
queue-4.4/ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch [new file with mode: 0644]
queue-4.4/ext4-fix-in-superblock-mount-options-processing.patch [new file with mode: 0644]
queue-4.4/ext4-fix-mballoc-breakage-with-64k-block-size.patch [new file with mode: 0644]
queue-4.4/ext4-fix-stack-memory-corruption-with-64k-block-size.patch [new file with mode: 0644]
queue-4.4/ext4-reject-inodes-with-negative-size.patch [new file with mode: 0644]
queue-4.4/ext4-return-enomem-instead-of-success.patch [new file with mode: 0644]
queue-4.4/ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch [new file with mode: 0644]
queue-4.4/f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch [new file with mode: 0644]
queue-4.4/fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch [new file with mode: 0644]
queue-4.4/loop-return-proper-error-from-loop_queue_rq.patch [new file with mode: 0644]
queue-4.4/mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch [new file with mode: 0644]
queue-4.4/mm-vmscan.c-set-correct-defer-count-for-shrinker.patch [new file with mode: 0644]
queue-4.4/ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch [new file with mode: 0644]
queue-4.4/series

diff --git a/queue-4.4/block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch b/queue-4.4/block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch
new file mode 100644 (file)
index 0000000..0d0e7b8
--- /dev/null
@@ -0,0 +1,68 @@
+From bcc7f5b4bee8e327689a4d994022765855c807ff Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.com>
+Date: Mon, 12 Dec 2016 08:21:51 -0700
+Subject: block_dev: don't test bdev->bd_contains when it is not stable
+
+From: NeilBrown <neilb@suse.com>
+
+commit bcc7f5b4bee8e327689a4d994022765855c807ff upstream.
+
+bdev->bd_contains is not stable before calling __blkdev_get().
+When __blkdev_get() is called on a partition with ->bd_openers == 0
+it sets
+  bdev->bd_contains = bdev;
+which is not correct for a partition.
+After a call to __blkdev_get() succeeds, ->bd_openers will be > 0
+and then ->bd_contains is stable.
+
+When FMODE_EXCL is used, blkdev_get() calls
+   bd_start_claiming() ->  bd_prepare_to_claim() -> bd_may_claim()
+
+This call happens before __blkdev_get() is called, so ->bd_contains
+is not stable.  So bd_may_claim() cannot safely use ->bd_contains.
+It currently tries to use it, and this can lead to a BUG_ON().
+
+This happens when a whole device is already open with a bd_holder (in
+use by dm in my particular example) and two threads race to open a
+partition of that device for the first time, one opening with O_EXCL and
+one without.
+
+The thread that doesn't use O_EXCL gets through blkdev_get() to
+__blkdev_get(), gains the ->bd_mutex, and sets bdev->bd_contains = bdev;
+
+Immediately thereafter the other thread, using FMODE_EXCL, calls
+bd_start_claiming() from blkdev_get().  This should fail because the
+whole device has a holder, but because bdev->bd_contains == bdev
+bd_may_claim() incorrectly reports success.
+This thread continues and blocks on bd_mutex.
+
+The first thread then sets bdev->bd_contains correctly and drops the mutex.
+The thread using FMODE_EXCL then continues and when it calls bd_may_claim()
+again in:
+                       BUG_ON(!bd_may_claim(bdev, whole, holder));
+The BUG_ON fires.
+
+Fix this by removing the dependency on ->bd_contains in
+bd_may_claim().  As bd_may_claim() has direct access to the whole
+device, it can simply test if the target bdev is the whole device.
+
+Fixes: 6b4517a7913a ("block: implement bd_claiming and claiming block")
+Signed-off-by: NeilBrown <neilb@suse.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/block_dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -759,7 +759,7 @@ static bool bd_may_claim(struct block_de
+               return true;     /* already a holder */
+       else if (bdev->bd_holder != NULL)
+               return false;    /* held by someone else */
+-      else if (bdev->bd_contains == bdev)
++      else if (whole == bdev)
+               return true;     /* is a whole device which isn't held */
+       else if (whole->bd_holder == bd_may_claim)
diff --git a/queue-4.4/crypto-caam-fix-aead-givenc-descriptors.patch b/queue-4.4/crypto-caam-fix-aead-givenc-descriptors.patch
new file mode 100644 (file)
index 0000000..3ec2b0c
--- /dev/null
@@ -0,0 +1,48 @@
+From d128af17876d79b87edf048303f98b35f6a53dbc Mon Sep 17 00:00:00 2001
+From: Alex Porosanu <alexandru.porosanu@nxp.com>
+Date: Wed, 9 Nov 2016 10:46:11 +0200
+Subject: crypto: caam - fix AEAD givenc descriptors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Porosanu <alexandru.porosanu@nxp.com>
+
+commit d128af17876d79b87edf048303f98b35f6a53dbc upstream.
+
+The AEAD givenc descriptor relies on moving the IV through the
+output FIFO and then back to the CTX2 for authentication. The
+SEQ FIFO STORE could be scheduled before the data can be
+read from OFIFO, especially since the SEQ FIFO LOAD needs
+to wait for the SEQ FIFO LOAD SKIP to finish first. The
+SKIP takes more time when the input is SG than when it's
+a contiguous buffer. If the SEQ FIFO LOAD is not scheduled
+before the STORE, the DECO will hang waiting for data
+to be available in the OFIFO so it can be transferred to C2.
+In order to overcome this, first force transfer of IV to C2
+by starting the "cryptlen" transfer first and then starting to
+store data from OFIFO to the output buffer.
+
+Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation")
+Signed-off-by: Alex Porosanu <alexandru.porosanu@nxp.com>
+Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/caam/caamalg.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/caam/caamalg.c
++++ b/drivers/crypto/caam/caamalg.c
+@@ -702,7 +702,9 @@ copy_iv:
+       /* Will read cryptlen */
+       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+-      aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2);
++      append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF |
++                           FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH);
++      append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
+       /* Write ICV */
+       append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB |
diff --git a/queue-4.4/exec-ensure-mm-user_ns-contains-the-execed-files.patch b/queue-4.4/exec-ensure-mm-user_ns-contains-the-execed-files.patch
new file mode 100644 (file)
index 0000000..0dbacf6
--- /dev/null
@@ -0,0 +1,117 @@
+From f84df2a6f268de584a201e8911384a2d244876e3 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 16 Nov 2016 22:06:51 -0600
+Subject: exec: Ensure mm->user_ns contains the execed files
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit f84df2a6f268de584a201e8911384a2d244876e3 upstream.
+
+When the user namespace support was merged the need to prevent
+ptrace from revealing the contents of an unreadable executable
+was overlooked.
+
+Correct this oversight by ensuring that the executed file
+or files are in mm->user_ns, by adjusting mm->user_ns.
+
+Use the new function privileged_wrt_inode_uidgid to see if
+the executable is a member of the user namespace, and as such
+if having CAP_SYS_PTRACE in the user namespace should allow
+tracing the executable.  If not update mm->user_ns to
+the parent user namespace until an appropriate parent is found.
+
+Reported-by: Jann Horn <jann@thejh.net>
+Fixes: 9e4a36ece652 ("userns: Fail exec for suid and sgid binaries with ids outside our user namespace.")
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/exec.c                  |   19 +++++++++++++++++--
+ include/linux/capability.h |    1 +
+ kernel/capability.c        |   16 ++++++++++++++--
+ 3 files changed, 32 insertions(+), 4 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1123,8 +1123,22 @@ EXPORT_SYMBOL(flush_old_exec);
+ void would_dump(struct linux_binprm *bprm, struct file *file)
+ {
+-      if (inode_permission(file_inode(file), MAY_READ) < 0)
++      struct inode *inode = file_inode(file);
++      if (inode_permission(inode, MAY_READ) < 0) {
++              struct user_namespace *old, *user_ns;
+               bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
++
++              /* Ensure mm->user_ns contains the executable */
++              user_ns = old = bprm->mm->user_ns;
++              while ((user_ns != &init_user_ns) &&
++                     !privileged_wrt_inode_uidgid(user_ns, inode))
++                      user_ns = user_ns->parent;
++
++              if (old != user_ns) {
++                      bprm->mm->user_ns = get_user_ns(user_ns);
++                      put_user_ns(old);
++              }
++      }
+ }
+ EXPORT_SYMBOL(would_dump);
+@@ -1154,7 +1168,6 @@ void setup_new_exec(struct linux_binprm
+           !gid_eq(bprm->cred->gid, current_egid())) {
+               current->pdeath_signal = 0;
+       } else {
+-              would_dump(bprm, bprm->file);
+               if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
+                       set_dumpable(current->mm, suid_dumpable);
+       }
+@@ -1587,6 +1600,8 @@ static int do_execveat_common(int fd, st
+       if (retval < 0)
+               goto out;
++      would_dump(bprm, bprm->file);
++
+       retval = exec_binprm(bprm);
+       if (retval < 0)
+               goto out;
+--- a/include/linux/capability.h
++++ b/include/linux/capability.h
+@@ -247,6 +247,7 @@ static inline bool ns_capable_noaudit(st
+       return true;
+ }
+ #endif /* CONFIG_MULTIUSER */
++extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
+ extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
+ extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
+--- a/kernel/capability.c
++++ b/kernel/capability.c
+@@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *
+ EXPORT_SYMBOL(file_ns_capable);
+ /**
++ * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
++ * @ns: The user namespace in question
++ * @inode: The inode in question
++ *
++ * Return true if the inode uid and gid are within the namespace.
++ */
++bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
++{
++      return kuid_has_mapping(ns, inode->i_uid) &&
++              kgid_has_mapping(ns, inode->i_gid);
++}
++
++/**
+  * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
+  * @inode: The inode in question
+  * @cap: The capability in question
+@@ -469,7 +482,6 @@ bool capable_wrt_inode_uidgid(const stru
+ {
+       struct user_namespace *ns = current_user_ns();
+-      return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
+-              kgid_has_mapping(ns, inode->i_gid);
++      return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
+ }
+ EXPORT_SYMBOL(capable_wrt_inode_uidgid);
diff --git a/queue-4.4/ext4-add-sanity-checking-to-count_overhead.patch b/queue-4.4/ext4-add-sanity-checking-to-count_overhead.patch
new file mode 100644 (file)
index 0000000..58703d4
--- /dev/null
@@ -0,0 +1,42 @@
+From c48ae41bafe31e9a66d8be2ced4e42a6b57fa814 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Fri, 18 Nov 2016 13:37:47 -0500
+Subject: ext4: add sanity checking to count_overhead()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit c48ae41bafe31e9a66d8be2ced4e42a6b57fa814 upstream.
+
+The commit "ext4: sanity check the block and cluster size at mount
+time" should prevent any problems, but in case the superblock is
+modified while the file system is mounted, add an extra safety check
+to make sure we won't overrun the allocated buffer.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3037,10 +3037,15 @@ static int count_overhead(struct super_b
+                       ext4_set_bit(s++, buf);
+                       count++;
+               }
+-              for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
+-                      ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+-                      count++;
++              j = ext4_bg_num_gdb(sb, grp);
++              if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
++                      ext4_error(sb, "Invalid number of block group "
++                                 "descriptor blocks: %d", j);
++                      j = EXT4_BLOCKS_PER_GROUP(sb) - s;
+               }
++              count += j;
++              for (; j > 0; j--)
++                      ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+       }
+       if (!count)
+               return 0;
diff --git a/queue-4.4/ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch b/queue-4.4/ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch
new file mode 100644 (file)
index 0000000..1ecca43
--- /dev/null
@@ -0,0 +1,93 @@
+From 73b92a2a5e97d17cc4d5c4fe9d724d3273fb6fd2 Mon Sep 17 00:00:00 2001
+From: Sergey Karamov <skaramov@google.com>
+Date: Sat, 10 Dec 2016 17:54:58 -0500
+Subject: ext4: do not perform data journaling when data is encrypted
+
+From: Sergey Karamov <skaramov@google.com>
+
+commit 73b92a2a5e97d17cc4d5c4fe9d724d3273fb6fd2 upstream.
+
+Currently data journalling is incompatible with encryption: enabling both
+at the same time has never been supported by design, and would result in
+unpredictable behavior. However, users are not precluded from turning on
+both features simultaneously. This change programmatically replaces data
+journaling for encrypted regular files with ordered data journaling mode.
+
+Background:
+Journaling encrypted data has not been supported because it operates on
+buffer heads of the page in the page cache. Namely, when the commit
+happens, which could be up to five seconds after caching, the commit
+thread uses the buffer heads attached to the page to copy the contents of
+the page to the journal. With encryption, it would have been required to
+keep the bounce buffer with ciphertext for up to the aforementioned five
+seconds, since the page cache can only hold plaintext and could not be
+used for journaling. Alternatively, it would be required to setup the
+journal to initiate a callback at the commit time to perform deferred
+encryption - in this case, not only would the data have to be written
+twice, but it would also have to be encrypted twice. This level of
+complexity was not justified for a mode that in practice is very rarely
+used because of the overhead from the data journalling.
+
+Solution:
+If data=journaled has been set as a mount option for a filesystem, or if
+journaling is enabled on a regular file, do not perform journaling if the
+file is also encrypted, instead fall back to the data=ordered mode for the
+file.
+
+Rationale:
+The intent is to allow seamless and proper filesystem operation when
+journaling and encryption have both been enabled, and have these two
+conflicting features gracefully resolved by the filesystem.
+
+Fixes: 4461471107b7
+Signed-off-by: Sergey Karamov <skaramov@google.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4_jbd2.h |   14 ++++++++------
+ fs/ext4/super.c     |    5 +++++
+ 2 files changed, 13 insertions(+), 6 deletions(-)
+
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -395,17 +395,19 @@ static inline int ext4_inode_journal_mod
+               return EXT4_INODE_WRITEBACK_DATA_MODE;  /* writeback */
+       /* We do not support data journalling with delayed allocation */
+       if (!S_ISREG(inode->i_mode) ||
+-          test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+-              return EXT4_INODE_JOURNAL_DATA_MODE;    /* journal data */
+-      if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
+-          !test_opt(inode->i_sb, DELALLOC))
++          test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
++          (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
++          !test_opt(inode->i_sb, DELALLOC))) {
++              /* We do not support data journalling for encrypted data */
++              if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode))
++                      return EXT4_INODE_ORDERED_DATA_MODE;  /* ordered */
+               return EXT4_INODE_JOURNAL_DATA_MODE;    /* journal data */
++      }
+       if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+               return EXT4_INODE_ORDERED_DATA_MODE;    /* ordered */
+       if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+               return EXT4_INODE_WRITEBACK_DATA_MODE;  /* writeback */
+-      else
+-              BUG();
++      BUG();
+ }
+ static inline int ext4_should_journal_data(struct inode *inode)
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3345,6 +3345,11 @@ static int ext4_fill_super(struct super_
+                                "both data=journal and dax");
+                       goto failed_mount;
+               }
++              if (ext4_has_feature_encrypt(sb)) {
++                      ext4_msg(sb, KERN_WARNING,
++                               "encrypted files will use data=ordered "
++                               "instead of data journaling mode");
++              }
+               if (test_opt(sb, DELALLOC))
+                       clear_opt(sb, DELALLOC);
+       } else {
diff --git a/queue-4.4/ext4-fix-in-superblock-mount-options-processing.patch b/queue-4.4/ext4-fix-in-superblock-mount-options-processing.patch
new file mode 100644 (file)
index 0000000..f156d61
--- /dev/null
@@ -0,0 +1,105 @@
+From 5aee0f8a3f42c94c5012f1673420aee96315925a Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Fri, 18 Nov 2016 13:24:26 -0500
+Subject: ext4: fix in-superblock mount options processing
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 5aee0f8a3f42c94c5012f1673420aee96315925a upstream.
+
+Fix a large number of problems with how we handle mount options in the
+superblock.  For one, if the string in the superblock is long enough
+that it is not null terminated, we could run off the end of the string
+and try to interpret superblock fields as characters.  It's unlikely
+this will cause a security problem, but it could result in an invalid
+parse.  Also, parse_options is destructive to the string, so in some
+cases if there is a comma-separated string, it would be modified in
+the superblock.  (Fortunately it only happens on file systems with a
+1k block size.)
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c |   38 +++++++++++++++++++++++---------------
+ 1 file changed, 23 insertions(+), 15 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3130,7 +3130,7 @@ static int ext4_fill_super(struct super_
+       char *orig_data = kstrdup(data, GFP_KERNEL);
+       struct buffer_head *bh;
+       struct ext4_super_block *es = NULL;
+-      struct ext4_sb_info *sbi;
++      struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+       ext4_fsblk_t block;
+       ext4_fsblk_t sb_block = get_sb_block(&data);
+       ext4_fsblk_t logical_sb_block;
+@@ -3149,16 +3149,14 @@ static int ext4_fill_super(struct super_
+       unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+       ext4_group_t first_not_zeroed;
+-      sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+-      if (!sbi)
+-              goto out_free_orig;
++      if ((data && !orig_data) || !sbi)
++              goto out_free_base;
+       sbi->s_blockgroup_lock =
+               kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
+-      if (!sbi->s_blockgroup_lock) {
+-              kfree(sbi);
+-              goto out_free_orig;
+-      }
++      if (!sbi->s_blockgroup_lock)
++              goto out_free_base;
++
+       sb->s_fs_info = sbi;
+       sbi->s_sb = sb;
+       sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
+@@ -3304,11 +3302,19 @@ static int ext4_fill_super(struct super_
+        */
+       sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+-      if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
+-                         &journal_devnum, &journal_ioprio, 0)) {
+-              ext4_msg(sb, KERN_WARNING,
+-                       "failed to parse options in superblock: %s",
+-                       sbi->s_es->s_mount_opts);
++      if (sbi->s_es->s_mount_opts[0]) {
++              char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
++                                            sizeof(sbi->s_es->s_mount_opts),
++                                            GFP_KERNEL);
++              if (!s_mount_opts)
++                      goto failed_mount;
++              if (!parse_options(s_mount_opts, sb, &journal_devnum,
++                                 &journal_ioprio, 0)) {
++                      ext4_msg(sb, KERN_WARNING,
++                               "failed to parse options in superblock: %s",
++                               s_mount_opts);
++              }
++              kfree(s_mount_opts);
+       }
+       sbi->s_def_mount_opt = sbi->s_mount_opt;
+       if (!parse_options((char *) data, sb, &journal_devnum,
+@@ -3991,7 +3997,9 @@ no_journal:
+       if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
+               ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+-                       "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
++                       "Opts: %.*s%s%s", descr,
++                       (int) sizeof(sbi->s_es->s_mount_opts),
++                       sbi->s_es->s_mount_opts,
+                        *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+       if (es->s_error_count)
+@@ -4061,8 +4069,8 @@ failed_mount:
+ out_fail:
+       sb->s_fs_info = NULL;
+       kfree(sbi->s_blockgroup_lock);
++out_free_base:
+       kfree(sbi);
+-out_free_orig:
+       kfree(orig_data);
+       return err ? err : ret;
+ }
diff --git a/queue-4.4/ext4-fix-mballoc-breakage-with-64k-block-size.patch b/queue-4.4/ext4-fix-mballoc-breakage-with-64k-block-size.patch
new file mode 100644 (file)
index 0000000..74c0e3e
--- /dev/null
@@ -0,0 +1,35 @@
+From 69e43e8cc971a79dd1ee5d4343d8e63f82725123 Mon Sep 17 00:00:00 2001
+From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
+Date: Mon, 14 Nov 2016 21:04:37 -0500
+Subject: ext4: fix mballoc breakage with 64k block size
+
+From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
+
+commit 69e43e8cc971a79dd1ee5d4343d8e63f82725123 upstream.
+
+'border' variable is set to a value of 2 times the block size of the
+underlying filesystem. With 64k block size, the resulting value won't
+fit into a 16-bit variable. Hence this commit changes the data type of
+'border' to 'unsigned int'.
+
+Fixes: c9de560ded61f
+Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/mballoc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -669,7 +669,7 @@ static void ext4_mb_mark_free_simple(str
+       ext4_grpblk_t min;
+       ext4_grpblk_t max;
+       ext4_grpblk_t chunk;
+-      unsigned short border;
++      unsigned int border;
+       BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
diff --git a/queue-4.4/ext4-fix-stack-memory-corruption-with-64k-block-size.patch b/queue-4.4/ext4-fix-stack-memory-corruption-with-64k-block-size.patch
new file mode 100644 (file)
index 0000000..1c30ffe
--- /dev/null
@@ -0,0 +1,36 @@
+From 30a9d7afe70ed6bd9191d3000e2ef1a34fb58493 Mon Sep 17 00:00:00 2001
+From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
+Date: Mon, 14 Nov 2016 21:26:26 -0500
+Subject: ext4: fix stack memory corruption with 64k block size
+
+From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
+
+commit 30a9d7afe70ed6bd9191d3000e2ef1a34fb58493 upstream.
+
+The number of 'counters' elements needed in 'struct sg' is
+super_block->s_blocksize_bits + 2. Presently we have 16 'counters'
+elements in the array. This is insufficient for block sizes >= 32k. In
+such cases the memcpy operation performed in ext4_mb_seq_groups_show()
+would cause stack memory corruption.
+
+Fixes: c9de560ded61f
+Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/mballoc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2287,7 +2287,7 @@ static int ext4_mb_seq_groups_show(struc
+       struct ext4_group_info *grinfo;
+       struct sg {
+               struct ext4_group_info info;
+-              ext4_grpblk_t counters[16];
++              ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
+       } sg;
+       group--;
diff --git a/queue-4.4/ext4-reject-inodes-with-negative-size.patch b/queue-4.4/ext4-reject-inodes-with-negative-size.patch
new file mode 100644 (file)
index 0000000..5281ac5
--- /dev/null
@@ -0,0 +1,45 @@
+From 7e6e1ef48fc02f3ac5d0edecbb0c6087cd758d58 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Sat, 10 Dec 2016 09:55:01 -0500
+Subject: ext4: reject inodes with negative size
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 7e6e1ef48fc02f3ac5d0edecbb0c6087cd758d58 upstream.
+
+Don't load an inode with a negative size; this causes integer overflow
+problems in the VFS.
+
+[ Added EXT4_ERROR_INODE() to mark file system as corrupted. -TYT]
+
+Fixes: a48380f769df (ext4: rename i_dir_acl to i_size_high)
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4175,6 +4175,7 @@ struct inode *ext4_iget(struct super_blo
+       struct inode *inode;
+       journal_t *journal = EXT4_SB(sb)->s_journal;
+       long ret;
++      loff_t size;
+       int block;
+       uid_t i_uid;
+       gid_t i_gid;
+@@ -4266,6 +4267,11 @@ struct inode *ext4_iget(struct super_blo
+               ei->i_file_acl |=
+                       ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+       inode->i_size = ext4_isize(raw_inode);
++      if ((size = i_size_read(inode)) < 0) {
++              EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
++              ret = -EFSCORRUPTED;
++              goto bad_inode;
++      }
+       ei->i_disksize = inode->i_size;
+ #ifdef CONFIG_QUOTA
+       ei->i_reserved_quota = 0;
diff --git a/queue-4.4/ext4-return-enomem-instead-of-success.patch b/queue-4.4/ext4-return-enomem-instead-of-success.patch
new file mode 100644 (file)
index 0000000..6d8f344
--- /dev/null
@@ -0,0 +1,34 @@
+From 578620f451f836389424833f1454eeeb2ffc9e9f Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Sat, 10 Dec 2016 09:56:01 -0500
+Subject: ext4: return -ENOMEM instead of success
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 578620f451f836389424833f1454eeeb2ffc9e9f upstream.
+
+We should set the error code if kzalloc() fails.
+
+Fixes: 67cf5b09a46f ("ext4: add the basic function for inline data support")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inline.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -336,8 +336,10 @@ static int ext4_update_inline_data(handl
+       len -= EXT4_MIN_INLINE_DATA_SIZE;
+       value = kzalloc(len, GFP_NOFS);
+-      if (!value)
++      if (!value) {
++              error = -ENOMEM;
+               goto out;
++      }
+       error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
+                                    value, len);
diff --git a/queue-4.4/ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch b/queue-4.4/ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch
new file mode 100644 (file)
index 0000000..c134b6e
--- /dev/null
@@ -0,0 +1,55 @@
+From cd6bb35bf7f6d7d922509bf50265383a0ceabe96 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Fri, 18 Nov 2016 13:28:30 -0500
+Subject: ext4: use more strict checks for inodes_per_block on mount
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit cd6bb35bf7f6d7d922509bf50265383a0ceabe96 upstream.
+
+Centralize the checks for inodes_per_block and be more strict to make
+sure the inodes_per_block_group can't end up being zero.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c |   15 ++++++---------
+ 1 file changed, 6 insertions(+), 9 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3496,12 +3496,16 @@ static int ext4_fill_super(struct super_
+       sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
+       sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
+-      if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
+-              goto cantfind_ext4;
+       sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
+       if (sbi->s_inodes_per_block == 0)
+               goto cantfind_ext4;
++      if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
++          sbi->s_inodes_per_group > blocksize * 8) {
++              ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
++                       sbi->s_blocks_per_group);
++              goto failed_mount;
++      }
+       sbi->s_itb_per_group = sbi->s_inodes_per_group /
+                                       sbi->s_inodes_per_block;
+       sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
+@@ -3584,13 +3588,6 @@ static int ext4_fill_super(struct super_
+       }
+       sbi->s_cluster_ratio = clustersize / blocksize;
+-      if (sbi->s_inodes_per_group > blocksize * 8) {
+-              ext4_msg(sb, KERN_ERR,
+-                     "#inodes per group too big: %lu",
+-                     sbi->s_inodes_per_group);
+-              goto failed_mount;
+-      }
+-
+       /* Do we have standard group size of clustersize * 8 blocks ? */
+       if (sbi->s_blocks_per_group == clustersize << 3)
+               set_opt2(sb, STD_GROUP_SIZE);
diff --git a/queue-4.4/f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch b/queue-4.4/f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch
new file mode 100644 (file)
index 0000000..0dbb25b
--- /dev/null
@@ -0,0 +1,63 @@
+From 05e6ea2685c964db1e675a24a4f4e2adc22d2388 Mon Sep 17 00:00:00 2001
+From: Nicolai Stange <nicstange@gmail.com>
+Date: Sun, 20 Nov 2016 19:57:23 +0100
+Subject: f2fs: set ->owner for debugfs status file's file_operations
+
+From: Nicolai Stange <nicstange@gmail.com>
+
+commit 05e6ea2685c964db1e675a24a4f4e2adc22d2388 upstream.
+
+The struct file_operations instance serving the f2fs/status debugfs file
+lacks an initialization of its ->owner.
+
+This means that although that file might have been opened, the f2fs module
+can still get removed. Any further operation on that opened file, releasing
+included, will cause accesses to unmapped memory.
+
+Indeed, Mike Marshall reported the following:
+
+  BUG: unable to handle kernel paging request at ffffffffa0307430
+  IP: [<ffffffff8132a224>] full_proxy_release+0x24/0x90
+  <...>
+  Call Trace:
+   [] __fput+0xdf/0x1d0
+   [] ____fput+0xe/0x10
+   [] task_work_run+0x8e/0xc0
+   [] do_exit+0x2ae/0xae0
+   [] ? __audit_syscall_entry+0xae/0x100
+   [] ? syscall_trace_enter+0x1ca/0x310
+   [] do_group_exit+0x44/0xc0
+   [] SyS_exit_group+0x14/0x20
+   [] do_syscall_64+0x61/0x150
+   [] entry_SYSCALL64_slow_path+0x25/0x25
+  <...>
+  ---[ end trace f22ae883fa3ea6b8 ]---
+  Fixing recursive fault but reboot is needed!
+
+Fix this by initializing the f2fs/status file_operations' ->owner with
+THIS_MODULE.
+
+This will allow debugfs to grab a reference to the f2fs module upon any
+open on that file, thus preventing it from getting removed.
+
+Fixes: 902829aa0b72 ("f2fs: move proc files to debugfs")
+Reported-by: Mike Marshall <hubcap@omnibond.com>
+Reported-by: Martin Brandenburg <martin@omnibond.com>
+Signed-off-by: Nicolai Stange <nicstange@gmail.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/f2fs/debug.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/f2fs/debug.c
++++ b/fs/f2fs/debug.c
+@@ -352,6 +352,7 @@ static int stat_open(struct inode *inode
+ }
+ static const struct file_operations stat_fops = {
++      .owner = THIS_MODULE,
+       .open = stat_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
diff --git a/queue-4.4/fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch b/queue-4.4/fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch
new file mode 100644 (file)
index 0000000..a100826
--- /dev/null
@@ -0,0 +1,79 @@
+From 613cc2b6f272c1a8ad33aefa21cad77af23139f7 Mon Sep 17 00:00:00 2001
+From: Aleksa Sarai <asarai@suse.de>
+Date: Wed, 21 Dec 2016 16:26:24 +1100
+Subject: fs: exec: apply CLOEXEC before changing dumpable task flags
+
+From: Aleksa Sarai <asarai@suse.de>
+
+commit 613cc2b6f272c1a8ad33aefa21cad77af23139f7 upstream.
+
+If you have a process that has set itself to be non-dumpable, and it
+then undergoes exec(2), any CLOEXEC file descriptors it has open are
+"exposed" during a race window between the dumpable flags of the process
+being reset for exec(2) and CLOEXEC being applied to the file
+descriptors. This can be exploited by a process by attempting to access
+/proc/<pid>/fd/... during this window, without requiring CAP_SYS_PTRACE.
+
+The race in question is after set_dumpable has been called (for get_link,
+though the trace is basically the same for readlink):
+
+[vfs]
+-> proc_pid_link_inode_operations.get_link
+   -> proc_pid_get_link
+      -> proc_fd_access_allowed
+         -> ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+
+Which will return 0 during the race window, and CLOEXEC file descriptors
+will still be open during this window because do_close_on_exec has not
+been called yet. As a result, the ordering of these calls should be
+reversed to avoid this race window.
+
+This is of particular concern to container runtimes, where joining a
+PID namespace with file descriptors referring to the host filesystem
+can result in security issues (since PR_SET_DUMPABLE doesn't protect
+against access of CLOEXEC file descriptors -- file descriptors which may
+reference filesystem objects the container shouldn't have access to).
+
+Cc: dev@opencontainers.org
+Reported-by: Michael Crosby <crosbymichael@gmail.com>
+Signed-off-by: Aleksa Sarai <asarai@suse.de>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/exec.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -19,7 +19,7 @@
+  * current->executable is only used by the procfs.  This allows a dispatch
+  * table to check for several different types  of binary formats.  We keep
+  * trying until we recognize the file or we run out of supported binary
+- * formats. 
++ * formats.
+  */
+ #include <linux/slab.h>
+@@ -1114,6 +1114,13 @@ int flush_old_exec(struct linux_binprm *
+       flush_thread();
+       current->personality &= ~bprm->per_clear;
++      /*
++       * We have to apply CLOEXEC before we change whether the process is
++       * dumpable (in setup_new_exec) to avoid a race with a process in userspace
++       * trying to access the should-be-closed file descriptors of a process
++       * undergoing exec(2).
++       */
++      do_close_on_exec(current->files);
+       return 0;
+ out:
+@@ -1176,7 +1183,6 @@ void setup_new_exec(struct linux_binprm
+          group */
+       current->self_exec_id++;
+       flush_signal_handlers(current, 0);
+-      do_close_on_exec(current->files);
+ }
+ EXPORT_SYMBOL(setup_new_exec);
diff --git a/queue-4.4/loop-return-proper-error-from-loop_queue_rq.patch b/queue-4.4/loop-return-proper-error-from-loop_queue_rq.patch
new file mode 100644 (file)
index 0000000..fc1fd8b
--- /dev/null
@@ -0,0 +1,32 @@
+From b4a567e8114327518c09f5632339a5954ab975a3 Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Mon, 14 Nov 2016 14:56:17 -0800
+Subject: loop: return proper error from loop_queue_rq()
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit b4a567e8114327518c09f5632339a5954ab975a3 upstream.
+
+->queue_rq() should return one of the BLK_MQ_RQ_QUEUE_* constants, not
+an errno.
+
+Fixes: f4aa4c7bbac6 ("block: loop: convert to per-device workqueue")
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/loop.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -1657,7 +1657,7 @@ static int loop_queue_rq(struct blk_mq_h
+       blk_mq_start_request(bd->rq);
+       if (lo->lo_state != Lo_bound)
+-              return -EIO;
++              return BLK_MQ_RQ_QUEUE_ERROR;
+       if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH |
+                                       REQ_DISCARD)))
diff --git a/queue-4.4/mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch b/queue-4.4/mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch
new file mode 100644 (file)
index 0000000..e879a72
--- /dev/null
@@ -0,0 +1,181 @@
+From bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Thu, 13 Oct 2016 21:23:16 -0500
+Subject: mm: Add a user_ns owner to mm_struct and fix ptrace permission checks
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 upstream.
+
+During exec dumpable is cleared if the file that is being executed is
+not readable by the user executing the file.  A bug in
+ptrace_may_access allows reading the file if the executable happens to
+enter into a subordinate user namespace (aka clone(CLONE_NEWUSER),
+unshare(CLONE_NEWUSER), or setns(fd, CLONE_NEWUSER)).
+
+This problem is fixed with only necessary userspace breakage by adding
+a user namespace owner to mm_struct, captured at the time of exec, so
+it is clear in which user namespace CAP_SYS_PTRACE must be present in
+to be able to safely give read permission to the executable.
+
+The function ptrace_may_access is modified to verify that the ptracer
+has CAP_SYS_PTRACE in task->mm->user_ns instead of task->cred->user_ns.
+This ensures that if the task changes its cred into a subordinate
+user namespace it does not become ptraceable.
+
+The function ptrace_attach is modified to only set PT_PTRACE_CAP when
+CAP_SYS_PTRACE is held over task->mm->user_ns.  The intent of
+PT_PTRACE_CAP is to be a flag to note that whatever permission changes
+the task might go through the tracer has sufficient permissions for
+it not to be an issue.  task->cred->user_ns is always the same
+as or a descendant of mm->user_ns, which guarantees that having
+CAP_SYS_PTRACE over mm->user_ns is the worst case for the tasks
+credentials.
+
+To prevent regressions mm->dumpable and mm->user_ns are not considered
+when a task has no mm, as simply failing ptrace_may_access causes
+regressions in privileged applications attempting to read things
+such as /proc/<pid>/stat.
+
+Acked-by: Kees Cook <keescook@chromium.org>
+Tested-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Fixes: 8409cca70561 ("userns: allow ptrace from non-init user namespaces")
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm_types.h |    1 +
+ kernel/fork.c            |    9 ++++++---
+ kernel/ptrace.c          |   26 +++++++++++---------------
+ mm/init-mm.c             |    2 ++
+ 4 files changed, 20 insertions(+), 18 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -469,6 +469,7 @@ struct mm_struct {
+        */
+       struct task_struct __rcu *owner;
+ #endif
++      struct user_namespace *user_ns;
+       /* store ref to file /proc/<pid>/exe symlink points to */
+       struct file __rcu *exe_file;
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -585,7 +585,8 @@ static void mm_init_owner(struct mm_stru
+ #endif
+ }
+-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
++static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
++      struct user_namespace *user_ns)
+ {
+       mm->mmap = NULL;
+       mm->mm_rb = RB_ROOT;
+@@ -625,6 +626,7 @@ static struct mm_struct *mm_init(struct
+       if (init_new_context(p, mm))
+               goto fail_nocontext;
++      mm->user_ns = get_user_ns(user_ns);
+       return mm;
+ fail_nocontext:
+@@ -670,7 +672,7 @@ struct mm_struct *mm_alloc(void)
+               return NULL;
+       memset(mm, 0, sizeof(*mm));
+-      return mm_init(mm, current);
++      return mm_init(mm, current, current_user_ns());
+ }
+ /*
+@@ -685,6 +687,7 @@ void __mmdrop(struct mm_struct *mm)
+       destroy_context(mm);
+       mmu_notifier_mm_destroy(mm);
+       check_mm(mm);
++      put_user_ns(mm->user_ns);
+       free_mm(mm);
+ }
+ EXPORT_SYMBOL_GPL(__mmdrop);
+@@ -942,7 +945,7 @@ static struct mm_struct *dup_mm(struct t
+       memcpy(mm, oldmm, sizeof(*mm));
+-      if (!mm_init(mm, tsk))
++      if (!mm_init(mm, tsk, mm->user_ns))
+               goto fail_nomem;
+       err = dup_mmap(mm, oldmm);
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -219,7 +219,7 @@ static int ptrace_has_cap(struct user_na
+ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
+ {
+       const struct cred *cred = current_cred(), *tcred;
+-      int dumpable = 0;
++      struct mm_struct *mm;
+       kuid_t caller_uid;
+       kgid_t caller_gid;
+@@ -270,16 +270,11 @@ static int __ptrace_may_access(struct ta
+       return -EPERM;
+ ok:
+       rcu_read_unlock();
+-      smp_rmb();
+-      if (task->mm)
+-              dumpable = get_dumpable(task->mm);
+-      rcu_read_lock();
+-      if (dumpable != SUID_DUMP_USER &&
+-          !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+-              rcu_read_unlock();
+-              return -EPERM;
+-      }
+-      rcu_read_unlock();
++      mm = task->mm;
++      if (mm &&
++          ((get_dumpable(mm) != SUID_DUMP_USER) &&
++           !ptrace_has_cap(mm->user_ns, mode)))
++          return -EPERM;
+       return security_ptrace_access_check(task, mode);
+ }
+@@ -330,6 +325,11 @@ static int ptrace_attach(struct task_str
+       task_lock(task);
+       retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
++      if (!retval) {
++              struct mm_struct *mm = task->mm;
++              if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE))
++                      flags |= PT_PTRACE_CAP;
++      }
+       task_unlock(task);
+       if (retval)
+               goto unlock_creds;
+@@ -343,10 +343,6 @@ static int ptrace_attach(struct task_str
+       if (seize)
+               flags |= PT_SEIZED;
+-      rcu_read_lock();
+-      if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
+-              flags |= PT_PTRACE_CAP;
+-      rcu_read_unlock();
+       task->ptrace = flags;
+       __ptrace_link(task, current);
+--- a/mm/init-mm.c
++++ b/mm/init-mm.c
+@@ -6,6 +6,7 @@
+ #include <linux/cpumask.h>
+ #include <linux/atomic.h>
++#include <linux/user_namespace.h>
+ #include <asm/pgtable.h>
+ #include <asm/mmu.h>
+@@ -21,5 +22,6 @@ struct mm_struct init_mm = {
+       .mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+       .page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+       .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
++      .user_ns        = &init_user_ns,
+       INIT_MM_CONTEXT(init_mm)
+ };
diff --git a/queue-4.4/mm-vmscan.c-set-correct-defer-count-for-shrinker.patch b/queue-4.4/mm-vmscan.c-set-correct-defer-count-for-shrinker.patch
new file mode 100644 (file)
index 0000000..71395f6
--- /dev/null
@@ -0,0 +1,85 @@
+From 5f33a0803bbd781de916f5c7448cbbbbc763d911 Mon Sep 17 00:00:00 2001
+From: Shaohua Li <shli@fb.com>
+Date: Mon, 12 Dec 2016 16:41:50 -0800
+Subject: mm/vmscan.c: set correct defer count for shrinker
+
+From: Shaohua Li <shli@fb.com>
+
+commit 5f33a0803bbd781de916f5c7448cbbbbc763d911 upstream.
+
+Our system uses significantly more slab memory with memcg enabled with
+the latest kernel.  With 3.10 kernel, slab uses 2G memory, while with
+4.6 kernel, 6G memory is used.  The shrinker has a problem.  Let's say we
+have two memcgs for one shrinker.  In do_shrink_slab:
+
+1. Check cg1.  nr_deferred = 0, assume total_scan = 700.  batch size
+   is 1024, then no memory is freed.  nr_deferred = 700
+
+2. Check cg2.  nr_deferred = 700.  Assume freeable = 20, then
+   total_scan = 10 or 40.  Let's assume it's 10.  No memory is freed.
+   nr_deferred = 10.
+
+The deferred share of cg1 is lost in this case.  kswapd will free no
+memory even if it runs the above steps again and again.
+
+The fix makes sure one memcg's deferred share isn't lost.
+
+Link: http://lkml.kernel.org/r/2414be961b5d25892060315fbb56bb19d81d0c07.1476227351.git.shli@fb.com
+Signed-off-by: Shaohua Li <shli@fb.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Vladimir Davydov <vdavydov@parallels.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -277,6 +277,7 @@ static unsigned long do_shrink_slab(stru
+       int nid = shrinkctl->nid;
+       long batch_size = shrinker->batch ? shrinker->batch
+                                         : SHRINK_BATCH;
++      long scanned = 0, next_deferred;
+       freeable = shrinker->count_objects(shrinker, shrinkctl);
+       if (freeable == 0)
+@@ -298,7 +299,9 @@ static unsigned long do_shrink_slab(stru
+               pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
+                      shrinker->scan_objects, total_scan);
+               total_scan = freeable;
+-      }
++              next_deferred = nr;
++      } else
++              next_deferred = total_scan;
+       /*
+        * We need to avoid excessive windup on filesystem shrinkers
+@@ -355,17 +358,22 @@ static unsigned long do_shrink_slab(stru
+               count_vm_events(SLABS_SCANNED, nr_to_scan);
+               total_scan -= nr_to_scan;
++              scanned += nr_to_scan;
+               cond_resched();
+       }
++      if (next_deferred >= scanned)
++              next_deferred -= scanned;
++      else
++              next_deferred = 0;
+       /*
+        * move the unused scan count back into the shrinker in a
+        * manner that handles concurrent updates. If we exhausted the
+        * scan, there is no need to do an update.
+        */
+-      if (total_scan > 0)
+-              new_nr = atomic_long_add_return(total_scan,
++      if (next_deferred > 0)
++              new_nr = atomic_long_add_return(next_deferred,
+                                               &shrinker->nr_deferred[nid]);
+       else
+               new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
diff --git a/queue-4.4/ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch b/queue-4.4/ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch
new file mode 100644 (file)
index 0000000..7d36874
--- /dev/null
@@ -0,0 +1,147 @@
+From 64b875f7ac8a5d60a4e191479299e931ee949b67 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 14 Nov 2016 18:48:07 -0600
+Subject: ptrace: Capture the ptracer's creds not PT_PTRACE_CAP
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 64b875f7ac8a5d60a4e191479299e931ee949b67 upstream.
+
+When the flag PT_PTRACE_CAP was added the PTRACE_TRACEME path was
+overlooked.  This can result in incorrect behavior when an application
+like strace traces an exec of a setuid executable.
+
+Further PT_PTRACE_CAP does not have enough information for making good
+security decisions as it does not report which user namespace the
+capability is in.  This has already allowed one mistake through
+insufficient granularity.
+
+I found this issue when I was testing another corner case of exec and
+discovered that I could not get strace to set PT_PTRACE_CAP even when
+running strace as root with a full set of caps.
+
+This change fixes the above issue with strace allowing stracing as
+root a setuid executable without disabling setuid.  More fundamentally
+this change allows what is allowable at all times, by using the correct
+information in its decision.
+
+Fixes: 4214e42f96d4 ("v2.4.9.11 -> v2.4.9.12")
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/exec.c                  |    2 +-
+ include/linux/capability.h |    1 +
+ include/linux/ptrace.h     |    1 -
+ include/linux/sched.h      |    1 +
+ kernel/capability.c        |   20 ++++++++++++++++++++
+ kernel/ptrace.c            |   12 +++++++-----
+ 6 files changed, 30 insertions(+), 7 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1273,7 +1273,7 @@ static void check_unsafe_exec(struct lin
+       unsigned n_fs;
+       if (p->ptrace) {
+-              if (p->ptrace & PT_PTRACE_CAP)
++              if (ptracer_capable(p, current_user_ns()))
+                       bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
+               else
+                       bprm->unsafe |= LSM_UNSAFE_PTRACE;
+--- a/include/linux/capability.h
++++ b/include/linux/capability.h
+@@ -250,6 +250,7 @@ static inline bool ns_capable_noaudit(st
+ extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
+ extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
+ extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
++extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
+ /* audit system wants to get cap info from files as well */
+ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -19,7 +19,6 @@
+ #define PT_SEIZED     0x00010000      /* SEIZE used, enable new behavior */
+ #define PT_PTRACED    0x00000001
+ #define PT_DTRACE     0x00000002      /* delayed trace (used on m68k, i386) */
+-#define PT_PTRACE_CAP 0x00000004      /* ptracer can follow suid-exec */
+ #define PT_OPT_FLAG_SHIFT     3
+ /* PT_TRACE_* event enable flags */
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1540,6 +1540,7 @@ struct task_struct {
+       struct list_head cpu_timers[3];
+ /* process credentials */
++      const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
+       const struct cred __rcu *real_cred; /* objective and real subjective task
+                                        * credentials (COW) */
+       const struct cred __rcu *cred;  /* effective (overridable) subjective task
+--- a/kernel/capability.c
++++ b/kernel/capability.c
+@@ -485,3 +485,23 @@ bool capable_wrt_inode_uidgid(const stru
+       return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
+ }
+ EXPORT_SYMBOL(capable_wrt_inode_uidgid);
++
++/**
++ * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace
++ * @tsk: The task that may be ptraced
++ * @ns: The user namespace to search for CAP_SYS_PTRACE in
++ *
++ * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE
++ * in the specified user namespace.
++ */
++bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
++{
++      int ret = 0;  /* An absent tracer adds no restrictions */
++      const struct cred *cred;
++      rcu_read_lock();
++      cred = rcu_dereference(tsk->ptracer_cred);
++      if (cred)
++              ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
++      rcu_read_unlock();
++      return (ret == 0);
++}
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -39,6 +39,9 @@ void __ptrace_link(struct task_struct *c
+       BUG_ON(!list_empty(&child->ptrace_entry));
+       list_add(&child->ptrace_entry, &new_parent->ptraced);
+       child->parent = new_parent;
++      rcu_read_lock();
++      child->ptracer_cred = get_cred(__task_cred(new_parent));
++      rcu_read_unlock();
+ }
+ /**
+@@ -71,11 +74,15 @@ void __ptrace_link(struct task_struct *c
+  */
+ void __ptrace_unlink(struct task_struct *child)
+ {
++      const struct cred *old_cred;
+       BUG_ON(!child->ptrace);
+       child->ptrace = 0;
+       child->parent = child->real_parent;
+       list_del_init(&child->ptrace_entry);
++      old_cred = child->ptracer_cred;
++      child->ptracer_cred = NULL;
++      put_cred(old_cred);
+       spin_lock(&child->sighand->siglock);
+@@ -325,11 +332,6 @@ static int ptrace_attach(struct task_str
+       task_lock(task);
+       retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
+-      if (!retval) {
+-              struct mm_struct *mm = task->mm;
+-              if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE))
+-                      flags |= PT_PTRACE_CAP;
+-      }
+       task_unlock(task);
+       if (retval)
+               goto unlock_creds;
index ea92807d0049e4cfe8aaffc68cad2cea1a1552a4..f10b3234d4700929db753acf6fce4ace87d85b32 100644 (file)
@@ -25,3 +25,20 @@ btrfs-don-t-leak-reloc-root-nodes-on-error.patch
 btrfs-fix-memory-leak-in-do_walk_down.patch
 btrfs-don-t-bug-during-drop-snapshot.patch
 btrfs-make-file-clone-aware-of-fatal-signals.patch
+exec-ensure-mm-user_ns-contains-the-execed-files.patch
+fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch
+block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch
+mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch
+ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch
+crypto-caam-fix-aead-givenc-descriptors.patch
+ext4-fix-mballoc-breakage-with-64k-block-size.patch
+ext4-fix-stack-memory-corruption-with-64k-block-size.patch
+ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch
+ext4-fix-in-superblock-mount-options-processing.patch
+ext4-add-sanity-checking-to-count_overhead.patch
+ext4-reject-inodes-with-negative-size.patch
+ext4-return-enomem-instead-of-success.patch
+ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch
+f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch
+loop-return-proper-error-from-loop_queue_rq.patch
+mm-vmscan.c-set-correct-defer-count-for-shrinker.patch