]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 14 Aug 2016 18:37:49 +0000 (20:37 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 14 Aug 2016 18:37:49 +0000 (20:37 +0200)
added patches:
block-fix-use-after-free-in-seq-file.patch
crypto-gcm-filter-out-async-ghash-if-necessary.patch
crypto-scatterwalk-fix-test-in-scatterwalk_done.patch
ext4-check-for-extents-that-wrap-around.patch
ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch
ext4-fix-deadlock-during-page-writeback.patch
ext4-short-cut-orphan-cleanup-on-error.patch
ext4-validate-s_reserved_gdt_blocks-on-mount.patch
fs-dcache.c-avoid-soft-lockup-in-dput.patch
fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch
fuse-fsync-did-not-return-io-errors.patch
fuse-fuse_flush-must-check-mapping-flags-for-errors.patch
radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch
revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch
revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch
serial-mvebu-uart-free-the-irq-in-shutdown.patch
sysv-ipc-fix-security-layer-leaking.patch
x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch
x86-power-64-fix-hibernation-return-address-corruption.patch
x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch

21 files changed:
queue-4.6/block-fix-use-after-free-in-seq-file.patch [new file with mode: 0644]
queue-4.6/crypto-gcm-filter-out-async-ghash-if-necessary.patch [new file with mode: 0644]
queue-4.6/crypto-scatterwalk-fix-test-in-scatterwalk_done.patch [new file with mode: 0644]
queue-4.6/ext4-check-for-extents-that-wrap-around.patch [new file with mode: 0644]
queue-4.6/ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch [new file with mode: 0644]
queue-4.6/ext4-fix-deadlock-during-page-writeback.patch [new file with mode: 0644]
queue-4.6/ext4-short-cut-orphan-cleanup-on-error.patch [new file with mode: 0644]
queue-4.6/ext4-validate-s_reserved_gdt_blocks-on-mount.patch [new file with mode: 0644]
queue-4.6/fs-dcache.c-avoid-soft-lockup-in-dput.patch [new file with mode: 0644]
queue-4.6/fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch [new file with mode: 0644]
queue-4.6/fuse-fsync-did-not-return-io-errors.patch [new file with mode: 0644]
queue-4.6/fuse-fuse_flush-must-check-mapping-flags-for-errors.patch [new file with mode: 0644]
queue-4.6/radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch [new file with mode: 0644]
queue-4.6/revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch [new file with mode: 0644]
queue-4.6/revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch [new file with mode: 0644]
queue-4.6/serial-mvebu-uart-free-the-irq-in-shutdown.patch [new file with mode: 0644]
queue-4.6/series
queue-4.6/sysv-ipc-fix-security-layer-leaking.patch [new file with mode: 0644]
queue-4.6/x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch [new file with mode: 0644]
queue-4.6/x86-power-64-fix-hibernation-return-address-corruption.patch [new file with mode: 0644]
queue-4.6/x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch [new file with mode: 0644]

diff --git a/queue-4.6/block-fix-use-after-free-in-seq-file.patch b/queue-4.6/block-fix-use-after-free-in-seq-file.patch
new file mode 100644 (file)
index 0000000..32e915b
--- /dev/null
@@ -0,0 +1,112 @@
+From 77da160530dd1dc94f6ae15a981f24e5f0021e84 Mon Sep 17 00:00:00 2001
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Fri, 29 Jul 2016 10:40:31 +0200
+Subject: block: fix use-after-free in seq file
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+commit 77da160530dd1dc94f6ae15a981f24e5f0021e84 upstream.
+
+I got a KASAN report of use-after-free:
+
+    ==================================================================
+    BUG: KASAN: use-after-free in klist_iter_exit+0x61/0x70 at addr ffff8800b6581508
+    Read of size 8 by task trinity-c1/315
+    =============================================================================
+    BUG kmalloc-32 (Not tainted): kasan: bad access detected
+    -----------------------------------------------------------------------------
+
+    Disabling lock debugging due to kernel taint
+    INFO: Allocated in disk_seqf_start+0x66/0x110 age=144 cpu=1 pid=315
+            ___slab_alloc+0x4f1/0x520
+            __slab_alloc.isra.58+0x56/0x80
+            kmem_cache_alloc_trace+0x260/0x2a0
+            disk_seqf_start+0x66/0x110
+            traverse+0x176/0x860
+            seq_read+0x7e3/0x11a0
+            proc_reg_read+0xbc/0x180
+            do_loop_readv_writev+0x134/0x210
+            do_readv_writev+0x565/0x660
+            vfs_readv+0x67/0xa0
+            do_preadv+0x126/0x170
+            SyS_preadv+0xc/0x10
+            do_syscall_64+0x1a1/0x460
+            return_from_SYSCALL_64+0x0/0x6a
+    INFO: Freed in disk_seqf_stop+0x42/0x50 age=160 cpu=1 pid=315
+            __slab_free+0x17a/0x2c0
+            kfree+0x20a/0x220
+            disk_seqf_stop+0x42/0x50
+            traverse+0x3b5/0x860
+            seq_read+0x7e3/0x11a0
+            proc_reg_read+0xbc/0x180
+            do_loop_readv_writev+0x134/0x210
+            do_readv_writev+0x565/0x660
+            vfs_readv+0x67/0xa0
+            do_preadv+0x126/0x170
+            SyS_preadv+0xc/0x10
+            do_syscall_64+0x1a1/0x460
+            return_from_SYSCALL_64+0x0/0x6a
+
+    CPU: 1 PID: 315 Comm: trinity-c1 Tainted: G    B           4.7.0+ #62
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+     ffffea0002d96000 ffff880119b9f918 ffffffff81d6ce81 ffff88011a804480
+     ffff8800b6581500 ffff880119b9f948 ffffffff8146c7bd ffff88011a804480
+     ffffea0002d96000 ffff8800b6581500 fffffffffffffff4 ffff880119b9f970
+    Call Trace:
+     [<ffffffff81d6ce81>] dump_stack+0x65/0x84
+     [<ffffffff8146c7bd>] print_trailer+0x10d/0x1a0
+     [<ffffffff814704ff>] object_err+0x2f/0x40
+     [<ffffffff814754d1>] kasan_report_error+0x221/0x520
+     [<ffffffff8147590e>] __asan_report_load8_noabort+0x3e/0x40
+     [<ffffffff83888161>] klist_iter_exit+0x61/0x70
+     [<ffffffff82404389>] class_dev_iter_exit+0x9/0x10
+     [<ffffffff81d2e8ea>] disk_seqf_stop+0x3a/0x50
+     [<ffffffff8151f812>] seq_read+0x4b2/0x11a0
+     [<ffffffff815f8fdc>] proc_reg_read+0xbc/0x180
+     [<ffffffff814b24e4>] do_loop_readv_writev+0x134/0x210
+     [<ffffffff814b4c45>] do_readv_writev+0x565/0x660
+     [<ffffffff814b8a17>] vfs_readv+0x67/0xa0
+     [<ffffffff814b8de6>] do_preadv+0x126/0x170
+     [<ffffffff814b92ec>] SyS_preadv+0xc/0x10
+
+This problem can occur in the following situation:
+
+open()
+ - pread()
+    - .seq_start()
+       - iter = kmalloc() // succeeds
+       - seqf->private = iter
+    - .seq_stop()
+       - kfree(seqf->private)
+ - pread()
+    - .seq_start()
+       - iter = kmalloc() // fails
+    - .seq_stop()
+       - class_dev_iter_exit(seqf->private) // boom! old pointer
+
+As the comment in disk_seqf_stop() says, stop is called even if start
+failed, so we need to reinitialise the private pointer to NULL when seq
+iteration stops.
+
+An alternative would be to set the private pointer to NULL when the
+kmalloc() in disk_seqf_start() fails.
+
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/genhd.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/block/genhd.c
++++ b/block/genhd.c
+@@ -856,6 +856,7 @@ static void disk_seqf_stop(struct seq_fi
+       if (iter) {
+               class_dev_iter_exit(iter);
+               kfree(iter);
++              seqf->private = NULL;
+       }
+ }
diff --git a/queue-4.6/crypto-gcm-filter-out-async-ghash-if-necessary.patch b/queue-4.6/crypto-gcm-filter-out-async-ghash-if-necessary.patch
new file mode 100644 (file)
index 0000000..1221720
--- /dev/null
@@ -0,0 +1,36 @@
+From b30bdfa86431afbafe15284a3ad5ac19b49b88e3 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Wed, 15 Jun 2016 22:27:05 +0800
+Subject: crypto: gcm - Filter out async ghash if necessary
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit b30bdfa86431afbafe15284a3ad5ac19b49b88e3 upstream.
+
+As it is, if you ask for a sync gcm you may actually end up with
+an async one because it does not filter out async implementations
+of ghash.
+
+This patch fixes this by adding the necessary filter when looking
+for ghash.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/gcm.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/crypto/gcm.c
++++ b/crypto/gcm.c
+@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(stru
+       ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
+                                   CRYPTO_ALG_TYPE_HASH,
+-                                  CRYPTO_ALG_TYPE_AHASH_MASK);
++                                  CRYPTO_ALG_TYPE_AHASH_MASK |
++                                  crypto_requires_sync(algt->type,
++                                                       algt->mask));
+       if (IS_ERR(ghash_alg))
+               return PTR_ERR(ghash_alg);
diff --git a/queue-4.6/crypto-scatterwalk-fix-test-in-scatterwalk_done.patch b/queue-4.6/crypto-scatterwalk-fix-test-in-scatterwalk_done.patch
new file mode 100644 (file)
index 0000000..d3bddb7
--- /dev/null
@@ -0,0 +1,39 @@
+From 5f070e81bee35f1b7bd1477bb223a873ff657803 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 12 Jul 2016 13:17:57 +0800
+Subject: crypto: scatterwalk - Fix test in scatterwalk_done
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+commit 5f070e81bee35f1b7bd1477bb223a873ff657803 upstream.
+
+When there is more data to be processed, the current test in
+scatterwalk_done may prevent us from calling pagedone even when
+we should.
+
+In particular, if we're on an SG entry spanning multiple pages
+where the last page is not a full page, we will incorrectly skip
+calling pagedone on the second last page.
+
+This patch fixes this by adding a separate test for whether we've
+reached the end of a page.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ crypto/scatterwalk.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/crypto/scatterwalk.c
++++ b/crypto/scatterwalk.c
+@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct
+ void scatterwalk_done(struct scatter_walk *walk, int out, int more)
+ {
+-      if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
++      if (!more || walk->offset >= walk->sg->offset + walk->sg->length ||
++          !(walk->offset & (PAGE_SIZE - 1)))
+               scatterwalk_pagedone(walk, out, more);
+ }
+ EXPORT_SYMBOL_GPL(scatterwalk_done);
diff --git a/queue-4.6/ext4-check-for-extents-that-wrap-around.patch b/queue-4.6/ext4-check-for-extents-that-wrap-around.patch
new file mode 100644 (file)
index 0000000..bedcde1
--- /dev/null
@@ -0,0 +1,55 @@
+From f70749ca42943faa4d4dcce46dfdcaadb1d0c4b6 Mon Sep 17 00:00:00 2001
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Thu, 30 Jun 2016 11:53:46 -0400
+Subject: ext4: check for extents that wrap around
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+commit f70749ca42943faa4d4dcce46dfdcaadb1d0c4b6 upstream.
+
+An extent with lblock = 4294967295 and len = 1 will pass the
+ext4_valid_extent() test:
+
+       ext4_lblk_t last = lblock + len - 1;
+
+       if (len == 0 || lblock > last)
+               return 0;
+
+since last = 4294967295 + 1 - 1 = 4294967295. This would later trigger
+the BUG_ON(es->es_lblk + es->es_len < es->es_lblk) in ext4_es_end().
+
+We can simplify it by removing the - 1 altogether and changing the test
+to use lblock + len <= lblock, since now if len = 0, then lblock + 0 ==
+lblock and it fails, and if len > 0 then lblock + len > lblock in order
+to pass (i.e. it doesn't overflow).
+
+Fixes: 5946d0893 ("ext4: check for overlapping extents in ext4_valid_extent_entries()")
+Fixes: 2f974865f ("ext4: check for zero length extent explicitly")
+Cc: Eryu Guan <guaneryu@gmail.com>
+Signed-off-by: Phil Turnbull <phil.turnbull@oracle.com>
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/extents.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inod
+       ext4_fsblk_t block = ext4_ext_pblock(ext);
+       int len = ext4_ext_get_actual_len(ext);
+       ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+-      ext4_lblk_t last = lblock + len - 1;
+-      if (len == 0 || lblock > last)
++      /*
++       * We allow neither:
++       *  - zero length
++       *  - overflow/wrap-around
++       */
++      if (lblock + len <= lblock)
+               return 0;
+       return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ }
diff --git a/queue-4.6/ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch b/queue-4.6/ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch
new file mode 100644 (file)
index 0000000..c19617e
--- /dev/null
@@ -0,0 +1,44 @@
+From 6a7fd522a7c94cdef0a3b08acf8e6702056e635c Mon Sep 17 00:00:00 2001
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Mon, 4 Jul 2016 11:03:00 -0400
+Subject: ext4: don't call ext4_should_journal_data() on the journal inode
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+commit 6a7fd522a7c94cdef0a3b08acf8e6702056e635c upstream.
+
+If ext4_fill_super() fails early, it's possible for ext4_evict_inode()
+to call ext4_should_journal_data() before superblock options and flags
+are fully set up.  In that case, the iput() on the journal inode can
+end up causing a BUG().
+
+Work around this problem by reordering the tests so we only call
+ext4_should_journal_data() after we know it's not the journal inode.
+
+Fixes: 2d859db3e4 ("ext4: fix data corruption in inodes with journalled data")
+Fixes: 2b405bfa84 ("ext4: fix data=journal fast mount/umount hang")
+Cc: Jan Kara <jack@suse.cz>
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inod
+                * Note that directories do not have this problem because they
+                * don't use page cache.
+                */
+-              if (ext4_should_journal_data(inode) &&
+-                  (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+-                  inode->i_ino != EXT4_JOURNAL_INO) {
++              if (inode->i_ino != EXT4_JOURNAL_INO &&
++                  ext4_should_journal_data(inode) &&
++                  (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+                       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+                       tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
diff --git a/queue-4.6/ext4-fix-deadlock-during-page-writeback.patch b/queue-4.6/ext4-fix-deadlock-during-page-writeback.patch
new file mode 100644 (file)
index 0000000..e5a681c
--- /dev/null
@@ -0,0 +1,78 @@
+From 646caa9c8e196880b41cd3e3d33a2ebc752bdb85 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 4 Jul 2016 10:14:01 -0400
+Subject: ext4: fix deadlock during page writeback
+
+From: Jan Kara <jack@suse.cz>
+
+commit 646caa9c8e196880b41cd3e3d33a2ebc752bdb85 upstream.
+
+Commit 06bd3c36a733 (ext4: fix data exposure after a crash) uncovered a
+deadlock in ext4_writepages() which was previously much harder to hit.
+After this commit xfstest generic/130 reproduces the deadlock on small
+filesystems.
+
+The problem happens when ext4_do_update_inode() sets LARGE_FILE feature
+and marks current inode handle as synchronous. That subsequently results
+in ext4_journal_stop() called from ext4_writepages() to block waiting for
+transaction commit while still holding page locks, reference to io_end,
+and some prepared bio in mpd structure each of which can possibly block
+transaction commit from completing and thus results in deadlock.
+
+Fix the problem by releasing page locks, io_end reference, and
+submitting prepared bio before calling ext4_journal_stop().
+
+[ Changed to defer the call to ext4_journal_stop() only if the handle
+  is synchronous.  --tytso ]
+
+Reported-and-tested-by: Eryu Guan <eguan@redhat.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c |   29 ++++++++++++++++++++++++++---
+ 1 file changed, 26 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2741,13 +2741,36 @@ retry:
+                               done = true;
+                       }
+               }
+-              ext4_journal_stop(handle);
++              /*
++               * Caution: If the handle is synchronous,
++               * ext4_journal_stop() can wait for transaction commit
++               * to finish which may depend on writeback of pages to
++               * complete or on page lock to be released.  In that
+               * case, we have to wait until after we have
++               * submitted all the IO, released page locks we hold,
++               * and dropped io_end reference (for extent conversion
++               * to be able to complete) before stopping the handle.
++               */
++              if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
++                      ext4_journal_stop(handle);
++                      handle = NULL;
++              }
+               /* Submit prepared bio */
+               ext4_io_submit(&mpd.io_submit);
+               /* Unlock pages we didn't use */
+               mpage_release_unused_pages(&mpd, give_up_on_write);
+-              /* Drop our io_end reference we got from init */
+-              ext4_put_io_end(mpd.io_submit.io_end);
++              /*
++               * Drop our io_end reference we got from init. We have
++               * to be careful and use deferred io_end finishing if
++               * we are still holding the transaction as we can
++               * release the last reference to io_end which may end
++               * up doing unwritten extent conversion.
++               */
++              if (handle) {
++                      ext4_put_io_end_defer(mpd.io_submit.io_end);
++                      ext4_journal_stop(handle);
++              } else
++                      ext4_put_io_end(mpd.io_submit.io_end);
+               if (ret == -ENOSPC && sbi->s_journal) {
+                       /*
diff --git a/queue-4.6/ext4-short-cut-orphan-cleanup-on-error.patch b/queue-4.6/ext4-short-cut-orphan-cleanup-on-error.patch
new file mode 100644 (file)
index 0000000..79a1ff4
--- /dev/null
@@ -0,0 +1,60 @@
+From c65d5c6c81a1f27dec5f627f67840726fcd146de Mon Sep 17 00:00:00 2001
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Thu, 14 Jul 2016 23:21:35 -0400
+Subject: ext4: short-cut orphan cleanup on error
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+commit c65d5c6c81a1f27dec5f627f67840726fcd146de upstream.
+
+If we encounter a filesystem error during orphan cleanup, we should stop.
+Otherwise, we may end up in an infinite loop where the same inode is
+processed again and again.
+
+    EXT4-fs (loop0): warning: checktime reached, running e2fsck is recommended
+    EXT4-fs error (device loop0): ext4_mb_generate_buddy:758: group 2, block bitmap and bg descriptor inconsistent: 6117 vs 0 free clusters
+    Aborting journal on device loop0-8.
+    EXT4-fs (loop0): Remounting filesystem read-only
+    EXT4-fs error (device loop0) in ext4_free_blocks:4895: Journal has aborted
+    EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted
+    EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted
+    EXT4-fs error (device loop0) in ext4_ext_remove_space:3068: IO failure
+    EXT4-fs error (device loop0) in ext4_ext_truncate:4667: Journal has aborted
+    EXT4-fs error (device loop0) in ext4_orphan_del:2927: Journal has aborted
+    EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted
+    EXT4-fs (loop0): Inode 16 (00000000618192a0): orphan list check failed!
+    [...]
+    EXT4-fs (loop0): Inode 16 (0000000061819748): orphan list check failed!
+    [...]
+    EXT4-fs (loop0): Inode 16 (0000000061819bf0): orphan list check failed!
+    [...]
+
+See-also: c9eb13a9105 ("ext4: fix hang when processing corrupted orphaned inode list")
+Cc: Jan Kara <jack@suse.cz>
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2277,6 +2277,16 @@ static void ext4_orphan_cleanup(struct s
+       while (es->s_last_orphan) {
+               struct inode *inode;
++              /*
++               * We may have encountered an error during cleanup; if
++               * so, skip the rest.
++               */
++              if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
++                      jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
++                      es->s_last_orphan = 0;
++                      break;
++              }
++
+               inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
+               if (IS_ERR(inode)) {
+                       es->s_last_orphan = 0;
diff --git a/queue-4.6/ext4-validate-s_reserved_gdt_blocks-on-mount.patch b/queue-4.6/ext4-validate-s_reserved_gdt_blocks-on-mount.patch
new file mode 100644 (file)
index 0000000..ab5c2b6
--- /dev/null
@@ -0,0 +1,54 @@
+From 5b9554dc5bf008ae7f68a52e3d7e76c0920938a2 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Tue, 5 Jul 2016 20:01:52 -0400
+Subject: ext4: validate s_reserved_gdt_blocks on mount
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 5b9554dc5bf008ae7f68a52e3d7e76c0920938a2 upstream.
+
+If s_reserved_gdt_blocks is extremely large, it's possible for
+ext4_init_block_bitmap(), which is called when ext4 sets up an
+uninitialized block bitmap, to corrupt random kernel memory.  Add the
+same checks which e2fsck has --- it must never be larger than
+blocksize / sizeof(__u32) --- and then add a backup check in
+ext4_init_block_bitmap() in case the superblock gets modified after
+the file system is mounted.
+
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/balloc.c |    3 +++
+ fs/ext4/super.c  |    7 +++++++
+ 2 files changed, 10 insertions(+)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct
+       memset(bh->b_data, 0, sb->s_blocksize);
+       bit_max = ext4_num_base_meta_clusters(sb, block_group);
++      if ((bit_max >> 3) >= bh->b_size)
++              return -EFSCORRUPTED;
++
+       for (bit = 0; bit < bit_max; bit++)
+               ext4_set_bit(bit, bh->b_data);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3415,6 +3415,13 @@ static int ext4_fill_super(struct super_
+               goto failed_mount;
+       }
++      if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
++              ext4_msg(sb, KERN_ERR,
++                       "Number of reserved GDT blocks insanely large: %d",
++                       le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
++              goto failed_mount;
++      }
++
+       if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+               if (blocksize != PAGE_SIZE) {
+                       ext4_msg(sb, KERN_ERR,
diff --git a/queue-4.6/fs-dcache.c-avoid-soft-lockup-in-dput.patch b/queue-4.6/fs-dcache.c-avoid-soft-lockup-in-dput.patch
new file mode 100644 (file)
index 0000000..215fc90
--- /dev/null
@@ -0,0 +1,69 @@
+From 47be61845c775643f1aa4d2a54343549f943c94c Mon Sep 17 00:00:00 2001
+From: Wei Fang <fangwei1@huawei.com>
+Date: Wed, 6 Jul 2016 11:32:20 +0800
+Subject: fs/dcache.c: avoid soft-lockup in dput()
+
+From: Wei Fang <fangwei1@huawei.com>
+
+commit 47be61845c775643f1aa4d2a54343549f943c94c upstream.
+
+We triggered a soft lockup under a stress test which
+opens/accesses/writes/closes one file concurrently on more than
+five different CPUs:
+
+WARN: soft lockup - CPU#0 stuck for 11s! [who:30631]
+...
+[<ffffffc0003986f8>] dput+0x100/0x298
+[<ffffffc00038c2dc>] terminate_walk+0x4c/0x60
+[<ffffffc00038f56c>] path_lookupat+0x5cc/0x7a8
+[<ffffffc00038f780>] filename_lookup+0x38/0xf0
+[<ffffffc000391180>] user_path_at_empty+0x78/0xd0
+[<ffffffc0003911f4>] user_path_at+0x1c/0x28
+[<ffffffc00037d4fc>] SyS_faccessat+0xb4/0x230
+
+The ->d_lock trylock may fail many times because of concurrent
+operations, and dput() may execute for a long time.
+
+Fix this by replacing cpu_relax() with cond_resched().
+dput() used to be sleepable, so making it sleepable again
+should be safe.
+
+Signed-off-by: Wei Fang <fangwei1@huawei.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/dcache.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -578,7 +578,6 @@ static struct dentry *dentry_kill(struct
+ failed:
+       spin_unlock(&dentry->d_lock);
+-      cpu_relax();
+       return dentry; /* try again with same dentry */
+ }
+@@ -752,6 +751,8 @@ void dput(struct dentry *dentry)
+               return;
+ repeat:
++      might_sleep();
++
+       rcu_read_lock();
+       if (likely(fast_dput(dentry))) {
+               rcu_read_unlock();
+@@ -783,8 +784,10 @@ repeat:
+ kill_it:
+       dentry = dentry_kill(dentry);
+-      if (dentry)
++      if (dentry) {
++              cond_resched();
+               goto repeat;
++      }
+ }
+ EXPORT_SYMBOL(dput);
diff --git a/queue-4.6/fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch b/queue-4.6/fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch
new file mode 100644 (file)
index 0000000..c968425
--- /dev/null
@@ -0,0 +1,31 @@
+From 9446385f05c9af25fed53dbed3cc75763730be52 Mon Sep 17 00:00:00 2001
+From: Wei Fang <fangwei1@huawei.com>
+Date: Mon, 25 Jul 2016 21:17:04 +0800
+Subject: fuse: fix wrong assignment of ->flags in fuse_send_init()
+
+From: Wei Fang <fangwei1@huawei.com>
+
+commit 9446385f05c9af25fed53dbed3cc75763730be52 upstream.
+
+FUSE_HAS_IOCTL_DIR should be assigned to ->flags; FUSE_IOCTL_DIR was likely a typo.
+
+Signed-off-by: Wei Fang <fangwei1@huawei.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Fixes: 69fe05c90ed5 ("fuse: add missing INIT flags")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fuse/inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_c
+       arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+               FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
+-              FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
++              FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+               FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
+               FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
+       req->in.h.opcode = FUSE_INIT;
diff --git a/queue-4.6/fuse-fsync-did-not-return-io-errors.patch b/queue-4.6/fuse-fsync-did-not-return-io-errors.patch
new file mode 100644 (file)
index 0000000..604b6cc
--- /dev/null
@@ -0,0 +1,46 @@
+From ac7f052b9e1534c8248f814b6f0068ad8d4a06d2 Mon Sep 17 00:00:00 2001
+From: Alexey Kuznetsov <kuznet@parallels.com>
+Date: Tue, 19 Jul 2016 12:48:01 -0700
+Subject: fuse: fsync() did not return IO errors
+
+From: Alexey Kuznetsov <kuznet@parallels.com>
+
+commit ac7f052b9e1534c8248f814b6f0068ad8d4a06d2 upstream.
+
+Due to the implementation of fuse writeback, filemap_write_and_wait_range()
+does not catch errors. We have to do this directly after fuse_sync_writes().
+
+Signed-off-by: Alexey Kuznetsov <kuznet@virtuozzo.com>
+Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fuse/file.c |   15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -462,6 +462,21 @@ int fuse_fsync_common(struct file *file,
+               goto out;
+       fuse_sync_writes(inode);
++
++      /*
+       * Due to the implementation of fuse writeback,
+       * filemap_write_and_wait_range() does not catch errors.
+       * We have to check for them directly after fuse_sync_writes().
++       */
++      if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++          test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++              err = -ENOSPC;
++      if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++          test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++              err = -EIO;
++      if (err)
++              goto out;
++
+       err = sync_inode_metadata(inode, 1);
+       if (err)
+               goto out;
diff --git a/queue-4.6/fuse-fuse_flush-must-check-mapping-flags-for-errors.patch b/queue-4.6/fuse-fuse_flush-must-check-mapping-flags-for-errors.patch
new file mode 100644 (file)
index 0000000..c36f7df
--- /dev/null
@@ -0,0 +1,41 @@
+From 9ebce595f63a407c5cec98f98f9da8459b73740a Mon Sep 17 00:00:00 2001
+From: Maxim Patlasov <mpatlasov@virtuozzo.com>
+Date: Tue, 19 Jul 2016 18:12:26 -0700
+Subject: fuse: fuse_flush must check mapping->flags for errors
+
+From: Maxim Patlasov <mpatlasov@virtuozzo.com>
+
+commit 9ebce595f63a407c5cec98f98f9da8459b73740a upstream.
+
+fuse_flush() calls write_inode_now() that triggers writeback, but actual
+writeback will happen later, on fuse_sync_writes(). If an error happens,
+fuse_writepage_end() will set error bit in mapping->flags. So, we have to
+check mapping->flags after fuse_sync_writes().
+
+Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fuse/file.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -417,6 +417,15 @@ static int fuse_flush(struct file *file,
+       fuse_sync_writes(inode);
+       inode_unlock(inode);
++      if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++          test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++              err = -ENOSPC;
++      if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++          test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++              err = -EIO;
++      if (err)
++              return err;
++
+       req = fuse_get_req_nofail_nopages(fc, file);
+       memset(&inarg, 0, sizeof(inarg));
+       inarg.fh = ff->fh;
diff --git a/queue-4.6/radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch b/queue-4.6/radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch
new file mode 100644 (file)
index 0000000..380ceb2
--- /dev/null
@@ -0,0 +1,82 @@
+From 05eb6e7263185a6bb0de9501ccf2addc52429414 Mon Sep 17 00:00:00 2001
+From: Vladimir Davydov <vdavydov@virtuozzo.com>
+Date: Tue, 2 Aug 2016 14:03:01 -0700
+Subject: radix-tree: account nodes to memcg only if explicitly requested
+
+From: Vladimir Davydov <vdavydov@virtuozzo.com>
+
+commit 05eb6e7263185a6bb0de9501ccf2addc52429414 upstream.
+
+Radix trees may be used not only for storing page cache pages, so
+unconditionally accounting radix tree nodes to the current memory cgroup
+is bad: if a radix tree node is used for storing data shared among
+different cgroups we risk pinning dead memory cgroups forever.
+
+So let's only account radix tree nodes if it was explicitly requested by
+passing __GFP_ACCOUNT to INIT_RADIX_TREE.  Currently, we only want to
+account page cache entries, so mark mapping->page_tree so.
+
+Fixes: 58e698af4c63 ("radix-tree: account radix_tree_node to memory cgroup")
+Link: http://lkml.kernel.org/r/1470057188-7864-1-git-send-email-vdavydov@virtuozzo.com
+Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/inode.c       |    2 +-
+ lib/radix-tree.c |   14 ++++++++++----
+ 2 files changed, 11 insertions(+), 5 deletions(-)
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -344,7 +344,7 @@ EXPORT_SYMBOL(inc_nlink);
+ void address_space_init_once(struct address_space *mapping)
+ {
+       memset(mapping, 0, sizeof(*mapping));
+-      INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
++      INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
+       spin_lock_init(&mapping->tree_lock);
+       init_rwsem(&mapping->i_mmap_rwsem);
+       INIT_LIST_HEAD(&mapping->private_list);
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -228,10 +228,11 @@ radix_tree_node_alloc(struct radix_tree_
+               /*
+                * Even if the caller has preloaded, try to allocate from the
+-               * cache first for the new node to get accounted.
++               * cache first for the new node to get accounted to the memory
++               * cgroup.
+                */
+               ret = kmem_cache_alloc(radix_tree_node_cachep,
+-                                     gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN);
++                                     gfp_mask | __GFP_NOWARN);
+               if (ret)
+                       goto out;
+@@ -254,8 +255,7 @@ radix_tree_node_alloc(struct radix_tree_
+               kmemleak_update_trace(ret);
+               goto out;
+       }
+-      ret = kmem_cache_alloc(radix_tree_node_cachep,
+-                             gfp_mask | __GFP_ACCOUNT);
++      ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+ out:
+       BUG_ON(radix_tree_is_indirect_ptr(ret));
+       return ret;
+@@ -302,6 +302,12 @@ static int __radix_tree_preload(gfp_t gf
+       struct radix_tree_node *node;
+       int ret = -ENOMEM;
++      /*
++       * Nodes preloaded by one cgroup can be used by another cgroup, so
++       * they should never be accounted to any particular memory cgroup.
++       */
++      gfp_mask &= ~__GFP_ACCOUNT;
++
+       preempt_disable();
+       rtp = this_cpu_ptr(&radix_tree_preloads);
+       while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
diff --git a/queue-4.6/revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch b/queue-4.6/revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch
new file mode 100644 (file)
index 0000000..b7aa91d
--- /dev/null
@@ -0,0 +1,72 @@
+From da7d3abe1c9e5ebac2cf86f97e9e89888a5e2094 Mon Sep 17 00:00:00 2001
+From: Andreas Herrmann <aherrmann@suse.com>
+Date: Fri, 22 Jul 2016 17:14:11 +0200
+Subject: Revert "cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency"
+
+From: Andreas Herrmann <aherrmann@suse.com>
+
+commit da7d3abe1c9e5ebac2cf86f97e9e89888a5e2094 upstream.
+
+This reverts commit 790d849bf811a8ab5d4cd2cce0f6fda92f6aebf2.
+
+Using a v4.7-rc7 kernel on an HP ProLiant triggered the following messages:
+
+ pcc-cpufreq: (v1.10.00) driver loaded with frequency limits: 1200 MHz, 2800 MHz
+ cpufreq: ondemand governor failed, too long transition latency of HW, fallback to performance governor
+
+The last line was shown for each CPU in the system.
+Testing v4.5 (where commit 790d849b was integrated) triggered
+similar messages. Same behaviour on a 2nd HP Proliant system.
+
+So commit 790d849bf (cpufreq: pcc-cpufreq: update default value of
+cpuinfo_transition_latency) causes the system to use performance
+governor which, I guess, was not the intention of the patch.
+
+Enabling debug output in pcc-cpufreq provides following verbose output:
+
+ pcc-cpufreq: (v1.10.00) driver loaded with frequency limits: 1200 MHz, 2800 MHz
+ pcc_get_offset: for CPU 0: pcc_cpu_data input_offset: 0x44, pcc_cpu_data output_offset: 0x48
+ init: policy->max is 2800000, policy->min is 1200000
+ get: get_freq for CPU 0
+ get: SUCCESS: (virtual) output_offset for cpu 0 is 0xffffc9000d7c0048, contains a value of: 0xff06. Speed is: 168000 MHz
+ cpufreq: ondemand governor failed, too long transition latency of HW, fallback to performance governor
+ target: CPU 0 should go to target freq: 2800000 (virtual) input_offset is 0xffffc9000d7c0044
+ target: was SUCCESSFUL for cpu 0
+
+I am asking to revert 790d849bf to re-enable usage of ondemand
+governor with pcc-cpufreq.
+
+Fixes: 790d849bf (cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency)
+Signed-off-by: Andreas Herrmann <aherrmann@suse.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/cpu-freq/pcc-cpufreq.txt |    4 ++--
+ drivers/cpufreq/pcc-cpufreq.c          |    2 --
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+--- a/Documentation/cpu-freq/pcc-cpufreq.txt
++++ b/Documentation/cpu-freq/pcc-cpufreq.txt
+@@ -159,8 +159,8 @@ to be strictly associated with a P-state
+ 2.2 cpuinfo_transition_latency:
+ -------------------------------
+-The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
+-does not include a field to expose this value currently.
++The cpuinfo_transition_latency field is 0. The PCC specification does
++not include a field to expose this value currently.
+ 2.3 cpuinfo_cur_freq:
+ ---------------------
+--- a/drivers/cpufreq/pcc-cpufreq.c
++++ b/drivers/cpufreq/pcc-cpufreq.c
+@@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct c
+       policy->min = policy->cpuinfo.min_freq =
+               ioread32(&pcch_hdr->minimum_frequency) * 1000;
+-      policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+-
+       pr_debug("init: policy->max is %d, policy->min is %d\n",
+               policy->max, policy->min);
+ out:
diff --git a/queue-4.6/revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch b/queue-4.6/revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch
new file mode 100644 (file)
index 0000000..95f3c19
--- /dev/null
@@ -0,0 +1,143 @@
+From 4e390b2b2f34b8daaabf2df1df0cf8f798b87ddb Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Thu, 28 Jul 2016 15:48:44 -0700
+Subject: Revert "mm, mempool: only set __GFP_NOMEMALLOC if there are free elements"
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 4e390b2b2f34b8daaabf2df1df0cf8f798b87ddb upstream.
+
+This reverts commit f9054c70d28b ("mm, mempool: only set __GFP_NOMEMALLOC
+if there are free elements").
+
+There has been a report about OOM killer invoked when swapping out to a
+dm-crypt device.  The primary reason seems to be that the swapout IO
+managed to completely deplete memory reserves.  Ondrej was able to
+bisect and explained the issue by pointing to f9054c70d28b ("mm,
+mempool: only set __GFP_NOMEMALLOC if there are free elements").
+
+The reason is that the swapout path is not throttled properly because
+the md-raid layer needs to allocate from the generic_make_request path
+which means it allocates from the PF_MEMALLOC context.  dm layer uses
+mempool_alloc in order to guarantee a forward progress which used to
+inhibit access to memory reserves when using page allocator.  This has
+changed by f9054c70d28b ("mm, mempool: only set __GFP_NOMEMALLOC if
+there are free elements") which has dropped the __GFP_NOMEMALLOC
+protection when the memory pool is depleted.
+
+If we are running out of memory and the only way forward to free memory
+is to perform swapout we just keep consuming memory reserves rather than
+throttling the mempool allocations and allowing the pending IO to
+complete up to a moment when the memory is depleted completely and there
+is no way forward but invoking the OOM killer.  This is less than
+optimal.
+
+The original intention of f9054c70d28b was to help with the OOM
+situations where the oom victim depends on mempool allocation to make a
+forward progress.  David has mentioned the following backtrace:
+
+  schedule
+  schedule_timeout
+  io_schedule_timeout
+  mempool_alloc
+  __split_and_process_bio
+  dm_request
+  generic_make_request
+  submit_bio
+  mpage_readpages
+  ext4_readpages
+  __do_page_cache_readahead
+  ra_submit
+  filemap_fault
+  handle_mm_fault
+  __do_page_fault
+  do_page_fault
+  page_fault
+
+We do not know more about why the mempool is depleted without being
+replenished in time, though.  In any case the dm layer shouldn't depend
+on any allocations outside of the dedicated pools so a forward progress
+should be guaranteed.  If this is not the case then the dm should be
+fixed rather than papering over the problem and postponing it to later
+by accessing more memory reserves.
+
+mempools are a mechanism to maintain dedicated memory reserves to
+guarantee forward progress.  Allowing them an unbounded access to the
+page allocator memory reserves is going against the whole purpose of
+this mechanism.
+
+Bisected by Ondrej Kozina.
+
+[akpm@linux-foundation.org: coding-style fixes]
+Link: http://lkml.kernel.org/r/20160721145309.GR26379@dhcp22.suse.cz
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Reported-by: Ondrej Kozina <okozina@redhat.com>
+Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: NeilBrown <neilb@suse.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Mikulas Patocka <mpatocka@redhat.com>
+Cc: Ondrej Kozina <okozina@redhat.com>
+Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mempool.c |   18 +++---------------
+ 1 file changed, 3 insertions(+), 15 deletions(-)
+
+--- a/mm/mempool.c
++++ b/mm/mempool.c
+@@ -310,7 +310,7 @@ EXPORT_SYMBOL(mempool_resize);
+  * returns NULL. Note that due to preallocation, this function
+  * *never* fails when called from process contexts. (it might
+  * fail if called from an IRQ context.)
+- * Note: neither __GFP_NOMEMALLOC nor __GFP_ZERO are supported.
++ * Note: using __GFP_ZERO is not supported.
+  */
+ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+ {
+@@ -319,27 +319,16 @@ void *mempool_alloc(mempool_t *pool, gfp
+       wait_queue_t wait;
+       gfp_t gfp_temp;
+-      /* If oom killed, memory reserves are essential to prevent livelock */
+-      VM_WARN_ON_ONCE(gfp_mask & __GFP_NOMEMALLOC);
+-      /* No element size to zero on allocation */
+       VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
+-
+       might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
++      gfp_mask |= __GFP_NOMEMALLOC;   /* don't allocate emergency reserves */
+       gfp_mask |= __GFP_NORETRY;      /* don't loop in __alloc_pages */
+       gfp_mask |= __GFP_NOWARN;       /* failures are OK */
+       gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
+ repeat_alloc:
+-      if (likely(pool->curr_nr)) {
+-              /*
+-               * Don't allocate from emergency reserves if there are
+-               * elements available.  This check is racy, but it will
+-               * be rechecked each loop.
+-               */
+-              gfp_temp |= __GFP_NOMEMALLOC;
+-      }
+       element = pool->alloc(gfp_temp, pool->pool_data);
+       if (likely(element != NULL))
+@@ -363,12 +352,11 @@ repeat_alloc:
+        * We use gfp mask w/o direct reclaim or IO for the first round.  If
+        * alloc failed with that and @pool was empty, retry immediately.
+        */
+-      if ((gfp_temp & ~__GFP_NOMEMALLOC) != gfp_mask) {
++      if (gfp_temp != gfp_mask) {
+               spin_unlock_irqrestore(&pool->lock, flags);
+               gfp_temp = gfp_mask;
+               goto repeat_alloc;
+       }
+-      gfp_temp = gfp_mask;
+       /* We must not sleep if !__GFP_DIRECT_RECLAIM */
+       if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
diff --git a/queue-4.6/serial-mvebu-uart-free-the-irq-in-shutdown.patch b/queue-4.6/serial-mvebu-uart-free-the-irq-in-shutdown.patch
new file mode 100644 (file)
index 0000000..2f8a76a
--- /dev/null
@@ -0,0 +1,41 @@
+From c2c1659b4f8f9e19fe82a4fd06cca4b3d59090ce Mon Sep 17 00:00:00 2001
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Date: Thu, 16 Jun 2016 16:48:52 +0200
+Subject: serial: mvebu-uart: free the IRQ in ->shutdown()
+
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+
+commit c2c1659b4f8f9e19fe82a4fd06cca4b3d59090ce upstream.
+
+As suggested by the serial port infrastructure documentation, the IRQ is
+requested in ->startup(). However, it is never freed in the ->shutdown()
+hook.
+
+With simple systems that open the serial port once and for all and always
+have at least one process that keeps the serial port open, there was no
+problem. But with a more complicated system (*cough* systemd *cough*),
+the serial port is opened/closed many times, so at some point no
+process has the serial port open at all. Due to this, ->startup()
+gets called again and tries to request_irq() again, which fails.
+
+Fixes: 30530791a7a0 ("serial: mvebu-uart: initial support for Armada-3700 serial port")
+Cc: Ofer Heifetz <oferh@marvell.com>
+Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/serial/mvebu-uart.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/tty/serial/mvebu-uart.c
++++ b/drivers/tty/serial/mvebu-uart.c
+@@ -299,6 +299,8 @@ static int mvebu_uart_startup(struct uar
+ static void mvebu_uart_shutdown(struct uart_port *port)
+ {
+       writel(0, port->membase + UART_CTRL);
++
++      free_irq(port->irq, port);
+ }
+ static void mvebu_uart_set_termios(struct uart_port *port,
index 3d674d8b372e0c8ec9931700e5b8db7ff9eb3f61..c23e64f14f2640e75b89d8e13ea15166139da0a1 100644 (file)
@@ -33,3 +33,23 @@ powerpc-eeh-fix-invalid-cached-pe-primary-bus.patch
 powerpc-bpf-jit-disable-classic-bpf-jit-on-ppc64le.patch
 mm-memcontrol-fix-swap-counter-leak-on-swapout-from-offline-cgroup.patch
 mm-memcontrol-fix-memcg-id-ref-counter-on-swap-charge-move.patch
+x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch
+block-fix-use-after-free-in-seq-file.patch
+sysv-ipc-fix-security-layer-leaking.patch
+radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch
+x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch
+x86-power-64-fix-hibernation-return-address-corruption.patch
+fuse-fsync-did-not-return-io-errors.patch
+fuse-fuse_flush-must-check-mapping-flags-for-errors.patch
+fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch
+revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch
+fs-dcache.c-avoid-soft-lockup-in-dput.patch
+revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch
+crypto-gcm-filter-out-async-ghash-if-necessary.patch
+crypto-scatterwalk-fix-test-in-scatterwalk_done.patch
+serial-mvebu-uart-free-the-irq-in-shutdown.patch
+ext4-check-for-extents-that-wrap-around.patch
+ext4-fix-deadlock-during-page-writeback.patch
+ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch
+ext4-validate-s_reserved_gdt_blocks-on-mount.patch
+ext4-short-cut-orphan-cleanup-on-error.patch
diff --git a/queue-4.6/sysv-ipc-fix-security-layer-leaking.patch b/queue-4.6/sysv-ipc-fix-security-layer-leaking.patch
new file mode 100644 (file)
index 0000000..dc05fde
--- /dev/null
@@ -0,0 +1,112 @@
+From 9b24fef9f0410fb5364245d6cc2bd044cc064007 Mon Sep 17 00:00:00 2001
+From: Fabian Frederick <fabf@skynet.be>
+Date: Tue, 2 Aug 2016 14:03:07 -0700
+Subject: sysv, ipc: fix security-layer leaking
+
+From: Fabian Frederick <fabf@skynet.be>
+
+commit 9b24fef9f0410fb5364245d6cc2bd044cc064007 upstream.
+
+Commit 53dad6d3a8e5 ("ipc: fix race with LSMs") updated ipc_rcu_putref()
+to receive rcu freeing function but used generic ipc_rcu_free() instead
+of msg_rcu_free() which does security cleaning.
+
+Running LTP msgsnd06 with kmemleak gives the following:
+
+  cat /sys/kernel/debug/kmemleak
+
+  unreferenced object 0xffff88003c0a11f8 (size 8):
+    comm "msgsnd06", pid 1645, jiffies 4294672526 (age 6.549s)
+    hex dump (first 8 bytes):
+      1b 00 00 00 01 00 00 00                          ........
+    backtrace:
+      kmemleak_alloc+0x23/0x40
+      kmem_cache_alloc_trace+0xe1/0x180
+      selinux_msg_queue_alloc_security+0x3f/0xd0
+      security_msg_queue_alloc+0x2e/0x40
+      newque+0x4e/0x150
+      ipcget+0x159/0x1b0
+      SyS_msgget+0x39/0x40
+      entry_SYSCALL_64_fastpath+0x13/0x8f
+
+Manfred Spraul suggested to fix sem.c as well and Davidlohr Bueso to
+only use ipc_rcu_free in case of security allocation failure in newary()
+
+Fixes: 53dad6d3a8e ("ipc: fix race with LSMs")
+Link: http://lkml.kernel.org/r/1470083552-22966-1-git-send-email-fabf@skynet.be
+Signed-off-by: Fabian Frederick <fabf@skynet.be>
+Cc: Davidlohr Bueso <dbueso@suse.de>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/msg.c |    2 +-
+ ipc/sem.c |   12 ++++++------
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, vo
+               rcu_read_lock();
+               ipc_lock_object(&msq->q_perm);
+-              ipc_rcu_putref(msq, ipc_rcu_free);
++              ipc_rcu_putref(msq, msg_rcu_free);
+               /* raced with RMID? */
+               if (!ipc_valid_object(&msq->q_perm)) {
+                       err = -EIDRM;
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -449,7 +449,7 @@ static inline struct sem_array *sem_obta
+ static inline void sem_lock_and_putref(struct sem_array *sma)
+ {
+       sem_lock(sma, NULL, -1);
+-      ipc_rcu_putref(sma, ipc_rcu_free);
++      ipc_rcu_putref(sma, sem_rcu_free);
+ }
+ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+@@ -1392,7 +1392,7 @@ static int semctl_main(struct ipc_namesp
+                       rcu_read_unlock();
+                       sem_io = ipc_alloc(sizeof(ushort)*nsems);
+                       if (sem_io == NULL) {
+-                              ipc_rcu_putref(sma, ipc_rcu_free);
++                              ipc_rcu_putref(sma, sem_rcu_free);
+                               return -ENOMEM;
+                       }
+@@ -1426,20 +1426,20 @@ static int semctl_main(struct ipc_namesp
+               if (nsems > SEMMSL_FAST) {
+                       sem_io = ipc_alloc(sizeof(ushort)*nsems);
+                       if (sem_io == NULL) {
+-                              ipc_rcu_putref(sma, ipc_rcu_free);
++                              ipc_rcu_putref(sma, sem_rcu_free);
+                               return -ENOMEM;
+                       }
+               }
+               if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
+-                      ipc_rcu_putref(sma, ipc_rcu_free);
++                      ipc_rcu_putref(sma, sem_rcu_free);
+                       err = -EFAULT;
+                       goto out_free;
+               }
+               for (i = 0; i < nsems; i++) {
+                       if (sem_io[i] > SEMVMX) {
+-                              ipc_rcu_putref(sma, ipc_rcu_free);
++                              ipc_rcu_putref(sma, sem_rcu_free);
+                               err = -ERANGE;
+                               goto out_free;
+                       }
+@@ -1731,7 +1731,7 @@ static struct sem_undo *find_alloc_undo(
+       /* step 2: allocate new undo structure */
+       new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+       if (!new) {
+-              ipc_rcu_putref(sma, ipc_rcu_free);
++              ipc_rcu_putref(sma, sem_rcu_free);
+               return ERR_PTR(-ENOMEM);
+       }
diff --git a/queue-4.6/x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch b/queue-4.6/x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch
new file mode 100644 (file)
index 0000000..fbd570e
--- /dev/null
@@ -0,0 +1,100 @@
+From 4b703305d98bf7350d4b2953ee39a3aa2eeb1778 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 6 Jun 2016 17:10:43 +0200
+Subject: x86/microcode: Fix suspend to RAM with builtin microcode
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 4b703305d98bf7350d4b2953ee39a3aa2eeb1778 upstream.
+
+Usually, after we have found the proper microcode blob for the current
+machine, we stash it away for later use with save_microcode_in_initrd().
+
+However, with builtin microcode which doesn't come from the initrd, we
+don't call that function because CONFIG_BLK_DEV_INITRD=n and even if
+set, we don't have a valid initrd.
+
+In order to fix this, let's make save_microcode_in_initrd() an
+fs_initcall which runs before rootfs_initcall() as this was the time it
+was called previously through:
+
+ rootfs_initcall(populate_rootfs)
+ |-> free_initrd()
+     |-> free_initrd_mem()
+         |-> save_microcode_in_initrd()
+
+Also, we make it run independently from initrd functionality being
+present or not.
+
+And since it is called in the microcode loader only now, we can also
+make it static.
+
+Reported-and-tested-by: Jim Bos <jim876@xs4all.nl>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/1465225850-7352-3-git-send-email-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/microcode.h     |    2 --
+ arch/x86/kernel/cpu/microcode/core.c |    3 ++-
+ arch/x86/mm/init.c                   |    7 -------
+ 3 files changed, 2 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -133,13 +133,11 @@ static inline unsigned int x86_cpuid_fam
+ #ifdef CONFIG_MICROCODE
+ extern void __init load_ucode_bsp(void);
+ extern void load_ucode_ap(void);
+-extern int __init save_microcode_in_initrd(void);
+ void reload_early_microcode(void);
+ extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
+ #else
+ static inline void __init load_ucode_bsp(void)                        { }
+ static inline void load_ucode_ap(void)                                { }
+-static inline int __init save_microcode_in_initrd(void)               { return 0; }
+ static inline void reload_early_microcode(void)                       { }
+ static inline bool
+ get_builtin_firmware(struct cpio_data *cd, const char *name)  { return false; }
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -175,7 +175,7 @@ void load_ucode_ap(void)
+       }
+ }
+-int __init save_microcode_in_initrd(void)
++static int __init save_microcode_in_initrd(void)
+ {
+       struct cpuinfo_x86 *c = &boot_cpu_data;
+@@ -691,4 +691,5 @@ int __init microcode_init(void)
+       return error;
+ }
++fs_initcall(save_microcode_in_initrd);
+ late_initcall(microcode_init);
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -696,13 +696,6 @@ void free_initmem(void)
+ void __init free_initrd_mem(unsigned long start, unsigned long end)
+ {
+       /*
+-       * Remember, initrd memory may contain microcode or other useful things.
+-       * Before we lose initrd mem, we need to find a place to hold them
+-       * now that normal virtual memory is enabled.
+-       */
+-      save_microcode_in_initrd();
+-
+-      /*
+        * end could be not aligned, and We can not align that,
+        * decompresser could be confused by aligned initrd_end
+        * We already reserve the end partial page before in
diff --git a/queue-4.6/x86-power-64-fix-hibernation-return-address-corruption.patch b/queue-4.6/x86-power-64-fix-hibernation-return-address-corruption.patch
new file mode 100644 (file)
index 0000000..4fa4b7b
--- /dev/null
@@ -0,0 +1,101 @@
+From 4ce827b4cc58bec7952591b96cce2b28553e4d5b Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 28 Jul 2016 23:15:21 +0200
+Subject: x86/power/64: Fix hibernation return address corruption
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 4ce827b4cc58bec7952591b96cce2b28553e4d5b upstream.
+
+In kernel bug 150021, a kernel panic was reported when restoring a
+hibernate image.  Only a picture of the oops was reported, so I can't
+paste the whole thing here.  But here are the most interesting parts:
+
+  kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
+  BUG: unable to handle kernel paging request at ffff8804615cfd78
+  ...
+  RIP: ffff8804615cfd78
+  RSP: ffff8804615f0000
+  RBP: ffff8804615cfdc0
+  ...
+  Call Trace:
+   do_signal+0x23
+   exit_to_usermode_loop+0x64
+   ...
+
+The RIP is on the same page as RBP, so it apparently started executing
+on the stack.
+
+The bug was bisected to commit ef0f3ed5a4ac (x86/asm/power: Create
+stack frames in hibernate_asm_64.S), which in retrospect seems quite
+dangerous, since that code saves and restores the stack pointer from a
+global variable ('saved_context').
+
+There are a lot of moving parts in the hibernate save and restore paths,
+so I don't know exactly what caused the panic.  Presumably, a FRAME_END
+was executed without the corresponding FRAME_BEGIN, or vice versa.  That
+would corrupt the return address on the stack and would be consistent
+with the details of the above panic.
+
+[ rjw: One major problem is that by the time the FRAME_BEGIN in
+  restore_registers() is executed, the stack pointer value may not
+  be valid any more.  Namely, the stack area pointed to by it
+  previously may have been overwritten by some image memory contents
+  and that page frame may now be used for whatever different purpose
+  it had been allocated for before hibernation.  In that case, the
+  FRAME_BEGIN will corrupt that memory. ]
+
+Instead of doing the frame pointer save/restore around the bounds of the
+affected functions, just do it around the call to swsusp_save().
+
+That has the same effect of ensuring that if swsusp_save() sleeps, the
+frame pointers will be correct.  It's also a much more obviously safe
+way to do it than the original patch.  And objtool still doesn't report
+any warnings.
+
+Fixes: ef0f3ed5a4ac (x86/asm/power: Create stack frames in hibernate_asm_64.S)
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=150021
+Reported-by: Andre Reinke <andre.reinke@mailbox.org>
+Tested-by: Andre Reinke <andre.reinke@mailbox.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/power/hibernate_asm_64.S |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/arch/x86/power/hibernate_asm_64.S
++++ b/arch/x86/power/hibernate_asm_64.S
+@@ -24,7 +24,6 @@
+ #include <asm/frame.h>
+ ENTRY(swsusp_arch_suspend)
+-      FRAME_BEGIN
+       movq    $saved_context, %rax
+       movq    %rsp, pt_regs_sp(%rax)
+       movq    %rbp, pt_regs_bp(%rax)
+@@ -51,6 +50,7 @@ ENTRY(swsusp_arch_suspend)
+       movq    %cr3, %rax
+       movq    %rax, restore_cr3(%rip)
++      FRAME_BEGIN
+       call swsusp_save
+       FRAME_END
+       ret
+@@ -111,7 +111,6 @@ ENTRY(core_restore_code)
+        */
+ ENTRY(restore_registers)
+-      FRAME_BEGIN
+       /* go back to the original page tables */
+       movq    %rbx, %cr3
+@@ -152,6 +151,5 @@ ENTRY(restore_registers)
+       /* tell the hibernation core that we've just restored the memory */
+       movq    %rax, in_suspend(%rip)
+-      FRAME_END
+       ret
+ ENDPROC(restore_registers)
diff --git a/queue-4.6/x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch b/queue-4.6/x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch
new file mode 100644 (file)
index 0000000..61849c3
--- /dev/null
@@ -0,0 +1,46 @@
+From f7d665627e103e82d34306c7d3f6f46f387c0d8b Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Wed, 27 Jul 2016 11:42:38 +0100
+Subject: x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace
+
+From: David Howells <dhowells@redhat.com>
+
+commit f7d665627e103e82d34306c7d3f6f46f387c0d8b upstream.
+
+x86_64 needs to use compat_sys_keyctl for 32-bit userspace rather than
+calling sys_keyctl(). The latter will work in a lot of cases, thereby
+hiding the issue.
+
+Reported-by: Stephan Mueller <smueller@chronox.de>
+Tested-by: Stephan Mueller <smueller@chronox.de>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: keyrings@vger.kernel.org
+Cc: linux-security-module@vger.kernel.org
+Link: http://lkml.kernel.org/r/146961615805.14395.5581949237156769439.stgit@warthog.procyon.org.uk
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/syscalls/syscall_32.tbl |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/entry/syscalls/syscall_32.tbl
++++ b/arch/x86/entry/syscalls/syscall_32.tbl
+@@ -294,7 +294,7 @@
+ # 285 sys_setaltroot
+ 286   i386    add_key                 sys_add_key
+ 287   i386    request_key             sys_request_key
+-288   i386    keyctl                  sys_keyctl
++288   i386    keyctl                  sys_keyctl                      compat_sys_keyctl
+ 289   i386    ioprio_set              sys_ioprio_set
+ 290   i386    ioprio_get              sys_ioprio_get
+ 291   i386    inotify_init            sys_inotify_init