From: Greg Kroah-Hartman Date: Sun, 14 Aug 2016 18:38:30 +0000 (+0200) Subject: 4.7-stable patches X-Git-Tag: v3.14.76~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d033d661747b08c1d56c325df972ed4f901dcaa7;p=thirdparty%2Fkernel%2Fstable-queue.git 4.7-stable patches added patches: block-fix-use-after-free-in-seq-file.patch crypto-gcm-filter-out-async-ghash-if-necessary.patch crypto-scatterwalk-fix-test-in-scatterwalk_done.patch ext4-check-for-extents-that-wrap-around.patch ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch ext4-fix-deadlock-during-page-writeback.patch ext4-short-cut-orphan-cleanup-on-error.patch ext4-validate-s_reserved_gdt_blocks-on-mount.patch fs-dcache.c-avoid-soft-lockup-in-dput.patch fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch fuse-fsync-did-not-return-io-errors.patch fuse-fuse_flush-must-check-mapping-flags-for-errors.patch radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch serial-mvebu-uart-free-the-irq-in-shutdown.patch sysv-ipc-fix-security-layer-leaking.patch x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch x86-power-64-fix-hibernation-return-address-corruption.patch x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch --- diff --git a/queue-4.7/block-fix-use-after-free-in-seq-file.patch b/queue-4.7/block-fix-use-after-free-in-seq-file.patch new file mode 100644 index 00000000000..32e915bbc73 --- /dev/null +++ b/queue-4.7/block-fix-use-after-free-in-seq-file.patch @@ -0,0 +1,112 @@ +From 77da160530dd1dc94f6ae15a981f24e5f0021e84 Mon Sep 17 00:00:00 2001 +From: Vegard Nossum +Date: Fri, 29 Jul 2016 10:40:31 +0200 +Subject: block: fix use-after-free in seq file + +From: Vegard Nossum + +commit 77da160530dd1dc94f6ae15a981f24e5f0021e84 upstream. 
+ +I got a KASAN report of use-after-free: + + ================================================================== + BUG: KASAN: use-after-free in klist_iter_exit+0x61/0x70 at addr ffff8800b6581508 + Read of size 8 by task trinity-c1/315 + ============================================================================= + BUG kmalloc-32 (Not tainted): kasan: bad access detected + ----------------------------------------------------------------------------- + + Disabling lock debugging due to kernel taint + INFO: Allocated in disk_seqf_start+0x66/0x110 age=144 cpu=1 pid=315 + ___slab_alloc+0x4f1/0x520 + __slab_alloc.isra.58+0x56/0x80 + kmem_cache_alloc_trace+0x260/0x2a0 + disk_seqf_start+0x66/0x110 + traverse+0x176/0x860 + seq_read+0x7e3/0x11a0 + proc_reg_read+0xbc/0x180 + do_loop_readv_writev+0x134/0x210 + do_readv_writev+0x565/0x660 + vfs_readv+0x67/0xa0 + do_preadv+0x126/0x170 + SyS_preadv+0xc/0x10 + do_syscall_64+0x1a1/0x460 + return_from_SYSCALL_64+0x0/0x6a + INFO: Freed in disk_seqf_stop+0x42/0x50 age=160 cpu=1 pid=315 + __slab_free+0x17a/0x2c0 + kfree+0x20a/0x220 + disk_seqf_stop+0x42/0x50 + traverse+0x3b5/0x860 + seq_read+0x7e3/0x11a0 + proc_reg_read+0xbc/0x180 + do_loop_readv_writev+0x134/0x210 + do_readv_writev+0x565/0x660 + vfs_readv+0x67/0xa0 + do_preadv+0x126/0x170 + SyS_preadv+0xc/0x10 + do_syscall_64+0x1a1/0x460 + return_from_SYSCALL_64+0x0/0x6a + + CPU: 1 PID: 315 Comm: trinity-c1 Tainted: G B 4.7.0+ #62 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + ffffea0002d96000 ffff880119b9f918 ffffffff81d6ce81 ffff88011a804480 + ffff8800b6581500 ffff880119b9f948 ffffffff8146c7bd ffff88011a804480 + ffffea0002d96000 ffff8800b6581500 fffffffffffffff4 ffff880119b9f970 + Call Trace: + [] dump_stack+0x65/0x84 + [] print_trailer+0x10d/0x1a0 + [] object_err+0x2f/0x40 + [] kasan_report_error+0x221/0x520 + [] __asan_report_load8_noabort+0x3e/0x40 + [] klist_iter_exit+0x61/0x70 + [] class_dev_iter_exit+0x9/0x10 + [] 
disk_seqf_stop+0x3a/0x50 + [] seq_read+0x4b2/0x11a0 + [] proc_reg_read+0xbc/0x180 + [] do_loop_readv_writev+0x134/0x210 + [] do_readv_writev+0x565/0x660 + [] vfs_readv+0x67/0xa0 + [] do_preadv+0x126/0x170 + [] SyS_preadv+0xc/0x10 + +This problem can occur in the following situation: + +open() + - pread() + - .seq_start() + - iter = kmalloc() // succeeds + - seqf->private = iter + - .seq_stop() + - kfree(seqf->private) + - pread() + - .seq_start() + - iter = kmalloc() // fails + - .seq_stop() + - class_dev_iter_exit(seqf->private) // boom! old pointer + +As the comment in disk_seqf_stop() says, stop is called even if start +failed, so we need to reinitialise the private pointer to NULL when seq +iteration stops. + +An alternative would be to set the private pointer to NULL when the +kmalloc() in disk_seqf_start() fails. + +Signed-off-by: Vegard Nossum +Acked-by: Tejun Heo +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/genhd.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -856,6 +856,7 @@ static void disk_seqf_stop(struct seq_fi + if (iter) { + class_dev_iter_exit(iter); + kfree(iter); ++ seqf->private = NULL; + } + } + diff --git a/queue-4.7/crypto-gcm-filter-out-async-ghash-if-necessary.patch b/queue-4.7/crypto-gcm-filter-out-async-ghash-if-necessary.patch new file mode 100644 index 00000000000..1221720de0a --- /dev/null +++ b/queue-4.7/crypto-gcm-filter-out-async-ghash-if-necessary.patch @@ -0,0 +1,36 @@ +From b30bdfa86431afbafe15284a3ad5ac19b49b88e3 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Wed, 15 Jun 2016 22:27:05 +0800 +Subject: crypto: gcm - Filter out async ghash if necessary + +From: Herbert Xu + +commit b30bdfa86431afbafe15284a3ad5ac19b49b88e3 upstream. + +As it is if you ask for a sync gcm you may actually end up with +an async one because it does not filter out async implementations +of ghash. + +This patch fixes this by adding the necessary filter when looking +for ghash. 
+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + crypto/gcm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/crypto/gcm.c ++++ b/crypto/gcm.c +@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(stru + + ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type, + CRYPTO_ALG_TYPE_HASH, +- CRYPTO_ALG_TYPE_AHASH_MASK); ++ CRYPTO_ALG_TYPE_AHASH_MASK | ++ crypto_requires_sync(algt->type, ++ algt->mask)); + if (IS_ERR(ghash_alg)) + return PTR_ERR(ghash_alg); + diff --git a/queue-4.7/crypto-scatterwalk-fix-test-in-scatterwalk_done.patch b/queue-4.7/crypto-scatterwalk-fix-test-in-scatterwalk_done.patch new file mode 100644 index 00000000000..d3bddb76515 --- /dev/null +++ b/queue-4.7/crypto-scatterwalk-fix-test-in-scatterwalk_done.patch @@ -0,0 +1,39 @@ +From 5f070e81bee35f1b7bd1477bb223a873ff657803 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Tue, 12 Jul 2016 13:17:57 +0800 +Subject: crypto: scatterwalk - Fix test in scatterwalk_done + +From: Herbert Xu + +commit 5f070e81bee35f1b7bd1477bb223a873ff657803 upstream. + +When there is more data to be processed, the current test in +scatterwalk_done may prevent us from calling pagedone even when +we should. + +In particular, if we're on an SG entry spanning multiple pages +where the last page is not a full page, we will incorrectly skip +calling pagedone on the second last page. + +This patch fixes this by adding a separate test for whether we've +reached the end of a page. 
+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + crypto/scatterwalk.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/crypto/scatterwalk.c ++++ b/crypto/scatterwalk.c +@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct + + void scatterwalk_done(struct scatter_walk *walk, int out, int more) + { +- if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more) ++ if (!more || walk->offset >= walk->sg->offset + walk->sg->length || ++ !(walk->offset & (PAGE_SIZE - 1))) + scatterwalk_pagedone(walk, out, more); + } + EXPORT_SYMBOL_GPL(scatterwalk_done); diff --git a/queue-4.7/ext4-check-for-extents-that-wrap-around.patch b/queue-4.7/ext4-check-for-extents-that-wrap-around.patch new file mode 100644 index 00000000000..ee26b7d5a1c --- /dev/null +++ b/queue-4.7/ext4-check-for-extents-that-wrap-around.patch @@ -0,0 +1,55 @@ +From f70749ca42943faa4d4dcce46dfdcaadb1d0c4b6 Mon Sep 17 00:00:00 2001 +From: Vegard Nossum +Date: Thu, 30 Jun 2016 11:53:46 -0400 +Subject: ext4: check for extents that wrap around + +From: Vegard Nossum + +commit f70749ca42943faa4d4dcce46dfdcaadb1d0c4b6 upstream. + +An extent with lblock = 4294967295 and len = 1 will pass the +ext4_valid_extent() test: + + ext4_lblk_t last = lblock + len - 1; + + if (len == 0 || lblock > last) + return 0; + +since last = 4294967295 + 1 - 1 = 4294967295. This would later trigger +the BUG_ON(es->es_lblk + es->es_len < es->es_lblk) in ext4_es_end(). + +We can simplify it by removing the - 1 altogether and changing the test +to use lblock + len <= lblock, since now if len = 0, then lblock + 0 == +lblock and it fails, and if len > 0 then lblock + len > lblock in order +to pass (i.e. it doesn't overflow). 
+ +Fixes: 5946d0893 ("ext4: check for overlapping extents in ext4_valid_extent_entries()") +Fixes: 2f974865f ("ext4: check for zero length extent explicitly") +Cc: Eryu Guan +Signed-off-by: Phil Turnbull +Signed-off-by: Vegard Nossum +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/extents.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -381,9 +381,13 @@ static int ext4_valid_extent(struct inod + ext4_fsblk_t block = ext4_ext_pblock(ext); + int len = ext4_ext_get_actual_len(ext); + ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); +- ext4_lblk_t last = lblock + len - 1; + +- if (len == 0 || lblock > last) ++ /* ++ * We allow neither: ++ * - zero length ++ * - overflow/wrap-around ++ */ ++ if (lblock + len <= lblock) + return 0; + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); + } diff --git a/queue-4.7/ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch b/queue-4.7/ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch new file mode 100644 index 00000000000..c19617e26d4 --- /dev/null +++ b/queue-4.7/ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch @@ -0,0 +1,44 @@ +From 6a7fd522a7c94cdef0a3b08acf8e6702056e635c Mon Sep 17 00:00:00 2001 +From: Vegard Nossum +Date: Mon, 4 Jul 2016 11:03:00 -0400 +Subject: ext4: don't call ext4_should_journal_data() on the journal inode + +From: Vegard Nossum + +commit 6a7fd522a7c94cdef0a3b08acf8e6702056e635c upstream. + +If ext4_fill_super() fails early, it's possible for ext4_evict_inode() +to call ext4_should_journal_data() before superblock options and flags +are fully set up. In that case, the iput() on the journal inode can +end up causing a BUG(). + +Work around this problem by reordering the tests so we only call +ext4_should_journal_data() after we know it's not the journal inode. 
+ +Fixes: 2d859db3e4 ("ext4: fix data corruption in inodes with journalled data") +Fixes: 2b405bfa84 ("ext4: fix data=journal fast mount/umount hang") +Cc: Jan Kara +Signed-off-by: Vegard Nossum +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inod + * Note that directories do not have this problem because they + * don't use page cache. + */ +- if (ext4_should_journal_data(inode) && +- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && +- inode->i_ino != EXT4_JOURNAL_INO) { ++ if (inode->i_ino != EXT4_JOURNAL_INO && ++ ext4_should_journal_data(inode) && ++ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; + diff --git a/queue-4.7/ext4-fix-deadlock-during-page-writeback.patch b/queue-4.7/ext4-fix-deadlock-during-page-writeback.patch new file mode 100644 index 00000000000..48db2d72938 --- /dev/null +++ b/queue-4.7/ext4-fix-deadlock-during-page-writeback.patch @@ -0,0 +1,78 @@ +From 646caa9c8e196880b41cd3e3d33a2ebc752bdb85 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 4 Jul 2016 10:14:01 -0400 +Subject: ext4: fix deadlock during page writeback + +From: Jan Kara + +commit 646caa9c8e196880b41cd3e3d33a2ebc752bdb85 upstream. + +Commit 06bd3c36a733 (ext4: fix data exposure after a crash) uncovered a +deadlock in ext4_writepages() which was previously much harder to hit. +After this commit xfstest generic/130 reproduces the deadlock on small +filesystems. + +The problem happens when ext4_do_update_inode() sets LARGE_FILE feature +and marks current inode handle as synchronous. 
That subsequently results +in ext4_journal_stop() called from ext4_writepages() to block waiting for +transaction commit while still holding page locks, reference to io_end, +and some prepared bio in mpd structure each of which can possibly block +transaction commit from completing and thus results in deadlock. + +Fix the problem by releasing page locks, io_end reference, and +submitting prepared bio before calling ext4_journal_stop(). + +[ Changed to defer the call to ext4_journal_stop() only if the handle + is synchronous. --tytso ] + +Reported-and-tested-by: Eryu Guan +Signed-off-by: Theodore Ts'o +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 29 ++++++++++++++++++++++++++--- + 1 file changed, 26 insertions(+), 3 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2748,13 +2748,36 @@ retry: + done = true; + } + } +- ext4_journal_stop(handle); ++ /* ++ * Caution: If the handle is synchronous, ++ * ext4_journal_stop() can wait for transaction commit ++ * to finish which may depend on writeback of pages to ++ * complete or on page lock to be released. In that ++ * case, we have to wait until after after we have ++ * submitted all the IO, released page locks we hold, ++ * and dropped io_end reference (for extent conversion ++ * to be able to complete) before stopping the handle. ++ */ ++ if (!ext4_handle_valid(handle) || handle->h_sync == 0) { ++ ext4_journal_stop(handle); ++ handle = NULL; ++ } + /* Submit prepared bio */ + ext4_io_submit(&mpd.io_submit); + /* Unlock pages we didn't use */ + mpage_release_unused_pages(&mpd, give_up_on_write); +- /* Drop our io_end reference we got from init */ +- ext4_put_io_end(mpd.io_submit.io_end); ++ /* ++ * Drop our io_end reference we got from init. We have ++ * to be careful and use deferred io_end finishing if ++ * we are still holding the transaction as we can ++ * release the last reference to io_end which may end ++ * up doing unwritten extent conversion. 
++ */ ++ if (handle) { ++ ext4_put_io_end_defer(mpd.io_submit.io_end); ++ ext4_journal_stop(handle); ++ } else ++ ext4_put_io_end(mpd.io_submit.io_end); + + if (ret == -ENOSPC && sbi->s_journal) { + /* diff --git a/queue-4.7/ext4-short-cut-orphan-cleanup-on-error.patch b/queue-4.7/ext4-short-cut-orphan-cleanup-on-error.patch new file mode 100644 index 00000000000..bfaa383d1de --- /dev/null +++ b/queue-4.7/ext4-short-cut-orphan-cleanup-on-error.patch @@ -0,0 +1,60 @@ +From c65d5c6c81a1f27dec5f627f67840726fcd146de Mon Sep 17 00:00:00 2001 +From: Vegard Nossum +Date: Thu, 14 Jul 2016 23:21:35 -0400 +Subject: ext4: short-cut orphan cleanup on error + +From: Vegard Nossum + +commit c65d5c6c81a1f27dec5f627f67840726fcd146de upstream. + +If we encounter a filesystem error during orphan cleanup, we should stop. +Otherwise, we may end up in an infinite loop where the same inode is +processed again and again. + + EXT4-fs (loop0): warning: checktime reached, running e2fsck is recommended + EXT4-fs error (device loop0): ext4_mb_generate_buddy:758: group 2, block bitmap and bg descriptor inconsistent: 6117 vs 0 free clusters + Aborting journal on device loop0-8. + EXT4-fs (loop0): Remounting filesystem read-only + EXT4-fs error (device loop0) in ext4_free_blocks:4895: Journal has aborted + EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted + EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted + EXT4-fs error (device loop0) in ext4_ext_remove_space:3068: IO failure + EXT4-fs error (device loop0) in ext4_ext_truncate:4667: Journal has aborted + EXT4-fs error (device loop0) in ext4_orphan_del:2927: Journal has aborted + EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted + EXT4-fs (loop0): Inode 16 (00000000618192a0): orphan list check failed! + [...] + EXT4-fs (loop0): Inode 16 (0000000061819748): orphan list check failed! + [...] 
+ EXT4-fs (loop0): Inode 16 (0000000061819bf0): orphan list check failed! + [...] + +See-also: c9eb13a9105 ("ext4: fix hang when processing corrupted orphaned inode list") +Cc: Jan Kara +Signed-off-by: Vegard Nossum +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2278,6 +2278,16 @@ static void ext4_orphan_cleanup(struct s + while (es->s_last_orphan) { + struct inode *inode; + ++ /* ++ * We may have encountered an error during cleanup; if ++ * so, skip the rest. ++ */ ++ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { ++ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); ++ es->s_last_orphan = 0; ++ break; ++ } ++ + inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); + if (IS_ERR(inode)) { + es->s_last_orphan = 0; diff --git a/queue-4.7/ext4-validate-s_reserved_gdt_blocks-on-mount.patch b/queue-4.7/ext4-validate-s_reserved_gdt_blocks-on-mount.patch new file mode 100644 index 00000000000..09cd8d2b956 --- /dev/null +++ b/queue-4.7/ext4-validate-s_reserved_gdt_blocks-on-mount.patch @@ -0,0 +1,54 @@ +From 5b9554dc5bf008ae7f68a52e3d7e76c0920938a2 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Tue, 5 Jul 2016 20:01:52 -0400 +Subject: ext4: validate s_reserved_gdt_blocks on mount + +From: Theodore Ts'o + +commit 5b9554dc5bf008ae7f68a52e3d7e76c0920938a2 upstream. + +If s_reserved_gdt_blocks is extremely large, it's possible for +ext4_init_block_bitmap(), which is called when ext4 sets up an +uninitialized block bitmap, to corrupt random kernel memory. Add the +same checks which e2fsck has --- it must never be larger than +blocksize / sizeof(__u32) --- and then add a backup check in +ext4_init_block_bitmap() in case the superblock gets modified after +the file system is mounted. 
+ +Reported-by: Vegard Nossum +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/balloc.c | 3 +++ + fs/ext4/super.c | 7 +++++++ + 2 files changed, 10 insertions(+) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct + memset(bh->b_data, 0, sb->s_blocksize); + + bit_max = ext4_num_base_meta_clusters(sb, block_group); ++ if ((bit_max >> 3) >= bh->b_size) ++ return -EFSCORRUPTED; ++ + for (bit = 0; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -3416,6 +3416,13 @@ static int ext4_fill_super(struct super_ + goto failed_mount; + } + ++ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ++ ext4_msg(sb, KERN_ERR, ++ "Number of reserved GDT blocks insanely large: %d", ++ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); ++ goto failed_mount; ++ } ++ + if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { + err = bdev_dax_supported(sb, blocksize); + if (err) diff --git a/queue-4.7/fs-dcache.c-avoid-soft-lockup-in-dput.patch b/queue-4.7/fs-dcache.c-avoid-soft-lockup-in-dput.patch new file mode 100644 index 00000000000..5a67d63d0f1 --- /dev/null +++ b/queue-4.7/fs-dcache.c-avoid-soft-lockup-in-dput.patch @@ -0,0 +1,69 @@ +From 47be61845c775643f1aa4d2a54343549f943c94c Mon Sep 17 00:00:00 2001 +From: Wei Fang +Date: Wed, 6 Jul 2016 11:32:20 +0800 +Subject: fs/dcache.c: avoid soft-lockup in dput() + +From: Wei Fang + +commit 47be61845c775643f1aa4d2a54343549f943c94c upstream. + +We triggered soft-lockup under stress test which +open/access/write/close one file concurrently on more than +five different CPUs: + +WARN: soft lockup - CPU#0 stuck for 11s! [who:30631] +... 
+[] dput+0x100/0x298 +[] terminate_walk+0x4c/0x60 +[] path_lookupat+0x5cc/0x7a8 +[] filename_lookup+0x38/0xf0 +[] user_path_at_empty+0x78/0xd0 +[] user_path_at+0x1c/0x28 +[] SyS_faccessat+0xb4/0x230 + +->d_lock trylock may fail many times because of concurrent +operations, and dput() may execute for a long time. + +Fix this by replacing cpu_relax() with cond_resched(). +dput() used to be sleepable, so making it sleepable again +should be safe. + +Signed-off-by: Wei Fang +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/dcache.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -622,7 +622,6 @@ static struct dentry *dentry_kill(struct + + failed: + spin_unlock(&dentry->d_lock); +- cpu_relax(); + return dentry; /* try again with same dentry */ + } + +@@ -796,6 +795,8 @@ void dput(struct dentry *dentry) + return; + + repeat: ++ might_sleep(); ++ + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); +@@ -829,8 +830,10 @@ repeat: + + kill_it: + dentry = dentry_kill(dentry); +- if (dentry) ++ if (dentry) { ++ cond_resched(); + goto repeat; ++ } + } + EXPORT_SYMBOL(dput); + diff --git a/queue-4.7/fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch b/queue-4.7/fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch new file mode 100644 index 00000000000..167d9a0988c --- /dev/null +++ b/queue-4.7/fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch @@ -0,0 +1,31 @@ +From 9446385f05c9af25fed53dbed3cc75763730be52 Mon Sep 17 00:00:00 2001 +From: Wei Fang +Date: Mon, 25 Jul 2016 21:17:04 +0800 +Subject: fuse: fix wrong assignment of ->flags in fuse_send_init() + +From: Wei Fang + +commit 9446385f05c9af25fed53dbed3cc75763730be52 upstream. + +FUSE_HAS_IOCTL_DIR should be assigned to ->flags, it may be a typo. 
+ +Signed-off-by: Wei Fang +Signed-off-by: Miklos Szeredi +Fixes: 69fe05c90ed5 ("fuse: add missing INIT flags") +Signed-off-by: Greg Kroah-Hartman + +--- + fs/fuse/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -942,7 +942,7 @@ static void fuse_send_init(struct fuse_c + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | + FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | +- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | ++ FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | + FUSE_PARALLEL_DIROPS; diff --git a/queue-4.7/fuse-fsync-did-not-return-io-errors.patch b/queue-4.7/fuse-fsync-did-not-return-io-errors.patch new file mode 100644 index 00000000000..604b6cc30df --- /dev/null +++ b/queue-4.7/fuse-fsync-did-not-return-io-errors.patch @@ -0,0 +1,46 @@ +From ac7f052b9e1534c8248f814b6f0068ad8d4a06d2 Mon Sep 17 00:00:00 2001 +From: Alexey Kuznetsov +Date: Tue, 19 Jul 2016 12:48:01 -0700 +Subject: fuse: fsync() did not return IO errors + +From: Alexey Kuznetsov + +commit ac7f052b9e1534c8248f814b6f0068ad8d4a06d2 upstream. + +Due to implementation of fuse writeback filemap_write_and_wait_range() does +not catch errors. 
We have to do this directly after fuse_sync_writes() + +Signed-off-by: Alexey Kuznetsov +Signed-off-by: Maxim Patlasov +Signed-off-by: Miklos Szeredi +Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") +Signed-off-by: Greg Kroah-Hartman + +--- + fs/fuse/file.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -462,6 +462,21 @@ int fuse_fsync_common(struct file *file, + goto out; + + fuse_sync_writes(inode); ++ ++ /* ++ * Due to implementation of fuse writeback ++ * filemap_write_and_wait_range() does not catch errors. ++ * We have to do this directly after fuse_sync_writes() ++ */ ++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags)) ++ err = -ENOSPC; ++ if (test_bit(AS_EIO, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags)) ++ err = -EIO; ++ if (err) ++ goto out; ++ + err = sync_inode_metadata(inode, 1); + if (err) + goto out; diff --git a/queue-4.7/fuse-fuse_flush-must-check-mapping-flags-for-errors.patch b/queue-4.7/fuse-fuse_flush-must-check-mapping-flags-for-errors.patch new file mode 100644 index 00000000000..c36f7df82e2 --- /dev/null +++ b/queue-4.7/fuse-fuse_flush-must-check-mapping-flags-for-errors.patch @@ -0,0 +1,41 @@ +From 9ebce595f63a407c5cec98f98f9da8459b73740a Mon Sep 17 00:00:00 2001 +From: Maxim Patlasov +Date: Tue, 19 Jul 2016 18:12:26 -0700 +Subject: fuse: fuse_flush must check mapping->flags for errors + +From: Maxim Patlasov + +commit 9ebce595f63a407c5cec98f98f9da8459b73740a upstream. + +fuse_flush() calls write_inode_now() that triggers writeback, but actual +writeback will happen later, on fuse_sync_writes(). If an error happens, +fuse_writepage_end() will set error bit in mapping->flags. So, we have to +check mapping->flags after fuse_sync_writes(). 
+ +Signed-off-by: Maxim Patlasov +Signed-off-by: Miklos Szeredi +Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") +Signed-off-by: Greg Kroah-Hartman + +--- + fs/fuse/file.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, + fuse_sync_writes(inode); + inode_unlock(inode); + ++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags)) ++ err = -ENOSPC; ++ if (test_bit(AS_EIO, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags)) ++ err = -EIO; ++ if (err) ++ return err; ++ + req = fuse_get_req_nofail_nopages(fc, file); + memset(&inarg, 0, sizeof(inarg)); + inarg.fh = ff->fh; diff --git a/queue-4.7/radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch b/queue-4.7/radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch new file mode 100644 index 00000000000..efaa8782500 --- /dev/null +++ b/queue-4.7/radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch @@ -0,0 +1,82 @@ +From 05eb6e7263185a6bb0de9501ccf2addc52429414 Mon Sep 17 00:00:00 2001 +From: Vladimir Davydov +Date: Tue, 2 Aug 2016 14:03:01 -0700 +Subject: radix-tree: account nodes to memcg only if explicitly requested + +From: Vladimir Davydov + +commit 05eb6e7263185a6bb0de9501ccf2addc52429414 upstream. + +Radix trees may be used not only for storing page cache pages, so +unconditionally accounting radix tree nodes to the current memory cgroup +is bad: if a radix tree node is used for storing data shared among +different cgroups we risk pinning dead memory cgroups forever. + +So let's only account radix tree nodes if it was explicitly requested by +passing __GFP_ACCOUNT to INIT_RADIX_TREE. Currently, we only want to +account page cache entries, so mark mapping->page_tree so. 
+ +Fixes: 58e698af4c63 ("radix-tree: account radix_tree_node to memory cgroup") +Link: http://lkml.kernel.org/r/1470057188-7864-1-git-send-email-vdavydov@virtuozzo.com +Signed-off-by: Vladimir Davydov +Acked-by: Johannes Weiner +Acked-by: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/inode.c | 2 +- + lib/radix-tree.c | 14 ++++++++++---- + 2 files changed, 11 insertions(+), 5 deletions(-) + +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -345,7 +345,7 @@ EXPORT_SYMBOL(inc_nlink); + void address_space_init_once(struct address_space *mapping) + { + memset(mapping, 0, sizeof(*mapping)); +- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); ++ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT); + spin_lock_init(&mapping->tree_lock); + init_rwsem(&mapping->i_mmap_rwsem); + INIT_LIST_HEAD(&mapping->private_list); +--- a/lib/radix-tree.c ++++ b/lib/radix-tree.c +@@ -274,10 +274,11 @@ radix_tree_node_alloc(struct radix_tree_ + + /* + * Even if the caller has preloaded, try to allocate from the +- * cache first for the new node to get accounted. ++ * cache first for the new node to get accounted to the memory ++ * cgroup. + */ + ret = kmem_cache_alloc(radix_tree_node_cachep, +- gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN); ++ gfp_mask | __GFP_NOWARN); + if (ret) + goto out; + +@@ -300,8 +301,7 @@ radix_tree_node_alloc(struct radix_tree_ + kmemleak_update_trace(ret); + goto out; + } +- ret = kmem_cache_alloc(radix_tree_node_cachep, +- gfp_mask | __GFP_ACCOUNT); ++ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); + out: + BUG_ON(radix_tree_is_internal_node(ret)); + return ret; +@@ -348,6 +348,12 @@ static int __radix_tree_preload(gfp_t gf + struct radix_tree_node *node; + int ret = -ENOMEM; + ++ /* ++ * Nodes preloaded by one cgroup can be be used by another cgroup, so ++ * they should never be accounted to any particular memory cgroup. 
++ */ ++ gfp_mask &= ~__GFP_ACCOUNT; ++ + preempt_disable(); + rtp = this_cpu_ptr(&radix_tree_preloads); + while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { diff --git a/queue-4.7/revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch b/queue-4.7/revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch new file mode 100644 index 00000000000..b7aa91deef9 --- /dev/null +++ b/queue-4.7/revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch @@ -0,0 +1,72 @@ +From da7d3abe1c9e5ebac2cf86f97e9e89888a5e2094 Mon Sep 17 00:00:00 2001 +From: Andreas Herrmann +Date: Fri, 22 Jul 2016 17:14:11 +0200 +Subject: Revert "cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency" + +From: Andreas Herrmann + +commit da7d3abe1c9e5ebac2cf86f97e9e89888a5e2094 upstream. + +This reverts commit 790d849bf811a8ab5d4cd2cce0f6fda92f6aebf2. + +Using a v4.7-rc7 kernel on a HP ProLiant triggered following messages + + pcc-cpufreq: (v1.10.00) driver loaded with frequency limits: 1200 MHz, 2800 MHz + cpufreq: ondemand governor failed, too long transition latency of HW, fallback to performance governor + +The last line was shown for each CPU in the system. +Testing v4.5 (where commit 790d849b was integrated) triggered +similar messages. Same behaviour on a 2nd HP Proliant system. + +So commit 790d849bf (cpufreq: pcc-cpufreq: update default value of +cpuinfo_transition_latency) causes the system to use performance +governor which, I guess, was not the intention of the patch. + +Enabling debug output in pcc-cpufreq provides following verbose output: + + pcc-cpufreq: (v1.10.00) driver loaded with frequency limits: 1200 MHz, 2800 MHz + pcc_get_offset: for CPU 0: pcc_cpu_data input_offset: 0x44, pcc_cpu_data output_offset: 0x48 + init: policy->max is 2800000, policy->min is 1200000 + get: get_freq for CPU 0 + get: SUCCESS: (virtual) output_offset for cpu 0 is 0xffffc9000d7c0048, contains a value of: 0xff06. 
Speed is: 168000 MHz + cpufreq: ondemand governor failed, too long transition latency of HW, fallback to performance governor + target: CPU 0 should go to target freq: 2800000 (virtual) input_offset is 0xffffc9000d7c0044 + target: was SUCCESSFUL for cpu 0 + +I am asking to revert 790d849bf to re-enable usage of ondemand +governor with pcc-cpufreq. + +Fixes: 790d849bf (cpufreq: pcc-cpufreq: update default value of cpuinfo_transition_latency) +Signed-off-by: Andreas Herrmann +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/cpu-freq/pcc-cpufreq.txt | 4 ++-- + drivers/cpufreq/pcc-cpufreq.c | 2 -- + 2 files changed, 2 insertions(+), 4 deletions(-) + +--- a/Documentation/cpu-freq/pcc-cpufreq.txt ++++ b/Documentation/cpu-freq/pcc-cpufreq.txt +@@ -159,8 +159,8 @@ to be strictly associated with a P-state + + 2.2 cpuinfo_transition_latency: + ------------------------------- +-The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification +-does not include a field to expose this value currently. ++The cpuinfo_transition_latency field is 0. The PCC specification does ++not include a field to expose this value currently. 
+ + 2.3 cpuinfo_cur_freq: + --------------------- +--- a/drivers/cpufreq/pcc-cpufreq.c ++++ b/drivers/cpufreq/pcc-cpufreq.c +@@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct c + policy->min = policy->cpuinfo.min_freq = + ioread32(&pcch_hdr->minimum_frequency) * 1000; + +- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; +- + pr_debug("init: policy->max is %d, policy->min is %d\n", + policy->max, policy->min); + out: diff --git a/queue-4.7/revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch b/queue-4.7/revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch new file mode 100644 index 00000000000..d7dca4b13f8 --- /dev/null +++ b/queue-4.7/revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch @@ -0,0 +1,143 @@ +From 4e390b2b2f34b8daaabf2df1df0cf8f798b87ddb Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Thu, 28 Jul 2016 15:48:44 -0700 +Subject: Revert "mm, mempool: only set __GFP_NOMEMALLOC if there are free elements" + +From: Michal Hocko + +commit 4e390b2b2f34b8daaabf2df1df0cf8f798b87ddb upstream. + +This reverts commit f9054c70d28b ("mm, mempool: only set __GFP_NOMEMALLOC +if there are free elements"). + +There has been a report about OOM killer invoked when swapping out to a +dm-crypt device. The primary reason seems to be that the swapout out IO +managed to completely deplete memory reserves. Ondrej was able to +bisect and explained the issue by pointing to f9054c70d28b ("mm, +mempool: only set __GFP_NOMEMALLOC if there are free elements"). + +The reason is that the swapout path is not throttled properly because +the md-raid layer needs to allocate from the generic_make_request path +which means it allocates from the PF_MEMALLOC context. dm layer uses +mempool_alloc in order to guarantee a forward progress which used to +inhibit access to memory reserves when using page allocator. 
This has +changed by f9054c70d28b ("mm, mempool: only set __GFP_NOMEMALLOC if +there are free elements") which has dropped the __GFP_NOMEMALLOC +protection when the memory pool is depleted. + +If we are running out of memory and the only way forward to free memory +is to perform swapout we just keep consuming memory reserves rather than +throttling the mempool allocations and allowing the pending IO to +complete up to a moment when the memory is depleted completely and there +is no way forward but invoking the OOM killer. This is less than +optimal. + +The original intention of f9054c70d28b was to help with the OOM +situations where the oom victim depends on mempool allocation to make a +forward progress. David has mentioned the following backtrace: + + schedule + schedule_timeout + io_schedule_timeout + mempool_alloc + __split_and_process_bio + dm_request + generic_make_request + submit_bio + mpage_readpages + ext4_readpages + __do_page_cache_readahead + ra_submit + filemap_fault + handle_mm_fault + __do_page_fault + do_page_fault + page_fault + +We do not know more about why the mempool is depleted without being +replenished in time, though. In any case the dm layer shouldn't depend +on any allocations outside of the dedicated pools so a forward progress +should be guaranteed. If this is not the case then the dm should be +fixed rather than papering over the problem and postponing it to later +by accessing more memory reserves. + +mempools are a mechanism to maintain dedicated memory reserves to +guarantee forward progress. Allowing them an unbounded access to the +page allocator memory reserves is going against the whole purpose of +this mechanism. + +Bisected by Ondrej Kozina. 
+ +[akpm@linux-foundation.org: coding-style fixes] +Link: http://lkml.kernel.org/r/20160721145309.GR26379@dhcp22.suse.cz +Signed-off-by: Michal Hocko +Reported-by: Ondrej Kozina +Reviewed-by: Johannes Weiner +Acked-by: NeilBrown +Cc: David Rientjes +Cc: Mikulas Patocka +Cc: Ondrej Kozina +Cc: Tetsuo Handa +Cc: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempool.c | 18 +++--------------- + 1 file changed, 3 insertions(+), 15 deletions(-) + +--- a/mm/mempool.c ++++ b/mm/mempool.c +@@ -306,7 +306,7 @@ EXPORT_SYMBOL(mempool_resize); + * returns NULL. Note that due to preallocation, this function + * *never* fails when called from process contexts. (it might + * fail if called from an IRQ context.) +- * Note: neither __GFP_NOMEMALLOC nor __GFP_ZERO are supported. ++ * Note: using __GFP_ZERO is not supported. + */ + void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) + { +@@ -315,27 +315,16 @@ void *mempool_alloc(mempool_t *pool, gfp + wait_queue_t wait; + gfp_t gfp_temp; + +- /* If oom killed, memory reserves are essential to prevent livelock */ +- VM_WARN_ON_ONCE(gfp_mask & __GFP_NOMEMALLOC); +- /* No element size to zero on allocation */ + VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); +- + might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); + ++ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ + gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ + gfp_mask |= __GFP_NOWARN; /* failures are OK */ + + gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO); + + repeat_alloc: +- if (likely(pool->curr_nr)) { +- /* +- * Don't allocate from emergency reserves if there are +- * elements available. This check is racy, but it will +- * be rechecked each loop. 
+- */ +- gfp_temp |= __GFP_NOMEMALLOC; +- } + + element = pool->alloc(gfp_temp, pool->pool_data); + if (likely(element != NULL)) +@@ -359,12 +348,11 @@ repeat_alloc: + * We use gfp mask w/o direct reclaim or IO for the first round. If + * alloc failed with that and @pool was empty, retry immediately. + */ +- if ((gfp_temp & ~__GFP_NOMEMALLOC) != gfp_mask) { ++ if (gfp_temp != gfp_mask) { + spin_unlock_irqrestore(&pool->lock, flags); + gfp_temp = gfp_mask; + goto repeat_alloc; + } +- gfp_temp = gfp_mask; + + /* We must not sleep if !__GFP_DIRECT_RECLAIM */ + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { diff --git a/queue-4.7/serial-mvebu-uart-free-the-irq-in-shutdown.patch b/queue-4.7/serial-mvebu-uart-free-the-irq-in-shutdown.patch new file mode 100644 index 00000000000..cf0b757cf0e --- /dev/null +++ b/queue-4.7/serial-mvebu-uart-free-the-irq-in-shutdown.patch @@ -0,0 +1,41 @@ +From c2c1659b4f8f9e19fe82a4fd06cca4b3d59090ce Mon Sep 17 00:00:00 2001 +From: Thomas Petazzoni +Date: Thu, 16 Jun 2016 16:48:52 +0200 +Subject: serial: mvebu-uart: free the IRQ in ->shutdown() + +From: Thomas Petazzoni + +commit c2c1659b4f8f9e19fe82a4fd06cca4b3d59090ce upstream. + +As suggested by the serial port infrastructure documentation, the IRQ is +requested in ->startup(). However, it is never freed in the ->shutdown() +hook. + +With simple systems that open the serial port once for all and always +have at least one process that keep the serial port opened, there was no +problem. But with a more complicated system (*cough* systemd *cough*), +the serial port is opened/closed many times, which at some point no +processes having the serial port open at all. Due to this ->startup() +gets called again, tries to request_irq() again, which fails. 
+ +Fixes: 30530791a7a0 ("serial: mvebu-uart: initial support for Armada-3700 serial port") +Cc: Ofer Heifetz +Signed-off-by: Thomas Petazzoni +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/mvebu-uart.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/tty/serial/mvebu-uart.c ++++ b/drivers/tty/serial/mvebu-uart.c +@@ -300,6 +300,8 @@ static int mvebu_uart_startup(struct uar + static void mvebu_uart_shutdown(struct uart_port *port) + { + writel(0, port->membase + UART_CTRL); ++ ++ free_irq(port->irq, port); + } + + static void mvebu_uart_set_termios(struct uart_port *port, diff --git a/queue-4.7/series b/queue-4.7/series index f5cf8820bb0..0aec8ce477b 100644 --- a/queue-4.7/series +++ b/queue-4.7/series @@ -18,3 +18,23 @@ apparmor-fix-ref-count-leak-when-profile-sha1-hash-is-read.patch random-strengthen-input-validation-for-rndaddtoentcnt.patch mm-memcontrol-fix-swap-counter-leak-on-swapout-from-offline-cgroup.patch mm-memcontrol-fix-memcg-id-ref-counter-on-swap-charge-move.patch +x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch +block-fix-use-after-free-in-seq-file.patch +sysv-ipc-fix-security-layer-leaking.patch +radix-tree-account-nodes-to-memcg-only-if-explicitly-requested.patch +x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch +x86-power-64-fix-hibernation-return-address-corruption.patch +fuse-fsync-did-not-return-io-errors.patch +fuse-fuse_flush-must-check-mapping-flags-for-errors.patch +fuse-fix-wrong-assignment-of-flags-in-fuse_send_init.patch +revert-mm-mempool-only-set-__gfp_nomemalloc-if-there-are-free-elements.patch +fs-dcache.c-avoid-soft-lockup-in-dput.patch +revert-cpufreq-pcc-cpufreq-update-default-value-of-cpuinfo_transition_latency.patch +crypto-gcm-filter-out-async-ghash-if-necessary.patch +crypto-scatterwalk-fix-test-in-scatterwalk_done.patch +serial-mvebu-uart-free-the-irq-in-shutdown.patch +ext4-check-for-extents-that-wrap-around.patch 
+ext4-fix-deadlock-during-page-writeback.patch +ext4-don-t-call-ext4_should_journal_data-on-the-journal-inode.patch +ext4-validate-s_reserved_gdt_blocks-on-mount.patch +ext4-short-cut-orphan-cleanup-on-error.patch diff --git a/queue-4.7/sysv-ipc-fix-security-layer-leaking.patch b/queue-4.7/sysv-ipc-fix-security-layer-leaking.patch new file mode 100644 index 00000000000..dc05fde1ede --- /dev/null +++ b/queue-4.7/sysv-ipc-fix-security-layer-leaking.patch @@ -0,0 +1,112 @@ +From 9b24fef9f0410fb5364245d6cc2bd044cc064007 Mon Sep 17 00:00:00 2001 +From: Fabian Frederick +Date: Tue, 2 Aug 2016 14:03:07 -0700 +Subject: sysv, ipc: fix security-layer leaking + +From: Fabian Frederick + +commit 9b24fef9f0410fb5364245d6cc2bd044cc064007 upstream. + +Commit 53dad6d3a8e5 ("ipc: fix race with LSMs") updated ipc_rcu_putref() +to receive rcu freeing function but used generic ipc_rcu_free() instead +of msg_rcu_free() which does security cleaning. + +Running LTP msgsnd06 with kmemleak gives the following: + + cat /sys/kernel/debug/kmemleak + + unreferenced object 0xffff88003c0a11f8 (size 8): + comm "msgsnd06", pid 1645, jiffies 4294672526 (age 6.549s) + hex dump (first 8 bytes): + 1b 00 00 00 01 00 00 00 ........ 
+ backtrace: + kmemleak_alloc+0x23/0x40 + kmem_cache_alloc_trace+0xe1/0x180 + selinux_msg_queue_alloc_security+0x3f/0xd0 + security_msg_queue_alloc+0x2e/0x40 + newque+0x4e/0x150 + ipcget+0x159/0x1b0 + SyS_msgget+0x39/0x40 + entry_SYSCALL_64_fastpath+0x13/0x8f + +Manfred Spraul suggested to fix sem.c as well and Davidlohr Bueso to +only use ipc_rcu_free in case of security allocation failure in newary() + +Fixes: 53dad6d3a8e ("ipc: fix race with LSMs") +Link: http://lkml.kernel.org/r/1470083552-22966-1-git-send-email-fabf@skynet.be +Signed-off-by: Fabian Frederick +Cc: Davidlohr Bueso +Cc: Manfred Spraul +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/msg.c | 2 +- + ipc/sem.c | 12 ++++++------ + 2 files changed, 7 insertions(+), 7 deletions(-) + +--- a/ipc/msg.c ++++ b/ipc/msg.c +@@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, vo + rcu_read_lock(); + ipc_lock_object(&msq->q_perm); + +- ipc_rcu_putref(msq, ipc_rcu_free); ++ ipc_rcu_putref(msq, msg_rcu_free); + /* raced with RMID? 
*/ + if (!ipc_valid_object(&msq->q_perm)) { + err = -EIDRM; +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -449,7 +449,7 @@ static inline struct sem_array *sem_obta + static inline void sem_lock_and_putref(struct sem_array *sma) + { + sem_lock(sma, NULL, -1); +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + } + + static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) +@@ -1392,7 +1392,7 @@ static int semctl_main(struct ipc_namesp + rcu_read_unlock(); + sem_io = ipc_alloc(sizeof(ushort)*nsems); + if (sem_io == NULL) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + return -ENOMEM; + } + +@@ -1426,20 +1426,20 @@ static int semctl_main(struct ipc_namesp + if (nsems > SEMMSL_FAST) { + sem_io = ipc_alloc(sizeof(ushort)*nsems); + if (sem_io == NULL) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + return -ENOMEM; + } + } + + if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + err = -EFAULT; + goto out_free; + } + + for (i = 0; i < nsems; i++) { + if (sem_io[i] > SEMVMX) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + err = -ERANGE; + goto out_free; + } +@@ -1731,7 +1731,7 @@ static struct sem_undo *find_alloc_undo( + /* step 2: allocate new undo structure */ + new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); + if (!new) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + return ERR_PTR(-ENOMEM); + } + diff --git a/queue-4.7/x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch b/queue-4.7/x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch new file mode 100644 index 00000000000..fbd570e2dbb --- /dev/null +++ b/queue-4.7/x86-microcode-fix-suspend-to-ram-with-builtin-microcode.patch @@ -0,0 +1,100 @@ +From 4b703305d98bf7350d4b2953ee39a3aa2eeb1778 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov 
+Date: Mon, 6 Jun 2016 17:10:43 +0200 +Subject: x86/microcode: Fix suspend to RAM with builtin microcode + +From: Borislav Petkov + +commit 4b703305d98bf7350d4b2953ee39a3aa2eeb1778 upstream. + +Usually, after we have found the proper microcode blob for the current +machine, we stash it away for later use with save_microcode_in_initrd(). + +However, with builtin microcode which doesn't come from the initrd, we +don't call that function because CONFIG_BLK_DEV_INITRD=n and even if +set, we don't have a valid initrd. + +In order to fix this, let's make save_microcode_in_initrd() an +fs_initcall which runs before rootfs_initcall() as this was the time it +was called previously through: + + rootfs_initcall(populate_rootfs) + |-> free_initrd() + |-> free_initrd_mem() + |-> save_microcode_in_initrd() + +Also, we make it run independently from initrd functionality being +present or not. + +And since it is called in the microcode loader only now, we can also +make it static. + +Reported-and-tested-by: Jim Bos +Signed-off-by: Borislav Petkov +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. 
Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1465225850-7352-3-git-send-email-bp@alien8.de +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/microcode.h | 2 -- + arch/x86/kernel/cpu/microcode/core.c | 3 ++- + arch/x86/mm/init.c | 7 ------- + 3 files changed, 2 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/microcode.h ++++ b/arch/x86/include/asm/microcode.h +@@ -133,13 +133,11 @@ static inline unsigned int x86_cpuid_fam + #ifdef CONFIG_MICROCODE + extern void __init load_ucode_bsp(void); + extern void load_ucode_ap(void); +-extern int __init save_microcode_in_initrd(void); + void reload_early_microcode(void); + extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); + #else + static inline void __init load_ucode_bsp(void) { } + static inline void load_ucode_ap(void) { } +-static inline int __init save_microcode_in_initrd(void) { return 0; } + static inline void reload_early_microcode(void) { } + static inline bool + get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } +--- a/arch/x86/kernel/cpu/microcode/core.c ++++ b/arch/x86/kernel/cpu/microcode/core.c +@@ -175,7 +175,7 @@ void load_ucode_ap(void) + } + } + +-int __init save_microcode_in_initrd(void) ++static int __init save_microcode_in_initrd(void) + { + struct cpuinfo_x86 *c = &boot_cpu_data; + +@@ -691,4 +691,5 @@ int __init microcode_init(void) + return error; + + } ++fs_initcall(save_microcode_in_initrd); + late_initcall(microcode_init); +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -696,13 +696,6 @@ void free_initmem(void) + void __init free_initrd_mem(unsigned long start, unsigned long end) + { + /* +- * Remember, initrd memory may contain microcode or other useful things. +- * Before we lose initrd mem, we need to find a place to hold them +- * now that normal virtual memory is enabled. 
+- */ +- save_microcode_in_initrd(); +- +- /* + * end could be not aligned, and We can not align that, + * decompresser could be confused by aligned initrd_end + * We already reserve the end partial page before in diff --git a/queue-4.7/x86-power-64-fix-hibernation-return-address-corruption.patch b/queue-4.7/x86-power-64-fix-hibernation-return-address-corruption.patch new file mode 100644 index 00000000000..7571bdcf381 --- /dev/null +++ b/queue-4.7/x86-power-64-fix-hibernation-return-address-corruption.patch @@ -0,0 +1,101 @@ +From 4ce827b4cc58bec7952591b96cce2b28553e4d5b Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Thu, 28 Jul 2016 23:15:21 +0200 +Subject: x86/power/64: Fix hibernation return address corruption + +From: Josh Poimboeuf + +commit 4ce827b4cc58bec7952591b96cce2b28553e4d5b upstream. + +In kernel bug 150021, a kernel panic was reported when restoring a +hibernate image. Only a picture of the oops was reported, so I can't +paste the whole thing here. But here are the most interesting parts: + + kernel tried to execute NX-protected page - exploit attempt? (uid: 0) + BUG: unable to handle kernel paging request at ffff8804615cfd78 + ... + RIP: ffff8804615cfd78 + RSP: ffff8804615f0000 + RBP: ffff8804615cfdc0 + ... + Call Trace: + do_signal+0x23 + exit_to_usermode_loop+0x64 + ... + +The RIP is on the same page as RBP, so it apparently started executing +on the stack. + +The bug was bisected to commit ef0f3ed5a4ac (x86/asm/power: Create +stack frames in hibernate_asm_64.S), which in retrospect seems quite +dangerous, since that code saves and restores the stack pointer from a +global variable ('saved_context'). + +There are a lot of moving parts in the hibernate save and restore paths, +so I don't know exactly what caused the panic. Presumably, a FRAME_END +was executed without the corresponding FRAME_BEGIN, or vice versa. That +would corrupt the return address on the stack and would be consistent +with the details of the above panic. 
+ +[ rjw: One major problem is that by the time the FRAME_BEGIN in + restore_registers() is executed, the stack pointer value may not + be valid any more. Namely, the stack area pointed to by it + previously may have been overwritten by some image memory contents + and that page frame may now be used for whatever different purpose + it had been allocated for before hibernation. In that case, the + FRAME_BEGIN will corrupt that memory. ] + +Instead of doing the frame pointer save/restore around the bounds of the +affected functions, just do it around the call to swsusp_save(). + +That has the same effect of ensuring that if swsusp_save() sleeps, the +frame pointers will be correct. It's also a much more obviously safe +way to do it than the original patch. And objtool still doesn't report +any warnings. + +Fixes: ef0f3ed5a4ac (x86/asm/power: Create stack frames in hibernate_asm_64.S) +Link: https://bugzilla.kernel.org/show_bug.cgi?id=150021 +Reported-by: Andre Reinke +Tested-by: Andre Reinke +Signed-off-by: Josh Poimboeuf +Acked-by: Ingo Molnar +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/power/hibernate_asm_64.S | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/arch/x86/power/hibernate_asm_64.S ++++ b/arch/x86/power/hibernate_asm_64.S +@@ -24,7 +24,6 @@ + #include + + ENTRY(swsusp_arch_suspend) +- FRAME_BEGIN + movq $saved_context, %rax + movq %rsp, pt_regs_sp(%rax) + movq %rbp, pt_regs_bp(%rax) +@@ -48,6 +47,7 @@ ENTRY(swsusp_arch_suspend) + movq %cr3, %rax + movq %rax, restore_cr3(%rip) + ++ FRAME_BEGIN + call swsusp_save + FRAME_END + ret +@@ -104,7 +104,6 @@ ENTRY(core_restore_code) + /* code below belongs to the image kernel */ + .align PAGE_SIZE + ENTRY(restore_registers) +- FRAME_BEGIN + /* go back to the original page tables */ + movq %r9, %cr3 + +@@ -145,6 +144,5 @@ ENTRY(restore_registers) + /* tell the hibernation core that we've just restored the memory */ + movq %rax, in_suspend(%rip) + +- FRAME_END + ret + ENDPROC(restore_registers) diff --git a/queue-4.7/x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch b/queue-4.7/x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch new file mode 100644 index 00000000000..61849c3bd8e --- /dev/null +++ b/queue-4.7/x86-syscalls-64-add-compat_sys_keyctl-for-32-bit-userspace.patch @@ -0,0 +1,46 @@ +From f7d665627e103e82d34306c7d3f6f46f387c0d8b Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Wed, 27 Jul 2016 11:42:38 +0100 +Subject: x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace + +From: David Howells + +commit f7d665627e103e82d34306c7d3f6f46f387c0d8b upstream. + +x86_64 needs to use compat_sys_keyctl for 32-bit userspace rather than +calling sys_keyctl(). The latter will work in a lot of cases, thereby +hiding the issue. + +Reported-by: Stephan Mueller +Tested-by: Stephan Mueller +Signed-off-by: David Howells +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. 
Peter Anvin +Cc: Josh Poimboeuf +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: keyrings@vger.kernel.org +Cc: linux-security-module@vger.kernel.org +Link: http://lkml.kernel.org/r/146961615805.14395.5581949237156769439.stgit@warthog.procyon.org.uk +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/syscalls/syscall_32.tbl | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/entry/syscalls/syscall_32.tbl ++++ b/arch/x86/entry/syscalls/syscall_32.tbl +@@ -294,7 +294,7 @@ + # 285 sys_setaltroot + 286 i386 add_key sys_add_key + 287 i386 request_key sys_request_key +-288 i386 keyctl sys_keyctl ++288 i386 keyctl sys_keyctl compat_sys_keyctl + 289 i386 ioprio_set sys_ioprio_set + 290 i386 ioprio_get sys_ioprio_get + 291 i386 inotify_init sys_inotify_init