From: Greg Kroah-Hartman Date: Fri, 17 May 2019 15:39:44 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v4.9.178~25 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=093b13450a19658096ab65ea14c7bf63133aaf53;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch btrfs-do-not-start-a-transaction-during-fiemap.patch ext4-actually-request-zeroing-of-inode-table-after-grow.patch ext4-avoid-drop-reference-to-iloc.bh-twice.patch ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch ext4-make-sanity-check-in-mballoc-more-strict.patch ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch jbd2-check-superblock-mapped-prior-to-committing.patch mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch mfd-max77620-fix-swapped-fps_period_max_us-values.patch mm-mincore.c-make-mincore-more-conservative.patch mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch --- diff --git a/queue-4.14/bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch b/queue-4.14/bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch new file mode 100644 index 00000000000..33d132104ea --- /dev/null +++ b/queue-4.14/bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch @@ -0,0 +1,81 @@ +From a4b732a248d12cbdb46999daf0bf288c011335eb Mon Sep 17 00:00:00 2001 +From: 
Liang Chen +Date: Thu, 25 Apr 2019 00:48:31 +0800 +Subject: bcache: fix a race between cache register and cacheset unregister + +From: Liang Chen + +commit a4b732a248d12cbdb46999daf0bf288c011335eb upstream. + +There is a race between cache device register and cache set unregister. +For an already registered cache device, register_bcache will call +bch_is_open to iterate through all cachesets and check every cache +there. The race occurs if cache_set_free executes at the same time and +clears the caches right before ca is dereferenced in bch_is_open_cache. +To close the race, let's make sure the clean up work is protected by +the bch_register_lock as well. + +This issue can be reproduced as follows, +while true; do echo /dev/XXX> /sys/fs/bcache/register ; done& +while true; do echo 1> /sys/block/XXX/bcache/set/unregister ; done & + +and results in the following oops, + +[ +0.000053] BUG: unable to handle kernel NULL pointer dereference at 0000000000000998 +[ +0.000457] #PF error: [normal kernel read fault] +[ +0.000464] PGD 800000003ca9d067 P4D 800000003ca9d067 PUD 3ca9c067 PMD 0 +[ +0.000388] Oops: 0000 [#1] SMP PTI +[ +0.000269] CPU: 1 PID: 3266 Comm: bash Not tainted 5.0.0+ #6 +[ +0.000346] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014 +[ +0.000472] RIP: 0010:register_bcache+0x1829/0x1990 [bcache] +[ +0.000344] Code: b0 48 83 e8 50 48 81 fa e0 e1 10 c0 0f 84 a9 00 00 00 48 89 c6 48 89 ca 0f b7 ba 54 04 00 00 4c 8b 82 60 0c 00 00 85 ff 74 2f <49> 3b a8 98 09 00 00 74 4e 44 8d 47 ff 31 ff 49 c1 e0 03 eb 0d +[ +0.000839] RSP: 0018:ffff92ee804cbd88 EFLAGS: 00010202 +[ +0.000328] RAX: ffffffffc010e190 RBX: ffff918b5c6b5000 RCX: ffff918b7d8e0000 +[ +0.000399] RDX: ffff918b7d8e0000 RSI: ffffffffc010e190 RDI: 0000000000000001 +[ +0.000398] RBP: ffff918b7d318340 R08: 0000000000000000 R09: ffffffffb9bd2d7a +[ +0.000385] R10: ffff918b7eb253c0 R11: ffffb95980f51200 R12: ffffffffc010e1a0 +[ +0.000411] R13: fffffffffffffff2 R14: 
000000000000000b R15: ffff918b7e232620 +[ +0.000384] FS: 00007f955bec2740(0000) GS:ffff918b7eb00000(0000) knlGS:0000000000000000 +[ +0.000420] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ +0.000801] CR2: 0000000000000998 CR3: 000000003cad6000 CR4: 00000000001406e0 +[ +0.000837] Call Trace: +[ +0.000682] ? _cond_resched+0x10/0x20 +[ +0.000691] ? __kmalloc+0x131/0x1b0 +[ +0.000710] kernfs_fop_write+0xfa/0x170 +[ +0.000733] __vfs_write+0x2e/0x190 +[ +0.000688] ? inode_security+0x10/0x30 +[ +0.000698] ? selinux_file_permission+0xd2/0x120 +[ +0.000752] ? security_file_permission+0x2b/0x100 +[ +0.000753] vfs_write+0xa8/0x1a0 +[ +0.000676] ksys_write+0x4d/0xb0 +[ +0.000699] do_syscall_64+0x3a/0xf0 +[ +0.000692] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Signed-off-by: Liang Chen +Cc: stable@vger.kernel.org +Signed-off-by: Coly Li +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/bcache/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1357,6 +1357,7 @@ static void cache_set_free(struct closur + bch_btree_cache_free(c); + bch_journal_free(c); + ++ mutex_lock(&bch_register_lock); + for_each_cache(ca, c, i) + if (ca) { + ca->set = NULL; +@@ -1379,7 +1380,6 @@ static void cache_set_free(struct closur + mempool_destroy(c->search); + kfree(c->devices); + +- mutex_lock(&bch_register_lock); + list_del(&c->list); + mutex_unlock(&bch_register_lock); + diff --git a/queue-4.14/bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch b/queue-4.14/bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch new file mode 100644 index 00000000000..73caa018638 --- /dev/null +++ b/queue-4.14/bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch @@ -0,0 +1,96 @@ +From 1bee2addc0c8470c8aaa65ef0599eeae96dd88bc Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Thu, 25 Apr 2019 00:48:33 +0800 +Subject: bcache: never set KEY_PTRS of 
journal key to 0 in journal_reclaim() + +From: Coly Li + +commit 1bee2addc0c8470c8aaa65ef0599eeae96dd88bc upstream. + +In journal_reclaim() ja->cur_idx of each cache will be updated to +reclaim available journal buckets. Variable 'int n' is used to count how +many caches are successfully reclaimed, then n is set to c->journal.key +by SET_KEY_PTRS(). Later in journal_write_unlocked(), a for_each_cache() +loop will write the jset data onto each cache. + +The problem is, if all journal buckets on each cache are full, the +following code in journal_reclaim(), + +529 for_each_cache(ca, c, iter) { +530 struct journal_device *ja = &ca->journal; +531 unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; +532 +533 /* No space available on this device */ +534 if (next == ja->discard_idx) +535 continue; +536 +537 ja->cur_idx = next; +538 k->ptr[n++] = MAKE_PTR(0, +539 bucket_to_sector(c, ca->sb.d[ja->cur_idx]), +540 ca->sb.nr_this_dev); +541 } +542 +543 bkey_init(k); +544 SET_KEY_PTRS(k, n); + +If there is no available bucket to reclaim, the if() condition at line +534 will always be true, and n remains 0. Then at line 544, SET_KEY_PTRS() +will set KEY_PTRS field of c->journal.key to 0. + +Setting KEY_PTRS field of c->journal.key to 0 is wrong. Because in +journal_write_unlocked() the journal data is written in following loop, + +649 for (i = 0; i < KEY_PTRS(k); i++) { +650-671 submit journal data to cache device +672 } + +If KEY_PTRS field is set to 0 in journal_reclaim(), the journal data +won't be written to cache device here. If system crashed or rebooted +before bkeys of the lost journal entries are written into btree nodes, data +corruption will be reported during bcache reload after rebooting the +system. + +Indeed there is only one cache in a cache set, there is no need to set +KEY_PTRS field in journal_reclaim() at all. 
But in order to keep the +for_each_cache() logic consistent for now, this patch fixes the above +problem by not setting 0 KEY_PTRS of journal key, if there is no bucket +available to reclaim. + +Signed-off-by: Coly Li +Reviewed-by: Hannes Reinecke +Cc: stable@vger.kernel.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/bcache/journal.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/md/bcache/journal.c ++++ b/drivers/md/bcache/journal.c +@@ -512,11 +512,11 @@ static void journal_reclaim(struct cache + ca->sb.nr_this_dev); + } + +- bkey_init(k); +- SET_KEY_PTRS(k, n); +- +- if (n) ++ if (n) { ++ bkey_init(k); ++ SET_KEY_PTRS(k, n); + c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; ++ } + out: + if (!journal_full(&c->journal)) + __closure_wake_up(&c->journal.wait); +@@ -641,6 +641,9 @@ static void journal_write_unlocked(struc + ca->journal.seq[ca->journal.cur_idx] = w->data->seq; + } + ++ /* If KEY_PTRS(k) == 0, this jset gets lost in air */ ++ BUG_ON(i == 0); ++ + atomic_dec_bug(&fifo_back(&c->journal.pin)); + bch_journal_next(&c->journal); + journal_reclaim(c); diff --git a/queue-4.14/btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch b/queue-4.14/btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch new file mode 100644 index 00000000000..780879e6f05 --- /dev/null +++ b/queue-4.14/btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch @@ -0,0 +1,116 @@ +From bfc61c36260ca990937539cd648ede3cd749bc10 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 17 Apr 2019 11:30:30 +0100 +Subject: Btrfs: do not start a transaction at iterate_extent_inodes() + +From: Filipe Manana + +commit bfc61c36260ca990937539cd648ede3cd749bc10 upstream. 
+ +When finding out which inodes have references on a particular extent, done +by backref.c:iterate_extent_inodes(), from the BTRFS_IOC_LOGICAL_INO (both +v1 and v2) ioctl and from scrub we use the transaction join API to grab a +reference on the currently running transaction, since in order to give +accurate results we need to inspect the delayed references of the currently +running transaction. + +However, if there is currently no running transaction, the join operation +will create a new transaction. This is inefficient as the transaction will +eventually be committed, doing unnecessary IO and introducing a potential +point of failure that will lead to a transaction abort due to -ENOSPC, as +recently reported [1]. + +That's because the join creates the transaction but does not reserve any +space, so when attempting to update the root item of the root passed to +btrfs_join_transaction(), during the transaction commit, we can end up +failing with -ENOSPC. Users of a join operation are supposed to actually +do some filesystem changes and reserve space by some means, which is not +the case of iterate_extent_inodes(), it is a read-only operation for all +contexts from which it is called. + +The reported [1] -ENOSPC failure stack trace is the following: + + heisenberg kernel: ------------[ cut here ]------------ + heisenberg kernel: BTRFS: Transaction aborted (error -28) + heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] +(...) + heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 + heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 + heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] +(...) 
+ heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 + heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 + heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 + heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 + heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 + heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 + heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 + heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 + heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + heisenberg kernel: Call Trace: + heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] + heisenberg kernel: ? _cond_resched+0x15/0x30 + heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] + heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] + heisenberg kernel: ? start_transaction+0x9d/0x3f0 [btrfs] + heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] + heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] + heisenberg kernel: kthread+0x112/0x130 + heisenberg kernel: ? kthread_bind+0x30/0x30 + heisenberg kernel: ret_from_fork+0x35/0x40 + heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- + +So fix that by using the attach API, which does not create a transaction +when there is currently no running transaction. 
+ +[1] https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ + +Reported-by: Zygo Blaxell +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/backref.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -1907,13 +1907,19 @@ int iterate_extent_inodes(struct btrfs_f + extent_item_objectid); + + if (!search_commit_root) { +- trans = btrfs_join_transaction(fs_info->extent_root); +- if (IS_ERR(trans)) +- return PTR_ERR(trans); ++ trans = btrfs_attach_transaction(fs_info->extent_root); ++ if (IS_ERR(trans)) { ++ if (PTR_ERR(trans) != -ENOENT && ++ PTR_ERR(trans) != -EROFS) ++ return PTR_ERR(trans); ++ trans = NULL; ++ } ++ } ++ ++ if (trans) + btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); +- } else { ++ else + down_read(&fs_info->commit_root_sem); +- } + + ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, + tree_mod_seq_elem.seq, &refs, +@@ -1945,7 +1951,7 @@ int iterate_extent_inodes(struct btrfs_f + + free_leaf_list(refs); + out: +- if (!search_commit_root) { ++ if (trans) { + btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); + btrfs_end_transaction(trans); + } else { diff --git a/queue-4.14/btrfs-do-not-start-a-transaction-during-fiemap.patch b/queue-4.14/btrfs-do-not-start-a-transaction-during-fiemap.patch new file mode 100644 index 00000000000..2abd8f69133 --- /dev/null +++ b/queue-4.14/btrfs-do-not-start-a-transaction-during-fiemap.patch @@ -0,0 +1,121 @@ +From 03628cdbc64db6262e50d0357960a4e9562676a1 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 15 Apr 2019 14:50:51 +0100 +Subject: Btrfs: do not start a transaction during fiemap + +From: Filipe Manana + +commit 03628cdbc64db6262e50d0357960a4e9562676a1 upstream. 
+ +During fiemap, for regular extents (non inline) we need to check if they +are shared and if they are, set the shared bit. Checking if an extent is +shared requires checking the delayed references of the currently running +transaction, since some reference might have not yet hit the extent tree +and be only in the in-memory delayed references. + +However we were using a transaction join for this, which creates a new +transaction when there is no transaction currently running. That means +that two more potential failures can happen: creating the transaction and +committing it. Further, if no write activity is currently happening in the +system, and fiemap calls keep being done, we end up creating and +committing transactions that do nothing. + +In some extreme cases this can result in the commit of the transaction +created by fiemap to fail with ENOSPC when updating the root item of a +subvolume tree because a join does not reserve any space, leading to a +trace like the following: + + heisenberg kernel: ------------[ cut here ]------------ + heisenberg kernel: BTRFS: Transaction aborted (error -28) + heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] +(...) + heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 + heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 + heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] +(...) 
+ heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 + heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 + heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 + heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 + heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 + heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 + heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 + heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 + heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + heisenberg kernel: Call Trace: + heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] + heisenberg kernel: ? _cond_resched+0x15/0x30 + heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] + heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] + heisenberg kernel: ? start_transaction+0x9d/0x3f0 [btrfs] + heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] + heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] + heisenberg kernel: kthread+0x112/0x130 + heisenberg kernel: ? kthread_bind+0x30/0x30 + heisenberg kernel: ret_from_fork+0x35/0x40 + heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- + +Since fiemap (and btrfs_check_shared()) is a read-only operation, do not do +a transaction join to avoid the overhead of creating a new transaction (if +there is currently no running transaction) and introducing a potential +point of failure when the new transaction gets committed, instead use a +transaction attach to grab a handle for the currently running transaction +if any. 
+ +Reported-by: Christoph Anton Mitterer +Link: https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ +Fixes: afce772e87c36c ("btrfs: fix check_shared for fiemap ioctl") +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/backref.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -1452,8 +1452,8 @@ int btrfs_find_all_roots(struct btrfs_tr + * callers (such as fiemap) which want to know whether the extent is + * shared but do not need a ref count. + * +- * This attempts to allocate a transaction in order to account for +- * delayed refs, but continues on even when the alloc fails. ++ * This attempts to attach to the running transaction in order to account for ++ * delayed refs, but continues on even when no running transaction exists. + * + * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. 
+ */ +@@ -1476,13 +1476,16 @@ int btrfs_check_shared(struct btrfs_root + tmp = ulist_alloc(GFP_NOFS); + roots = ulist_alloc(GFP_NOFS); + if (!tmp || !roots) { +- ulist_free(tmp); +- ulist_free(roots); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto out; + } + +- trans = btrfs_join_transaction(root); ++ trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { ++ if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { ++ ret = PTR_ERR(trans); ++ goto out; ++ } + trans = NULL; + down_read(&fs_info->commit_root_sem); + } else { +@@ -1515,6 +1518,7 @@ int btrfs_check_shared(struct btrfs_root + } else { + up_read(&fs_info->commit_root_sem); + } ++out: + ulist_free(tmp); + ulist_free(roots); + return ret; diff --git a/queue-4.14/ext4-actually-request-zeroing-of-inode-table-after-grow.patch b/queue-4.14/ext4-actually-request-zeroing-of-inode-table-after-grow.patch new file mode 100644 index 00000000000..d4c10694989 --- /dev/null +++ b/queue-4.14/ext4-actually-request-zeroing-of-inode-table-after-grow.patch @@ -0,0 +1,37 @@ +From 310a997fd74de778b9a4848a64be9cda9f18764a Mon Sep 17 00:00:00 2001 +From: Kirill Tkhai +Date: Thu, 25 Apr 2019 13:06:18 -0400 +Subject: ext4: actually request zeroing of inode table after grow + +From: Kirill Tkhai + +commit 310a997fd74de778b9a4848a64be9cda9f18764a upstream. + +It is never possible, that number of block groups decreases, +since only online grow is supported. + +But after a growing occured, we have to zero inode tables +for just created new block groups. 
+ +Fixes: 19c5246d2516 ("ext4: add new online resize interface") +Signed-off-by: Kirill Tkhai +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ioctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -918,7 +918,7 @@ group_add_out: + if (err == 0) + err = err2; + mnt_drop_write_file(filp); +- if (!err && (o_group > EXT4_SB(sb)->s_groups_count) && ++ if (!err && (o_group < EXT4_SB(sb)->s_groups_count) && + ext4_has_group_desc_csum(sb) && + test_opt(sb, INIT_INODE_TABLE)) + err = ext4_register_li_request(sb, o_group); diff --git a/queue-4.14/ext4-avoid-drop-reference-to-iloc.bh-twice.patch b/queue-4.14/ext4-avoid-drop-reference-to-iloc.bh-twice.patch new file mode 100644 index 00000000000..af768c6ba78 --- /dev/null +++ b/queue-4.14/ext4-avoid-drop-reference-to-iloc.bh-twice.patch @@ -0,0 +1,34 @@ +From 8c380ab4b7b59c0c602743810be1b712514eaebc Mon Sep 17 00:00:00 2001 +From: Pan Bian +Date: Thu, 25 Apr 2019 11:44:15 -0400 +Subject: ext4: avoid drop reference to iloc.bh twice + +From: Pan Bian + +commit 8c380ab4b7b59c0c602743810be1b712514eaebc upstream. + +The reference to iloc.bh has been dropped in ext4_mark_iloc_dirty. +However, the reference is dropped again if error occurs during +ext4_handle_dirty_metadata, which may result in use-after-free bugs. 
+ +Fixes: fb265c9cb49e("ext4: add ext4_sb_bread() to disambiguate ENOMEM cases") +Signed-off-by: Pan Bian +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -849,6 +849,7 @@ static int add_new_gdb(handle_t *handle, + err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); + if (unlikely(err)) { + ext4_std_error(sb, err); ++ iloc.bh = NULL; + goto errout; + } + brelse(dind); diff --git a/queue-4.14/ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch b/queue-4.14/ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch new file mode 100644 index 00000000000..376652d3739 --- /dev/null +++ b/queue-4.14/ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch @@ -0,0 +1,35 @@ +From 50b29d8f033a7c88c5bc011abc2068b1691ab755 Mon Sep 17 00:00:00 2001 +From: Debabrata Banerjee +Date: Tue, 30 Apr 2019 23:08:15 -0400 +Subject: ext4: fix ext4_show_options for file systems w/o journal + +From: Debabrata Banerjee + +commit 50b29d8f033a7c88c5bc011abc2068b1691ab755 upstream. + +Instead of removing EXT4_MOUNT_JOURNAL_CHECKSUM from s_def_mount_opt as +I assume was intended, all other options were blown away leading to +_ext4_show_options() output being incorrect. 
+ +Fixes: 1e381f60dad9 ("ext4: do not allow journal_opts for fs w/o journal") +Signed-off-by: Debabrata Banerjee +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4209,7 +4209,7 @@ static int ext4_fill_super(struct super_ + "data=, fs mounted w/o journal"); + goto failed_mount_wq; + } +- sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; ++ sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; + clear_opt(sb, JOURNAL_CHECKSUM); + clear_opt(sb, DATA_FLAGS); + sbi->s_journal = NULL; diff --git a/queue-4.14/ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch b/queue-4.14/ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch new file mode 100644 index 00000000000..8bbac3887ff --- /dev/null +++ b/queue-4.14/ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch @@ -0,0 +1,106 @@ +From 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 Mon Sep 17 00:00:00 2001 +From: Barret Rhoden +Date: Thu, 25 Apr 2019 11:55:50 -0400 +Subject: ext4: fix use-after-free race with debug_want_extra_isize + +From: Barret Rhoden + +commit 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 upstream. + +When remounting with debug_want_extra_isize, we were not performing the +same checks that we do during a normal mount. That allowed us to set a +value for s_want_extra_isize that reached outside the s_inode_size. 
+ +Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate functions") +Reported-by: syzbot+f584efa0ac7213c226b7@syzkaller.appspotmail.com +Reviewed-by: Jan Kara +Signed-off-by: Barret Rhoden +Signed-off-by: Theodore Ts'o +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 58 ++++++++++++++++++++++++++++++++------------------------ + 1 file changed, 34 insertions(+), 24 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -3454,6 +3454,37 @@ int ext4_calculate_overhead(struct super + return 0; + } + ++static void ext4_clamp_want_extra_isize(struct super_block *sb) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ struct ext4_super_block *es = sbi->s_es; ++ ++ /* determine the minimum size of new large inodes, if present */ ++ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && ++ sbi->s_want_extra_isize == 0) { ++ sbi->s_want_extra_isize = sizeof(struct ext4_inode) - ++ EXT4_GOOD_OLD_INODE_SIZE; ++ if (ext4_has_feature_extra_isize(sb)) { ++ if (sbi->s_want_extra_isize < ++ le16_to_cpu(es->s_want_extra_isize)) ++ sbi->s_want_extra_isize = ++ le16_to_cpu(es->s_want_extra_isize); ++ if (sbi->s_want_extra_isize < ++ le16_to_cpu(es->s_min_extra_isize)) ++ sbi->s_want_extra_isize = ++ le16_to_cpu(es->s_min_extra_isize); ++ } ++ } ++ /* Check if enough inode space is available */ ++ if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > ++ sbi->s_inode_size) { ++ sbi->s_want_extra_isize = sizeof(struct ext4_inode) - ++ EXT4_GOOD_OLD_INODE_SIZE; ++ ext4_msg(sb, KERN_INFO, ++ "required extra inode space not available"); ++ } ++} ++ + static void ext4_set_resv_clusters(struct super_block *sb) + { + ext4_fsblk_t resv_clusters; +@@ -4320,30 +4351,7 @@ no_journal: + if (ext4_setup_super(sb, es, sb_rdonly(sb))) + sb->s_flags |= MS_RDONLY; + +- /* determine the minimum size of new large inodes, if present */ +- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && +- sbi->s_want_extra_isize == 0) { +- 
sbi->s_want_extra_isize = sizeof(struct ext4_inode) - +- EXT4_GOOD_OLD_INODE_SIZE; +- if (ext4_has_feature_extra_isize(sb)) { +- if (sbi->s_want_extra_isize < +- le16_to_cpu(es->s_want_extra_isize)) +- sbi->s_want_extra_isize = +- le16_to_cpu(es->s_want_extra_isize); +- if (sbi->s_want_extra_isize < +- le16_to_cpu(es->s_min_extra_isize)) +- sbi->s_want_extra_isize = +- le16_to_cpu(es->s_min_extra_isize); +- } +- } +- /* Check if enough inode space is available */ +- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > +- sbi->s_inode_size) { +- sbi->s_want_extra_isize = sizeof(struct ext4_inode) - +- EXT4_GOOD_OLD_INODE_SIZE; +- ext4_msg(sb, KERN_INFO, "required extra inode space not" +- "available"); +- } ++ ext4_clamp_want_extra_isize(sb); + + ext4_set_resv_clusters(sb); + +@@ -5128,6 +5136,8 @@ static int ext4_remount(struct super_blo + goto restore_opts; + } + ++ ext4_clamp_want_extra_isize(sb); ++ + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ + test_opt(sb, JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "changing journal_checksum " diff --git a/queue-4.14/ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch b/queue-4.14/ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch new file mode 100644 index 00000000000..06c3379c369 --- /dev/null +++ b/queue-4.14/ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch @@ -0,0 +1,35 @@ +From e5d01196c0428a206f307e9ee5f6842964098ff0 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Wed, 10 Apr 2019 00:37:36 -0400 +Subject: ext4: ignore e_value_offs for xattrs with value-in-ea-inode + +From: Theodore Ts'o + +commit e5d01196c0428a206f307e9ee5f6842964098ff0 upstream. + +In other places in fs/ext4/xattr.c, if e_value_inum is non-zero, the +code ignores the value in e_value_offs. The e_value_offs *should* be +zero, but we shouldn't depend upon it, since it might not be true in a +corrupted/fuzzed file system. 
+ +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202897 +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202877 +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/xattr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1698,7 +1698,7 @@ static int ext4_xattr_set_entry(struct e + + /* No failures allowed past this point. */ + +- if (!s->not_found && here->e_value_size && here->e_value_offs) { ++ if (!s->not_found && here->e_value_size && !here->e_value_inum) { + /* Remove the old value. */ + void *first_val = s->base + min_offs; + size_t offs = le16_to_cpu(here->e_value_offs); diff --git a/queue-4.14/ext4-make-sanity-check-in-mballoc-more-strict.patch b/queue-4.14/ext4-make-sanity-check-in-mballoc-more-strict.patch new file mode 100644 index 00000000000..ae2b4b338ba --- /dev/null +++ b/queue-4.14/ext4-make-sanity-check-in-mballoc-more-strict.patch @@ -0,0 +1,35 @@ +From 31562b954b60f02acb91b7349dc6432d3f8c3c5f Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Sat, 6 Apr 2019 18:33:06 -0400 +Subject: ext4: make sanity check in mballoc more strict + +From: Jan Kara + +commit 31562b954b60f02acb91b7349dc6432d3f8c3c5f upstream. + +The sanity check in mb_find_extent() only checked that returned extent +does not extend past blocksize * 8, however it should not extend past +EXT4_CLUSTERS_PER_GROUP(sb). This can happen when clusters_per_group < +blocksize * 8 and the tail of the bitmap is not properly filled by 1s +which happened e.g. when ancient kernels have grown the filesystem. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/mballoc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -1555,7 +1555,7 @@ static int mb_find_extent(struct ext4_bu + ex->fe_len += 1 << order; + } + +- if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) { ++ if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) { + /* Should never happen! (but apparently sometimes does?!?) */ + WARN_ON(1); + ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent " diff --git a/queue-4.14/ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch b/queue-4.14/ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch new file mode 100644 index 00000000000..cdc60cbb822 --- /dev/null +++ b/queue-4.14/ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch @@ -0,0 +1,50 @@ +From 55be8658c7e2feb11a5b5b33ee031791dbd23a69 Mon Sep 17 00:00:00 2001 +From: Kamlakant Patel +Date: Wed, 24 Apr 2019 11:50:43 +0000 +Subject: ipmi:ssif: compare block number correctly for multi-part return messages + +From: Kamlakant Patel + +commit 55be8658c7e2feb11a5b5b33ee031791dbd23a69 upstream. + +According to ipmi spec, block number is a number that is incremented, +starting with 0, for each new block of message data returned using the +middle transaction. + +Here, the 'blocknum' is data[0] which always starts from zero(0) and +'ssif_info->multi_pos' starts from 1. +So, we need to add +1 to blocknum while comparing with multi_pos. + +Fixes: 7d6380cd40f79 ("ipmi:ssif: Fix handling of multi-part return messages"). +Reported-by: Kiran Kolukuluru +Signed-off-by: Kamlakant Patel +Message-Id: <1556106615-18722-1-git-send-email-kamlakantp@marvell.com> +[Also added a debug log if the block numbers don't match.] 
+Signed-off-by: Corey Minyard +Cc: stable@vger.kernel.org # 4.4 +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/ipmi/ipmi_ssif.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/char/ipmi/ipmi_ssif.c ++++ b/drivers/char/ipmi/ipmi_ssif.c +@@ -703,12 +703,16 @@ static void msg_done_handler(struct ssif + /* End of read */ + len = ssif_info->multi_len; + data = ssif_info->data; +- } else if (blocknum != ssif_info->multi_pos) { ++ } else if (blocknum + 1 != ssif_info->multi_pos) { + /* + * Out of sequence block, just abort. Block + * numbers start at zero for the second block, + * but multi_pos starts at one, so the +1. + */ ++ if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) ++ dev_dbg(&ssif_info->client->dev, ++ "Received message out of sequence, expected %u, got %u\n", ++ ssif_info->multi_pos - 1, blocknum); + result = -EIO; + } else { + ssif_inc_stat(ssif_info, received_message_parts); diff --git a/queue-4.14/jbd2-check-superblock-mapped-prior-to-committing.patch b/queue-4.14/jbd2-check-superblock-mapped-prior-to-committing.patch new file mode 100644 index 00000000000..ab10bffeca4 --- /dev/null +++ b/queue-4.14/jbd2-check-superblock-mapped-prior-to-committing.patch @@ -0,0 +1,49 @@ +From 742b06b5628f2cd23cb51a034cb54dc33c6162c5 Mon Sep 17 00:00:00 2001 +From: Jiufei Xue +Date: Sat, 6 Apr 2019 18:57:40 -0400 +Subject: jbd2: check superblock mapped prior to committing + +From: Jiufei Xue + +commit 742b06b5628f2cd23cb51a034cb54dc33c6162c5 upstream. + +We hit a BUG at fs/buffer.c:3057 if we detached the nbd device +before unmounting ext4 filesystem. + +The typical chain of events leading to the BUG: +jbd2_write_superblock + submit_bh + submit_bh_wbc + BUG_ON(!buffer_mapped(bh)); + +The block device is removed and all the pages are invalidated. JBD2 +was trying to write journal superblock to the block device which is +no longer present. + +Fix this by checking the journal superblock's buffer head prior to +submitting. 
+ +Reported-by: Eric Ren +Signed-off-by: Jiufei Xue +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd2/journal.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1353,6 +1353,10 @@ static int jbd2_write_superblock(journal + journal_superblock_t *sb = journal->j_superblock; + int ret; + ++ /* Buffer got discarded which means block device got invalidated */ ++ if (!buffer_mapped(bh)) ++ return -EIO; ++ + trace_jbd2_write_superblock(journal, write_flags); + if (!(journal->j_flags & JBD2_BARRIER)) + write_flags &= ~(REQ_FUA | REQ_PREFLUSH); diff --git a/queue-4.14/mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch b/queue-4.14/mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch new file mode 100644 index 00000000000..e35cb178e84 --- /dev/null +++ b/queue-4.14/mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch @@ -0,0 +1,39 @@ +From 6b4814a9451add06d457e198be418bf6a3e6a990 Mon Sep 17 00:00:00 2001 +From: Steve Twiss +Date: Fri, 26 Apr 2019 14:33:35 +0100 +Subject: mfd: da9063: Fix OTP control register names to match datasheets for DA9063/63L + +From: Steve Twiss + +commit 6b4814a9451add06d457e198be418bf6a3e6a990 upstream. + +Mismatch between what is found in the Datasheets for DA9063 and DA9063L +provided by Dialog Semiconductor, and the register names provided in the +MFD registers file. The changes are for the OTP (one-time-programming) +control registers. The two naming errors are OPT instead of OTP, and +COUNT instead of CONT (i.e. control). 
+ +Cc: Stable +Signed-off-by: Steve Twiss +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mfd/da9063/registers.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/mfd/da9063/registers.h ++++ b/include/linux/mfd/da9063/registers.h +@@ -215,9 +215,9 @@ + + /* DA9063 Configuration registers */ + /* OTP */ +-#define DA9063_REG_OPT_COUNT 0x101 +-#define DA9063_REG_OPT_ADDR 0x102 +-#define DA9063_REG_OPT_DATA 0x103 ++#define DA9063_REG_OTP_CONT 0x101 ++#define DA9063_REG_OTP_ADDR 0x102 ++#define DA9063_REG_OTP_DATA 0x103 + + /* Customer Trim and Configuration */ + #define DA9063_REG_T_OFFSET 0x104 diff --git a/queue-4.14/mfd-max77620-fix-swapped-fps_period_max_us-values.patch b/queue-4.14/mfd-max77620-fix-swapped-fps_period_max_us-values.patch new file mode 100644 index 00000000000..2c381e4ec70 --- /dev/null +++ b/queue-4.14/mfd-max77620-fix-swapped-fps_period_max_us-values.patch @@ -0,0 +1,34 @@ +From ea611d1cc180fbb56982c83cd5142a2b34881f5c Mon Sep 17 00:00:00 2001 +From: Dmitry Osipenko +Date: Sun, 5 May 2019 18:43:22 +0300 +Subject: mfd: max77620: Fix swapped FPS_PERIOD_MAX_US values + +From: Dmitry Osipenko + +commit ea611d1cc180fbb56982c83cd5142a2b34881f5c upstream. + +The FPS_PERIOD_MAX_US definitions are swapped for MAX20024 and MAX77620, +fix it. 
+ +Cc: stable +Signed-off-by: Dmitry Osipenko +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mfd/max77620.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/linux/mfd/max77620.h ++++ b/include/linux/mfd/max77620.h +@@ -136,8 +136,8 @@ + #define MAX77620_FPS_PERIOD_MIN_US 40 + #define MAX20024_FPS_PERIOD_MIN_US 20 + +-#define MAX77620_FPS_PERIOD_MAX_US 2560 +-#define MAX20024_FPS_PERIOD_MAX_US 5120 ++#define MAX20024_FPS_PERIOD_MAX_US 2560 ++#define MAX77620_FPS_PERIOD_MAX_US 5120 + + #define MAX77620_REG_FPS_GPIO1 0x54 + #define MAX77620_REG_FPS_GPIO2 0x55 diff --git a/queue-4.14/mm-mincore.c-make-mincore-more-conservative.patch b/queue-4.14/mm-mincore.c-make-mincore-more-conservative.patch new file mode 100644 index 00000000000..05841506c5c --- /dev/null +++ b/queue-4.14/mm-mincore.c-make-mincore-more-conservative.patch @@ -0,0 +1,95 @@ +From 134fca9063ad4851de767d1768180e5dede9a881 Mon Sep 17 00:00:00 2001 +From: Jiri Kosina +Date: Tue, 14 May 2019 15:41:38 -0700 +Subject: mm/mincore.c: make mincore() more conservative + +From: Jiri Kosina + +commit 134fca9063ad4851de767d1768180e5dede9a881 upstream. + +The semantics of what mincore() considers to be resident is not +completely clear, but Linux has always (since 2.3.52, which is when +mincore() was initially done) treated it as "page is available in page +cache". + +That's potentially a problem, as that [in]directly exposes +meta-information about pagecache / memory mapping state even about +memory not strictly belonging to the process executing the syscall, +opening possibilities for sidechannel attacks. 
+ +Change the semantics of mincore() so that it only reveals pagecache +information for non-anonymous mappings that belong to files that the +calling process could (if it tried to) successfully open for writing; +otherwise we'd be including shared non-exclusive mappings, which + + - is the sidechannel + + - is not the usecase for mincore(), as that's primarily used for data, + not (shared) text + +[jkosina@suse.cz: v2] + Link: http://lkml.kernel.org/r/20190312141708.6652-2-vbabka@suse.cz +[mhocko@suse.com: restructure can_do_mincore() conditions] +Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1903062342020.19912@cbobk.fhfr.pm +Signed-off-by: Jiri Kosina +Signed-off-by: Vlastimil Babka +Acked-by: Josh Snyder +Acked-by: Michal Hocko +Originally-by: Linus Torvalds +Originally-by: Dominique Martinet +Cc: Andy Lutomirski +Cc: Dave Chinner +Cc: Kevin Easton +Cc: Matthew Wilcox +Cc: Cyril Hrubis +Cc: Tejun Heo +Cc: Kirill A. Shutemov +Cc: Daniel Gruss +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mincore.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +--- a/mm/mincore.c ++++ b/mm/mincore.c +@@ -169,6 +169,22 @@ out: + return 0; + } + ++static inline bool can_do_mincore(struct vm_area_struct *vma) ++{ ++ if (vma_is_anonymous(vma)) ++ return true; ++ if (!vma->vm_file) ++ return false; ++ /* ++ * Reveal pagecache information only for non-anonymous mappings that ++ * correspond to the files the calling process could (if tried) open ++ * for writing; otherwise we'd be including shared non-exclusive ++ * mappings, which opens a side channel. ++ */ ++ return inode_owner_or_capable(file_inode(vma->vm_file)) || ++ inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; ++} ++ + /* + * Do a chunk of "sys_mincore()".
We've already checked + * all the arguments, we hold the mmap semaphore: we should +@@ -189,8 +205,13 @@ static long do_mincore(unsigned long add + vma = find_vma(current->mm, addr); + if (!vma || addr < vma->vm_start) + return -ENOMEM; +- mincore_walk.mm = vma->vm_mm; + end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); ++ if (!can_do_mincore(vma)) { ++ unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE); ++ memset(vec, 1, pages); ++ return pages; ++ } ++ mincore_walk.mm = vma->vm_mm; + err = walk_page_range(addr, end, &mincore_walk); + if (err < 0) + return err; diff --git a/queue-4.14/mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch b/queue-4.14/mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch new file mode 100644 index 00000000000..02faf4170d1 --- /dev/null +++ b/queue-4.14/mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch @@ -0,0 +1,69 @@ +From 2b75ebeea6f4937d4d05ec4982c471cef9a29b7f Mon Sep 17 00:00:00 2001 +From: Alexander Sverdlin +Date: Tue, 19 Mar 2019 17:18:07 +0000 +Subject: mtd: spi-nor: intel-spi: Avoid crossing 4K address boundary on read/write + +From: Alexander Sverdlin + +commit 2b75ebeea6f4937d4d05ec4982c471cef9a29b7f upstream. + +It was observed that reads crossing 4K address boundary are failing. + +This limitation is mentioned in Intel documents: + +Intel(R) 9 Series Chipset Family Platform Controller Hub (PCH) Datasheet: + +"5.26.3 Flash Access +Program Register Access: +* Program Register Accesses are not allowed to cross a 4 KB boundary..." + +Enhanced Serial Peripheral Interface (eSPI) +Interface Base Specification (for Client and Server Platforms): + +"5.1.4 Address +For other memory transactions, the address may start or end at any byte +boundary. However, the address and payload length combination must not +cross the naturally aligned address boundary of the corresponding Maximum +Payload Size. It must not cross a 4 KB address boundary." 
+ +Avoid this by splitting an operation crossing the boundary into two +operations. + +Fixes: 8afda8b26d01 ("spi-nor: Add support for Intel SPI serial flash controller") +Cc: stable@vger.kernel.org +Reported-by: Romain Porte +Tested-by: Pascal Fabreges +Signed-off-by: Alexander Sverdlin +Reviewed-by: Tudor Ambarus +Acked-by: Mika Westerberg +Signed-off-by: Miquel Raynal +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/spi-nor/intel-spi.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/mtd/spi-nor/intel-spi.c ++++ b/drivers/mtd/spi-nor/intel-spi.c +@@ -503,6 +503,10 @@ static ssize_t intel_spi_read(struct spi + while (len > 0) { + block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + ++ /* Read cannot cross 4K boundary */ ++ block_size = min_t(loff_t, from + block_size, ++ round_up(from + 1, SZ_4K)) - from; ++ + writel(from, ispi->base + FADDR); + + val = readl(ispi->base + HSFSTS_CTL); +@@ -553,6 +557,10 @@ static ssize_t intel_spi_write(struct sp + while (len > 0) { + block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + ++ /* Write cannot cross 4K boundary */ ++ block_size = min_t(loff_t, to + block_size, ++ round_up(to + 1, SZ_4K)) - to; ++ + writel(to, ispi->base + FADDR); + + val = readl(ispi->base + HSFSTS_CTL); diff --git a/queue-4.14/ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch b/queue-4.14/ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch new file mode 100644 index 00000000000..9338af4a454 --- /dev/null +++ b/queue-4.14/ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch @@ -0,0 +1,180 @@ +From e091eab028f9253eac5c04f9141bbc9d170acab3 Mon Sep 17 00:00:00 2001 +From: Shuning Zhang +Date: Mon, 13 May 2019 17:15:56 -0700 +Subject: ocfs2: fix ocfs2 read inode data panic in ocfs2_iget + +From: Shuning Zhang + +commit e091eab028f9253eac5c04f9141bbc9d170acab3 upstream. + +In some cases, ocfs2_iget() reads the data of inode, which has been +deleted for some reason. That will make the system panic. 
So we should +judge whether this inode has been deleted, and tell the caller that the +inode is a bad inode. + +For example, the ocfs2 is used as the backend of nfs, and the client is +nfsv3. This issue can be reproduced by the following steps. + +on the nfs server side, +..../patha/pathb + +Step 1: The process A was scheduled before calling the function fh_verify. + +Step 2: The process B is removing the 'pathb', and just completed the call +to function dput. Then the dentry of 'pathb' has been deleted from the +dcache, and all ancestors have been deleted also. The relationship of +dentry and inode was deleted through the function hlist_del_init. The +following is the call stack. +dentry_iput->hlist_del_init(&dentry->d_u.d_alias) + +At this time, the inode is still in the dcache. + +Step 3: The process A call the function ocfs2_get_dentry, which get the +inode from dcache. Then the refcount of inode is 1. The following is the +call stack. +nfsd3_proc_getacl->fh_verify->exportfs_decode_fh->fh_to_dentry(ocfs2_get_dentry) + +Step 4: Dirty pages are flushed by bdi threads. So the inode of 'patha' +is evicted, and this directory was deleted. But the inode of 'pathb' +can't be evicted, because the refcount of the inode was 1. + +Step 5: The process A keep running, and call the function +reconnect_path(in exportfs_decode_fh), which call function +ocfs2_get_parent of ocfs2. Get the block number of parent +directory(patha) by the name of ... Then read the data from disk by the +block number. But this inode has been deleted, so the system panic. + +Process A Process B +1. in nfsd3_proc_getacl | +2. | dput +3. fh_to_dentry(ocfs2_get_dentry) | +4. bdi flush dirty cache | +5.
ocfs2_iget | + +[283465.542049] OCFS2: ERROR (device sdp): ocfs2_validate_inode_block: +Invalid dinode #580640: OCFS2_VALID_FL not set + +[283465.545490] Kernel panic - not syncing: OCFS2: (device sdp): panic forced +after error + +[283465.546889] CPU: 5 PID: 12416 Comm: nfsd Tainted: G W +4.1.12-124.18.6.el6uek.bug28762940v3.x86_64 #2 +[283465.548382] Hardware name: VMware, Inc. VMware Virtual Platform/440BX +Desktop Reference Platform, BIOS 6.00 09/21/2015 +[283465.549657] 0000000000000000 ffff8800a56fb7b8 ffffffff816e839c +ffffffffa0514758 +[283465.550392] 000000000008dc20 ffff8800a56fb838 ffffffff816e62d3 +0000000000000008 +[283465.551056] ffff880000000010 ffff8800a56fb848 ffff8800a56fb7e8 +ffff88005df9f000 +[283465.551710] Call Trace: +[283465.552516] [] dump_stack+0x63/0x81 +[283465.553291] [] panic+0xcb/0x21b +[283465.554037] [] ocfs2_handle_error+0xf0/0xf0 [ocfs2] +[283465.554882] [] __ocfs2_error+0x67/0x70 [ocfs2] +[283465.555768] [] ocfs2_validate_inode_block+0x229/0x230 +[ocfs2] +[283465.556683] [] ocfs2_read_blocks+0x46c/0x7b0 [ocfs2] +[283465.557408] [] ? ocfs2_inode_cache_io_unlock+0x20/0x20 +[ocfs2] +[283465.557973] [] ocfs2_read_inode_block_full+0x3b/0x60 +[ocfs2] +[283465.558525] [] ocfs2_iget+0x4aa/0x880 [ocfs2] +[283465.559082] [] ocfs2_get_parent+0x9e/0x220 [ocfs2] +[283465.559622] [] reconnect_path+0xb5/0x300 +[283465.560156] [] exportfs_decode_fh+0xf6/0x2b0 +[283465.560708] [] ? nfsd_proc_getattr+0xa0/0xa0 [nfsd] +[283465.561262] [] ? prepare_creds+0x26/0x110 +[283465.561932] [] fh_verify+0x350/0x660 [nfsd] +[283465.562862] [] ? nfsd_cache_lookup+0x44/0x630 [nfsd] +[283465.563697] [] nfsd3_proc_getattr+0x69/0xf0 [nfsd] +[283465.564510] [] nfsd_dispatch+0xe0/0x290 [nfsd] +[283465.565358] [] ? svc_tcp_adjust_wspace+0x12/0x30 +[sunrpc] +[283465.566272] [] svc_process_common+0x412/0x6a0 [sunrpc] +[283465.567155] [] svc_process+0x123/0x210 [sunrpc] +[283465.568020] [] nfsd+0xff/0x170 [nfsd] +[283465.568962] [] ? 
nfsd_destroy+0x80/0x80 [nfsd] +[283465.570112] [] kthread+0xcb/0xf0 +[283465.571099] [] ? kthread_create_on_node+0x180/0x180 +[283465.572114] [] ret_from_fork+0x58/0x90 +[283465.573156] [] ? kthread_create_on_node+0x180/0x180 + +Link: http://lkml.kernel.org/r/1554185919-3010-1-git-send-email-sunny.s.zhang@oracle.com +Signed-off-by: Shuning Zhang +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: piaojun +Cc: "Gang He" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/export.c | 30 +++++++++++++++++++++++++++++- + 1 file changed, 29 insertions(+), 1 deletion(-) + +--- a/fs/ocfs2/export.c ++++ b/fs/ocfs2/export.c +@@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(s + u64 blkno; + struct dentry *parent; + struct inode *dir = d_inode(child); ++ int set; + + trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name, + (unsigned long long)OCFS2_I(dir)->ip_blkno); + ++ status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1); ++ if (status < 0) { ++ mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status); ++ parent = ERR_PTR(status); ++ goto bail; ++ } ++ + status = ocfs2_inode_lock(dir, NULL, 0); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + parent = ERR_PTR(status); +- goto bail; ++ goto unlock_nfs_sync; + } + + status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno); +@@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(s + goto bail_unlock; + } + ++ status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set); ++ if (status < 0) { ++ if (status == -EINVAL) { ++ status = -ESTALE; ++ } else ++ mlog(ML_ERROR, "test inode bit failed %d\n", status); ++ parent = ERR_PTR(status); ++ goto bail_unlock; ++ } ++ ++ trace_ocfs2_get_dentry_test_bit(status, set); ++ if (!set) { ++ status = -ESTALE; ++ parent = ERR_PTR(status); ++ goto bail_unlock; ++ } ++ + parent = 
d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); + + bail_unlock: + ocfs2_inode_unlock(dir, 0); + ++unlock_nfs_sync: ++ ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1); ++ + bail: + trace_ocfs2_get_parent_end(parent); + diff --git a/queue-4.14/series b/queue-4.14/series index d8cf46f2c80..a818cb1783f 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -29,3 +29,23 @@ alsa-hda-realtek-eapd-turn-on-later.patch asoc-max98090-fix-restore-of-dapm-muxes.patch asoc-rt5677-spi-disable-16bit-spi-transfers.patch bpf-arm64-remove-prefetch-insn-in-xadd-mapping.patch +mm-mincore.c-make-mincore-more-conservative.patch +ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch +userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch +mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch +mfd-max77620-fix-swapped-fps_period_max_us-values.patch +mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch +tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch +tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch +jbd2-check-superblock-mapped-prior-to-committing.patch +ext4-make-sanity-check-in-mballoc-more-strict.patch +ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch +ext4-avoid-drop-reference-to-iloc.bh-twice.patch +btrfs-do-not-start-a-transaction-during-fiemap.patch +btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch +bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch +bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch +ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch +ext4-actually-request-zeroing-of-inode-table-after-grow.patch +ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch +ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch diff --git a/queue-4.14/tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch 
b/queue-4.14/tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch new file mode 100644 index 00000000000..97eb799da5e --- /dev/null +++ b/queue-4.14/tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch @@ -0,0 +1,183 @@ +From 46ca3f735f345c9d87383dd3a09fa5d43870770e Mon Sep 17 00:00:00 2001 +From: Sergei Trofimovich +Date: Sun, 10 Mar 2019 21:24:15 +0000 +Subject: tty/vt: fix write/write race in ioctl(KDSKBSENT) handler + +From: Sergei Trofimovich + +commit 46ca3f735f345c9d87383dd3a09fa5d43870770e upstream. + +The bug manifests as an attempt to access deallocated memory: + + BUG: unable to handle kernel paging request at ffff9c8735448000 + #PF error: [PROT] [WRITE] + PGD 288a05067 P4D 288a05067 PUD 288a07067 PMD 7f60c2063 PTE 80000007f5448161 + Oops: 0003 [#1] PREEMPT SMP + CPU: 6 PID: 388 Comm: loadkeys Tainted: G C 5.0.0-rc6-00153-g5ded5871030e #91 + Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./H77M-D3H, BIOS F12 11/14/2013 + RIP: 0010:__memmove+0x81/0x1a0 + Code: 4c 89 4f 10 4c 89 47 18 48 8d 7f 20 73 d4 48 83 c2 20 e9 a2 00 00 00 66 90 48 89 d1 4c 8b 5c 16 f8 4c 8d 54 17 f8 48 c1 e9 03 48 a5 4d 89 1a e9 0c 01 00 00 0f 1f 40 00 48 89 d1 4c 8b 1e 49 + RSP: 0018:ffffa1b9002d7d08 EFLAGS: 00010203 + RAX: ffff9c873541af43 RBX: ffff9c873541af43 RCX: 00000c6f105cd6bf + RDX: 0000637882e986b6 RSI: ffff9c8735447ffb RDI: ffff9c8735447ffb + RBP: ffff9c8739cd3800 R08: ffff9c873b802f00 R09: 00000000fffff73b + R10: ffffffffb82b35f1 R11: 00505b1b004d5b1b R12: 0000000000000000 + R13: ffff9c873541af3d R14: 000000000000000b R15: 000000000000000c + FS: 00007f450c390580(0000) GS:ffff9c873f180000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: ffff9c8735448000 CR3: 00000007e213c002 CR4: 00000000000606e0 + Call Trace: + vt_do_kdgkb_ioctl+0x34d/0x440 + vt_ioctl+0xba3/0x1190 + ? __bpf_prog_run32+0x39/0x60 + ? mem_cgroup_commit_charge+0x7b/0x4e0 + tty_ioctl+0x23f/0x920 + ? preempt_count_sub+0x98/0xe0 + ? 
__seccomp_filter+0x67/0x600 + do_vfs_ioctl+0xa2/0x6a0 + ? syscall_trace_enter+0x192/0x2d0 + ksys_ioctl+0x3a/0x70 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x54/0xe0 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +The bug manifests on systemd systems with multiple vtcon devices: + # cat /sys/devices/virtual/vtconsole/vtcon0/name + (S) dummy device + # cat /sys/devices/virtual/vtconsole/vtcon1/name + (M) frame buffer device + +There systemd runs 'loadkeys' tool in parallel for each vtcon +instance. This causes two parallel ioctl(KDSKBSENT) calls to +race into adding the same entry into 'func_table' array at: + + drivers/tty/vt/keyboard.c:vt_do_kdgkb_ioctl() + +The function has no locking around writes to 'func_table'. + +The simplest reproducer is to have initramfs with the following +init on a 8-CPU machine x86_64: + + #!/bin/sh + + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + wait + +The change adds lock on write path only. Reads are still racy. + +CC: Greg Kroah-Hartman +CC: Jiri Slaby +Link: https://lkml.org/lkml/2019/2/17/256 +Signed-off-by: Sergei Trofimovich +Cc: stable +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/vt/keyboard.c | 33 +++++++++++++++++++++++++++------ + 1 file changed, 27 insertions(+), 6 deletions(-) + +--- a/drivers/tty/vt/keyboard.c ++++ b/drivers/tty/vt/keyboard.c +@@ -122,6 +122,7 @@ static const int NR_TYPES = ARRAY_SIZE(m + static struct input_handler kbd_handler; + static DEFINE_SPINLOCK(kbd_event_lock); + static DEFINE_SPINLOCK(led_lock); ++static DEFINE_SPINLOCK(func_buf_lock); /* guard 'func_buf' and friends */ + static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; /* keyboard key bitmap */ + static unsigned char shift_down[NR_SHIFT]; /* shift state counters..
*/ + static bool dead_key_next; +@@ -1959,11 +1960,12 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + char *p; + u_char *q; + u_char __user *up; +- int sz; ++ int sz, fnw_sz; + int delta; + char *first_free, *fj, *fnw; + int i, j, k; + int ret; ++ unsigned long flags; + + if (!capable(CAP_SYS_TTY_CONFIG)) + perm = 0; +@@ -2006,7 +2008,14 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + goto reterr; + } + ++ fnw = NULL; ++ fnw_sz = 0; ++ /* race aginst other writers */ ++ again: ++ spin_lock_irqsave(&func_buf_lock, flags); + q = func_table[i]; ++ ++ /* fj pointer to next entry after 'q' */ + first_free = funcbufptr + (funcbufsize - funcbufleft); + for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) + ; +@@ -2014,10 +2023,12 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + fj = func_table[j]; + else + fj = first_free; +- ++ /* buffer usage increase by new entry */ + delta = (q ? -strlen(q) : 1) + strlen(kbs->kb_string); ++ + if (delta <= funcbufleft) { /* it fits in current buf */ + if (j < MAX_NR_FUNC) { ++ /* make enough space for new entry at 'fj' */ + memmove(fj + delta, fj, first_free - fj); + for (k = j; k < MAX_NR_FUNC; k++) + if (func_table[k]) +@@ -2030,20 +2041,28 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + sz = 256; + while (sz < funcbufsize - funcbufleft + delta) + sz <<= 1; +- fnw = kmalloc(sz, GFP_KERNEL); +- if(!fnw) { +- ret = -ENOMEM; +- goto reterr; ++ if (fnw_sz != sz) { ++ spin_unlock_irqrestore(&func_buf_lock, flags); ++ kfree(fnw); ++ fnw = kmalloc(sz, GFP_KERNEL); ++ fnw_sz = sz; ++ if (!fnw) { ++ ret = -ENOMEM; ++ goto reterr; ++ } ++ goto again; + } + + if (!q) + func_table[i] = fj; ++ /* copy data before insertion point to new location */ + if (fj > funcbufptr) + memmove(fnw, funcbufptr, fj - funcbufptr); + for (k = 0; k < j; k++) + if (func_table[k]) + func_table[k] = fnw + (func_table[k] - funcbufptr); + ++ /* copy data after insertion point to new location */ + if (first_free > fj) { + memmove(fnw + (fj - funcbufptr) + delta, fj, first_free - 
fj); + for (k = j; k < MAX_NR_FUNC; k++) +@@ -2056,7 +2075,9 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + funcbufleft = funcbufleft - delta + sz - funcbufsize; + funcbufsize = sz; + } ++ /* finally insert item itself */ + strcpy(func_table[i], kbs->kb_string); ++ spin_unlock_irqrestore(&func_buf_lock, flags); + break; + } + ret = 0; diff --git a/queue-4.14/tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch b/queue-4.14/tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch new file mode 100644 index 00000000000..8e4969c46aa --- /dev/null +++ b/queue-4.14/tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch @@ -0,0 +1,69 @@ +From 75ddbc1fb11efac87b611d48e9802f6fe2bb2163 Mon Sep 17 00:00:00 2001 +From: Yifeng Li +Date: Tue, 5 Mar 2019 07:02:49 +0800 +Subject: tty: vt.c: Fix TIOCL_BLANKSCREEN console blanking if blankinterval == 0 + +From: Yifeng Li + +commit 75ddbc1fb11efac87b611d48e9802f6fe2bb2163 upstream. + +Previously, in the userspace, it was possible to use the "setterm" command +from util-linux to blank the VT console by default, using the following +command. + +According to the man page, + +> The force option keeps the screen blank even if a key is pressed. + +It was implemented by calling TIOCL_BLANKSCREEN. + + case BLANKSCREEN: + ioctlarg = TIOCL_BLANKSCREEN; + if (ioctl(STDIN_FILENO, TIOCLINUX, &ioctlarg)) + warn(_("cannot force blank")); + break; + +However, after Linux 4.12, this command ceased to work anymore, which is +unexpected. By inspecting the kernel source, it shows that the issue was +triggered by the side-effect from commit a4199f5eb809 ("tty: Disable +default console blanking interval"). + +The console blanking is implemented by function do_blank_screen() in vt.c: +"blank_state" will be initialized to "blank_normal_wait" in con_init() if +AND ONLY IF ("blankinterval" > 0). 
If "blankinterval" is 0, "blank_state" +will be "blank_off" (== 0), and a call to do_blank_screen() will always +abort, even if a forced blanking is required from the user by calling +TIOCL_BLANKSCREEN, the console won't be blanked. + +This behavior is unexpected from a user's point-of-view, since it's not +mentioned in any documentation. The setterm man page suggests it will +always work, and the kernel comments in uapi/linux/tiocl.h says + +> /* keep screen blank even if a key is pressed */ +> #define TIOCL_BLANKSCREEN 14 + +To fix it, we simply remove the "blank_state != blank_off" check, as +pointed out by Nicolas Pitre, this check doesn't logically make sense +and it's safe to remove. + +Suggested-by: Nicolas Pitre +Fixes: a4199f5eb809 ("tty: Disable default console blanking interval") +Signed-off-by: Yifeng Li +Cc: stable +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/vt/vt.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/tty/vt/vt.c ++++ b/drivers/tty/vt/vt.c +@@ -3840,8 +3840,6 @@ void do_blank_screen(int entering_gfx) + return; + } + +- if (blank_state != blank_normal_wait) +- return; + blank_state = blank_off; + + /* don't blank graphics */ diff --git a/queue-4.14/userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch b/queue-4.14/userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch new file mode 100644 index 00000000000..3cf3922d938 --- /dev/null +++ b/queue-4.14/userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch @@ -0,0 +1,135 @@ +From c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Tue, 14 May 2019 15:40:46 -0700 +Subject: userfaultfd: use RCU to free the task struct when fork fails + +From: Andrea Arcangeli + +commit c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 upstream. + +The task structure is freed while get_mem_cgroup_from_mm() holds +rcu_read_lock() and dereferences mm->owner. 
+ + get_mem_cgroup_from_mm() failing fork() + ---- --- + task = mm->owner + mm->owner = NULL; + free(task) + if (task) *task; /* use after free */ + +The fix consists in freeing the task with RCU also in the fork failure +case, exactly like it always happens for the regular exit(2) path. That +is enough to make the rcu_read_lock hold in get_mem_cgroup_from_mm() +(left side above) effective to avoid a use after free when dereferencing +the task structure. + +An alternate possible fix would be to defer the delivery of the +userfaultfd contexts to the monitor until after fork() is guaranteed to +succeed. Such a change would require more changes because it would +create a strict ordering dependency where the uffd methods would need to +be called beyond the last potentially failing branch in order to be +safe. This solution as opposed only adds the dependency to common code +to set mm->owner to NULL and to free the task struct that was pointed by +mm->owner with RCU, if fork ends up failing. The userfaultfd methods +can still be called anywhere during the fork runtime and the monitor +will keep discarding orphaned "mm" coming from failed forks in userland. + +This race condition couldn't trigger if CONFIG_MEMCG was set =n at build +time. + +[aarcange@redhat.com: improve changelog, reduce #ifdefs per Michal] + Link: http://lkml.kernel.org/r/20190429035752.4508-1-aarcange@redhat.com +Link: http://lkml.kernel.org/r/20190325225636.11635-2-aarcange@redhat.com +Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event") +Signed-off-by: Andrea Arcangeli +Tested-by: zhong jiang +Reported-by: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com +Cc: Oleg Nesterov +Cc: Jann Horn +Cc: Hugh Dickins +Cc: Mike Rapoport +Cc: Mike Kravetz +Cc: Peter Xu +Cc: Jason Gunthorpe +Cc: "Kirill A . 
Shutemov" +Cc: Michal Hocko +Cc: zhong jiang +Cc: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/fork.c | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -790,6 +790,15 @@ static void mm_init_aio(struct mm_struct + #endif + } + ++static __always_inline void mm_clear_owner(struct mm_struct *mm, ++ struct task_struct *p) ++{ ++#ifdef CONFIG_MEMCG ++ if (mm->owner == p) ++ WRITE_ONCE(mm->owner, NULL); ++#endif ++} ++ + static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) + { + #ifdef CONFIG_MEMCG +@@ -1211,6 +1220,7 @@ static struct mm_struct *dup_mm(struct t + free_pt: + /* don't put binfmt in mmput, we haven't got module yet */ + mm->binfmt = NULL; ++ mm_init_owner(mm, NULL); + mmput(mm); + + fail_nomem: +@@ -1528,6 +1538,21 @@ static inline void rcu_copy_process(stru + #endif /* #ifdef CONFIG_TASKS_RCU */ + } + ++static void __delayed_free_task(struct rcu_head *rhp) ++{ ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ free_task(tsk); ++} ++ ++static __always_inline void delayed_free_task(struct task_struct *tsk) ++{ ++ if (IS_ENABLED(CONFIG_MEMCG)) ++ call_rcu(&tsk->rcu, __delayed_free_task); ++ else ++ free_task(tsk); ++} ++ + /* + * This creates a new process as a copy of the old one, + * but does not actually start it yet. +@@ -1960,8 +1985,10 @@ bad_fork_cleanup_io: + bad_fork_cleanup_namespaces: + exit_task_namespaces(p); + bad_fork_cleanup_mm: +- if (p->mm) ++ if (p->mm) { ++ mm_clear_owner(p->mm, p); + mmput(p->mm); ++ } + bad_fork_cleanup_signal: + if (!(clone_flags & CLONE_THREAD)) + free_signal_struct(p->signal); +@@ -1992,7 +2019,7 @@ bad_fork_cleanup_count: + bad_fork_free: + p->state = TASK_DEAD; + put_task_stack(p); +- free_task(p); ++ delayed_free_task(p); + fork_out: + return ERR_PTR(retval); + }