From: Greg Kroah-Hartman Date: Fri, 17 May 2019 15:40:08 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v4.9.178~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d074880a0a6e3b05ae22e84ba973f7113869c254;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: acpi-pm-set-enable_for_wake-for-wakeup-gpes-during-suspend-to-idle.patch bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch bpf-arm64-remove-prefetch-insn-in-xadd-mapping.patch btrfs-check-the-first-key-and-level-for-cached-extent-buffer.patch btrfs-correctly-free-extent-buffer-in-case-btree_read_extent_buffer_pages-fails.patch btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch btrfs-do-not-start-a-transaction-during-fiemap.patch btrfs-honour-fitrim-range-constraints-during-free-space-trim.patch btrfs-send-flush-dellaloc-in-order-to-avoid-data-loss.patch crypto-ccree-add-function-to-handle-cryptocell-tee-fips-error.patch crypto-ccree-don-t-map-aead-key-and-iv-on-stack.patch crypto-ccree-don-t-map-mac-key-on-stack.patch crypto-ccree-fix-mem-leak-on-error-path.patch crypto-ccree-handle-tee-fips-error-during-power-management-resume.patch crypto-ccree-host_power_down_en-should-be-the-last-cc-access-during-suspend.patch crypto-ccree-pm-resume-first-enable-the-source-clk.patch crypto-ccree-remove-special-handling-of-chained-sg.patch crypto-ccree-use-correct-internal-state-sizes-for-export.patch ext4-actually-request-zeroing-of-inode-table-after-grow.patch ext4-avoid-drop-reference-to-iloc.bh-twice.patch ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch ext4-make-sanity-check-in-mballoc-more-strict.patch ext4-protect-journal-inode-s-blocks-using-block_validity.patch hugetlb-use-same-fault-hash-key-for-shared-and-private-mappings.patch 
ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch jbd2-check-superblock-mapped-prior-to-committing.patch mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch mfd-max77620-fix-swapped-fps_period_max_us-values.patch mm-huge_memory-fix-vmf_insert_pfn_-pmd-pud-crash-handle-unaligned-addresses.patch mm-hugetlb.c-don-t-put_page-in-lock-of-hugetlb_lock.patch mm-mincore.c-make-mincore-more-conservative.patch mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch --- diff --git a/queue-4.19/acpi-pm-set-enable_for_wake-for-wakeup-gpes-during-suspend-to-idle.patch b/queue-4.19/acpi-pm-set-enable_for_wake-for-wakeup-gpes-during-suspend-to-idle.patch new file mode 100644 index 00000000000..4e0868b9d0d --- /dev/null +++ b/queue-4.19/acpi-pm-set-enable_for_wake-for-wakeup-gpes-during-suspend-to-idle.patch @@ -0,0 +1,86 @@ +From 2f844b61db8297a1f7a06adf2eb5c43381f2c183 Mon Sep 17 00:00:00 2001 +From: Rajat Jain +Date: Mon, 13 May 2019 12:17:08 -0700 +Subject: ACPI: PM: Set enable_for_wake for wakeup GPEs during suspend-to-idle + +From: Rajat Jain + +commit 2f844b61db8297a1f7a06adf2eb5c43381f2c183 upstream. + +I noticed that recently multiple systems (chromebooks) couldn't wake +from S0ix using LID or Keyboard after updating to a newer kernel. I +bisected and it turned up commit f941d3e41da7 ("ACPI: EC / PM: Disable +non-wakeup GPEs for suspend-to-idle"). I checked that the issue got +fixed if that commit was reverted. 
+ +I debugged and found that although PNP0C0D:00 (representing the LID) +is wake capable and should wakeup the system per the code in +acpi_wakeup_gpe_init() and in drivers/acpi/button.c: + +localhost /sys # cat /proc/acpi/wakeup +Device S-state Status Sysfs node +LID0 S4 *enabled platform:PNP0C0D:00 +CREC S5 *disabled platform:GOOG0004:00 + *disabled platform:cros-ec-dev.1.auto + *disabled platform:cros-ec-accel.0 + *disabled platform:cros-ec-accel.1 + *disabled platform:cros-ec-gyro.0 + *disabled platform:cros-ec-ring.0 + *disabled platform:cros-usbpd-charger.2.auto + *disabled platform:cros-usbpd-logger.3.auto +D015 S3 *enabled i2c:i2c-ELAN0000:00 +PENH S3 *enabled platform:PRP0001:00 +XHCI S3 *enabled pci:0000:00:14.0 +GLAN S4 *disabled +WIFI S3 *disabled pci:0000:00:14.3 +localhost /sys # + +On debugging, I found that its corresponding GPE is not being enabled. +The particular GPE's "gpe_register_info->enable_for_wake" does not +have any bits set when acpi_enable_all_wakeup_gpes() comes around to +use it. I looked at code and could not find any other code path that +should set the bits in "enable_for_wake" bitmask for the wake enabled +devices for s2idle. [I do see that it happens for S3 in +acpi_sleep_prepare()]. + +Thus I used the same call to enable the GPEs for wake enabled devices, +and verified that this fixes the regression I was seeing on multiple +of my devices. + +[ rjw: The problem is that commit f941d3e41da7 ("ACPI: EC / PM: + Disable non-wakeup GPEs for suspend-to-idle") forgot to add + the acpi_enable_wakeup_devices() call for s2idle along with + acpi_enable_all_wakeup_gpes(). ] + +Fixes: f941d3e41da7 ("ACPI: EC / PM: Disable non-wakeup GPEs for suspend-to-idle") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=203579 +Signed-off-by: Rajat Jain +[ rjw: Subject & changelog ] +Cc: 5.0+ # 5.0+ +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/acpi/sleep.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/acpi/sleep.c ++++ b/drivers/acpi/sleep.c +@@ -977,6 +977,8 @@ static int acpi_s2idle_prepare(void) + if (acpi_sci_irq_valid()) + enable_irq_wake(acpi_sci_irq); + ++ acpi_enable_wakeup_devices(ACPI_STATE_S0); ++ + /* Change the configuration of GPEs to avoid spurious wakeup. */ + acpi_enable_all_wakeup_gpes(); + acpi_os_wait_events_complete(); +@@ -1026,6 +1028,8 @@ static void acpi_s2idle_restore(void) + { + acpi_enable_all_runtime_gpes(); + ++ acpi_disable_wakeup_devices(ACPI_STATE_S0); ++ + if (acpi_sci_irq_valid()) + disable_irq_wake(acpi_sci_irq); + diff --git a/queue-4.19/bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch b/queue-4.19/bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch new file mode 100644 index 00000000000..b6bd0c5ece2 --- /dev/null +++ b/queue-4.19/bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch @@ -0,0 +1,81 @@ +From a4b732a248d12cbdb46999daf0bf288c011335eb Mon Sep 17 00:00:00 2001 +From: Liang Chen +Date: Thu, 25 Apr 2019 00:48:31 +0800 +Subject: bcache: fix a race between cache register and cacheset unregister + +From: Liang Chen + +commit a4b732a248d12cbdb46999daf0bf288c011335eb upstream. + +There is a race between cache device register and cache set unregister. +For an already registered cache device, register_bcache will call +bch_is_open to iterate through all cachesets and check every cache +there. The race occurs if cache_set_free executes at the same time and +clears the caches right before ca is dereferenced in bch_is_open_cache. +To close the race, let's make sure the clean up work is protected by +the bch_register_lock as well. 
+ +This issue can be reproduced as follows, +while true; do echo /dev/XXX> /sys/fs/bcache/register ; done& +while true; do echo 1> /sys/block/XXX/bcache/set/unregister ; done & + +and results in the following oops, + +[ +0.000053] BUG: unable to handle kernel NULL pointer dereference at 0000000000000998 +[ +0.000457] #PF error: [normal kernel read fault] +[ +0.000464] PGD 800000003ca9d067 P4D 800000003ca9d067 PUD 3ca9c067 PMD 0 +[ +0.000388] Oops: 0000 [#1] SMP PTI +[ +0.000269] CPU: 1 PID: 3266 Comm: bash Not tainted 5.0.0+ #6 +[ +0.000346] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014 +[ +0.000472] RIP: 0010:register_bcache+0x1829/0x1990 [bcache] +[ +0.000344] Code: b0 48 83 e8 50 48 81 fa e0 e1 10 c0 0f 84 a9 00 00 00 48 89 c6 48 89 ca 0f b7 ba 54 04 00 00 4c 8b 82 60 0c 00 00 85 ff 74 2f <49> 3b a8 98 09 00 00 74 4e 44 8d 47 ff 31 ff 49 c1 e0 03 eb 0d +[ +0.000839] RSP: 0018:ffff92ee804cbd88 EFLAGS: 00010202 +[ +0.000328] RAX: ffffffffc010e190 RBX: ffff918b5c6b5000 RCX: ffff918b7d8e0000 +[ +0.000399] RDX: ffff918b7d8e0000 RSI: ffffffffc010e190 RDI: 0000000000000001 +[ +0.000398] RBP: ffff918b7d318340 R08: 0000000000000000 R09: ffffffffb9bd2d7a +[ +0.000385] R10: ffff918b7eb253c0 R11: ffffb95980f51200 R12: ffffffffc010e1a0 +[ +0.000411] R13: fffffffffffffff2 R14: 000000000000000b R15: ffff918b7e232620 +[ +0.000384] FS: 00007f955bec2740(0000) GS:ffff918b7eb00000(0000) knlGS:0000000000000000 +[ +0.000420] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ +0.000801] CR2: 0000000000000998 CR3: 000000003cad6000 CR4: 00000000001406e0 +[ +0.000837] Call Trace: +[ +0.000682] ? _cond_resched+0x10/0x20 +[ +0.000691] ? __kmalloc+0x131/0x1b0 +[ +0.000710] kernfs_fop_write+0xfa/0x170 +[ +0.000733] __vfs_write+0x2e/0x190 +[ +0.000688] ? inode_security+0x10/0x30 +[ +0.000698] ? selinux_file_permission+0xd2/0x120 +[ +0.000752] ? 
security_file_permission+0x2b/0x100 +[ +0.000753] vfs_write+0xa8/0x1a0 +[ +0.000676] ksys_write+0x4d/0xb0 +[ +0.000699] do_syscall_64+0x3a/0xf0 +[ +0.000692] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Signed-off-by: Liang Chen +Cc: stable@vger.kernel.org +Signed-off-by: Coly Li +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/bcache/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1511,6 +1511,7 @@ static void cache_set_free(struct closur + bch_btree_cache_free(c); + bch_journal_free(c); + ++ mutex_lock(&bch_register_lock); + for_each_cache(ca, c, i) + if (ca) { + ca->set = NULL; +@@ -1529,7 +1530,6 @@ static void cache_set_free(struct closur + mempool_exit(&c->search); + kfree(c->devices); + +- mutex_lock(&bch_register_lock); + list_del(&c->list); + mutex_unlock(&bch_register_lock); + diff --git a/queue-4.19/bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch b/queue-4.19/bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch new file mode 100644 index 00000000000..1831241bcab --- /dev/null +++ b/queue-4.19/bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch @@ -0,0 +1,96 @@ +From 1bee2addc0c8470c8aaa65ef0599eeae96dd88bc Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Thu, 25 Apr 2019 00:48:33 +0800 +Subject: bcache: never set KEY_PTRS of journal key to 0 in journal_reclaim() + +From: Coly Li + +commit 1bee2addc0c8470c8aaa65ef0599eeae96dd88bc upstream. + +In journal_reclaim() ja->cur_idx of each cache will be update to +reclaim available journal buckets. Variable 'int n' is used to count how +many cache is successfully reclaimed, then n is set to c->journal.key +by SET_KEY_PTRS(). Later in journal_write_unlocked(), a for_each_cache() +loop will write the jset data onto each cache. 
+ +The problem is, if all journal buckets on each cache is full, the +following code in journal_reclaim(), + +529 for_each_cache(ca, c, iter) { +530 struct journal_device *ja = &ca->journal; +531 unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; +532 +533 /* No space available on this device */ +534 if (next == ja->discard_idx) +535 continue; +536 +537 ja->cur_idx = next; +538 k->ptr[n++] = MAKE_PTR(0, +539 bucket_to_sector(c, ca->sb.d[ja->cur_idx]), +540 ca->sb.nr_this_dev); +541 } +542 +543 bkey_init(k); +544 SET_KEY_PTRS(k, n); + +If there is no available bucket to reclaim, the if() condition at line +534 will always be true, and n remains 0. Then at line 544, SET_KEY_PTRS() +will set KEY_PTRS field of c->journal.key to 0. + +Setting KEY_PTRS field of c->journal.key to 0 is wrong. Because in +journal_write_unlocked() the journal data is written in following loop, + +649 for (i = 0; i < KEY_PTRS(k); i++) { +650-671 submit journal data to cache device +672 } + +If KEY_PTRS field is set to 0 in journal_reclaim(), the journal data +won't be written to cache device here. If system crashed or rebooted +before bkeys of the lost journal entries written into btree nodes, data +corruption will be reported during bcache reload after rebooting the +system. + +Indeed there is only one cache in a cache set, there is no need to set +KEY_PTRS field in journal_reclaim() at all. But in order to keep the +for_each_cache() logic consistent for now, this patch fixes the above +problem by not setting 0 KEY_PTRS of journal key, if there is no bucket +available to reclaim. 
+ +Signed-off-by: Coly Li +Reviewed-by: Hannes Reinecke +Cc: stable@vger.kernel.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/bcache/journal.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/md/bcache/journal.c ++++ b/drivers/md/bcache/journal.c +@@ -540,11 +540,11 @@ static void journal_reclaim(struct cache + ca->sb.nr_this_dev); + } + +- bkey_init(k); +- SET_KEY_PTRS(k, n); +- +- if (n) ++ if (n) { ++ bkey_init(k); ++ SET_KEY_PTRS(k, n); + c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; ++ } + out: + if (!journal_full(&c->journal)) + __closure_wake_up(&c->journal.wait); +@@ -671,6 +671,9 @@ static void journal_write_unlocked(struc + ca->journal.seq[ca->journal.cur_idx] = w->data->seq; + } + ++ /* If KEY_PTRS(k) == 0, this jset gets lost in air */ ++ BUG_ON(i == 0); ++ + atomic_dec_bug(&fifo_back(&c->journal.pin)); + bch_journal_next(&c->journal); + journal_reclaim(c); diff --git a/queue-4.19/bpf-arm64-remove-prefetch-insn-in-xadd-mapping.patch b/queue-4.19/bpf-arm64-remove-prefetch-insn-in-xadd-mapping.patch new file mode 100644 index 00000000000..6fe4607921c --- /dev/null +++ b/queue-4.19/bpf-arm64-remove-prefetch-insn-in-xadd-mapping.patch @@ -0,0 +1,53 @@ +From 8968c67a82ab7501bc3b9439c3624a49b42fe54c Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Fri, 26 Apr 2019 21:48:21 +0200 +Subject: bpf, arm64: remove prefetch insn in xadd mapping + +From: Daniel Borkmann + +commit 8968c67a82ab7501bc3b9439c3624a49b42fe54c upstream. + +Prefetch-with-intent-to-write is currently part of the XADD mapping in +the AArch64 JIT and follows the kernel's implementation of atomic_add. +This may interfere with other threads executing the LDXR/STXR loop, +leading to potential starvation and fairness issues. Drop the optional +prefetch instruction. 
+ +Fixes: 85f68fe89832 ("bpf, arm64: implement jiting of BPF_XADD") +Reported-by: Will Deacon +Signed-off-by: Daniel Borkmann +Acked-by: Jean-Philippe Brucker +Acked-by: Will Deacon +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/net/bpf_jit.h | 6 ------ + arch/arm64/net/bpf_jit_comp.c | 1 - + 2 files changed, 7 deletions(-) + +--- a/arch/arm64/net/bpf_jit.h ++++ b/arch/arm64/net/bpf_jit.h +@@ -100,12 +100,6 @@ + #define A64_STXR(sf, Rt, Rn, Rs) \ + A64_LSX(sf, Rt, Rn, Rs, STORE_EX) + +-/* Prefetch */ +-#define A64_PRFM(Rn, type, target, policy) \ +- aarch64_insn_gen_prefetch(Rn, AARCH64_INSN_PRFM_TYPE_##type, \ +- AARCH64_INSN_PRFM_TARGET_##target, \ +- AARCH64_INSN_PRFM_POLICY_##policy) +- + /* Add/subtract (immediate) */ + #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ + aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \ +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -736,7 +736,6 @@ emit_cond_jmp: + case BPF_STX | BPF_XADD | BPF_DW: + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_ADD(1, tmp, tmp, dst), ctx); +- emit(A64_PRFM(tmp, PST, L1, STRM), ctx); + emit(A64_LDXR(isdw, tmp2, tmp), ctx); + emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); + emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); diff --git a/queue-4.19/btrfs-check-the-first-key-and-level-for-cached-extent-buffer.patch b/queue-4.19/btrfs-check-the-first-key-and-level-for-cached-extent-buffer.patch new file mode 100644 index 00000000000..53f4233370c --- /dev/null +++ b/queue-4.19/btrfs-check-the-first-key-and-level-for-cached-extent-buffer.patch @@ -0,0 +1,166 @@ +From 448de471cd4cab0cedd15770082567a69a784a11 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Tue, 12 Mar 2019 17:10:40 +0800 +Subject: btrfs: Check the first key and level for cached extent buffer + +From: Qu Wenruo + +commit 448de471cd4cab0cedd15770082567a69a784a11 upstream. 
+ +[BUG] +When reading a file from a fuzzed image, kernel can panic like: + + BTRFS warning (device loop0): csum failed root 5 ino 270 off 0 csum 0x98f94189 expected csum 0x00000000 mirror 1 + assertion failed: !memcmp_extent_buffer(b, &disk_key, offsetof(struct btrfs_leaf, items[0].key), sizeof(disk_key)), file: fs/btrfs/ctree.c, line: 2544 + ------------[ cut here ]------------ + kernel BUG at fs/btrfs/ctree.h:3500! + invalid opcode: 0000 [#1] PREEMPT SMP NOPTI + RIP: 0010:btrfs_search_slot.cold.24+0x61/0x63 [btrfs] + Call Trace: + btrfs_lookup_csum+0x52/0x150 [btrfs] + __btrfs_lookup_bio_sums+0x209/0x640 [btrfs] + btrfs_submit_bio_hook+0x103/0x170 [btrfs] + submit_one_bio+0x59/0x80 [btrfs] + extent_read_full_page+0x58/0x80 [btrfs] + generic_file_read_iter+0x2f6/0x9d0 + __vfs_read+0x14d/0x1a0 + vfs_read+0x8d/0x140 + ksys_read+0x52/0xc0 + do_syscall_64+0x60/0x210 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +[CAUSE] +The fuzzed image has a corrupted leaf whose first key doesn't match its +parent: + + checksum tree key (CSUM_TREE ROOT_ITEM 0) + node 29741056 level 1 items 14 free 107 generation 19 owner CSUM_TREE + fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 + chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c + ... 
+ key (EXTENT_CSUM EXTENT_CSUM 79691776) block 29761536 gen 19 + + leaf 29761536 items 1 free space 1726 generation 19 owner CSUM_TREE + leaf 29761536 flags 0x1(WRITTEN) backref revision 1 + fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 + chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c + item 0 key (EXTENT_CSUM EXTENT_CSUM 8798638964736) itemoff 1751 itemsize 2244 + range start 8798638964736 end 8798641262592 length 2297856 + +When reading the above tree block, we have extent_buffer->refs = 2 in +the context: + +- initial one from __alloc_extent_buffer() + alloc_extent_buffer() + |- __alloc_extent_buffer() + |- atomic_set(&eb->refs, 1) + +- one being added to fs_info->buffer_radix + alloc_extent_buffer() + |- check_buffer_tree_ref() + |- atomic_inc(&eb->refs) + +So if even we call free_extent_buffer() in read_tree_block or other +similar situation, we only decrease the refs by 1, it doesn't reach 0 +and won't be freed right now. + +The staled eb and its corrupted content will still be kept cached. + +Furthermore, we have several extra cases where we either don't do first +key check or the check is not proper for all callers: + +- scrub + We just don't have first key in this context. + +- shared tree block + One tree block can be shared by several snapshot/subvolume trees. + In that case, the first key check for one subvolume doesn't apply to + another. + +So for the above reasons, a corrupted extent buffer can sneak into the +buffer cache. + +[FIX] +Call verify_level_key in read_block_for_search to do another +verification. For that purpose the function is exported. + +Due to above reasons, although we can free corrupted extent buffer from +cache, we still need the check in read_block_for_search(), for scrub and +shared tree blocks. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=202757 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=202759 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=202761 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=202767 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=202769 +Reported-by: Yoon Jungyeon +CC: stable@vger.kernel.org # 4.19+ +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.c | 10 ++++++++++ + fs/btrfs/disk-io.c | 10 +++++----- + fs/btrfs/disk-io.h | 3 +++ + 3 files changed, 18 insertions(+), 5 deletions(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -2436,6 +2436,16 @@ read_block_for_search(struct btrfs_root + if (tmp) { + /* first we do an atomic uptodate check */ + if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { ++ /* ++ * Do extra check for first_key, eb can be stale due to ++ * being cached, read from scrub, or have multiple ++ * parents (shared tree blocks). 
++ */ ++ if (btrfs_verify_level_key(fs_info, tmp, ++ parent_level - 1, &first_key, gen)) { ++ free_extent_buffer(tmp); ++ return -EUCLEAN; ++ } + *eb_ret = tmp; + return 0; + } +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -408,9 +408,9 @@ static int btrfs_check_super_csum(struct + return ret; + } + +-static int verify_level_key(struct btrfs_fs_info *fs_info, +- struct extent_buffer *eb, int level, +- struct btrfs_key *first_key, u64 parent_transid) ++int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, ++ struct extent_buffer *eb, int level, ++ struct btrfs_key *first_key, u64 parent_transid) + { + int found_level; + struct btrfs_key found_key; +@@ -487,8 +487,8 @@ static int btree_read_extent_buffer_page + if (verify_parent_transid(io_tree, eb, + parent_transid, 0)) + ret = -EIO; +- else if (verify_level_key(fs_info, eb, level, +- first_key, parent_transid)) ++ else if (btrfs_verify_level_key(fs_info, eb, level, ++ first_key, parent_transid)) + ret = -EUCLEAN; + else + break; +--- a/fs/btrfs/disk-io.h ++++ b/fs/btrfs/disk-io.h +@@ -39,6 +39,9 @@ static inline u64 btrfs_sb_offset(int mi + struct btrfs_device; + struct btrfs_fs_devices; + ++int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, ++ struct extent_buffer *eb, int level, ++ struct btrfs_key *first_key, u64 parent_transid); + struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, + u64 parent_transid, int level, + struct btrfs_key *first_key); diff --git a/queue-4.19/btrfs-correctly-free-extent-buffer-in-case-btree_read_extent_buffer_pages-fails.patch b/queue-4.19/btrfs-correctly-free-extent-buffer-in-case-btree_read_extent_buffer_pages-fails.patch new file mode 100644 index 00000000000..9865f058cd7 --- /dev/null +++ b/queue-4.19/btrfs-correctly-free-extent-buffer-in-case-btree_read_extent_buffer_pages-fails.patch @@ -0,0 +1,88 @@ +From 537f38f019fa0b762dbb4c0fc95d7fcce9db8e2d Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Thu, 14 Mar 2019 09:52:35 
+0200 +Subject: btrfs: Correctly free extent buffer in case btree_read_extent_buffer_pages fails + +From: Nikolay Borisov + +commit 537f38f019fa0b762dbb4c0fc95d7fcce9db8e2d upstream. + +If an eb fails to be read for whatever reason - it's corrupted on disk +and parent transid/key validations fail or IO for eb pages fail then +this buffer must be removed from the buffer cache. Currently the code +calls free_extent_buffer if an error occurs. Unfortunately this doesn't +achieve the desired behavior since btrfs_find_create_tree_block returns +with eb->refs == 2. + +On the other hand free_extent_buffer will only decrement the refs once +leaving it added to the buffer cache radix tree. This enables later +code to look up the buffer from the cache and utilize it potentially +leading to a crash. + +The correct way to free the buffer is to call free_extent_buffer_stale. +This function will correctly call atomic_dec explicitly for the buffer +and subsequently call release_extent_buffer which will decrement the +final reference thus correctly remove the invalid buffer from buffer +cache. This change affects only newly allocated buffers since they have +eb->refs == 2. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755 +Reported-by: Jungyeon +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -995,13 +995,18 @@ void readahead_tree_block(struct btrfs_f + { + struct extent_buffer *buf = NULL; + struct inode *btree_inode = fs_info->btree_inode; ++ int ret; + + buf = btrfs_find_create_tree_block(fs_info, bytenr); + if (IS_ERR(buf)) + return; +- read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, +- buf, WAIT_NONE, 0); +- free_extent_buffer(buf); ++ ++ ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, ++ WAIT_NONE, 0); ++ if (ret < 0) ++ free_extent_buffer_stale(buf); ++ else ++ free_extent_buffer(buf); + } + + int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, +@@ -1021,12 +1026,12 @@ int reada_tree_block_flagged(struct btrf + ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, + mirror_num); + if (ret) { +- free_extent_buffer(buf); ++ free_extent_buffer_stale(buf); + return ret; + } + + if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { +- free_extent_buffer(buf); ++ free_extent_buffer_stale(buf); + return -EIO; + } else if (extent_buffer_uptodate(buf)) { + *eb = buf; +@@ -1080,7 +1085,7 @@ struct extent_buffer *read_tree_block(st + ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + level, first_key); + if (ret) { +- free_extent_buffer(buf); ++ free_extent_buffer_stale(buf); + return ERR_PTR(ret); + } + return buf; diff --git a/queue-4.19/btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch b/queue-4.19/btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch new file mode 100644 index 00000000000..9a478d5ea32 --- /dev/null +++ 
b/queue-4.19/btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch @@ -0,0 +1,116 @@ +From bfc61c36260ca990937539cd648ede3cd749bc10 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 17 Apr 2019 11:30:30 +0100 +Subject: Btrfs: do not start a transaction at iterate_extent_inodes() + +From: Filipe Manana + +commit bfc61c36260ca990937539cd648ede3cd749bc10 upstream. + +When finding out which inodes have references on a particular extent, done +by backref.c:iterate_extent_inodes(), from the BTRFS_IOC_LOGICAL_INO (both +v1 and v2) ioctl and from scrub we use the transaction join API to grab a +reference on the currently running transaction, since in order to give +accurate results we need to inspect the delayed references of the currently +running transaction. + +However, if there is currently no running transaction, the join operation +will create a new transaction. This is inefficient as the transaction will +eventually be committed, doing unnecessary IO and introducing a potential +point of failure that will lead to a transaction abort due to -ENOSPC, as +recently reported [1]. + +That's because the join creates the transaction but does not reserve any +space, so when attempting to update the root item of the root passed to +btrfs_join_transaction(), during the transaction commit, we can end up +failing with -ENOSPC. Users of a join operation are supposed to actually +do some filesystem changes and reserve space by some means, which is not +the case of iterate_extent_inodes(), it is a read-only operation for all +contexts from which it is called. + +The reported [1] -ENOSPC failure stack trace is the following: + + heisenberg kernel: ------------[ cut here ]------------ + heisenberg kernel: BTRFS: Transaction aborted (error -28) + heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] +(...) 
+ heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 + heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 + heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] +(...) + heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 + heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 + heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 + heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 + heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 + heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 + heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 + heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 + heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + heisenberg kernel: Call Trace: + heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] + heisenberg kernel: ? _cond_resched+0x15/0x30 + heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] + heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] + heisenberg kernel: ? start_transaction+0x9d/0x3f0 [btrfs] + heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] + heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] + heisenberg kernel: kthread+0x112/0x130 + heisenberg kernel: ? kthread_bind+0x30/0x30 + heisenberg kernel: ret_from_fork+0x35/0x40 + heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- + +So fix that by using the attach API, which does not create a transaction +when there is currently no running transaction. 
+ +[1] https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ + +Reported-by: Zygo Blaxell +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/backref.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -1908,13 +1908,19 @@ int iterate_extent_inodes(struct btrfs_f + extent_item_objectid); + + if (!search_commit_root) { +- trans = btrfs_join_transaction(fs_info->extent_root); +- if (IS_ERR(trans)) +- return PTR_ERR(trans); ++ trans = btrfs_attach_transaction(fs_info->extent_root); ++ if (IS_ERR(trans)) { ++ if (PTR_ERR(trans) != -ENOENT && ++ PTR_ERR(trans) != -EROFS) ++ return PTR_ERR(trans); ++ trans = NULL; ++ } ++ } ++ ++ if (trans) + btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); +- } else { ++ else + down_read(&fs_info->commit_root_sem); +- } + + ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, + tree_mod_seq_elem.seq, &refs, +@@ -1947,7 +1953,7 @@ int iterate_extent_inodes(struct btrfs_f + + free_leaf_list(refs); + out: +- if (!search_commit_root) { ++ if (trans) { + btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); + btrfs_end_transaction(trans); + } else { diff --git a/queue-4.19/btrfs-do-not-start-a-transaction-during-fiemap.patch b/queue-4.19/btrfs-do-not-start-a-transaction-during-fiemap.patch new file mode 100644 index 00000000000..2abd8f69133 --- /dev/null +++ b/queue-4.19/btrfs-do-not-start-a-transaction-during-fiemap.patch @@ -0,0 +1,121 @@ +From 03628cdbc64db6262e50d0357960a4e9562676a1 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 15 Apr 2019 14:50:51 +0100 +Subject: Btrfs: do not start a transaction during fiemap + +From: Filipe Manana + +commit 03628cdbc64db6262e50d0357960a4e9562676a1 upstream. 
+ +During fiemap, for regular extents (non inline) we need to check if they +are shared and if they are, set the shared bit. Checking if an extent is +shared requires checking the delayed references of the currently running +transaction, since some reference might have not yet hit the extent tree +and be only in the in-memory delayed references. + +However we were using a transaction join for this, which creates a new +transaction when there is no transaction currently running. That means +that two more potential failures can happen: creating the transaction and +committing it. Further, if no write activity is currently happening in the +system, and fiemap calls keep being done, we end up creating and +committing transactions that do nothing. + +In some extreme cases this can result in the commit of the transaction +created by fiemap to fail with ENOSPC when updating the root item of a +subvolume tree because a join does not reserve any space, leading to a +trace like the following: + + heisenberg kernel: ------------[ cut here ]------------ + heisenberg kernel: BTRFS: Transaction aborted (error -28) + heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] +(...) + heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 + heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 + heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] +(...) 
+ heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 + heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 + heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 + heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 + heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 + heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 + heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 + heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 + heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + heisenberg kernel: Call Trace: + heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] + heisenberg kernel: ? _cond_resched+0x15/0x30 + heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] + heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] + heisenberg kernel: ? start_transaction+0x9d/0x3f0 [btrfs] + heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] + heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] + heisenberg kernel: kthread+0x112/0x130 + heisenberg kernel: ? kthread_bind+0x30/0x30 + heisenberg kernel: ret_from_fork+0x35/0x40 + heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- + +Since fiemap (and btrfs_check_shared()) is a read-only operation, do not do +a transaction join to avoid the overhead of creating a new transaction (if +there is currently no running transaction) and introducing a potential +point of failure when the new transaction gets committed, instead use a +transaction attach to grab a handle for the currently running transaction +if any. 
+ +Reported-by: Christoph Anton Mitterer +Link: https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ +Fixes: afce772e87c36c ("btrfs: fix check_shared for fiemap ioctl") +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/backref.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/backref.c ++++ b/fs/btrfs/backref.c +@@ -1452,8 +1452,8 @@ int btrfs_find_all_roots(struct btrfs_tr + * callers (such as fiemap) which want to know whether the extent is + * shared but do not need a ref count. + * +- * This attempts to allocate a transaction in order to account for +- * delayed refs, but continues on even when the alloc fails. ++ * This attempts to attach to the running transaction in order to account for ++ * delayed refs, but continues on even when no running transaction exists. + * + * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. 
+ */ +@@ -1476,13 +1476,16 @@ int btrfs_check_shared(struct btrfs_root + tmp = ulist_alloc(GFP_NOFS); + roots = ulist_alloc(GFP_NOFS); + if (!tmp || !roots) { +- ulist_free(tmp); +- ulist_free(roots); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto out; + } + +- trans = btrfs_join_transaction(root); ++ trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { ++ if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { ++ ret = PTR_ERR(trans); ++ goto out; ++ } + trans = NULL; + down_read(&fs_info->commit_root_sem); + } else { +@@ -1515,6 +1518,7 @@ int btrfs_check_shared(struct btrfs_root + } else { + up_read(&fs_info->commit_root_sem); + } ++out: + ulist_free(tmp); + ulist_free(roots); + return ret; diff --git a/queue-4.19/btrfs-honour-fitrim-range-constraints-during-free-space-trim.patch b/queue-4.19/btrfs-honour-fitrim-range-constraints-during-free-space-trim.patch new file mode 100644 index 00000000000..b66e6a68654 --- /dev/null +++ b/queue-4.19/btrfs-honour-fitrim-range-constraints-during-free-space-trim.patch @@ -0,0 +1,90 @@ +From c2d1b3aae33605a61cbab445d8ae1c708ccd2698 Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Mon, 25 Mar 2019 14:31:21 +0200 +Subject: btrfs: Honour FITRIM range constraints during free space trim + +From: Nikolay Borisov + +commit c2d1b3aae33605a61cbab445d8ae1c708ccd2698 upstream. + +Up until now trimming the freespace was done irrespective of what the +arguments of the FITRIM ioctl were. For example fstrim's -o/-l arguments +will be entirely ignored. Fix it by correctly handling those parameters. +This requires breaking if the found freespace extent is after the end of +the passed range as well as completing trim after trimming +fstrim_range::len bytes. 
+ +Fixes: 499f377f49f0 ("btrfs: iterate over unused chunk space in FITRIM") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -10789,9 +10789,9 @@ int btrfs_error_unpin_extent_range(struc + * held back allocations. + */ + static int btrfs_trim_free_extents(struct btrfs_device *device, +- u64 minlen, u64 *trimmed) ++ struct fstrim_range *range, u64 *trimmed) + { +- u64 start = 0, len = 0; ++ u64 start = range->start, len = 0; + int ret; + + *trimmed = 0; +@@ -10834,8 +10834,8 @@ static int btrfs_trim_free_extents(struc + if (!trans) + up_read(&fs_info->commit_root_sem); + +- ret = find_free_dev_extent_start(trans, device, minlen, start, +- &start, &len); ++ ret = find_free_dev_extent_start(trans, device, range->minlen, ++ start, &start, &len); + if (trans) { + up_read(&fs_info->commit_root_sem); + btrfs_put_transaction(trans); +@@ -10848,6 +10848,16 @@ static int btrfs_trim_free_extents(struc + break; + } + ++ /* If we are out of the passed range break */ ++ if (start > range->start + range->len - 1) { ++ mutex_unlock(&fs_info->chunk_mutex); ++ ret = 0; ++ break; ++ } ++ ++ start = max(range->start, start); ++ len = min(range->len, len); ++ + ret = btrfs_issue_discard(device->bdev, start, len, &bytes); + mutex_unlock(&fs_info->chunk_mutex); + +@@ -10857,6 +10867,10 @@ static int btrfs_trim_free_extents(struc + start += len; + *trimmed += bytes; + ++ /* We've trimmed enough */ ++ if (*trimmed >= range->len) ++ break; ++ + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; + break; +@@ -10940,8 +10954,7 @@ int btrfs_trim_fs(struct btrfs_fs_info * + mutex_lock(&fs_info->fs_devices->device_list_mutex); + devices = &fs_info->fs_devices->devices; + list_for_each_entry(device, 
devices, dev_list) { +- ret = btrfs_trim_free_extents(device, range->minlen, +- &group_trimmed); ++ ret = btrfs_trim_free_extents(device, range, &group_trimmed); + if (ret) { + dev_failed++; + dev_ret = ret; diff --git a/queue-4.19/btrfs-send-flush-dellaloc-in-order-to-avoid-data-loss.patch b/queue-4.19/btrfs-send-flush-dellaloc-in-order-to-avoid-data-loss.patch new file mode 100644 index 00000000000..bee847a0798 --- /dev/null +++ b/queue-4.19/btrfs-send-flush-dellaloc-in-order-to-avoid-data-loss.patch @@ -0,0 +1,135 @@ +From 9f89d5de8631c7930898a601b6612e271aa2261c Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 15 Apr 2019 09:29:36 +0100 +Subject: Btrfs: send, flush dellaloc in order to avoid data loss + +From: Filipe Manana + +commit 9f89d5de8631c7930898a601b6612e271aa2261c upstream. + +When we set a subvolume to read-only mode we do not flush dellaloc for any +of its inodes (except if the filesystem is mounted with -o flushoncommit), +since it does not affect correctness for any subsequent operations - except +for a future send operation. The send operation will not be able to see the +delalloc data since the respective file extent items, inode item updates, +backreferences, etc, have not hit yet the subvolume and extent trees. + +Effectively this means data loss, since the send stream will not contain +any data from existing delalloc. Another problem from this is that if the +writeback starts and finishes while the send operation is in progress, we +have the subvolume tree being modified concurrently which can result +in send failing unexpectedly with EIO or hitting runtime errors, assertion +failures or hitting BUG_ONs, etc. 
+ +Simple reproducer: + + $ mkfs.btrfs -f /dev/sdb + $ mount /dev/sdb /mnt + + $ btrfs subvolume create /mnt/sv + $ xfs_io -f -c "pwrite -S 0xea 0 108K" /mnt/sv/foo + + $ btrfs property set /mnt/sv ro true + $ btrfs send -f /tmp/send.stream /mnt/sv + + $ od -t x1 -A d /mnt/sv/foo + 0000000 ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea + * + 0110592 + + $ umount /mnt + $ mkfs.btrfs -f /dev/sdc + $ mount /dev/sdc /mnt + + $ btrfs receive -f /tmp/send.stream /mnt + $ echo $? + 0 + $ od -t x1 -A d /mnt/sv/foo + 0000000 + # ---> empty file + +Since this a problem that affects send only, fix it in send by flushing +dellaloc for all the roots used by the send operation before send starts +to process the commit roots. + +This is a problem that affects send since it was introduced (commit +31db9f7c23fbf7 ("Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive")) +but backporting it to older kernels has some dependencies: + +- For kernels between 3.19 and 4.20, it depends on commit 3cd24c698004d2 + ("btrfs: use tagged writepage to mitigate livelock of snapshot") because + the function btrfs_start_delalloc_snapshot() does not exist before that + commit. So one has to either pick that commit or replace the calls to + btrfs_start_delalloc_snapshot() in this patch with calls to + btrfs_start_delalloc_inodes(). + +- For kernels older than 3.19 it also requires commit e5fa8f865b3324 + ("Btrfs: ensure send always works on roots without orphans") because + it depends on the function ensure_commit_roots_uptodate() which that + commits introduced. + +- No dependencies for 5.0+ kernels. + +A test case for fstests follows soon. 
+ +CC: stable@vger.kernel.org # 3.19+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/send.c | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -6583,6 +6583,38 @@ commit_trans: + return btrfs_commit_transaction(trans); + } + ++/* ++ * Make sure any existing dellaloc is flushed for any root used by a send ++ * operation so that we do not miss any data and we do not race with writeback ++ * finishing and changing a tree while send is using the tree. This could ++ * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and ++ * a send operation then uses the subvolume. ++ * After flushing delalloc ensure_commit_roots_uptodate() must be called. ++ */ ++static int flush_delalloc_roots(struct send_ctx *sctx) ++{ ++ struct btrfs_root *root = sctx->parent_root; ++ int ret; ++ int i; ++ ++ if (root) { ++ ret = btrfs_start_delalloc_snapshot(root); ++ if (ret) ++ return ret; ++ btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); ++ } ++ ++ for (i = 0; i < sctx->clone_roots_cnt; i++) { ++ root = sctx->clone_roots[i].root; ++ ret = btrfs_start_delalloc_snapshot(root); ++ if (ret) ++ return ret; ++ btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); ++ } ++ ++ return 0; ++} ++ + static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) + { + spin_lock(&root->root_item_lock); +@@ -6807,6 +6839,10 @@ long btrfs_ioctl_send(struct file *mnt_f + NULL); + sort_clone_roots = 1; + ++ ret = flush_delalloc_roots(sctx); ++ if (ret) ++ goto out; ++ + ret = ensure_commit_roots_uptodate(sctx); + if (ret) + goto out; diff --git a/queue-4.19/crypto-ccree-add-function-to-handle-cryptocell-tee-fips-error.patch b/queue-4.19/crypto-ccree-add-function-to-handle-cryptocell-tee-fips-error.patch new file mode 100644 index 00000000000..51d5df13694 --- /dev/null +++ 
b/queue-4.19/crypto-ccree-add-function-to-handle-cryptocell-tee-fips-error.patch @@ -0,0 +1,89 @@ +From 897ab2316910a66bb048f1c9cefa25e6a592dcd7 Mon Sep 17 00:00:00 2001 +From: Ofir Drang +Date: Thu, 18 Apr 2019 16:39:09 +0300 +Subject: crypto: ccree - add function to handle cryptocell tee fips error + +From: Ofir Drang + +commit 897ab2316910a66bb048f1c9cefa25e6a592dcd7 upstream. + +Adds function that checks if cryptocell tee fips error occurred +and in such case triggers system error through kernel panic. +Change fips function to use this new routine. + +Signed-off-by: Ofir Drang +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_fips.c | 23 +++++++++++++++-------- + drivers/crypto/ccree/cc_fips.h | 2 ++ + 2 files changed, 17 insertions(+), 8 deletions(-) + +--- a/drivers/crypto/ccree/cc_fips.c ++++ b/drivers/crypto/ccree/cc_fips.c +@@ -72,20 +72,28 @@ static inline void tee_fips_error(struct + dev_err(dev, "TEE reported error!\n"); + } + ++/* ++ * This function check if cryptocell tee fips error occurred ++ * and in such case triggers system error ++ */ ++void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata) ++{ ++ struct device *dev = drvdata_to_dev(p_drvdata); ++ ++ if (!cc_get_tee_fips_status(p_drvdata)) ++ tee_fips_error(dev); ++} ++ + /* Deferred service handler, run as interrupt-fired tasklet */ + static void fips_dsr(unsigned long devarg) + { + struct cc_drvdata *drvdata = (struct cc_drvdata *)devarg; +- struct device *dev = drvdata_to_dev(drvdata); +- u32 irq, state, val; ++ u32 irq, val; + + irq = (drvdata->irq & (CC_GPR0_IRQ_MASK)); + + if (irq) { +- state = cc_ioread(drvdata, CC_REG(GPR_HOST)); +- +- if (state != (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK)) +- tee_fips_error(dev); ++ cc_tee_handle_fips_error(drvdata); + } + + /* after verifing that there is nothing to do, +@@ -113,8 +121,7 @@ int cc_fips_init(struct cc_drvdata 
*p_dr + dev_dbg(dev, "Initializing fips tasklet\n"); + tasklet_init(&fips_h->tasklet, fips_dsr, (unsigned long)p_drvdata); + +- if (!cc_get_tee_fips_status(p_drvdata)) +- tee_fips_error(dev); ++ cc_tee_handle_fips_error(p_drvdata); + + return 0; + } +--- a/drivers/crypto/ccree/cc_fips.h ++++ b/drivers/crypto/ccree/cc_fips.h +@@ -18,6 +18,7 @@ int cc_fips_init(struct cc_drvdata *p_dr + void cc_fips_fini(struct cc_drvdata *drvdata); + void fips_handler(struct cc_drvdata *drvdata); + void cc_set_ree_fips_status(struct cc_drvdata *drvdata, bool ok); ++void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata); + + #else /* CONFIG_CRYPTO_FIPS */ + +@@ -30,6 +31,7 @@ static inline void cc_fips_fini(struct c + static inline void cc_set_ree_fips_status(struct cc_drvdata *drvdata, + bool ok) {} + static inline void fips_handler(struct cc_drvdata *drvdata) {} ++static inline void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata) {} + + #endif /* CONFIG_CRYPTO_FIPS */ + diff --git a/queue-4.19/crypto-ccree-don-t-map-aead-key-and-iv-on-stack.patch b/queue-4.19/crypto-ccree-don-t-map-aead-key-and-iv-on-stack.patch new file mode 100644 index 00000000000..7f1d5799991 --- /dev/null +++ b/queue-4.19/crypto-ccree-don-t-map-aead-key-and-iv-on-stack.patch @@ -0,0 +1,120 @@ +From e8662a6a5f8f7f2cadc0edb934aef622d96ac3ee Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Thu, 18 Apr 2019 16:39:05 +0300 +Subject: crypto: ccree - don't map AEAD key and IV on stack + +From: Gilad Ben-Yossef + +commit e8662a6a5f8f7f2cadc0edb934aef622d96ac3ee upstream. + +The AEAD authenc key and IVs might be passed to us on stack. Copy it to +a slab buffer before mapping to gurantee proper DMA mapping. 
+ +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_aead.c | 11 ++++++++++- + drivers/crypto/ccree/cc_buffer_mgr.c | 15 ++++++++++++--- + drivers/crypto/ccree/cc_driver.h | 1 + + 3 files changed, 23 insertions(+), 4 deletions(-) + +--- a/drivers/crypto/ccree/cc_aead.c ++++ b/drivers/crypto/ccree/cc_aead.c +@@ -415,7 +415,7 @@ static int validate_keys_sizes(struct cc + /* This function prepers the user key so it can pass to the hmac processing + * (copy to intenral buffer or hash in case of key longer than block + */ +-static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, ++static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, + unsigned int keylen) + { + dma_addr_t key_dma_addr = 0; +@@ -428,6 +428,7 @@ static int cc_get_plain_hmac_key(struct + unsigned int hashmode; + unsigned int idx = 0; + int rc = 0; ++ u8 *key = NULL; + struct cc_hw_desc desc[MAX_AEAD_SETKEY_SEQ]; + dma_addr_t padded_authkey_dma_addr = + ctx->auth_state.hmac.padded_authkey_dma_addr; +@@ -446,11 +447,17 @@ static int cc_get_plain_hmac_key(struct + } + + if (keylen != 0) { ++ ++ key = kmemdup(authkey, keylen, GFP_KERNEL); ++ if (!key) ++ return -ENOMEM; ++ + key_dma_addr = dma_map_single(dev, (void *)key, keylen, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, key_dma_addr)) { + dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", + key, keylen); ++ kzfree(key); + return -ENOMEM; + } + if (keylen > blocksize) { +@@ -533,6 +540,8 @@ static int cc_get_plain_hmac_key(struct + if (key_dma_addr) + dma_unmap_single(dev, key_dma_addr, keylen, DMA_TO_DEVICE); + ++ kzfree(key); ++ + return rc; + } + +--- a/drivers/crypto/ccree/cc_buffer_mgr.c ++++ b/drivers/crypto/ccree/cc_buffer_mgr.c +@@ -560,6 +560,7 @@ void cc_unmap_aead_request(struct device + if (areq_ctx->gen_ctx.iv_dma_addr) { + dma_unmap_single(dev, areq_ctx->gen_ctx.iv_dma_addr, + 
hw_iv_size, DMA_BIDIRECTIONAL); ++ kzfree(areq_ctx->gen_ctx.iv); + } + + /* Release pool */ +@@ -664,19 +665,27 @@ static int cc_aead_chain_iv(struct cc_dr + struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + unsigned int hw_iv_size = areq_ctx->hw_iv_size; + struct device *dev = drvdata_to_dev(drvdata); ++ gfp_t flags = cc_gfp_flags(&req->base); + int rc = 0; + + if (!req->iv) { + areq_ctx->gen_ctx.iv_dma_addr = 0; ++ areq_ctx->gen_ctx.iv = NULL; + goto chain_iv_exit; + } + +- areq_ctx->gen_ctx.iv_dma_addr = dma_map_single(dev, req->iv, +- hw_iv_size, +- DMA_BIDIRECTIONAL); ++ areq_ctx->gen_ctx.iv = kmemdup(req->iv, hw_iv_size, flags); ++ if (!areq_ctx->gen_ctx.iv) ++ return -ENOMEM; ++ ++ areq_ctx->gen_ctx.iv_dma_addr = ++ dma_map_single(dev, areq_ctx->gen_ctx.iv, hw_iv_size, ++ DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, areq_ctx->gen_ctx.iv_dma_addr)) { + dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", + hw_iv_size, req->iv); ++ kzfree(areq_ctx->gen_ctx.iv); ++ areq_ctx->gen_ctx.iv = NULL; + rc = -ENOMEM; + goto chain_iv_exit; + } +--- a/drivers/crypto/ccree/cc_driver.h ++++ b/drivers/crypto/ccree/cc_driver.h +@@ -162,6 +162,7 @@ struct cc_alg_template { + + struct async_gen_req_ctx { + dma_addr_t iv_dma_addr; ++ u8 *iv; + enum drv_crypto_direction op_type; + }; + diff --git a/queue-4.19/crypto-ccree-don-t-map-mac-key-on-stack.patch b/queue-4.19/crypto-ccree-don-t-map-mac-key-on-stack.patch new file mode 100644 index 00000000000..1328148f611 --- /dev/null +++ b/queue-4.19/crypto-ccree-don-t-map-mac-key-on-stack.patch @@ -0,0 +1,91 @@ +From 874e163759f27e0a9988c5d1f4605e3f25564fd2 Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Thu, 18 Apr 2019 16:39:04 +0300 +Subject: crypto: ccree - don't map MAC key on stack + +From: Gilad Ben-Yossef + +commit 874e163759f27e0a9988c5d1f4605e3f25564fd2 upstream. + +The MAC hash key might be passed to us on stack. Copy it to +a slab buffer before mapping to gurantee proper DMA mapping. 
+ +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_hash.c | 24 +++++++++++++++++++++--- + 1 file changed, 21 insertions(+), 3 deletions(-) + +--- a/drivers/crypto/ccree/cc_hash.c ++++ b/drivers/crypto/ccree/cc_hash.c +@@ -64,6 +64,7 @@ struct cc_hash_alg { + struct hash_key_req_ctx { + u32 keylen; + dma_addr_t key_dma_addr; ++ u8 *key; + }; + + /* hash per-session context */ +@@ -724,13 +725,20 @@ static int cc_hash_setkey(struct crypto_ + ctx->key_params.keylen = keylen; + ctx->key_params.key_dma_addr = 0; + ctx->is_hmac = true; ++ ctx->key_params.key = NULL; + + if (keylen) { ++ ctx->key_params.key = kmemdup(key, keylen, GFP_KERNEL); ++ if (!ctx->key_params.key) ++ return -ENOMEM; ++ + ctx->key_params.key_dma_addr = +- dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); ++ dma_map_single(dev, (void *)ctx->key_params.key, keylen, ++ DMA_TO_DEVICE); + if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { + dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", +- key, keylen); ++ ctx->key_params.key, keylen); ++ kzfree(ctx->key_params.key); + return -ENOMEM; + } + dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n", +@@ -881,6 +889,9 @@ out: + dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n", + &ctx->key_params.key_dma_addr, ctx->key_params.keylen); + } ++ ++ kzfree(ctx->key_params.key); ++ + return rc; + } + +@@ -907,11 +918,16 @@ static int cc_xcbc_setkey(struct crypto_ + + ctx->key_params.keylen = keylen; + ++ ctx->key_params.key = kmemdup(key, keylen, GFP_KERNEL); ++ if (!ctx->key_params.key) ++ return -ENOMEM; ++ + ctx->key_params.key_dma_addr = +- dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); ++ dma_map_single(dev, ctx->key_params.key, keylen, DMA_TO_DEVICE); + if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { + dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", + key, 
keylen); ++ kzfree(ctx->key_params.key); + return -ENOMEM; + } + dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n", +@@ -963,6 +979,8 @@ static int cc_xcbc_setkey(struct crypto_ + dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n", + &ctx->key_params.key_dma_addr, ctx->key_params.keylen); + ++ kzfree(ctx->key_params.key); ++ + return rc; + } + diff --git a/queue-4.19/crypto-ccree-fix-mem-leak-on-error-path.patch b/queue-4.19/crypto-ccree-fix-mem-leak-on-error-path.patch new file mode 100644 index 00000000000..7e6a2754081 --- /dev/null +++ b/queue-4.19/crypto-ccree-fix-mem-leak-on-error-path.patch @@ -0,0 +1,55 @@ +From d574b707c873d6ef1a2a155f8cfcfecd821e9a2e Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Thu, 18 Apr 2019 16:38:50 +0300 +Subject: crypto: ccree - fix mem leak on error path + +From: Gilad Ben-Yossef + +commit d574b707c873d6ef1a2a155f8cfcfecd821e9a2e upstream. + +Fix a memory leak on the error path of IV generation code. + +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_ivgen.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +--- a/drivers/crypto/ccree/cc_ivgen.c ++++ b/drivers/crypto/ccree/cc_ivgen.c +@@ -154,9 +154,6 @@ void cc_ivgen_fini(struct cc_drvdata *dr + } + + ivgen_ctx->pool = NULL_SRAM_ADDR; +- +- /* release "this" context */ +- kfree(ivgen_ctx); + } + + /*! +@@ -174,10 +171,12 @@ int cc_ivgen_init(struct cc_drvdata *drv + int rc; + + /* Allocate "this" context */ +- ivgen_ctx = kzalloc(sizeof(*ivgen_ctx), GFP_KERNEL); ++ ivgen_ctx = devm_kzalloc(device, sizeof(*ivgen_ctx), GFP_KERNEL); + if (!ivgen_ctx) + return -ENOMEM; + ++ drvdata->ivgen_handle = ivgen_ctx; ++ + /* Allocate pool's header for initial enc. 
key/IV */ + ivgen_ctx->pool_meta = dma_alloc_coherent(device, CC_IVPOOL_META_SIZE, + &ivgen_ctx->pool_meta_dma, +@@ -196,8 +195,6 @@ int cc_ivgen_init(struct cc_drvdata *drv + goto out; + } + +- drvdata->ivgen_handle = ivgen_ctx; +- + return cc_init_iv_sram(drvdata); + + out: diff --git a/queue-4.19/crypto-ccree-handle-tee-fips-error-during-power-management-resume.patch b/queue-4.19/crypto-ccree-handle-tee-fips-error-during-power-management-resume.patch new file mode 100644 index 00000000000..2d60a870963 --- /dev/null +++ b/queue-4.19/crypto-ccree-handle-tee-fips-error-during-power-management-resume.patch @@ -0,0 +1,48 @@ +From 7138377ce10455b7183c6dde4b2c51b33f464c45 Mon Sep 17 00:00:00 2001 +From: Ofir Drang +Date: Thu, 18 Apr 2019 16:39:10 +0300 +Subject: crypto: ccree - handle tee fips error during power management resume + +From: Ofir Drang + +commit 7138377ce10455b7183c6dde4b2c51b33f464c45 upstream. + +In order to support a cryptocell tee fips error that may occur while +cryptocell ree is suspended, a cc_tee_handle_fips_error call was added +to the cc_pm_resume function. 
+ +Signed-off-by: Ofir Drang +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_pm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/ccree/cc_pm.c ++++ b/drivers/crypto/ccree/cc_pm.c +@@ -11,6 +11,7 @@ + #include "cc_ivgen.h" + #include "cc_hash.h" + #include "cc_pm.h" ++#include "cc_fips.h" + + #define POWER_DOWN_ENABLE 0x01 + #define POWER_DOWN_DISABLE 0x00 +@@ -50,12 +51,13 @@ int cc_pm_resume(struct device *dev) + } + + cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); +- + rc = init_cc_regs(drvdata, false); + if (rc) { + dev_err(dev, "init_cc_regs (%x)\n", rc); + return rc; + } ++ /* check if tee fips error occurred during power down */ ++ cc_tee_handle_fips_error(drvdata); + + rc = cc_resume_req_queue(drvdata); + if (rc) { diff --git a/queue-4.19/crypto-ccree-host_power_down_en-should-be-the-last-cc-access-during-suspend.patch b/queue-4.19/crypto-ccree-host_power_down_en-should-be-the-last-cc-access-during-suspend.patch new file mode 100644 index 00000000000..4b373d433a0 --- /dev/null +++ b/queue-4.19/crypto-ccree-host_power_down_en-should-be-the-last-cc-access-during-suspend.patch @@ -0,0 +1,41 @@ +From 3499efbeed39d114873267683b9e776bcb34b058 Mon Sep 17 00:00:00 2001 +From: Ofir Drang +Date: Thu, 18 Apr 2019 16:39:08 +0300 +Subject: crypto: ccree - HOST_POWER_DOWN_EN should be the last CC access during suspend + +From: Ofir Drang + +commit 3499efbeed39d114873267683b9e776bcb34b058 upstream. + +During power management suspend the driver needs to prepare the device +for the power down operation and as a last indication write to the +HOST_POWER_DOWN_EN register which signals to the hardware that +the ccree is ready for power down. 
+ +Signed-off-by: Ofir Drang +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_pm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/crypto/ccree/cc_pm.c ++++ b/drivers/crypto/ccree/cc_pm.c +@@ -25,13 +25,13 @@ int cc_pm_suspend(struct device *dev) + int rc; + + dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); +- cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); + rc = cc_suspend_req_queue(drvdata); + if (rc) { + dev_err(dev, "cc_suspend_req_queue (%x)\n", rc); + return rc; + } + fini_cc_regs(drvdata); ++ cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); + cc_clk_off(drvdata); + return 0; + } diff --git a/queue-4.19/crypto-ccree-pm-resume-first-enable-the-source-clk.patch b/queue-4.19/crypto-ccree-pm-resume-first-enable-the-source-clk.patch new file mode 100644 index 00000000000..ee9ae32409f --- /dev/null +++ b/queue-4.19/crypto-ccree-pm-resume-first-enable-the-source-clk.patch @@ -0,0 +1,42 @@ +From 7766dd774d80463cec7b81d90c8672af91de2da1 Mon Sep 17 00:00:00 2001 +From: Ofir Drang +Date: Thu, 18 Apr 2019 16:39:06 +0300 +Subject: crypto: ccree - pm resume first enable the source clk + +From: Ofir Drang + +commit 7766dd774d80463cec7b81d90c8672af91de2da1 upstream. + +On power management resume function first enable the device clk source +to allow access to the device registers. 
+ +Signed-off-by: Ofir Drang +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_pm.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/crypto/ccree/cc_pm.c ++++ b/drivers/crypto/ccree/cc_pm.c +@@ -42,14 +42,15 @@ int cc_pm_resume(struct device *dev) + struct cc_drvdata *drvdata = dev_get_drvdata(dev); + + dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n"); +- cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); +- ++ /* Enables the device source clk */ + rc = cc_clk_on(drvdata); + if (rc) { + dev_err(dev, "failed getting clock back on. We're toast.\n"); + return rc; + } + ++ cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); ++ + rc = init_cc_regs(drvdata, false); + if (rc) { + dev_err(dev, "init_cc_regs (%x)\n", rc); diff --git a/queue-4.19/crypto-ccree-remove-special-handling-of-chained-sg.patch b/queue-4.19/crypto-ccree-remove-special-handling-of-chained-sg.patch new file mode 100644 index 00000000000..b7ac3d43401 --- /dev/null +++ b/queue-4.19/crypto-ccree-remove-special-handling-of-chained-sg.patch @@ -0,0 +1,216 @@ +From c4b22bf51b815fb61a35a27fc847a88bc28ebb63 Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Thu, 18 Apr 2019 16:38:48 +0300 +Subject: crypto: ccree - remove special handling of chained sg + +From: Gilad Ben-Yossef + +commit c4b22bf51b815fb61a35a27fc847a88bc28ebb63 upstream. + +We were handling chained scattergather lists with specialized code +needlessly as the regular sg APIs handle them just fine. The code +handling this also had an (unused) code path with a use-before-init +error, flagged by Coverity. + +Remove all special handling of chained sg and leave their handling +to the regular sg APIs. 
+ +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_buffer_mgr.c | 98 +++++++---------------------------- + 1 file changed, 22 insertions(+), 76 deletions(-) + +--- a/drivers/crypto/ccree/cc_buffer_mgr.c ++++ b/drivers/crypto/ccree/cc_buffer_mgr.c +@@ -83,24 +83,17 @@ static void cc_copy_mac(struct device *d + */ + static unsigned int cc_get_sgl_nents(struct device *dev, + struct scatterlist *sg_list, +- unsigned int nbytes, u32 *lbytes, +- bool *is_chained) ++ unsigned int nbytes, u32 *lbytes) + { + unsigned int nents = 0; + + while (nbytes && sg_list) { +- if (sg_list->length) { +- nents++; +- /* get the number of bytes in the last entry */ +- *lbytes = nbytes; +- nbytes -= (sg_list->length > nbytes) ? +- nbytes : sg_list->length; +- sg_list = sg_next(sg_list); +- } else { +- sg_list = (struct scatterlist *)sg_page(sg_list); +- if (is_chained) +- *is_chained = true; +- } ++ nents++; ++ /* get the number of bytes in the last entry */ ++ *lbytes = nbytes; ++ nbytes -= (sg_list->length > nbytes) ? 
++ nbytes : sg_list->length; ++ sg_list = sg_next(sg_list); + } + dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes); + return nents; +@@ -142,7 +135,7 @@ void cc_copy_sg_portion(struct device *d + { + u32 nents, lbytes; + +- nents = cc_get_sgl_nents(dev, sg, end, &lbytes, NULL); ++ nents = cc_get_sgl_nents(dev, sg, end, &lbytes); + sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip, + (direct == CC_SG_TO_BUF)); + } +@@ -311,40 +304,10 @@ static void cc_add_sg_entry(struct devic + sgl_data->num_of_buffers++; + } + +-static int cc_dma_map_sg(struct device *dev, struct scatterlist *sg, u32 nents, +- enum dma_data_direction direction) +-{ +- u32 i, j; +- struct scatterlist *l_sg = sg; +- +- for (i = 0; i < nents; i++) { +- if (!l_sg) +- break; +- if (dma_map_sg(dev, l_sg, 1, direction) != 1) { +- dev_err(dev, "dma_map_page() sg buffer failed\n"); +- goto err; +- } +- l_sg = sg_next(l_sg); +- } +- return nents; +- +-err: +- /* Restore mapped parts */ +- for (j = 0; j < i; j++) { +- if (!sg) +- break; +- dma_unmap_sg(dev, sg, 1, direction); +- sg = sg_next(sg); +- } +- return 0; +-} +- + static int cc_map_sg(struct device *dev, struct scatterlist *sg, + unsigned int nbytes, int direction, u32 *nents, + u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents) + { +- bool is_chained = false; +- + if (sg_is_last(sg)) { + /* One entry only case -set to DLLI */ + if (dma_map_sg(dev, sg, 1, direction) != 1) { +@@ -358,35 +321,21 @@ static int cc_map_sg(struct device *dev, + *nents = 1; + *mapped_nents = 1; + } else { /*sg_is_last*/ +- *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes, +- &is_chained); ++ *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); + if (*nents > max_sg_nents) { + *nents = 0; + dev_err(dev, "Too many fragments. 
current %d max %d\n", + *nents, max_sg_nents); + return -ENOMEM; + } +- if (!is_chained) { +- /* In case of mmu the number of mapped nents might +- * be changed from the original sgl nents +- */ +- *mapped_nents = dma_map_sg(dev, sg, *nents, direction); +- if (*mapped_nents == 0) { +- *nents = 0; +- dev_err(dev, "dma_map_sg() sg buffer failed\n"); +- return -ENOMEM; +- } +- } else { +- /*In this case the driver maps entry by entry so it +- * must have the same nents before and after map +- */ +- *mapped_nents = cc_dma_map_sg(dev, sg, *nents, +- direction); +- if (*mapped_nents != *nents) { +- *nents = *mapped_nents; +- dev_err(dev, "dma_map_sg() sg buffer failed\n"); +- return -ENOMEM; +- } ++ /* In case of mmu the number of mapped nents might ++ * be changed from the original sgl nents ++ */ ++ *mapped_nents = dma_map_sg(dev, sg, *nents, direction); ++ if (*mapped_nents == 0) { ++ *nents = 0; ++ dev_err(dev, "dma_map_sg() sg buffer failed\n"); ++ return -ENOMEM; + } + } + +@@ -571,7 +520,6 @@ void cc_unmap_aead_request(struct device + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct cc_drvdata *drvdata = dev_get_drvdata(dev); + u32 dummy; +- bool chained; + u32 size_to_unmap = 0; + + if (areq_ctx->mac_buf_dma_addr) { +@@ -636,15 +584,14 @@ void cc_unmap_aead_request(struct device + size_to_unmap += crypto_aead_ivsize(tfm); + + dma_unmap_sg(dev, req->src, +- cc_get_sgl_nents(dev, req->src, size_to_unmap, +- &dummy, &chained), ++ cc_get_sgl_nents(dev, req->src, size_to_unmap, &dummy), + DMA_BIDIRECTIONAL); + if (req->src != req->dst) { + dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", + sg_virt(req->dst)); + dma_unmap_sg(dev, req->dst, + cc_get_sgl_nents(dev, req->dst, size_to_unmap, +- &dummy, &chained), ++ &dummy), + DMA_BIDIRECTIONAL); + } + if (drvdata->coherent && +@@ -1022,7 +969,6 @@ static int cc_aead_chain_data(struct cc_ + unsigned int size_for_map = req->assoclen + req->cryptlen; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + u32 
sg_index = 0; +- bool chained = false; + bool is_gcm4543 = areq_ctx->is_gcm4543; + u32 size_to_skip = req->assoclen; + +@@ -1043,7 +989,7 @@ static int cc_aead_chain_data(struct cc_ + size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? + authsize : 0; + src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map, +- &src_last_bytes, &chained); ++ &src_last_bytes); + sg_index = areq_ctx->src_sgl->length; + //check where the data starts + while (sg_index <= size_to_skip) { +@@ -1085,7 +1031,7 @@ static int cc_aead_chain_data(struct cc_ + } + + dst_mapped_nents = cc_get_sgl_nents(dev, req->dst, size_for_map, +- &dst_last_bytes, &chained); ++ &dst_last_bytes); + sg_index = areq_ctx->dst_sgl->length; + offset = size_to_skip; + +@@ -1486,7 +1432,7 @@ int cc_map_hash_request_update(struct cc + dev_dbg(dev, " less than one block: curr_buff=%pK *curr_buff_cnt=0x%X copy_to=%pK\n", + curr_buff, *curr_buff_cnt, &curr_buff[*curr_buff_cnt]); + areq_ctx->in_nents = +- cc_get_sgl_nents(dev, src, nbytes, &dummy, NULL); ++ cc_get_sgl_nents(dev, src, nbytes, &dummy); + sg_copy_to_buffer(src, areq_ctx->in_nents, + &curr_buff[*curr_buff_cnt], nbytes); + *curr_buff_cnt += nbytes; diff --git a/queue-4.19/crypto-ccree-use-correct-internal-state-sizes-for-export.patch b/queue-4.19/crypto-ccree-use-correct-internal-state-sizes-for-export.patch new file mode 100644 index 00000000000..3b3f1817403 --- /dev/null +++ b/queue-4.19/crypto-ccree-use-correct-internal-state-sizes-for-export.patch @@ -0,0 +1,44 @@ +From f3df82b468f00cca241d96ee3697c9a5e7fb6bd0 Mon Sep 17 00:00:00 2001 +From: Gilad Ben-Yossef +Date: Thu, 18 Apr 2019 16:39:02 +0300 +Subject: crypto: ccree - use correct internal state sizes for export + +From: Gilad Ben-Yossef + +commit f3df82b468f00cca241d96ee3697c9a5e7fb6bd0 upstream. 
+ +We were computing the size of the import buffer based on the digest size +but the 384 and 224 bit variants use 512 and 256 bit internal state +sizes respectively, thus causing the import buffer to overrun. + +Fix it by using the right sizes. + +Signed-off-by: Gilad Ben-Yossef +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/crypto/ccree/cc_hash.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/crypto/ccree/cc_hash.c ++++ b/drivers/crypto/ccree/cc_hash.c +@@ -1616,7 +1616,7 @@ static struct cc_hash_template driver_ha + .setkey = cc_hash_setkey, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, +- .statesize = CC_STATE_SIZE(SHA224_DIGEST_SIZE), ++ .statesize = CC_STATE_SIZE(SHA256_DIGEST_SIZE), + }, + }, + .hash_mode = DRV_HASH_SHA224, +@@ -1641,7 +1641,7 @@ static struct cc_hash_template driver_ha + .setkey = cc_hash_setkey, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, +- .statesize = CC_STATE_SIZE(SHA384_DIGEST_SIZE), ++ .statesize = CC_STATE_SIZE(SHA512_DIGEST_SIZE), + }, + }, + .hash_mode = DRV_HASH_SHA384, diff --git a/queue-4.19/ext4-actually-request-zeroing-of-inode-table-after-grow.patch b/queue-4.19/ext4-actually-request-zeroing-of-inode-table-after-grow.patch new file mode 100644 index 00000000000..cf6326d5c2f --- /dev/null +++ b/queue-4.19/ext4-actually-request-zeroing-of-inode-table-after-grow.patch @@ -0,0 +1,37 @@ +From 310a997fd74de778b9a4848a64be9cda9f18764a Mon Sep 17 00:00:00 2001 +From: Kirill Tkhai +Date: Thu, 25 Apr 2019 13:06:18 -0400 +Subject: ext4: actually request zeroing of inode table after grow + +From: Kirill Tkhai + +commit 310a997fd74de778b9a4848a64be9cda9f18764a upstream. + +It is never possible, that number of block groups decreases, +since only online grow is supported. + +But after a growing occurred, we have to zero inode tables +for just created new block groups.
+ +Fixes: 19c5246d2516 ("ext4: add new online resize interface") +Signed-off-by: Kirill Tkhai +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ioctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -977,7 +977,7 @@ mext_out: + if (err == 0) + err = err2; + mnt_drop_write_file(filp); +- if (!err && (o_group > EXT4_SB(sb)->s_groups_count) && ++ if (!err && (o_group < EXT4_SB(sb)->s_groups_count) && + ext4_has_group_desc_csum(sb) && + test_opt(sb, INIT_INODE_TABLE)) + err = ext4_register_li_request(sb, o_group); diff --git a/queue-4.19/ext4-avoid-drop-reference-to-iloc.bh-twice.patch b/queue-4.19/ext4-avoid-drop-reference-to-iloc.bh-twice.patch new file mode 100644 index 00000000000..91b680039e2 --- /dev/null +++ b/queue-4.19/ext4-avoid-drop-reference-to-iloc.bh-twice.patch @@ -0,0 +1,34 @@ +From 8c380ab4b7b59c0c602743810be1b712514eaebc Mon Sep 17 00:00:00 2001 +From: Pan Bian +Date: Thu, 25 Apr 2019 11:44:15 -0400 +Subject: ext4: avoid drop reference to iloc.bh twice + +From: Pan Bian + +commit 8c380ab4b7b59c0c602743810be1b712514eaebc upstream. + +The reference to iloc.bh has been dropped in ext4_mark_iloc_dirty. +However, the reference is dropped again if error occurs during +ext4_handle_dirty_metadata, which may result in use-after-free bugs. 
+ +Fixes: fb265c9cb49e("ext4: add ext4_sb_bread() to disambiguate ENOMEM cases") +Signed-off-by: Pan Bian +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -874,6 +874,7 @@ static int add_new_gdb(handle_t *handle, + err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); + if (unlikely(err)) { + ext4_std_error(sb, err); ++ iloc.bh = NULL; + goto errout; + } + brelse(dind); diff --git a/queue-4.19/ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch b/queue-4.19/ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch new file mode 100644 index 00000000000..285c1a2b7ce --- /dev/null +++ b/queue-4.19/ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch @@ -0,0 +1,35 @@ +From 50b29d8f033a7c88c5bc011abc2068b1691ab755 Mon Sep 17 00:00:00 2001 +From: Debabrata Banerjee +Date: Tue, 30 Apr 2019 23:08:15 -0400 +Subject: ext4: fix ext4_show_options for file systems w/o journal + +From: Debabrata Banerjee + +commit 50b29d8f033a7c88c5bc011abc2068b1691ab755 upstream. + +Instead of removing EXT4_MOUNT_JOURNAL_CHECKSUM from s_def_mount_opt as +I assume was intended, all other options were blown away leading to +_ext4_show_options() output being incorrect. 
+ +Fixes: 1e381f60dad9 ("ext4: do not allow journal_opts for fs w/o journal") +Signed-off-by: Debabrata Banerjee +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4270,7 +4270,7 @@ static int ext4_fill_super(struct super_ + "data=, fs mounted w/o journal"); + goto failed_mount_wq; + } +- sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; ++ sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; + clear_opt(sb, JOURNAL_CHECKSUM); + clear_opt(sb, DATA_FLAGS); + sbi->s_journal = NULL; diff --git a/queue-4.19/ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch b/queue-4.19/ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch new file mode 100644 index 00000000000..f7782008400 --- /dev/null +++ b/queue-4.19/ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch @@ -0,0 +1,106 @@ +From 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 Mon Sep 17 00:00:00 2001 +From: Barret Rhoden +Date: Thu, 25 Apr 2019 11:55:50 -0400 +Subject: ext4: fix use-after-free race with debug_want_extra_isize + +From: Barret Rhoden + +commit 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 upstream. + +When remounting with debug_want_extra_isize, we were not performing the +same checks that we do during a normal mount. That allowed us to set a +value for s_want_extra_isize that reached outside the s_inode_size. 
+ +Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate functions") +Reported-by: syzbot+f584efa0ac7213c226b7@syzkaller.appspotmail.com +Reviewed-by: Jan Kara +Signed-off-by: Barret Rhoden +Signed-off-by: Theodore Ts'o +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 58 ++++++++++++++++++++++++++++++++------------------------ + 1 file changed, 34 insertions(+), 24 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -3514,6 +3514,37 @@ int ext4_calculate_overhead(struct super + return 0; + } + ++static void ext4_clamp_want_extra_isize(struct super_block *sb) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ struct ext4_super_block *es = sbi->s_es; ++ ++ /* determine the minimum size of new large inodes, if present */ ++ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && ++ sbi->s_want_extra_isize == 0) { ++ sbi->s_want_extra_isize = sizeof(struct ext4_inode) - ++ EXT4_GOOD_OLD_INODE_SIZE; ++ if (ext4_has_feature_extra_isize(sb)) { ++ if (sbi->s_want_extra_isize < ++ le16_to_cpu(es->s_want_extra_isize)) ++ sbi->s_want_extra_isize = ++ le16_to_cpu(es->s_want_extra_isize); ++ if (sbi->s_want_extra_isize < ++ le16_to_cpu(es->s_min_extra_isize)) ++ sbi->s_want_extra_isize = ++ le16_to_cpu(es->s_min_extra_isize); ++ } ++ } ++ /* Check if enough inode space is available */ ++ if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > ++ sbi->s_inode_size) { ++ sbi->s_want_extra_isize = sizeof(struct ext4_inode) - ++ EXT4_GOOD_OLD_INODE_SIZE; ++ ext4_msg(sb, KERN_INFO, ++ "required extra inode space not available"); ++ } ++} ++ + static void ext4_set_resv_clusters(struct super_block *sb) + { + ext4_fsblk_t resv_clusters; +@@ -4388,30 +4419,7 @@ no_journal: + } else if (ret) + goto failed_mount4a; + +- /* determine the minimum size of new large inodes, if present */ +- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && +- sbi->s_want_extra_isize == 0) { +- sbi->s_want_extra_isize = sizeof(struct 
ext4_inode) - +- EXT4_GOOD_OLD_INODE_SIZE; +- if (ext4_has_feature_extra_isize(sb)) { +- if (sbi->s_want_extra_isize < +- le16_to_cpu(es->s_want_extra_isize)) +- sbi->s_want_extra_isize = +- le16_to_cpu(es->s_want_extra_isize); +- if (sbi->s_want_extra_isize < +- le16_to_cpu(es->s_min_extra_isize)) +- sbi->s_want_extra_isize = +- le16_to_cpu(es->s_min_extra_isize); +- } +- } +- /* Check if enough inode space is available */ +- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > +- sbi->s_inode_size) { +- sbi->s_want_extra_isize = sizeof(struct ext4_inode) - +- EXT4_GOOD_OLD_INODE_SIZE; +- ext4_msg(sb, KERN_INFO, "required extra inode space not" +- "available"); +- } ++ ext4_clamp_want_extra_isize(sb); + + ext4_set_resv_clusters(sb); + +@@ -5197,6 +5205,8 @@ static int ext4_remount(struct super_blo + goto restore_opts; + } + ++ ext4_clamp_want_extra_isize(sb); ++ + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ + test_opt(sb, JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "changing journal_checksum " diff --git a/queue-4.19/ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch b/queue-4.19/ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch new file mode 100644 index 00000000000..065940b8d42 --- /dev/null +++ b/queue-4.19/ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch @@ -0,0 +1,35 @@ +From e5d01196c0428a206f307e9ee5f6842964098ff0 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Wed, 10 Apr 2019 00:37:36 -0400 +Subject: ext4: ignore e_value_offs for xattrs with value-in-ea-inode + +From: Theodore Ts'o + +commit e5d01196c0428a206f307e9ee5f6842964098ff0 upstream. + +In other places in fs/ext4/xattr.c, if e_value_inum is non-zero, the +code ignores the value in e_value_offs. The e_value_offs *should* be +zero, but we shouldn't depend upon it, since it might not be true in a +corrupted/fuzzed file system. 
+ +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202897 +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202877 +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/xattr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1700,7 +1700,7 @@ static int ext4_xattr_set_entry(struct e + + /* No failures allowed past this point. */ + +- if (!s->not_found && here->e_value_size && here->e_value_offs) { ++ if (!s->not_found && here->e_value_size && !here->e_value_inum) { + /* Remove the old value. */ + void *first_val = s->base + min_offs; + size_t offs = le16_to_cpu(here->e_value_offs); diff --git a/queue-4.19/ext4-make-sanity-check-in-mballoc-more-strict.patch b/queue-4.19/ext4-make-sanity-check-in-mballoc-more-strict.patch new file mode 100644 index 00000000000..5eba01063c9 --- /dev/null +++ b/queue-4.19/ext4-make-sanity-check-in-mballoc-more-strict.patch @@ -0,0 +1,35 @@ +From 31562b954b60f02acb91b7349dc6432d3f8c3c5f Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Sat, 6 Apr 2019 18:33:06 -0400 +Subject: ext4: make sanity check in mballoc more strict + +From: Jan Kara + +commit 31562b954b60f02acb91b7349dc6432d3f8c3c5f upstream. + +The sanity check in mb_find_extent() only checked that returned extent +does not extend past blocksize * 8, however it should not extend past +EXT4_CLUSTERS_PER_GROUP(sb). This can happen when clusters_per_group < +blocksize * 8 and the tail of the bitmap is not properly filled by 1s +which happened e.g. when ancient kernels have grown the filesystem. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/mballoc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -1539,7 +1539,7 @@ static int mb_find_extent(struct ext4_bu + ex->fe_len += 1 << order; + } + +- if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) { ++ if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) { + /* Should never happen! (but apparently sometimes does?!?) */ + WARN_ON(1); + ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent " diff --git a/queue-4.19/ext4-protect-journal-inode-s-blocks-using-block_validity.patch b/queue-4.19/ext4-protect-journal-inode-s-blocks-using-block_validity.patch new file mode 100644 index 00000000000..b94fed6b792 --- /dev/null +++ b/queue-4.19/ext4-protect-journal-inode-s-blocks-using-block_validity.patch @@ -0,0 +1,101 @@ +From 345c0dbf3a30872d9b204db96b5857cd00808cae Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Tue, 9 Apr 2019 23:37:08 -0400 +Subject: ext4: protect journal inode's blocks using block_validity + +From: Theodore Ts'o + +commit 345c0dbf3a30872d9b204db96b5857cd00808cae upstream. + +Add the blocks which belong to the journal inode to block_validity's +system zone so that attempts to deallocate or overwrite the journal due +to a corrupted file system where the journal blocks are also claimed by +another inode are detected.
+ +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202879 +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/block_validity.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++ + fs/ext4/inode.c | 4 +++ + 2 files changed, 52 insertions(+) + +--- a/fs/ext4/block_validity.c ++++ b/fs/ext4/block_validity.c +@@ -137,6 +137,48 @@ static void debug_print_tree(struct ext4 + printk(KERN_CONT "\n"); + } + ++static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino) ++{ ++ struct inode *inode; ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ struct ext4_map_blocks map; ++ u32 i = 0, err = 0, num, n; ++ ++ if ((ino < EXT4_ROOT_INO) || ++ (ino > le32_to_cpu(sbi->s_es->s_inodes_count))) ++ return -EINVAL; ++ inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL); ++ if (IS_ERR(inode)) ++ return PTR_ERR(inode); ++ num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; ++ while (i < num) { ++ map.m_lblk = i; ++ map.m_len = num - i; ++ n = ext4_map_blocks(NULL, inode, &map, 0); ++ if (n < 0) { ++ err = n; ++ break; ++ } ++ if (n == 0) { ++ i++; ++ } else { ++ if (!ext4_data_block_valid(sbi, map.m_pblk, n)) { ++ ext4_error(sb, "blocks %llu-%llu from inode %u " ++ "overlap system zone", map.m_pblk, ++ map.m_pblk + map.m_len - 1, ino); ++ err = -EFSCORRUPTED; ++ break; ++ } ++ err = add_system_zone(sbi, map.m_pblk, n); ++ if (err < 0) ++ break; ++ i += n; ++ } ++ } ++ iput(inode); ++ return err; ++} ++ + int ext4_setup_system_zone(struct super_block *sb) + { + ext4_group_t ngroups = ext4_get_groups_count(sb); +@@ -171,6 +213,12 @@ int ext4_setup_system_zone(struct super_ + if (ret) + return ret; + } ++ if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) { ++ ret = ext4_protect_reserved_inode(sb, ++ le32_to_cpu(sbi->s_es->s_journal_inum)); ++ if (ret) ++ return ret; ++ } + + if (test_opt(sb, DEBUG)) + debug_print_tree(sbi); +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -399,6 +399,10 @@ static 
int __check_block_validity(struct + unsigned int line, + struct ext4_map_blocks *map) + { ++ if (ext4_has_feature_journal(inode->i_sb) && ++ (inode->i_ino == ++ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) ++ return 0; + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, + map->m_len)) { + ext4_error_inode(inode, func, line, map->m_pblk, diff --git a/queue-4.19/hugetlb-use-same-fault-hash-key-for-shared-and-private-mappings.patch b/queue-4.19/hugetlb-use-same-fault-hash-key-for-shared-and-private-mappings.patch new file mode 100644 index 00000000000..5e9cdb7e212 --- /dev/null +++ b/queue-4.19/hugetlb-use-same-fault-hash-key-for-shared-and-private-mappings.patch @@ -0,0 +1,177 @@ +From 1b426bac66e6cc83c9f2d92b96e4e72acf43419a Mon Sep 17 00:00:00 2001 +From: Mike Kravetz +Date: Mon, 13 May 2019 17:19:41 -0700 +Subject: hugetlb: use same fault hash key for shared and private mappings + +From: Mike Kravetz + +commit 1b426bac66e6cc83c9f2d92b96e4e72acf43419a upstream. + +hugetlb uses a fault mutex hash table to prevent page faults of the +same pages concurrently. The key for shared and private mappings is +different. Shared keys off address_space and file index. Private keys +off mm and virtual address. Consider a private mapping of a populated +hugetlbfs file. A fault will map the page from the file and if needed +do a COW to map a writable page. + +Hugetlbfs hole punch uses the fault mutex to prevent mappings of file +pages. It uses the address_space file index key. However, private +mappings will use a different key and could race with this code to map +the file page. This causes problems (BUG) for the page cache remove +code as it expects the page to be unmapped. A sample stack is: + +page dumped because: VM_BUG_ON_PAGE(page_mapped(page)) +kernel BUG at mm/filemap.c:169! +... +RIP: 0010:unaccount_page_cache_page+0x1b8/0x200 +...
+Call Trace: +__delete_from_page_cache+0x39/0x220 +delete_from_page_cache+0x45/0x70 +remove_inode_hugepages+0x13c/0x380 +? __add_to_page_cache_locked+0x162/0x380 +hugetlbfs_fallocate+0x403/0x540 +? _cond_resched+0x15/0x30 +? __inode_security_revalidate+0x5d/0x70 +? selinux_file_permission+0x100/0x130 +vfs_fallocate+0x13f/0x270 +ksys_fallocate+0x3c/0x80 +__x64_sys_fallocate+0x1a/0x20 +do_syscall_64+0x5b/0x180 +entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +There seems to be another potential COW issue/race with this approach +of different private and shared keys as noted in commit 8382d914ebf7 +("mm, hugetlb: improve page-fault scalability"). + +Since every hugetlb mapping (even anon and private) is actually a file +mapping, just use the address_space index key for all mappings. This +results in potentially more hash collisions. However, this should not +be the common case. + +Link: http://lkml.kernel.org/r/20190328234704.27083-3-mike.kravetz@oracle.com +Link: http://lkml.kernel.org/r/20190412165235.t4sscoujczfhuiyt@linux-r8p5 +Fixes: b5cec28d36f5 ("hugetlbfs: truncate_hugepages() takes a range of pages") +Signed-off-by: Mike Kravetz +Reviewed-by: Naoya Horiguchi +Reviewed-by: Davidlohr Bueso +Cc: Joonsoo Kim +Cc: "Kirill A . 
Shutemov" +Cc: Michal Hocko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/hugetlbfs/inode.c | 7 ++----- + include/linux/hugetlb.h | 4 +--- + mm/hugetlb.c | 22 ++++++---------------- + mm/userfaultfd.c | 3 +-- + 4 files changed, 10 insertions(+), 26 deletions(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -426,9 +426,7 @@ static void remove_inode_hugepages(struc + u32 hash; + + index = page->index; +- hash = hugetlb_fault_mutex_hash(h, current->mm, +- &pseudo_vma, +- mapping, index, 0); ++ hash = hugetlb_fault_mutex_hash(h, mapping, index, 0); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + + /* +@@ -625,8 +623,7 @@ static long hugetlbfs_fallocate(struct f + addr = index * hpage_size; + + /* mutex taken here, fault path and hole punch */ +- hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, +- index, addr); ++ hash = hugetlb_fault_mutex_hash(h, mapping, index, addr); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + + /* See if already present in mapping to avoid alloc/free */ +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -123,9 +123,7 @@ void move_hugetlb_state(struct page *old + void free_huge_page(struct page *page); + void hugetlb_fix_reserve_counts(struct inode *inode); + extern struct mutex *hugetlb_fault_mutex_table; +-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, +- struct vm_area_struct *vma, +- struct address_space *mapping, ++u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, + pgoff_t idx, unsigned long address); + + pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -3778,8 +3778,7 @@ retry: + * handling userfault. Reacquire after handling + * fault to make calling code simpler. 
+ */ +- hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, +- idx, haddr); ++ hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + ret = handle_userfault(&vmf, VM_UFFD_MISSING); + mutex_lock(&hugetlb_fault_mutex_table[hash]); +@@ -3887,21 +3886,14 @@ backout_unlocked: + } + + #ifdef CONFIG_SMP +-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, +- struct vm_area_struct *vma, +- struct address_space *mapping, ++u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, + pgoff_t idx, unsigned long address) + { + unsigned long key[2]; + u32 hash; + +- if (vma->vm_flags & VM_SHARED) { +- key[0] = (unsigned long) mapping; +- key[1] = idx; +- } else { +- key[0] = (unsigned long) mm; +- key[1] = address >> huge_page_shift(h); +- } ++ key[0] = (unsigned long) mapping; ++ key[1] = idx; + + hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0); + +@@ -3912,9 +3904,7 @@ u32 hugetlb_fault_mutex_hash(struct hsta + * For uniprocesor systems we always use a single mutex, so just + * return 0 and avoid the hashing overhead. + */ +-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, +- struct vm_area_struct *vma, +- struct address_space *mapping, ++u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, + pgoff_t idx, unsigned long address) + { + return 0; +@@ -3959,7 +3949,7 @@ vm_fault_t hugetlb_fault(struct mm_struc + * get spurious allocation failures if two CPUs race to instantiate + * the same page in the page cache. 
+ */ +- hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr); ++ hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + + entry = huge_ptep_get(ptep); +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -271,8 +271,7 @@ retry: + */ + idx = linear_page_index(dst_vma, dst_addr); + mapping = dst_vma->vm_file->f_mapping; +- hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping, +- idx, dst_addr); ++ hash = hugetlb_fault_mutex_hash(h, mapping, idx, dst_addr); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + + err = -ENOMEM; diff --git a/queue-4.19/ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch b/queue-4.19/ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch new file mode 100644 index 00000000000..d7c4e11b1e3 --- /dev/null +++ b/queue-4.19/ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch @@ -0,0 +1,50 @@ +From 55be8658c7e2feb11a5b5b33ee031791dbd23a69 Mon Sep 17 00:00:00 2001 +From: Kamlakant Patel +Date: Wed, 24 Apr 2019 11:50:43 +0000 +Subject: ipmi:ssif: compare block number correctly for multi-part return messages + +From: Kamlakant Patel + +commit 55be8658c7e2feb11a5b5b33ee031791dbd23a69 upstream. + +According to ipmi spec, block number is a number that is incremented, +starting with 0, for each new block of message data returned using the +middle transaction. + +Here, the 'blocknum' is data[0] which always starts from zero(0) and +'ssif_info->multi_pos' starts from 1. +So, we need to add +1 to blocknum while comparing with multi_pos. + +Fixes: 7d6380cd40f79 ("ipmi:ssif: Fix handling of multi-part return messages"). +Reported-by: Kiran Kolukuluru +Signed-off-by: Kamlakant Patel +Message-Id: <1556106615-18722-1-git-send-email-kamlakantp@marvell.com> +[Also added a debug log if the block numbers don't match.] 
+Signed-off-by: Corey Minyard +Cc: stable@vger.kernel.org # 4.4 +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/ipmi/ipmi_ssif.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/char/ipmi/ipmi_ssif.c ++++ b/drivers/char/ipmi/ipmi_ssif.c +@@ -688,12 +688,16 @@ static void msg_done_handler(struct ssif + /* End of read */ + len = ssif_info->multi_len; + data = ssif_info->data; +- } else if (blocknum != ssif_info->multi_pos) { ++ } else if (blocknum + 1 != ssif_info->multi_pos) { + /* + * Out of sequence block, just abort. Block + * numbers start at zero for the second block, + * but multi_pos starts at one, so the +1. + */ ++ if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) ++ dev_dbg(&ssif_info->client->dev, ++ "Received message out of sequence, expected %u, got %u\n", ++ ssif_info->multi_pos - 1, blocknum); + result = -EIO; + } else { + ssif_inc_stat(ssif_info, received_message_parts); diff --git a/queue-4.19/jbd2-check-superblock-mapped-prior-to-committing.patch b/queue-4.19/jbd2-check-superblock-mapped-prior-to-committing.patch new file mode 100644 index 00000000000..afdf5a732c3 --- /dev/null +++ b/queue-4.19/jbd2-check-superblock-mapped-prior-to-committing.patch @@ -0,0 +1,49 @@ +From 742b06b5628f2cd23cb51a034cb54dc33c6162c5 Mon Sep 17 00:00:00 2001 +From: Jiufei Xue +Date: Sat, 6 Apr 2019 18:57:40 -0400 +Subject: jbd2: check superblock mapped prior to committing + +From: Jiufei Xue + +commit 742b06b5628f2cd23cb51a034cb54dc33c6162c5 upstream. + +We hit a BUG at fs/buffer.c:3057 if we detached the nbd device +before unmounting ext4 filesystem. + +The typical chain of events leading to the BUG: +jbd2_write_superblock + submit_bh + submit_bh_wbc + BUG_ON(!buffer_mapped(bh)); + +The block device is removed and all the pages are invalidated. JBD2 +was trying to write journal superblock to the block device which is +no longer present. + +Fix this by checking the journal superblock's buffer head prior to +submitting. 
+ +Reported-by: Eric Ren +Signed-off-by: Jiufei Xue +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd2/journal.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1366,6 +1366,10 @@ static int jbd2_write_superblock(journal + journal_superblock_t *sb = journal->j_superblock; + int ret; + ++ /* Buffer got discarded which means block device got invalidated */ ++ if (!buffer_mapped(bh)) ++ return -EIO; ++ + trace_jbd2_write_superblock(journal, write_flags); + if (!(journal->j_flags & JBD2_BARRIER)) + write_flags &= ~(REQ_FUA | REQ_PREFLUSH); diff --git a/queue-4.19/mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch b/queue-4.19/mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch new file mode 100644 index 00000000000..e35cb178e84 --- /dev/null +++ b/queue-4.19/mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch @@ -0,0 +1,39 @@ +From 6b4814a9451add06d457e198be418bf6a3e6a990 Mon Sep 17 00:00:00 2001 +From: Steve Twiss +Date: Fri, 26 Apr 2019 14:33:35 +0100 +Subject: mfd: da9063: Fix OTP control register names to match datasheets for DA9063/63L + +From: Steve Twiss + +commit 6b4814a9451add06d457e198be418bf6a3e6a990 upstream. + +Mismatch between what is found in the Datasheets for DA9063 and DA9063L +provided by Dialog Semiconductor, and the register names provided in the +MFD registers file. The changes are for the OTP (one-time-programming) +control registers. The two naming errors are OPT instead of OTP, and +COUNT instead of CONT (i.e. control). 
+ +Cc: Stable +Signed-off-by: Steve Twiss +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mfd/da9063/registers.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/mfd/da9063/registers.h ++++ b/include/linux/mfd/da9063/registers.h +@@ -215,9 +215,9 @@ + + /* DA9063 Configuration registers */ + /* OTP */ +-#define DA9063_REG_OPT_COUNT 0x101 +-#define DA9063_REG_OPT_ADDR 0x102 +-#define DA9063_REG_OPT_DATA 0x103 ++#define DA9063_REG_OTP_CONT 0x101 ++#define DA9063_REG_OTP_ADDR 0x102 ++#define DA9063_REG_OTP_DATA 0x103 + + /* Customer Trim and Configuration */ + #define DA9063_REG_T_OFFSET 0x104 diff --git a/queue-4.19/mfd-max77620-fix-swapped-fps_period_max_us-values.patch b/queue-4.19/mfd-max77620-fix-swapped-fps_period_max_us-values.patch new file mode 100644 index 00000000000..2c381e4ec70 --- /dev/null +++ b/queue-4.19/mfd-max77620-fix-swapped-fps_period_max_us-values.patch @@ -0,0 +1,34 @@ +From ea611d1cc180fbb56982c83cd5142a2b34881f5c Mon Sep 17 00:00:00 2001 +From: Dmitry Osipenko +Date: Sun, 5 May 2019 18:43:22 +0300 +Subject: mfd: max77620: Fix swapped FPS_PERIOD_MAX_US values + +From: Dmitry Osipenko + +commit ea611d1cc180fbb56982c83cd5142a2b34881f5c upstream. + +The FPS_PERIOD_MAX_US definitions are swapped for MAX20024 and MAX77620, +fix it. 
+ +Cc: stable +Signed-off-by: Dmitry Osipenko +Signed-off-by: Lee Jones +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mfd/max77620.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/linux/mfd/max77620.h ++++ b/include/linux/mfd/max77620.h +@@ -136,8 +136,8 @@ + #define MAX77620_FPS_PERIOD_MIN_US 40 + #define MAX20024_FPS_PERIOD_MIN_US 20 + +-#define MAX77620_FPS_PERIOD_MAX_US 2560 +-#define MAX20024_FPS_PERIOD_MAX_US 5120 ++#define MAX20024_FPS_PERIOD_MAX_US 2560 ++#define MAX77620_FPS_PERIOD_MAX_US 5120 + + #define MAX77620_REG_FPS_GPIO1 0x54 + #define MAX77620_REG_FPS_GPIO2 0x55 diff --git a/queue-4.19/mm-huge_memory-fix-vmf_insert_pfn_-pmd-pud-crash-handle-unaligned-addresses.patch b/queue-4.19/mm-huge_memory-fix-vmf_insert_pfn_-pmd-pud-crash-handle-unaligned-addresses.patch new file mode 100644 index 00000000000..1e4c9eff30a --- /dev/null +++ b/queue-4.19/mm-huge_memory-fix-vmf_insert_pfn_-pmd-pud-crash-handle-unaligned-addresses.patch @@ -0,0 +1,171 @@ +From fce86ff5802bac3a7b19db171aa1949ef9caac31 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Mon, 13 May 2019 17:15:33 -0700 +Subject: mm/huge_memory: fix vmf_insert_pfn_{pmd, pud}() crash, handle unaligned addresses + +From: Dan Williams + +commit fce86ff5802bac3a7b19db171aa1949ef9caac31 upstream. + +Starting with c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page +protection by insert_pfn_pmd()") vmf_insert_pfn_pmd() internally calls +pmdp_set_access_flags(). That helper enforces a pmd aligned @address +argument via VM_BUG_ON() assertion. + +Update the implementation to take a 'struct vm_fault' argument directly +and apply the address alignment fixup internally to fix crash signatures +like: + + kernel BUG at arch/x86/mm/pgtable.c:515! + invalid opcode: 0000 [#1] SMP NOPTI + CPU: 51 PID: 43713 Comm: java Tainted: G OE 4.19.35 #1 + [..] + RIP: 0010:pmdp_set_access_flags+0x48/0x50 + [..] 
+ Call Trace: + vmf_insert_pfn_pmd+0x198/0x350 + dax_iomap_fault+0xe82/0x1190 + ext4_dax_huge_fault+0x103/0x1f0 + ? __switch_to_asm+0x40/0x70 + __handle_mm_fault+0x3f6/0x1370 + ? __switch_to_asm+0x34/0x70 + ? __switch_to_asm+0x40/0x70 + handle_mm_fault+0xda/0x200 + __do_page_fault+0x249/0x4f0 + do_page_fault+0x32/0x110 + ? page_fault+0x8/0x30 + page_fault+0x1e/0x30 + +Link: http://lkml.kernel.org/r/155741946350.372037.11148198430068238140.stgit@dwillia2-desk3.amr.corp.intel.com +Fixes: c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd()") +Signed-off-by: Dan Williams +Reported-by: Piotr Balcer +Tested-by: Yan Ma +Tested-by: Pankaj Gupta +Reviewed-by: Matthew Wilcox +Reviewed-by: Jan Kara +Reviewed-by: Aneesh Kumar K.V +Cc: Chandan Rajendra +Cc: Souptick Joarder +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dax/device.c | 6 ++---- + fs/dax.c | 6 ++---- + include/linux/huge_mm.h | 6 ++---- + mm/huge_memory.c | 16 ++++++++++------ + 4 files changed, 16 insertions(+), 18 deletions(-) + +--- a/drivers/dax/device.c ++++ b/drivers/dax/device.c +@@ -325,8 +325,7 @@ static vm_fault_t __dev_dax_pmd_fault(st + + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + +- return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn, +- vmf->flags & FAULT_FLAG_WRITE); ++ return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); + } + + #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +@@ -376,8 +375,7 @@ static vm_fault_t __dev_dax_pud_fault(st + + *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + +- return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn, +- vmf->flags & FAULT_FLAG_WRITE); ++ return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); + } + #else + static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -1660,8 +1660,7 @@ static vm_fault_t dax_iomap_pmd_fault(st + } + + 
trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry); +- result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, +- write); ++ result = vmf_insert_pfn_pmd(vmf, pfn, write); + break; + case IOMAP_UNWRITTEN: + case IOMAP_HOLE: +@@ -1775,8 +1774,7 @@ static vm_fault_t dax_insert_pfn_mkwrite + break; + #ifdef CONFIG_FS_DAX_PMD + case PE_SIZE_PMD: +- ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, +- pfn, true); ++ ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE); + break; + #endif + default: +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -47,10 +47,8 @@ extern bool move_huge_pmd(struct vm_area + extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, pgprot_t newprot, + int prot_numa); +-vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, +- pmd_t *pmd, pfn_t pfn, bool write); +-vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, +- pud_t *pud, pfn_t pfn, bool write); ++vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); ++vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); + enum transparent_hugepage_flag { + TRANSPARENT_HUGEPAGE_FLAG, + TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -772,11 +772,13 @@ out_unlock: + pte_free(mm, pgtable); + } + +-vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, +- pmd_t *pmd, pfn_t pfn, bool write) ++vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) + { ++ unsigned long addr = vmf->address & PMD_MASK; ++ struct vm_area_struct *vma = vmf->vma; + pgprot_t pgprot = vma->vm_page_prot; + pgtable_t pgtable = NULL; ++ + /* + * If we had pmd_special, we could avoid all these restrictions, + * but we need to be consistent with PTEs and architectures that +@@ -799,7 +801,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_ + + track_pfn_insert(vma, &pgprot, pfn); + +- 
insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable); ++ insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable); + return VM_FAULT_NOPAGE; + } + EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); +@@ -848,10 +850,12 @@ out_unlock: + spin_unlock(ptl); + } + +-vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, +- pud_t *pud, pfn_t pfn, bool write) ++vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) + { ++ unsigned long addr = vmf->address & PUD_MASK; ++ struct vm_area_struct *vma = vmf->vma; + pgprot_t pgprot = vma->vm_page_prot; ++ + /* + * If we had pud_special, we could avoid all these restrictions, + * but we need to be consistent with PTEs and architectures that +@@ -868,7 +872,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_ + + track_pfn_insert(vma, &pgprot, pfn); + +- insert_pfn_pud(vma, addr, pud, pfn, pgprot, write); ++ insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write); + return VM_FAULT_NOPAGE; + } + EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); diff --git a/queue-4.19/mm-hugetlb.c-don-t-put_page-in-lock-of-hugetlb_lock.patch b/queue-4.19/mm-hugetlb.c-don-t-put_page-in-lock-of-hugetlb_lock.patch new file mode 100644 index 00000000000..e4c58371e45 --- /dev/null +++ b/queue-4.19/mm-hugetlb.c-don-t-put_page-in-lock-of-hugetlb_lock.patch @@ -0,0 +1,77 @@ +From 2bf753e64b4a702e27ce26ff520c59563c62f96b Mon Sep 17 00:00:00 2001 +From: Kai Shen +Date: Mon, 13 May 2019 17:15:37 -0700 +Subject: mm/hugetlb.c: don't put_page in lock of hugetlb_lock + +From: Kai Shen + +commit 2bf753e64b4a702e27ce26ff520c59563c62f96b upstream. + +spinlock recursion happened when do LTP test: +#!/bin/bash +./runltp -p -f hugetlb & +./runltp -p -f hugetlb & +./runltp -p -f hugetlb & +./runltp -p -f hugetlb & +./runltp -p -f hugetlb & + +The dtor returned by get_compound_page_dtor in __put_compound_page may be +the function of free_huge_page which will lock the hugetlb_lock, so don't +put_page in lock of hugetlb_lock. 
+ + BUG: spinlock recursion on CPU#0, hugemmap05/1079 + lock: hugetlb_lock+0x0/0x18, .magic: dead4ead, .owner: hugemmap05/1079, .owner_cpu: 0 + Call trace: + dump_backtrace+0x0/0x198 + show_stack+0x24/0x30 + dump_stack+0xa4/0xcc + spin_dump+0x84/0xa8 + do_raw_spin_lock+0xd0/0x108 + _raw_spin_lock+0x20/0x30 + free_huge_page+0x9c/0x260 + __put_compound_page+0x44/0x50 + __put_page+0x2c/0x60 + alloc_surplus_huge_page.constprop.19+0xf0/0x140 + hugetlb_acct_memory+0x104/0x378 + hugetlb_reserve_pages+0xe0/0x250 + hugetlbfs_file_mmap+0xc0/0x140 + mmap_region+0x3e8/0x5b0 + do_mmap+0x280/0x460 + vm_mmap_pgoff+0xf4/0x128 + ksys_mmap_pgoff+0xb4/0x258 + __arm64_sys_mmap+0x34/0x48 + el0_svc_common+0x78/0x130 + el0_svc_handler+0x38/0x78 + el0_svc+0x8/0xc + +Link: http://lkml.kernel.org/r/b8ade452-2d6b-0372-32c2-703644032b47@huawei.com +Fixes: 9980d744a0 ("mm, hugetlb: get rid of surplus page accounting tricks") +Signed-off-by: Kai Shen +Signed-off-by: Feilong Lin +Reported-by: Wang Wang +Reviewed-by: Oscar Salvador +Reviewed-by: Mike Kravetz +Reviewed-by: Andrew Morton +Acked-by: Michal Hocko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/hugetlb.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1572,8 +1572,9 @@ static struct page *alloc_surplus_huge_p + */ + if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { + SetPageHugeTemporary(page); ++ spin_unlock(&hugetlb_lock); + put_page(page); +- page = NULL; ++ return NULL; + } else { + h->surplus_huge_pages++; + h->surplus_huge_pages_node[page_to_nid(page)]++; diff --git a/queue-4.19/mm-mincore.c-make-mincore-more-conservative.patch b/queue-4.19/mm-mincore.c-make-mincore-more-conservative.patch new file mode 100644 index 00000000000..05841506c5c --- /dev/null +++ b/queue-4.19/mm-mincore.c-make-mincore-more-conservative.patch @@ -0,0 +1,95 @@ +From 134fca9063ad4851de767d1768180e5dede9a881 Mon Sep 17 
00:00:00 2001 +From: Jiri Kosina +Date: Tue, 14 May 2019 15:41:38 -0700 +Subject: mm/mincore.c: make mincore() more conservative + +From: Jiri Kosina + +commit 134fca9063ad4851de767d1768180e5dede9a881 upstream. + +The semantics of what mincore() considers to be resident is not +completely clear, but Linux has always (since 2.3.52, which is when +mincore() was initially done) treated it as "page is available in page +cache". + +That's potentially a problem, as that [in]directly exposes +meta-information about pagecache / memory mapping state even about +memory not strictly belonging to the process executing the syscall, +opening possibilities for sidechannel attacks. + +Change the semantics of mincore() so that it only reveals pagecache +information for non-anonymous mappings that belong to files that the +calling process could (if it tried to) successfully open for writing; +otherwise we'd be including shared non-exclusive mappings, which + + - is the sidechannel + + - is not the usecase for mincore(), as that's primarily used for data, + not (shared) text + +[jkosina@suse.cz: v2] + Link: http://lkml.kernel.org/r/20190312141708.6652-2-vbabka@suse.cz +[mhocko@suse.com: restructure can_do_mincore() conditions] +Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1903062342020.19912@cbobk.fhfr.pm +Signed-off-by: Jiri Kosina +Signed-off-by: Vlastimil Babka +Acked-by: Josh Snyder +Acked-by: Michal Hocko +Originally-by: Linus Torvalds +Originally-by: Dominique Martinet +Cc: Andy Lutomirski +Cc: Dave Chinner +Cc: Kevin Easton +Cc: Matthew Wilcox +Cc: Cyril Hrubis +Cc: Tejun Heo +Cc: Kirill A. 
Shutemov +Cc: Daniel Gruss +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mincore.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +--- a/mm/mincore.c ++++ b/mm/mincore.c +@@ -169,6 +169,22 @@ out: + return 0; + } + ++static inline bool can_do_mincore(struct vm_area_struct *vma) ++{ ++ if (vma_is_anonymous(vma)) ++ return true; ++ if (!vma->vm_file) ++ return false; ++ /* ++ * Reveal pagecache information only for non-anonymous mappings that ++ * correspond to the files the calling process could (if tried) open ++ * for writing; otherwise we'd be including shared non-exclusive ++ * mappings, which opens a side channel. ++ */ ++ return inode_owner_or_capable(file_inode(vma->vm_file)) || ++ inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; ++} ++ + /* + * Do a chunk of "sys_mincore()". We've already checked + * all the arguments, we hold the mmap semaphore: we should +@@ -189,8 +205,13 @@ static long do_mincore(unsigned long add + vma = find_vma(current->mm, addr); + if (!vma || addr < vma->vm_start) + return -ENOMEM; +- mincore_walk.mm = vma->vm_mm; + end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); ++ if (!can_do_mincore(vma)) { ++ unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE); ++ memset(vec, 1, pages); ++ return pages; ++ } ++ mincore_walk.mm = vma->vm_mm; + err = walk_page_range(addr, end, &mincore_walk); + if (err < 0) + return err; diff --git a/queue-4.19/mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch b/queue-4.19/mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch new file mode 100644 index 00000000000..53bfc44bd99 --- /dev/null +++ b/queue-4.19/mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch @@ -0,0 +1,69 @@ +From 2b75ebeea6f4937d4d05ec4982c471cef9a29b7f Mon Sep 17 00:00:00 2001 +From: Alexander Sverdlin +Date: Tue, 19 Mar 2019 17:18:07 +0000 +Subject: mtd: 
spi-nor: intel-spi: Avoid crossing 4K address boundary on read/write + +From: Alexander Sverdlin + +commit 2b75ebeea6f4937d4d05ec4982c471cef9a29b7f upstream. + +It was observed that reads crossing 4K address boundary are failing. + +This limitation is mentioned in Intel documents: + +Intel(R) 9 Series Chipset Family Platform Controller Hub (PCH) Datasheet: + +"5.26.3 Flash Access +Program Register Access: +* Program Register Accesses are not allowed to cross a 4 KB boundary..." + +Enhanced Serial Peripheral Interface (eSPI) +Interface Base Specification (for Client and Server Platforms): + +"5.1.4 Address +For other memory transactions, the address may start or end at any byte +boundary. However, the address and payload length combination must not +cross the naturally aligned address boundary of the corresponding Maximum +Payload Size. It must not cross a 4 KB address boundary." + +Avoid this by splitting an operation crossing the boundary into two +operations. + +Fixes: 8afda8b26d01 ("spi-nor: Add support for Intel SPI serial flash controller") +Cc: stable@vger.kernel.org +Reported-by: Romain Porte +Tested-by: Pascal Fabreges +Signed-off-by: Alexander Sverdlin +Reviewed-by: Tudor Ambarus +Acked-by: Mika Westerberg +Signed-off-by: Miquel Raynal +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/spi-nor/intel-spi.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/mtd/spi-nor/intel-spi.c ++++ b/drivers/mtd/spi-nor/intel-spi.c +@@ -632,6 +632,10 @@ static ssize_t intel_spi_read(struct spi + while (len > 0) { + block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + ++ /* Read cannot cross 4K boundary */ ++ block_size = min_t(loff_t, from + block_size, ++ round_up(from + 1, SZ_4K)) - from; ++ + writel(from, ispi->base + FADDR); + + val = readl(ispi->base + HSFSTS_CTL); +@@ -685,6 +689,10 @@ static ssize_t intel_spi_write(struct sp + while (len > 0) { + block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + ++ /* Write cannot cross 4K boundary */ ++ 
block_size = min_t(loff_t, to + block_size, ++ round_up(to + 1, SZ_4K)) - to; ++ + writel(to, ispi->base + FADDR); + + val = readl(ispi->base + HSFSTS_CTL); diff --git a/queue-4.19/ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch b/queue-4.19/ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch new file mode 100644 index 00000000000..9338af4a454 --- /dev/null +++ b/queue-4.19/ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch @@ -0,0 +1,180 @@ +From e091eab028f9253eac5c04f9141bbc9d170acab3 Mon Sep 17 00:00:00 2001 +From: Shuning Zhang +Date: Mon, 13 May 2019 17:15:56 -0700 +Subject: ocfs2: fix ocfs2 read inode data panic in ocfs2_iget + +From: Shuning Zhang + +commit e091eab028f9253eac5c04f9141bbc9d170acab3 upstream. + +In some cases, ocfs2_iget() reads the data of inode, which has been +deleted for some reason. That will make the system panic. So We should +judge whether this inode has been deleted, and tell the caller that the +inode is a bad inode. + +For example, the ocfs2 is used as the backed of nfs, and the client is +nfsv3. This issue can be reproduced by the following steps. + +on the nfs server side, +..../patha/pathb + +Step 1: The process A was scheduled before calling the function fh_verify. + +Step 2: The process B is removing the 'pathb', and just completed the call +to function dput. Then the dentry of 'pathb' has been deleted from the +dcache, and all ancestors have been deleted also. The relationship of +dentry and inode was deleted through the function hlist_del_init. The +following is the call stack. +dentry_iput->hlist_del_init(&dentry->d_u.d_alias) + +At this time, the inode is still in the dcache. + +Step 3: The process A call the function ocfs2_get_dentry, which get the +inode from dcache. Then the refcount of inode is 1. The following is the +call stack. +nfsd3_proc_getacl->fh_verify->exportfs_decode_fh->fh_to_dentry(ocfs2_get_dentry) + +Step 4: Dirty pages are flushed by bdi threads. 
So the inode of 'patha' +is evicted, and this directory was deleted. But the inode of 'pathb' +can't be evicted, because the refcount of the inode was 1. + +Step 5: The process A keep running, and call the function +reconnect_path(in exportfs_decode_fh), which call function +ocfs2_get_parent of ocfs2. Get the block number of parent +directory(patha) by the name of ... Then read the data from disk by the +block number. But this inode has been deleted, so the system panic. + +Process A Process B +1. in nfsd3_proc_getacl | +2. | dput +3. fh_to_dentry(ocfs2_get_dentry) | +4. bdi flush dirty cache | +5. ocfs2_iget | + +[283465.542049] OCFS2: ERROR (device sdp): ocfs2_validate_inode_block: +Invalid dinode #580640: OCFS2_VALID_FL not set + +[283465.545490] Kernel panic - not syncing: OCFS2: (device sdp): panic forced +after error + +[283465.546889] CPU: 5 PID: 12416 Comm: nfsd Tainted: G W +4.1.12-124.18.6.el6uek.bug28762940v3.x86_64 #2 +[283465.548382] Hardware name: VMware, Inc. VMware Virtual Platform/440BX +Desktop Reference Platform, BIOS 6.00 09/21/2015 +[283465.549657] 0000000000000000 ffff8800a56fb7b8 ffffffff816e839c +ffffffffa0514758 +[283465.550392] 000000000008dc20 ffff8800a56fb838 ffffffff816e62d3 +0000000000000008 +[283465.551056] ffff880000000010 ffff8800a56fb848 ffff8800a56fb7e8 +ffff88005df9f000 +[283465.551710] Call Trace: +[283465.552516] [] dump_stack+0x63/0x81 +[283465.553291] [] panic+0xcb/0x21b +[283465.554037] [] ocfs2_handle_error+0xf0/0xf0 [ocfs2] +[283465.554882] [] __ocfs2_error+0x67/0x70 [ocfs2] +[283465.555768] [] ocfs2_validate_inode_block+0x229/0x230 +[ocfs2] +[283465.556683] [] ocfs2_read_blocks+0x46c/0x7b0 [ocfs2] +[283465.557408] [] ? 
ocfs2_inode_cache_io_unlock+0x20/0x20 +[ocfs2] +[283465.557973] [] ocfs2_read_inode_block_full+0x3b/0x60 +[ocfs2] +[283465.558525] [] ocfs2_iget+0x4aa/0x880 [ocfs2] +[283465.559082] [] ocfs2_get_parent+0x9e/0x220 [ocfs2] +[283465.559622] [] reconnect_path+0xb5/0x300 +[283465.560156] [] exportfs_decode_fh+0xf6/0x2b0 +[283465.560708] [] ? nfsd_proc_getattr+0xa0/0xa0 [nfsd] +[283465.561262] [] ? prepare_creds+0x26/0x110 +[283465.561932] [] fh_verify+0x350/0x660 [nfsd] +[283465.562862] [] ? nfsd_cache_lookup+0x44/0x630 [nfsd] +[283465.563697] [] nfsd3_proc_getattr+0x69/0xf0 [nfsd] +[283465.564510] [] nfsd_dispatch+0xe0/0x290 [nfsd] +[283465.565358] [] ? svc_tcp_adjust_wspace+0x12/0x30 +[sunrpc] +[283465.566272] [] svc_process_common+0x412/0x6a0 [sunrpc] +[283465.567155] [] svc_process+0x123/0x210 [sunrpc] +[283465.568020] [] nfsd+0xff/0x170 [nfsd] +[283465.568962] [] ? nfsd_destroy+0x80/0x80 [nfsd] +[283465.570112] [] kthread+0xcb/0xf0 +[283465.571099] [] ? kthread_create_on_node+0x180/0x180 +[283465.572114] [] ret_from_fork+0x58/0x90 +[283465.573156] [] ? 
kthread_create_on_node+0x180/0x180 + +Link: http://lkml.kernel.org/r/1554185919-3010-1-git-send-email-sunny.s.zhang@oracle.com +Signed-off-by: Shuning Zhang +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: piaojun +Cc: "Gang He" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/export.c | 30 +++++++++++++++++++++++++++++- + 1 file changed, 29 insertions(+), 1 deletion(-) + +--- a/fs/ocfs2/export.c ++++ b/fs/ocfs2/export.c +@@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(s + u64 blkno; + struct dentry *parent; + struct inode *dir = d_inode(child); ++ int set; + + trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name, + (unsigned long long)OCFS2_I(dir)->ip_blkno); + ++ status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1); ++ if (status < 0) { ++ mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status); ++ parent = ERR_PTR(status); ++ goto bail; ++ } ++ + status = ocfs2_inode_lock(dir, NULL, 0); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + parent = ERR_PTR(status); +- goto bail; ++ goto unlock_nfs_sync; + } + + status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno); +@@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(s + goto bail_unlock; + } + ++ status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set); ++ if (status < 0) { ++ if (status == -EINVAL) { ++ status = -ESTALE; ++ } else ++ mlog(ML_ERROR, "test inode bit failed %d\n", status); ++ parent = ERR_PTR(status); ++ goto bail_unlock; ++ } ++ ++ trace_ocfs2_get_dentry_test_bit(status, set); ++ if (!set) { ++ status = -ESTALE; ++ parent = ERR_PTR(status); ++ goto bail_unlock; ++ } ++ + parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); + + bail_unlock: + ocfs2_inode_unlock(dir, 0); + ++unlock_nfs_sync: ++ ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1); ++ + bail: + trace_ocfs2_get_parent_end(parent); + diff 
--git a/queue-4.19/series b/queue-4.19/series index c36b5664cf0..b3cebf46470 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -40,3 +40,42 @@ asoc-max98090-fix-restore-of-dapm-muxes.patch asoc-rt5677-spi-disable-16bit-spi-transfers.patch asoc-fsl_esai-fix-missing-break-in-switch-statement.patch asoc-codec-hdac_hdmi-add-device_link-to-card-device.patch +bpf-arm64-remove-prefetch-insn-in-xadd-mapping.patch +crypto-ccree-remove-special-handling-of-chained-sg.patch +crypto-ccree-fix-mem-leak-on-error-path.patch +crypto-ccree-don-t-map-mac-key-on-stack.patch +crypto-ccree-use-correct-internal-state-sizes-for-export.patch +crypto-ccree-don-t-map-aead-key-and-iv-on-stack.patch +crypto-ccree-pm-resume-first-enable-the-source-clk.patch +crypto-ccree-host_power_down_en-should-be-the-last-cc-access-during-suspend.patch +crypto-ccree-add-function-to-handle-cryptocell-tee-fips-error.patch +crypto-ccree-handle-tee-fips-error-during-power-management-resume.patch +mm-mincore.c-make-mincore-more-conservative.patch +mm-huge_memory-fix-vmf_insert_pfn_-pmd-pud-crash-handle-unaligned-addresses.patch +mm-hugetlb.c-don-t-put_page-in-lock-of-hugetlb_lock.patch +hugetlb-use-same-fault-hash-key-for-shared-and-private-mappings.patch +ocfs2-fix-ocfs2-read-inode-data-panic-in-ocfs2_iget.patch +userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch +acpi-pm-set-enable_for_wake-for-wakeup-gpes-during-suspend-to-idle.patch +mfd-da9063-fix-otp-control-register-names-to-match-datasheets-for-da9063-63l.patch +mfd-max77620-fix-swapped-fps_period_max_us-values.patch +mtd-spi-nor-intel-spi-avoid-crossing-4k-address-boundary-on-read-write.patch +tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch +tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch +jbd2-check-superblock-mapped-prior-to-committing.patch +ext4-make-sanity-check-in-mballoc-more-strict.patch +ext4-protect-journal-inode-s-blocks-using-block_validity.patch 
+ext4-ignore-e_value_offs-for-xattrs-with-value-in-ea-inode.patch +ext4-avoid-drop-reference-to-iloc.bh-twice.patch +ext4-fix-use-after-free-race-with-debug_want_extra_isize.patch +ext4-actually-request-zeroing-of-inode-table-after-grow.patch +ext4-fix-ext4_show_options-for-file-systems-w-o-journal.patch +btrfs-check-the-first-key-and-level-for-cached-extent-buffer.patch +btrfs-correctly-free-extent-buffer-in-case-btree_read_extent_buffer_pages-fails.patch +btrfs-honour-fitrim-range-constraints-during-free-space-trim.patch +btrfs-send-flush-dellaloc-in-order-to-avoid-data-loss.patch +btrfs-do-not-start-a-transaction-during-fiemap.patch +btrfs-do-not-start-a-transaction-at-iterate_extent_inodes.patch +bcache-fix-a-race-between-cache-register-and-cacheset-unregister.patch +bcache-never-set-key_ptrs-of-journal-key-to-0-in-journal_reclaim.patch +ipmi-ssif-compare-block-number-correctly-for-multi-part-return-messages.patch diff --git a/queue-4.19/tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch b/queue-4.19/tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch new file mode 100644 index 00000000000..2b606ad5f10 --- /dev/null +++ b/queue-4.19/tty-vt-fix-write-write-race-in-ioctl-kdskbsent-handler.patch @@ -0,0 +1,183 @@ +From 46ca3f735f345c9d87383dd3a09fa5d43870770e Mon Sep 17 00:00:00 2001 +From: Sergei Trofimovich +Date: Sun, 10 Mar 2019 21:24:15 +0000 +Subject: tty/vt: fix write/write race in ioctl(KDSKBSENT) handler + +From: Sergei Trofimovich + +commit 46ca3f735f345c9d87383dd3a09fa5d43870770e upstream. + +The bug manifests as an attempt to access deallocated memory: + + BUG: unable to handle kernel paging request at ffff9c8735448000 + #PF error: [PROT] [WRITE] + PGD 288a05067 P4D 288a05067 PUD 288a07067 PMD 7f60c2063 PTE 80000007f5448161 + Oops: 0003 [#1] PREEMPT SMP + CPU: 6 PID: 388 Comm: loadkeys Tainted: G C 5.0.0-rc6-00153-g5ded5871030e #91 + Hardware name: Gigabyte Technology Co., Ltd. 
To be filled by O.E.M./H77M-D3H, BIOS F12 11/14/2013 + RIP: 0010:__memmove+0x81/0x1a0 + Code: 4c 89 4f 10 4c 89 47 18 48 8d 7f 20 73 d4 48 83 c2 20 e9 a2 00 00 00 66 90 48 89 d1 4c 8b 5c 16 f8 4c 8d 54 17 f8 48 c1 e9 03 48 a5 4d 89 1a e9 0c 01 00 00 0f 1f 40 00 48 89 d1 4c 8b 1e 49 + RSP: 0018:ffffa1b9002d7d08 EFLAGS: 00010203 + RAX: ffff9c873541af43 RBX: ffff9c873541af43 RCX: 00000c6f105cd6bf + RDX: 0000637882e986b6 RSI: ffff9c8735447ffb RDI: ffff9c8735447ffb + RBP: ffff9c8739cd3800 R08: ffff9c873b802f00 R09: 00000000fffff73b + R10: ffffffffb82b35f1 R11: 00505b1b004d5b1b R12: 0000000000000000 + R13: ffff9c873541af3d R14: 000000000000000b R15: 000000000000000c + FS: 00007f450c390580(0000) GS:ffff9c873f180000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: ffff9c8735448000 CR3: 00000007e213c002 CR4: 00000000000606e0 + Call Trace: + vt_do_kdgkb_ioctl+0x34d/0x440 + vt_ioctl+0xba3/0x1190 + ? __bpf_prog_run32+0x39/0x60 + ? mem_cgroup_commit_charge+0x7b/0x4e0 + tty_ioctl+0x23f/0x920 + ? preempt_count_sub+0x98/0xe0 + ? __seccomp_filter+0x67/0x600 + do_vfs_ioctl+0xa2/0x6a0 + ? syscall_trace_enter+0x192/0x2d0 + ksys_ioctl+0x3a/0x70 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x54/0xe0 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +The bug manifests on systemd systems with multiple vtcon devices: + # cat /sys/devices/virtual/vtconsole/vtcon0/name + (S) dummy device + # cat /sys/devices/virtual/vtconsole/vtcon1/name + (M) frame buffer device + +There systemd runs 'loadkeys' tool in parallel for each vtcon +instance. This causes two parallel ioctl(KDSKBSENT) calls to +race into adding the same entry into 'func_table' array at: + + drivers/tty/vt/keyboard.c:vt_do_kdgkb_ioctl() + +The function has no locking around writes to 'func_table'. 
+ +The simplest reproducer is to have initrams with the following +init on a 8-CPU machine x86_64: + + #!/bin/sh + + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + loadkeys -q windowkeys ru4 & + wait + +The change adds lock on write path only. Reads are still racy. + +CC: Greg Kroah-Hartman +CC: Jiri Slaby +Link: https://lkml.org/lkml/2019/2/17/256 +Signed-off-by: Sergei Trofimovich +Cc: stable +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/vt/keyboard.c | 33 +++++++++++++++++++++++++++------ + 1 file changed, 27 insertions(+), 6 deletions(-) + +--- a/drivers/tty/vt/keyboard.c ++++ b/drivers/tty/vt/keyboard.c +@@ -123,6 +123,7 @@ static const int NR_TYPES = ARRAY_SIZE(m + static struct input_handler kbd_handler; + static DEFINE_SPINLOCK(kbd_event_lock); + static DEFINE_SPINLOCK(led_lock); ++static DEFINE_SPINLOCK(func_buf_lock); /* guard 'func_buf' and friends */ + static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; /* keyboard key bitmap */ + static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. 
*/ + static bool dead_key_next; +@@ -1990,11 +1991,12 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + char *p; + u_char *q; + u_char __user *up; +- int sz; ++ int sz, fnw_sz; + int delta; + char *first_free, *fj, *fnw; + int i, j, k; + int ret; ++ unsigned long flags; + + if (!capable(CAP_SYS_TTY_CONFIG)) + perm = 0; +@@ -2037,7 +2039,14 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + goto reterr; + } + ++ fnw = NULL; ++ fnw_sz = 0; ++ /* race aginst other writers */ ++ again: ++ spin_lock_irqsave(&func_buf_lock, flags); + q = func_table[i]; ++ ++ /* fj pointer to next entry after 'q' */ + first_free = funcbufptr + (funcbufsize - funcbufleft); + for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) + ; +@@ -2045,10 +2054,12 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + fj = func_table[j]; + else + fj = first_free; +- ++ /* buffer usage increase by new entry */ + delta = (q ? -strlen(q) : 1) + strlen(kbs->kb_string); ++ + if (delta <= funcbufleft) { /* it fits in current buf */ + if (j < MAX_NR_FUNC) { ++ /* make enough space for new entry at 'fj' */ + memmove(fj + delta, fj, first_free - fj); + for (k = j; k < MAX_NR_FUNC; k++) + if (func_table[k]) +@@ -2061,20 +2072,28 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + sz = 256; + while (sz < funcbufsize - funcbufleft + delta) + sz <<= 1; +- fnw = kmalloc(sz, GFP_KERNEL); +- if(!fnw) { +- ret = -ENOMEM; +- goto reterr; ++ if (fnw_sz != sz) { ++ spin_unlock_irqrestore(&func_buf_lock, flags); ++ kfree(fnw); ++ fnw = kmalloc(sz, GFP_KERNEL); ++ fnw_sz = sz; ++ if (!fnw) { ++ ret = -ENOMEM; ++ goto reterr; ++ } ++ goto again; + } + + if (!q) + func_table[i] = fj; ++ /* copy data before insertion point to new location */ + if (fj > funcbufptr) + memmove(fnw, funcbufptr, fj - funcbufptr); + for (k = 0; k < j; k++) + if (func_table[k]) + func_table[k] = fnw + (func_table[k] - funcbufptr); + ++ /* copy data after insertion point to new location */ + if (first_free > fj) { + memmove(fnw + (fj - funcbufptr) + delta, fj, first_free - 
fj); + for (k = j; k < MAX_NR_FUNC; k++) +@@ -2087,7 +2106,9 @@ int vt_do_kdgkb_ioctl(int cmd, struct kb + funcbufleft = funcbufleft - delta + sz - funcbufsize; + funcbufsize = sz; + } ++ /* finally insert item itself */ + strcpy(func_table[i], kbs->kb_string); ++ spin_unlock_irqrestore(&func_buf_lock, flags); + break; + } + ret = 0; diff --git a/queue-4.19/tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch b/queue-4.19/tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch new file mode 100644 index 00000000000..76f3190480d --- /dev/null +++ b/queue-4.19/tty-vt.c-fix-tiocl_blankscreen-console-blanking-if-blankinterval-0.patch @@ -0,0 +1,69 @@ +From 75ddbc1fb11efac87b611d48e9802f6fe2bb2163 Mon Sep 17 00:00:00 2001 +From: Yifeng Li +Date: Tue, 5 Mar 2019 07:02:49 +0800 +Subject: tty: vt.c: Fix TIOCL_BLANKSCREEN console blanking if blankinterval == 0 + +From: Yifeng Li + +commit 75ddbc1fb11efac87b611d48e9802f6fe2bb2163 upstream. + +Previously, in the userspace, it was possible to use the "setterm" command +from util-linux to blank the VT console by default, using the following +command. + +According to the man page, + +> The force option keeps the screen blank even if a key is pressed. + +It was implemented by calling TIOCL_BLANKSCREEN. + + case BLANKSCREEN: + ioctlarg = TIOCL_BLANKSCREEN; + if (ioctl(STDIN_FILENO, TIOCLINUX, &ioctlarg)) + warn(_("cannot force blank")); + break; + +However, after Linux 4.12, this command ceased to work anymore, which is +unexpected. By inspecting the kernel source, it shows that the issue was +triggered by the side-effect from commit a4199f5eb809 ("tty: Disable +default console blanking interval"). + +The console blanking is implemented by function do_blank_screen() in vt.c: +"blank_state" will be initialized to "blank_normal_wait" in con_init() if +AND ONLY IF ("blankinterval" > 0). 
If "blankinterval" is 0, "blank_state" +will be "blank_off" (== 0), and a call to do_blank_screen() will always +abort, even if a forced blanking is required from the user by calling +TIOCL_BLANKSCREEN, the console won't be blanked. + +This behavior is unexpected from a user's point-of-view, since it's not +mentioned in any documentation. The setterm man page suggests it will +always work, and the kernel comments in uapi/linux/tiocl.h says + +> /* keep screen blank even if a key is pressed */ +> #define TIOCL_BLANKSCREEN 14 + +To fix it, we simply remove the "blank_state != blank_normal_wait" check, as +pointed out by Nicolas Pitre, this check doesn't logically make sense +and it's safe to remove. + +Suggested-by: Nicolas Pitre +Fixes: a4199f5eb809 ("tty: Disable default console blanking interval") +Signed-off-by: Yifeng Li +Cc: stable +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/vt/vt.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/tty/vt/vt.c ++++ b/drivers/tty/vt/vt.c +@@ -4155,8 +4155,6 @@ void do_blank_screen(int entering_gfx) + return; + } + +- if (blank_state != blank_normal_wait) +- return; + blank_state = blank_off; + + /* don't blank graphics */ diff --git a/queue-4.19/userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch b/queue-4.19/userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch new file mode 100644 index 00000000000..261b035b127 --- /dev/null +++ b/queue-4.19/userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails.patch @@ -0,0 +1,135 @@ +From c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Tue, 14 May 2019 15:40:46 -0700 +Subject: userfaultfd: use RCU to free the task struct when fork fails + +From: Andrea Arcangeli + +commit c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 upstream. + +The task structure is freed while get_mem_cgroup_from_mm() holds +rcu_read_lock() and dereferences mm->owner.
+ + get_mem_cgroup_from_mm() failing fork() + ---- --- + task = mm->owner + mm->owner = NULL; + free(task) + if (task) *task; /* use after free */ + +The fix consists in freeing the task with RCU also in the fork failure +case, exactly like it always happens for the regular exit(2) path. That +is enough to make the rcu_read_lock hold in get_mem_cgroup_from_mm() +(left side above) effective to avoid a use after free when dereferencing +the task structure. + +An alternate possible fix would be to defer the delivery of the +userfaultfd contexts to the monitor until after fork() is guaranteed to +succeed. Such a change would require more changes because it would +create a strict ordering dependency where the uffd methods would need to +be called beyond the last potentially failing branch in order to be +safe. This solution as opposed only adds the dependency to common code +to set mm->owner to NULL and to free the task struct that was pointed by +mm->owner with RCU, if fork ends up failing. The userfaultfd methods +can still be called anywhere during the fork runtime and the monitor +will keep discarding orphaned "mm" coming from failed forks in userland. + +This race condition couldn't trigger if CONFIG_MEMCG was set =n at build +time. + +[aarcange@redhat.com: improve changelog, reduce #ifdefs per Michal] + Link: http://lkml.kernel.org/r/20190429035752.4508-1-aarcange@redhat.com +Link: http://lkml.kernel.org/r/20190325225636.11635-2-aarcange@redhat.com +Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event") +Signed-off-by: Andrea Arcangeli +Tested-by: zhong jiang +Reported-by: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com +Cc: Oleg Nesterov +Cc: Jann Horn +Cc: Hugh Dickins +Cc: Mike Rapoport +Cc: Mike Kravetz +Cc: Peter Xu +Cc: Jason Gunthorpe +Cc: "Kirill A . 
Shutemov" +Cc: Michal Hocko +Cc: zhong jiang +Cc: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/fork.c | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -907,6 +907,15 @@ static void mm_init_aio(struct mm_struct + #endif + } + ++static __always_inline void mm_clear_owner(struct mm_struct *mm, ++ struct task_struct *p) ++{ ++#ifdef CONFIG_MEMCG ++ if (mm->owner == p) ++ WRITE_ONCE(mm->owner, NULL); ++#endif ++} ++ + static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) + { + #ifdef CONFIG_MEMCG +@@ -1286,6 +1295,7 @@ static struct mm_struct *dup_mm(struct t + free_pt: + /* don't put binfmt in mmput, we haven't got module yet */ + mm->binfmt = NULL; ++ mm_init_owner(mm, NULL); + mmput(mm); + + fail_nomem: +@@ -1617,6 +1627,21 @@ static inline void rcu_copy_process(stru + #endif /* #ifdef CONFIG_TASKS_RCU */ + } + ++static void __delayed_free_task(struct rcu_head *rhp) ++{ ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ free_task(tsk); ++} ++ ++static __always_inline void delayed_free_task(struct task_struct *tsk) ++{ ++ if (IS_ENABLED(CONFIG_MEMCG)) ++ call_rcu(&tsk->rcu, __delayed_free_task); ++ else ++ free_task(tsk); ++} ++ + /* + * This creates a new process as a copy of the old one, + * but does not actually start it yet. 
+@@ -2072,8 +2097,10 @@ bad_fork_cleanup_io: + bad_fork_cleanup_namespaces: + exit_task_namespaces(p); + bad_fork_cleanup_mm: +- if (p->mm) ++ if (p->mm) { ++ mm_clear_owner(p->mm, p); + mmput(p->mm); ++ } + bad_fork_cleanup_signal: + if (!(clone_flags & CLONE_THREAD)) + free_signal_struct(p->signal); +@@ -2104,7 +2131,7 @@ bad_fork_cleanup_count: + bad_fork_free: + p->state = TASK_DEAD; + put_task_stack(p); +- free_task(p); ++ delayed_free_task(p); + fork_out: + spin_lock_irq(&current->sighand->siglock); + hlist_del_init(&delayed.node);