From: Greg Kroah-Hartman Date: Mon, 28 Sep 2020 14:31:10 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v4.4.238~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f3568e871e7a1cabc1d253a300b909cd3dc77a22;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch s390-dasd-fix-zero-write-for-fba-devices.patch s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch tracing-fix-double-free.patch --- diff --git a/queue-5.4/btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch b/queue-5.4/btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch new file mode 100644 index 00000000000..45af66f601c --- /dev/null +++ b/queue-5.4/btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch @@ -0,0 +1,105 @@ +From 35be8851d172c6e3db836c0f28c19087b10c9e00 Mon Sep 17 00:00:00 2001 +From: Johannes Thumshirn +Date: Mon, 21 Sep 2020 16:57:14 +0900 +Subject: btrfs: fix overflow when copying corrupt csums for a message + +From: Johannes Thumshirn + +commit 35be8851d172c6e3db836c0f28c19087b10c9e00 upstream. + +Syzkaller reported a buffer overflow in btree_readpage_end_io_hook() +when loop mounting a crafted image: + + detected buffer overflow in memcpy + ------------[ cut here ]------------ + kernel BUG at lib/string.c:1129! 
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN + CPU: 1 PID: 26 Comm: kworker/u4:2 Not tainted 5.9.0-rc4-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + Workqueue: btrfs-endio-meta btrfs_work_helper + RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129 + RSP: 0018:ffffc90000e27980 EFLAGS: 00010286 + RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000 + RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22 + RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7 + R10: 0000000000000000 R11: 0000000000077578 R12: 00000000ffffff6e + R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c + FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000557335f440d0 CR3: 000000009647d000 CR4: 00000000001506e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + memcpy include/linux/string.h:405 [inline] + btree_readpage_end_io_hook.cold+0x206/0x221 fs/btrfs/disk-io.c:642 + end_bio_extent_readpage+0x4de/0x10c0 fs/btrfs/extent_io.c:2854 + bio_endio+0x3cf/0x7f0 block/bio.c:1449 + end_workqueue_fn+0x114/0x170 fs/btrfs/disk-io.c:1695 + btrfs_work_helper+0x221/0xe20 fs/btrfs/async-thread.c:318 + process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 + worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 + kthread+0x3b5/0x4a0 kernel/kthread.c:292 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 + Modules linked in: + ---[ end trace b68924293169feef ]--- + RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129 + RSP: 0018:ffffc90000e27980 EFLAGS: 00010286 + RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000 + RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22 + RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7 + R10: 0000000000000000 R11: 0000000000077578 R12: 
00000000ffffff6e + R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c + FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f95b7c4d008 CR3: 000000009647d000 CR4: 00000000001506e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +The overflow happens, because in btree_readpage_end_io_hook() we assume +that we have found a 4 byte checksum instead of the real possible 32 +bytes we have for the checksums. + +With the fix applied: + +[ 35.726623] BTRFS: device fsid 815caf9a-dc43-4d2a-ac54-764b8333d765 devid 1 transid 5 /dev/loop0 scanned by syz-repro (215) +[ 35.738994] BTRFS info (device loop0): disk space caching is enabled +[ 35.738998] BTRFS info (device loop0): has skinny extents +[ 35.743337] BTRFS warning (device loop0): loop0 checksum verify failed on 1052672 wanted 0xf9c035fc8d239a54 found 0x67a25c14b7eabcf9 level 0 +[ 35.743420] BTRFS error (device loop0): failed to read chunk root +[ 35.745899] BTRFS error (device loop0): open_ctree failed + +Reported-by: syzbot+e864a35d361e1d4e29a5@syzkaller.appspotmail.com +Fixes: d5178578bcd4 ("btrfs: directly call into crypto framework for checksumming") +CC: stable@vger.kernel.org # 5.4+ +Signed-off-by: Johannes Thumshirn +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -649,16 +649,15 @@ static int btree_readpage_end_io_hook(st + goto err; + + if (memcmp_extent_buffer(eb, result, 0, csum_size)) { +- u32 val; +- u32 found = 0; +- +- memcpy(&found, result, csum_size); ++ u8 val[BTRFS_CSUM_SIZE] = { 0 }; + + read_extent_buffer(eb, &val, 0, csum_size); + btrfs_warn_rl(fs_info, +- "%s checksum verify failed on %llu wanted %x found %x level %d", ++ "%s checksum verify 
failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d", + fs_info->sb->s_id, eb->start, +- val, found, btrfs_header_level(eb)); ++ CSUM_FMT_VALUE(csum_size, val), ++ CSUM_FMT_VALUE(csum_size, result), ++ btrfs_header_level(eb)); + ret = -EUCLEAN; + goto err; + } diff --git a/queue-5.4/dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch b/queue-5.4/dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch new file mode 100644 index 00000000000..2a073755d35 --- /dev/null +++ b/queue-5.4/dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch @@ -0,0 +1,53 @@ +From 19a508bd1ad8e444de86873bf2f2b2ab8edd6552 Mon Sep 17 00:00:00 2001 +From: Charan Teja Reddy +Date: Fri, 18 Sep 2020 16:02:31 +0530 +Subject: dmabuf: fix NULL pointer dereference in dma_buf_release() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Charan Teja Reddy + +commit 19a508bd1ad8e444de86873bf2f2b2ab8edd6552 upstream. + +NULL pointer dereference is observed while exporting the dmabuf but +failed to allocate the 'struct file' which results into the dropping of +the allocated dentry corresponding to this file in the dmabuf fs, which +is ending up in dma_buf_release() and accessing the uninitialized +dentry->d_fsdata. + +Call stack on 5.4 is below: + dma_buf_release+0x2c/0x254 drivers/dma-buf/dma-buf.c:88 + __dentry_kill+0x294/0x31c fs/dcache.c:584 + dentry_kill fs/dcache.c:673 [inline] + dput+0x250/0x380 fs/dcache.c:859 + path_put+0x24/0x40 fs/namei.c:485 + alloc_file_pseudo+0x1a4/0x200 fs/file_table.c:235 + dma_buf_getfile drivers/dma-buf/dma-buf.c:473 [inline] + dma_buf_export+0x25c/0x3ec drivers/dma-buf/dma-buf.c:585 + +Fix this by checking for the valid pointer in the dentry->d_fsdata. 
+ +Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops") +Cc: [5.7+] +Signed-off-by: Charan Teja Reddy +Reviewed-by: Christian König +Link: https://patchwork.freedesktop.org/patch/391319/ +Signed-off-by: Christian König +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/dma-buf/dma-buf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/dma-buf/dma-buf.c ++++ b/drivers/dma-buf/dma-buf.c +@@ -59,6 +59,8 @@ static void dma_buf_release(struct dentr + struct dma_buf *dmabuf; + + dmabuf = dentry->d_fsdata; ++ if (unlikely(!dmabuf)) ++ return; + + BUG_ON(dmabuf->vmapping_counter); + diff --git a/queue-5.4/kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch b/queue-5.4/kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch new file mode 100644 index 00000000000..421ef46ccdb --- /dev/null +++ b/queue-5.4/kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch @@ -0,0 +1,99 @@ +From 3031313eb3d549b7ad6f9fbcc52ba04412e3eb9e Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Tue, 1 Sep 2020 00:12:07 +0900 +Subject: kprobes: Fix to check probe enabled before disarm_kprobe_ftrace() + +From: Masami Hiramatsu + +commit 3031313eb3d549b7ad6f9fbcc52ba04412e3eb9e upstream. + +Commit 0cb2f1372baa ("kprobes: Fix NULL pointer dereference at +kprobe_ftrace_handler") fixed one bug but not completely fixed yet. +If we run a kprobe_module.tc of ftracetest, kernel showed a warning +as below. + +# ./ftracetest test.d/kprobe/kprobe_module.tc +=== Ftrace unit tests === +[1] Kprobe dynamic event - probing module +... 
+[ 22.400215] ------------[ cut here ]------------ +[ 22.400962] Failed to disarm kprobe-ftrace at trace_printk_irq_work+0x0/0x7e [trace_printk] (-2) +[ 22.402139] WARNING: CPU: 7 PID: 200 at kernel/kprobes.c:1091 __disarm_kprobe_ftrace.isra.0+0x7e/0xa0 +[ 22.403358] Modules linked in: trace_printk(-) +[ 22.404028] CPU: 7 PID: 200 Comm: rmmod Not tainted 5.9.0-rc2+ #66 +[ 22.404870] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 +[ 22.406139] RIP: 0010:__disarm_kprobe_ftrace.isra.0+0x7e/0xa0 +[ 22.406947] Code: 30 8b 03 eb c9 80 3d e5 09 1f 01 00 75 dc 49 8b 34 24 89 c2 48 c7 c7 a0 c2 05 82 89 45 e4 c6 05 cc 09 1f 01 01 e8 a9 c7 f0 ff <0f> 0b 8b 45 e4 eb b9 89 c6 48 c7 c7 70 c2 05 82 89 45 e4 e8 91 c7 +[ 22.409544] RSP: 0018:ffffc90000237df0 EFLAGS: 00010286 +[ 22.410385] RAX: 0000000000000000 RBX: ffffffff83066024 RCX: 0000000000000000 +[ 22.411434] RDX: 0000000000000001 RSI: ffffffff810de8d3 RDI: ffffffff810de8d3 +[ 22.412687] RBP: ffffc90000237e10 R08: 0000000000000001 R09: 0000000000000001 +[ 22.413762] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88807c478640 +[ 22.414852] R13: ffffffff8235ebc0 R14: ffffffffa00060c0 R15: 0000000000000000 +[ 22.415941] FS: 00000000019d48c0(0000) GS:ffff88807d7c0000(0000) knlGS:0000000000000000 +[ 22.417264] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 22.418176] CR2: 00000000005bb7e3 CR3: 0000000078f7a000 CR4: 00000000000006a0 +[ 22.419309] Call Trace: +[ 22.419990] kill_kprobe+0x94/0x160 +[ 22.420652] kprobes_module_callback+0x64/0x230 +[ 22.421470] notifier_call_chain+0x4f/0x70 +[ 22.422184] blocking_notifier_call_chain+0x49/0x70 +[ 22.422979] __x64_sys_delete_module+0x1ac/0x240 +[ 22.423733] do_syscall_64+0x38/0x50 +[ 22.424366] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 22.425176] RIP: 0033:0x4bb81d +[ 22.425741] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 
c3 48 c7 c1 e0 ff ff ff f7 d8 64 89 01 48 +[ 22.428726] RSP: 002b:00007ffc70fef008 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 +[ 22.430169] RAX: ffffffffffffffda RBX: 00000000019d48a0 RCX: 00000000004bb81d +[ 22.431375] RDX: 0000000000000000 RSI: 0000000000000880 RDI: 00007ffc70fef028 +[ 22.432543] RBP: 0000000000000880 R08: 00000000ffffffff R09: 00007ffc70fef320 +[ 22.433692] R10: 0000000000656300 R11: 0000000000000246 R12: 00007ffc70fef028 +[ 22.434635] R13: 0000000000000000 R14: 0000000000000002 R15: 0000000000000000 +[ 22.435682] irq event stamp: 1169 +[ 22.436240] hardirqs last enabled at (1179): [] console_unlock+0x422/0x580 +[ 22.437466] hardirqs last disabled at (1188): [] console_unlock+0x7b/0x580 +[ 22.438608] softirqs last enabled at (866): [] __do_softirq+0x38e/0x490 +[ 22.439637] softirqs last disabled at (859): [] asm_call_on_stack+0x12/0x20 +[ 22.440690] ---[ end trace 1e7ce7e1e4567276 ]--- +[ 22.472832] trace_kprobe: This probe might be able to register after target module is loaded. Continue. + +This is because the kill_kprobe() calls disarm_kprobe_ftrace() even +if the given probe is not enabled. In that case, ftrace_set_filter_ip() +fails because the given probe point is not registered to ftrace. + +Fix to check the given (going) probe is enabled before invoking +disarm_kprobe_ftrace(). + +Link: https://lkml.kernel.org/r/159888672694.1411785.5987998076694782591.stgit@devnote2 + +Fixes: 0cb2f1372baa ("kprobes: Fix NULL pointer dereference at kprobe_ftrace_handler") +Cc: Ingo Molnar +Cc: "Naveen N . 
Rao" +Cc: Anil S Keshavamurthy +Cc: David Miller +Cc: Muchun Song +Cc: Chengming Zhou +Cc: stable@vger.kernel.org +Signed-off-by: Masami Hiramatsu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/kprobes.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/kernel/kprobes.c ++++ b/kernel/kprobes.c +@@ -2110,9 +2110,10 @@ static void kill_kprobe(struct kprobe *p + + /* + * The module is going away. We should disarm the kprobe which +- * is using ftrace. ++ * is using ftrace, because ftrace framework is still available at ++ * MODULE_STATE_GOING notification. + */ +- if (kprobe_ftrace(p)) ++ if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) + disarm_kprobe_ftrace(p); + } + diff --git a/queue-5.4/kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch b/queue-5.4/kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch new file mode 100644 index 00000000000..56179e04c40 --- /dev/null +++ b/queue-5.4/kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch @@ -0,0 +1,104 @@ +From 82d083ab60c3693201c6f5c7a5f23a6ed422098d Mon Sep 17 00:00:00 2001 +From: Masami Hiramatsu +Date: Thu, 10 Sep 2020 17:55:05 +0900 +Subject: kprobes: tracing/kprobes: Fix to kill kprobes on initmem after boot + +From: Masami Hiramatsu + +commit 82d083ab60c3693201c6f5c7a5f23a6ed422098d upstream. + +Since kprobe_event= cmdline option allows user to put kprobes on the +functions in initmem, kprobe has to make such probes gone after boot. +Currently the probes on the init functions in modules will be handled +by module callback, but the kernel init text isn't handled. +Without this, kprobes may access non-exist text area to disable or +remove it. 
+ +Link: https://lkml.kernel.org/r/159972810544.428528.1839307531600646955.stgit@devnote2 + +Fixes: 970988e19eb0 ("tracing/kprobe: Add kprobe_event= boot parameter") +Cc: Jonathan Corbet +Cc: Shuah Khan +Cc: Randy Dunlap +Cc: Ingo Molnar +Cc: stable@vger.kernel.org +Signed-off-by: Masami Hiramatsu +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/kprobes.h | 5 +++++ + init/main.c | 2 ++ + kernel/kprobes.c | 22 ++++++++++++++++++++++ + 3 files changed, 29 insertions(+) + +--- a/include/linux/kprobes.h ++++ b/include/linux/kprobes.h +@@ -369,6 +369,8 @@ void unregister_kretprobes(struct kretpr + void kprobe_flush_task(struct task_struct *tk); + void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); + ++void kprobe_free_init_mem(void); ++ + int disable_kprobe(struct kprobe *kp); + int enable_kprobe(struct kprobe *kp); + +@@ -426,6 +428,9 @@ static inline void unregister_kretprobes + static inline void kprobe_flush_task(struct task_struct *tk) + { + } ++static inline void kprobe_free_init_mem(void) ++{ ++} + static inline int disable_kprobe(struct kprobe *kp) + { + return -ENOSYS; +--- a/init/main.c ++++ b/init/main.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1111,6 +1112,7 @@ static int __ref kernel_init(void *unuse + kernel_init_freeable(); + /* need to finish all async __init code before freeing the memory */ + async_synchronize_full(); ++ kprobe_free_init_mem(); + ftrace_free_init_mem(); + free_initmem(); + mark_readonly(); +--- a/kernel/kprobes.c ++++ b/kernel/kprobes.c +@@ -2309,6 +2309,28 @@ static struct notifier_block kprobe_modu + extern unsigned long __start_kprobe_blacklist[]; + extern unsigned long __stop_kprobe_blacklist[]; + ++void kprobe_free_init_mem(void) ++{ ++ void *start = (void *)(&__init_begin); ++ void *end = (void *)(&__init_end); ++ struct hlist_head *head; ++ struct kprobe *p; ++ int i; ++ ++ 
mutex_lock(&kprobe_mutex); ++ ++ /* Kill all kprobes on initmem */ ++ for (i = 0; i < KPROBE_TABLE_SIZE; i++) { ++ head = &kprobe_table[i]; ++ hlist_for_each_entry(p, head, hlist) { ++ if (start <= (void *)p->addr && (void *)p->addr < end) ++ kill_kprobe(p); ++ } ++ } ++ ++ mutex_unlock(&kprobe_mutex); ++} ++ + static int __init init_kprobes(void) + { + int i, err = 0; diff --git a/queue-5.4/mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch b/queue-5.4/mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch new file mode 100644 index 00000000000..b0e8ea10379 --- /dev/null +++ b/queue-5.4/mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch @@ -0,0 +1,268 @@ +From d3f7b1bb204099f2f7306318896223e8599bb6a2 Mon Sep 17 00:00:00 2001 +From: Vasily Gorbik +Date: Fri, 25 Sep 2020 21:19:10 -0700 +Subject: mm/gup: fix gup_fast with dynamic page table folding + +From: Vasily Gorbik + +commit d3f7b1bb204099f2f7306318896223e8599bb6a2 upstream. + +Currently to make sure that every page table entry is read just once +gup_fast walks perform READ_ONCE and pass pXd value down to the next +gup_pXd_range function by value e.g.: + + static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, + unsigned int flags, struct page **pages, int *nr) + ... + pudp = pud_offset(&p4d, addr); + +This function passes a reference on that local value copy to pXd_offset, +and might get the very same pointer in return. This happens when the +level is folded (on most arches), and that pointer should not be +iterated. + +On s390 due to the fact that each task might have different 5,4 or +3-level address translation and hence different levels folded the logic +is more complex and non-iteratable pointer to a local copy leads to +severe problems. 
+ +Here is an example of what happens with gup_fast on s390, for a task +with 3-level paging, crossing a 2 GB pud boundary: + + // addr = 0x1007ffff000, end = 0x10080001000 + static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, + unsigned int flags, struct page **pages, int *nr) + { + unsigned long next; + pud_t *pudp; + + // pud_offset returns &p4d itself (a pointer to a value on stack) + pudp = pud_offset(&p4d, addr); + do { + // on second iteration reading "random" stack value + pud_t pud = READ_ONCE(*pudp); + + // next = 0x10080000000, due to PUD_SIZE/MASK != PGDIR_SIZE/MASK on s390 + next = pud_addr_end(addr, end); + ... + } while (pudp++, addr = next, addr != end); // pudp++ iterating over stack + + return 1; + } + +This happens since s390 moved to common gup code with commit +d1874a0c2805 ("s390/mm: make the pxd_offset functions more robust") and +commit 1a42010cdc26 ("s390/mm: convert to the generic +get_user_pages_fast code"). + +s390 tried to mimic static level folding by changing pXd_offset +primitives to always calculate top level page table offset in pgd_offset +and just return the value passed when pXd_offset has to act as folded. + +What is crucial for gup_fast and what has been overlooked is that +PxD_SIZE/MASK and thus pXd_addr_end should also change correspondingly. +And the latter is not possible with dynamic folding. + +To fix the issue in addition to pXd values pass original pXdp pointers +down to gup_pXd_range functions. And introduce pXd_offset_lockless +helpers, which take an additional pXd entry value parameter. 
This has +already been discussed in + + https://lkml.kernel.org/r/20190418100218.0a4afd51@mschwideX1 + +Fixes: 1a42010cdc26 ("s390/mm: convert to the generic get_user_pages_fast code") +Signed-off-by: Vasily Gorbik +Signed-off-by: Andrew Morton +Reviewed-by: Gerald Schaefer +Reviewed-by: Alexander Gordeev +Reviewed-by: Jason Gunthorpe +Reviewed-by: Mike Rapoport +Reviewed-by: John Hubbard +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Dave Hansen +Cc: Russell King +Cc: Catalin Marinas +Cc: Will Deacon +Cc: Michael Ellerman +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Jeff Dike +Cc: Richard Weinberger +Cc: Dave Hansen +Cc: Andy Lutomirski +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Borislav Petkov +Cc: Arnd Bergmann +Cc: Andrey Ryabinin +Cc: Heiko Carstens +Cc: Christian Borntraeger +Cc: Claudio Imbrenda +Cc: [5.2+] +Link: https://lkml.kernel.org/r/patch.git-943f1e5dcff2.your-ad-here.call-01599856292-ext-8676@work.hours +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/include/asm/pgtable.h | 44 +++++++++++++++++++++++++++++----------- + include/asm-generic/pgtable.h | 10 +++++++++ + mm/gup.c | 18 ++++++++-------- + 3 files changed, 51 insertions(+), 21 deletions(-) + +--- a/arch/s390/include/asm/pgtable.h ++++ b/arch/s390/include/asm/pgtable.h +@@ -1247,26 +1247,46 @@ static inline pgd_t *pgd_offset_raw(pgd_ + #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) + #define pgd_offset_k(address) pgd_offset(&init_mm, address) + +-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) ++static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address) + { +- if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) +- return (p4d_t *) pgd_deref(*pgd) + p4d_index(address); +- return (p4d_t *) pgd; ++ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) ++ return (p4d_t *) pgd_deref(pgd) + p4d_index(address); ++ return (p4d_t *) pgdp; + } 
++#define p4d_offset_lockless p4d_offset_lockless + +-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) ++static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address) + { +- if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) +- return (pud_t *) p4d_deref(*p4d) + pud_index(address); +- return (pud_t *) p4d; ++ return p4d_offset_lockless(pgdp, *pgdp, address); + } + +-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) ++static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address) + { +- if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) +- return (pmd_t *) pud_deref(*pud) + pmd_index(address); +- return (pmd_t *) pud; ++ if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) ++ return (pud_t *) p4d_deref(p4d) + pud_index(address); ++ return (pud_t *) p4dp; + } ++#define pud_offset_lockless pud_offset_lockless ++ ++static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address) ++{ ++ return pud_offset_lockless(p4dp, *p4dp, address); ++} ++#define pud_offset pud_offset ++ ++static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address) ++{ ++ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) ++ return (pmd_t *) pud_deref(pud) + pmd_index(address); ++ return (pmd_t *) pudp; ++} ++#define pmd_offset_lockless pmd_offset_lockless ++ ++static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address) ++{ ++ return pmd_offset_lockless(pudp, *pudp, address); ++} ++#define pmd_offset pmd_offset + + static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) + { +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -1171,6 +1171,16 @@ static inline bool arch_has_pfn_modify_c + #endif + #endif + ++#ifndef p4d_offset_lockless ++#define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address) ++#endif ++#ifndef pud_offset_lockless ++#define 
pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address) ++#endif ++#ifndef pmd_offset_lockless ++#define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address) ++#endif ++ + /* + * On some architectures it depends on the mm if the p4d/pud or pmd + * layer of the page table hierarchy is folded or not. +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -2184,13 +2184,13 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + return 1; + } + +-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, ++static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end, + unsigned int flags, struct page **pages, int *nr) + { + unsigned long next; + pmd_t *pmdp; + +- pmdp = pmd_offset(&pud, addr); ++ pmdp = pmd_offset_lockless(pudp, pud, addr); + do { + pmd_t pmd = READ_ONCE(*pmdp); + +@@ -2227,13 +2227,13 @@ static int gup_pmd_range(pud_t pud, unsi + return 1; + } + +-static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, ++static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, + unsigned int flags, struct page **pages, int *nr) + { + unsigned long next; + pud_t *pudp; + +- pudp = pud_offset(&p4d, addr); ++ pudp = pud_offset_lockless(p4dp, p4d, addr); + do { + pud_t pud = READ_ONCE(*pudp); + +@@ -2248,20 +2248,20 @@ static int gup_pud_range(p4d_t p4d, unsi + if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, + PUD_SHIFT, next, flags, pages, nr)) + return 0; +- } else if (!gup_pmd_range(pud, addr, next, flags, pages, nr)) ++ } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; + } + +-static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, ++static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, + unsigned int flags, struct page **pages, int *nr) + { + unsigned long next; + p4d_t *p4dp; + +- p4dp = p4d_offset(&pgd, addr); ++ p4dp = 
p4d_offset_lockless(pgdp, pgd, addr); + do { + p4d_t p4d = READ_ONCE(*p4dp); + +@@ -2273,7 +2273,7 @@ static int gup_p4d_range(pgd_t pgd, unsi + if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, + P4D_SHIFT, next, flags, pages, nr)) + return 0; +- } else if (!gup_pud_range(p4d, addr, next, flags, pages, nr)) ++ } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) + return 0; + } while (p4dp++, addr = next, addr != end); + +@@ -2301,7 +2301,7 @@ static void gup_pgd_range(unsigned long + if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, + PGDIR_SHIFT, next, flags, pages, nr)) + return; +- } else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr)) ++ } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) + return; + } while (pgdp++, addr = next, addr != end); + } diff --git a/queue-5.4/mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch b/queue-5.4/mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch new file mode 100644 index 00000000000..c8b1de9f38d --- /dev/null +++ b/queue-5.4/mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch @@ -0,0 +1,85 @@ +From 41663430588c737dd735bad5a0d1ba325dcabd59 Mon Sep 17 00:00:00 2001 +From: Gao Xiang +Date: Fri, 25 Sep 2020 21:19:01 -0700 +Subject: mm, THP, swap: fix allocating cluster for swapfile by mistake + +From: Gao Xiang + +commit 41663430588c737dd735bad5a0d1ba325dcabd59 upstream. + +SWP_FS is used to make swap_{read,write}page() go through the +filesystem, and it's only used for swap files over NFS. So, !SWP_FS +means non NFS for now, it could be either file backed or device backed. +Something similar goes with legacy SWP_FILE. + +So in order to achieve the goal of the original patch, SWP_BLKDEV should +be used instead. + +FS corruption can be observed with SSD device + XFS + fragmented +swapfile due to CONFIG_THP_SWAP=y. 
+ +I reproduced the issue with the following details: + +Environment: + + QEMU + upstream kernel + buildroot + NVMe (2 GB) + +Kernel config: + + CONFIG_BLK_DEV_NVME=y + CONFIG_THP_SWAP=y + +Some reproducible steps: + + mkfs.xfs -f /dev/nvme0n1 + mkdir /tmp/mnt + mount /dev/nvme0n1 /tmp/mnt + bs="32k" + sz="1024m" # doesn't matter too much, I also tried 16m + xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw + xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw + xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw + xfs_io -f -c "pwrite -F -S 0 -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw + xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fsync" /tmp/mnt/sw + + mkswap /tmp/mnt/sw + swapon /tmp/mnt/sw + + stress --vm 2 --vm-bytes 600M # doesn't matter too much as well + +Symptoms: + - FS corruption (e.g. checksum failure) + - memory corruption at: 0xd2808010 + - segfault + +Fixes: f0eea189e8e9 ("mm, THP, swap: Don't allocate huge cluster for file backed swap device") +Fixes: 38d8b4e6bdc8 ("mm, THP, swap: delay splitting THP during swap out") +Signed-off-by: Gao Xiang +Signed-off-by: Andrew Morton +Reviewed-by: "Huang, Ying" +Reviewed-by: Yang Shi +Acked-by: Rafael Aquini +Cc: Matthew Wilcox +Cc: Carlos Maiolino +Cc: Eric Sandeen +Cc: Dave Chinner +Cc: +Link: https://lkml.kernel.org/r/20200820045323.7809-1-hsiangkao@redhat.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/swapfile.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -1038,7 +1038,7 @@ start_over: + goto nextsi; + } + if (size == SWAPFILE_CLUSTER) { +- if (!(si->flags & SWP_FS)) ++ if (si->flags & SWP_BLKDEV) + n_ret = swap_alloc_cluster(si, swp_entries); + } else + n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, diff --git a/queue-5.4/s390-dasd-fix-zero-write-for-fba-devices.patch b/queue-5.4/s390-dasd-fix-zero-write-for-fba-devices.patch new file mode 100644 index 00000000000..7037d4f5114 
--- /dev/null +++ b/queue-5.4/s390-dasd-fix-zero-write-for-fba-devices.patch @@ -0,0 +1,69 @@ +From 709192d531e5b0a91f20aa14abfe2fc27ddd47af Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20H=C3=B6ppner?= +Date: Mon, 14 Sep 2020 13:56:47 +0200 +Subject: s390/dasd: Fix zero write for FBA devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jan Höppner + +commit 709192d531e5b0a91f20aa14abfe2fc27ddd47af upstream. + +A discard request that writes zeros using the global kernel internal +ZERO_PAGE will fail for machines with more than 2GB of memory due to the +location of the ZERO_PAGE. + +Fix this by using a driver owned global zero page allocated with GFP_DMA +flag set. + +Fixes: 28b841b3a7cb ("s390/dasd: Add discard support for FBA devices") +Signed-off-by: Jan Höppner +Reviewed-by: Stefan Haberland +Cc: # 4.14+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/block/dasd_fba.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/s390/block/dasd_fba.c ++++ b/drivers/s390/block/dasd_fba.c +@@ -40,6 +40,7 @@ + MODULE_LICENSE("GPL"); + + static struct dasd_discipline dasd_fba_discipline; ++static void *dasd_fba_zero_page; + + struct dasd_fba_private { + struct dasd_fba_characteristics rdc_data; +@@ -270,7 +271,7 @@ static void ccw_write_zero(struct ccw1 * + ccw->cmd_code = DASD_FBA_CCW_WRITE; + ccw->flags |= CCW_FLAG_SLI; + ccw->count = count; +- ccw->cda = (__u32) (addr_t) page_to_phys(ZERO_PAGE(0)); ++ ccw->cda = (__u32) (addr_t) dasd_fba_zero_page; + } + + /* +@@ -830,6 +831,11 @@ dasd_fba_init(void) + int ret; + + ASCEBC(dasd_fba_discipline.ebcname, 4); ++ ++ dasd_fba_zero_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); ++ if (!dasd_fba_zero_page) ++ return -ENOMEM; ++ + ret = ccw_driver_register(&dasd_fba_driver); + if (!ret) + wait_for_device_probe(); +@@ -841,6 +847,7 @@ static void __exit + dasd_fba_cleanup(void) + { + 
ccw_driver_unregister(&dasd_fba_driver); ++ free_page((unsigned long)dasd_fba_zero_page); + } + + module_init(dasd_fba_init); diff --git a/queue-5.4/s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch b/queue-5.4/s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch new file mode 100644 index 00000000000..9397b9aa28a --- /dev/null +++ b/queue-5.4/s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch @@ -0,0 +1,37 @@ +From f7e80983f0cf470bb82036e73bff4d5a7daf8fc2 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Mon, 21 Sep 2020 12:48:36 +0200 +Subject: s390/zcrypt: Fix ZCRYPT_PERDEV_REQCNT ioctl + +From: Christian Borntraeger + +commit f7e80983f0cf470bb82036e73bff4d5a7daf8fc2 upstream. + +reqcnt is an u32 pointer but we do copy sizeof(reqcnt) which is the +size of the pointer. This means we only copy 8 byte. Let us copy +the full monty. + +Signed-off-by: Christian Borntraeger +Cc: Harald Freudenberger +Cc: stable@vger.kernel.org +Fixes: af4a72276d49 ("s390/zcrypt: Support up to 256 crypto adapters.") +Reviewed-by: Harald Freudenberger +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/crypto/zcrypt_api.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/s390/crypto/zcrypt_api.c ++++ b/drivers/s390/crypto/zcrypt_api.c +@@ -1419,7 +1419,8 @@ static long zcrypt_unlocked_ioctl(struct + if (!reqcnt) + return -ENOMEM; + zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES); +- if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt))) ++ if (copy_to_user((int __user *) arg, reqcnt, ++ sizeof(u32) * AP_DEVICES)) + rc = -EFAULT; + kfree(reqcnt); + return rc; diff --git a/queue-5.4/series b/queue-5.4/series index cb3c0f44fbb..874e3a29433 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -374,3 +374,12 @@ alsa-usb-audio-add-delay-quirk-for-h570e-usb-headsets.patch alsa-hda-realtek-couldn-t-detect-mic-if-booting-with-headset-plugged.patch alsa-hda-realtek-enable-front-panel-headset-led-on-lenovo-thinkstation-p520.patch 
lib-string.c-implement-stpcpy.patch +tracing-fix-double-free.patch +s390-dasd-fix-zero-write-for-fba-devices.patch +kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch +kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch +btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch +dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch +mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch +mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch +s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch diff --git a/queue-5.4/tracing-fix-double-free.patch b/queue-5.4/tracing-fix-double-free.patch new file mode 100644 index 00000000000..b9a12276dc1 --- /dev/null +++ b/queue-5.4/tracing-fix-double-free.patch @@ -0,0 +1,46 @@ +From 46bbe5c671e06f070428b9be142cc4ee5cedebac Mon Sep 17 00:00:00 2001 +From: Tom Rix +Date: Mon, 7 Sep 2020 06:58:45 -0700 +Subject: tracing: fix double free + +From: Tom Rix + +commit 46bbe5c671e06f070428b9be142cc4ee5cedebac upstream. + +clang static analyzer reports this problem + +trace_events_hist.c:3824:3: warning: Attempt to free + released memory + kfree(hist_data->attrs->var_defs.name[i]); + +In parse_var_defs() if there is a problem allocating +var_defs.expr, the earlier var_defs.name is freed. +This free is duplicated by free_var_defs() which frees +the rest of the list. + +Because free_var_defs() has to run anyway, remove the +second free from parse_var_defs(). 
+ +Link: https://lkml.kernel.org/r/20200907135845.15804-1-trix@redhat.com + +Cc: stable@vger.kernel.org +Fixes: 30350d65ac56 ("tracing: Add variable support to hist triggers") +Reviewed-by: Tom Zanussi +Signed-off-by: Tom Rix +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace_events_hist.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -4770,7 +4770,6 @@ static int parse_var_defs(struct hist_tr + + s = kstrdup(field_str, GFP_KERNEL); + if (!s) { +- kfree(hist_data->attrs->var_defs.name[n_vars]); + ret = -ENOMEM; + goto free; + }