git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.8-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Sep 2020 14:31:32 +0000 (16:31 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Sep 2020 14:31:32 +0000 (16:31 +0200)
added patches:
btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch
btrfs-fix-put-of-uninitialized-kobject-after-seed-device-delete.patch
dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch
kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch
kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch
lib-bootconfig-fix-a-bug-of-breaking-existing-tree-nodes.patch
lib-bootconfig-fix-to-remove-tailing-spaces-after-value.patch
media-cec-adap.c-don-t-use-flush_scheduled_work.patch
mips-loongson2ef-disable-loongson-mmi-instructions.patch
mm-don-t-rely-on-system-state-to-detect-hot-plug-operations.patch
mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch
mm-replace-memmap_context-by-meminit_context.patch
mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch
mt76-mt7615-use-v1-mcu-api-on-mt7615-to-fix-issues-with-adding-removing-stations.patch
s390-dasd-fix-zero-write-for-fba-devices.patch
s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch
tracing-fix-double-free.patch

18 files changed:
queue-5.8/btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch [new file with mode: 0644]
queue-5.8/btrfs-fix-put-of-uninitialized-kobject-after-seed-device-delete.patch [new file with mode: 0644]
queue-5.8/dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch [new file with mode: 0644]
queue-5.8/kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch [new file with mode: 0644]
queue-5.8/kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch [new file with mode: 0644]
queue-5.8/lib-bootconfig-fix-a-bug-of-breaking-existing-tree-nodes.patch [new file with mode: 0644]
queue-5.8/lib-bootconfig-fix-to-remove-tailing-spaces-after-value.patch [new file with mode: 0644]
queue-5.8/media-cec-adap.c-don-t-use-flush_scheduled_work.patch [new file with mode: 0644]
queue-5.8/mips-loongson2ef-disable-loongson-mmi-instructions.patch [new file with mode: 0644]
queue-5.8/mm-don-t-rely-on-system-state-to-detect-hot-plug-operations.patch [new file with mode: 0644]
queue-5.8/mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch [new file with mode: 0644]
queue-5.8/mm-replace-memmap_context-by-meminit_context.patch [new file with mode: 0644]
queue-5.8/mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch [new file with mode: 0644]
queue-5.8/mt76-mt7615-use-v1-mcu-api-on-mt7615-to-fix-issues-with-adding-removing-stations.patch [new file with mode: 0644]
queue-5.8/s390-dasd-fix-zero-write-for-fba-devices.patch [new file with mode: 0644]
queue-5.8/s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch [new file with mode: 0644]
queue-5.8/series
queue-5.8/tracing-fix-double-free.patch [new file with mode: 0644]

diff --git a/queue-5.8/btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch b/queue-5.8/btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch
new file mode 100644 (file)
index 0000000..388fd73
--- /dev/null
@@ -0,0 +1,105 @@
+From 35be8851d172c6e3db836c0f28c19087b10c9e00 Mon Sep 17 00:00:00 2001
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Date: Mon, 21 Sep 2020 16:57:14 +0900
+Subject: btrfs: fix overflow when copying corrupt csums for a message
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+commit 35be8851d172c6e3db836c0f28c19087b10c9e00 upstream.
+
+Syzkaller reported a buffer overflow in btree_readpage_end_io_hook()
+when loop mounting a crafted image:
+
+  detected buffer overflow in memcpy
+  ------------[ cut here ]------------
+  kernel BUG at lib/string.c:1129!
+  invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+  CPU: 1 PID: 26 Comm: kworker/u4:2 Not tainted 5.9.0-rc4-syzkaller #0
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+  Workqueue: btrfs-endio-meta btrfs_work_helper
+  RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129
+  RSP: 0018:ffffc90000e27980 EFLAGS: 00010286
+  RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000
+  RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22
+  RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7
+  R10: 0000000000000000 R11: 0000000000077578 R12: 00000000ffffff6e
+  R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c
+  FS:  0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000557335f440d0 CR3: 000000009647d000 CR4: 00000000001506e0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   memcpy include/linux/string.h:405 [inline]
+   btree_readpage_end_io_hook.cold+0x206/0x221 fs/btrfs/disk-io.c:642
+   end_bio_extent_readpage+0x4de/0x10c0 fs/btrfs/extent_io.c:2854
+   bio_endio+0x3cf/0x7f0 block/bio.c:1449
+   end_workqueue_fn+0x114/0x170 fs/btrfs/disk-io.c:1695
+   btrfs_work_helper+0x221/0xe20 fs/btrfs/async-thread.c:318
+   process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
+   worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
+   kthread+0x3b5/0x4a0 kernel/kthread.c:292
+   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
+  Modules linked in:
+  ---[ end trace b68924293169feef ]---
+  RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129
+  RSP: 0018:ffffc90000e27980 EFLAGS: 00010286
+  RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000
+  RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22
+  RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7
+  R10: 0000000000000000 R11: 0000000000077578 R12: 00000000ffffff6e
+  R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c
+  FS:  0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00007f95b7c4d008 CR3: 000000009647d000 CR4: 00000000001506e0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+
+The overflow happens, because in btree_readpage_end_io_hook() we assume
+that we have found a 4 byte checksum instead of the real possible 32
+bytes we have for the checksums.
+
+With the fix applied:
+
+[   35.726623] BTRFS: device fsid 815caf9a-dc43-4d2a-ac54-764b8333d765 devid 1 transid 5 /dev/loop0 scanned by syz-repro (215)
+[   35.738994] BTRFS info (device loop0): disk space caching is enabled
+[   35.738998] BTRFS info (device loop0): has skinny extents
+[   35.743337] BTRFS warning (device loop0): loop0 checksum verify failed on 1052672 wanted 0xf9c035fc8d239a54 found 0x67a25c14b7eabcf9 level 0
+[   35.743420] BTRFS error (device loop0): failed to read chunk root
+[   35.745899] BTRFS error (device loop0): open_ctree failed
+
+Reported-by: syzbot+e864a35d361e1d4e29a5@syzkaller.appspotmail.com
+Fixes: d5178578bcd4 ("btrfs: directly call into crypto framework for checksumming")
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c |   11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -636,16 +636,15 @@ static int btree_readpage_end_io_hook(st
+       csum_tree_block(eb, result);
+       if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
+-              u32 val;
+-              u32 found = 0;
+-
+-              memcpy(&found, result, csum_size);
++              u8 val[BTRFS_CSUM_SIZE] = { 0 };
+               read_extent_buffer(eb, &val, 0, csum_size);
+               btrfs_warn_rl(fs_info,
+-              "%s checksum verify failed on %llu wanted %x found %x level %d",
++      "%s checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
+                             fs_info->sb->s_id, eb->start,
+-                            val, found, btrfs_header_level(eb));
++                            CSUM_FMT_VALUE(csum_size, val),
++                            CSUM_FMT_VALUE(csum_size, result),
++                            btrfs_header_level(eb));
+               ret = -EUCLEAN;
+               goto err;
+       }
diff --git a/queue-5.8/btrfs-fix-put-of-uninitialized-kobject-after-seed-device-delete.patch b/queue-5.8/btrfs-fix-put-of-uninitialized-kobject-after-seed-device-delete.patch
new file mode 100644 (file)
index 0000000..9f7cbd9
--- /dev/null
@@ -0,0 +1,82 @@
+From b5ddcffa37778244d5e786fe32f778edf2bfc93e Mon Sep 17 00:00:00 2001
+From: Anand Jain <anand.jain@oracle.com>
+Date: Sat, 5 Sep 2020 01:34:21 +0800
+Subject: btrfs: fix put of uninitialized kobject after seed device delete
+
+From: Anand Jain <anand.jain@oracle.com>
+
+commit b5ddcffa37778244d5e786fe32f778edf2bfc93e upstream.
+
+The following test case leads to NULL kobject free error:
+
+  mount seed /mnt
+  add sprout to /mnt
+  umount /mnt
+  mount sprout to /mnt
+  delete seed
+
+  kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
+  WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
+  RIP: 0010:kobject_put+0x80/0x350
+  ::
+  Call Trace:
+  btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
+  btrfs_rm_device.cold+0xa8/0x298 [btrfs]
+  btrfs_ioctl+0x206c/0x22a0 [btrfs]
+  ksys_ioctl+0xe2/0x140
+  __x64_sys_ioctl+0x1e/0x29
+  do_syscall_64+0x96/0x150
+  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  RIP: 0033:0x7f4047c6288b
+  ::
+
+This is because, at the end of the seed device-delete, we try to remove
+the seed's devid sysfs entry. But for the seed devices under the sprout
+fs, we don't initialize the devid kobject yet. So add a kobject state
+check, which takes care of the bug.
+
+Fixes: 668e48af7a94 ("btrfs: sysfs, add devid/dev_state kobject and device attributes")
+CC: stable@vger.kernel.org # 5.6+
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/sysfs.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -1165,10 +1165,12 @@ int btrfs_sysfs_remove_devices_dir(struc
+                                         disk_kobj->name);
+               }
+-              kobject_del(&one_device->devid_kobj);
+-              kobject_put(&one_device->devid_kobj);
++              if (one_device->devid_kobj.state_initialized) {
++                      kobject_del(&one_device->devid_kobj);
++                      kobject_put(&one_device->devid_kobj);
+-              wait_for_completion(&one_device->kobj_unregister);
++                      wait_for_completion(&one_device->kobj_unregister);
++              }
+               return 0;
+       }
+@@ -1181,10 +1183,12 @@ int btrfs_sysfs_remove_devices_dir(struc
+                       sysfs_remove_link(fs_devices->devices_kobj,
+                                         disk_kobj->name);
+               }
+-              kobject_del(&one_device->devid_kobj);
+-              kobject_put(&one_device->devid_kobj);
++              if (one_device->devid_kobj.state_initialized) {
++                      kobject_del(&one_device->devid_kobj);
++                      kobject_put(&one_device->devid_kobj);
+-              wait_for_completion(&one_device->kobj_unregister);
++                      wait_for_completion(&one_device->kobj_unregister);
++              }
+       }
+       return 0;
diff --git a/queue-5.8/dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch b/queue-5.8/dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch
new file mode 100644 (file)
index 0000000..2a07375
--- /dev/null
@@ -0,0 +1,53 @@
+From 19a508bd1ad8e444de86873bf2f2b2ab8edd6552 Mon Sep 17 00:00:00 2001
+From: Charan Teja Reddy <charante@codeaurora.org>
+Date: Fri, 18 Sep 2020 16:02:31 +0530
+Subject: dmabuf: fix NULL pointer dereference in dma_buf_release()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Charan Teja Reddy <charante@codeaurora.org>
+
+commit 19a508bd1ad8e444de86873bf2f2b2ab8edd6552 upstream.
+
+A NULL pointer dereference is observed when exporting a dmabuf fails
+to allocate the 'struct file'. The failure drops the dentry that was
+already allocated for this file in the dmabuf fs, which ends up in
+dma_buf_release() and accesses the uninitialized
+dentry->d_fsdata.
+
+Call stack on 5.4 is below:
+ dma_buf_release+0x2c/0x254 drivers/dma-buf/dma-buf.c:88
+ __dentry_kill+0x294/0x31c fs/dcache.c:584
+ dentry_kill fs/dcache.c:673 [inline]
+ dput+0x250/0x380 fs/dcache.c:859
+ path_put+0x24/0x40 fs/namei.c:485
+ alloc_file_pseudo+0x1a4/0x200 fs/file_table.c:235
+ dma_buf_getfile drivers/dma-buf/dma-buf.c:473 [inline]
+ dma_buf_export+0x25c/0x3ec drivers/dma-buf/dma-buf.c:585
+
+Fix this by checking for the valid pointer in the dentry->d_fsdata.
+
+Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops")
+Cc: <stable@vger.kernel.org> [5.7+]
+Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/391319/
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/dma-buf/dma-buf.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/dma-buf/dma-buf.c
++++ b/drivers/dma-buf/dma-buf.c
+@@ -59,6 +59,8 @@ static void dma_buf_release(struct dentr
+       struct dma_buf *dmabuf;
+       dmabuf = dentry->d_fsdata;
++      if (unlikely(!dmabuf))
++              return;
+       BUG_ON(dmabuf->vmapping_counter);
diff --git a/queue-5.8/kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch b/queue-5.8/kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch
new file mode 100644 (file)
index 0000000..0a43bdd
--- /dev/null
@@ -0,0 +1,99 @@
+From 3031313eb3d549b7ad6f9fbcc52ba04412e3eb9e Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Tue, 1 Sep 2020 00:12:07 +0900
+Subject: kprobes: Fix to check probe enabled before disarm_kprobe_ftrace()
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 3031313eb3d549b7ad6f9fbcc52ba04412e3eb9e upstream.
+
+Commit 0cb2f1372baa ("kprobes: Fix NULL pointer dereference at
+kprobe_ftrace_handler") fixed one bug, but the fix was not complete.
+Running the kprobe_module.tc test case of ftracetest makes the kernel
+show the warning below.
+
+# ./ftracetest test.d/kprobe/kprobe_module.tc
+=== Ftrace unit tests ===
+[1] Kprobe dynamic event - probing module
+...
+[   22.400215] ------------[ cut here ]------------
+[   22.400962] Failed to disarm kprobe-ftrace at trace_printk_irq_work+0x0/0x7e [trace_printk] (-2)
+[   22.402139] WARNING: CPU: 7 PID: 200 at kernel/kprobes.c:1091 __disarm_kprobe_ftrace.isra.0+0x7e/0xa0
+[   22.403358] Modules linked in: trace_printk(-)
+[   22.404028] CPU: 7 PID: 200 Comm: rmmod Not tainted 5.9.0-rc2+ #66
+[   22.404870] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014
+[   22.406139] RIP: 0010:__disarm_kprobe_ftrace.isra.0+0x7e/0xa0
+[   22.406947] Code: 30 8b 03 eb c9 80 3d e5 09 1f 01 00 75 dc 49 8b 34 24 89 c2 48 c7 c7 a0 c2 05 82 89 45 e4 c6 05 cc 09 1f 01 01 e8 a9 c7 f0 ff <0f> 0b 8b 45 e4 eb b9 89 c6 48 c7 c7 70 c2 05 82 89 45 e4 e8 91 c7
+[   22.409544] RSP: 0018:ffffc90000237df0 EFLAGS: 00010286
+[   22.410385] RAX: 0000000000000000 RBX: ffffffff83066024 RCX: 0000000000000000
+[   22.411434] RDX: 0000000000000001 RSI: ffffffff810de8d3 RDI: ffffffff810de8d3
+[   22.412687] RBP: ffffc90000237e10 R08: 0000000000000001 R09: 0000000000000001
+[   22.413762] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88807c478640
+[   22.414852] R13: ffffffff8235ebc0 R14: ffffffffa00060c0 R15: 0000000000000000
+[   22.415941] FS:  00000000019d48c0(0000) GS:ffff88807d7c0000(0000) knlGS:0000000000000000
+[   22.417264] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   22.418176] CR2: 00000000005bb7e3 CR3: 0000000078f7a000 CR4: 00000000000006a0
+[   22.419309] Call Trace:
+[   22.419990]  kill_kprobe+0x94/0x160
+[   22.420652]  kprobes_module_callback+0x64/0x230
+[   22.421470]  notifier_call_chain+0x4f/0x70
+[   22.422184]  blocking_notifier_call_chain+0x49/0x70
+[   22.422979]  __x64_sys_delete_module+0x1ac/0x240
+[   22.423733]  do_syscall_64+0x38/0x50
+[   22.424366]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[   22.425176] RIP: 0033:0x4bb81d
+[   22.425741] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e0 ff ff ff f7 d8 64 89 01 48
+[   22.428726] RSP: 002b:00007ffc70fef008 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0
+[   22.430169] RAX: ffffffffffffffda RBX: 00000000019d48a0 RCX: 00000000004bb81d
+[   22.431375] RDX: 0000000000000000 RSI: 0000000000000880 RDI: 00007ffc70fef028
+[   22.432543] RBP: 0000000000000880 R08: 00000000ffffffff R09: 00007ffc70fef320
+[   22.433692] R10: 0000000000656300 R11: 0000000000000246 R12: 00007ffc70fef028
+[   22.434635] R13: 0000000000000000 R14: 0000000000000002 R15: 0000000000000000
+[   22.435682] irq event stamp: 1169
+[   22.436240] hardirqs last  enabled at (1179): [<ffffffff810df542>] console_unlock+0x422/0x580
+[   22.437466] hardirqs last disabled at (1188): [<ffffffff810df19b>] console_unlock+0x7b/0x580
+[   22.438608] softirqs last  enabled at (866): [<ffffffff81c0038e>] __do_softirq+0x38e/0x490
+[   22.439637] softirqs last disabled at (859): [<ffffffff81a00f42>] asm_call_on_stack+0x12/0x20
+[   22.440690] ---[ end trace 1e7ce7e1e4567276 ]---
+[   22.472832] trace_kprobe: This probe might be able to register after target module is loaded. Continue.
+
+This is because the kill_kprobe() calls disarm_kprobe_ftrace() even
+if the given probe is not enabled. In that case, ftrace_set_filter_ip()
+fails because the given probe point is not registered to ftrace.
+
+Fix to check the given (going) probe is enabled before invoking
+disarm_kprobe_ftrace().
+
+Link: https://lkml.kernel.org/r/159888672694.1411785.5987998076694782591.stgit@devnote2
+
+Fixes: 0cb2f1372baa ("kprobes: Fix NULL pointer dereference at kprobe_ftrace_handler")
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: "Naveen N . Rao" <naveen.n.rao@linux.ibm.com>
+Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+Cc: David Miller <davem@davemloft.net>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Chengming Zhou <zhouchengming@bytedance.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/kprobes.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -2130,9 +2130,10 @@ static void kill_kprobe(struct kprobe *p
+       /*
+        * The module is going away. We should disarm the kprobe which
+-       * is using ftrace.
++       * is using ftrace, because ftrace framework is still available at
++       * MODULE_STATE_GOING notification.
+        */
+-      if (kprobe_ftrace(p))
++      if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
+               disarm_kprobe_ftrace(p);
+ }
diff --git a/queue-5.8/kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch b/queue-5.8/kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch
new file mode 100644 (file)
index 0000000..50c8872
--- /dev/null
@@ -0,0 +1,104 @@
+From 82d083ab60c3693201c6f5c7a5f23a6ed422098d Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Thu, 10 Sep 2020 17:55:05 +0900
+Subject: kprobes: tracing/kprobes: Fix to kill kprobes on initmem after boot
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 82d083ab60c3693201c6f5c7a5f23a6ed422098d upstream.
+
+Since the kprobe_event= cmdline option allows the user to put kprobes on
+functions in initmem, kprobes has to kill such probes after boot.
+Currently the probes on the init functions in modules are handled
+by the module callback, but the kernel init text isn't handled.
+Without this, kprobes may access a non-existent text area to disable or
+remove a probe.
+
+Link: https://lkml.kernel.org/r/159972810544.428528.1839307531600646955.stgit@devnote2
+
+Fixes: 970988e19eb0 ("tracing/kprobe: Add kprobe_event= boot parameter")
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Shuah Khan <skhan@linuxfoundation.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/kprobes.h |    5 +++++
+ init/main.c             |    2 ++
+ kernel/kprobes.c        |   22 ++++++++++++++++++++++
+ 3 files changed, 29 insertions(+)
+
+--- a/include/linux/kprobes.h
++++ b/include/linux/kprobes.h
+@@ -369,6 +369,8 @@ void unregister_kretprobes(struct kretpr
+ void kprobe_flush_task(struct task_struct *tk);
+ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
++void kprobe_free_init_mem(void);
++
+ int disable_kprobe(struct kprobe *kp);
+ int enable_kprobe(struct kprobe *kp);
+@@ -426,6 +428,9 @@ static inline void unregister_kretprobes
+ static inline void kprobe_flush_task(struct task_struct *tk)
+ {
+ }
++static inline void kprobe_free_init_mem(void)
++{
++}
+ static inline int disable_kprobe(struct kprobe *kp)
+ {
+       return -ENOSYS;
+--- a/init/main.c
++++ b/init/main.c
+@@ -33,6 +33,7 @@
+ #include <linux/nmi.h>
+ #include <linux/percpu.h>
+ #include <linux/kmod.h>
++#include <linux/kprobes.h>
+ #include <linux/vmalloc.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/start_kernel.h>
+@@ -1401,6 +1402,7 @@ static int __ref kernel_init(void *unuse
+       kernel_init_freeable();
+       /* need to finish all async __init code before freeing the memory */
+       async_synchronize_full();
++      kprobe_free_init_mem();
+       ftrace_free_init_mem();
+       free_initmem();
+       mark_readonly();
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -2406,6 +2406,28 @@ static struct notifier_block kprobe_modu
+ extern unsigned long __start_kprobe_blacklist[];
+ extern unsigned long __stop_kprobe_blacklist[];
++void kprobe_free_init_mem(void)
++{
++      void *start = (void *)(&__init_begin);
++      void *end = (void *)(&__init_end);
++      struct hlist_head *head;
++      struct kprobe *p;
++      int i;
++
++      mutex_lock(&kprobe_mutex);
++
++      /* Kill all kprobes on initmem */
++      for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
++              head = &kprobe_table[i];
++              hlist_for_each_entry(p, head, hlist) {
++                      if (start <= (void *)p->addr && (void *)p->addr < end)
++                              kill_kprobe(p);
++              }
++      }
++
++      mutex_unlock(&kprobe_mutex);
++}
++
+ static int __init init_kprobes(void)
+ {
+       int i, err = 0;
diff --git a/queue-5.8/lib-bootconfig-fix-a-bug-of-breaking-existing-tree-nodes.patch b/queue-5.8/lib-bootconfig-fix-a-bug-of-breaking-existing-tree-nodes.patch
new file mode 100644 (file)
index 0000000..92908b2
--- /dev/null
@@ -0,0 +1,123 @@
+From ead1e19ad905b97261f0ad7a98bb64abb9323b2b Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Mon, 21 Sep 2020 18:44:42 +0900
+Subject: lib/bootconfig: Fix a bug of breaking existing tree nodes
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit ead1e19ad905b97261f0ad7a98bb64abb9323b2b upstream.
+
+Fix a bug of breaking existing tree nodes by parsing the second
+and subsequent braces. The bootconfig parser uses the node.next
+field as a flag marking the current parent node, but this breaks
+the existing tree if the same key node is specified again in the
+bootconfig.
+
+For example, the following bootconfig should be foo.buz and bar.
+
+foo
+bar
+foo { buz }
+
+However, when parsing the brace "{", it breaks foo->bar link
+by marking open-brace node. So the bootconfig unlinks bar
+from the bootconfig internal tree.
+
+This introduces a stack outside of the tree and records the
+last open brace on the stack instead of using the node.next field.
+
+Link: https://lkml.kernel.org/r/160068148267.1088739.8264704338030168660.stgit@devnote2
+
+Fixes: 76db5a27a827 ("bootconfig: Add Extra Boot Config support")
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/bootconfig.c |   36 +++++++++++++++++++++++-------------
+ 1 file changed, 23 insertions(+), 13 deletions(-)
+
+--- a/lib/bootconfig.c
++++ b/lib/bootconfig.c
+@@ -31,6 +31,8 @@ static size_t xbc_data_size __initdata;
+ static struct xbc_node *last_parent __initdata;
+ static const char *xbc_err_msg __initdata;
+ static int xbc_err_pos __initdata;
++static int open_brace[XBC_DEPTH_MAX] __initdata;
++static int brace_index __initdata;
+ static int __init xbc_parse_error(const char *msg, const char *p)
+ {
+@@ -423,27 +425,27 @@ static char *skip_spaces_until_newline(c
+       return p;
+ }
+-static int __init __xbc_open_brace(void)
++static int __init __xbc_open_brace(char *p)
+ {
+-      /* Mark the last key as open brace */
+-      last_parent->next = XBC_NODE_MAX;
++      /* Push the last key as open brace */
++      open_brace[brace_index++] = xbc_node_index(last_parent);
++      if (brace_index >= XBC_DEPTH_MAX)
++              return xbc_parse_error("Exceed max depth of braces", p);
+       return 0;
+ }
+ static int __init __xbc_close_brace(char *p)
+ {
+-      struct xbc_node *node;
+-
+-      if (!last_parent || last_parent->next != XBC_NODE_MAX)
++      brace_index--;
++      if (!last_parent || brace_index < 0 ||
++          (open_brace[brace_index] != xbc_node_index(last_parent)))
+               return xbc_parse_error("Unexpected closing brace", p);
+-      node = last_parent;
+-      node->next = 0;
+-      do {
+-              node = xbc_node_get_parent(node);
+-      } while (node && node->next != XBC_NODE_MAX);
+-      last_parent = node;
++      if (brace_index == 0)
++              last_parent = NULL;
++      else
++              last_parent = &xbc_nodes[open_brace[brace_index - 1]];
+       return 0;
+ }
+@@ -651,7 +653,7 @@ static int __init xbc_open_brace(char **
+               return ret;
+       *k = n;
+-      return __xbc_open_brace();
++      return __xbc_open_brace(n - 1);
+ }
+ static int __init xbc_close_brace(char **k, char *n)
+@@ -671,6 +673,13 @@ static int __init xbc_verify_tree(void)
+       int i, depth, len, wlen;
+       struct xbc_node *n, *m;
++      /* Brace closing */
++      if (brace_index) {
++              n = &xbc_nodes[open_brace[brace_index]];
++              return xbc_parse_error("Brace is not closed",
++                                      xbc_node_get_data(n));
++      }
++
+       /* Empty tree */
+       if (xbc_node_num == 0) {
+               xbc_parse_error("Empty config", xbc_data);
+@@ -735,6 +744,7 @@ void __init xbc_destroy_all(void)
+       xbc_node_num = 0;
+       memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX);
+       xbc_nodes = NULL;
++      brace_index = 0;
+ }
+ /**
diff --git a/queue-5.8/lib-bootconfig-fix-to-remove-tailing-spaces-after-value.patch b/queue-5.8/lib-bootconfig-fix-to-remove-tailing-spaces-after-value.patch
new file mode 100644 (file)
index 0000000..ac93d3d
--- /dev/null
@@ -0,0 +1,52 @@
+From c7af4ecdffe1537ba8aeed0ac12c3326f908df43 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Mon, 21 Sep 2020 18:44:51 +0900
+Subject: lib/bootconfig: Fix to remove tailing spaces after value
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit c7af4ecdffe1537ba8aeed0ac12c3326f908df43 upstream.
+
+Fix to remove trailing spaces after a value. If there is a space
+after the value, the bootconfig fails to remove it because it
+applies strim() before replacing the delimiter with null.
+
+For example,
+
+foo = var    # comment
+
+was parsed as below.
+
+foo="var    "
+
+but user will expect
+
+foo="var"
+
+This fixes it by applying strim() after removing the delimiter.
+
+Link: https://lkml.kernel.org/r/160068149134.1088739.8868306567670058853.stgit@devnote2
+
+Fixes: 76db5a27a827 ("bootconfig: Add Extra Boot Config support")
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/bootconfig.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/bootconfig.c
++++ b/lib/bootconfig.c
+@@ -486,8 +486,8 @@ static int __init __xbc_parse_value(char
+                       break;
+               }
+               if (strchr(",;\n#}", c)) {
+-                      v = strim(v);
+                       *p++ = '\0';
++                      v = strim(v);
+                       break;
+               }
+       }
diff --git a/queue-5.8/media-cec-adap.c-don-t-use-flush_scheduled_work.patch b/queue-5.8/media-cec-adap.c-don-t-use-flush_scheduled_work.patch
new file mode 100644 (file)
index 0000000..b03e837
--- /dev/null
@@ -0,0 +1,43 @@
+From 288eceb0858323d66bff03cf386630a797b248ad Mon Sep 17 00:00:00 2001
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Date: Tue, 8 Sep 2020 12:02:53 +0200
+Subject: media: cec-adap.c: don't use flush_scheduled_work()
+
+From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+
+commit 288eceb0858323d66bff03cf386630a797b248ad upstream.
+
+For some inexplicable reason I decided to call flush_scheduled_work()
+instead of cancel_delayed_work_sync(). The problem with that is that
+flush_scheduled_work() waits for *all* queued scheduled work to be
+completed instead of just the work itself.
+
+This can cause a deadlock if a CEC driver also schedules work that
+takes the same lock. See the comments for flush_scheduled_work() in
+linux/workqueue.h.
+
+This is exactly what has been observed a few times.
+
+This patch simply replaces flush_scheduled_work() by
+cancel_delayed_work_sync().
+
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Cc: <stable@vger.kernel.org>      # for v5.8 and up
+Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/media/cec/core/cec-adap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/media/cec/core/cec-adap.c
++++ b/drivers/media/cec/core/cec-adap.c
+@@ -1199,7 +1199,7 @@ void cec_received_msg_ts(struct cec_adap
+                       /* Cancel the pending timeout work */
+                       if (!cancel_delayed_work(&data->work)) {
+                               mutex_unlock(&adap->lock);
+-                              flush_scheduled_work();
++                              cancel_delayed_work_sync(&data->work);
+                               mutex_lock(&adap->lock);
+                       }
+                       /*
diff --git a/queue-5.8/mips-loongson2ef-disable-loongson-mmi-instructions.patch b/queue-5.8/mips-loongson2ef-disable-loongson-mmi-instructions.patch
new file mode 100644 (file)
index 0000000..6a38279
--- /dev/null
@@ -0,0 +1,37 @@
+From b13812ddea615b6507beef24f76540c0c1143c5c Mon Sep 17 00:00:00 2001
+From: Jiaxun Yang <jiaxun.yang@flygoat.com>
+Date: Wed, 23 Sep 2020 18:33:12 +0800
+Subject: MIPS: Loongson2ef: Disable Loongson MMI instructions
+
+From: Jiaxun Yang <jiaxun.yang@flygoat.com>
+
+commit b13812ddea615b6507beef24f76540c0c1143c5c upstream.
+
+It was missed when I was forking Loongson2ef from Loongson64 but
+should be applied to Loongson2ef as march=loongson2f
+will also enable Loongson MMI in GCC-9+.
+
+Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
+Fixes: 71e2f4dd5a65 ("MIPS: Fork loongson2ef from loongson64")
+Reported-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Cc: stable@vger.kernel.org # v5.8+
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/mips/loongson2ef/Platform |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/mips/loongson2ef/Platform
++++ b/arch/mips/loongson2ef/Platform
+@@ -22,6 +22,10 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
+   endif
+ endif
++# Some -march= flags enable MMI instructions, and GCC complains about that
++# support being enabled alongside -msoft-float. Thus explicitly disable MMI.
++cflags-y += $(call cc-option,-mno-loongson-mmi)
++
+ #
+ # Loongson Machines' Support
+ #
diff --git a/queue-5.8/mm-don-t-rely-on-system-state-to-detect-hot-plug-operations.patch b/queue-5.8/mm-don-t-rely-on-system-state-to-detect-hot-plug-operations.patch
new file mode 100644 (file)
index 0000000..c132f9e
--- /dev/null
@@ -0,0 +1,269 @@
+From f85086f95fa36194eb0db5cd5c12e56801b98523 Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.ibm.com>
+Date: Fri, 25 Sep 2020 21:19:31 -0700
+Subject: mm: don't rely on system state to detect hot-plug operations
+
+From: Laurent Dufour <ldufour@linux.ibm.com>
+
+commit f85086f95fa36194eb0db5cd5c12e56801b98523 upstream.
+
+In register_mem_sect_under_node() the system_state's value is checked to
+detect whether the call is made during boot time or during a hot-plug
+operation.  Unfortunately, that check against SYSTEM_BOOTING is wrong
+because regular memory is registered at SYSTEM_SCHEDULING state.  In
+addition, memory hot-plug operation can be triggered at this system
+state by the ACPI [1].  So checking against the system state is not
+enough.
+
+The consequence is visible on systems with interleaved node ranges like this:
+
+ Early memory node ranges
+   node   1: [mem 0x0000000000000000-0x000000011fffffff]
+   node   2: [mem 0x0000000120000000-0x000000014fffffff]
+   node   1: [mem 0x0000000150000000-0x00000001ffffffff]
+   node   0: [mem 0x0000000200000000-0x000000048fffffff]
+   node   2: [mem 0x0000000490000000-0x00000007ffffffff]
+
+This can be seen on PowerPC LPAR after multiple memory hot-plug and
+hot-unplug operations are done.  At the next reboot the node's memory
+ranges can be interleaved and since the call to link_mem_sections() is
+made in topology_init() while the system is in the SYSTEM_SCHEDULING
+state, the node's id is not checked, and the sections are registered to
+multiple nodes:
+
+  $ ls -l /sys/devices/system/memory/memory21/node*
+  total 0
+  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node1 -> ../../node/node1
+  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node2 -> ../../node/node2
+
+In that case, the system is able to boot but if later one of these
+memory blocks is hot-unplugged and then hot-plugged, the sysfs
+inconsistency is detected and this triggers a BUG_ON():
+
+  kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084!
+  Oops: Exception in kernel mode, sig: 5 [#1]
+  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+  Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4
+  CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25
+  Call Trace:
+    add_memory_resource+0x23c/0x340 (unreliable)
+    __add_memory+0x5c/0xf0
+    dlpar_add_lmb+0x1b4/0x500
+    dlpar_memory+0x1f8/0xb80
+    handle_dlpar_errorlog+0xc0/0x190
+    dlpar_store+0x198/0x4a0
+    kobj_attr_store+0x30/0x50
+    sysfs_kf_write+0x64/0x90
+    kernfs_fop_write+0x1b0/0x290
+    vfs_write+0xe8/0x290
+    ksys_write+0xdc/0x130
+    system_call_exception+0x160/0x270
+    system_call_common+0xf0/0x27c
+
+This patch addresses the root cause by not relying on the system_state
+value to detect whether the call is due to a hot-plug operation.  An
+extra parameter is added to link_mem_sections() detailing whether the
+operation is due to a hot-plug operation.
+
+[1] According to Oscar Salvador, using this qemu command line, ACPI
+memory hotplug operations are raised at SYSTEM_SCHEDULING state:
+
+  $QEMU -enable-kvm -machine pc -smp 4,sockets=4,cores=1,threads=1 -cpu host -monitor pty \
+        -m size=$MEM,slots=255,maxmem=4294967296k  \
+        -numa node,nodeid=0,cpus=0-3,mem=512 -numa node,nodeid=1,mem=512 \
+        -object memory-backend-ram,id=memdimm0,size=134217728 -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \
+        -object memory-backend-ram,id=memdimm1,size=134217728 -device pc-dimm,node=0,memdev=memdimm1,id=dimm1,slot=1 \
+        -object memory-backend-ram,id=memdimm2,size=134217728 -device pc-dimm,node=0,memdev=memdimm2,id=dimm2,slot=2 \
+        -object memory-backend-ram,id=memdimm3,size=134217728 -device pc-dimm,node=0,memdev=memdimm3,id=dimm3,slot=3 \
+        -object memory-backend-ram,id=memdimm4,size=134217728 -device pc-dimm,node=1,memdev=memdimm4,id=dimm4,slot=4 \
+        -object memory-backend-ram,id=memdimm5,size=134217728 -device pc-dimm,node=1,memdev=memdimm5,id=dimm5,slot=5 \
+        -object memory-backend-ram,id=memdimm6,size=134217728 -device pc-dimm,node=1,memdev=memdimm6,id=dimm6,slot=6 \
+
+Fixes: 4fbce633910e ("mm/memory_hotplug.c: make register_mem_sect_under_node() a callback of walk_memory_range()")
+Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: "Rafael J. Wysocki" <rafael@kernel.org>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: Nathan Lynch <nathanl@linux.ibm.com>
+Cc: Scott Cheloha <cheloha@linux.ibm.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20200915094143.79181-3-ldufour@linux.ibm.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/node.c  |   85 +++++++++++++++++++++++++++++++++------------------
+ include/linux/node.h |   11 ++++--
+ mm/memory_hotplug.c  |    3 +
+ 3 files changed, 64 insertions(+), 35 deletions(-)
+
+--- a/drivers/base/node.c
++++ b/drivers/base/node.c
+@@ -761,14 +761,36 @@ static int __ref get_nid_for_pfn(unsigne
+       return pfn_to_nid(pfn);
+ }
++static int do_register_memory_block_under_node(int nid,
++                                             struct memory_block *mem_blk)
++{
++      int ret;
++
++      /*
++       * If this memory block spans multiple nodes, we only indicate
++       * the last processed node.
++       */
++      mem_blk->nid = nid;
++
++      ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
++                                     &mem_blk->dev.kobj,
++                                     kobject_name(&mem_blk->dev.kobj));
++      if (ret)
++              return ret;
++
++      return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
++                              &node_devices[nid]->dev.kobj,
++                              kobject_name(&node_devices[nid]->dev.kobj));
++}
++
+ /* register memory section under specified node if it spans that node */
+-static int register_mem_sect_under_node(struct memory_block *mem_blk,
+-                                       void *arg)
++static int register_mem_block_under_node_early(struct memory_block *mem_blk,
++                                             void *arg)
+ {
+       unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
+       unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
+       unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
+-      int ret, nid = *(int *)arg;
++      int nid = *(int *)arg;
+       unsigned long pfn;
+       for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
+@@ -785,39 +807,34 @@ static int register_mem_sect_under_node(
+               }
+               /*
+-               * We need to check if page belongs to nid only for the boot
+-               * case, during hotplug we know that all pages in the memory
+-               * block belong to the same node.
+-               */
+-              if (system_state == SYSTEM_BOOTING) {
+-                      page_nid = get_nid_for_pfn(pfn);
+-                      if (page_nid < 0)
+-                              continue;
+-                      if (page_nid != nid)
+-                              continue;
+-              }
+-
+-              /*
+-               * If this memory block spans multiple nodes, we only indicate
+-               * the last processed node.
++               * We need to check if page belongs to nid only at the boot
++               * case because node's ranges can be interleaved.
+                */
+-              mem_blk->nid = nid;
+-
+-              ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
+-                                      &mem_blk->dev.kobj,
+-                                      kobject_name(&mem_blk->dev.kobj));
+-              if (ret)
+-                      return ret;
++              page_nid = get_nid_for_pfn(pfn);
++              if (page_nid < 0)
++                      continue;
++              if (page_nid != nid)
++                      continue;
+-              return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
+-                              &node_devices[nid]->dev.kobj,
+-                              kobject_name(&node_devices[nid]->dev.kobj));
++              return do_register_memory_block_under_node(nid, mem_blk);
+       }
+       /* mem section does not span the specified node */
+       return 0;
+ }
+ /*
++ * During hotplug we know that all pages in the memory block belong to the same
++ * node.
++ */
++static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
++                                               void *arg)
++{
++      int nid = *(int *)arg;
++
++      return do_register_memory_block_under_node(nid, mem_blk);
++}
++
++/*
+  * Unregister a memory block device under the node it spans. Memory blocks
+  * with multiple nodes cannot be offlined and therefore also never be removed.
+  */
+@@ -832,11 +849,19 @@ void unregister_memory_block_under_nodes
+                         kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
+ }
+-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
++int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
++                    enum meminit_context context)
+ {
++      walk_memory_blocks_func_t func;
++
++      if (context == MEMINIT_HOTPLUG)
++              func = register_mem_block_under_node_hotplug;
++      else
++              func = register_mem_block_under_node_early;
++
+       return walk_memory_blocks(PFN_PHYS(start_pfn),
+                                 PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
+-                                register_mem_sect_under_node);
++                                func);
+ }
+ #ifdef CONFIG_HUGETLBFS
+--- a/include/linux/node.h
++++ b/include/linux/node.h
+@@ -99,11 +99,13 @@ extern struct node *node_devices[];
+ typedef  void (*node_registration_func_t)(struct node *);
+ #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
+-extern int link_mem_sections(int nid, unsigned long start_pfn,
+-                           unsigned long end_pfn);
++int link_mem_sections(int nid, unsigned long start_pfn,
++                    unsigned long end_pfn,
++                    enum meminit_context context);
+ #else
+ static inline int link_mem_sections(int nid, unsigned long start_pfn,
+-                                  unsigned long end_pfn)
++                                  unsigned long end_pfn,
++                                  enum meminit_context context)
+ {
+       return 0;
+ }
+@@ -128,7 +130,8 @@ static inline int register_one_node(int
+               if (error)
+                       return error;
+               /* link memory sections under this node */
+-              error = link_mem_sections(nid, start_pfn, end_pfn);
++              error = link_mem_sections(nid, start_pfn, end_pfn,
++                                        MEMINIT_EARLY);
+       }
+       return error;
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1065,7 +1065,8 @@ int __ref add_memory_resource(int nid, s
+       }
+       /* link memory sections under this node.*/
+-      ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1));
++      ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
++                              MEMINIT_HOTPLUG);
+       BUG_ON(ret);
+       /* create new memmap entry */
diff --git a/queue-5.8/mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch b/queue-5.8/mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch
new file mode 100644 (file)
index 0000000..fbee69c
--- /dev/null
@@ -0,0 +1,266 @@
+From d3f7b1bb204099f2f7306318896223e8599bb6a2 Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Fri, 25 Sep 2020 21:19:10 -0700
+Subject: mm/gup: fix gup_fast with dynamic page table folding
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit d3f7b1bb204099f2f7306318896223e8599bb6a2 upstream.
+
+Currently to make sure that every page table entry is read just once
+gup_fast walks perform READ_ONCE and pass pXd value down to the next
+gup_pXd_range function by value e.g.:
+
+  static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
+                           unsigned int flags, struct page **pages, int *nr)
+  ...
+          pudp = pud_offset(&p4d, addr);
+
+This function passes a reference on that local value copy to pXd_offset,
+and might get the very same pointer in return.  This happens when the
+level is folded (on most arches), and that pointer should not be
+iterated.
+
+On s390, because each task might have a different 5-, 4- or 3-level
+address translation and hence different levels folded, the logic is
+more complex, and a non-iterable pointer to a local copy leads to
+severe problems.
+
+Here is an example of what happens with gup_fast on s390, for a task
+with 3-level paging, crossing a 2 GB pud boundary:
+
+  // addr = 0x1007ffff000, end = 0x10080001000
+  static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
+                           unsigned int flags, struct page **pages, int *nr)
+  {
+        unsigned long next;
+        pud_t *pudp;
+
+        // pud_offset returns &p4d itself (a pointer to a value on stack)
+        pudp = pud_offset(&p4d, addr);
+        do {
+                // on second iteration reading "random" stack value
+                pud_t pud = READ_ONCE(*pudp);
+
+                // next = 0x10080000000, due to PUD_SIZE/MASK != PGDIR_SIZE/MASK on s390
+                next = pud_addr_end(addr, end);
+                ...
+        } while (pudp++, addr = next, addr != end); // pudp++ iterating over stack
+
+        return 1;
+  }
+
+This happens since s390 moved to common gup code with commit
+d1874a0c2805 ("s390/mm: make the pxd_offset functions more robust") and
+commit 1a42010cdc26 ("s390/mm: convert to the generic
+get_user_pages_fast code").
+
+s390 tried to mimic static level folding by changing pXd_offset
+primitives to always calculate top level page table offset in pgd_offset
+and just return the value passed when pXd_offset has to act as folded.
+
+What is crucial for gup_fast and what has been overlooked is that
+PxD_SIZE/MASK and thus pXd_addr_end should also change correspondingly.
+And the latter is not possible with dynamic folding.
+
+To fix the issue, pass the original pXdp pointers down to the
+gup_pXd_range functions in addition to the pXd values, and introduce
+pXd_offset_lockless helpers, which take an additional pXd entry value
+parameter.  This has already been discussed in
+
+  https://lkml.kernel.org/r/20190418100218.0a4afd51@mschwideX1
+
+Fixes: 1a42010cdc26 ("s390/mm: convert to the generic get_user_pages_fast code")
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Russell King <linux@armlinux.org.uk>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Jeff Dike <jdike@addtoit.com>
+Cc: Richard Weinberger <richard@nod.at>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Heiko Carstens <hca@linux.ibm.com>
+Cc: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Cc: <stable@vger.kernel.org>   [5.2+]
+Link: https://lkml.kernel.org/r/patch.git-943f1e5dcff2.your-ad-here.call-01599856292-ext-8676@work.hours
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/pgtable.h |   42 ++++++++++++++++++++++++++++------------
+ include/linux/pgtable.h         |   10 +++++++++
+ mm/gup.c                        |   18 ++++++++---------
+ 3 files changed, 49 insertions(+), 21 deletions(-)
+
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -1260,26 +1260,44 @@ static inline pgd_t *pgd_offset_raw(pgd_
+ #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
+-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
++static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address)
+ {
+-      if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
+-              return (p4d_t *) pgd_deref(*pgd) + p4d_index(address);
+-      return (p4d_t *) pgd;
++      if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
++              return (p4d_t *) pgd_deref(pgd) + p4d_index(address);
++      return (p4d_t *) pgdp;
+ }
++#define p4d_offset_lockless p4d_offset_lockless
+-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
++static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address)
+ {
+-      if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
+-              return (pud_t *) p4d_deref(*p4d) + pud_index(address);
+-      return (pud_t *) p4d;
++      return p4d_offset_lockless(pgdp, *pgdp, address);
++}
++
++static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address)
++{
++      if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
++              return (pud_t *) p4d_deref(p4d) + pud_index(address);
++      return (pud_t *) p4dp;
++}
++#define pud_offset_lockless pud_offset_lockless
++
++static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address)
++{
++      return pud_offset_lockless(p4dp, *p4dp, address);
+ }
+ #define pud_offset pud_offset
+-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
++static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address)
++{
++      if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
++              return (pmd_t *) pud_deref(pud) + pmd_index(address);
++      return (pmd_t *) pudp;
++}
++#define pmd_offset_lockless pmd_offset_lockless
++
++static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address)
+ {
+-      if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
+-              return (pmd_t *) pud_deref(*pud) + pmd_index(address);
+-      return (pmd_t *) pud;
++      return pmd_offset_lockless(pudp, *pudp, address);
+ }
+ #define pmd_offset pmd_offset
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -1424,6 +1424,16 @@ typedef unsigned int pgtbl_mod_mask;
+ #define mm_pmd_folded(mm)     __is_defined(__PAGETABLE_PMD_FOLDED)
+ #endif
++#ifndef p4d_offset_lockless
++#define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
++#endif
++#ifndef pud_offset_lockless
++#define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
++#endif
++#ifndef pmd_offset_lockless
++#define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
++#endif
++
+ /*
+  * p?d_leaf() - true if this entry is a final mapping to a physical address.
+  * This differs from p?d_huge() by the fact that they are always available (if
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -2574,13 +2574,13 @@ static int gup_huge_pgd(pgd_t orig, pgd_
+       return 1;
+ }
+-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
++static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
+               unsigned int flags, struct page **pages, int *nr)
+ {
+       unsigned long next;
+       pmd_t *pmdp;
+-      pmdp = pmd_offset(&pud, addr);
++      pmdp = pmd_offset_lockless(pudp, pud, addr);
+       do {
+               pmd_t pmd = READ_ONCE(*pmdp);
+@@ -2617,13 +2617,13 @@ static int gup_pmd_range(pud_t pud, unsi
+       return 1;
+ }
+-static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
++static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
+                        unsigned int flags, struct page **pages, int *nr)
+ {
+       unsigned long next;
+       pud_t *pudp;
+-      pudp = pud_offset(&p4d, addr);
++      pudp = pud_offset_lockless(p4dp, p4d, addr);
+       do {
+               pud_t pud = READ_ONCE(*pudp);
+@@ -2638,20 +2638,20 @@ static int gup_pud_range(p4d_t p4d, unsi
+                       if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
+                                        PUD_SHIFT, next, flags, pages, nr))
+                               return 0;
+-              } else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
++              } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
+                       return 0;
+       } while (pudp++, addr = next, addr != end);
+       return 1;
+ }
+-static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
++static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
+                        unsigned int flags, struct page **pages, int *nr)
+ {
+       unsigned long next;
+       p4d_t *p4dp;
+-      p4dp = p4d_offset(&pgd, addr);
++      p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+       do {
+               p4d_t p4d = READ_ONCE(*p4dp);
+@@ -2663,7 +2663,7 @@ static int gup_p4d_range(pgd_t pgd, unsi
+                       if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
+                                        P4D_SHIFT, next, flags, pages, nr))
+                               return 0;
+-              } else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
++              } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
+                       return 0;
+       } while (p4dp++, addr = next, addr != end);
+@@ -2691,7 +2691,7 @@ static void gup_pgd_range(unsigned long
+                       if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+                                        PGDIR_SHIFT, next, flags, pages, nr))
+                               return;
+-              } else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
++              } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
+                       return;
+       } while (pgdp++, addr = next, addr != end);
+ }
diff --git a/queue-5.8/mm-replace-memmap_context-by-meminit_context.patch b/queue-5.8/mm-replace-memmap_context-by-meminit_context.patch
new file mode 100644 (file)
index 0000000..80768bd
--- /dev/null
@@ -0,0 +1,233 @@
+From c1d0da83358a2316d9be7f229f26126dbaa07468 Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.ibm.com>
+Date: Fri, 25 Sep 2020 21:19:28 -0700
+Subject: mm: replace memmap_context by meminit_context
+
+From: Laurent Dufour <ldufour@linux.ibm.com>
+
+commit c1d0da83358a2316d9be7f229f26126dbaa07468 upstream.
+
+Patch series "mm: fix memory to node bad links in sysfs", v3.
+
+Sometimes, firmware may expose interleaved memory layout like this:
+
+ Early memory node ranges
+   node   1: [mem 0x0000000000000000-0x000000011fffffff]
+   node   2: [mem 0x0000000120000000-0x000000014fffffff]
+   node   1: [mem 0x0000000150000000-0x00000001ffffffff]
+   node   0: [mem 0x0000000200000000-0x000000048fffffff]
+   node   2: [mem 0x0000000490000000-0x00000007ffffffff]
+
+In that case, we can see memory blocks assigned to multiple nodes in
+sysfs:
+
+  $ ls -l /sys/devices/system/memory/memory21
+  total 0
+  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node1 -> ../../node/node1
+  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node2 -> ../../node/node2
+  -rw-r--r-- 1 root root 65536 Aug 24 05:27 online
+  -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_device
+  -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_index
+  drwxr-xr-x 2 root root     0 Aug 24 05:27 power
+  -r--r--r-- 1 root root 65536 Aug 24 05:27 removable
+  -rw-r--r-- 1 root root 65536 Aug 24 05:27 state
+  lrwxrwxrwx 1 root root     0 Aug 24 05:25 subsystem -> ../../../../bus/memory
+  -rw-r--r-- 1 root root 65536 Aug 24 05:25 uevent
+  -r--r--r-- 1 root root 65536 Aug 24 05:27 valid_zones
+
+The same applies in the node's directory with a memory21 link in both
+the node1 and node2's directory.
+
+This is wrong but doesn't prevent the system from running.  However,
+when one of these memory blocks is later hot-unplugged and then
+hot-plugged, the system detects an inconsistency in the sysfs layout
+and a BUG_ON() is raised:
+
+  kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084!
+  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+  Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4
+  CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25
+  Call Trace:
+    add_memory_resource+0x23c/0x340 (unreliable)
+    __add_memory+0x5c/0xf0
+    dlpar_add_lmb+0x1b4/0x500
+    dlpar_memory+0x1f8/0xb80
+    handle_dlpar_errorlog+0xc0/0x190
+    dlpar_store+0x198/0x4a0
+    kobj_attr_store+0x30/0x50
+    sysfs_kf_write+0x64/0x90
+    kernfs_fop_write+0x1b0/0x290
+    vfs_write+0xe8/0x290
+    ksys_write+0xdc/0x130
+    system_call_exception+0x160/0x270
+    system_call_common+0xf0/0x27c
+
+This has been seen on PowerPC LPAR.
+
+The root cause of this issue is that when node's memory is registered,
+the range used can overlap another node's range, thus the memory block
+is registered to multiple nodes in sysfs.
+
+There are two issues here:
+
+ (a) The sysfs memory and node's layouts are broken due to these
+     multiple links
+
+ (b) The link errors in link_mem_sections() should not lead to a system
+     panic.
+
+To address (a) register_mem_sect_under_node should not rely on the
+system state to detect whether the link operation is triggered by a hot
+plug operation or not.  This is addressed by the patches 1 and 2 of this
+series.
+
+Issue (b) will be addressed separately.
+
+This patch (of 2):
+
+The memmap_context enum is used to detect whether a memory operation is
+due to a hot-add operation or happening at boot time.
+
+Make it general to the hotplug operation and rename it as
+meminit_context.
+
+There is no functional change introduced by this patch.
+
+Suggested-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: "Rafael J . Wysocki" <rafael@kernel.org>
+Cc: Nathan Lynch <nathanl@linux.ibm.com>
+Cc: Scott Cheloha <cheloha@linux.ibm.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20200915094143.79181-1-ldufour@linux.ibm.com
+Link: https://lkml.kernel.org/r/20200915132624.9723-1-ldufour@linux.ibm.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/ia64/mm/init.c    |    6 +++---
+ include/linux/mm.h     |    2 +-
+ include/linux/mmzone.h |   11 ++++++++---
+ mm/memory_hotplug.c    |    2 +-
+ mm/page_alloc.c        |   10 +++++-----
+ 5 files changed, 18 insertions(+), 13 deletions(-)
+
+--- a/arch/ia64/mm/init.c
++++ b/arch/ia64/mm/init.c
+@@ -538,7 +538,7 @@ virtual_memmap_init(u64 start, u64 end,
+       if (map_start < map_end)
+               memmap_init_zone((unsigned long)(map_end - map_start),
+                                args->nid, args->zone, page_to_pfn(map_start),
+-                               MEMMAP_EARLY, NULL);
++                               MEMINIT_EARLY, NULL);
+       return 0;
+ }
+@@ -547,8 +547,8 @@ memmap_init (unsigned long size, int nid
+            unsigned long start_pfn)
+ {
+       if (!vmem_map) {
+-              memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY,
+-                              NULL);
++              memmap_init_zone(size, nid, zone, start_pfn,
++                               MEMINIT_EARLY, NULL);
+       } else {
+               struct page *start;
+               struct memmap_init_callback_data args;
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2445,7 +2445,7 @@ extern int __meminit __early_pfn_to_nid(
+ extern void set_dma_reserve(unsigned long new_dma_reserve);
+ extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
+-              enum memmap_context, struct vmem_altmap *);
++              enum meminit_context, struct vmem_altmap *);
+ extern void setup_per_zone_wmarks(void);
+ extern int __meminit init_per_zone_wmark_min(void);
+ extern void mem_init(void);
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -799,10 +799,15 @@ bool zone_watermark_ok(struct zone *z, u
+               unsigned int alloc_flags);
+ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+               unsigned long mark, int highest_zoneidx);
+-enum memmap_context {
+-      MEMMAP_EARLY,
+-      MEMMAP_HOTPLUG,
++/*
++ * Memory initialization context, use to differentiate memory added by
++ * the platform statically or via memory hotplug interface.
++ */
++enum meminit_context {
++      MEMINIT_EARLY,
++      MEMINIT_HOTPLUG,
+ };
++
+ extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
+                                    unsigned long size);
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -719,7 +719,7 @@ void __ref move_pfn_range_to_zone(struct
+        * are reserved so nobody should be touching them so we should be safe
+        */
+       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+-                      MEMMAP_HOTPLUG, altmap);
++                       MEMINIT_HOTPLUG, altmap);
+       set_zone_contiguous(zone);
+ }
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5952,7 +5952,7 @@ overlap_memmap_init(unsigned long zone,
+  * done. Non-atomic initialization, single-pass.
+  */
+ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+-              unsigned long start_pfn, enum memmap_context context,
++              unsigned long start_pfn, enum meminit_context context,
+               struct vmem_altmap *altmap)
+ {
+       unsigned long pfn, end_pfn = start_pfn + size;
+@@ -5984,7 +5984,7 @@ void __meminit memmap_init_zone(unsigned
+                * There can be holes in boot-time mem_map[]s handed to this
+                * function.  They do not exist on hotplugged memory.
+                */
+-              if (context == MEMMAP_EARLY) {
++              if (context == MEMINIT_EARLY) {
+                       if (overlap_memmap_init(zone, &pfn))
+                               continue;
+                       if (defer_init(nid, pfn, end_pfn))
+@@ -5993,7 +5993,7 @@ void __meminit memmap_init_zone(unsigned
+               page = pfn_to_page(pfn);
+               __init_single_page(page, pfn, zone, nid);
+-              if (context == MEMMAP_HOTPLUG)
++              if (context == MEMINIT_HOTPLUG)
+                       __SetPageReserved(page);
+               /*
+@@ -6076,7 +6076,7 @@ void __ref memmap_init_zone_device(struc
+                * check here not to call set_pageblock_migratetype() against
+                * pfn out of zone.
+                *
+-               * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
++               * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
+                * because this is done early in section_activate()
+                */
+               if (!(pfn & (pageblock_nr_pages - 1))) {
+@@ -6114,7 +6114,7 @@ void __meminit __weak memmap_init(unsign
+               if (end_pfn > start_pfn) {
+                       size = end_pfn - start_pfn;
+                       memmap_init_zone(size, nid, zone, start_pfn,
+-                                       MEMMAP_EARLY, NULL);
++                                       MEMINIT_EARLY, NULL);
+               }
+       }
+ }
diff --git a/queue-5.8/mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch b/queue-5.8/mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch
new file mode 100644 (file)
index 0000000..500f633
--- /dev/null
@@ -0,0 +1,85 @@
+From 41663430588c737dd735bad5a0d1ba325dcabd59 Mon Sep 17 00:00:00 2001
+From: Gao Xiang <hsiangkao@redhat.com>
+Date: Fri, 25 Sep 2020 21:19:01 -0700
+Subject: mm, THP, swap: fix allocating cluster for swapfile by mistake
+
+From: Gao Xiang <hsiangkao@redhat.com>
+
+commit 41663430588c737dd735bad5a0d1ba325dcabd59 upstream.
+
+SWP_FS is used to make swap_{read,write}page() go through the
+filesystem, and it's only used for swap files over NFS.  So, !SWP_FS
+means non-NFS for now; it could be either file backed or device backed.
+Something similar goes with legacy SWP_FILE.
+
+So in order to achieve the goal of the original patch, SWP_BLKDEV should
+be used instead.
+
+FS corruption can be observed with SSD device + XFS + fragmented
+swapfile due to CONFIG_THP_SWAP=y.
+
+I reproduced the issue with the following details:
+
+Environment:
+
+  QEMU + upstream kernel + buildroot + NVMe (2 GB)
+
+Kernel config:
+
+  CONFIG_BLK_DEV_NVME=y
+  CONFIG_THP_SWAP=y
+
+Some reproducible steps:
+
+  mkfs.xfs -f /dev/nvme0n1
+  mkdir /tmp/mnt
+  mount /dev/nvme0n1 /tmp/mnt
+  bs="32k"
+  sz="1024m"    # doesn't matter too much, I also tried 16m
+  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
+  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
+  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
+  xfs_io -f -c "pwrite -F -S 0 -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
+  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fsync" /tmp/mnt/sw
+
+  mkswap /tmp/mnt/sw
+  swapon /tmp/mnt/sw
+
+  stress --vm 2 --vm-bytes 600M   # doesn't matter too much as well
+
+Symptoms:
+ - FS corruption (e.g. checksum failure)
+ - memory corruption at: 0xd2808010
+ - segfault
+
+Fixes: f0eea189e8e9 ("mm, THP, swap: Don't allocate huge cluster for file backed swap device")
+Fixes: 38d8b4e6bdc8 ("mm, THP, swap: delay splitting THP during swap out")
+Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Rafael Aquini <aquini@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Carlos Maiolino <cmaiolino@redhat.com>
+Cc: Eric Sandeen <esandeen@redhat.com>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20200820045323.7809-1-hsiangkao@redhat.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/swapfile.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -1074,7 +1074,7 @@ start_over:
+                       goto nextsi;
+               }
+               if (size == SWAPFILE_CLUSTER) {
+-                      if (!(si->flags & SWP_FS))
++                      if (si->flags & SWP_BLKDEV)
+                               n_ret = swap_alloc_cluster(si, swp_entries);
+               } else
+                       n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
diff --git a/queue-5.8/mt76-mt7615-use-v1-mcu-api-on-mt7615-to-fix-issues-with-adding-removing-stations.patch b/queue-5.8/mt76-mt7615-use-v1-mcu-api-on-mt7615-to-fix-issues-with-adding-removing-stations.patch
new file mode 100644 (file)
index 0000000..cd5f8e8
--- /dev/null
@@ -0,0 +1,42 @@
+From d1c9da9e4c938e8bbf8b0ef9e5772b97db5639e9 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 12 Aug 2020 12:23:32 +0200
+Subject: mt76: mt7615: use v1 MCU API on MT7615 to fix issues with adding/removing stations
+
+From: Felix Fietkau <nbd@nbd.name>
+
+commit d1c9da9e4c938e8bbf8b0ef9e5772b97db5639e9 upstream.
+
+The implementation of embedding WTBL update inside the STA_REC update is buggy
+on the MT7615 v2 firmware. This leads to connection issues after a station has
+connected and disconnected again.
+
+Switch to the v1 MCU API ops, since they have received much more testing and
+should be more stable.
+
+On MT7622 and later, the v2 API is more actively used, so we should keep using
+it as well.
+
+Fixes: 6849e29ed92e ("mt76: mt7615: add starec operating flow for firmware v2")
+Cc: stable@vger.kernel.org
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Link: https://lore.kernel.org/r/20200812102332.11812-1-nbd@nbd.name
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/mediatek/mt76/mt7615/mcu.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+@@ -2014,7 +2014,8 @@ static int mt7615_load_n9(struct mt7615_
+                sizeof(dev->mt76.hw->wiphy->fw_version),
+                "%.10s-%.15s", hdr->fw_ver, hdr->build_date);
+-      if (!strncmp(hdr->fw_ver, "2.0", sizeof(hdr->fw_ver))) {
++      if (!is_mt7615(&dev->mt76) &&
++          !strncmp(hdr->fw_ver, "2.0", sizeof(hdr->fw_ver))) {
+               dev->fw_ver = MT7615_FIRMWARE_V2;
+               dev->mcu_ops = &sta_update_ops;
+       } else {
diff --git a/queue-5.8/s390-dasd-fix-zero-write-for-fba-devices.patch b/queue-5.8/s390-dasd-fix-zero-write-for-fba-devices.patch
new file mode 100644 (file)
index 0000000..7037d4f
--- /dev/null
@@ -0,0 +1,69 @@
+From 709192d531e5b0a91f20aa14abfe2fc27ddd47af Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20H=C3=B6ppner?= <hoeppner@linux.ibm.com>
+Date: Mon, 14 Sep 2020 13:56:47 +0200
+Subject: s390/dasd: Fix zero write for FBA devices
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jan Höppner <hoeppner@linux.ibm.com>
+
+commit 709192d531e5b0a91f20aa14abfe2fc27ddd47af upstream.
+
+A discard request that writes zeros using the global kernel internal
+ZERO_PAGE will fail for machines with more than 2GB of memory due to the
+location of the ZERO_PAGE.
+
+Fix this by using a driver owned global zero page allocated with GFP_DMA
+flag set.
+
+Fixes: 28b841b3a7cb ("s390/dasd: Add discard support for FBA devices")
+Signed-off-by: Jan Höppner <hoeppner@linux.ibm.com>
+Reviewed-by: Stefan Haberland <sth@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # 4.14+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/block/dasd_fba.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/s390/block/dasd_fba.c
++++ b/drivers/s390/block/dasd_fba.c
+@@ -40,6 +40,7 @@
+ MODULE_LICENSE("GPL");
+ static struct dasd_discipline dasd_fba_discipline;
++static void *dasd_fba_zero_page;
+ struct dasd_fba_private {
+       struct dasd_fba_characteristics rdc_data;
+@@ -270,7 +271,7 @@ static void ccw_write_zero(struct ccw1 *
+       ccw->cmd_code = DASD_FBA_CCW_WRITE;
+       ccw->flags |= CCW_FLAG_SLI;
+       ccw->count = count;
+-      ccw->cda = (__u32) (addr_t) page_to_phys(ZERO_PAGE(0));
++      ccw->cda = (__u32) (addr_t) dasd_fba_zero_page;
+ }
+ /*
+@@ -830,6 +831,11 @@ dasd_fba_init(void)
+       int ret;
+       ASCEBC(dasd_fba_discipline.ebcname, 4);
++
++      dasd_fba_zero_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
++      if (!dasd_fba_zero_page)
++              return -ENOMEM;
++
+       ret = ccw_driver_register(&dasd_fba_driver);
+       if (!ret)
+               wait_for_device_probe();
+@@ -841,6 +847,7 @@ static void __exit
+ dasd_fba_cleanup(void)
+ {
+       ccw_driver_unregister(&dasd_fba_driver);
++      free_page((unsigned long)dasd_fba_zero_page);
+ }
+ module_init(dasd_fba_init);
diff --git a/queue-5.8/s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch b/queue-5.8/s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch
new file mode 100644 (file)
index 0000000..7b2b30d
--- /dev/null
@@ -0,0 +1,37 @@
+From f7e80983f0cf470bb82036e73bff4d5a7daf8fc2 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Mon, 21 Sep 2020 12:48:36 +0200
+Subject: s390/zcrypt: Fix ZCRYPT_PERDEV_REQCNT ioctl
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit f7e80983f0cf470bb82036e73bff4d5a7daf8fc2 upstream.
+
+reqcnt is a u32 pointer, but we copy sizeof(reqcnt), which is the
+size of the pointer. This means we only copy 8 bytes. Let us copy
+the full array instead (sizeof(u32) * AP_DEVICES bytes).
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Cc: Harald Freudenberger <freude@linux.ibm.com>
+Cc: stable@vger.kernel.org
+Fixes: af4a72276d49 ("s390/zcrypt: Support up to 256 crypto adapters.")
+Reviewed-by: Harald Freudenberger <freude@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/crypto/zcrypt_api.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/s390/crypto/zcrypt_api.c
++++ b/drivers/s390/crypto/zcrypt_api.c
+@@ -1429,7 +1429,8 @@ static long zcrypt_unlocked_ioctl(struct
+               if (!reqcnt)
+                       return -ENOMEM;
+               zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES);
+-              if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt)))
++              if (copy_to_user((int __user *) arg, reqcnt,
++                               sizeof(u32) * AP_DEVICES))
+                       rc = -EFAULT;
+               kfree(reqcnt);
+               return rc;
index 0a40139f3b15da8384c331c740f010b97d8f0a95..9f04a1eb5305586cddfb3f69616d6a264ea4d2ba 100644 (file)
@@ -77,3 +77,20 @@ alsa-usb-audio-add-delay-quirk-for-h570e-usb-headsets.patch
 alsa-hda-realtek-couldn-t-detect-mic-if-booting-with-headset-plugged.patch
 alsa-hda-realtek-enable-front-panel-headset-led-on-lenovo-thinkstation-p520.patch
 lib-string.c-implement-stpcpy.patch
+tracing-fix-double-free.patch
+s390-dasd-fix-zero-write-for-fba-devices.patch
+mt76-mt7615-use-v1-mcu-api-on-mt7615-to-fix-issues-with-adding-removing-stations.patch
+lib-bootconfig-fix-a-bug-of-breaking-existing-tree-nodes.patch
+lib-bootconfig-fix-to-remove-tailing-spaces-after-value.patch
+kprobes-fix-to-check-probe-enabled-before-disarm_kprobe_ftrace.patch
+kprobes-tracing-kprobes-fix-to-kill-kprobes-on-initmem-after-boot.patch
+btrfs-fix-put-of-uninitialized-kobject-after-seed-device-delete.patch
+btrfs-fix-overflow-when-copying-corrupt-csums-for-a-message.patch
+media-cec-adap.c-don-t-use-flush_scheduled_work.patch
+mips-loongson2ef-disable-loongson-mmi-instructions.patch
+dmabuf-fix-null-pointer-dereference-in-dma_buf_release.patch
+mm-thp-swap-fix-allocating-cluster-for-swapfile-by-mistake.patch
+mm-gup-fix-gup_fast-with-dynamic-page-table-folding.patch
+mm-replace-memmap_context-by-meminit_context.patch
+mm-don-t-rely-on-system-state-to-detect-hot-plug-operations.patch
+s390-zcrypt-fix-zcrypt_perdev_reqcnt-ioctl.patch
diff --git a/queue-5.8/tracing-fix-double-free.patch b/queue-5.8/tracing-fix-double-free.patch
new file mode 100644 (file)
index 0000000..5dcee97
--- /dev/null
@@ -0,0 +1,46 @@
+From 46bbe5c671e06f070428b9be142cc4ee5cedebac Mon Sep 17 00:00:00 2001
+From: Tom Rix <trix@redhat.com>
+Date: Mon, 7 Sep 2020 06:58:45 -0700
+Subject: tracing: fix double free
+
+From: Tom Rix <trix@redhat.com>
+
+commit 46bbe5c671e06f070428b9be142cc4ee5cedebac upstream.
+
+clang static analyzer reports this problem
+
+trace_events_hist.c:3824:3: warning: Attempt to free
+  released memory
+    kfree(hist_data->attrs->var_defs.name[i]);
+
+In parse_var_defs() if there is a problem allocating
+var_defs.expr, the earlier var_defs.name is freed.
+This free is duplicated by free_var_defs() which frees
+the rest of the list.
+
+Because free_var_defs() has to run anyway, remove the
+second free from parse_var_defs().
+
+Link: https://lkml.kernel.org/r/20200907135845.15804-1-trix@redhat.com
+
+Cc: stable@vger.kernel.org
+Fixes: 30350d65ac56 ("tracing: Add variable support to hist triggers")
+Reviewed-by: Tom Zanussi <tom.zanussi@linux.intel.com>
+Signed-off-by: Tom Rix <trix@redhat.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace_events_hist.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -3865,7 +3865,6 @@ static int parse_var_defs(struct hist_tr
+                       s = kstrdup(field_str, GFP_KERNEL);
+                       if (!s) {
+-                              kfree(hist_data->attrs->var_defs.name[n_vars]);
+                               ret = -ENOMEM;
+                               goto free;
+                       }