From c70c160a43a1e695533b86ba35e8f612352b60f3 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 8 Oct 2019 21:26:32 -0400 Subject: [PATCH] fixes for 5.3 Signed-off-by: Sasha Levin --- ...ng-writeback_fid-on-mmap-with-type-p.patch | 47 +++++ .../9p-transport-error-uninitialized.patch | 91 +++++++++ ...ckdep_assert_held-into-elevator_exit.patch | 67 ++++++ ...-fix-bpf_event_output-re-entry-issue.patch | 155 ++++++++++++++ ...sts-failure-due-to-uninitialized-i_m.patch | 88 ++++++++ ...p_gen-under-spinlock-in-ceph_add_cap.patch | 64 ++++++ ...ories-inode-i_blkbits-initialization.patch | 50 +++++ ...onnection-if-session-hang-in-opening.patch | 46 +++++ ...qcom-tsens-fix-memory-leak-from-qfpr.patch | 128 ++++++++++++ ...-for-valid-number-of-registers-to-re.patch | 39 ++++ ...ix-kfd-related-kernel-oops-on-hawaii.patch | 41 ++++ ...earlier-when-radeon.cik_-si_support-.patch | 119 +++++++++++ ...ble-null-pointer-dereferences-in-enc.patch | 46 +++++ ...use-fix-memleak-in-cuse_channel_open.patch | 39 ++++ queue-5.3/fuse-fix-request-limit.patch | 48 +++++ ...sable-dma-processing-on-the-lenovo-y.patch | 65 ++++++ ...lways-return-negative-code-for-error.patch | 44 ++++ ...ma-fix-freeing-ongoing-ahash_request.patch | 42 ++++ ...ents-writeback.h-fix-wstringop-trunc.patch | 191 ++++++++++++++++++ ...wngrading-default-page-sizes-in-allo.patch | 50 +++++ ...-elfcore.c-include-proper-prototypes.patch | 51 +++++ ...sistency-check-on-injected-exception.patch | 48 +++++ ...false-uninitialized-variable-warning.patch | 36 ++++ ...bnvdimm-fix-endian-conversion-issues.patch | 83 ++++++++ ...it_test-fix-acpi_handle-redefinition.patch | 70 +++++++ ...-initialize-bad-block-for-volatile-n.patch | 110 ++++++++++ ...lower-fail-in-case-user-specifies-mu.patch | 53 +++++ ...rochip-always-set-regmap-stride-to-1.patch | 51 +++++ ...tables-allow-lookups-in-dynamic-sets.patch | 107 ++++++++++ ...ry-leak-in-nfp_abm_u32_knode_replace.patch | 69 +++++++ ...b-point-to-right-memory-window-index.patch | 
53 +++++ ...etection-of-java-11-openjdk-devel-pa.patch | 62 ++++++ ...o-clear-tev-nargs-in-clear_probe_tra.patch | 66 ++++++ ...previous-counts-on-repeat-with-inter.patch | 168 +++++++++++++++ ...libunwind-build-failure-on-i386-syst.patch | 51 +++++ ...o-clear-the-return-on-close-layout-s.patch | 48 +++++ ...-check-in-case-requested-period-cann.patch | 49 +++++ ...rrupts-being-erroneously-enabled-in-.patch | 62 ++++++ ...igration-to-invalid-cpu-in-__set_cpu.patch | 85 ++++++++ ...-call-sync_core-only-before-usermode.patch | 50 +++++ ...-fix-private-expedited-registration-.patch | 53 +++++ ...just-strobemeta-loop-to-satisfy-late.patch | 51 +++++ ...s-seccomp-fix-build-on-older-kernels.patch | 80 ++++++++ queue-5.3/series | 53 +++++ ...try-to-parse-incomplete-rpc-messages.patch | 63 ++++++ ...-errors-should-always-set-task-tk_rp.patch | 78 +++++++ ...after-free-when-unregistering-therma.patch | 134 ++++++++++++ ...mal_hwmon-sanitize-thermal_zone-type.patch | 53 +++++ ...st-hrtimer-fix-a-race-in-bc_set_next.patch | 173 ++++++++++++++++ ...chdog-aspeed-add-support-for-ast2600.patch | 47 +++++ ...sable-the-stackleak-gcc-plugin-for-t.patch | 53 +++++ .../xen-pci-reserve-mcfg-areas-earlier.patch | 90 +++++++++ ...d-queue-size-grows-after-a-reconnect.patch | 122 +++++++++++ ...xprt_congested-in-xprtrdma-s-slot-me.patch | 50 +++++ 54 files changed, 3932 insertions(+) create mode 100644 queue-5.3/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch create mode 100644 queue-5.3/9p-transport-error-uninitialized.patch create mode 100644 queue-5.3/blk-mq-move-lockdep_assert_held-into-elevator_exit.patch create mode 100644 queue-5.3/bpf-fix-bpf_event_output-re-entry-issue.patch create mode 100644 queue-5.3/btrfs-fix-selftests-failure-due-to-uninitialized-i_m.patch create mode 100644 queue-5.3/ceph-fetch-cap_gen-under-spinlock-in-ceph_add_cap.patch create mode 100644 queue-5.3/ceph-fix-directories-inode-i_blkbits-initialization.patch create mode 100644 
queue-5.3/ceph-reconnect-connection-if-session-hang-in-opening.patch create mode 100644 queue-5.3/drivers-thermal-qcom-tsens-fix-memory-leak-from-qfpr.patch create mode 100644 queue-5.3/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch create mode 100644 queue-5.3/drm-amdgpu-fix-kfd-related-kernel-oops-on-hawaii.patch create mode 100644 queue-5.3/drm-radeon-bail-earlier-when-radeon.cik_-si_support-.patch create mode 100644 queue-5.3/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch create mode 100644 queue-5.3/fuse-fix-memleak-in-cuse_channel_open.patch create mode 100644 queue-5.3/fuse-fix-request-limit.patch create mode 100644 queue-5.3/i2c-qcom-geni-disable-dma-processing-on-the-lenovo-y.patch create mode 100644 queue-5.3/ima-always-return-negative-code-for-error.patch create mode 100644 queue-5.3/ima-fix-freeing-ongoing-ahash_request.patch create mode 100644 queue-5.3/include-trace-events-writeback.h-fix-wstringop-trunc.patch create mode 100644 queue-5.3/iommu-amd-fix-downgrading-default-page-sizes-in-allo.patch create mode 100644 queue-5.3/kernel-elfcore.c-include-proper-prototypes.patch create mode 100644 queue-5.3/kvm-nvmx-fix-consistency-check-on-injected-exception.patch create mode 100644 queue-5.3/libbpf-fix-false-uninitialized-variable-warning.patch create mode 100644 queue-5.3/libnvdimm-fix-endian-conversion-issues.patch create mode 100644 queue-5.3/libnvdimm-nfit_test-fix-acpi_handle-redefinition.patch create mode 100644 queue-5.3/libnvdimm-region-initialize-bad-block-for-volatile-n.patch create mode 100644 queue-5.3/mlxsw-spectrum_flower-fail-in-case-user-specifies-mu.patch create mode 100644 queue-5.3/net-dsa-microchip-always-set-regmap-stride-to-1.patch create mode 100644 queue-5.3/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch create mode 100644 queue-5.3/nfp-abm-fix-memory-leak-in-nfp_abm_u32_knode_replace.patch create mode 100644 queue-5.3/ntb-point-to-right-memory-window-index.patch create mode 100644 
queue-5.3/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch create mode 100644 queue-5.3/perf-probe-fix-to-clear-tev-nargs-in-clear_probe_tra.patch create mode 100644 queue-5.3/perf-stat-reset-previous-counts-on-repeat-with-inter.patch create mode 100644 queue-5.3/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch create mode 100644 queue-5.3/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch create mode 100644 queue-5.3/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch create mode 100644 queue-5.3/riscv-avoid-interrupts-being-erroneously-enabled-in-.patch create mode 100644 queue-5.3/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch create mode 100644 queue-5.3/sched-membarrier-call-sync_core-only-before-usermode.patch create mode 100644 queue-5.3/sched-membarrier-fix-private-expedited-registration-.patch create mode 100644 queue-5.3/selftests-bpf-adjust-strobemeta-loop-to-satisfy-late.patch create mode 100644 queue-5.3/selftests-seccomp-fix-build-on-older-kernels.patch create mode 100644 queue-5.3/sunrpc-don-t-try-to-parse-incomplete-rpc-messages.patch create mode 100644 queue-5.3/sunrpc-rpc-level-errors-should-always-set-task-tk_rp.patch create mode 100644 queue-5.3/thermal-fix-use-after-free-when-unregistering-therma.patch create mode 100644 queue-5.3/thermal_hwmon-sanitize-thermal_zone-type.patch create mode 100644 queue-5.3/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch create mode 100644 queue-5.3/watchdog-aspeed-add-support-for-ast2600.patch create mode 100644 queue-5.3/x86-purgatory-disable-the-stackleak-gcc-plugin-for-t.patch create mode 100644 queue-5.3/xen-pci-reserve-mcfg-areas-earlier.patch create mode 100644 queue-5.3/xprtrdma-send-queue-size-grows-after-a-reconnect.patch create mode 100644 queue-5.3/xprtrdma-toggle-xprt_congested-in-xprtrdma-s-slot-me.patch diff --git a/queue-5.3/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch 
b/queue-5.3/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch new file mode 100644 index 00000000000..11e2e8c15a7 --- /dev/null +++ b/queue-5.3/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch @@ -0,0 +1,47 @@ +From c2f8483ee65024d27d2230c64d2f2148ec13e903 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2019 18:03:25 +0800 +Subject: 9p: avoid attaching writeback_fid on mmap with type PRIVATE + +From: Chengguang Xu + +[ Upstream commit c87a37ebd40b889178664c2c09cc187334146292 ] + +Currently on mmap cache policy, we always attach writeback_fid +whether mmap type is SHARED or PRIVATE. However, in the use case +of kata-container which combines 9p(Guest OS) with overlayfs(Host OS), +this behavior will trigger overlayfs' copy-up when excute command +inside container. + +Link: http://lkml.kernel.org/r/20190820100325.10313-1-cgxu519@zoho.com.cn +Signed-off-by: Chengguang Xu +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + fs/9p/vfs_file.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c +index 4cc966a31cb37..fe7f0bd2048e4 100644 +--- a/fs/9p/vfs_file.c ++++ b/fs/9p/vfs_file.c +@@ -513,6 +513,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) + v9inode = V9FS_I(inode); + mutex_lock(&v9inode->v_mutex); + if (!v9inode->writeback_fid && ++ (vma->vm_flags & VM_SHARED) && + (vma->vm_flags & VM_WRITE)) { + /* + * clone a fid and add it to writeback_fid +@@ -614,6 +615,8 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) + (vma->vm_end - vma->vm_start - 1), + }; + ++ if (!(vma->vm_flags & VM_SHARED)) ++ return; + + p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma); + +-- +2.20.1 + diff --git a/queue-5.3/9p-transport-error-uninitialized.patch b/queue-5.3/9p-transport-error-uninitialized.patch new file mode 100644 index 00000000000..3bfe8fff4e4 --- /dev/null +++ b/queue-5.3/9p-transport-error-uninitialized.patch @@ -0,0 +1,91 @@ +From 
c851e10a05a423cbe934aa3b4e5412d611df1ec9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jun 2019 15:08:54 +0800 +Subject: 9p: Transport error uninitialized + +From: Lu Shuaibing + +[ Upstream commit 0ce772fe79b68f83df40f07f28207b292785c677 ] + +The p9_tag_alloc() does not initialize the transport error t_err field. +The struct p9_req_t *req is allocated and stored in a struct p9_client +variable. The field t_err is never initialized before p9_conn_cancel() +checks its value. + +KUMSAN(KernelUninitializedMemorySantizer, a new error detection tool) +reports this bug. + +================================================================== +BUG: KUMSAN: use of uninitialized memory in p9_conn_cancel+0x2d9/0x3b0 +Read of size 4 at addr ffff88805f9b600c by task kworker/1:2/1216 + +CPU: 1 PID: 1216 Comm: kworker/1:2 Not tainted 5.2.0-rc4+ #28 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +Workqueue: events p9_write_work +Call Trace: + dump_stack+0x75/0xae + __kumsan_report+0x17c/0x3e6 + kumsan_report+0xe/0x20 + p9_conn_cancel+0x2d9/0x3b0 + p9_write_work+0x183/0x4a0 + process_one_work+0x4d1/0x8c0 + worker_thread+0x6e/0x780 + kthread+0x1ca/0x1f0 + ret_from_fork+0x35/0x40 + +Allocated by task 1979: + save_stack+0x19/0x80 + __kumsan_kmalloc.constprop.3+0xbc/0x120 + kmem_cache_alloc+0xa7/0x170 + p9_client_prepare_req.part.9+0x3b/0x380 + p9_client_rpc+0x15e/0x880 + p9_client_create+0x3d0/0xac0 + v9fs_session_init+0x192/0xc80 + v9fs_mount+0x67/0x470 + legacy_get_tree+0x70/0xd0 + vfs_get_tree+0x4a/0x1c0 + do_mount+0xba9/0xf90 + ksys_mount+0xa8/0x120 + __x64_sys_mount+0x62/0x70 + do_syscall_64+0x6d/0x1e0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Freed by task 0: +(stack is not available) + +The buggy address belongs to the object at ffff88805f9b6008 + which belongs to the cache p9_req_t of size 144 +The buggy address is located 4 bytes inside of + 144-byte region [ffff88805f9b6008, ffff88805f9b6098) +The buggy address 
belongs to the page: +page:ffffea00017e6d80 refcount:1 mapcount:0 mapping:ffff888068b63740 index:0xffff88805f9b7d90 compound_mapcount: 0 +flags: 0x100000000010200(slab|head) +raw: 0100000000010200 ffff888068b66450 ffff888068b66450 ffff888068b63740 +raw: ffff88805f9b7d90 0000000000100001 00000001ffffffff 0000000000000000 +page dumped because: kumsan: bad access detected +================================================================== + +Link: http://lkml.kernel.org/r/20190613070854.10434-1-shuaibinglu@126.com +Signed-off-by: Lu Shuaibing +[dominique.martinet@cea.fr: grouped the added init with the others] +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + net/9p/client.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/9p/client.c b/net/9p/client.c +index 9622f3e469f67..1d48afc7033ca 100644 +--- a/net/9p/client.c ++++ b/net/9p/client.c +@@ -281,6 +281,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) + + p9pdu_reset(&req->tc); + p9pdu_reset(&req->rc); ++ req->t_err = 0; + req->status = REQ_STATUS_ALLOC; + init_waitqueue_head(&req->wq); + INIT_LIST_HEAD(&req->req_list); +-- +2.20.1 + diff --git a/queue-5.3/blk-mq-move-lockdep_assert_held-into-elevator_exit.patch b/queue-5.3/blk-mq-move-lockdep_assert_held-into-elevator_exit.patch new file mode 100644 index 00000000000..c49249c1a41 --- /dev/null +++ b/queue-5.3/blk-mq-move-lockdep_assert_held-into-elevator_exit.patch @@ -0,0 +1,67 @@ +From 3bbcac811490bdb39f78995d31f3a0f17596bb78 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Sep 2019 06:23:54 +0800 +Subject: blk-mq: move lockdep_assert_held() into elevator_exit + +From: Ming Lei + +[ Upstream commit 284b94be1925dbe035ce5218d8b5c197321262c7 ] + +Commit c48dac137a62 ("block: don't hold q->sysfs_lock in elevator_init_mq") +removes q->sysfs_lock from elevator_init_mq(), but forgot to deal with +lockdep_assert_held() called in blk_mq_sched_free_requests() which is +run in failure path of 
elevator_init_mq(). + +blk_mq_sched_free_requests() is called in the following 3 functions: + + elevator_init_mq() + elevator_exit() + blk_cleanup_queue() + +In blk_cleanup_queue(), blk_mq_sched_free_requests() is followed exactly +by 'mutex_lock(&q->sysfs_lock)'. + +So moving the lockdep_assert_held() from blk_mq_sched_free_requests() +into elevator_exit() for fixing the report by syzbot. + +Reported-by: syzbot+da3b7677bb913dc1b737@syzkaller.appspotmail.com +Fixed: c48dac137a62 ("block: don't hold q->sysfs_lock in elevator_init_mq") +Reviewed-by: Bart Van Assche +Reviewed-by: Damien Le Moal +Signed-off-by: Ming Lei +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-mq-sched.c | 2 -- + block/blk.h | 2 ++ + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c +index c9d183d6c4999..ca22afd47b3dc 100644 +--- a/block/blk-mq-sched.c ++++ b/block/blk-mq-sched.c +@@ -555,8 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q) + struct blk_mq_hw_ctx *hctx; + int i; + +- lockdep_assert_held(&q->sysfs_lock); +- + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->sched_tags) + blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i); +diff --git a/block/blk.h b/block/blk.h +index d5edfd73d45ea..0685c45e3d96e 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -201,6 +201,8 @@ void elv_unregister_queue(struct request_queue *q); + static inline void elevator_exit(struct request_queue *q, + struct elevator_queue *e) + { ++ lockdep_assert_held(&q->sysfs_lock); ++ + blk_mq_sched_free_requests(q); + __elevator_exit(q, e); + } +-- +2.20.1 + diff --git a/queue-5.3/bpf-fix-bpf_event_output-re-entry-issue.patch b/queue-5.3/bpf-fix-bpf_event_output-re-entry-issue.patch new file mode 100644 index 00000000000..6b7b4903d35 --- /dev/null +++ b/queue-5.3/bpf-fix-bpf_event_output-re-entry-issue.patch @@ -0,0 +1,155 @@ +From b3c1195318ae0efd1557341904fc4f9443c2d57c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: 
Wed, 25 Sep 2019 16:43:12 -0700 +Subject: bpf: Fix bpf_event_output re-entry issue + +From: Allan Zhang + +[ Upstream commit 768fb61fcc13b2acaca758275d54c09a65e2968b ] + +BPF_PROG_TYPE_SOCK_OPS program can reenter bpf_event_output because it +can be called from atomic and non-atomic contexts since we don't have +bpf_prog_active to prevent it happen. + +This patch enables 3 levels of nesting to support normal, irq and nmi +context. + +We can easily reproduce the issue by running netperf crr mode with 100 +flows and 10 threads from netperf client side. + +Here is the whole stack dump: + +[ 515.228898] WARNING: CPU: 20 PID: 14686 at kernel/trace/bpf_trace.c:549 bpf_event_output+0x1f9/0x220 +[ 515.228903] CPU: 20 PID: 14686 Comm: tcp_crr Tainted: G W 4.15.0-smp-fixpanic #44 +[ 515.228904] Hardware name: Intel TBG,ICH10/Ikaria_QC_1b, BIOS 1.22.0 06/04/2018 +[ 515.228905] RIP: 0010:bpf_event_output+0x1f9/0x220 +[ 515.228906] RSP: 0018:ffff9a57ffc03938 EFLAGS: 00010246 +[ 515.228907] RAX: 0000000000000012 RBX: 0000000000000001 RCX: 0000000000000000 +[ 515.228907] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffffffff836b0f80 +[ 515.228908] RBP: ffff9a57ffc039c8 R08: 0000000000000004 R09: 0000000000000012 +[ 515.228908] R10: ffff9a57ffc1de40 R11: 0000000000000000 R12: 0000000000000002 +[ 515.228909] R13: ffff9a57e13bae00 R14: 00000000ffffffff R15: ffff9a57ffc1e2c0 +[ 515.228910] FS: 00007f5a3e6ec700(0000) GS:ffff9a57ffc00000(0000) knlGS:0000000000000000 +[ 515.228910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 515.228911] CR2: 0000537082664fff CR3: 000000061fed6002 CR4: 00000000000226f0 +[ 515.228911] Call Trace: +[ 515.228913] +[ 515.228919] [] bpf_sockopt_event_output+0x3b/0x50 +[ 515.228923] [] ? bpf_ktime_get_ns+0xe/0x10 +[ 515.228927] [] ? __cgroup_bpf_run_filter_sock_ops+0x85/0x100 +[ 515.228930] [] ? tcp_init_transfer+0x125/0x150 +[ 515.228933] [] ? tcp_finish_connect+0x89/0x110 +[ 515.228936] [] ? tcp_rcv_state_process+0x704/0x1010 +[ 515.228939] [] ? 
sk_filter_trim_cap+0x53/0x2a0 +[ 515.228942] [] ? tcp_v6_inbound_md5_hash+0x6f/0x1d0 +[ 515.228945] [] ? tcp_v6_do_rcv+0x1c0/0x460 +[ 515.228947] [] ? tcp_v6_rcv+0x9f8/0xb30 +[ 515.228951] [] ? ip6_route_input+0x190/0x220 +[ 515.228955] [] ? ip6_protocol_deliver_rcu+0x6d/0x450 +[ 515.228958] [] ? ip6_rcv_finish+0xb6/0x170 +[ 515.228961] [] ? ip6_protocol_deliver_rcu+0x450/0x450 +[ 515.228963] [] ? ipv6_rcv+0x61/0xe0 +[ 515.228966] [] ? ipv6_list_rcv+0x330/0x330 +[ 515.228969] [] ? __netif_receive_skb_one_core+0x5b/0xa0 +[ 515.228972] [] ? __netif_receive_skb+0x21/0x70 +[ 515.228975] [] ? process_backlog+0xb2/0x150 +[ 515.228978] [] ? net_rx_action+0x16f/0x410 +[ 515.228982] [] ? __do_softirq+0xdd/0x305 +[ 515.228986] [] ? irq_exit+0x9c/0xb0 +[ 515.228989] [] ? smp_call_function_single_interrupt+0x65/0x120 +[ 515.228991] [] ? call_function_single_interrupt+0x81/0x90 +[ 515.228992] +[ 515.228996] [] ? io_serial_in+0x20/0x20 +[ 515.229000] [] ? console_unlock+0x230/0x490 +[ 515.229003] [] ? vprintk_emit+0x26a/0x2a0 +[ 515.229006] [] ? vprintk_default+0x1f/0x30 +[ 515.229008] [] ? vprintk_func+0x35/0x70 +[ 515.229011] [] ? printk+0x50/0x66 +[ 515.229013] [] ? bpf_event_output+0xb7/0x220 +[ 515.229016] [] ? bpf_sockopt_event_output+0x3b/0x50 +[ 515.229019] [] ? bpf_ktime_get_ns+0xe/0x10 +[ 515.229023] [] ? release_sock+0x97/0xb0 +[ 515.229026] [] ? tcp_recvmsg+0x31a/0xda0 +[ 515.229029] [] ? __cgroup_bpf_run_filter_sock_ops+0x85/0x100 +[ 515.229032] [] ? tcp_set_state+0x191/0x1b0 +[ 515.229035] [] ? tcp_disconnect+0x2e/0x600 +[ 515.229038] [] ? tcp_close+0x3eb/0x460 +[ 515.229040] [] ? inet_release+0x42/0x70 +[ 515.229043] [] ? inet6_release+0x39/0x50 +[ 515.229046] [] ? __sock_release+0x4d/0xd0 +[ 515.229049] [] ? sock_close+0x15/0x20 +[ 515.229052] [] ? __fput+0xe7/0x1f0 +[ 515.229055] [] ? ____fput+0xe/0x10 +[ 515.229058] [] ? task_work_run+0x82/0xb0 +[ 515.229061] [] ? exit_to_usermode_loop+0x7e/0x11f +[ 515.229064] [] ? do_syscall_64+0x111/0x130 +[ 515.229067] [] ? 
entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +Fixes: a5a3a828cd00 ("bpf: add perf event notificaton support for sock_ops") +Signed-off-by: Allan Zhang +Signed-off-by: Daniel Borkmann +Reviewed-by: Stanislav Fomichev +Reviewed-by: Eric Dumazet +Acked-by: John Fastabend +Link: https://lore.kernel.org/bpf/20190925234312.94063-2-allanzhang@google.com +Signed-off-by: Sasha Levin +--- + kernel/trace/bpf_trace.c | 26 +++++++++++++++++++++----- + 1 file changed, 21 insertions(+), 5 deletions(-) + +diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c +index ca1255d145766..3e38a010003c9 100644 +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -500,14 +500,17 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +-static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); +-static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd); ++static DEFINE_PER_CPU(int, bpf_event_output_nest_level); ++struct bpf_nested_pt_regs { ++ struct pt_regs regs[3]; ++}; ++static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs); ++static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds); + + u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, + void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) + { +- struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd); +- struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); ++ int nest_level = this_cpu_inc_return(bpf_event_output_nest_level); + struct perf_raw_frag frag = { + .copy = ctx_copy, + .size = ctx_size, +@@ -522,12 +525,25 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, + .data = meta, + }, + }; ++ struct perf_sample_data *sd; ++ struct pt_regs *regs; ++ u64 ret; ++ ++ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { ++ ret = -EBUSY; ++ goto out; ++ } ++ sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]); ++ regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]); + + 
perf_fetch_caller_regs(regs); + perf_sample_data_init(sd, 0, 0); + sd->raw = &raw; + +- return __bpf_perf_event_output(regs, map, flags, sd); ++ ret = __bpf_perf_event_output(regs, map, flags, sd); ++out: ++ this_cpu_dec(bpf_event_output_nest_level); ++ return ret; + } + + BPF_CALL_0(bpf_get_current_task) +-- +2.20.1 + diff --git a/queue-5.3/btrfs-fix-selftests-failure-due-to-uninitialized-i_m.patch b/queue-5.3/btrfs-fix-selftests-failure-due-to-uninitialized-i_m.patch new file mode 100644 index 00000000000..ba1728645e3 --- /dev/null +++ b/queue-5.3/btrfs-fix-selftests-failure-due-to-uninitialized-i_m.patch @@ -0,0 +1,88 @@ +From fa013b05aa4f0ef2ccad219534972059a839077d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Sep 2019 13:08:52 +0100 +Subject: Btrfs: fix selftests failure due to uninitialized i_mode in test + inodes + +From: Filipe Manana + +[ Upstream commit 9f7fec0ba89108b9385f1b9fb167861224912a4a ] + +Some of the self tests create a test inode, setup some extents and then do +calls to btrfs_get_extent() to test that the corresponding extent maps +exist and are correct. However btrfs_get_extent(), since the 5.2 merge +window, now errors out when it finds a regular or prealloc extent for an +inode that does not correspond to a regular file (its ->i_mode is not +S_IFREG). 
This causes the self tests to fail sometimes, specially when +KASAN, slub_debug and page poisoning are enabled: + + $ modprobe btrfs + modprobe: ERROR: could not insert 'btrfs': Invalid argument + + $ dmesg + [ 9414.691648] Btrfs loaded, crc32c=crc32c-intel, debug=on, assert=on, integrity-checker=on, ref-verify=on + [ 9414.692655] BTRFS: selftest: sectorsize: 4096 nodesize: 4096 + [ 9414.692658] BTRFS: selftest: running btrfs free space cache tests + [ 9414.692918] BTRFS: selftest: running extent only tests + [ 9414.693061] BTRFS: selftest: running bitmap only tests + [ 9414.693366] BTRFS: selftest: running bitmap and extent tests + [ 9414.696455] BTRFS: selftest: running space stealing from bitmap to extent tests + [ 9414.697131] BTRFS: selftest: running extent buffer operation tests + [ 9414.697133] BTRFS: selftest: running btrfs_split_item tests + [ 9414.697564] BTRFS: selftest: running extent I/O tests + [ 9414.697583] BTRFS: selftest: running find delalloc tests + [ 9415.081125] BTRFS: selftest: running find_first_clear_extent_bit test + [ 9415.081278] BTRFS: selftest: running extent buffer bitmap tests + [ 9415.124192] BTRFS: selftest: running inode tests + [ 9415.124195] BTRFS: selftest: running btrfs_get_extent tests + [ 9415.127909] BTRFS: selftest: running hole first btrfs_get_extent test + [ 9415.128343] BTRFS critical (device (efault)): regular/prealloc extent found for non-regular inode 256 + [ 9415.131428] BTRFS: selftest: fs/btrfs/tests/inode-tests.c:904 expected a real extent, got 0 + +This happens because the test inodes are created without ever initializing +the i_mode field of the inode, and neither VFS's new_inode() nor the btrfs +callback btrfs_alloc_inode() initialize the i_mode. Initialization of the +i_mode is done through the various callbacks used by the VFS to create +new inodes (regular files, directories, symlinks, tmpfiles, etc), which +all call btrfs_new_inode() which in turn calls inode_init_owner(), which +sets the inode's i_mode. 
Since the tests only uses new_inode() to create +the test inodes, the i_mode was never initialized. + +This always happens on a VM I used with kasan, slub_debug and many other +debug facilities enabled. It also happened to someone who reported this +on bugzilla (on a 5.3-rc). + +Fix this by setting i_mode to S_IFREG at btrfs_new_test_inode(). + +Fixes: 6bf9e4bd6a2778 ("btrfs: inode: Verify inode mode to avoid NULL pointer dereference") +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204397 +Signed-off-by: Filipe Manana +Reviewed-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/tests/btrfs-tests.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c +index 1e3ba49493995..814a918998ece 100644 +--- a/fs/btrfs/tests/btrfs-tests.c ++++ b/fs/btrfs/tests/btrfs-tests.c +@@ -51,7 +51,13 @@ static struct file_system_type test_type = { + + struct inode *btrfs_new_test_inode(void) + { +- return new_inode(test_mnt->mnt_sb); ++ struct inode *inode; ++ ++ inode = new_inode(test_mnt->mnt_sb); ++ if (inode) ++ inode_init_owner(inode, NULL, S_IFREG); ++ ++ return inode; + } + + static int btrfs_init_test_fs(void) +-- +2.20.1 + diff --git a/queue-5.3/ceph-fetch-cap_gen-under-spinlock-in-ceph_add_cap.patch b/queue-5.3/ceph-fetch-cap_gen-under-spinlock-in-ceph_add_cap.patch new file mode 100644 index 00000000000..bbb99c69ac7 --- /dev/null +++ b/queue-5.3/ceph-fetch-cap_gen-under-spinlock-in-ceph_add_cap.patch @@ -0,0 +1,64 @@ +From cd76dfac4f3d48f942246b34dcfc6060e124dcae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jul 2019 13:12:01 -0400 +Subject: ceph: fetch cap_gen under spinlock in ceph_add_cap + +From: Jeff Layton + +[ Upstream commit 606d102327a45a49d293557527802ee7fbfd7af1 ] + +It's protected by the s_gen_ttl_lock, so we should fetch under it +and ensure that we're using the same generation in both places. 
+ +Signed-off-by: Jeff Layton +Reviewed-by: "Yan, Zheng" +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/caps.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c +index ce0f5658720ab..8fd5301128106 100644 +--- a/fs/ceph/caps.c ++++ b/fs/ceph/caps.c +@@ -645,6 +645,7 @@ void ceph_add_cap(struct inode *inode, + struct ceph_cap *cap; + int mds = session->s_mds; + int actual_wanted; ++ u32 gen; + + dout("add_cap %p mds%d cap %llx %s seq %d\n", inode, + session->s_mds, cap_id, ceph_cap_string(issued), seq); +@@ -656,6 +657,10 @@ void ceph_add_cap(struct inode *inode, + if (fmode >= 0) + wanted |= ceph_caps_for_mode(fmode); + ++ spin_lock(&session->s_gen_ttl_lock); ++ gen = session->s_cap_gen; ++ spin_unlock(&session->s_gen_ttl_lock); ++ + cap = __get_cap_for_mds(ci, mds); + if (!cap) { + cap = *new_cap; +@@ -681,7 +686,7 @@ void ceph_add_cap(struct inode *inode, + list_move_tail(&cap->session_caps, &session->s_caps); + spin_unlock(&session->s_cap_lock); + +- if (cap->cap_gen < session->s_cap_gen) ++ if (cap->cap_gen < gen) + cap->issued = cap->implemented = CEPH_CAP_PIN; + + /* +@@ -775,7 +780,7 @@ void ceph_add_cap(struct inode *inode, + cap->seq = seq; + cap->issue_seq = seq; + cap->mseq = mseq; +- cap->cap_gen = session->s_cap_gen; ++ cap->cap_gen = gen; + + if (fmode >= 0) + __ceph_get_fmode(ci, fmode); +-- +2.20.1 + diff --git a/queue-5.3/ceph-fix-directories-inode-i_blkbits-initialization.patch b/queue-5.3/ceph-fix-directories-inode-i_blkbits-initialization.patch new file mode 100644 index 00000000000..048bac7ed40 --- /dev/null +++ b/queue-5.3/ceph-fix-directories-inode-i_blkbits-initialization.patch @@ -0,0 +1,50 @@ +From ddebe1f1528f7da8bdd2f948713898897fe015fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jul 2019 16:50:20 +0100 +Subject: ceph: fix directories inode i_blkbits initialization + +From: Luis Henriques + +[ Upstream commit 
750670341a24cb714e624e0fd7da30900ad93752 ] + +When filling an inode with info from the MDS, i_blkbits is being +initialized using fl_stripe_unit, which contains the stripe unit in +bytes. Unfortunately, this doesn't make sense for directories as they +have fl_stripe_unit set to '0'. This means that i_blkbits will be set +to 0xff, causing an UBSAN undefined behaviour in i_blocksize(): + + UBSAN: Undefined behaviour in ./include/linux/fs.h:731:12 + shift exponent 255 is too large for 32-bit type 'int' + +Fix this by initializing i_blkbits to CEPH_BLOCK_SHIFT if fl_stripe_unit +is zero. + +Signed-off-by: Luis Henriques +Reviewed-by: Jeff Layton +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/inode.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c +index 18500edefc56f..3b537e7038c7a 100644 +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -801,7 +801,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page, + + /* update inode */ + inode->i_rdev = le32_to_cpu(info->rdev); +- inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; ++ /* directories have fl_stripe_unit set to zero */ ++ if (le32_to_cpu(info->layout.fl_stripe_unit)) ++ inode->i_blkbits = ++ fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; ++ else ++ inode->i_blkbits = CEPH_BLOCK_SHIFT; + + __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); + +-- +2.20.1 + diff --git a/queue-5.3/ceph-reconnect-connection-if-session-hang-in-opening.patch b/queue-5.3/ceph-reconnect-connection-if-session-hang-in-opening.patch new file mode 100644 index 00000000000..3d77f6b3ab6 --- /dev/null +++ b/queue-5.3/ceph-reconnect-connection-if-session-hang-in-opening.patch @@ -0,0 +1,46 @@ +From 6bed5fbe653eda8aef839e32ad7fb92ece775ce8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Aug 2019 21:22:45 +0800 +Subject: ceph: reconnect connection if session hang in opening state + +From: Erqi Chen + 
+[ Upstream commit 71a228bc8d65900179e37ac309e678f8c523f133 ] + +If client mds session is evicted in CEPH_MDS_SESSION_OPENING state, +mds won't send session msg to client, and delayed_work skip +CEPH_MDS_SESSION_OPENING state session, the session hang forever. + +Allow ceph_con_keepalive to reconnect a session in OPENING to avoid +session hang. Also, ensure that we skip sessions in RESTARTING and +REJECTED states since those states can't be resurrected by issuing +a keepalive. + +Link: https://tracker.ceph.com/issues/41551 +Signed-off-by: Erqi Chen chenerqi@gmail.com +Reviewed-by: "Yan, Zheng" +Signed-off-by: Jeff Layton +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/mds_client.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c +index 920e9f048bd8f..b11af7d8e8e93 100644 +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -4044,7 +4044,9 @@ static void delayed_work(struct work_struct *work) + pr_info("mds%d hung\n", s->s_mds); + } + } +- if (s->s_state < CEPH_MDS_SESSION_OPEN) { ++ if (s->s_state == CEPH_MDS_SESSION_NEW || ++ s->s_state == CEPH_MDS_SESSION_RESTARTING || ++ s->s_state == CEPH_MDS_SESSION_REJECTED) { + /* this mds is failed or recovering, just wait */ + ceph_put_mds_session(s); + continue; +-- +2.20.1 + diff --git a/queue-5.3/drivers-thermal-qcom-tsens-fix-memory-leak-from-qfpr.patch b/queue-5.3/drivers-thermal-qcom-tsens-fix-memory-leak-from-qfpr.patch new file mode 100644 index 00000000000..a6d49a0e09c --- /dev/null +++ b/queue-5.3/drivers-thermal-qcom-tsens-fix-memory-leak-from-qfpr.patch @@ -0,0 +1,128 @@ +From 8e587cd9d5f169d32a32c4e8aa247e0117ebf662 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Aug 2019 10:38:35 +0100 +Subject: drivers: thermal: qcom: tsens: Fix memory leak from qfprom read + +From: Srinivas Kandagatla + +[ Upstream commit 6b8249abb093551ef173d13a25ed0044d5dd33e0 ] + +memory returned as part of nvmem_read via 
qfprom_read should be +freed by the consumer once done. +Existing code is not doing it so fix it. + +Below memory leak detected by kmemleak + [] kmemleak_alloc+0x50/0x84 + [] __kmalloc+0xe8/0x168 + [] nvmem_cell_read+0x30/0x80 + [] qfprom_read+0x4c/0x7c + [] calibrate_v1+0x34/0x204 + [] tsens_probe+0x164/0x258 + [] platform_drv_probe+0x80/0xa0 + [] really_probe+0x208/0x248 + [] driver_probe_device+0x98/0xc0 + [] __device_attach_driver+0x9c/0xac + [] bus_for_each_drv+0x60/0x8c + [] __device_attach+0x8c/0x100 + [] device_initial_probe+0x20/0x28 + [] bus_probe_device+0x34/0x7c + [] deferred_probe_work_func+0x6c/0x98 + [] process_one_work+0x160/0x2f8 + +Signed-off-by: Srinivas Kandagatla +Acked-by: Amit Kucheria +Signed-off-by: Zhang Rui +Signed-off-by: Sasha Levin +--- + drivers/thermal/qcom/tsens-8960.c | 2 ++ + drivers/thermal/qcom/tsens-v0_1.c | 12 ++++++++++-- + drivers/thermal/qcom/tsens-v1.c | 1 + + drivers/thermal/qcom/tsens.h | 1 + + 4 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c +index 8d9b721dadb65..e46a4e3f25c42 100644 +--- a/drivers/thermal/qcom/tsens-8960.c ++++ b/drivers/thermal/qcom/tsens-8960.c +@@ -229,6 +229,8 @@ static int calibrate_8960(struct tsens_priv *priv) + for (i = 0; i < num_read; i++, s++) + s->offset = data[i]; + ++ kfree(data); ++ + return 0; + } + +diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c +index 6f26fadf4c279..055647bcee67d 100644 +--- a/drivers/thermal/qcom/tsens-v0_1.c ++++ b/drivers/thermal/qcom/tsens-v0_1.c +@@ -145,8 +145,10 @@ static int calibrate_8916(struct tsens_priv *priv) + return PTR_ERR(qfprom_cdata); + + qfprom_csel = (u32 *)qfprom_read(priv->dev, "calib_sel"); +- if (IS_ERR(qfprom_csel)) ++ if (IS_ERR(qfprom_csel)) { ++ kfree(qfprom_cdata); + return PTR_ERR(qfprom_csel); ++ } + + mode = (qfprom_csel[0] & MSM8916_CAL_SEL_MASK) >> MSM8916_CAL_SEL_SHIFT; + dev_dbg(priv->dev, "calibration mode is 
%d\n", mode); +@@ -181,6 +183,8 @@ static int calibrate_8916(struct tsens_priv *priv) + } + + compute_intercept_slope(priv, p1, p2, mode); ++ kfree(qfprom_cdata); ++ kfree(qfprom_csel); + + return 0; + } +@@ -198,8 +202,10 @@ static int calibrate_8974(struct tsens_priv *priv) + return PTR_ERR(calib); + + bkp = (u32 *)qfprom_read(priv->dev, "calib_backup"); +- if (IS_ERR(bkp)) ++ if (IS_ERR(bkp)) { ++ kfree(calib); + return PTR_ERR(bkp); ++ } + + calib_redun_sel = bkp[1] & BKP_REDUN_SEL; + calib_redun_sel >>= BKP_REDUN_SHIFT; +@@ -313,6 +319,8 @@ static int calibrate_8974(struct tsens_priv *priv) + } + + compute_intercept_slope(priv, p1, p2, mode); ++ kfree(calib); ++ kfree(bkp); + + return 0; + } +diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c +index 10b595d4f6199..870f502f2cb6c 100644 +--- a/drivers/thermal/qcom/tsens-v1.c ++++ b/drivers/thermal/qcom/tsens-v1.c +@@ -138,6 +138,7 @@ static int calibrate_v1(struct tsens_priv *priv) + } + + compute_intercept_slope(priv, p1, p2, mode); ++ kfree(qfprom_cdata); + + return 0; + } +diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h +index 2fd94997245bf..b89083b61c383 100644 +--- a/drivers/thermal/qcom/tsens.h ++++ b/drivers/thermal/qcom/tsens.h +@@ -17,6 +17,7 @@ + + #include + #include ++#include + + struct tsens_priv; + +-- +2.20.1 + diff --git a/queue-5.3/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch b/queue-5.3/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch new file mode 100644 index 00000000000..e2a5644f328 --- /dev/null +++ b/queue-5.3/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch @@ -0,0 +1,39 @@ +From cd994470d686155b4b4ade8cb1713e3516e39d0b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 31 Aug 2019 21:25:36 +0200 +Subject: drm/amdgpu: Check for valid number of registers to read + +From: Trek + +[ Upstream commit 73d8e6c7b841d9bf298c8928f228fb433676635c ] + +Do not try to allocate any amount of memory requested by 
the user. +Instead limit it to 128 registers. Actually the longest series of +consecutive allowed registers are 48, mmGB_TILE_MODE0-31 and +mmGB_MACROTILE_MODE0-15 (0x2644-0x2673). + +Bug: https://bugs.freedesktop.org/show_bug.cgi?id=111273 +Signed-off-by: Trek +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 0cf7e8606fd3d..00beba533582c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -662,6 +662,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) + sh_num = 0xffffffff; + ++ if (info->read_mmr_reg.count > 128) ++ return -EINVAL; ++ + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); + if (!regs) + return -ENOMEM; +-- +2.20.1 + diff --git a/queue-5.3/drm-amdgpu-fix-kfd-related-kernel-oops-on-hawaii.patch b/queue-5.3/drm-amdgpu-fix-kfd-related-kernel-oops-on-hawaii.patch new file mode 100644 index 00000000000..6d1b3ba687f --- /dev/null +++ b/queue-5.3/drm-amdgpu-fix-kfd-related-kernel-oops-on-hawaii.patch @@ -0,0 +1,41 @@ +From ca67e155aedff33a0ff514f3145e743f76fad72d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Sep 2019 19:22:02 -0400 +Subject: drm/amdgpu: Fix KFD-related kernel oops on Hawaii +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Felix Kuehling + +[ Upstream commit dcafbd50f2e4d5cc964aae409fb5691b743fba23 ] + +Hawaii needs to flush caches explicitly, submitting an IB in a user +VMID from kernel mode. There is no s_fence in this case. 
+ +Fixes: eb3961a57424 ("drm/amdgpu: remove fence context from the job") +Signed-off-by: Felix Kuehling +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +index 7850084a05e3a..60655834d6498 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +@@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + /* ring tests don't use a job */ + if (job) { + vm = job->vm; +- fence_ctx = job->base.s_fence->scheduled.context; ++ fence_ctx = job->base.s_fence ? ++ job->base.s_fence->scheduled.context : 0; + } else { + vm = NULL; + fence_ctx = 0; +-- +2.20.1 + diff --git a/queue-5.3/drm-radeon-bail-earlier-when-radeon.cik_-si_support-.patch b/queue-5.3/drm-radeon-bail-earlier-when-radeon.cik_-si_support-.patch new file mode 100644 index 00000000000..a01f4317350 --- /dev/null +++ b/queue-5.3/drm-radeon-bail-earlier-when-radeon.cik_-si_support-.patch @@ -0,0 +1,119 @@ +From a6d3863806ee370fe4cebab2e5849866f95efd54 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 7 Sep 2019 22:32:38 +0200 +Subject: drm/radeon: Bail earlier when radeon.cik_/si_support=0 is passed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hans de Goede + +[ Upstream commit 9dbc88d013b79c62bd845cb9e7c0256e660967c5 ] + +Bail from the pci_driver probe function instead of from the drm_driver +load function. + +This avoids /dev/dri/card0 temporarily getting registered and then +unregistered again, sending unwanted add / remove udev events to +userspace. 
+ +Specifically this avoids triggering the (userspace) bug fixed by this +plymouth merge-request: +https://gitlab.freedesktop.org/plymouth/plymouth/merge_requests/59 + +Note that despite that being an userspace bug, not sending unnecessary +udev events is a good idea in general. + +BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1490490 +Reviewed-by: Michel Dänzer +Signed-off-by: Hans de Goede +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/radeon/radeon_drv.c | 31 +++++++++++++++++++++++++++++ + drivers/gpu/drm/radeon/radeon_kms.c | 25 ----------------------- + 2 files changed, 31 insertions(+), 25 deletions(-) + +diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c +index 15d7bebe17294..5cc0fbb04ab14 100644 +--- a/drivers/gpu/drm/radeon/radeon_drv.c ++++ b/drivers/gpu/drm/radeon/radeon_drv.c +@@ -325,8 +325,39 @@ bool radeon_device_is_virtual(void); + static int radeon_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) + { ++ unsigned long flags = 0; + int ret; + ++ if (!ent) ++ return -ENODEV; /* Avoid NULL-ptr deref in drm_get_pci_dev */ ++ ++ flags = ent->driver_data; ++ ++ if (!radeon_si_support) { ++ switch (flags & RADEON_FAMILY_MASK) { ++ case CHIP_TAHITI: ++ case CHIP_PITCAIRN: ++ case CHIP_VERDE: ++ case CHIP_OLAND: ++ case CHIP_HAINAN: ++ dev_info(&pdev->dev, ++ "SI support disabled by module param\n"); ++ return -ENODEV; ++ } ++ } ++ if (!radeon_cik_support) { ++ switch (flags & RADEON_FAMILY_MASK) { ++ case CHIP_KAVERI: ++ case CHIP_BONAIRE: ++ case CHIP_HAWAII: ++ case CHIP_KABINI: ++ case CHIP_MULLINS: ++ dev_info(&pdev->dev, ++ "CIK support disabled by module param\n"); ++ return -ENODEV; ++ } ++ } ++ + if (vga_switcheroo_client_probe_defer(pdev)) + return -EPROBE_DEFER; + +diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c +index 07f7ace42c4ba..e85c554eeaa94 100644 +--- a/drivers/gpu/drm/radeon/radeon_kms.c ++++ 
b/drivers/gpu/drm/radeon/radeon_kms.c +@@ -100,31 +100,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) + struct radeon_device *rdev; + int r, acpi_status; + +- if (!radeon_si_support) { +- switch (flags & RADEON_FAMILY_MASK) { +- case CHIP_TAHITI: +- case CHIP_PITCAIRN: +- case CHIP_VERDE: +- case CHIP_OLAND: +- case CHIP_HAINAN: +- dev_info(dev->dev, +- "SI support disabled by module param\n"); +- return -ENODEV; +- } +- } +- if (!radeon_cik_support) { +- switch (flags & RADEON_FAMILY_MASK) { +- case CHIP_KAVERI: +- case CHIP_BONAIRE: +- case CHIP_HAWAII: +- case CHIP_KABINI: +- case CHIP_MULLINS: +- dev_info(dev->dev, +- "CIK support disabled by module param\n"); +- return -ENODEV; +- } +- } +- + rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL); + if (rdev == NULL) { + return -ENOMEM; +-- +2.20.1 + diff --git a/queue-5.3/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch b/queue-5.3/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch new file mode 100644 index 00000000000..885b78fb295 --- /dev/null +++ b/queue-5.3/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch @@ -0,0 +1,46 @@ +From 952f7e6cb2f44399f827891585c50242517f0493 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jul 2019 15:48:53 +0800 +Subject: fs: nfs: Fix possible null-pointer dereferences in encode_attrs() + +From: Jia-Ju Bai + +[ Upstream commit e2751463eaa6f9fec8fea80abbdc62dbc487b3c5 ] + +In encode_attrs(), there is an if statement on line 1145 to check +whether label is NULL: + if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL)) + +When label is NULL, it is used on lines 1178-1181: + *p++ = cpu_to_be32(label->lfs); + *p++ = cpu_to_be32(label->pi); + *p++ = cpu_to_be32(label->len); + p = xdr_encode_opaque_fixed(p, label->label, label->len); + +To fix these bugs, label is checked before being used. + +These bugs are found by a static analysis tool STCheck written by us. 
+ +Signed-off-by: Jia-Ju Bai +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4xdr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c +index 46a8d636d151e..ab07db0f07cde 100644 +--- a/fs/nfs/nfs4xdr.c ++++ b/fs/nfs/nfs4xdr.c +@@ -1174,7 +1174,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, + } else + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); + } +- if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { ++ if (label && (bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) { + *p++ = cpu_to_be32(label->lfs); + *p++ = cpu_to_be32(label->pi); + *p++ = cpu_to_be32(label->len); +-- +2.20.1 + diff --git a/queue-5.3/fuse-fix-memleak-in-cuse_channel_open.patch b/queue-5.3/fuse-fix-memleak-in-cuse_channel_open.patch new file mode 100644 index 00000000000..c2b6689417a --- /dev/null +++ b/queue-5.3/fuse-fix-memleak-in-cuse_channel_open.patch @@ -0,0 +1,39 @@ +From 750fbf9f33268be4bda6ca02e075f2224fd8eea8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Aug 2019 15:59:09 +0800 +Subject: fuse: fix memleak in cuse_channel_open + +From: zhengbin + +[ Upstream commit 9ad09b1976c562061636ff1e01bfc3a57aebe56b ] + +If cuse_send_init fails, need to fuse_conn_put cc->fc. 
+ +cuse_channel_open->fuse_conn_init->refcount_set(&fc->count, 1) + ->fuse_dev_alloc->fuse_conn_get + ->fuse_dev_free->fuse_conn_put + +Fixes: cc080e9e9be1 ("fuse: introduce per-instance fuse_dev structure") +Reported-by: Hulk Robot +Signed-off-by: zhengbin +Signed-off-by: Miklos Szeredi +Signed-off-by: Sasha Levin +--- + fs/fuse/cuse.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c +index bab7a0db81dd4..f3b7208846506 100644 +--- a/fs/fuse/cuse.c ++++ b/fs/fuse/cuse.c +@@ -519,6 +519,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) + rc = cuse_send_init(cc); + if (rc) { + fuse_dev_free(fud); ++ fuse_conn_put(&cc->fc); + return rc; + } + file->private_data = fud; +-- +2.20.1 + diff --git a/queue-5.3/fuse-fix-request-limit.patch b/queue-5.3/fuse-fix-request-limit.patch new file mode 100644 index 00000000000..6a4bf1cf6e8 --- /dev/null +++ b/queue-5.3/fuse-fix-request-limit.patch @@ -0,0 +1,48 @@ +From 0005f62280de65aec947e45e2ff81546e65e9a80 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Sep 2019 14:28:13 +0200 +Subject: fuse: fix request limit + +From: Miklos Szeredi + +[ Upstream commit f22f812d5ce75a18b56073a7a63862e6ea764070 ] + +The size of struct fuse_req was reduced from 392B to 144B on a non-debug +config, thus the sanitize_global_limit() helper was setting a larger +default limit. This doesn't really reflect reduction in the memory used by +requests, since the fields removed from fuse_req were added to fuse_args +derived structs; e.g. sizeof(struct fuse_writepages_args) is 248B, thus +resulting in slightly more memory being used for writepage requests +overall (due to using 256B slabs). + +Make the calculation ignore the size of fuse_req and use the old 392B +value. 
+ +Signed-off-by: Miklos Szeredi +Signed-off-by: Sasha Levin +--- + fs/fuse/inode.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c +index 987877860c019..f3104db3de83a 100644 +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -823,9 +823,12 @@ static const struct super_operations fuse_super_operations = { + + static void sanitize_global_limit(unsigned *limit) + { ++ /* ++ * The default maximum number of async requests is calculated to consume ++ * 1/2^13 of the total memory, assuming 392 bytes per request. ++ */ + if (*limit == 0) +- *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / +- sizeof(struct fuse_req); ++ *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; + + if (*limit >= 1 << 16) + *limit = (1 << 16) - 1; +-- +2.20.1 + diff --git a/queue-5.3/i2c-qcom-geni-disable-dma-processing-on-the-lenovo-y.patch b/queue-5.3/i2c-qcom-geni-disable-dma-processing-on-the-lenovo-y.patch new file mode 100644 index 00000000000..15f6e10aac6 --- /dev/null +++ b/queue-5.3/i2c-qcom-geni-disable-dma-processing-on-the-lenovo-y.patch @@ -0,0 +1,65 @@ +From 79082a42fb0562e4cfe33b955a9d85ba8e52ea28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Sep 2019 20:24:12 +0100 +Subject: i2c: qcom-geni: Disable DMA processing on the Lenovo Yoga C630 + +From: Lee Jones + +[ Upstream commit 127068abe85bf3dee50df51cb039a5a987a4a666 ] + +We have a production-level laptop (Lenovo Yoga C630) which is exhibiting +a rather horrific bug. When I2C HID devices are being scanned for at +boot-time the QCom Geni based I2C (Serial Engine) attempts to use DMA. +When it does, the laptop reboots and the user never sees the OS. + +Attempts are being made to debug the reason for the spontaneous reboot. +No luck so far, hence the requirement for this hot-fix. This workaround +will be removed once we have a viable fix. 
+ +Signed-off-by: Lee Jones +Tested-by: Bjorn Andersson +Signed-off-by: Wolfram Sang +Signed-off-by: Sasha Levin +--- + drivers/i2c/busses/i2c-qcom-geni.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c +index a89bfce5388ee..17abf60c94aeb 100644 +--- a/drivers/i2c/busses/i2c-qcom-geni.c ++++ b/drivers/i2c/busses/i2c-qcom-geni.c +@@ -355,11 +355,13 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, + { + dma_addr_t rx_dma; + unsigned long time_left; +- void *dma_buf; ++ void *dma_buf = NULL; + struct geni_se *se = &gi2c->se; + size_t len = msg->len; + +- dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); ++ if (!of_machine_is_compatible("lenovo,yoga-c630")) ++ dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); ++ + if (dma_buf) + geni_se_select_mode(se, GENI_SE_DMA); + else +@@ -394,11 +396,13 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, + { + dma_addr_t tx_dma; + unsigned long time_left; +- void *dma_buf; ++ void *dma_buf = NULL; + struct geni_se *se = &gi2c->se; + size_t len = msg->len; + +- dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); ++ if (!of_machine_is_compatible("lenovo,yoga-c630")) ++ dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); ++ + if (dma_buf) + geni_se_select_mode(se, GENI_SE_DMA); + else +-- +2.20.1 + diff --git a/queue-5.3/ima-always-return-negative-code-for-error.patch b/queue-5.3/ima-always-return-negative-code-for-error.patch new file mode 100644 index 00000000000..6a0df82d1ec --- /dev/null +++ b/queue-5.3/ima-always-return-negative-code-for-error.patch @@ -0,0 +1,44 @@ +From 727ce884179a759374a17b30b58965e7e032c1fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2019 10:00:40 +0200 +Subject: ima: always return negative code for error + +From: Sascha Hauer + +[ Upstream commit f5e1040196dbfe14c77ce3dfe3b7b08d2d961e88 ] + +integrity_kernel_read() returns the number of bytes 
read. If this is +a short read then this positive value is returned from +ima_calc_file_hash_atfm(). Currently this is only indirectly called from +ima_calc_file_hash() and this function only tests for the return value +being zero or nonzero and also doesn't forward the return value. +Nevertheless there's no point in returning a positive value as an error, +so translate a short read into -EINVAL. + +Signed-off-by: Sascha Hauer +Signed-off-by: Mimi Zohar +Signed-off-by: Sasha Levin +--- + security/integrity/ima/ima_crypto.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c +index d4c7b8e1b083d..7532b062be594 100644 +--- a/security/integrity/ima/ima_crypto.c ++++ b/security/integrity/ima/ima_crypto.c +@@ -268,8 +268,11 @@ static int ima_calc_file_hash_atfm(struct file *file, + rbuf_len = min_t(loff_t, i_size - offset, rbuf_size[active]); + rc = integrity_kernel_read(file, offset, rbuf[active], + rbuf_len); +- if (rc != rbuf_len) ++ if (rc != rbuf_len) { ++ if (rc >= 0) ++ rc = -EINVAL; + goto out3; ++ } + + if (rbuf[1] && offset) { + /* Using two buffers, and it is not the first +-- +2.20.1 + diff --git a/queue-5.3/ima-fix-freeing-ongoing-ahash_request.patch b/queue-5.3/ima-fix-freeing-ongoing-ahash_request.patch new file mode 100644 index 00000000000..fbba55ff3fd --- /dev/null +++ b/queue-5.3/ima-fix-freeing-ongoing-ahash_request.patch @@ -0,0 +1,42 @@ +From 156d071f890cade7d5feb2d22e3420f639c0bfc7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2019 10:00:41 +0200 +Subject: ima: fix freeing ongoing ahash_request + +From: Sascha Hauer + +[ Upstream commit 4ece3125f21b1d42b84896c5646dbf0e878464e1 ] + +integrity_kernel_read() can fail in which case we forward to call +ahash_request_free() on a currently running request. We have to wait +for its completion before we can free the request. 
+ +This was observed by interrupting a "find / -type f -xdev -print0 | xargs -0 +cat 1>/dev/null" with ctrl-c on an IMA enabled filesystem. + +Signed-off-by: Sascha Hauer +Signed-off-by: Mimi Zohar +Signed-off-by: Sasha Levin +--- + security/integrity/ima/ima_crypto.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c +index 7532b062be594..73044fc6a9521 100644 +--- a/security/integrity/ima/ima_crypto.c ++++ b/security/integrity/ima/ima_crypto.c +@@ -271,6 +271,11 @@ static int ima_calc_file_hash_atfm(struct file *file, + if (rc != rbuf_len) { + if (rc >= 0) + rc = -EINVAL; ++ /* ++ * Forward current rc, do not overwrite with return value ++ * from ahash_wait() ++ */ ++ ahash_wait(ahash_rc, &wait); + goto out3; + } + +-- +2.20.1 + diff --git a/queue-5.3/include-trace-events-writeback.h-fix-wstringop-trunc.patch b/queue-5.3/include-trace-events-writeback.h-fix-wstringop-trunc.patch new file mode 100644 index 00000000000..24932c37060 --- /dev/null +++ b/queue-5.3/include-trace-events-writeback.h-fix-wstringop-trunc.patch @@ -0,0 +1,191 @@ +From 8d06f411650f94e1ddba1d90e51ffdaa56b9aef3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Sep 2019 16:46:16 -0700 +Subject: include/trace/events/writeback.h: fix -Wstringop-truncation warnings + +From: Qian Cai + +[ Upstream commit d1a445d3b86c9341ce7a0954c23be0edb5c9bec5 ] + +There are many of those warnings. 
+ +In file included from ./arch/powerpc/include/asm/paca.h:15, + from ./arch/powerpc/include/asm/current.h:13, + from ./include/linux/thread_info.h:21, + from ./include/asm-generic/preempt.h:5, + from ./arch/powerpc/include/generated/asm/preempt.h:1, + from ./include/linux/preempt.h:78, + from ./include/linux/spinlock.h:51, + from fs/fs-writeback.c:19: +In function 'strncpy', + inlined from 'perf_trace_writeback_page_template' at +./include/trace/events/writeback.h:56:1: +./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified +bound 32 equals destination size [-Wstringop-truncation] + return __builtin_strncpy(p, q, size); + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Fix it by using the new strscpy_pad() which was introduced in "lib/string: +Add strscpy_pad() function" and will always be NUL-terminated instead of +strncpy(). Also, change strlcpy() to use strscpy_pad() in this file for +consistency. + +Link: http://lkml.kernel.org/r/1564075099-27750-1-git-send-email-cai@lca.pw +Fixes: 455b2864686d ("writeback: Initial tracing support") +Fixes: 028c2dd184c0 ("writeback: Add tracing to balance_dirty_pages") +Fixes: e84d0a4f8e39 ("writeback: trace event writeback_queue_io") +Fixes: b48c104d2211 ("writeback: trace event bdi_dirty_ratelimit") +Fixes: cc1676d917f3 ("writeback: Move requeueing when I_SYNC set to writeback_sb_inodes()") +Fixes: 9fb0a7da0c52 ("writeback: add more tracepoints") +Signed-off-by: Qian Cai +Reviewed-by: Jan Kara +Cc: Tobin C. 
Harding +Cc: Steven Rostedt (VMware) +Cc: Ingo Molnar +Cc: Tejun Heo +Cc: Dave Chinner +Cc: Fengguang Wu +Cc: Jens Axboe +Cc: Joe Perches +Cc: Kees Cook +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Nitin Gote +Cc: Rasmus Villemoes +Cc: Stephen Kitt +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + include/trace/events/writeback.h | 38 +++++++++++++++++--------------- + 1 file changed, 20 insertions(+), 18 deletions(-) + +diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h +index aa7f3aeac7408..79095434c1be3 100644 +--- a/include/trace/events/writeback.h ++++ b/include/trace/events/writeback.h +@@ -66,8 +66,9 @@ DECLARE_EVENT_CLASS(writeback_page_template, + ), + + TP_fast_assign( +- strncpy(__entry->name, +- mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32); ++ strscpy_pad(__entry->name, ++ mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", ++ 32); + __entry->ino = mapping ? mapping->host->i_ino : 0; + __entry->index = page->index; + ), +@@ -110,8 +111,8 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, + struct backing_dev_info *bdi = inode_to_bdi(inode); + + /* may be called for files on pseudo FSes w/ unregistered bdi */ +- strncpy(__entry->name, +- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); ++ strscpy_pad(__entry->name, ++ bdi->dev ? 
dev_name(bdi->dev) : "(unknown)", 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->flags = flags; +@@ -190,8 +191,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, + ), + + TP_fast_assign( +- strncpy(__entry->name, +- dev_name(inode_to_bdi(inode)->dev), 32); ++ strscpy_pad(__entry->name, ++ dev_name(inode_to_bdi(inode)->dev), 32); + __entry->ino = inode->i_ino; + __entry->sync_mode = wbc->sync_mode; + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); +@@ -234,8 +235,9 @@ DECLARE_EVENT_CLASS(writeback_work_class, + __field(unsigned int, cgroup_ino) + ), + TP_fast_assign( +- strncpy(__entry->name, +- wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32); ++ strscpy_pad(__entry->name, ++ wb->bdi->dev ? dev_name(wb->bdi->dev) : ++ "(unknown)", 32); + __entry->nr_pages = work->nr_pages; + __entry->sb_dev = work->sb ? work->sb->s_dev : 0; + __entry->sync_mode = work->sync_mode; +@@ -288,7 +290,7 @@ DECLARE_EVENT_CLASS(writeback_class, + __field(unsigned int, cgroup_ino) + ), + TP_fast_assign( +- strncpy(__entry->name, dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); + ), + TP_printk("bdi %s: cgroup_ino=%u", +@@ -310,7 +312,7 @@ TRACE_EVENT(writeback_bdi_register, + __array(char, name, 32) + ), + TP_fast_assign( +- strncpy(__entry->name, dev_name(bdi->dev), 32); ++ strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + ), + TP_printk("bdi %s", + __entry->name +@@ -335,7 +337,7 @@ DECLARE_EVENT_CLASS(wbc_class, + ), + + TP_fast_assign( +- strncpy(__entry->name, dev_name(bdi->dev), 32); ++ strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->sync_mode = wbc->sync_mode; +@@ -386,7 +388,7 @@ TRACE_EVENT(writeback_queue_io, + ), + TP_fast_assign( + unsigned long *older_than_this = work->older_than_this; +- strncpy(__entry->name, 
dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + __entry->older = older_than_this ? *older_than_this : 0; + __entry->age = older_than_this ? + (jiffies - *older_than_this) * 1000 / HZ : -1; +@@ -472,7 +474,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, + ), + + TP_fast_assign( +- strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + __entry->write_bw = KBps(wb->write_bandwidth); + __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); + __entry->dirty_rate = KBps(dirty_rate); +@@ -537,7 +539,7 @@ TRACE_EVENT(balance_dirty_pages, + + TP_fast_assign( + unsigned long freerun = (thresh + bg_thresh) / 2; +- strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); ++ strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + + __entry->limit = global_wb_domain.dirty_limit; + __entry->setpoint = (global_wb_domain.dirty_limit + +@@ -597,8 +599,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue, + ), + + TP_fast_assign( +- strncpy(__entry->name, +- dev_name(inode_to_bdi(inode)->dev), 32); ++ strscpy_pad(__entry->name, ++ dev_name(inode_to_bdi(inode)->dev), 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->dirtied_when = inode->dirtied_when; +@@ -671,8 +673,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, + ), + + TP_fast_assign( +- strncpy(__entry->name, +- dev_name(inode_to_bdi(inode)->dev), 32); ++ strscpy_pad(__entry->name, ++ dev_name(inode_to_bdi(inode)->dev), 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->dirtied_when = inode->dirtied_when; +-- +2.20.1 + diff --git a/queue-5.3/iommu-amd-fix-downgrading-default-page-sizes-in-allo.patch b/queue-5.3/iommu-amd-fix-downgrading-default-page-sizes-in-allo.patch new file mode 100644 index 00000000000..138c206878f --- /dev/null +++ b/queue-5.3/iommu-amd-fix-downgrading-default-page-sizes-in-allo.patch @@ -0,0 +1,50 @@ +From 0690928395b5b4ce66ab828cb1ba569b254807cb Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Sep 2019 16:42:29 +0200 +Subject: iommu/amd: Fix downgrading default page-sizes in alloc_pte() + +From: Andrei Dulea + +[ Upstream commit 6ccb72f8374e17d60b58a7bfd5570496332c54e2 ] + +Downgrading an existing large mapping to a mapping using smaller +page-sizes works only for the mappings created with page-mode 7 (i.e. +non-default page size). + +Treat large mappings created with page-mode 0 (i.e. default page size) +like a non-present mapping and allow to overwrite it in alloc_pte(). + +While around, make sure that we flush the TLB only if we change an +existing mapping, otherwise we might end up acting on garbage PTEs. + +Fixes: 6d568ef9a622 ("iommu/amd: Allow downgrading page-sizes in alloc_pte()") +Signed-off-by: Andrei Dulea +Signed-off-by: Sasha Levin +--- + drivers/iommu/amd_iommu.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c +index e1259429ded2f..3b1d7ae6f75e0 100644 +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -1490,6 +1490,7 @@ static u64 *alloc_pte(struct protection_domain *domain, + pte_level = PM_PTE_LEVEL(__pte); + + if (!IOMMU_PTE_PRESENT(__pte) || ++ pte_level == PAGE_MODE_NONE || + pte_level == PAGE_MODE_7_LEVEL) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) +@@ -1500,7 +1501,7 @@ static u64 *alloc_pte(struct protection_domain *domain, + /* pte could have been changed somewhere. 
*/ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_page((unsigned long)page); +- else if (pte_level == PAGE_MODE_7_LEVEL) ++ else if (IOMMU_PTE_PRESENT(__pte)) + domain->updated = true; + + continue; +-- +2.20.1 + diff --git a/queue-5.3/kernel-elfcore.c-include-proper-prototypes.patch b/queue-5.3/kernel-elfcore.c-include-proper-prototypes.patch new file mode 100644 index 00000000000..6988b232fb9 --- /dev/null +++ b/queue-5.3/kernel-elfcore.c-include-proper-prototypes.patch @@ -0,0 +1,51 @@ +From b0fe3e6ec7a7762d3bd3f86b8db06a656ac19665 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Sep 2019 16:45:59 -0700 +Subject: kernel/elfcore.c: include proper prototypes + +From: Valdis Kletnieks + +[ Upstream commit 0f74914071ab7e7b78731ed62bf350e3a344e0a5 ] + +When building with W=1, gcc properly complains that there's no prototypes: + + CC kernel/elfcore.o +kernel/elfcore.c:7:17: warning: no previous prototype for 'elf_core_extra_phdrs' [-Wmissing-prototypes] + 7 | Elf_Half __weak elf_core_extra_phdrs(void) + | ^~~~~~~~~~~~~~~~~~~~ +kernel/elfcore.c:12:12: warning: no previous prototype for 'elf_core_write_extra_phdrs' [-Wmissing-prototypes] + 12 | int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ +kernel/elfcore.c:17:12: warning: no previous prototype for 'elf_core_write_extra_data' [-Wmissing-prototypes] + 17 | int __weak elf_core_write_extra_data(struct coredump_params *cprm) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +kernel/elfcore.c:22:15: warning: no previous prototype for 'elf_core_extra_data_size' [-Wmissing-prototypes] + 22 | size_t __weak elf_core_extra_data_size(void) + | ^~~~~~~~~~~~~~~~~~~~~~~~ + +Provide the include file so gcc is happy, and we don't have potential code drift + +Link: http://lkml.kernel.org/r/29875.1565224705@turing-police +Signed-off-by: Valdis Kletnieks +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + kernel/elfcore.c | 1 + + 1 
file changed, 1 insertion(+) + +diff --git a/kernel/elfcore.c b/kernel/elfcore.c +index fc482c8e0bd88..57fb4dcff4349 100644 +--- a/kernel/elfcore.c ++++ b/kernel/elfcore.c +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include + + Elf_Half __weak elf_core_extra_phdrs(void) + { +-- +2.20.1 + diff --git a/queue-5.3/kvm-nvmx-fix-consistency-check-on-injected-exception.patch b/queue-5.3/kvm-nvmx-fix-consistency-check-on-injected-exception.patch new file mode 100644 index 00000000000..3051d1246a8 --- /dev/null +++ b/queue-5.3/kvm-nvmx-fix-consistency-check-on-injected-exception.patch @@ -0,0 +1,48 @@ +From 94b0e37785033bc91dff5162810832fd97a53b03 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Oct 2019 09:21:23 -0700 +Subject: KVM: nVMX: Fix consistency check on injected exception error code + +From: Sean Christopherson + +[ Upstream commit 567926cca99ba1750be8aae9c4178796bf9bb90b ] + +Current versions of Intel's SDM incorrectly state that "bits 31:15 of +the VM-Entry exception error-code field" must be zero. In reality, bits +31:16 must be zero, i.e. error codes are 16-bit values. + +The bogus error code check manifests as an unexpected VM-Entry failure +due to an invalid code field (error number 7) in L1, e.g. when injecting +a #GP with error_code=0x9f00. + +Nadav previously reported the bug[*], both to KVM and Intel, and fixed +the associated kvm-unit-test. 
+ +[*] https://patchwork.kernel.org/patch/11124749/ + +Reported-by: Nadav Amit +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/vmx/nested.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index a3cba321b5c5d..61aa9421e27af 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -2584,7 +2584,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, + + /* VM-entry exception error code */ + if (has_error_code && +- vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)) ++ vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)) + return -EINVAL; + + /* VM-entry interruption-info field: reserved bits */ +-- +2.20.1 + diff --git a/queue-5.3/libbpf-fix-false-uninitialized-variable-warning.patch b/queue-5.3/libbpf-fix-false-uninitialized-variable-warning.patch new file mode 100644 index 00000000000..01c492bc591 --- /dev/null +++ b/queue-5.3/libbpf-fix-false-uninitialized-variable-warning.patch @@ -0,0 +1,36 @@ +From 29e563e7fa09dbd25fe70c377f671801cf10f023 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Sep 2019 11:30:38 -0700 +Subject: libbpf: fix false uninitialized variable warning + +From: Andrii Nakryiko + +[ Upstream commit aef70a1f44c0b570e6345c02c2d240471859f0a4 ] + +Some compilers emit warning for potential uninitialized next_id usage. +The code is correct, but control flow is too complicated for some +compilers to figure this out. Re-initialize next_id to satisfy +compiler. 
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + tools/lib/bpf/btf_dump.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c +index 7065bb5b27525..e1357dbb16c24 100644 +--- a/tools/lib/bpf/btf_dump.c ++++ b/tools/lib/bpf/btf_dump.c +@@ -1213,6 +1213,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, + return; + } + ++ next_id = decls->ids[decls->cnt - 1]; + next_t = btf__type_by_id(d->btf, next_id); + multidim = btf_kind_of(next_t) == BTF_KIND_ARRAY; + /* we need space if we have named non-pointer */ +-- +2.20.1 + diff --git a/queue-5.3/libnvdimm-fix-endian-conversion-issues.patch b/queue-5.3/libnvdimm-fix-endian-conversion-issues.patch new file mode 100644 index 00000000000..aeaa6033941 --- /dev/null +++ b/queue-5.3/libnvdimm-fix-endian-conversion-issues.patch @@ -0,0 +1,83 @@ +From 9fa730141e601a0eae8dd3f18bcc9fc9555d25d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2019 13:17:26 +0530 +Subject: =?UTF-8?q?libnvdimm:=20Fix=20endian=20conversion=20issues=C2=A0?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Aneesh Kumar K.V + +[ Upstream commit 86aa66687442ef45909ff9814b82b4d2bb892294 ] + +nd_label->dpa issue was observed when trying to enable the namespace created +with little-endian kernel on a big-endian kernel. That made me run +`sparse` on the rest of the code and other changes are the result of that. 
+ +Fixes: d9b83c756953 ("libnvdimm, btt: rework error clearing") +Fixes: 9dedc73a4658 ("libnvdimm/btt: Fix LBA masking during 'free list' population") +Reviewed-by: Vishal Verma +Signed-off-by: Aneesh Kumar K.V +Link: https://lore.kernel.org/r/20190809074726.27815-1-aneesh.kumar@linux.ibm.com +Signed-off-by: Dan Williams +Signed-off-by: Sasha Levin +--- + drivers/nvdimm/btt.c | 8 ++++---- + drivers/nvdimm/namespace_devs.c | 7 ++++--- + 2 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c +index a8d56887ec881..3e9f45aec8d18 100644 +--- a/drivers/nvdimm/btt.c ++++ b/drivers/nvdimm/btt.c +@@ -392,9 +392,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, + arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; + if (++(arena->freelist[lane].seq) == 4) + arena->freelist[lane].seq = 1; +- if (ent_e_flag(ent->old_map)) ++ if (ent_e_flag(le32_to_cpu(ent->old_map))) + arena->freelist[lane].has_err = 1; +- arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map)); ++ arena->freelist[lane].block = ent_lba(le32_to_cpu(ent->old_map)); + + return ret; + } +@@ -560,8 +560,8 @@ static int btt_freelist_init(struct arena_info *arena) + * FIXME: if error clearing fails during init, we want to make + * the BTT read-only + */ +- if (ent_e_flag(log_new.old_map) && +- !ent_normal(log_new.old_map)) { ++ if (ent_e_flag(le32_to_cpu(log_new.old_map)) && ++ !ent_normal(le32_to_cpu(log_new.old_map))) { + arena->freelist[i].has_err = 1; + ret = arena_clear_freelist_error(arena, i); + if (ret) +diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c +index a16e52251a305..102c9d5141ee8 100644 +--- a/drivers/nvdimm/namespace_devs.c ++++ b/drivers/nvdimm/namespace_devs.c +@@ -1987,7 +1987,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region, + nd_mapping = &nd_region->mapping[i]; + label_ent = list_first_entry_or_null(&nd_mapping->labels, + typeof(*label_ent), 
list); +- label0 = label_ent ? label_ent->label : 0; ++ label0 = label_ent ? label_ent->label : NULL; + + if (!label0) { + WARN_ON(1); +@@ -2322,8 +2322,9 @@ static struct device **scan_labels(struct nd_region *nd_region) + continue; + + /* skip labels that describe extents outside of the region */ +- if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end) +- continue; ++ if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start || ++ __le64_to_cpu(nd_label->dpa) > map_end) ++ continue; + + i = add_namespace_resource(nd_region, nd_label, devs, count); + if (i < 0) +-- +2.20.1 + diff --git a/queue-5.3/libnvdimm-nfit_test-fix-acpi_handle-redefinition.patch b/queue-5.3/libnvdimm-nfit_test-fix-acpi_handle-redefinition.patch new file mode 100644 index 00000000000..bc65683fad8 --- /dev/null +++ b/queue-5.3/libnvdimm-nfit_test-fix-acpi_handle-redefinition.patch @@ -0,0 +1,70 @@ +From 89a50529bbb7cf7f384ec1b8112297821627bd65 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 17 Sep 2019 21:21:49 -0700 +Subject: libnvdimm/nfit_test: Fix acpi_handle redefinition + +From: Nathan Chancellor + +[ Upstream commit 59f08896f058a92f03a0041b397a1a227c5e8529 ] + +After commit 62974fc389b3 ("libnvdimm: Enable unit test infrastructure +compile checks"), clang warns: + +In file included from +../drivers/nvdimm/../../tools/testing/nvdimm/test/iomap.c:15: +../drivers/nvdimm/../../tools/testing/nvdimm/test/nfit_test.h:206:15: +warning: redefinition of typedef 'acpi_handle' is a C11 feature +[-Wtypedef-redefinition] +typedef void *acpi_handle; + ^ +../include/acpi/actypes.h:424:15: note: previous definition is here +typedef void *acpi_handle; /* Actually a ptr to a NS Node */ + ^ +1 warning generated. + +The include chain: + +iomap.c -> + linux/acpi.h -> + acpi/acpi.h -> + acpi/actypes.h + nfit_test.h + +Avoid this by including linux/acpi.h in nfit_test.h, which allows us to +remove both the typedef and the forward declaration of acpi_object. 
+ +Link: https://github.com/ClangBuiltLinux/linux/issues/660 +Signed-off-by: Nathan Chancellor +Reviewed-by: Ira Weiny +Link: https://lore.kernel.org/r/20190918042148.77553-1-natechancellor@gmail.com +Signed-off-by: Dan Williams +Signed-off-by: Sasha Levin +--- + tools/testing/nvdimm/test/nfit_test.h | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h +index 448d686da8b13..0bf5640f1f071 100644 +--- a/tools/testing/nvdimm/test/nfit_test.h ++++ b/tools/testing/nvdimm/test/nfit_test.h +@@ -4,6 +4,7 @@ + */ + #ifndef __NFIT_TEST_H__ + #define __NFIT_TEST_H__ ++#include + #include + #include + #include +@@ -202,9 +203,6 @@ struct nd_intel_lss { + __u32 status; + } __packed; + +-union acpi_object; +-typedef void *acpi_handle; +- + typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); + typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, + const guid_t *guid, u64 rev, u64 func, +-- +2.20.1 + diff --git a/queue-5.3/libnvdimm-region-initialize-bad-block-for-volatile-n.patch b/queue-5.3/libnvdimm-region-initialize-bad-block-for-volatile-n.patch new file mode 100644 index 00000000000..3d254e781cc --- /dev/null +++ b/queue-5.3/libnvdimm-region-initialize-bad-block-for-volatile-n.patch @@ -0,0 +1,110 @@ +From 427db1c213088f6efa1615adc74365c90608c9a3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Sep 2019 14:03:55 +0530 +Subject: libnvdimm/region: Initialize bad block for volatile namespaces + +From: Aneesh Kumar K.V + +[ Upstream commit c42adf87e4e7ed77f6ffe288dc90f980d07d68df ] + +We do check for a bad block during namespace init and that use +region bad block list. We need to initialize the bad block +for volatile regions for this to work. We also observe a lockdep +warning as below because the lock is not initialized correctly +since we skip bad block init for volatile regions. 
+ + INFO: trying to register non-static key. + the code is fine but needs lockdep annotation. + turning off the locking correctness validator. + CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.3.0-rc1-15699-g3dee241c937e #149 + Call Trace: + [c0000000f95cb250] [c00000000147dd84] dump_stack+0xe8/0x164 (unreliable) + [c0000000f95cb2a0] [c00000000022ccd8] register_lock_class+0x308/0xa60 + [c0000000f95cb3a0] [c000000000229cc0] __lock_acquire+0x170/0x1ff0 + [c0000000f95cb4c0] [c00000000022c740] lock_acquire+0x220/0x270 + [c0000000f95cb580] [c000000000a93230] badblocks_check+0xc0/0x290 + [c0000000f95cb5f0] [c000000000d97540] nd_pfn_validate+0x5c0/0x7f0 + [c0000000f95cb6d0] [c000000000d98300] nd_dax_probe+0xd0/0x1f0 + [c0000000f95cb760] [c000000000d9b66c] nd_pmem_probe+0x10c/0x160 + [c0000000f95cb790] [c000000000d7f5ec] nvdimm_bus_probe+0x10c/0x240 + [c0000000f95cb820] [c000000000d0f844] really_probe+0x254/0x4e0 + [c0000000f95cb8b0] [c000000000d0fdfc] driver_probe_device+0x16c/0x1e0 + [c0000000f95cb930] [c000000000d10238] device_driver_attach+0x68/0xa0 + [c0000000f95cb970] [c000000000d1040c] __driver_attach+0x19c/0x1c0 + [c0000000f95cb9f0] [c000000000d0c4c4] bus_for_each_dev+0x94/0x130 + [c0000000f95cba50] [c000000000d0f014] driver_attach+0x34/0x50 + [c0000000f95cba70] [c000000000d0e208] bus_add_driver+0x178/0x2f0 + [c0000000f95cbb00] [c000000000d117c8] driver_register+0x108/0x170 + [c0000000f95cbb70] [c000000000d7edb0] __nd_driver_register+0xe0/0x100 + [c0000000f95cbbd0] [c000000001a6baa4] nd_pmem_driver_init+0x34/0x48 + [c0000000f95cbbf0] [c0000000000106f4] do_one_initcall+0x1d4/0x4b0 + [c0000000f95cbcd0] [c0000000019f499c] kernel_init_freeable+0x544/0x65c + [c0000000f95cbdb0] [c000000000010d6c] kernel_init+0x2c/0x180 + [c0000000f95cbe20] [c00000000000b954] ret_from_kernel_thread+0x5c/0x68 + +Signed-off-by: Aneesh Kumar K.V +Link: https://lore.kernel.org/r/20190919083355.26340-1-aneesh.kumar@linux.ibm.com +Signed-off-by: Dan Williams +Signed-off-by: Sasha Levin +--- + 
drivers/nvdimm/bus.c | 2 +- + drivers/nvdimm/region.c | 4 ++-- + drivers/nvdimm/region_devs.c | 4 ++-- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c +index 798c5c4aea9ca..bb3f20ebc276d 100644 +--- a/drivers/nvdimm/bus.c ++++ b/drivers/nvdimm/bus.c +@@ -182,7 +182,7 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) + sector_t sector; + + /* make sure device is a region */ +- if (!is_nd_pmem(dev)) ++ if (!is_memory(dev)) + return 0; + + nd_region = to_nd_region(dev); +diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c +index 37bf8719a2a44..0f6978e72e7cd 100644 +--- a/drivers/nvdimm/region.c ++++ b/drivers/nvdimm/region.c +@@ -34,7 +34,7 @@ static int nd_region_probe(struct device *dev) + if (rc) + return rc; + +- if (is_nd_pmem(&nd_region->dev)) { ++ if (is_memory(&nd_region->dev)) { + struct resource ndr_res; + + if (devm_init_badblocks(dev, &nd_region->bb)) +@@ -123,7 +123,7 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event) + struct nd_region *nd_region = to_nd_region(dev); + struct resource res; + +- if (is_nd_pmem(&nd_region->dev)) { ++ if (is_memory(&nd_region->dev)) { + res.start = nd_region->ndr_start; + res.end = nd_region->ndr_start + + nd_region->ndr_size - 1; +diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c +index af30cbe7a8ea2..47b48800fb758 100644 +--- a/drivers/nvdimm/region_devs.c ++++ b/drivers/nvdimm/region_devs.c +@@ -632,11 +632,11 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) + if (!is_memory(dev) && a == &dev_attr_dax_seed.attr) + return 0; + +- if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) ++ if (!is_memory(dev) && a == &dev_attr_badblocks.attr) + return 0; + + if (a == &dev_attr_resource.attr) { +- if (is_nd_pmem(dev)) ++ if (is_memory(dev)) + return 0400; + else + return 0; +-- +2.20.1 + diff --git 
a/queue-5.3/mlxsw-spectrum_flower-fail-in-case-user-specifies-mu.patch b/queue-5.3/mlxsw-spectrum_flower-fail-in-case-user-specifies-mu.patch new file mode 100644 index 00000000000..ffab2dd29e7 --- /dev/null +++ b/queue-5.3/mlxsw-spectrum_flower-fail-in-case-user-specifies-mu.patch @@ -0,0 +1,53 @@ +From 58fd4cb5a47f9a901b0a3758ac372b7bb477a5fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Sep 2019 14:43:40 +0300 +Subject: mlxsw: spectrum_flower: Fail in case user specifies multiple mirror + actions + +From: Danielle Ratson + +[ Upstream commit 52feb8b588f6d23673dd7cc2b44b203493b627f6 ] + +The ASIC can only mirror a packet to one port, but when user is trying +to set more than one mirror action, it doesn't fail. + +Add a check if more than one mirror action was specified per rule and if so, +fail for not being supported. + +Fixes: d0d13c1858a11 ("mlxsw: spectrum_acl: Add support for mirror action") +Signed-off-by: Danielle Ratson +Acked-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +index 202e9a2460194..7c13656a83384 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +@@ -21,6 +21,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) + { + const struct flow_action_entry *act; ++ int mirror_act_count = 0; + int err, i; + + if (!flow_action_has_entries(flow_action)) +@@ -95,6 +96,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, + case FLOW_ACTION_MIRRED: { + struct net_device *out_dev = act->dev; + ++ if (mirror_act_count++) { ++ NL_SET_ERR_MSG_MOD(extack, "Multiple mirror actions per rule are not supported"); ++ return -EOPNOTSUPP; ++ } ++ + err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, + block, out_dev, + extack); +-- +2.20.1 + diff --git a/queue-5.3/net-dsa-microchip-always-set-regmap-stride-to-1.patch b/queue-5.3/net-dsa-microchip-always-set-regmap-stride-to-1.patch new file mode 100644 index 00000000000..b66d6440eeb --- /dev/null +++ b/queue-5.3/net-dsa-microchip-always-set-regmap-stride-to-1.patch @@ -0,0 +1,51 @@ +From dd1cf3bbd9e966c79cba5f9ddbe9135f4b81299a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Sep 2019 00:08:42 +0200 +Subject: net: dsa: microchip: Always set regmap stride to 1 + +From: Marek Vasut + +[ Upstream commit a3aa6e65beebf3780026753ebf39db19f4c92990 ] + +The regmap stride is set to 1 for regmap describing 8bit registers already. +However, for 16/32/64bit registers, the stride is 2/4/8 respectively. This +is not correct, as the switch protocol supports unaligned register reads +and writes and the KSZ87xx even uses such unaligned register accesses to +read e.g. MIB counter. 
+ +This patch fixes MIB counter access on KSZ87xx. + +Signed-off-by: Marek Vasut +Cc: Andrew Lunn +Cc: David S. Miller +Cc: Florian Fainelli +Cc: George McCollister +Cc: Tristram Ha +Cc: Vivien Didelot +Cc: Woojung Huh +Fixes: 46558d601cb6 ("net: dsa: microchip: Initial SPI regmap support") +Fixes: 255b59ad0db2 ("net: dsa: microchip: Factor out regmap config generation into common header") +Reviewed-by: George McCollister +Tested-by: George McCollister +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz_common.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h +index 72ec250b95408..823f544add0a3 100644 +--- a/drivers/net/dsa/microchip/ksz_common.h ++++ b/drivers/net/dsa/microchip/ksz_common.h +@@ -130,7 +130,7 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, + { \ + .name = #width, \ + .val_bits = (width), \ +- .reg_stride = (width) / 8, \ ++ .reg_stride = 1, \ + .reg_bits = (regbits) + (regalign), \ + .pad_bits = (regpad), \ + .max_register = BIT(regbits) - 1, \ +-- +2.20.1 + diff --git a/queue-5.3/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch b/queue-5.3/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch new file mode 100644 index 00000000000..ba47d62b13c --- /dev/null +++ b/queue-5.3/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch @@ -0,0 +1,107 @@ +From a1b5c144324000c4448d4c1f6e7ec9dc40e4f332 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Sep 2019 16:56:44 +0200 +Subject: netfilter: nf_tables: allow lookups in dynamic sets + +From: Florian Westphal + +[ Upstream commit acab713177377d9e0889c46bac7ff0cfb9a90c4d ] + +This un-breaks lookups in sets that have the 'dynamic' flag set. 
+Given this active example configuration: + +table filter { + set set1 { + type ipv4_addr + size 64 + flags dynamic,timeout + timeout 1m + } + + chain input { + type filter hook input priority 0; policy accept; + } +} + +... this works: +nft add rule ip filter input add @set1 { ip saddr } + +-> whenever rule is triggered, the source ip address is inserted +into the set (if it did not exist). + +This won't work: +nft add rule ip filter input ip saddr @set1 counter +Error: Could not process rule: Operation not supported + +In other words, we can add entries to the set, but then can't make +matching decision based on that set. + +That is just wrong -- all set backends support lookups (else they would +not be very useful). +The failure comes from an explicit rejection in nft_lookup.c. + +Looking at the history, it seems like NFT_SET_EVAL used to mean +'set contains expressions' (aka. "is a meter"), for instance something like + + nft add rule ip filter input meter example { ip saddr limit rate 10/second } + or + nft add rule ip filter input meter example { ip saddr counter } + +The actual meaning of NFT_SET_EVAL however, is +'set can be updated from the packet path'. + +'meters' and packet-path insertions into sets, such as +'add @set { ip saddr }' use exactly the same kernel code (nft_dynset.c) +and thus require a set backend that provides the ->update() function. + +The only set that provides this also is the only one that has the +NFT_SET_EVAL feature flag. + +Removing the wrong check makes the above example work. +While at it, also fix the flag check during set instantiation to +allow supported combinations only. 
+ +Fixes: 8aeff920dcc9b3f ("netfilter: nf_tables: add stateful object reference to set elements") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 7 +++++-- + net/netfilter/nft_lookup.c | 3 --- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index d47469f824a10..3b81323fa0171 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3562,8 +3562,11 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, + NFT_SET_OBJECT)) + return -EINVAL; + /* Only one of these operations is supported */ +- if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) == +- (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ++ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == ++ (NFT_SET_MAP | NFT_SET_OBJECT)) ++ return -EOPNOTSUPP; ++ if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == ++ (NFT_SET_EVAL | NFT_SET_OBJECT)) + return -EOPNOTSUPP; + } + +diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c +index c0560bf3c31bd..660bad688e2bc 100644 +--- a/net/netfilter/nft_lookup.c ++++ b/net/netfilter/nft_lookup.c +@@ -73,9 +73,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx, + if (IS_ERR(set)) + return PTR_ERR(set); + +- if (set->flags & NFT_SET_EVAL) +- return -EOPNOTSUPP; +- + priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); + err = nft_validate_register_load(priv->sreg, set->klen); + if (err < 0) +-- +2.20.1 + diff --git a/queue-5.3/nfp-abm-fix-memory-leak-in-nfp_abm_u32_knode_replace.patch b/queue-5.3/nfp-abm-fix-memory-leak-in-nfp_abm_u32_knode_replace.patch new file mode 100644 index 00000000000..c55e6b9cb90 --- /dev/null +++ b/queue-5.3/nfp-abm-fix-memory-leak-in-nfp_abm_u32_knode_replace.patch @@ -0,0 +1,69 @@ +From bdbc6473b9134ec648ce42949819c46822002289 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Sep 2019 20:51:46 -0500 +Subject: nfp: 
abm: fix memory leak in nfp_abm_u32_knode_replace + +From: Navid Emamdoost + +[ Upstream commit 78beef629fd95be4ed853b2d37b832f766bd96ca ] + +In nfp_abm_u32_knode_replace if the allocation for match fails it should +go to the error handling instead of returning. Updated other gotos to +have correct errno returned, too. + +Signed-off-by: Navid Emamdoost +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/netronome/nfp/abm/cls.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c +index 23ebddfb95325..9f8a1f69c0c4c 100644 +--- a/drivers/net/ethernet/netronome/nfp/abm/cls.c ++++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c +@@ -176,8 +176,10 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, + u8 mask, val; + int err; + +- if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) ++ if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) { ++ err = -EOPNOTSUPP; + goto err_delete; ++ } + + tos_off = proto == htons(ETH_P_IP) ? 
16 : 20; + +@@ -198,14 +200,18 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, + if ((iter->val & cmask) == (val & cmask) && + iter->band != knode->res->classid) { + NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter"); ++ err = -EOPNOTSUPP; + goto err_delete; + } + } + + if (!match) { + match = kzalloc(sizeof(*match), GFP_KERNEL); +- if (!match) +- return -ENOMEM; ++ if (!match) { ++ err = -ENOMEM; ++ goto err_delete; ++ } ++ + list_add(&match->list, &alink->dscp_map); + } + match->handle = knode->handle; +@@ -221,7 +227,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, + + err_delete: + nfp_abm_u32_knode_delete(alink, knode); +- return -EOPNOTSUPP; ++ return err; + } + + static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, +-- +2.20.1 + diff --git a/queue-5.3/ntb-point-to-right-memory-window-index.patch b/queue-5.3/ntb-point-to-right-memory-window-index.patch new file mode 100644 index 00000000000..f841ab03f5e --- /dev/null +++ b/queue-5.3/ntb-point-to-right-memory-window-index.patch @@ -0,0 +1,53 @@ +From ac17ba39dcb60b0f1df8d74869d0c3240f7352f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Mar 2019 11:32:50 +0000 +Subject: ntb: point to right memory window index + +From: Sanjay R Mehta + +[ Upstream commit ae89339b08f3fe02457ec9edd512ddc3d246d0f8 ] + +second parameter of ntb_peer_mw_get_addr is pointing to wrong memory +window index by passing "peer gidx" instead of "local gidx". + +For ex, "local gidx" value is '0' and "peer gidx" value is '1', then + +on peer side ntb_mw_set_trans() api is used as below with gidx pointing to +local side gidx which is '0', so memory window '0' is chosen and XLAT '0' +will be programmed by peer side. + + ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, peer->inbuf_xlat, + peer->inbuf_size); + +Now, on local side ntb_peer_mw_get_addr() is used as below with gidx +pointing to "peer gidx" which is '1', so pointing to memory window '1' +instead of memory window '0'.
+ + ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, + &peer->outbuf_size); + +So this patch passes "local gidx" as parameter to ntb_peer_mw_get_addr(). + +Signed-off-by: Sanjay R Mehta +Signed-off-by: Jon Mason +Signed-off-by: Sasha Levin +--- + drivers/ntb/test/ntb_perf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c +index d028331558ea7..e9b7c2dfc7301 100644 +--- a/drivers/ntb/test/ntb_perf.c ++++ b/drivers/ntb/test/ntb_perf.c +@@ -1378,7 +1378,7 @@ static int perf_setup_peer_mw(struct perf_peer *peer) + int ret; + + /* Get outbound MW parameters and map it */ +- ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, ++ ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr, + &peer->outbuf_size); + if (ret) + return ret; +-- +2.20.1 + diff --git a/queue-5.3/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch b/queue-5.3/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch new file mode 100644 index 00000000000..4182597bf92 --- /dev/null +++ b/queue-5.3/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch @@ -0,0 +1,62 @@ +From 17f8a43fd3eb0a9723d21669119d98aa13b16277 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Sep 2019 13:41:16 +0200 +Subject: perf build: Add detection of java-11-openjdk-devel package + +From: Thomas Richter + +[ Upstream commit 815c1560bf8fd522b8d93a1d727868b910c1cc24 ] + +With Java 11 there is no separate JRE anymore. + +Details: + + https://coderanch.com/t/701603/java/JRE-JDK + +Therefore the detection of the JRE needs to be adapted. + +This change works for s390 and x86. I have not tested other platforms.
+ +Committer testing: + +Continues to work with the OpenJDK 8: + + $ rm -f ~acme/lib64/libperf-jvmti.so + $ rpm -qa | grep jdk-devel + java-1.8.0-openjdk-devel-1.8.0.222.b10-0.fc30.x86_64 + $ git log --oneline -1 + a51937170f33 (HEAD -> perf/core) perf build: Add detection of java-11-openjdk-devel package + $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; make -C tools/perf O=/tmp/build/perf install > /dev/null 2>1 + $ ls -la ~acme/lib64/libperf-jvmti.so + -rwxr-xr-x. 1 acme acme 230744 Sep 24 16:46 /home/acme/lib64/libperf-jvmti.so + $ + +Suggested-by: Andreas Krebbel +Signed-off-by: Thomas Richter +Tested-by: Arnaldo Carvalho de Melo +Cc: Heiko Carstens +Cc: Hendrik Brueckner +Cc: Vasily Gorbik +Link: http://lore.kernel.org/lkml/20190909114116.50469-4-tmricht@linux.ibm.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/Makefile.config | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config +index 89ac5a1f1550e..3da3749118527 100644 +--- a/tools/perf/Makefile.config ++++ b/tools/perf/Makefile.config +@@ -908,7 +908,7 @@ ifndef NO_JVMTI + JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') + else + ifneq (,$(wildcard /usr/sbin/alternatives)) +- JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') ++ JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g') + endif + endif + ifndef JDIR +-- +2.20.1 + diff --git a/queue-5.3/perf-probe-fix-to-clear-tev-nargs-in-clear_probe_tra.patch b/queue-5.3/perf-probe-fix-to-clear-tev-nargs-in-clear_probe_tra.patch new file mode 100644 index 00000000000..4b041b35d3b --- /dev/null +++ b/queue-5.3/perf-probe-fix-to-clear-tev-nargs-in-clear_probe_tra.patch @@ -0,0 +1,66 @@ +From 2e52bdb998b9cf41deff7d8dd6ce8a87e3b24638 Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Mon, 16 Sep 2019 01:44:40 +0900 +Subject: perf probe: Fix to clear tev->nargs in clear_probe_trace_event() + +From: Masami Hiramatsu + +[ Upstream commit 9e6124d9d635957b56717f85219a88701617253f ] + +Since add_probe_trace_event() can reuse tf->tevs[i] after calling +clear_probe_trace_event(), this can make perf-probe crash if the 1st +attempt of probe event finding fails to find an event argument, and the +2nd attempt fails to find probe point. + +E.g. + $ perf probe -D "task_pid_nr tsk" + Failed to find 'tsk' in this function. + Failed to get entry address of warn_bad_vsyscall + Segmentation fault (core dumped) + +Committer testing: + +After the patch: + + $ perf probe -D "task_pid_nr tsk" + Failed to find 'tsk' in this function. + Failed to get entry address of warn_bad_vsyscall + Failed to get entry address of signal_fault + Failed to get entry address of show_signal + Failed to get entry address of umip_printk + Failed to get entry address of __bad_area_nosemaphore + + Failed to get entry address of sock_set_timeout + Failed to get entry address of tcp_recvmsg + Probe point 'task_pid_nr' not found. + Error: Failed to add events. 
+ $ + +Fixes: 092b1f0b5f9f ("perf probe: Clear probe_trace_event when add_probe_trace_event() fails") +Signed-off-by: Masami Hiramatsu +Tested-by: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Wang Nan +Link: http://lore.kernel.org/lkml/156856587999.25775.5145779959474477595.stgit@devnote2 +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/probe-event.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c +index 8394d48f8b32e..3355c445abedf 100644 +--- a/tools/perf/util/probe-event.c ++++ b/tools/perf/util/probe-event.c +@@ -2329,6 +2329,7 @@ void clear_probe_trace_event(struct probe_trace_event *tev) + } + } + zfree(&tev->args); ++ tev->nargs = 0; + } + + struct kprobe_blacklist_node { +-- +2.20.1 + diff --git a/queue-5.3/perf-stat-reset-previous-counts-on-repeat-with-inter.patch b/queue-5.3/perf-stat-reset-previous-counts-on-repeat-with-inter.patch new file mode 100644 index 00000000000..5923344f299 --- /dev/null +++ b/queue-5.3/perf-stat-reset-previous-counts-on-repeat-with-inter.patch @@ -0,0 +1,168 @@ +From 69574518a258745f67c7b3f9f0a3e4b45d90b844 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Sep 2019 15:17:37 +0530 +Subject: perf stat: Reset previous counts on repeat with interval + +From: Srikar Dronamraju + +[ Upstream commit b63fd11cced17fcb8e133def29001b0f6aaa5e06 ] + +When using 'perf stat' with repeat and interval option, it shows wrong +values for events. + +The wrong values will be shown for the first interval on the second and +subsequent repetitions. 
+ +Without the fix: + + # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5 + + 2.000282489 53 faults + 2.000282489 513 sched:sched_switch + 4.005478208 3,721 faults + 4.005478208 2,666 sched:sched_switch + 5.025470933 395 faults + 5.025470933 1,307 sched:sched_switch + 2.009602825 1,84,46,74,40,73,70,95,47,520 faults <------ + 2.009602825 1,84,46,74,40,73,70,95,49,568 sched:sched_switch <------ + 4.019612206 4,730 faults + 4.019612206 2,746 sched:sched_switch + 5.039615484 3,953 faults + 5.039615484 1,496 sched:sched_switch + 2.000274620 1,84,46,74,40,73,70,95,47,520 faults <------ + 2.000274620 1,84,46,74,40,73,70,95,47,520 sched:sched_switch <------ + 4.000480342 4,282 faults + 4.000480342 2,303 sched:sched_switch + 5.000916811 1,322 faults + 5.000916811 1,064 sched:sched_switch + # + +prev_raw_counts is allocated when using intervals. This is used when +calculating the difference in the counts of events when using interval. + +The current counts are stored in prev_raw_counts to calculate the +differences in the next iteration. + +On the first interval of the second and subsequent repetitions, +prev_raw_counts would be the values stored in the last interval of the +previous repetitions, while the current counts will only be for the +first interval of the current repetition. + +Hence there is a possibility of events showing up as big number. + +Fix this by resetting prev_raw_counts whenever perf stat repeats the +command. 
+ +With the fix: + + # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5 + + 2.019349347 2,597 faults + 2.019349347 2,753 sched:sched_switch + 4.019577372 3,098 faults + 4.019577372 2,532 sched:sched_switch + 5.019415481 1,879 faults + 5.019415481 1,356 sched:sched_switch + 2.000178813 8,468 faults + 2.000178813 2,254 sched:sched_switch + 4.000404621 7,440 faults + 4.000404621 1,266 sched:sched_switch + 5.040196079 2,458 faults + 5.040196079 556 sched:sched_switch + 2.000191939 6,870 faults + 2.000191939 1,170 sched:sched_switch + 4.000414103 541 faults + 4.000414103 902 sched:sched_switch + 5.000809863 450 faults + 5.000809863 364 sched:sched_switch + # + +Committer notes: + +This was broken since the cset introducing the --interval feature, i.e. +--repeat + --interval wasn't tested at that point, add the Fixes tag so +that automatic scripts can pick this up. + +Fixes: 13370a9b5bb8 ("perf stat: Add interval printing") +Signed-off-by: Srikar Dronamraju +Acked-by: Jiri Olsa +Tested-by: Arnaldo Carvalho de Melo +Tested-by: Ravi Bangoria +Cc: Namhyung Kim +Cc: Naveen N. Rao +Cc: Stephane Eranian +Cc: stable@vger.kernel.org # v3.9+ +Link: http://lore.kernel.org/lkml/20190904094738.9558-2-srikar@linux.vnet.ibm.com +[ Fixed up conflicts with libperf, i.e. some perf_{evsel,evlist} lost the 'perf' prefix ] +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/builtin-stat.c | 3 +++ + tools/perf/util/stat.c | 17 +++++++++++++++++ + tools/perf/util/stat.h | 1 + + 3 files changed, 21 insertions(+) + +diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c +index 3e13b231f2f56..8ec06bf3372c6 100644 +--- a/tools/perf/builtin-stat.c ++++ b/tools/perf/builtin-stat.c +@@ -1961,6 +1961,9 @@ int cmd_stat(int argc, const char **argv) + fprintf(output, "[ perf stat: executing run #%d ... 
]\n", + run_idx + 1); + ++ if (run_idx != 0) ++ perf_evlist__reset_prev_raw_counts(evsel_list); ++ + status = run_perf_stat(argc, argv, run_idx); + if (forever && status != -1 && !interval) { + print_counters(NULL, argc, argv); +diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c +index db8a6cf336bed..6ce66c2727474 100644 +--- a/tools/perf/util/stat.c ++++ b/tools/perf/util/stat.c +@@ -155,6 +155,15 @@ static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) + evsel->prev_raw_counts = NULL; + } + ++static void perf_evsel__reset_prev_raw_counts(struct perf_evsel *evsel) ++{ ++ if (evsel->prev_raw_counts) { ++ evsel->prev_raw_counts->aggr.val = 0; ++ evsel->prev_raw_counts->aggr.ena = 0; ++ evsel->prev_raw_counts->aggr.run = 0; ++ } ++} ++ + static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) + { + int ncpus = perf_evsel__nr_cpus(evsel); +@@ -205,6 +214,14 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) + } + } + ++void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist) ++{ ++ struct perf_evsel *evsel; ++ ++ evlist__for_each_entry(evlist, evsel) ++ perf_evsel__reset_prev_raw_counts(evsel); ++} ++ + static void zero_per_pkg(struct perf_evsel *counter) + { + if (counter->per_pkg_mask) +diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h +index 7032dd1eeac2f..9cd0d9cff374a 100644 +--- a/tools/perf/util/stat.h ++++ b/tools/perf/util/stat.h +@@ -194,6 +194,7 @@ void perf_stat__collect_metric_expr(struct perf_evlist *); + int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); + void perf_evlist__free_stats(struct perf_evlist *evlist); + void perf_evlist__reset_stats(struct perf_evlist *evlist); ++void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist); + + int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter); +-- +2.20.1 + diff --git a/queue-5.3/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch 
b/queue-5.3/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch new file mode 100644 index 00000000000..b72ecde8a8b --- /dev/null +++ b/queue-5.3/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch @@ -0,0 +1,51 @@ +From f3f176a18b80067a39d350006b0bb5ba4c818874 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Sep 2019 14:36:48 -0300 +Subject: perf unwind: Fix libunwind build failure on i386 systems + +From: Arnaldo Carvalho de Melo + +[ Upstream commit 26acf400d2dcc72c7e713e1f55db47ad92010cc2 ] + +Naresh Kamboju reported, that on the i386 build pr_err() +doesn't get defined properly due to header ordering: + + perf-in.o: In function `libunwind__x86_reg_id': + tools/perf/util/libunwind/../../arch/x86/util/unwind-libunwind.c:109: + undefined reference to `pr_err' + +Reported-by: Naresh Kamboju +Signed-off-by: Arnaldo Carvalho de Melo +Cc: David Ahern +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +--- + tools/perf/arch/x86/util/unwind-libunwind.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c +index 05920e3edf7a7..47357973b55b2 100644 +--- a/tools/perf/arch/x86/util/unwind-libunwind.c ++++ b/tools/perf/arch/x86/util/unwind-libunwind.c +@@ -1,11 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + + #include ++#include "../../util/debug.h" + #ifndef REMOTE_UNWIND_LIBUNWIND + #include + #include "perf_regs.h" + #include "../../util/unwind.h" +-#include "../../util/debug.h" + #endif + + #ifdef HAVE_ARCH_X86_64_SUPPORT +-- +2.20.1 + diff --git a/queue-5.3/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch b/queue-5.3/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch new file mode 100644 index 00000000000..c0144bc1e66 --- /dev/null +++ 
b/queue-5.3/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch @@ -0,0 +1,48 @@ +From e7cc3fbe84fdb8e0b2d60942e29d16976a421298 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Sep 2019 07:23:40 -0400 +Subject: pNFS: Ensure we do clear the return-on-close layout stateid on fatal + errors + +From: Trond Myklebust + +[ Upstream commit 9c47b18cf722184f32148784189fca945a7d0561 ] + +IF the server rejected our layout return with a state error such as +NFS4ERR_BAD_STATEID, or even a stale inode error, then we do want +to clear out all the remaining layout segments and mark that stateid +as invalid. + +Fixes: 1c5bd76d17cca ("pNFS: Enable layoutreturn operation for...") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/pnfs.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c +index 4525d5acae386..0418b198edd3e 100644 +--- a/fs/nfs/pnfs.c ++++ b/fs/nfs/pnfs.c +@@ -1449,10 +1449,15 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, + const nfs4_stateid *res_stateid = NULL; + struct nfs4_xdr_opaque_data *ld_private = args->ld_private; + +- if (ret == 0) { +- arg_stateid = &args->stateid; ++ switch (ret) { ++ case -NFS4ERR_NOMATCHING_LAYOUT: ++ break; ++ case 0: + if (res->lrs_present) + res_stateid = &res->stateid; ++ /* Fallthrough */ ++ default: ++ arg_stateid = &args->stateid; + } + pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, + res_stateid); +-- +2.20.1 + diff --git a/queue-5.3/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch b/queue-5.3/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch new file mode 100644 index 00000000000..8d4e690b242 --- /dev/null +++ b/queue-5.3/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch @@ -0,0 +1,49 @@ +From 00f190051ff240a8fd998dade702e26e31119af3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Sep 2019 16:54:21 +0200 +Subject: pwm: stm32-lp: Add 
check in case requested period cannot be achieved +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Fabrice Gasnier + +[ Upstream commit c91e3234c6035baf5a79763cb4fcd5d23ce75c2b ] + +LPTimer can use a 32KHz clock for counting. It depends on clock tree +configuration. In such a case, PWM output frequency range is limited. +Although unlikely, nothing prevents user from requesting a PWM frequency +above counting clock (32KHz for instance): +- This causes (prd - 1) = 0xffff to be written in ARR register later in +the apply() routine. +This results in badly configured PWM period (and also duty_cycle). +Add a check to report an error in such a case. + +Signed-off-by: Fabrice Gasnier +Reviewed-by: Uwe Kleine-König +Signed-off-by: Thierry Reding +Signed-off-by: Sasha Levin +--- + drivers/pwm/pwm-stm32-lp.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c +index 2211a642066db..97a9afa191ee0 100644 +--- a/drivers/pwm/pwm-stm32-lp.c ++++ b/drivers/pwm/pwm-stm32-lp.c +@@ -59,6 +59,12 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm, + /* Calculate the period and prescaler value */ + div = (unsigned long long)clk_get_rate(priv->clk) * state->period; + do_div(div, NSEC_PER_SEC); ++ if (!div) { ++ /* Clock is too slow to achieve requested period.
*/ ++ dev_dbg(priv->chip.dev, "Can't reach %u ns\n", state->period); ++ return -EINVAL; ++ } ++ + prd = div; + while (div > STM32_LPTIM_MAX_ARR) { + presc++; +-- +2.20.1 + diff --git a/queue-5.3/riscv-avoid-interrupts-being-erroneously-enabled-in-.patch b/queue-5.3/riscv-avoid-interrupts-being-erroneously-enabled-in-.patch new file mode 100644 index 00000000000..dfd360c1099 --- /dev/null +++ b/queue-5.3/riscv-avoid-interrupts-being-erroneously-enabled-in-.patch @@ -0,0 +1,62 @@ +From 0203e74e19c6d78e705723fb13cf96b8954af3aa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Sep 2019 16:47:41 +0800 +Subject: riscv: Avoid interrupts being erroneously enabled in + handle_exception() + +From: Vincent Chen + +[ Upstream commit c82dd6d078a2bb29d41eda032bb96d05699a524d ] + +When the handle_exception function addresses an exception, the interrupts +will be unconditionally enabled after finishing the context save. However, +It may erroneously enable the interrupts if the interrupts are disabled +before entering the handle_exception. + +For example, one of the WARN_ON() condition is satisfied in the scheduling +where the interrupt is disabled and rq.lock is locked. The WARN_ON will +trigger a break exception and the handle_exception function will enable the +interrupts before entering do_trap_break function. During the procedure, if +a timer interrupt is pending, it will be taken when interrupts are enabled. +In this case, it may cause a deadlock problem if the rq.lock is locked +again in the timer ISR. + +Hence, the handle_exception() can only enable interrupts when the state of +sstatus.SPIE is 1. + +This patch is tested on HiFive Unleashed board. 
+ +Signed-off-by: Vincent Chen +Reviewed-by: Palmer Dabbelt +[paul.walmsley@sifive.com: updated to apply] +Fixes: bcae803a21317 ("RISC-V: Enable IRQ during exception handling") +Cc: David Abdurachmanov +Cc: stable@vger.kernel.org +Signed-off-by: Paul Walmsley +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/entry.S | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S +index bc7a56e1ca6f4..9b60878a4469c 100644 +--- a/arch/riscv/kernel/entry.S ++++ b/arch/riscv/kernel/entry.S +@@ -166,9 +166,13 @@ ENTRY(handle_exception) + move a0, sp /* pt_regs */ + tail do_IRQ + 1: +- /* Exceptions run with interrupts enabled */ ++ /* Exceptions run with interrupts enabled or disabled ++ depending on the state of sstatus.SR_SPIE */ ++ andi t0, s1, SR_SPIE ++ beqz t0, 1f + csrs sstatus, SR_SIE + ++1: + /* Handle syscalls */ + li t0, EXC_SYSCALL + beq s4, t0, handle_syscall +-- +2.20.1 + diff --git a/queue-5.3/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch b/queue-5.3/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch new file mode 100644 index 00000000000..b9450cec03c --- /dev/null +++ b/queue-5.3/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch @@ -0,0 +1,85 @@ +From 644ff2233246a34e3057ee6517083e919a1ef7a4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Sep 2019 06:53:28 +0000 +Subject: sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr() + +From: KeMeng Shi + +[ Upstream commit 714e501e16cd473538b609b3e351b2cc9f7f09ed ] + +An oops can be triggered in the scheduler when running qemu on arm64: + + Unable to handle kernel paging request at virtual address ffff000008effe40 + Internal error: Oops: 96000007 [#1] SMP + Process migration/0 (pid: 12, stack limit = 0x00000000084e3736) + pstate: 20000085 (nzCv daIf -PAN -UAO) + pc : __ll_sc___cmpxchg_case_acq_4+0x4/0x20 + lr : move_queued_task.isra.21+0x124/0x298 + ... 
+ Call trace: + __ll_sc___cmpxchg_case_acq_4+0x4/0x20 + __migrate_task+0xc8/0xe0 + migration_cpu_stop+0x170/0x180 + cpu_stopper_thread+0xec/0x178 + smpboot_thread_fn+0x1ac/0x1e8 + kthread+0x134/0x138 + ret_from_fork+0x10/0x18 + +__set_cpus_allowed_ptr() will choose an active dest_cpu in affinity mask to +migrate the process if process is not currently running on any one of the +CPUs specified in affinity mask. __set_cpus_allowed_ptr() will choose an +invalid dest_cpu (dest_cpu >= nr_cpu_ids, 1024 in my virtual machine) if +CPUS in an affinity mask are deactivated by cpu_down after cpumask_intersects +check. cpumask_test_cpu() of dest_cpu afterwards is overflown and may pass if +corresponding bit is coincidentally set. As a consequence, kernel will +access an invalid rq address associated with the invalid CPU in +migration_cpu_stop->__migrate_task->move_queued_task and the Oops occurs. + +To reproduce the crash: + + 1) A process repeatedly binds itself to cpu0 and cpu1 in turn by calling + sched_setaffinity. + + 2) A shell script repeatedly does "echo 0 > /sys/devices/system/cpu/cpu1/online" + and "echo 1 > /sys/devices/system/cpu/cpu1/online" in turn. + + 3) Oops appears if the invalid CPU is set in memory after tested cpumask.
+ +Signed-off-by: KeMeng Shi +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Valentin Schneider +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: https://lkml.kernel.org/r/1568616808-16808-1-git-send-email-shikemeng@huawei.com +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index d38f007afea74..fffe790d98bb2 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1537,7 +1537,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + if (cpumask_equal(p->cpus_ptr, new_mask)) + goto out; + +- if (!cpumask_intersects(new_mask, cpu_valid_mask)) { ++ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); ++ if (dest_cpu >= nr_cpu_ids) { + ret = -EINVAL; + goto out; + } +@@ -1558,7 +1559,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + if (cpumask_test_cpu(task_cpu(p), new_mask)) + goto out; + +- dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); + if (task_running(rq, p) || p->state == TASK_WAKING) { + struct migration_arg arg = { p, dest_cpu }; + /* Need help from migration thread: drop lock and wait. */ +-- +2.20.1 + diff --git a/queue-5.3/sched-membarrier-call-sync_core-only-before-usermode.patch b/queue-5.3/sched-membarrier-call-sync_core-only-before-usermode.patch new file mode 100644 index 00000000000..364587f8ea1 --- /dev/null +++ b/queue-5.3/sched-membarrier-call-sync_core-only-before-usermode.patch @@ -0,0 +1,50 @@ +From c74b31f639b582f43c49d38a5902bf6284bd1668 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Sep 2019 13:37:01 -0400 +Subject: sched/membarrier: Call sync_core only before usermode for same mm + +From: Mathieu Desnoyers + +[ Upstream commit 2840cf02fae627860156737e83326df354ee4ec6 ] + +When the prev and next task's mm change, switch_mm() provides the core +serializing guarantees before returning to usermode. 
The only case +where an explicit core serialization is needed is when the scheduler +keeps the same mm for prev and next. + +Suggested-by: Oleg Nesterov +Signed-off-by: Mathieu Desnoyers +Signed-off-by: Peter Zijlstra (Intel) +Cc: Chris Metcalf +Cc: Christoph Lameter +Cc: Eric W. Biederman +Cc: Kirill Tkhai +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Paul E. McKenney +Cc: Peter Zijlstra +Cc: Russell King - ARM Linux admin +Cc: Thomas Gleixner +Link: https://lkml.kernel.org/r/20190919173705.2181-4-mathieu.desnoyers@efficios.com +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +--- + include/linux/sched/mm.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h +index 4a7944078cc35..8557ec6642130 100644 +--- a/include/linux/sched/mm.h ++++ b/include/linux/sched/mm.h +@@ -362,6 +362,8 @@ enum { + + static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) + { ++ if (current->mm != mm) ++ return; + if (likely(!(atomic_read(&mm->membarrier_state) & + MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE))) + return; +-- +2.20.1 + diff --git a/queue-5.3/sched-membarrier-fix-private-expedited-registration-.patch b/queue-5.3/sched-membarrier-fix-private-expedited-registration-.patch new file mode 100644 index 00000000000..b7eecdbd204 --- /dev/null +++ b/queue-5.3/sched-membarrier-fix-private-expedited-registration-.patch @@ -0,0 +1,53 @@ +From d61892cc0b7c61332c0790bd8e0e527f7f5f9cda Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Sep 2019 13:36:59 -0400 +Subject: sched/membarrier: Fix private expedited registration check + +From: Mathieu Desnoyers + +[ Upstream commit fc0d77387cb5ae883fd774fc559e056a8dde024c ] + +Fix a logic flaw in the way membarrier_register_private_expedited() +handles ready state checks for private expedited sync core and private +expedited registrations. 
+ +If a private expedited membarrier registration is first performed, and +then a private expedited sync_core registration is performed, the ready +state check will skip the second registration when it really should not. + +Signed-off-by: Mathieu Desnoyers +Signed-off-by: Peter Zijlstra (Intel) +Cc: Chris Metcalf +Cc: Christoph Lameter +Cc: Eric W. Biederman +Cc: Kirill Tkhai +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Oleg Nesterov +Cc: Paul E. McKenney +Cc: Peter Zijlstra +Cc: Russell King - ARM Linux admin +Cc: Thomas Gleixner +Link: https://lkml.kernel.org/r/20190919173705.2181-2-mathieu.desnoyers@efficios.com +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +--- + kernel/sched/membarrier.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c +index aa8d758041088..5110d91b1b0ea 100644 +--- a/kernel/sched/membarrier.c ++++ b/kernel/sched/membarrier.c +@@ -226,7 +226,7 @@ static int membarrier_register_private_expedited(int flags) + * groups, which use the same mm. (CLONE_VM but not + * CLONE_THREAD). 
+ */ +- if (atomic_read(&mm->membarrier_state) & state) ++ if ((atomic_read(&mm->membarrier_state) & state) == state) + return 0; + atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state); + if (flags & MEMBARRIER_FLAG_SYNC_CORE) +-- +2.20.1 + diff --git a/queue-5.3/selftests-bpf-adjust-strobemeta-loop-to-satisfy-late.patch b/queue-5.3/selftests-bpf-adjust-strobemeta-loop-to-satisfy-late.patch new file mode 100644 index 00000000000..fcacdd1aead --- /dev/null +++ b/queue-5.3/selftests-bpf-adjust-strobemeta-loop-to-satisfy-late.patch @@ -0,0 +1,51 @@ +From 460a63144c820681eef0ea0b8f9b80d49932a6a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Sep 2019 11:52:05 -0700 +Subject: selftests/bpf: adjust strobemeta loop to satisfy latest clang + +From: Andrii Nakryiko + +[ Upstream commit 4670d68b9254710fdeaf794cad54d8b2c9929e0a ] + +Some recent changes in latest Clang started causing the following +warning when unrolling strobemeta test case main loop: + + progs/strobemeta.h:416:2: warning: loop not unrolled: the optimizer was + unable to perform the requested transformation; the transformation might + be disabled or specified as part of an unsupported transformation + ordering [-Wpass-failed=transform-warning] + +This patch simplifies loop's exit condition to depend only on constant +max iteration number (STROBE_MAX_MAP_ENTRIES), while moving early +termination logic inside the loop body. The changes are equivalent from +program logic standpoint, but fixes the warning. It also appears to +improve generated BPF code, as it fixes previously failing non-unrolled +strobemeta test cases. 
+ +Cc: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/bpf/progs/strobemeta.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h +index 8a399bdfd9203..067eb625d01c5 100644 +--- a/tools/testing/selftests/bpf/progs/strobemeta.h ++++ b/tools/testing/selftests/bpf/progs/strobemeta.h +@@ -413,7 +413,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, + #else + #pragma unroll + #endif +- for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) { ++ for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) { ++ if (i >= map.cnt) ++ break; ++ + descr->key_lens[i] = 0; + len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, + map.entries[i].key); +-- +2.20.1 + diff --git a/queue-5.3/selftests-seccomp-fix-build-on-older-kernels.patch b/queue-5.3/selftests-seccomp-fix-build-on-older-kernels.patch new file mode 100644 index 00000000000..4de50ce4687 --- /dev/null +++ b/queue-5.3/selftests-seccomp-fix-build-on-older-kernels.patch @@ -0,0 +1,80 @@ +From ac823fd26245a7033b5f0a77eb81179f655fe58e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Aug 2019 08:43:02 -0600 +Subject: selftests/seccomp: fix build on older kernels +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tycho Andersen + +[ Upstream commit 88282297fff00796e81f5e67734a6afdfb31fbc4 ] + +The seccomp selftest goes to some length to build against older kernel +headers, viz. all the #ifdefs at the beginning of the file. + +Commit 201766a20e30 ("ptrace: add PTRACE_GET_SYSCALL_INFO request") +introduces some additional macros, but doesn't do the #ifdef dance. 
+Let's add that dance here to avoid: + +gcc -Wl,-no-as-needed -Wall seccomp_bpf.c -lpthread -o seccomp_bpf +In file included from seccomp_bpf.c:51: +seccomp_bpf.c: In function ‘tracer_ptrace’: +seccomp_bpf.c:1787:20: error: ‘PTRACE_EVENTMSG_SYSCALL_ENTRY’ undeclared (first use in this function); did you mean ‘PTRACE_EVENT_CLONE’? + EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +../kselftest_harness.h:608:13: note: in definition of macro ‘__EXPECT’ + __typeof__(_expected) __exp = (_expected); \ + ^~~~~~~~~ +seccomp_bpf.c:1787:2: note: in expansion of macro ‘EXPECT_EQ’ + EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY + ^~~~~~~~~ +seccomp_bpf.c:1787:20: note: each undeclared identifier is reported only once for each function it appears in + EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +../kselftest_harness.h:608:13: note: in definition of macro ‘__EXPECT’ + __typeof__(_expected) __exp = (_expected); \ + ^~~~~~~~~ +seccomp_bpf.c:1787:2: note: in expansion of macro ‘EXPECT_EQ’ + EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY + ^~~~~~~~~ +seccomp_bpf.c:1788:6: error: ‘PTRACE_EVENTMSG_SYSCALL_EXIT’ undeclared (first use in this function); did you mean ‘PTRACE_EVENT_EXIT’? + : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ +../kselftest_harness.h:608:13: note: in definition of macro ‘__EXPECT’ + __typeof__(_expected) __exp = (_expected); \ + ^~~~~~~~~ +seccomp_bpf.c:1787:2: note: in expansion of macro ‘EXPECT_EQ’ + EXPECT_EQ(entry ? 
PTRACE_EVENTMSG_SYSCALL_ENTRY + ^~~~~~~~~ +make: *** [Makefile:12: seccomp_bpf] Error 1 + +[skhan@linuxfoundation.org: Fix checkpatch error in commit log] +Signed-off-by: Tycho Andersen +Fixes: 201766a20e30 ("ptrace: add PTRACE_GET_SYSCALL_INFO request") +Acked-by: Kees Cook +Signed-off-by: Shuah Khan +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/seccomp/seccomp_bpf.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c +index 6ef7f16c4cf52..7f8b5c8982e3b 100644 +--- a/tools/testing/selftests/seccomp/seccomp_bpf.c ++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c +@@ -199,6 +199,11 @@ struct seccomp_notif_sizes { + }; + #endif + ++#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY ++#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 ++#define PTRACE_EVENTMSG_SYSCALL_EXIT 2 ++#endif ++ + #ifndef seccomp + int seccomp(unsigned int op, unsigned int flags, void *args) + { +-- +2.20.1 + diff --git a/queue-5.3/series b/queue-5.3/series index d2abaf8008b..df4d379f868 100644 --- a/queue-5.3/series +++ b/queue-5.3/series @@ -85,3 +85,56 @@ ieee802154-atusb-fix-use-after-free-at-disconnect.patch nl80211-validate-beacon-head.patch cfg80211-validate-ssid-mbssid-element-ordering-assumption.patch cfg80211-initialize-on-stack-chandefs.patch +drivers-thermal-qcom-tsens-fix-memory-leak-from-qfpr.patch +ima-always-return-negative-code-for-error.patch +ima-fix-freeing-ongoing-ahash_request.patch +fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch +xprtrdma-toggle-xprt_congested-in-xprtrdma-s-slot-me.patch +xprtrdma-send-queue-size-grows-after-a-reconnect.patch +9p-transport-error-uninitialized.patch +9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch +xen-pci-reserve-mcfg-areas-earlier.patch +fuse-fix-request-limit.patch +ceph-fix-directories-inode-i_blkbits-initialization.patch +ceph-fetch-cap_gen-under-spinlock-in-ceph_add_cap.patch 
+ceph-reconnect-connection-if-session-hang-in-opening.patch +sunrpc-rpc-level-errors-should-always-set-task-tk_rp.patch +watchdog-aspeed-add-support-for-ast2600.patch +netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch +drm-amdgpu-fix-kfd-related-kernel-oops-on-hawaii.patch +drm-amdgpu-check-for-valid-number-of-registers-to-re.patch +perf-probe-fix-to-clear-tev-nargs-in-clear_probe_tra.patch +pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch +sunrpc-don-t-try-to-parse-incomplete-rpc-messages.patch +pwm-stm32-lp-add-check-in-case-requested-period-cann.patch +selftests-seccomp-fix-build-on-older-kernels.patch +x86-purgatory-disable-the-stackleak-gcc-plugin-for-t.patch +ntb-point-to-right-memory-window-index.patch +thermal-fix-use-after-free-when-unregistering-therma.patch +thermal_hwmon-sanitize-thermal_zone-type.patch +iommu-amd-fix-downgrading-default-page-sizes-in-allo.patch +libnvdimm-region-initialize-bad-block-for-volatile-n.patch +libnvdimm-fix-endian-conversion-issues.patch +fuse-fix-memleak-in-cuse_channel_open.patch +libnvdimm-nfit_test-fix-acpi_handle-redefinition.patch +sched-membarrier-call-sync_core-only-before-usermode.patch +sched-membarrier-fix-private-expedited-registration-.patch +sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch +perf-build-add-detection-of-java-11-openjdk-devel-pa.patch +include-trace-events-writeback.h-fix-wstringop-trunc.patch +selftests-bpf-adjust-strobemeta-loop-to-satisfy-late.patch +kernel-elfcore.c-include-proper-prototypes.patch +libbpf-fix-false-uninitialized-variable-warning.patch +blk-mq-move-lockdep_assert_held-into-elevator_exit.patch +bpf-fix-bpf_event_output-re-entry-issue.patch +net-dsa-microchip-always-set-regmap-stride-to-1.patch +i2c-qcom-geni-disable-dma-processing-on-the-lenovo-y.patch +perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch +mlxsw-spectrum_flower-fail-in-case-user-specifies-mu.patch +nfp-abm-fix-memory-leak-in-nfp_abm_u32_knode_replace.patch 
+drm-radeon-bail-earlier-when-radeon.cik_-si_support-.patch +btrfs-fix-selftests-failure-due-to-uninitialized-i_m.patch +kvm-nvmx-fix-consistency-check-on-injected-exception.patch +tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch +perf-stat-reset-previous-counts-on-repeat-with-inter.patch +riscv-avoid-interrupts-being-erroneously-enabled-in-.patch diff --git a/queue-5.3/sunrpc-don-t-try-to-parse-incomplete-rpc-messages.patch b/queue-5.3/sunrpc-don-t-try-to-parse-incomplete-rpc-messages.patch new file mode 100644 index 00000000000..4d1970df837 --- /dev/null +++ b/queue-5.3/sunrpc-don-t-try-to-parse-incomplete-rpc-messages.patch @@ -0,0 +1,63 @@ +From fab09570c8ffeeb747eaa80a2e6320b0bde9b440 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Sep 2019 09:12:19 -0400 +Subject: SUNRPC: Don't try to parse incomplete RPC messages + +From: Trond Myklebust + +[ Upstream commit 9ba828861c56a21d211d5d10f5643774b1ea330d ] + +If the copy of the RPC reply into our buffers did not complete, and +we could end up with a truncated message. In that case, just resend +the call. + +Fixes: a0584ee9aed80 ("SUNRPC: Use struct xdr_stream when decoding...") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + net/sunrpc/clnt.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c +index e7fdc400506e8..f7f78566be463 100644 +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -2482,6 +2482,7 @@ call_decode(struct rpc_task *task) + struct rpc_clnt *clnt = task->tk_client; + struct rpc_rqst *req = task->tk_rqstp; + struct xdr_stream xdr; ++ int err; + + dprint_status(task); + +@@ -2504,6 +2505,15 @@ call_decode(struct rpc_task *task) + * before it changed req->rq_reply_bytes_recvd. + */ + smp_rmb(); ++ ++ /* ++ * Did we ever call xprt_complete_rqst()? If not, we should assume ++ * the message is incomplete. 
++ */ ++ err = -EAGAIN; ++ if (!req->rq_reply_bytes_recvd) ++ goto out; ++ + req->rq_rcv_buf.len = req->rq_private_buf.len; + + /* Check that the softirq receive buffer is valid */ +@@ -2512,7 +2522,9 @@ call_decode(struct rpc_task *task) + + xdr_init_decode(&xdr, &req->rq_rcv_buf, + req->rq_rcv_buf.head[0].iov_base, req); +- switch (rpc_decode_header(task, &xdr)) { ++ err = rpc_decode_header(task, &xdr); ++out: ++ switch (err) { + case 0: + task->tk_action = rpc_exit_task; + task->tk_status = rpcauth_unwrap_resp(task, &xdr); +-- +2.20.1 + diff --git a/queue-5.3/sunrpc-rpc-level-errors-should-always-set-task-tk_rp.patch b/queue-5.3/sunrpc-rpc-level-errors-should-always-set-task-tk_rp.patch new file mode 100644 index 00000000000..1e855c44d8c --- /dev/null +++ b/queue-5.3/sunrpc-rpc-level-errors-should-always-set-task-tk_rp.patch @@ -0,0 +1,78 @@ +From 11f0bf71723a18dc0df6a12af8eda25b22d4fa8b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Sep 2019 08:06:51 -0400 +Subject: SUNRPC: RPC level errors should always set task->tk_rpc_status + +From: Trond Myklebust + +[ Upstream commit 714fbc73888f59321854e7f6c2f224213923bcad ] + +Ensure that we set task->tk_rpc_status for all RPC level errors so that +the caller can distinguish between those and server reply status errors. 
+ +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + net/sunrpc/clnt.c | 6 +++--- + net/sunrpc/sched.c | 5 ++++- + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c +index 7a75f34ad393b..e7fdc400506e8 100644 +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -1837,7 +1837,7 @@ call_allocate(struct rpc_task *task) + return; + } + +- rpc_exit(task, -ERESTARTSYS); ++ rpc_call_rpcerror(task, -ERESTARTSYS); + } + + static int +@@ -2561,7 +2561,7 @@ rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr) + return 0; + out_fail: + trace_rpc_bad_callhdr(task); +- rpc_exit(task, error); ++ rpc_call_rpcerror(task, error); + return error; + } + +@@ -2628,7 +2628,7 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr) + return -EAGAIN; + } + out_err: +- rpc_exit(task, error); ++ rpc_call_rpcerror(task, error); + return error; + + out_unparsable: +diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c +index 1f275aba786fc..53934fe73a9db 100644 +--- a/net/sunrpc/sched.c ++++ b/net/sunrpc/sched.c +@@ -930,8 +930,10 @@ static void __rpc_execute(struct rpc_task *task) + /* + * Signalled tasks should exit rather than sleep. 
+ */ +- if (RPC_SIGNALLED(task)) ++ if (RPC_SIGNALLED(task)) { ++ task->tk_rpc_status = -ERESTARTSYS; + rpc_exit(task, -ERESTARTSYS); ++ } + + /* + * The queue->lock protects against races with +@@ -967,6 +969,7 @@ static void __rpc_execute(struct rpc_task *task) + */ + dprintk("RPC: %5u got signal\n", task->tk_pid); + set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); ++ task->tk_rpc_status = -ERESTARTSYS; + rpc_exit(task, -ERESTARTSYS); + } + dprintk("RPC: %5u sync task resuming\n", task->tk_pid); +-- +2.20.1 + diff --git a/queue-5.3/thermal-fix-use-after-free-when-unregistering-therma.patch b/queue-5.3/thermal-fix-use-after-free-when-unregistering-therma.patch new file mode 100644 index 00000000000..c43fd4a1dfb --- /dev/null +++ b/queue-5.3/thermal-fix-use-after-free-when-unregistering-therma.patch @@ -0,0 +1,134 @@ +From 75cdcfe1bcbb779ffb1a0118fe6fcc82a79d355f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Jul 2019 13:14:52 +0300 +Subject: thermal: Fix use-after-free when unregistering thermal zone device + +From: Ido Schimmel + +[ Upstream commit 1851799e1d2978f68eea5d9dff322e121dcf59c1 ] + +thermal_zone_device_unregister() cancels the delayed work that polls the +thermal zone, but it does not wait for it to finish. This is racy with +respect to the freeing of the thermal zone device, which can result in a +use-after-free [1]. + +Fix this by waiting for the delayed work to finish before freeing the +thermal zone device. Note that thermal_zone_device_set_polling() is +never invoked from an atomic context, so it is safe to call +cancel_delayed_work_sync() that can block. 
+ +[1] +[ +0.002221] ================================================================== +[ +0.000064] BUG: KASAN: use-after-free in __mutex_lock+0x1076/0x11c0 +[ +0.000016] Read of size 8 at addr ffff8881e48e0450 by task kworker/1:0/17 + +[ +0.000023] CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 5.2.0-rc6-custom-02495-g8e73ca3be4af #1701 +[ +0.000010] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 +[ +0.000016] Workqueue: events_freezable_power_ thermal_zone_device_check +[ +0.000012] Call Trace: +[ +0.000021] dump_stack+0xa9/0x10e +[ +0.000020] print_address_description.cold.2+0x9/0x25e +[ +0.000018] __kasan_report.cold.3+0x78/0x9d +[ +0.000016] kasan_report+0xe/0x20 +[ +0.000016] __mutex_lock+0x1076/0x11c0 +[ +0.000014] step_wise_throttle+0x72/0x150 +[ +0.000018] handle_thermal_trip+0x167/0x760 +[ +0.000019] thermal_zone_device_update+0x19e/0x5f0 +[ +0.000019] process_one_work+0x969/0x16f0 +[ +0.000017] worker_thread+0x91/0xc40 +[ +0.000014] kthread+0x33d/0x400 +[ +0.000015] ret_from_fork+0x3a/0x50 + +[ +0.000020] Allocated by task 1: +[ +0.000015] save_stack+0x19/0x80 +[ +0.000015] __kasan_kmalloc.constprop.4+0xc1/0xd0 +[ +0.000014] kmem_cache_alloc_trace+0x152/0x320 +[ +0.000015] thermal_zone_device_register+0x1b4/0x13a0 +[ +0.000015] mlxsw_thermal_init+0xc92/0x23d0 +[ +0.000014] __mlxsw_core_bus_device_register+0x659/0x11b0 +[ +0.000013] mlxsw_core_bus_device_register+0x3d/0x90 +[ +0.000013] mlxsw_pci_probe+0x355/0x4b0 +[ +0.000014] local_pci_probe+0xc3/0x150 +[ +0.000013] pci_device_probe+0x280/0x410 +[ +0.000013] really_probe+0x26a/0xbb0 +[ +0.000013] driver_probe_device+0x208/0x2e0 +[ +0.000013] device_driver_attach+0xfe/0x140 +[ +0.000013] __driver_attach+0x110/0x310 +[ +0.000013] bus_for_each_dev+0x14b/0x1d0 +[ +0.000013] driver_register+0x1c0/0x400 +[ +0.000015] mlxsw_sp_module_init+0x5d/0xd3 +[ +0.000014] do_one_initcall+0x239/0x4dd +[ +0.000013] kernel_init_freeable+0x42b/0x4e8 +[ +0.000012] 
kernel_init+0x11/0x18b +[ +0.000013] ret_from_fork+0x3a/0x50 + +[ +0.000015] Freed by task 581: +[ +0.000013] save_stack+0x19/0x80 +[ +0.000014] __kasan_slab_free+0x125/0x170 +[ +0.000013] kfree+0xf3/0x310 +[ +0.000013] thermal_release+0xc7/0xf0 +[ +0.000014] device_release+0x77/0x200 +[ +0.000014] kobject_put+0x1a8/0x4c0 +[ +0.000014] device_unregister+0x38/0xc0 +[ +0.000014] thermal_zone_device_unregister+0x54e/0x6a0 +[ +0.000014] mlxsw_thermal_fini+0x184/0x35a +[ +0.000014] mlxsw_core_bus_device_unregister+0x10a/0x640 +[ +0.000013] mlxsw_devlink_core_bus_device_reload+0x92/0x210 +[ +0.000015] devlink_nl_cmd_reload+0x113/0x1f0 +[ +0.000014] genl_family_rcv_msg+0x700/0xee0 +[ +0.000013] genl_rcv_msg+0xca/0x170 +[ +0.000013] netlink_rcv_skb+0x137/0x3a0 +[ +0.000012] genl_rcv+0x29/0x40 +[ +0.000013] netlink_unicast+0x49b/0x660 +[ +0.000013] netlink_sendmsg+0x755/0xc90 +[ +0.000013] __sys_sendto+0x3de/0x430 +[ +0.000013] __x64_sys_sendto+0xe2/0x1b0 +[ +0.000013] do_syscall_64+0xa4/0x4d0 +[ +0.000013] entry_SYSCALL_64_after_hwframe+0x49/0xbe + +[ +0.000017] The buggy address belongs to the object at ffff8881e48e0008 + which belongs to the cache kmalloc-2k of size 2048 +[ +0.000012] The buggy address is located 1096 bytes inside of + 2048-byte region [ffff8881e48e0008, ffff8881e48e0808) +[ +0.000007] The buggy address belongs to the page: +[ +0.000012] page:ffffea0007923800 refcount:1 mapcount:0 mapping:ffff88823680d0c0 index:0x0 compound_mapcount: 0 +[ +0.000020] flags: 0x200000000010200(slab|head) +[ +0.000019] raw: 0200000000010200 ffffea0007682008 ffffea00076ab808 ffff88823680d0c0 +[ +0.000016] raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000 +[ +0.000007] page dumped because: kasan: bad access detected + +[ +0.000012] Memory state around the buggy address: +[ +0.000012] ffff8881e48e0300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ +0.000012] ffff8881e48e0380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ +0.000012] 
>ffff8881e48e0400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ +0.000008] ^ +[ +0.000012] ffff8881e48e0480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ +0.000012] ffff8881e48e0500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ +0.000007] ================================================================== + +Fixes: b1569e99c795 ("ACPI: move thermal trip handling to generic thermal layer") +Reported-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Acked-by: Jiri Pirko +Signed-off-by: Zhang Rui +Signed-off-by: Sasha Levin +--- + drivers/thermal/thermal_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c +index 6bab66e84eb58..ebe15f2cf7fc3 100644 +--- a/drivers/thermal/thermal_core.c ++++ b/drivers/thermal/thermal_core.c +@@ -304,7 +304,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, + &tz->poll_queue, + msecs_to_jiffies(delay)); + else +- cancel_delayed_work(&tz->poll_queue); ++ cancel_delayed_work_sync(&tz->poll_queue); + } + + static void monitor_thermal_zone(struct thermal_zone_device *tz) +-- +2.20.1 + diff --git a/queue-5.3/thermal_hwmon-sanitize-thermal_zone-type.patch b/queue-5.3/thermal_hwmon-sanitize-thermal_zone-type.patch new file mode 100644 index 00000000000..2be896a4a6c --- /dev/null +++ b/queue-5.3/thermal_hwmon-sanitize-thermal_zone-type.patch @@ -0,0 +1,53 @@ +From db1fa333c145819ea92a918a70083f18b4e4c6c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jul 2019 16:32:36 +0300 +Subject: thermal_hwmon: Sanitize thermal_zone type + +From: Stefan Mavrodiev + +[ Upstream commit 8c7aa184281c01fc26f319059efb94725012921d ] + +When calling thermal_add_hwmon_sysfs(), the device type is sanitized by +replacing '-' with '_'. However tz->type remains unsanitized. Thus +calling thermal_hwmon_lookup_by_type() returns no device. 
And if there is +no device, thermal_remove_hwmon_sysfs() fails with "hwmon device lookup +failed!". + +The result is unregistered hwmon devices in the sysfs. + +Fixes: 409ef0bacacf ("thermal_hwmon: Sanitize attribute name passed to hwmon") + +Signed-off-by: Stefan Mavrodiev +Signed-off-by: Zhang Rui +Signed-off-by: Sasha Levin +--- + drivers/thermal/thermal_hwmon.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c +index 40c69a533b240..dd5d8ee379287 100644 +--- a/drivers/thermal/thermal_hwmon.c ++++ b/drivers/thermal/thermal_hwmon.c +@@ -87,13 +87,17 @@ static struct thermal_hwmon_device * + thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) + { + struct thermal_hwmon_device *hwmon; ++ char type[THERMAL_NAME_LENGTH]; + + mutex_lock(&thermal_hwmon_list_lock); +- list_for_each_entry(hwmon, &thermal_hwmon_list, node) +- if (!strcmp(hwmon->type, tz->type)) { ++ list_for_each_entry(hwmon, &thermal_hwmon_list, node) { ++ strcpy(type, tz->type); ++ strreplace(type, '-', '_'); ++ if (!strcmp(hwmon->type, type)) { + mutex_unlock(&thermal_hwmon_list_lock); + return hwmon; + } ++ } + mutex_unlock(&thermal_hwmon_list_lock); + + return NULL; +-- +2.20.1 + diff --git a/queue-5.3/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch b/queue-5.3/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch new file mode 100644 index 00000000000..bfffea67bd3 --- /dev/null +++ b/queue-5.3/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch @@ -0,0 +1,173 @@ +From 653435454a64337b2f789538a53727a2ab012b59 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Sep 2019 15:51:01 +0200 +Subject: tick: broadcast-hrtimer: Fix a race in bc_set_next + +From: Balasubramani Vivekanandan + +[ Upstream commit b9023b91dd020ad7e093baa5122b6968c48cc9e0 ] + +When a cpu requests broadcasting, before starting the tick broadcast +hrtimer, bc_set_next() checks if the timer callback (bc_handler) 
is active +using hrtimer_try_to_cancel(). But hrtimer_try_to_cancel() does not provide +the required synchronization when the callback is active on other core. + +The callback could have already executed tick_handle_oneshot_broadcast() +and could have also returned. But still there is a small time window where +the hrtimer_try_to_cancel() returns -1. In that case bc_set_next() returns +without doing anything, but the next_event of the tick broadcast clock +device is already set to a timeout value. + +In the race condition diagram below, CPU #1 is running the timer callback +and CPU #2 is entering idle state and so calls bc_set_next(). + +In the worst case, the next_event will contain an expiry time, but the +hrtimer will not be started which happens when the racing callback returns +HRTIMER_NORESTART. The hrtimer might never recover if all further requests +from the CPUs to subscribe to tick broadcast have timeout greater than the +next_event of tick broadcast clock device. This leads to cascading of +failures and finally noticed as rcu stall warnings + +Here is a depiction of the race condition + +CPU #1 (Running timer callback) CPU #2 (Enter idle + and subscribe to + tick broadcast) +--------------------- --------------------- + +__run_hrtimer() tick_broadcast_enter() + + bc_handler() __tick_broadcast_oneshot_control() + + tick_handle_oneshot_broadcast() + + raw_spin_lock(&tick_broadcast_lock); + + dev->next_event = KTIME_MAX; //wait for tick_broadcast_lock + //next_event for tick broadcast clock + set to KTIME_MAX since no other cores + subscribed to tick broadcasting + + raw_spin_unlock(&tick_broadcast_lock); + + if (dev->next_event == KTIME_MAX) + return HRTIMER_NORESTART + // callback function exits without + restarting the hrtimer //tick_broadcast_lock acquired + raw_spin_lock(&tick_broadcast_lock); + + tick_broadcast_set_event() + + clockevents_program_event() + + dev->next_event = expires; + + bc_set_next() + + hrtimer_try_to_cancel() + //returns -1 since 
the timer + callback is active. Exits without + restarting the timer + cpu_base->running = NULL; + +The comment that hrtimer cannot be armed from within the callback is +wrong. It is fine to start the hrtimer from within the callback. Also it is +safe to start the hrtimer from the enter/exit idle code while the broadcast +handler is active. The enter/exit idle code and the broadcast handler are +synchronized using tick_broadcast_lock. So there is no need for the +existing try to cancel logic. All this can be removed which will eliminate +the race condition as well. + +Fixes: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast") +Originally-by: Thomas Gleixner +Signed-off-by: Balasubramani Vivekanandan +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20190926135101.12102-2-balasubramani_vivekanandan@mentor.com +Signed-off-by: Sasha Levin +--- + kernel/time/tick-broadcast-hrtimer.c | 57 ++++++++++++++-------------- + 1 file changed, 29 insertions(+), 28 deletions(-) + +diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c +index 5be6154e2fd2c..99fbfb8d9117c 100644 +--- a/kernel/time/tick-broadcast-hrtimer.c ++++ b/kernel/time/tick-broadcast-hrtimer.c +@@ -42,34 +42,39 @@ static int bc_shutdown(struct clock_event_device *evt) + */ + static int bc_set_next(ktime_t expires, struct clock_event_device *bc) + { +- int bc_moved; + /* +- * We try to cancel the timer first. If the callback is on +- * flight on some other cpu then we let it handle it. If we +- * were able to cancel the timer nothing can rearm it as we +- * own broadcast_lock. ++ * This is called either from enter/exit idle code or from the ++ * broadcast handler. In all cases tick_broadcast_lock is held. + * +- * However we can also be called from the event handler of +- * ce_broadcast_hrtimer itself when it expires. 
We cannot +- * restart the timer because we are in the callback, but we +- * can set the expiry time and let the callback return +- * HRTIMER_RESTART. ++ * hrtimer_cancel() cannot be called here, either from the ++ * broadcast handler or from the enter/exit idle code. The idle ++ * code can run into the problem described in bc_shutdown() and the ++ * broadcast handler cannot wait for itself to complete for obvious ++ * reasons. + * +- * Since we are in the idle loop at this point and because +- * hrtimer_{start/cancel} functions call into tracing, +- * calls to these functions must be bound within RCU_NONIDLE. ++ * Each caller tries to arm the hrtimer on its own CPU, but if the ++ * hrtimer callback function is currently running, then ++ * hrtimer_start() cannot move it and the timer stays on the CPU on ++ * which it is assigned at the moment. ++ * ++ * As this can be called from idle code, the hrtimer_start() ++ * invocation has to be wrapped with RCU_NONIDLE() as ++ * hrtimer_start() can call into tracing. + */ +- RCU_NONIDLE({ +- bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0; +- if (bc_moved) +- hrtimer_start(&bctimer, expires, +- HRTIMER_MODE_ABS_PINNED);}); +- if (bc_moved) { +- /* Bind the "device" to the cpu */ +- bc->bound_on = smp_processor_id(); +- } else if (bc->bound_on == smp_processor_id()) { +- hrtimer_set_expires(&bctimer, expires); +- } ++ RCU_NONIDLE( { ++ hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED); ++ /* ++ * The core tick broadcast mode expects bc->bound_on to be set ++ * correctly to prevent a CPU which has the broadcast hrtimer ++ * armed from going deep idle. ++ * ++ * As tick_broadcast_lock is held, nothing can change the cpu ++ * base which was just established in hrtimer_start() above. So ++ * the below access is safe even without holding the hrtimer ++ * base lock. 
++ */ ++ bc->bound_on = bctimer.base->cpu_base->cpu; ++ } ); + return 0; + } + +@@ -95,10 +100,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t) + { + ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); + +- if (clockevent_state_oneshot(&ce_broadcast_hrtimer)) +- if (ce_broadcast_hrtimer.next_event != KTIME_MAX) +- return HRTIMER_RESTART; +- + return HRTIMER_NORESTART; + } + +-- +2.20.1 + diff --git a/queue-5.3/watchdog-aspeed-add-support-for-ast2600.patch b/queue-5.3/watchdog-aspeed-add-support-for-ast2600.patch new file mode 100644 index 00000000000..a38b7a9b0a9 --- /dev/null +++ b/queue-5.3/watchdog-aspeed-add-support-for-ast2600.patch @@ -0,0 +1,47 @@ +From 0e2bfaa722a2ec02b41467b88ccc41f9dcd49c89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2019 14:47:38 +0930 +Subject: watchdog: aspeed: Add support for AST2600 + +From: Ryan Chen + +[ Upstream commit b3528b4874480818e38e4da019d655413c233e6a ] + +The ast2600 can be supported by the same code as the ast2500. 
+ +Signed-off-by: Ryan Chen +Signed-off-by: Joel Stanley +Reviewed-by: Guenter Roeck +Link: https://lore.kernel.org/r/20190819051738.17370-3-joel@jms.id.au +Signed-off-by: Guenter Roeck +Signed-off-by: Wim Van Sebroeck +Signed-off-by: Sasha Levin +--- + drivers/watchdog/aspeed_wdt.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c +index cc71861e033a5..5b64bc2e87888 100644 +--- a/drivers/watchdog/aspeed_wdt.c ++++ b/drivers/watchdog/aspeed_wdt.c +@@ -34,6 +34,7 @@ static const struct aspeed_wdt_config ast2500_config = { + static const struct of_device_id aspeed_wdt_of_table[] = { + { .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config }, + { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config }, ++ { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config }, + { }, + }; + MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); +@@ -259,7 +260,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev) + set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); + } + +- if (of_device_is_compatible(np, "aspeed,ast2500-wdt")) { ++ if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) || ++ (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) { + u32 reg = readl(wdt->base + WDT_RESET_WIDTH); + + reg &= config->ext_pulse_width_mask; +-- +2.20.1 + diff --git a/queue-5.3/x86-purgatory-disable-the-stackleak-gcc-plugin-for-t.patch b/queue-5.3/x86-purgatory-disable-the-stackleak-gcc-plugin-for-t.patch new file mode 100644 index 00000000000..8ba84beefaf --- /dev/null +++ b/queue-5.3/x86-purgatory-disable-the-stackleak-gcc-plugin-for-t.patch @@ -0,0 +1,53 @@ +From 28510a268607b7d5286f16c20c4a500415c40e35 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Sep 2019 13:17:54 -0400 +Subject: x86/purgatory: Disable the stackleak GCC plugin for the purgatory + +From: Arvind Sankar + +[ Upstream commit ca14c996afe7228ff9b480cf225211cc17212688 ] + +Since commit: + + b059f801a937 
("x86/purgatory: Use CFLAGS_REMOVE rather than reset KBUILD_CFLAGS") + +kexec breaks if GCC_PLUGIN_STACKLEAK=y is enabled, as the purgatory +contains undefined references to stackleak_track_stack. + +Attempting to load a kexec kernel results in this failure: + + kexec: Undefined symbol: stackleak_track_stack + kexec-bzImage64: Loading purgatory failed + +Fix this by disabling the stackleak plugin for the purgatory. + +Signed-off-by: Arvind Sankar +Reviewed-by: Nick Desaulniers +Cc: Borislav Petkov +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Fixes: b059f801a937 ("x86/purgatory: Use CFLAGS_REMOVE rather than reset KBUILD_CFLAGS") +Link: https://lkml.kernel.org/r/20190923171753.GA2252517@rani.riverdale.lan +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +--- + arch/x86/purgatory/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile +index 10fb42da0007e..b81b5172cf994 100644 +--- a/arch/x86/purgatory/Makefile ++++ b/arch/x86/purgatory/Makefile +@@ -23,6 +23,7 @@ KCOV_INSTRUMENT := n + + PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel + PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss ++PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) + + # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. 
That + # in turn leaves some undefined symbols like __fentry__ in purgatory and not +-- +2.20.1 + diff --git a/queue-5.3/xen-pci-reserve-mcfg-areas-earlier.patch b/queue-5.3/xen-pci-reserve-mcfg-areas-earlier.patch new file mode 100644 index 00000000000..c0c6e946853 --- /dev/null +++ b/queue-5.3/xen-pci-reserve-mcfg-areas-earlier.patch @@ -0,0 +1,90 @@ +From f249e87d3ad549c9d1dec2282df074d55ac4a981 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Sep 2019 19:31:51 +0100 +Subject: xen/pci: reserve MCFG areas earlier + +From: Igor Druzhinin + +[ Upstream commit a4098bc6eed5e31e0391bcc068e61804c98138df ] + +If MCFG area is not reserved in E820, Xen by default will defer its usage +until Dom0 registers it explicitly after ACPI parser recognizes it as +a reserved resource in DSDT. Having it reserved in E820 is not +mandatory according to "PCI Firmware Specification, rev 3.2" (par. 4.1.2) +and firmware is free to keep a hole in E820 in that place. Xen doesn't know +what exactly is inside this hole since it lacks full ACPI view of the +platform therefore it's potentially harmful to access MCFG region +without additional checks as some machines are known to provide +inconsistent information on the size of the region. + +Now xen_mcfg_late() runs after acpi_init() which is too late as some basic +PCI enumeration starts exactly there as well. Trying to register a device +prior to MCFG reservation causes multiple problems with PCIe extended +capability initializations in Xen (e.g. SR-IOV VF BAR sizing). There are +no convenient hooks for us to subscribe to so register MCFG areas earlier +upon the first invocation of xen_add_device(). It should be safe to do once +since all the boot time buses must have their MCFG areas in MCFG table +already and we don't support PCI bus hot-plug. 
+ +Signed-off-by: Igor Druzhinin +Reviewed-by: Boris Ostrovsky +Signed-off-by: Boris Ostrovsky +Signed-off-by: Sasha Levin +--- + drivers/xen/pci.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c +index 3eeb9bea76300..224df03ce42e3 100644 +--- a/drivers/xen/pci.c ++++ b/drivers/xen/pci.c +@@ -17,6 +17,8 @@ + #include "../pci/pci.h" + #ifdef CONFIG_PCI_MMCONFIG + #include ++ ++static int xen_mcfg_late(void); + #endif + + static bool __read_mostly pci_seg_supported = true; +@@ -28,7 +30,18 @@ static int xen_add_device(struct device *dev) + #ifdef CONFIG_PCI_IOV + struct pci_dev *physfn = pci_dev->physfn; + #endif +- ++#ifdef CONFIG_PCI_MMCONFIG ++ static bool pci_mcfg_reserved = false; ++ /* ++ * Reserve MCFG areas in Xen on first invocation due to this being ++ * potentially called from inside of acpi_init immediately after ++ * MCFG table has been finally parsed. ++ */ ++ if (!pci_mcfg_reserved) { ++ xen_mcfg_late(); ++ pci_mcfg_reserved = true; ++ } ++#endif + if (pci_seg_supported) { + struct { + struct physdev_pci_device_add add; +@@ -201,7 +214,7 @@ static int __init register_xen_pci_notifier(void) + arch_initcall(register_xen_pci_notifier); + + #ifdef CONFIG_PCI_MMCONFIG +-static int __init xen_mcfg_late(void) ++static int xen_mcfg_late(void) + { + struct pci_mmcfg_region *cfg; + int rc; +@@ -240,8 +253,4 @@ static int __init xen_mcfg_late(void) + } + return 0; + } +-/* +- * Needs to be done after acpi_init which are subsys_initcall. 
+- */ +-subsys_initcall_sync(xen_mcfg_late); + #endif +-- +2.20.1 + diff --git a/queue-5.3/xprtrdma-send-queue-size-grows-after-a-reconnect.patch b/queue-5.3/xprtrdma-send-queue-size-grows-after-a-reconnect.patch new file mode 100644 index 00000000000..f3976790693 --- /dev/null +++ b/queue-5.3/xprtrdma-send-queue-size-grows-after-a-reconnect.patch @@ -0,0 +1,122 @@ +From 0a5066381db04632c174a9a2a2f8dc2a55ab51c8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Aug 2019 13:12:57 -0400 +Subject: xprtrdma: Send Queue size grows after a reconnect + +From: Chuck Lever + +[ Upstream commit 98ef77d1aaa7a2f4e1b2a721faa084222021fda7 ] + +Eli Dorfman reports that after a series of idle disconnects, an +RPC/RDMA transport becomes unusable (rdma_create_qp returns +-ENOMEM). Problem was tracked down to increasing Send Queue size +after each reconnect. + +The rdma_create_qp() API does not promise to leave its @qp_init_attr +parameter unaltered. In fact, some drivers do modify one or more of +its fields. Thus our calls to rdma_create_qp must use a fresh copy +of ib_qp_init_attr each time. + +This fix is appropriate for kernels dating back to late 2007, though +it will have to be adapted, as the connect code has changed over the +years. + +Reported-by: Eli Dorfman +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtrdma/verbs.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 805b1f35e1caa..2bd9b4de0e325 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -605,10 +605,10 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) + * Unlike a normal reconnection, a fresh PD and a new set + * of MRs and buffers is needed. 
+ */ +-static int +-rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, +- struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ++static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, ++ struct ib_qp_init_attr *qp_init_attr) + { ++ struct rpcrdma_ia *ia = &r_xprt->rx_ia; + int rc, err; + + trace_xprtrdma_reinsert(r_xprt); +@@ -625,7 +625,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, + } + + rc = -ENETUNREACH; +- err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); ++ err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr); + if (err) { + pr_err("rpcrdma: rdma_create_qp returned %d\n", err); + goto out3; +@@ -642,16 +642,16 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, + return rc; + } + +-static int +-rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, +- struct rpcrdma_ia *ia) ++static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, ++ struct ib_qp_init_attr *qp_init_attr) + { ++ struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rdma_cm_id *id, *old; + int err, rc; + + trace_xprtrdma_reconnect(r_xprt); + +- rpcrdma_ep_disconnect(ep, ia); ++ rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia); + + rc = -EHOSTUNREACH; + id = rpcrdma_create_id(r_xprt, ia); +@@ -673,7 +673,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, + goto out_destroy; + } + +- err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); ++ err = rdma_create_qp(id, ia->ri_pd, qp_init_attr); + if (err) + goto out_destroy; + +@@ -698,25 +698,27 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) + struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, + rx_ia); + struct rpc_xprt *xprt = &r_xprt->rx_xprt; ++ struct ib_qp_init_attr qp_init_attr; + int rc; + + retry: ++ memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr)); + switch (ep->rep_connected) { + case 0: + dprintk("RPC: %s: connecting...\n", __func__); +- rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); ++ rc = 
rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr); + if (rc) { + rc = -ENETUNREACH; + goto out_noupdate; + } + break; + case -ENODEV: +- rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia); ++ rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr); + if (rc) + goto out_noupdate; + break; + default: +- rc = rpcrdma_ep_reconnect(r_xprt, ep, ia); ++ rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr); + if (rc) + goto out; + } +-- +2.20.1 + diff --git a/queue-5.3/xprtrdma-toggle-xprt_congested-in-xprtrdma-s-slot-me.patch b/queue-5.3/xprtrdma-toggle-xprt_congested-in-xprtrdma-s-slot-me.patch new file mode 100644 index 00000000000..463bc582fc2 --- /dev/null +++ b/queue-5.3/xprtrdma-toggle-xprt_congested-in-xprtrdma-s-slot-me.patch @@ -0,0 +1,50 @@ +From f20f60af2fec5cf3f903d861cf65019aa072bc60 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2019 18:43:17 -0400 +Subject: xprtrdma: Toggle XPRT_CONGESTED in xprtrdma's slot methods + +From: Chuck Lever + +[ Upstream commit 395790566eec37706dedeb94779045adc3a7581e ] + +Commit 48be539dd44a ("xprtrdma: Introduce ->alloc_slot call-out for +xprtrdma") added a separate alloc_slot and free_slot to the RPC/RDMA +transport. Later, commit 75891f502f5f ("SUNRPC: Support for +congestion control when queuing is enabled") modified the generic +alloc/free_slot methods, but neglected the methods in xprtrdma. + +Found via code review. + +Fixes: 75891f502f5f ("SUNRPC: Support for congestion control ... 
") +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtrdma/transport.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 2ec349ed47702..f4763e8a67617 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -571,6 +571,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) + return; + + out_sleep: ++ set_bit(XPRT_CONGESTED, &xprt->state); + rpc_sleep_on(&xprt->backlog, task, NULL); + task->tk_status = -EAGAIN; + } +@@ -589,7 +590,8 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) + + memset(rqst, 0, sizeof(*rqst)); + rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); +- rpc_wake_up_next(&xprt->backlog); ++ if (unlikely(!rpc_wake_up_next(&xprt->backlog))) ++ clear_bit(XPRT_CONGESTED, &xprt->state); + } + + static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, +-- +2.20.1 + -- 2.47.2