From: Sasha Levin Date: Wed, 9 Oct 2019 01:26:33 +0000 (-0400) Subject: fixes for 4.14 X-Git-Tag: v4.14.149~19 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a5d14787963e7118863499ad3ae32eccb9048237;p=thirdparty%2Fkernel%2Fstable-queue.git fixes for 4.14 Signed-off-by: Sasha Levin --- diff --git a/queue-4.14/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch b/queue-4.14/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch new file mode 100644 index 00000000000..580ed1fdbe2 --- /dev/null +++ b/queue-4.14/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch @@ -0,0 +1,47 @@ +From f8b838ad0fa7874d55b0a6696588c3a899c8a3de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Aug 2019 18:03:25 +0800 +Subject: 9p: avoid attaching writeback_fid on mmap with type PRIVATE + +From: Chengguang Xu + +[ Upstream commit c87a37ebd40b889178664c2c09cc187334146292 ] + +Currently on mmap cache policy, we always attach writeback_fid +whether mmap type is SHARED or PRIVATE. However, in the use case +of kata-container which combines 9p(Guest OS) with overlayfs(Host OS), +this behavior will trigger overlayfs' copy-up when excute command +inside container. + +Link: http://lkml.kernel.org/r/20190820100325.10313-1-cgxu519@zoho.com.cn +Signed-off-by: Chengguang Xu +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + fs/9p/vfs_file.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c +index 89e69904976a5..2651192f01667 100644 +--- a/fs/9p/vfs_file.c ++++ b/fs/9p/vfs_file.c +@@ -528,6 +528,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) + v9inode = V9FS_I(inode); + mutex_lock(&v9inode->v_mutex); + if (!v9inode->writeback_fid && ++ (vma->vm_flags & VM_SHARED) && + (vma->vm_flags & VM_WRITE)) { + /* + * clone a fid and add it to writeback_fid +@@ -629,6 +630,8 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) + (vma->vm_end - vma->vm_start - 1), + }; + ++ if (!(vma->vm_flags & VM_SHARED)) ++ return; + + p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma); + +-- +2.20.1 + diff --git a/queue-4.14/block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch b/queue-4.14/block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch new file mode 100644 index 00000000000..a3df903a16a --- /dev/null +++ b/queue-4.14/block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch @@ -0,0 +1,37 @@ +From 3e704a4423781576fdf0d9884d17538f7523a0b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Sep 2017 13:08:51 -0700 +Subject: block/ndb: add WQ_UNBOUND to the knbd-recv workqueue + +From: Dan Melnic + +[ Upstream commit 2189c97cdbed630d5971ab22f05dc998774e354e ] + +Add WQ_UNBOUND to the knbd-recv workqueue so we're not bound +to a single CPU that is selected at device creation time. 
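+
+As a minimal sketch of the mechanism, assuming only the core
+workqueue API (the names below are illustrative, not the driver's
+exact code): a per-CPU workqueue tends to run work on the CPU that
+queued it, while an unbound one lets the scheduler place the work
+on any CPU, which suits a long-running receive loop.
+
+  static DECLARE_WORK(work, recv_fn);   /* recv_fn is illustrative */
+  struct workqueue_struct *wq;
+
+  wq = alloc_workqueue("knbd-recv",
+                       WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
+  if (!wq)
+          return -ENOMEM;
+  queue_work(wq, &work);                /* may now run on any CPU */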
+ +Signed-off-by: Dan Melnic +Reviewed-by: Josef Bacik +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/nbd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index a65e4ed6c9372..14b491c5cf7b6 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -2217,7 +2217,8 @@ static int __init nbd_init(void) + if (nbds_max > 1UL << (MINORBITS - part_shift)) + return -EINVAL; + recv_workqueue = alloc_workqueue("knbd-recv", +- WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); ++ WQ_MEM_RECLAIM | WQ_HIGHPRI | ++ WQ_UNBOUND, 0); + if (!recv_workqueue) + return -ENOMEM; + +-- +2.20.1 + diff --git a/queue-4.14/ceph-fix-directories-inode-i_blkbits-initialization.patch b/queue-4.14/ceph-fix-directories-inode-i_blkbits-initialization.patch new file mode 100644 index 00000000000..b136d259184 --- /dev/null +++ b/queue-4.14/ceph-fix-directories-inode-i_blkbits-initialization.patch @@ -0,0 +1,50 @@ +From b9b8557f927affa43400874b20744c88351561e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jul 2019 16:50:20 +0100 +Subject: ceph: fix directories inode i_blkbits initialization + +From: Luis Henriques + +[ Upstream commit 750670341a24cb714e624e0fd7da30900ad93752 ] + +When filling an inode with info from the MDS, i_blkbits is being +initialized using fl_stripe_unit, which contains the stripe unit in +bytes. Unfortunately, this doesn't make sense for directories as they +have fl_stripe_unit set to '0'. This means that i_blkbits will be set +to 0xff, causing an UBSAN undefined behaviour in i_blocksize(): + + UBSAN: Undefined behaviour in ./include/linux/fs.h:731:12 + shift exponent 255 is too large for 32-bit type 'int' + +Fix this by initializing i_blkbits to CEPH_BLOCK_SHIFT if fl_stripe_unit +is zero. 
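+
+To make the failure concrete (i_blkbits is a byte-sized field and
+fls(0) returns 0, so this is the arithmetic the guard above avoids):
+
+  /* directories report fl_stripe_unit == 0 */
+  inode->i_blkbits = fls(0) - 1;   /* -1, truncated to 0xff */
+  i_blocksize(inode);              /* computes 1 << 255: UB */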
+ +Signed-off-by: Luis Henriques +Reviewed-by: Jeff Layton +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/inode.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c +index 9bda8c7a80a05..879bc08250931 100644 +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -789,7 +789,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page, + ci->i_version = le64_to_cpu(info->version); + inode->i_version++; + inode->i_rdev = le32_to_cpu(info->rdev); +- inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; ++ /* directories have fl_stripe_unit set to zero */ ++ if (le32_to_cpu(info->layout.fl_stripe_unit)) ++ inode->i_blkbits = ++ fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; ++ else ++ inode->i_blkbits = CEPH_BLOCK_SHIFT; + + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { +-- +2.20.1 + diff --git a/queue-4.14/ceph-reconnect-connection-if-session-hang-in-opening.patch b/queue-4.14/ceph-reconnect-connection-if-session-hang-in-opening.patch new file mode 100644 index 00000000000..deb16fd9651 --- /dev/null +++ b/queue-4.14/ceph-reconnect-connection-if-session-hang-in-opening.patch @@ -0,0 +1,46 @@ +From 9e42bcc0b5499e4033726891f3275461ac5bd667 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Aug 2019 21:22:45 +0800 +Subject: ceph: reconnect connection if session hang in opening state + +From: Erqi Chen + +[ Upstream commit 71a228bc8d65900179e37ac309e678f8c523f133 ] + +If client mds session is evicted in CEPH_MDS_SESSION_OPENING state, +mds won't send session msg to client, and delayed_work skip +CEPH_MDS_SESSION_OPENING state session, the session hang forever. + +Allow ceph_con_keepalive to reconnect a session in OPENING to avoid +session hang. Also, ensure that we skip sessions in RESTARTING and +REJECTED states since those states can't be resurrected by issuing +a keepalive. + +Link: https://tracker.ceph.com/issues/41551 +Signed-off-by: Erqi Chen chenerqi@gmail.com +Reviewed-by: "Yan, Zheng" +Signed-off-by: Jeff Layton +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/mds_client.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c +index e1ded4bd61154..b968334f841e8 100644 +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -3543,7 +3543,9 @@ static void delayed_work(struct work_struct *work) + pr_info("mds%d hung\n", s->s_mds); + } + } +- if (s->s_state < CEPH_MDS_SESSION_OPEN) { ++ if (s->s_state == CEPH_MDS_SESSION_NEW || ++ s->s_state == CEPH_MDS_SESSION_RESTARTING || ++ s->s_state == CEPH_MDS_SESSION_REJECTED) { + /* this mds is failed or recovering, just wait */ + ceph_put_mds_session(s); + continue; +-- +2.20.1 + diff --git a/queue-4.14/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch b/queue-4.14/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch new file mode 100644 index 00000000000..dccf811f1ec --- /dev/null +++ b/queue-4.14/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch @@ -0,0 +1,39 @@ +From 0d0b92bd9fdcd1ef6c111ecfd1176ad30a7d05ae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 31 Aug 2019 21:25:36 +0200 +Subject: drm/amdgpu: Check for valid number of registers to read + +From: Trek + +[ Upstream commit 73d8e6c7b841d9bf298c8928f228fb433676635c ] + +Do not try to allocate any amount of memory requested by the user. +Instead limit it to 128 registers. 
Actually the longest series of +consecutive allowed registers are 48, mmGB_TILE_MODE0-31 and +mmGB_MACROTILE_MODE0-15 (0x2644-0x2673). + +Bug: https://bugs.freedesktop.org/show_bug.cgi?id=111273 +Signed-off-by: Trek +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index e16229000a983..884ed359f2493 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -540,6 +540,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) + sh_num = 0xffffffff; + ++ if (info->read_mmr_reg.count > 128) ++ return -EINVAL; ++ + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); + if (!regs) + return -ENOMEM; +-- +2.20.1 + diff --git a/queue-4.14/drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch b/queue-4.14/drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch new file mode 100644 index 00000000000..35fa4ca3dc8 --- /dev/null +++ b/queue-4.14/drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch @@ -0,0 +1,57 @@ +From 77fbf1f2ce3d14f2dcd6bbf3dbbe3716f062685f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Jul 2019 15:03:27 +0100 +Subject: drm/i915/userptr: Acquire the page lock around set_page_dirty() + +From: Chris Wilson + +[ Upstream commit cb6d7c7dc7ff8cace666ddec66334117a6068ce2 ] + +set_page_dirty says: + + For pages with a mapping this should be done under the page lock + for the benefit of asynchronous memory errors who prefer a + consistent dirty state. This rule can be broken in some special + cases, but should be better not to. + +Under those rules, it is only safe for us to use the plain set_page_dirty +calls for shmemfs/anonymous memory. Userptr may be used with real +mappings and so needs to use the locked version (set_page_dirty_lock). + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203317 +Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") +References: 6dcc693bc57f ("ext4: warn when page is dirtied without buffers") +Signed-off-by: Chris Wilson +Cc: Tvrtko Ursulin +Cc: stable@vger.kernel.org +Reviewed-by: Tvrtko Ursulin +Link: https://patchwork.freedesktop.org/patch/msgid/20190708140327.26825-1-chris@chris-wilson.co.uk +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/i915_gem_userptr.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c +index 05ae8c4a8a1b6..9760b67dab28b 100644 +--- a/drivers/gpu/drm/i915/i915_gem_userptr.c ++++ b/drivers/gpu/drm/i915/i915_gem_userptr.c +@@ -691,7 +691,15 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, + + for_each_sgt_page(page, sgt_iter, pages) { + if (obj->mm.dirty) +- set_page_dirty(page); ++ /* ++ * As this may not be anonymous memory (e.g. shmem) ++ * but exist on a real mapping, we have to lock ++ * the page in order to dirty it -- holding ++ * the page reference is not sufficient to ++ * prevent the inode from being truncated. ++ * Play safe and take the lock. 
++ */ ++ set_page_dirty_lock(page); + + mark_page_accessed(page); + put_page(page); +-- +2.20.1 + diff --git a/queue-4.14/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch b/queue-4.14/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch new file mode 100644 index 00000000000..a09101b07bf --- /dev/null +++ b/queue-4.14/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch @@ -0,0 +1,46 @@ +From d143e18e15e29aad2ccda08c1adcf5434acd59ba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jul 2019 15:48:53 +0800 +Subject: fs: nfs: Fix possible null-pointer dereferences in encode_attrs() + +From: Jia-Ju Bai + +[ Upstream commit e2751463eaa6f9fec8fea80abbdc62dbc487b3c5 ] + +In encode_attrs(), there is an if statement on line 1145 to check +whether label is NULL: + if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL)) + +When label is NULL, it is used on lines 1178-1181: + *p++ = cpu_to_be32(label->lfs); + *p++ = cpu_to_be32(label->pi); + *p++ = cpu_to_be32(label->len); + p = xdr_encode_opaque_fixed(p, label->label, label->len); + +To fix these bugs, label is checked before being used. + +These bugs are found by a static analysis tool STCheck written by us. + +Signed-off-by: Jia-Ju Bai +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4xdr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c +index 549c916d28599..525684b0056fc 100644 +--- a/fs/nfs/nfs4xdr.c ++++ b/fs/nfs/nfs4xdr.c +@@ -1132,7 +1132,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, + } else + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); + } +- if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { ++ if (label && (bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) { + *p++ = cpu_to_be32(label->lfs); + *p++ = cpu_to_be32(label->pi); + *p++ = cpu_to_be32(label->len); +-- +2.20.1 + diff --git a/queue-4.14/fuse-fix-memleak-in-cuse_channel_open.patch b/queue-4.14/fuse-fix-memleak-in-cuse_channel_open.patch new file mode 100644 index 00000000000..5ba80eae57c --- /dev/null +++ b/queue-4.14/fuse-fix-memleak-in-cuse_channel_open.patch @@ -0,0 +1,39 @@ +From 95342965d7dbfd5f71d8a401a37eef6ed5ce0a96 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Aug 2019 15:59:09 +0800 +Subject: fuse: fix memleak in cuse_channel_open + +From: zhengbin + +[ Upstream commit 9ad09b1976c562061636ff1e01bfc3a57aebe56b ] + +If cuse_send_init fails, need to fuse_conn_put cc->fc. 
+ +cuse_channel_open->fuse_conn_init->refcount_set(&fc->count, 1) + ->fuse_dev_alloc->fuse_conn_get + ->fuse_dev_free->fuse_conn_put + +Fixes: cc080e9e9be1 ("fuse: introduce per-instance fuse_dev structure") +Reported-by: Hulk Robot +Signed-off-by: zhengbin +Signed-off-by: Miklos Szeredi +Signed-off-by: Sasha Levin +--- + fs/fuse/cuse.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c +index e9e97803442a6..55db06c7c587e 100644 +--- a/fs/fuse/cuse.c ++++ b/fs/fuse/cuse.c +@@ -513,6 +513,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) + rc = cuse_send_init(cc); + if (rc) { + fuse_dev_free(fud); ++ fuse_conn_put(&cc->fc); + return rc; + } + file->private_data = fud; +-- +2.20.1 + diff --git a/queue-4.14/ima-always-return-negative-code-for-error.patch b/queue-4.14/ima-always-return-negative-code-for-error.patch new file mode 100644 index 00000000000..7f49cb06ba6 --- /dev/null +++ b/queue-4.14/ima-always-return-negative-code-for-error.patch @@ -0,0 +1,44 @@ +From 36019990b9801724454082f5d70c1235a63aba14 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2019 10:00:40 +0200 +Subject: ima: always return negative code for error + +From: Sascha Hauer + +[ Upstream commit f5e1040196dbfe14c77ce3dfe3b7b08d2d961e88 ] + +integrity_kernel_read() returns the number of bytes read. If this is +a short read then this positive value is returned from +ima_calc_file_hash_atfm(). Currently this is only indirectly called from +ima_calc_file_hash() and this function only tests for the return value +being zero or nonzero and also doesn't forward the return value. +Nevertheless there's no point in returning a positive value as an error, +so translate a short read into -EINVAL. + +Signed-off-by: Sascha Hauer +Signed-off-by: Mimi Zohar +Signed-off-by: Sasha Levin +--- + security/integrity/ima/ima_crypto.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c +index af680b5b678a4..06b0ee75f34fb 100644 +--- a/security/integrity/ima/ima_crypto.c ++++ b/security/integrity/ima/ima_crypto.c +@@ -293,8 +293,11 @@ static int ima_calc_file_hash_atfm(struct file *file, + rbuf_len = min_t(loff_t, i_size - offset, rbuf_size[active]); + rc = integrity_kernel_read(file, offset, rbuf[active], + rbuf_len); +- if (rc != rbuf_len) ++ if (rc != rbuf_len) { ++ if (rc >= 0) ++ rc = -EINVAL; + goto out3; ++ } + + if (rbuf[1] && offset) { + /* Using two buffers, and it is not the first +-- +2.20.1 + diff --git a/queue-4.14/kernel-elfcore.c-include-proper-prototypes.patch b/queue-4.14/kernel-elfcore.c-include-proper-prototypes.patch new file mode 100644 index 00000000000..f9f561dd6b2 --- /dev/null +++ b/queue-4.14/kernel-elfcore.c-include-proper-prototypes.patch @@ -0,0 +1,51 @@ +From 933a63f6dca66021f102b652e7f0b0afa54e40ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Sep 2019 16:45:59 -0700 +Subject: kernel/elfcore.c: include proper prototypes + +From: Valdis Kletnieks + +[ Upstream commit 0f74914071ab7e7b78731ed62bf350e3a344e0a5 ] + +When building with W=1, gcc properly complains that there's no prototypes: + + CC kernel/elfcore.o +kernel/elfcore.c:7:17: warning: no previous prototype for 'elf_core_extra_phdrs' [-Wmissing-prototypes] + 7 | Elf_Half __weak elf_core_extra_phdrs(void) + | ^~~~~~~~~~~~~~~~~~~~ +kernel/elfcore.c:12:12: warning: no previous prototype for 'elf_core_write_extra_phdrs' [-Wmissing-prototypes] + 12 | int __weak 
elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ +kernel/elfcore.c:17:12: warning: no previous prototype for 'elf_core_write_extra_data' [-Wmissing-prototypes] + 17 | int __weak elf_core_write_extra_data(struct coredump_params *cprm) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +kernel/elfcore.c:22:15: warning: no previous prototype for 'elf_core_extra_data_size' [-Wmissing-prototypes] + 22 | size_t __weak elf_core_extra_data_size(void) + | ^~~~~~~~~~~~~~~~~~~~~~~~ + +Provide the include file so gcc is happy, and we don't have potential code drift + +Link: http://lkml.kernel.org/r/29875.1565224705@turing-police +Signed-off-by: Valdis Kletnieks +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + kernel/elfcore.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/elfcore.c b/kernel/elfcore.c +index fc482c8e0bd88..57fb4dcff4349 100644 +--- a/kernel/elfcore.c ++++ b/kernel/elfcore.c +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include + + Elf_Half __weak elf_core_extra_phdrs(void) + { +-- +2.20.1 + diff --git a/queue-4.14/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch b/queue-4.14/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch new file mode 100644 index 00000000000..edde867d3e9 --- /dev/null +++ b/queue-4.14/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch @@ -0,0 +1,86 @@ +From fcd0d40737ac47e8468b030b43bfcf9bf3e3a250 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Aug 2019 19:25:38 +0200 +Subject: KVM: PPC: Book3S HV: XIVE: Free escalation interrupts before + disabling the VP +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Cédric Le Goater + +[ Upstream commit 237aed48c642328ff0ab19b63423634340224a06 ] + +When a vCPU is brought done, the XIVE VP (Virtual Processor) is first +disabled and then the event notification queues are freed. When freeing +the queues, we check for possible escalation interrupts and free them +also. + +But when a XIVE VP is disabled, the underlying XIVE ENDs also are +disabled in OPAL. When an END (Event Notification Descriptor) is +disabled, its ESB pages (ESn and ESe) are disabled and loads return all +1s. Which means that any access on the ESB page of the escalation +interrupt will return invalid values. + +When an interrupt is freed, the shutdown handler computes a 'saved_p' +field from the value returned by a load in xive_do_source_set_mask(). +This value is incorrect for escalation interrupts for the reason +described above. + +This has no impact on Linux/KVM today because we don't make use of it +but we will introduce in future changes a xive_get_irqchip_state() +handler. This handler will use the 'saved_p' field to return the state +of an interrupt and 'saved_p' being incorrect, softlockup will occur. + +Fix the vCPU cleanup sequence by first freeing the escalation interrupts +if any, then disable the XIVE VP and last free the queues. 
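+
+In outline the corrected teardown reads as below; esc_free() and
+queue_free() are illustrative stand-ins for the per-queue loops in
+the actual hunk:
+
+  for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++)
+          esc_free(xc, i);                 /* 1: ESBs still live   */
+
+  xive_native_disable_vp(xc->vp_id);       /* 2: disable VP + ENDs */
+
+  for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++)
+          queue_free(xc, i);               /* 3: queues last       */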
+ +Fixes: 90c73795afa2 ("KVM: PPC: Book3S HV: Add a new KVM device for the XIVE native exploitation mode") +Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") +Cc: stable@vger.kernel.org # v4.12+ +Signed-off-by: Cédric Le Goater +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20190806172538.5087-1-clg@kaod.org +Signed-off-by: Sasha Levin +--- + arch/powerpc/kvm/book3s_xive.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c +index 3c75eee45edf9..46f99fc1901c8 100644 +--- a/arch/powerpc/kvm/book3s_xive.c ++++ b/arch/powerpc/kvm/book3s_xive.c +@@ -1001,20 +1001,22 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) + /* Mask the VP IPI */ + xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01); + +- /* Disable the VP */ +- xive_native_disable_vp(xc->vp_id); +- +- /* Free the queues & associated interrupts */ ++ /* Free escalations */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { +- struct xive_q *q = &xc->queues[i]; +- +- /* Free the escalation irq */ + if (xc->esc_virq[i]) { + free_irq(xc->esc_virq[i], vcpu); + irq_dispose_mapping(xc->esc_virq[i]); + kfree(xc->esc_virq_names[i]); + } +- /* Free the queue */ ++ } ++ ++ /* Disable the VP */ ++ xive_native_disable_vp(xc->vp_id); ++ ++ /* Free the queues */ ++ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { ++ struct xive_q *q = &xc->queues[i]; ++ + xive_native_disable_queue(xc->vp_id, q, i); + if (q->qpage) { + free_pages((unsigned long)q->qpage, +-- +2.20.1 + diff --git a/queue-4.14/nbd-fix-crash-when-the-blksize-is-zero.patch b/queue-4.14/nbd-fix-crash-when-the-blksize-is-zero.patch new file mode 100644 index 00000000000..1891ab3ef16 --- /dev/null +++ b/queue-4.14/nbd-fix-crash-when-the-blksize-is-zero.patch @@ -0,0 +1,87 @@ +From 1c2eb536117c97565297822c1fd33d4593f544fb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2019 15:16:05 -0500 +Subject: nbd: fix crash when the blksize is zero + +From: Xiubo Li + +[ Upstream commit 553768d1169a48c0cd87c4eb4ab57534ee663415 ] + +This will allow the blksize to be set zero and then use 1024 as +default. 
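+
+The rule, restated as a sketch of the nbd_is_valid_blksize() check
+introduced below (PAGE_SIZE is the upper bound):
+
+  if (!blksize)
+          blksize = NBD_DEF_BLKSIZE;   /* 0 selects the 1024 default */
+  if (!is_power_of_2(blksize) ||
+      blksize < 512 || blksize > PAGE_SIZE)
+          return -EINVAL;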
+ +Reviewed-by: Josef Bacik +Signed-off-by: Xiubo Li +[fix to use goto out instead of return in genl_connect] +Signed-off-by: Mike Christie +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/nbd.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index e4b049f281f50..a65e4ed6c9372 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -132,6 +132,8 @@ static struct dentry *nbd_dbg_dir; + + #define NBD_MAGIC 0x68797548 + ++#define NBD_DEF_BLKSIZE 1024 ++ + static unsigned int nbds_max = 16; + static int max_part = 16; + static struct workqueue_struct *recv_workqueue; +@@ -1216,6 +1218,14 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd, + nbd_config_put(nbd); + } + ++static bool nbd_is_valid_blksize(unsigned long blksize) ++{ ++ if (!blksize || !is_power_of_2(blksize) || blksize < 512 || ++ blksize > PAGE_SIZE) ++ return false; ++ return true; ++} ++ + /* Must be called with config_lock held */ + static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, + unsigned int cmd, unsigned long arg) +@@ -1231,8 +1241,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, + case NBD_SET_SOCK: + return nbd_add_socket(nbd, arg, false); + case NBD_SET_BLKSIZE: +- if (!arg || !is_power_of_2(arg) || arg < 512 || +- arg > PAGE_SIZE) ++ if (!arg) ++ arg = NBD_DEF_BLKSIZE; ++ if (!nbd_is_valid_blksize(arg)) + return -EINVAL; + nbd_size_set(nbd, arg, + div_s64(config->bytesize, arg)); +@@ -1312,7 +1323,7 @@ static struct nbd_config *nbd_alloc_config(void) + atomic_set(&config->recv_threads, 0); + init_waitqueue_head(&config->recv_wq); + init_waitqueue_head(&config->conn_wait); +- config->blksize = 1024; ++ config->blksize = NBD_DEF_BLKSIZE; + atomic_set(&config->live_connections, 0); + try_module_get(THIS_MODULE); + return config; +@@ -1744,6 +1755,12 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + u64 bsize = + nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); ++ if (!bsize) ++ bsize = NBD_DEF_BLKSIZE; ++ if (!nbd_is_valid_blksize(bsize)) { ++ ret = -EINVAL; ++ goto out; ++ } + nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); + } + if (info->attrs[NBD_ATTR_TIMEOUT]) { +-- +2.20.1 + diff --git a/queue-4.14/nbd-fix-max-number-of-supported-devs.patch b/queue-4.14/nbd-fix-max-number-of-supported-devs.patch new file mode 100644 index 00000000000..b06d4fc3736 --- /dev/null +++ b/queue-4.14/nbd-fix-max-number-of-supported-devs.patch @@ -0,0 +1,163 @@ +From 7208a5ce0e992ed339e66d4ce5194b1b945d2df7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 4 Aug 2019 14:10:06 -0500 +Subject: nbd: fix max number of supported devs + +From: Mike Christie + +[ Upstream commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4 ] + +This fixes a bug added in 4.10 with commit: + +commit 9561a7ade0c205bc2ee035a2ac880478dcc1a024 +Author: Josef Bacik +Date: Tue Nov 22 14:04:40 2016 -0500 + + nbd: add multi-connection support + +that limited the number of devices to 256. Before the patch we could +create 1000s of devices, but the patch switched us from using our +own thread to using a work queue which has a default limit of 256 +active works. + +The problem is that our recv_work function sits in a loop until +disconnection but only handles IO for one connection. 
The work is +started when the connection is started/restarted, but if we end up +creating 257 or more connections, the queue_work call just queues +connection257+'s recv_work and that waits for connection 1 - 256's +recv_work to be disconnected and that work instance completing. + +Instead of reverting back to kthreads, this has us allocate a +workqueue_struct per device, so we can block in the work. + +Cc: stable@vger.kernel.org +Reviewed-by: Josef Bacik +Signed-off-by: Mike Christie +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/nbd.c | 39 +++++++++++++++++++++++++-------------- + 1 file changed, 25 insertions(+), 14 deletions(-) + +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index 14b491c5cf7b6..a234600849558 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -106,6 +106,7 @@ struct nbd_device { + struct nbd_config *config; + struct mutex config_lock; + struct gendisk *disk; ++ struct workqueue_struct *recv_workq; + + struct list_head list; + struct task_struct *task_recv; +@@ -136,7 +137,6 @@ static struct dentry *nbd_dbg_dir; + + static unsigned int nbds_max = 16; + static int max_part = 16; +-static struct workqueue_struct *recv_workqueue; + static int part_shift; + + static int nbd_dev_dbg_init(struct nbd_device *nbd); +@@ -1015,7 +1015,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) + /* We take the tx_mutex in an error path in the recv_work, so we + * need to queue_work outside of the tx_mutex. + */ +- queue_work(recv_workqueue, &args->work); ++ queue_work(nbd->recv_workq, &args->work); + + atomic_inc(&config->live_connections); + wake_up(&config->conn_wait); +@@ -1120,6 +1120,10 @@ static void nbd_config_put(struct nbd_device *nbd) + kfree(nbd->config); + nbd->config = NULL; + ++ if (nbd->recv_workq) ++ destroy_workqueue(nbd->recv_workq); ++ nbd->recv_workq = NULL; ++ + nbd->tag_set.timeout = 0; + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + +@@ -1145,6 +1149,14 @@ static int nbd_start_device(struct nbd_device *nbd) + return -EINVAL; + } + ++ nbd->recv_workq = alloc_workqueue("knbd%d-recv", ++ WQ_MEM_RECLAIM | WQ_HIGHPRI | ++ WQ_UNBOUND, 0, nbd->index); ++ if (!nbd->recv_workq) { ++ dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n"); ++ return -ENOMEM; ++ } ++ + blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); + nbd->task_recv = current; + +@@ -1175,7 +1187,7 @@ static int nbd_start_device(struct nbd_device *nbd) + INIT_WORK(&args->work, recv_work); + args->nbd = nbd; + args->index = i; +- queue_work(recv_workqueue, &args->work); ++ queue_work(nbd->recv_workq, &args->work); + } + nbd_size_update(nbd); + return error; +@@ -1195,8 +1207,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b + mutex_unlock(&nbd->config_lock); + ret = wait_event_interruptible(config->recv_wq, + atomic_read(&config->recv_threads) == 0); +- if (ret) ++ if (ret) { + sock_shutdown(nbd); ++ flush_workqueue(nbd->recv_workq); ++ } + mutex_lock(&nbd->config_lock); + bd_set_size(bdev, 0); + /* user requested, ignore socket errors */ +@@ -1836,6 +1850,12 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + mutex_unlock(&nbd->config_lock); ++ /* ++ * Make sure recv thread has finished, so it does not drop the last ++ * config ref and try to destroy the workqueue from inside the work ++ * queue. 
++ */ ++ flush_workqueue(nbd->recv_workq); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); +@@ -2216,20 +2236,12 @@ static int __init nbd_init(void) + + if (nbds_max > 1UL << (MINORBITS - part_shift)) + return -EINVAL; +- recv_workqueue = alloc_workqueue("knbd-recv", +- WQ_MEM_RECLAIM | WQ_HIGHPRI | +- WQ_UNBOUND, 0); +- if (!recv_workqueue) +- return -ENOMEM; + +- if (register_blkdev(NBD_MAJOR, "nbd")) { +- destroy_workqueue(recv_workqueue); ++ if (register_blkdev(NBD_MAJOR, "nbd")) + return -EIO; +- } + + if (genl_register_family(&nbd_genl_family)) { + unregister_blkdev(NBD_MAJOR, "nbd"); +- destroy_workqueue(recv_workqueue); + return -EINVAL; + } + nbd_dbg_init(); +@@ -2271,7 +2283,6 @@ static void __exit nbd_cleanup(void) + + idr_destroy(&nbd_index_idr); + genl_unregister_family(&nbd_genl_family); +- destroy_workqueue(recv_workqueue); + unregister_blkdev(NBD_MAJOR, "nbd"); + } + +-- +2.20.1 + diff --git a/queue-4.14/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch b/queue-4.14/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch new file mode 100644 index 00000000000..98b10374edd --- /dev/null +++ b/queue-4.14/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch @@ -0,0 +1,107 @@ +From 7d77324e0bcf4f616cfea96435b435961057ab93 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Sep 2019 16:56:44 +0200 +Subject: netfilter: nf_tables: allow lookups in dynamic sets + +From: Florian Westphal + +[ Upstream commit acab713177377d9e0889c46bac7ff0cfb9a90c4d ] + +This un-breaks lookups in sets that have the 'dynamic' flag set. +Given this active example configuration: + +table filter { + set set1 { + type ipv4_addr + size 64 + flags dynamic,timeout + timeout 1m + } + + chain input { + type filter hook input priority 0; policy accept; + } +} + +... this works: +nft add rule ip filter input add @set1 { ip saddr } + +-> whenever rule is triggered, the source ip address is inserted +into the set (if it did not exist). + +This won't work: +nft add rule ip filter input ip saddr @set1 counter +Error: Could not process rule: Operation not supported + +In other words, we can add entries to the set, but then can't make +matching decision based on that set. + +That is just wrong -- all set backends support lookups (else they would +not be very useful). +The failure comes from an explicit rejection in nft_lookup.c. + +Looking at the history, it seems like NFT_SET_EVAL used to mean +'set contains expressions' (aka. "is a meter"), for instance something like + + nft add rule ip filter input meter example { ip saddr limit rate 10/second } + or + nft add rule ip filter input meter example { ip saddr counter } + +The actual meaning of NFT_SET_EVAL however, is +'set can be updated from the packet path'. + +'meters' and packet-path insertions into sets, such as +'add @set { ip saddr }' use exactly the same kernel code (nft_dynset.c) +and thus require a set backend that provides the ->update() function. + +The only set that provides this also is the only one that has the +NFT_SET_EVAL feature flag. + +Removing the wrong check makes the above example work. +While at it, also fix the flag check during set instantiation to +allow supported combinations only. 
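+
+Summarising the instantiation check after this patch (the code shape
+is from the nf_tables_api.c hunk below): NFT_SET_MAP combined with
+NFT_SET_EVAL stays allowed, while NFT_SET_OBJECT remains exclusive
+with either of them.
+
+  if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
+               (NFT_SET_MAP | NFT_SET_OBJECT))
+          return -EOPNOTSUPP;
+  if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
+               (NFT_SET_EVAL | NFT_SET_OBJECT))
+          return -EOPNOTSUPP;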
+ +Fixes: 8aeff920dcc9b3f ("netfilter: nf_tables: add stateful object reference to set elements") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 7 +++++-- + net/netfilter/nft_lookup.c | 3 --- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index b149a72190846..7ef126489d4ed 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -3131,8 +3131,11 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, + NFT_SET_OBJECT)) + return -EINVAL; + /* Only one of these operations is supported */ +- if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) == +- (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ++ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == ++ (NFT_SET_MAP | NFT_SET_OBJECT)) ++ return -EOPNOTSUPP; ++ if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == ++ (NFT_SET_EVAL | NFT_SET_OBJECT)) + return -EOPNOTSUPP; + } + +diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c +index 475570e89ede7..44015a151ad69 100644 +--- a/net/netfilter/nft_lookup.c ++++ b/net/netfilter/nft_lookup.c +@@ -76,9 +76,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx, + if (IS_ERR(set)) + return PTR_ERR(set); + +- if (set->flags & NFT_SET_EVAL) +- return -EOPNOTSUPP; +- + priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); + err = nft_validate_register_load(priv->sreg, set->klen); + if (err < 0) +-- +2.20.1 + diff --git a/queue-4.14/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch b/queue-4.14/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch new file mode 100644 index 00000000000..4636e108379 --- /dev/null +++ b/queue-4.14/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch @@ -0,0 +1,62 @@ +From fa3565062a568d46b913585901246c2049aafc61 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Sep 2019 13:41:16 +0200 +Subject: perf build: Add detection of java-11-openjdk-devel package + +From: Thomas Richter + +[ Upstream commit 815c1560bf8fd522b8d93a1d727868b910c1cc24 ] + +With Java 11 there is no seperate JRE anymore. + +Details: + + https://coderanch.com/t/701603/java/JRE-JDK + +Therefore the detection of the JRE needs to be adapted. + +This change works for s390 and x86. I have not tested other platforms. + +Committer testing: + +Continues to work with the OpenJDK 8: + + $ rm -f ~acme/lib64/libperf-jvmti.so + $ rpm -qa | grep jdk-devel + java-1.8.0-openjdk-devel-1.8.0.222.b10-0.fc30.x86_64 + $ git log --oneline -1 + a51937170f33 (HEAD -> perf/core) perf build: Add detection of java-11-openjdk-devel package + $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; make -C tools/perf O=/tmp/build/perf install > /dev/null 2>1 + $ ls -la ~acme/lib64/libperf-jvmti.so + -rwxr-xr-x. 
1 acme acme 230744 Sep 24 16:46 /home/acme/lib64/libperf-jvmti.so + $ + +Suggested-by: Andreas Krebbel +Signed-off-by: Thomas Richter +Tested-by: Arnaldo Carvalho de Melo +Cc: Heiko Carstens +Cc: Hendrik Brueckner +Cc: Vasily Gorbik +Link: http://lore.kernel.org/lkml/20190909114116.50469-4-tmricht@linux.ibm.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/Makefile.config | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config +index f362ee46506ad..b97e31498ff76 100644 +--- a/tools/perf/Makefile.config ++++ b/tools/perf/Makefile.config +@@ -795,7 +795,7 @@ ifndef NO_JVMTI + JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') + else + ifneq (,$(wildcard /usr/sbin/alternatives)) +- JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') ++ JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g') + endif + endif + ifndef JDIR +-- +2.20.1 + diff --git a/queue-4.14/perf-stat-fix-a-segmentation-fault-when-using-repeat.patch b/queue-4.14/perf-stat-fix-a-segmentation-fault-when-using-repeat.patch new file mode 100644 index 00000000000..0b8d662f24a --- /dev/null +++ b/queue-4.14/perf-stat-fix-a-segmentation-fault-when-using-repeat.patch @@ -0,0 +1,108 @@ +From 20132c0c5ade1135dbf2fab094bd82e512a6029c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Sep 2019 15:17:38 +0530 +Subject: perf stat: Fix a segmentation fault when using repeat forever + +From: Srikar Dronamraju + +[ Upstream commit 443f2d5ba13d65ccfd879460f77941875159d154 ] + +Observe a segmentation fault when 'perf stat' is asked to repeat forever +with the interval option. + +Without fix: + + # perf stat -r 0 -I 5000 -e cycles -a sleep 10 + # time counts unit events + 5.000211692 3,13,89,82,34,157 cycles + 10.000380119 1,53,98,52,22,294 cycles + 10.040467280 17,16,79,265 cycles + Segmentation fault + +This problem was only observed when we use forever option aka -r 0 and +works with limited repeats. Calling print_counter with ts being set to +NULL, is not a correct option when interval is set. Hence avoid +print_counter(NULL,..) if interval is set. + +With fix: + + # perf stat -r 0 -I 5000 -e cycles -a sleep 10 + # time counts unit events + 5.019866622 3,15,14,43,08,697 cycles + 10.039865756 3,15,16,31,95,261 cycles + 10.059950628 1,26,05,47,158 cycles + 5.009902655 3,14,52,62,33,932 cycles + 10.019880228 3,14,52,22,89,154 cycles + 10.030543876 66,90,18,333 cycles + 5.009848281 3,14,51,98,25,437 cycles + 10.029854402 3,15,14,93,04,918 cycles + 5.009834177 3,14,51,95,92,316 cycles + +Committer notes: + +Did the 'git bisect' to find the cset introducing the problem to add the +Fixes tag below, and at that time the problem reproduced as: + + (gdb) run stat -r0 -I500 sleep 1 + + Program received signal SIGSEGV, Segmentation fault. 
+ print_interval (prefix=prefix@entry=0x7fffffffc8d0 "", ts=ts@entry=0x0) at builtin-stat.c:866 + 866 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); + (gdb) bt + #0 print_interval (prefix=prefix@entry=0x7fffffffc8d0 "", ts=ts@entry=0x0) at builtin-stat.c:866 + #1 0x000000000041860a in print_counters (ts=ts@entry=0x0, argc=argc@entry=2, argv=argv@entry=0x7fffffffd640) at builtin-stat.c:938 + #2 0x0000000000419a7f in cmd_stat (argc=2, argv=0x7fffffffd640, prefix=) at builtin-stat.c:1411 + #3 0x000000000045c65a in run_builtin (p=p@entry=0x6291b8 , argc=argc@entry=5, argv=argv@entry=0x7fffffffd640) at perf.c:370 + #4 0x000000000045c893 in handle_internal_command (argc=5, argv=0x7fffffffd640) at perf.c:429 + #5 0x000000000045c8f1 in run_argv (argcp=argcp@entry=0x7fffffffd4ac, argv=argv@entry=0x7fffffffd4a0) at perf.c:473 + #6 0x000000000045cac9 in main (argc=, argv=) at perf.c:588 + (gdb) + +Mostly the same as just before this patch: + + Program received signal SIGSEGV, Segmentation fault. + 0x00000000005874a7 in print_interval (config=0xa1f2a0 , evlist=0xbc9b90, prefix=0x7fffffffd1c0 "`", ts=0x0) at util/stat-display.c:964 + 964 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); + (gdb) bt + #0 0x00000000005874a7 in print_interval (config=0xa1f2a0 , evlist=0xbc9b90, prefix=0x7fffffffd1c0 "`", ts=0x0) at util/stat-display.c:964 + #1 0x0000000000588047 in perf_evlist__print_counters (evlist=0xbc9b90, config=0xa1f2a0 , _target=0xa1f0c0 , ts=0x0, argc=2, argv=0x7fffffffd670) + at util/stat-display.c:1172 + #2 0x000000000045390f in print_counters (ts=0x0, argc=2, argv=0x7fffffffd670) at builtin-stat.c:656 + #3 0x0000000000456bb5 in cmd_stat (argc=2, argv=0x7fffffffd670) at builtin-stat.c:1960 + #4 0x00000000004dd2e0 in run_builtin (p=0xa30e00 , argc=5, argv=0x7fffffffd670) at perf.c:310 + #5 0x00000000004dd54d in handle_internal_command (argc=5, argv=0x7fffffffd670) at perf.c:362 + #6 0x00000000004dd694 in run_argv (argcp=0x7fffffffd4cc, argv=0x7fffffffd4c0) at perf.c:406 + #7 0x00000000004dda11 in main (argc=5, argv=0x7fffffffd670) at perf.c:531 + (gdb) + +Fixes: d4f63a4741a8 ("perf stat: Introduce print_counters function") +Signed-off-by: Srikar Dronamraju +Acked-by: Jiri Olsa +Tested-by: Arnaldo Carvalho de Melo +Tested-by: Ravi Bangoria +Cc: Namhyung Kim +Cc: Naveen N. 
Rao +Cc: stable@vger.kernel.org # v4.2+ +Link: http://lore.kernel.org/lkml/20190904094738.9558-3-srikar@linux.vnet.ibm.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/builtin-stat.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c +index 52486c90ab936..b6c1c9939c2f1 100644 +--- a/tools/perf/builtin-stat.c ++++ b/tools/perf/builtin-stat.c +@@ -2770,7 +2770,7 @@ int cmd_stat(int argc, const char **argv) + run_idx + 1); + + status = run_perf_stat(argc, argv); +- if (forever && status != -1) { ++ if (forever && status != -1 && !interval) { + print_counters(NULL, argc, argv); + perf_stat__reset_stats(); + } +-- +2.20.1 + diff --git a/queue-4.14/perf-stat-reset-previous-counts-on-repeat-with-inter.patch b/queue-4.14/perf-stat-reset-previous-counts-on-repeat-with-inter.patch new file mode 100644 index 00000000000..a696f1ee8eb --- /dev/null +++ b/queue-4.14/perf-stat-reset-previous-counts-on-repeat-with-inter.patch @@ -0,0 +1,168 @@ +From 3c519160ce059fbf89152de799f8b173e5c0ecbf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Sep 2019 15:17:37 +0530 +Subject: perf stat: Reset previous counts on repeat with interval + +From: Srikar Dronamraju + +[ Upstream commit b63fd11cced17fcb8e133def29001b0f6aaa5e06 ] + +When using 'perf stat' with repeat and interval option, it shows wrong +values for events. + +The wrong values will be shown for the first interval on the second and +subsequent repetitions. + +Without the fix: + + # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5 + + 2.000282489 53 faults + 2.000282489 513 sched:sched_switch + 4.005478208 3,721 faults + 4.005478208 2,666 sched:sched_switch + 5.025470933 395 faults + 5.025470933 1,307 sched:sched_switch + 2.009602825 1,84,46,74,40,73,70,95,47,520 faults <------ + 2.009602825 1,84,46,74,40,73,70,95,49,568 sched:sched_switch <------ + 4.019612206 4,730 faults + 4.019612206 2,746 sched:sched_switch + 5.039615484 3,953 faults + 5.039615484 1,496 sched:sched_switch + 2.000274620 1,84,46,74,40,73,70,95,47,520 faults <------ + 2.000274620 1,84,46,74,40,73,70,95,47,520 sched:sched_switch <------ + 4.000480342 4,282 faults + 4.000480342 2,303 sched:sched_switch + 5.000916811 1,322 faults + 5.000916811 1,064 sched:sched_switch + # + +prev_raw_counts is allocated when using intervals. This is used when +calculating the difference in the counts of events when using interval. + +The current counts are stored in prev_raw_counts to calculate the +differences in the next iteration. + +On the first interval of the second and subsequent repetitions, +prev_raw_counts would be the values stored in the last interval of the +previous repetitions, while the current counts will only be for the +first interval of the current repetition. + +Hence there is a possibility of events showing up as big number. + +Fix this by resetting prev_raw_counts whenever perf stat repeats the +command. 
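+
+In outline, the repeat loop in cmd_stat() becomes (simplified from
+the hunk below; error handling elided):
+
+  for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
+          if (run_idx != 0)
+                  perf_evlist__reset_prev_raw_counts(evsel_list);
+          status = run_perf_stat(argc, argv);
+          /* interval printing / reset as before */
+  }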
+ +With the fix: + + # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5 + + 2.019349347 2,597 faults + 2.019349347 2,753 sched:sched_switch + 4.019577372 3,098 faults + 4.019577372 2,532 sched:sched_switch + 5.019415481 1,879 faults + 5.019415481 1,356 sched:sched_switch + 2.000178813 8,468 faults + 2.000178813 2,254 sched:sched_switch + 4.000404621 7,440 faults + 4.000404621 1,266 sched:sched_switch + 5.040196079 2,458 faults + 5.040196079 556 sched:sched_switch + 2.000191939 6,870 faults + 2.000191939 1,170 sched:sched_switch + 4.000414103 541 faults + 4.000414103 902 sched:sched_switch + 5.000809863 450 faults + 5.000809863 364 sched:sched_switch + # + +Committer notes: + +This was broken since the cset introducing the --interval feature, i.e. +--repeat + --interval wasn't tested at that point, add the Fixes tag so +that automatic scripts can pick this up. + +Fixes: 13370a9b5bb8 ("perf stat: Add interval printing") +Signed-off-by: Srikar Dronamraju +Acked-by: Jiri Olsa +Tested-by: Arnaldo Carvalho de Melo +Tested-by: Ravi Bangoria +Cc: Namhyung Kim +Cc: Naveen N. Rao +Cc: Stephane Eranian +Cc: stable@vger.kernel.org # v3.9+ +Link: http://lore.kernel.org/lkml/20190904094738.9558-2-srikar@linux.vnet.ibm.com +[ Fixed up conflicts with libperf, i.e. some perf_{evsel,evlist} lost the 'perf' prefix ] +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/builtin-stat.c | 3 +++ + tools/perf/util/stat.c | 17 +++++++++++++++++ + tools/perf/util/stat.h | 1 + + 3 files changed, 21 insertions(+) + +diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c +index b6c1c9939c2f1..0801e0ffba4ae 100644 +--- a/tools/perf/builtin-stat.c ++++ b/tools/perf/builtin-stat.c +@@ -2769,6 +2769,9 @@ int cmd_stat(int argc, const char **argv) + fprintf(output, "[ perf stat: executing run #%d ... 
]\n", + run_idx + 1); + ++ if (run_idx != 0) ++ perf_evlist__reset_prev_raw_counts(evsel_list); ++ + status = run_perf_stat(argc, argv); + if (forever && status != -1 && !interval) { + print_counters(NULL, argc, argv); +diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c +index c9bae5fb8b479..d028c2786802e 100644 +--- a/tools/perf/util/stat.c ++++ b/tools/perf/util/stat.c +@@ -154,6 +154,15 @@ static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) + evsel->prev_raw_counts = NULL; + } + ++static void perf_evsel__reset_prev_raw_counts(struct perf_evsel *evsel) ++{ ++ if (evsel->prev_raw_counts) { ++ evsel->prev_raw_counts->aggr.val = 0; ++ evsel->prev_raw_counts->aggr.ena = 0; ++ evsel->prev_raw_counts->aggr.run = 0; ++ } ++} ++ + static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) + { + int ncpus = perf_evsel__nr_cpus(evsel); +@@ -204,6 +213,14 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) + } + } + ++void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist) ++{ ++ struct perf_evsel *evsel; ++ ++ evlist__for_each_entry(evlist, evsel) ++ perf_evsel__reset_prev_raw_counts(evsel); ++} ++ + static void zero_per_pkg(struct perf_evsel *counter) + { + if (counter->per_pkg_mask) +diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h +index 96326b1f94438..bdfbed8e2df28 100644 +--- a/tools/perf/util/stat.h ++++ b/tools/perf/util/stat.h +@@ -100,6 +100,7 @@ void perf_stat__collect_metric_expr(struct perf_evlist *); + int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); + void perf_evlist__free_stats(struct perf_evlist *evlist); + void perf_evlist__reset_stats(struct perf_evlist *evlist); ++void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist); + + int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter); +-- +2.20.1 + diff --git a/queue-4.14/perf-tools-fix-segfault-in-cpu_cache_level__read.patch b/queue-4.14/perf-tools-fix-segfault-in-cpu_cache_level__read.patch new file mode 100644 index 00000000000..f2fa23bcfd4 --- /dev/null +++ b/queue-4.14/perf-tools-fix-segfault-in-cpu_cache_level__read.patch @@ -0,0 +1,63 @@ +From 8902bc1fb81aa7b694fba14590f0d893c5efb32a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Sep 2019 12:52:35 +0200 +Subject: perf tools: Fix segfault in cpu_cache_level__read() + +From: Jiri Olsa + +[ Upstream commit 0216234c2eed1367a318daeb9f4a97d8217412a0 ] + +We release wrong pointer on error path in cpu_cache_level__read +function, leading to segfault: + + (gdb) r record ls + Starting program: /root/perf/tools/perf/perf record ls + ... + [ perf record: Woken up 1 times to write data ] + double free or corruption (out) + + Thread 1 "perf" received signal SIGABRT, Aborted. + 0x00007ffff7463798 in raise () from /lib64/power9/libc.so.6 + (gdb) bt + #0 0x00007ffff7463798 in raise () from /lib64/power9/libc.so.6 + #1 0x00007ffff7443bac in abort () from /lib64/power9/libc.so.6 + #2 0x00007ffff74af8bc in __libc_message () from /lib64/power9/libc.so.6 + #3 0x00007ffff74b92b8 in malloc_printerr () from /lib64/power9/libc.so.6 + #4 0x00007ffff74bb874 in _int_free () from /lib64/power9/libc.so.6 + #5 0x0000000010271260 in __zfree (ptr=0x7fffffffa0b0) at ../../lib/zalloc.. + #6 0x0000000010139340 in cpu_cache_level__read (cache=0x7fffffffa090, cac.. 
+ #7 0x0000000010143c90 in build_caches (cntp=0x7fffffffa118, size= +Cc: Alexander Shishkin +Cc: Michael Petlan +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: stable@vger.kernel.org: # v4.6+ +Link: http://lore.kernel.org/lkml/20190912105235.10689-1-jolsa@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/header.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c +index e1fe446f65daa..c892bb4f26c31 100644 +--- a/tools/perf/util/header.c ++++ b/tools/perf/util/header.c +@@ -1063,7 +1063,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev + + scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); + if (sysfs__read_str(file, &cache->map, &len)) { +- free(cache->map); ++ free(cache->size); + free(cache->type); + return -1; + } +-- +2.20.1 + diff --git a/queue-4.14/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch b/queue-4.14/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch new file mode 100644 index 00000000000..0b0a135ced6 --- /dev/null +++ b/queue-4.14/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch @@ -0,0 +1,51 @@ +From 055b8cc0e20deba0cf1a12fe581b9c202c16fbe6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Sep 2019 14:36:48 -0300 +Subject: perf unwind: Fix libunwind build failure on i386 systems + +From: Arnaldo Carvalho de Melo + +[ Upstream commit 26acf400d2dcc72c7e713e1f55db47ad92010cc2 ] + +Naresh Kamboju reported, that on the i386 build pr_err() +doesn't get defined properly due to header ordering: + + perf-in.o: In function `libunwind__x86_reg_id': + tools/perf/util/libunwind/../../arch/x86/util/unwind-libunwind.c:109: + undefined reference to `pr_err' + +Reported-by: Naresh Kamboju +Signed-off-by: Arnaldo Carvalho de Melo +Cc: David Ahern +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Sasha Levin +--- + tools/perf/arch/x86/util/unwind-libunwind.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c +index 05920e3edf7a7..47357973b55b2 100644 +--- a/tools/perf/arch/x86/util/unwind-libunwind.c ++++ b/tools/perf/arch/x86/util/unwind-libunwind.c +@@ -1,11 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + + #include ++#include "../../util/debug.h" + #ifndef REMOTE_UNWIND_LIBUNWIND + #include + #include "perf_regs.h" + #include "../../util/unwind.h" +-#include "../../util/debug.h" + #endif + + #ifdef HAVE_ARCH_X86_64_SUPPORT +-- +2.20.1 + diff --git a/queue-4.14/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch b/queue-4.14/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch new file mode 100644 index 00000000000..b739682148e --- /dev/null +++ b/queue-4.14/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch @@ -0,0 +1,48 @@ +From 01e1fa662c38b168c522c19dead65b629e89cc11 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Sep 2019 07:23:40 -0400 +Subject: pNFS: Ensure we do clear the return-on-close layout stateid on fatal + errors + +From: Trond Myklebust + +[ Upstream commit 9c47b18cf722184f32148784189fca945a7d0561 ] + +IF the server rejected our layout return with a state error such as +NFS4ERR_BAD_STATEID, or even a stale inode error, then we do want +to clear out all the remaining layout segments and mark that stateid +as invalid. 
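+
+Restating the new control flow (the code shape is from the hunk
+below; the comments are interpretation):
+
+  switch (ret) {
+  case -NFS4ERR_NOMATCHING_LAYOUT:
+          break;                          /* leave arg_stateid NULL */
+  case 0:
+          if (res->lrs_present)
+                  res_stateid = &res->stateid;
+          /* Fallthrough */
+  default:
+          /* now also reached on fatal errors such as
+           * NFS4ERR_BAD_STATEID, so the remaining segments get
+           * dropped and the stateid cleared */
+          arg_stateid = &args->stateid;
+  }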
+ +Fixes: 1c5bd76d17cca ("pNFS: Enable layoutreturn operation for...") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/pnfs.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c +index 96867fb159bf7..ec04cce31814b 100644 +--- a/fs/nfs/pnfs.c ++++ b/fs/nfs/pnfs.c +@@ -1319,10 +1319,15 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, + const nfs4_stateid *res_stateid = NULL; + struct nfs4_xdr_opaque_data *ld_private = args->ld_private; + +- if (ret == 0) { +- arg_stateid = &args->stateid; ++ switch (ret) { ++ case -NFS4ERR_NOMATCHING_LAYOUT: ++ break; ++ case 0: + if (res->lrs_present) + res_stateid = &res->stateid; ++ /* Fallthrough */ ++ default: ++ arg_stateid = &args->stateid; + } + pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, + res_stateid); +-- +2.20.1 + diff --git a/queue-4.14/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch b/queue-4.14/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch new file mode 100644 index 00000000000..8a3cb70ffe9 --- /dev/null +++ b/queue-4.14/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch @@ -0,0 +1,176 @@ +From d3624668bbdb89b47fe8d2722ff73e3d59d7e390 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 May 2019 13:15:52 +0530 +Subject: powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt() + +From: Gautham R. Shenoy + +[ Upstream commit c784be435d5dae28d3b03db31753dd7a18733f0c ] + +The calls to arch_add_memory()/arch_remove_memory() are always made +with the read-side cpu_hotplug_lock acquired via memory_hotplug_begin(). +On pSeries, arch_add_memory()/arch_remove_memory() eventually call +resize_hpt() which in turn calls stop_machine() which acquires the +read-side cpu_hotplug_lock again, thereby resulting in the recursive +acquisition of this lock. + +In the absence of CONFIG_PROVE_LOCKING, we hadn't observed a system +lockup during a memory hotplug operation because cpus_read_lock() is a +per-cpu rwsem read, which, in the fast-path (in the absence of the +writer, which in our case is a CPU-hotplug operation) simply +increments the read_count on the semaphore. Thus a recursive read in +the fast-path doesn't cause any problems. + +However, we can hit this problem in practice if there is a concurrent +CPU-Hotplug operation in progress which is waiting to acquire the +write-side of the lock. This will cause the second recursive read to +block until the writer finishes. While the writer is blocked since the +first read holds the lock. Thus both the reader as well as the writers +fail to make any progress thereby blocking both CPU-Hotplug as well as +Memory Hotplug operations. + +Memory-Hotplug CPU-Hotplug +CPU 0 CPU 1 +------ ------ + +1. down_read(cpu_hotplug_lock.rw_sem) + [memory_hotplug_begin] + 2. down_write(cpu_hotplug_lock.rw_sem) + [cpu_up/cpu_down] +3. down_read(cpu_hotplug_lock.rw_sem) + [stop_machine()] + +Lockdep complains as follows in these code-paths. 
+ + swapper/0/1 is trying to acquire lock: + (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: stop_machine+0x2c/0x60 + +but task is already holding lock: +(____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50 + + other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(cpu_hotplug_lock.rw_sem); + lock(cpu_hotplug_lock.rw_sem); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + + 3 locks held by swapper/0/1: + #0: (____ptrval____) (&dev->mutex){....}, at: __driver_attach+0x12c/0x1b0 + #1: (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50 + #2: (____ptrval____) (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x54/0x1a0 + +stack backtrace: + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc5-58373-gbc99402235f3-dirty #166 + Call Trace: + dump_stack+0xe8/0x164 (unreliable) + __lock_acquire+0x1110/0x1c70 + lock_acquire+0x240/0x290 + cpus_read_lock+0x64/0xf0 + stop_machine+0x2c/0x60 + pseries_lpar_resize_hpt+0x19c/0x2c0 + resize_hpt_for_hotplug+0x70/0xd0 + arch_add_memory+0x58/0xfc + devm_memremap_pages+0x5e8/0x8f0 + pmem_attach_disk+0x764/0x830 + nvdimm_bus_probe+0x118/0x240 + really_probe+0x230/0x4b0 + driver_probe_device+0x16c/0x1e0 + __driver_attach+0x148/0x1b0 + bus_for_each_dev+0x90/0x130 + driver_attach+0x34/0x50 + bus_add_driver+0x1a8/0x360 + driver_register+0x108/0x170 + __nd_driver_register+0xd0/0xf0 + nd_pmem_driver_init+0x34/0x48 + do_one_initcall+0x1e0/0x45c + kernel_init_freeable+0x540/0x64c + kernel_init+0x2c/0x160 + ret_from_kernel_thread+0x5c/0x68 + +Fix this issue by + 1) Requiring all the calls to pseries_lpar_resize_hpt() be made + with cpu_hotplug_lock held. + + 2) In pseries_lpar_resize_hpt() invoke stop_machine_cpuslocked() + as a consequence of 1) + + 3) To satisfy 1), in hpt_order_set(), call mmu_hash_ops.resize_hpt() + with cpu_hotplug_lock held. + +Fixes: dbcf929c0062 ("powerpc/pseries: Add support for hash table resizing") +Cc: stable@vger.kernel.org # v4.11+ +Reported-by: Aneesh Kumar K.V +Signed-off-by: Gautham R. Shenoy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/1557906352-29048-1-git-send-email-ego@linux.vnet.ibm.com +Signed-off-by: Sasha Levin +--- + arch/powerpc/mm/hash_utils_64.c | 9 ++++++++- + arch/powerpc/platforms/pseries/lpar.c | 8 ++++++-- + 2 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c +index 87687e46b48bb..58c14749bb0c1 100644 +--- a/arch/powerpc/mm/hash_utils_64.c ++++ b/arch/powerpc/mm/hash_utils_64.c +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1852,10 +1853,16 @@ static int hpt_order_get(void *data, u64 *val) + + static int hpt_order_set(void *data, u64 val) + { ++ int ret; ++ + if (!mmu_hash_ops.resize_hpt) + return -ENODEV; + +- return mmu_hash_ops.resize_hpt(val); ++ cpus_read_lock(); ++ ret = mmu_hash_ops.resize_hpt(val); ++ cpus_read_unlock(); ++ ++ return ret; + } + + DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n"); +diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c +index 55e97565ed2dd..eb738ef577926 100644 +--- a/arch/powerpc/platforms/pseries/lpar.c ++++ b/arch/powerpc/platforms/pseries/lpar.c +@@ -643,7 +643,10 @@ static int pseries_lpar_resize_hpt_commit(void *data) + return 0; + } + +-/* Must be called in user context */ ++/* ++ * Must be called in process context. 
++ * cpus_lock.
++ */
+ static int pseries_lpar_resize_hpt(unsigned long shift)
+ {
+ struct hpt_resize_state state = {
+@@ -699,7 +702,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
+
+ t1 = ktime_get();
+
+- rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
++ rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
++ &state, NULL);
+
+ t2 = ktime_get();
+
+--
+2.20.1
+
diff --git a/queue-4.14/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch b/queue-4.14/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch
new file mode 100644
index 00000000000..88d1c741388
--- /dev/null
+++ b/queue-4.14/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch
@@ -0,0 +1,49 @@
+From 83b439a68c69a2f8bc84072638076ede5b9df7e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 18 Sep 2019 16:54:21 +0200
+Subject: pwm: stm32-lp: Add check in case requested period cannot be achieved
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Fabrice Gasnier
+
+[ Upstream commit c91e3234c6035baf5a79763cb4fcd5d23ce75c2b ]
+
+LPTimer can use a 32KHz clock for counting. It depends on clock tree
+configuration. In such a case, PWM output frequency range is limited.
+Although unlikely, nothing prevents a user from requesting a PWM frequency
+above the counting clock (32KHz for instance):
+- This causes (prd - 1) = 0xffff to be written in the ARR register later in
+the apply() routine.
+This results in a badly configured PWM period (and also duty_cycle).
+Add a check to report an error in such a case.
+
+Signed-off-by: Fabrice Gasnier
+Reviewed-by: Uwe Kleine-König
+Signed-off-by: Thierry Reding
+Signed-off-by: Sasha Levin
+---
+ drivers/pwm/pwm-stm32-lp.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
+index 9793b296108ff..3f2e4ef695d75 100644
+--- a/drivers/pwm/pwm-stm32-lp.c
++++ b/drivers/pwm/pwm-stm32-lp.c
+@@ -59,6 +59,12 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ /* Calculate the period and prescaler value */
+ div = (unsigned long long)clk_get_rate(priv->clk) * state->period;
+ do_div(div, NSEC_PER_SEC);
++ if (!div) {
++ /* Clock is too slow to achieve requested period. */
++ dev_dbg(priv->chip.dev, "Can't reach %u ns\n", state->period);
++ return -EINVAL;
++ }
++
+ prd = div;
+ while (div > STM32_LPTIM_MAX_ARR) {
+ presc++;
+--
+2.20.1
+
diff --git a/queue-4.14/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch b/queue-4.14/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch
new file mode 100644
index 00000000000..396ea0eae4f
--- /dev/null
+++ b/queue-4.14/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch
@@ -0,0 +1,85 @@
+From 86ce63e06a0a63589e5e143c211b717751914e2d Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 16 Sep 2019 06:53:28 +0000
+Subject: sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr()
+
+From: KeMeng Shi
+
+[ Upstream commit 714e501e16cd473538b609b3e351b2cc9f7f09ed ]
+
+An oops can be triggered in the scheduler when running qemu on arm64:
+
+ Unable to handle kernel paging request at virtual address ffff000008effe40
+ Internal error: Oops: 96000007 [#1] SMP
+ Process migration/0 (pid: 12, stack limit = 0x00000000084e3736)
+ pstate: 20000085 (nzCv daIf -PAN -UAO)
+ pc : __ll_sc___cmpxchg_case_acq_4+0x4/0x20
+ lr : move_queued_task.isra.21+0x124/0x298
+ ...
+ Call trace:
+ __ll_sc___cmpxchg_case_acq_4+0x4/0x20
+ __migrate_task+0xc8/0xe0
+ migration_cpu_stop+0x170/0x180
+ cpu_stopper_thread+0xec/0x178
+ smpboot_thread_fn+0x1ac/0x1e8
+ kthread+0x134/0x138
+ ret_from_fork+0x10/0x18
+
+__set_cpus_allowed_ptr() will choose an active dest_cpu in the affinity
+mask to migrate the process to if the process is not currently running
+on any of the CPUs specified in the affinity mask. However, it will
+choose an invalid dest_cpu (dest_cpu >= nr_cpu_ids, 1024 in my virtual
+machine) if the CPUs in the affinity mask are deactivated by cpu_down
+after the cpumask_intersects() check. The cpumask_test_cpu() of dest_cpu
+afterwards is then out of bounds and may pass if the corresponding bit is
+coincidentally set. As a consequence, the kernel will access an invalid
+rq address associated with the invalid CPU in
+migration_cpu_stop->__migrate_task->move_queued_task and the Oops occurs.
+
+To reproduce the crash:
+
+ 1) A process repeatedly binds itself to cpu0 and cpu1 in turn by calling
+ sched_setaffinity.
+
+ 2) A shell script repeatedly does "echo 0 > /sys/devices/system/cpu/cpu1/online"
+ and "echo 1 > /sys/devices/system/cpu/cpu1/online" in turn.
+
+ 3) The Oops appears if the memory right after the tested cpumask happens
+ to have the invalid CPU's bit set.
+
+Signed-off-by: KeMeng Shi
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Valentin Schneider
+Cc: Linus Torvalds
+Cc: Peter Zijlstra
+Cc: Thomas Gleixner
+Link: https://lkml.kernel.org/r/1568616808-16808-1-git-send-email-shikemeng@huawei.com
+Signed-off-by: Ingo Molnar
+Signed-off-by: Sasha Levin
+---
+ kernel/sched/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 3d24d401b9d42..32ba789c544ca 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1111,7 +1111,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
+ if (cpumask_equal(&p->cpus_allowed, new_mask))
+ goto out;
+
+- if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
++ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
++ if (dest_cpu >= nr_cpu_ids) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -1132,7 +1133,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
+ goto out;
+
+- dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+ if (task_running(rq, p) || p->state == TASK_WAKING) {
+ struct migration_arg arg = { p, dest_cpu };
+ /* Need help from migration thread: drop lock and wait. */
+--
+2.20.1
+
diff --git a/queue-4.14/series b/queue-4.14/series
index 16af8392f67..969babd083c 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -25,3 +25,31 @@ revert-locking-pvqspinlock-don-t-wait-if-vcpu-is-preempted.patch
 xen-xenbus-fix-self-deadlock-after-killing-user-process.patch
 ieee802154-atusb-fix-use-after-free-at-disconnect.patch
 cfg80211-initialize-on-stack-chandefs.patch
+ima-always-return-negative-code-for-error.patch
+fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch
+9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch
+xen-pci-reserve-mcfg-areas-earlier.patch
+ceph-fix-directories-inode-i_blkbits-initialization.patch
+ceph-reconnect-connection-if-session-hang-in-opening.patch
+watchdog-aspeed-add-support-for-ast2600.patch
+netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch
+drm-amdgpu-check-for-valid-number-of-registers-to-re.patch
+pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch
+pwm-stm32-lp-add-check-in-case-requested-period-cann.patch
+thermal-fix-use-after-free-when-unregistering-therma.patch
+fuse-fix-memleak-in-cuse_channel_open.patch
+sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch
+perf-build-add-detection-of-java-11-openjdk-devel-pa.patch
+kernel-elfcore.c-include-proper-prototypes.patch
+perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch
+kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch
+nbd-fix-crash-when-the-blksize-is-zero.patch
+block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch
+nbd-fix-max-number-of-supported-devs.patch
+powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch
+tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch
+tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch
+perf-tools-fix-segfault-in-cpu_cache_level__read.patch
+perf-stat-fix-a-segmentation-fault-when-using-repeat.patch
+perf-stat-reset-previous-counts-on-repeat-with-inter.patch
+drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch
diff --git a/queue-4.14/thermal-fix-use-after-free-when-unregistering-therma.patch b/queue-4.14/thermal-fix-use-after-free-when-unregistering-therma.patch
new file mode 100644
index 00000000000..349dc71063e
--- /dev/null
+++ b/queue-4.14/thermal-fix-use-after-free-when-unregistering-therma.patch
@@ -0,0 +1,134 @@
+From bb8c8d7dfaf61897a792df9759cb79a298556a23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 10 Jul 2019 13:14:52 +0300
+Subject: thermal: Fix use-after-free when unregistering thermal zone device
+
+From: Ido Schimmel
+
+[ Upstream commit 1851799e1d2978f68eea5d9dff322e121dcf59c1 ]
+
+thermal_zone_device_unregister() cancels the delayed work that polls the
+thermal zone, but it does not wait for it to finish. This is racy with
+respect to the freeing of the thermal zone device, which can result in a
+use-after-free [1].
+
+Fix this by waiting for the delayed work to finish before freeing the
+thermal zone device. Note that thermal_zone_device_set_polling() is
+never invoked from an atomic context, so it is safe to call
+cancel_delayed_work_sync() that can block.
+
+[1]
+[ +0.002221] ==================================================================
+[ +0.000064] BUG: KASAN: use-after-free in __mutex_lock+0x1076/0x11c0
+[ +0.000016] Read of size 8 at addr ffff8881e48e0450 by task kworker/1:0/17
+
+[ +0.000023] CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 5.2.0-rc6-custom-02495-g8e73ca3be4af #1701
+[ +0.000010] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016
+[ +0.000016] Workqueue: events_freezable_power_ thermal_zone_device_check
+[ +0.000012] Call Trace:
+[ +0.000021] dump_stack+0xa9/0x10e
+[ +0.000020] print_address_description.cold.2+0x9/0x25e
+[ +0.000018] __kasan_report.cold.3+0x78/0x9d
+[ +0.000016] kasan_report+0xe/0x20
+[ +0.000016] __mutex_lock+0x1076/0x11c0
+[ +0.000014] step_wise_throttle+0x72/0x150
+[ +0.000018] handle_thermal_trip+0x167/0x760
+[ +0.000019] thermal_zone_device_update+0x19e/0x5f0
+[ +0.000019] process_one_work+0x969/0x16f0
+[ +0.000017] worker_thread+0x91/0xc40
+[ +0.000014] kthread+0x33d/0x400
+[ +0.000015] ret_from_fork+0x3a/0x50
+
+[ +0.000020] Allocated by task 1:
+[ +0.000015] save_stack+0x19/0x80
+[ +0.000015] __kasan_kmalloc.constprop.4+0xc1/0xd0
+[ +0.000014] kmem_cache_alloc_trace+0x152/0x320
+[ +0.000015] thermal_zone_device_register+0x1b4/0x13a0
+[ +0.000015] mlxsw_thermal_init+0xc92/0x23d0
+[ +0.000014] __mlxsw_core_bus_device_register+0x659/0x11b0
+[ +0.000013] mlxsw_core_bus_device_register+0x3d/0x90
+[ +0.000013] mlxsw_pci_probe+0x355/0x4b0
+[ +0.000014] local_pci_probe+0xc3/0x150
+[ +0.000013] pci_device_probe+0x280/0x410
+[ +0.000013] really_probe+0x26a/0xbb0
+[ +0.000013] driver_probe_device+0x208/0x2e0
+[ +0.000013] device_driver_attach+0xfe/0x140
+[ +0.000013] __driver_attach+0x110/0x310
+[ +0.000013] bus_for_each_dev+0x14b/0x1d0
+[ +0.000013] driver_register+0x1c0/0x400
+[ +0.000015] mlxsw_sp_module_init+0x5d/0xd3
+[ +0.000014] do_one_initcall+0x239/0x4dd
+[ +0.000013] kernel_init_freeable+0x42b/0x4e8
+[ +0.000012] kernel_init+0x11/0x18b
+[ +0.000013] ret_from_fork+0x3a/0x50
+
+[ +0.000015] Freed by task 581:
+[ +0.000013] save_stack+0x19/0x80
+[ +0.000014] __kasan_slab_free+0x125/0x170
+[ +0.000013] kfree+0xf3/0x310
+[ +0.000013] thermal_release+0xc7/0xf0
+[ +0.000014] device_release+0x77/0x200
+[ +0.000014] kobject_put+0x1a8/0x4c0
+[ +0.000014] device_unregister+0x38/0xc0
+[ +0.000014] thermal_zone_device_unregister+0x54e/0x6a0
+[ +0.000014] mlxsw_thermal_fini+0x184/0x35a
+[ +0.000014] mlxsw_core_bus_device_unregister+0x10a/0x640
+[ +0.000013] mlxsw_devlink_core_bus_device_reload+0x92/0x210
+[ +0.000015] devlink_nl_cmd_reload+0x113/0x1f0
+[ +0.000014] genl_family_rcv_msg+0x700/0xee0
+[ +0.000013] genl_rcv_msg+0xca/0x170
+[ +0.000013] netlink_rcv_skb+0x137/0x3a0
+[ +0.000012] genl_rcv+0x29/0x40
+[ +0.000013] netlink_unicast+0x49b/0x660
+[ +0.000013] netlink_sendmsg+0x755/0xc90
+[ +0.000013] __sys_sendto+0x3de/0x430
+[ +0.000013] __x64_sys_sendto+0xe2/0x1b0
+[ +0.000013] do_syscall_64+0xa4/0x4d0
+[ +0.000013] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+[ +0.000017] The buggy address belongs to the object at ffff8881e48e0008
+ which belongs to the cache kmalloc-2k of size 2048
+[ +0.000012] The buggy address is located 1096 bytes inside of
+ 2048-byte region [ffff8881e48e0008, ffff8881e48e0808)
+[ +0.000007] The buggy address belongs to the page:
+[ +0.000012] page:ffffea0007923800 refcount:1 mapcount:0 mapping:ffff88823680d0c0 index:0x0 compound_mapcount: 0
+[ +0.000020] flags: 0x200000000010200(slab|head)
+[ +0.000019] raw: 0200000000010200 ffffea0007682008 ffffea00076ab808 ffff88823680d0c0
+[ +0.000016] raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000
+[ +0.000007] page dumped because: kasan: bad access detected
+
+[ +0.000012] Memory state around the buggy address:
+[ +0.000012] ffff8881e48e0300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ +0.000012] ffff8881e48e0380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ +0.000012] >ffff8881e48e0400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ +0.000008] ^
+[ +0.000012] ffff8881e48e0480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ +0.000012] ffff8881e48e0500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ +0.000007] ==================================================================
+
+Fixes: b1569e99c795 ("ACPI: move thermal trip handling to generic thermal layer")
+Reported-by: Jiri Pirko
+Signed-off-by: Ido Schimmel
+Acked-by: Jiri Pirko
+Signed-off-by: Zhang Rui
+Signed-off-by: Sasha Levin
+---
+ drivers/thermal/thermal_core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
+index 17d6079c76429..456ef213dc141 100644
+--- a/drivers/thermal/thermal_core.c
++++ b/drivers/thermal/thermal_core.c
+@@ -299,7 +299,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
+ mod_delayed_work(system_freezable_wq, &tz->poll_queue,
+ msecs_to_jiffies(delay));
+ else
+- cancel_delayed_work(&tz->poll_queue);
++ cancel_delayed_work_sync(&tz->poll_queue);
+ }
+
+ static void monitor_thermal_zone(struct thermal_zone_device *tz)
+--
+2.20.1
+
diff --git a/queue-4.14/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch b/queue-4.14/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch
new file mode 100644
index 00000000000..f445fb483ac
--- /dev/null
+++ b/queue-4.14/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch
@@ -0,0 +1,173 @@
+From dc9408099d688618574fa6311308a663d0bd60b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 26 Sep 2019 15:51:01 +0200
+Subject: tick: broadcast-hrtimer: Fix a race in bc_set_next
+
+From: Balasubramani Vivekanandan
+
+[ Upstream commit b9023b91dd020ad7e093baa5122b6968c48cc9e0 ]
+
+When a cpu requests broadcasting, before starting the tick broadcast
+hrtimer, bc_set_next() checks if the timer callback (bc_handler) is active
+using hrtimer_try_to_cancel(). But hrtimer_try_to_cancel() does not provide
+the required synchronization when the callback is active on another core.
+
+The callback could have already executed tick_handle_oneshot_broadcast()
+and could have also returned. But still there is a small time window where
+hrtimer_try_to_cancel() returns -1. In that case bc_set_next() returns
+without doing anything, but the next_event of the tick broadcast clock
+device is already set to a timeout value.
+
+In the race condition diagram below, CPU #1 is running the timer callback
+and CPU #2 is entering idle state and so calls bc_set_next().
+
+In the worst case, the next_event will contain an expiry time, but the
+hrtimer will not be started which happens when the racing callback returns
+HRTIMER_NORESTART. The hrtimer might never recover if all further requests
+from the CPUs to subscribe to tick broadcast have a timeout greater than
+the next_event of the tick broadcast clock device. This leads to cascading
+failures which are finally noticed as rcu stall warnings.
+
+Here is a depiction of the race condition:
+
+CPU #1 (Running timer callback) CPU #2 (Enter idle
+ and subscribe to
+ tick broadcast)
+--------------------- ---------------------
+
+__run_hrtimer() tick_broadcast_enter()
+
+ bc_handler() __tick_broadcast_oneshot_control()
+
+ tick_handle_oneshot_broadcast()
+
+ raw_spin_lock(&tick_broadcast_lock);
+
+ dev->next_event = KTIME_MAX; //wait for tick_broadcast_lock
+ //next_event for tick broadcast clock
+ set to KTIME_MAX since no other cores
+ subscribed to tick broadcasting
+
+ raw_spin_unlock(&tick_broadcast_lock);
+
+ if (dev->next_event == KTIME_MAX)
+ return HRTIMER_NORESTART
+ // callback function exits without
+ restarting the hrtimer //tick_broadcast_lock acquired
+ raw_spin_lock(&tick_broadcast_lock);
+
+ tick_broadcast_set_event()
+
+ clockevents_program_event()
+
+ dev->next_event = expires;
+
+ bc_set_next()
+
+ hrtimer_try_to_cancel()
+ //returns -1 since the timer
+ callback is active. Exits without
+ restarting the timer
+ cpu_base->running = NULL;
+
+The comment that hrtimer cannot be armed from within the callback is
+wrong. It is fine to start the hrtimer from within the callback. Also it is
+safe to start the hrtimer from the enter/exit idle code while the broadcast
+handler is active. The enter/exit idle code and the broadcast handler are
+synchronized using tick_broadcast_lock. So there is no need for the
+existing try-to-cancel logic. All of this can be removed, which will
+eliminate the race condition as well.
+
+Fixes: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast")
+Originally-by: Thomas Gleixner
+Signed-off-by: Balasubramani Vivekanandan
+Signed-off-by: Thomas Gleixner
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20190926135101.12102-2-balasubramani_vivekanandan@mentor.com
+Signed-off-by: Sasha Levin
+---
+ kernel/time/tick-broadcast-hrtimer.c | 57 ++++++++++++++--------------
+ 1 file changed, 29 insertions(+), 28 deletions(-)
+
+diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
+index 58045eb976c38..c750c80570e88 100644
+--- a/kernel/time/tick-broadcast-hrtimer.c
++++ b/kernel/time/tick-broadcast-hrtimer.c
+@@ -44,34 +44,39 @@ static int bc_shutdown(struct clock_event_device *evt)
+ */
+ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
+ {
+- int bc_moved;
+ /*
+- * We try to cancel the timer first. If the callback is on
+- * flight on some other cpu then we let it handle it. If we
+- * were able to cancel the timer nothing can rearm it as we
+- * own broadcast_lock.
++ * This is called either from enter/exit idle code or from the
++ * broadcast handler. In all cases tick_broadcast_lock is held.
+ *
+- * However we can also be called from the event handler of
+- * ce_broadcast_hrtimer itself when it expires. We cannot
+- * restart the timer because we are in the callback, but we
+- * can set the expiry time and let the callback return
+- * HRTIMER_RESTART.
++ * hrtimer_cancel() can be called here neither from the
++ * broadcast handler nor from the enter/exit idle code. The idle
++ * code can run into the problem described in bc_shutdown() and the
++ * broadcast handler cannot wait for itself to complete for obvious
++ * reasons.
+ *
+- * Since we are in the idle loop at this point and because
+- * hrtimer_{start/cancel} functions call into tracing,
+- * calls to these functions must be bound within RCU_NONIDLE.
++ * Each caller tries to arm the hrtimer on its own CPU, but if the
++ * hrtimer callback function is currently running, then
++ * hrtimer_start() cannot move it and the timer stays on the CPU on
++ * which it is assigned at the moment.
++ *
++ * As this can be called from idle code, the hrtimer_start()
++ * invocation has to be wrapped with RCU_NONIDLE() as
++ * hrtimer_start() can call into tracing.
+ */
+- RCU_NONIDLE({
+- bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
+- if (bc_moved)
+- hrtimer_start(&bctimer, expires,
+- HRTIMER_MODE_ABS_PINNED);});
+- if (bc_moved) {
+- /* Bind the "device" to the cpu */
+- bc->bound_on = smp_processor_id();
+- } else if (bc->bound_on == smp_processor_id()) {
+- hrtimer_set_expires(&bctimer, expires);
+- }
++ RCU_NONIDLE( {
++ hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
++ /*
++ * The core tick broadcast mode expects bc->bound_on to be set
++ * correctly to prevent a CPU which has the broadcast hrtimer
++ * armed from going deep idle.
++ *
++ * As tick_broadcast_lock is held, nothing can change the cpu
++ * base which was just established in hrtimer_start() above. So
++ * the below access is safe even without holding the hrtimer
++ * base lock.
++ */
++ bc->bound_on = bctimer.base->cpu_base->cpu;
++ } );
+ return 0;
+ }
+
+@@ -97,10 +102,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
+ {
+ ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
+
+- if (clockevent_state_oneshot(&ce_broadcast_hrtimer))
+- if (ce_broadcast_hrtimer.next_event != KTIME_MAX)
+- return HRTIMER_RESTART;
+-
+ return HRTIMER_NORESTART;
+ }
+
+--
+2.20.1
+
diff --git a/queue-4.14/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch b/queue-4.14/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch
new file mode 100644
index 00000000000..8b094bffed9
--- /dev/null
+++ b/queue-4.14/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch
@@ -0,0 +1,57 @@
+From 0878dafe40bdef001fe83a72d4fdeef36f2016c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 28 Aug 2019 15:05:28 -0400
+Subject: tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on
+ failure
+
+From: Steven Rostedt (VMware)
+
+[ Upstream commit e0d2615856b2046c2e8d5bfd6933f37f69703b0b ]
+
+If the re-allocation of tep->cmdlines succeeds, then the previous
+allocation of tep->cmdlines will be freed. If we later fail in
+add_new_comm(), we must not free cmdlines, and also should assign
+tep->cmdlines to the new allocation. Otherwise when freeing tep, the
+tep->cmdlines will be pointing to garbage.
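+
+As a minimal sketch of the corrected ownership pattern (illustrative
+only, simplified from the real add_new_comm(); "owner" stands in for
+the pevent/tep handle): publish the realloc()ed pointer immediately,
+and do not free it on a later failure path, because the owner still
+references it:
+
+	struct cmdline *cmdlines;
+
+	cmdlines = realloc(owner->cmdlines,
+			   (owner->cmdline_count + 1) * sizeof(*cmdlines));
+	if (!cmdlines)
+		return -1;		/* old buffer is still owned by owner */
+	owner->cmdlines = cmdlines;	/* publish before anything else can fail */
+
+	cmdlines[owner->cmdline_count].comm = strdup(comm);
+	if (!cmdlines[owner->cmdline_count].comm)
+		return -1;		/* must NOT free cmdlines: owner holds it */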
+ +Fixes: a6d2a61ac653a ("tools lib traceevent: Remove some die() calls") +Signed-off-by: Steven Rostedt (VMware) +Cc: Andrew Morton +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: linux-trace-devel@vger.kernel.org +Cc: stable@vger.kernel.org +Link: http://lkml.kernel.org/r/20190828191819.970121417@goodmis.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/lib/traceevent/event-parse.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c +index 7989dd6289e7a..8211e8010e09b 100644 +--- a/tools/lib/traceevent/event-parse.c ++++ b/tools/lib/traceevent/event-parse.c +@@ -268,10 +268,10 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid) + errno = ENOMEM; + return -1; + } ++ pevent->cmdlines = cmdlines; + + cmdlines[pevent->cmdline_count].comm = strdup(comm); + if (!cmdlines[pevent->cmdline_count].comm) { +- free(cmdlines); + errno = ENOMEM; + return -1; + } +@@ -282,7 +282,6 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid) + pevent->cmdline_count++; + + qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); +- pevent->cmdlines = cmdlines; + + return 0; + } +-- +2.20.1 + diff --git a/queue-4.14/watchdog-aspeed-add-support-for-ast2600.patch b/queue-4.14/watchdog-aspeed-add-support-for-ast2600.patch new file mode 100644 index 00000000000..ff4e2dddfaa --- /dev/null +++ b/queue-4.14/watchdog-aspeed-add-support-for-ast2600.patch @@ -0,0 +1,47 @@ +From 385d941e8b0c07d54c00f1d50cca929fa8fa9e87 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Aug 2019 14:47:38 +0930 +Subject: watchdog: aspeed: Add support for AST2600 + +From: Ryan Chen + +[ Upstream commit b3528b4874480818e38e4da019d655413c233e6a ] + +The ast2600 can be supported by the same code as the ast2500. 
+
+Signed-off-by: Ryan Chen
+Signed-off-by: Joel Stanley
+Reviewed-by: Guenter Roeck
+Link: https://lore.kernel.org/r/20190819051738.17370-3-joel@jms.id.au
+Signed-off-by: Guenter Roeck
+Signed-off-by: Wim Van Sebroeck
+Signed-off-by: Sasha Levin
+---
+ drivers/watchdog/aspeed_wdt.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c
+index fd91007b4e41b..cee7334b2a000 100644
+--- a/drivers/watchdog/aspeed_wdt.c
++++ b/drivers/watchdog/aspeed_wdt.c
+@@ -38,6 +38,7 @@ static const struct aspeed_wdt_config ast2500_config = {
+ static const struct of_device_id aspeed_wdt_of_table[] = {
+ { .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config },
+ { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config },
++ { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config },
+ { },
+ };
+ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
+@@ -257,7 +258,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
+ set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
+ }
+
+- if (of_device_is_compatible(np, "aspeed,ast2500-wdt")) {
++ if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) ||
++ (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) {
+ u32 reg = readl(wdt->base + WDT_RESET_WIDTH);
+
+ reg &= config->ext_pulse_width_mask;
+--
+2.20.1
+
diff --git a/queue-4.14/xen-pci-reserve-mcfg-areas-earlier.patch b/queue-4.14/xen-pci-reserve-mcfg-areas-earlier.patch
new file mode 100644
index 00000000000..329f68c7a73
--- /dev/null
+++ b/queue-4.14/xen-pci-reserve-mcfg-areas-earlier.patch
@@ -0,0 +1,90 @@
+From b58cc84c5ff1892218406df05892f930e8162053 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 12 Sep 2019 19:31:51 +0100
+Subject: xen/pci: reserve MCFG areas earlier
+
+From: Igor Druzhinin
+
+[ Upstream commit a4098bc6eed5e31e0391bcc068e61804c98138df ]
+
+If the MCFG area is not reserved in E820, Xen by default will defer its
+usage until Dom0 registers it explicitly after the ACPI parser recognizes
+it as a reserved resource in DSDT. Having it reserved in E820 is not
+mandatory according to "PCI Firmware Specification, rev 3.2" (par. 4.1.2)
+and firmware is free to keep a hole in E820 in that place. Xen doesn't know
+what exactly is inside this hole since it lacks a full ACPI view of the
+platform, so it's potentially harmful to access the MCFG region
+without additional checks, as some machines are known to provide
+inconsistent information on the size of the region.
+
+Now xen_mcfg_late() runs after acpi_init(), which is too late as some basic
+PCI enumeration starts exactly there as well. Trying to register a device
+prior to MCFG reservation causes multiple problems with PCIe extended
+capability initializations in Xen (e.g. SR-IOV VF BAR sizing). There are
+no convenient hooks for us to subscribe to, so register MCFG areas earlier,
+upon the first invocation of xen_add_device(). It should be safe to do this
+once, since all the boot-time buses must have their MCFG areas in the MCFG
+table already and we don't support PCI bus hot-plug.
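+
+The change below is the classic run-once guard. As a sketch (function
+and variable names follow the patch; the surrounding code is elided,
+and, like the patch itself, this relies on xen_add_device() calls not
+racing with each other):
+
+	static int xen_add_device(struct device *dev)
+	{
+		static bool pci_mcfg_reserved = false;
+
+		/* Reserve MCFG areas exactly once, on the first
+		 * device registration; later calls skip this. */
+		if (!pci_mcfg_reserved) {
+			xen_mcfg_late();
+			pci_mcfg_reserved = true;
+		}
+		/* ... rest of device registration ... */
+		return 0;
+	}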
+ +Signed-off-by: Igor Druzhinin +Reviewed-by: Boris Ostrovsky +Signed-off-by: Boris Ostrovsky +Signed-off-by: Sasha Levin +--- + drivers/xen/pci.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c +index 7494dbeb4409c..db58aaa4dc598 100644 +--- a/drivers/xen/pci.c ++++ b/drivers/xen/pci.c +@@ -29,6 +29,8 @@ + #include "../pci/pci.h" + #ifdef CONFIG_PCI_MMCONFIG + #include ++ ++static int xen_mcfg_late(void); + #endif + + static bool __read_mostly pci_seg_supported = true; +@@ -40,7 +42,18 @@ static int xen_add_device(struct device *dev) + #ifdef CONFIG_PCI_IOV + struct pci_dev *physfn = pci_dev->physfn; + #endif +- ++#ifdef CONFIG_PCI_MMCONFIG ++ static bool pci_mcfg_reserved = false; ++ /* ++ * Reserve MCFG areas in Xen on first invocation due to this being ++ * potentially called from inside of acpi_init immediately after ++ * MCFG table has been finally parsed. ++ */ ++ if (!pci_mcfg_reserved) { ++ xen_mcfg_late(); ++ pci_mcfg_reserved = true; ++ } ++#endif + if (pci_seg_supported) { + struct { + struct physdev_pci_device_add add; +@@ -213,7 +226,7 @@ static int __init register_xen_pci_notifier(void) + arch_initcall(register_xen_pci_notifier); + + #ifdef CONFIG_PCI_MMCONFIG +-static int __init xen_mcfg_late(void) ++static int xen_mcfg_late(void) + { + struct pci_mmcfg_region *cfg; + int rc; +@@ -252,8 +265,4 @@ static int __init xen_mcfg_late(void) + } + return 0; + } +-/* +- * Needs to be done after acpi_init which are subsys_initcall. +- */ +-subsys_initcall_sync(xen_mcfg_late); + #endif +-- +2.20.1 +