fixes for 4.14
author Sasha Levin <sashal@kernel.org>
Wed, 9 Oct 2019 01:26:33 +0000 (21:26 -0400)
committer Sasha Levin <sashal@kernel.org>
Wed, 9 Oct 2019 01:26:33 +0000 (21:26 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
29 files changed:
queue-4.14/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch [new file with mode: 0644]
queue-4.14/block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch [new file with mode: 0644]
queue-4.14/ceph-fix-directories-inode-i_blkbits-initialization.patch [new file with mode: 0644]
queue-4.14/ceph-reconnect-connection-if-session-hang-in-opening.patch [new file with mode: 0644]
queue-4.14/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch [new file with mode: 0644]
queue-4.14/drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch [new file with mode: 0644]
queue-4.14/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch [new file with mode: 0644]
queue-4.14/fuse-fix-memleak-in-cuse_channel_open.patch [new file with mode: 0644]
queue-4.14/ima-always-return-negative-code-for-error.patch [new file with mode: 0644]
queue-4.14/kernel-elfcore.c-include-proper-prototypes.patch [new file with mode: 0644]
queue-4.14/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch [new file with mode: 0644]
queue-4.14/nbd-fix-crash-when-the-blksize-is-zero.patch [new file with mode: 0644]
queue-4.14/nbd-fix-max-number-of-supported-devs.patch [new file with mode: 0644]
queue-4.14/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch [new file with mode: 0644]
queue-4.14/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch [new file with mode: 0644]
queue-4.14/perf-stat-fix-a-segmentation-fault-when-using-repeat.patch [new file with mode: 0644]
queue-4.14/perf-stat-reset-previous-counts-on-repeat-with-inter.patch [new file with mode: 0644]
queue-4.14/perf-tools-fix-segfault-in-cpu_cache_level__read.patch [new file with mode: 0644]
queue-4.14/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch [new file with mode: 0644]
queue-4.14/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch [new file with mode: 0644]
queue-4.14/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch [new file with mode: 0644]
queue-4.14/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch [new file with mode: 0644]
queue-4.14/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/thermal-fix-use-after-free-when-unregistering-therma.patch [new file with mode: 0644]
queue-4.14/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch [new file with mode: 0644]
queue-4.14/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch [new file with mode: 0644]
queue-4.14/watchdog-aspeed-add-support-for-ast2600.patch [new file with mode: 0644]
queue-4.14/xen-pci-reserve-mcfg-areas-earlier.patch [new file with mode: 0644]

diff --git a/queue-4.14/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch b/queue-4.14/9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch
new file mode 100644 (file)
index 0000000..580ed1f
--- /dev/null
@@ -0,0 +1,47 @@
+From f8b838ad0fa7874d55b0a6696588c3a899c8a3de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Aug 2019 18:03:25 +0800
+Subject: 9p: avoid attaching writeback_fid on mmap with type PRIVATE
+
+From: Chengguang Xu <cgxu519@zoho.com.cn>
+
+[ Upstream commit c87a37ebd40b889178664c2c09cc187334146292 ]
+
+Currently, with the mmap cache policy, we always attach a writeback_fid
+whether the mmap type is SHARED or PRIVATE. However, in the use case of
+kata-containers, which combine 9p (guest OS) with overlayfs (host OS),
+this behavior triggers overlayfs' copy-up when a command is executed
+inside the container.
+
+Link: http://lkml.kernel.org/r/20190820100325.10313-1-cgxu519@zoho.com.cn
+Signed-off-by: Chengguang Xu <cgxu519@zoho.com.cn>
+Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/9p/vfs_file.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
+index 89e69904976a5..2651192f01667 100644
+--- a/fs/9p/vfs_file.c
++++ b/fs/9p/vfs_file.c
+@@ -528,6 +528,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
+       v9inode = V9FS_I(inode);
+       mutex_lock(&v9inode->v_mutex);
+       if (!v9inode->writeback_fid &&
++          (vma->vm_flags & VM_SHARED) &&
+           (vma->vm_flags & VM_WRITE)) {
+               /*
+                * clone a fid and add it to writeback_fid
+@@ -629,6 +630,8 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
+                       (vma->vm_end - vma->vm_start - 1),
+       };
++      if (!(vma->vm_flags & VM_SHARED))
++              return;
+       p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
+-- 
+2.20.1
+
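
The distinction the patch relies on is visible from userspace: only a
shared, writable mapping dirties pages that reach the file through
writeback, so only that case needs a writeback_fid. A minimal sketch
(illustration only, not part of the patch; "testfile" is a hypothetical
path):

  #include <fcntl.h>
  #include <string.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
          int fd = open("testfile", O_RDWR | O_CREAT, 0644);

          if (fd < 0 || ftruncate(fd, 4096) < 0)
                  return 1;

          /* MAP_PRIVATE: stores are copy-on-write and never written
           * back to the file, so no writeback credentials are needed */
          char *priv = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE, fd, 0);

          /* MAP_SHARED + PROT_WRITE: dirty pages reach the file via
           * writeback -- the only case 9p needs a writeback_fid for */
          char *shrd = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                            MAP_SHARED, fd, 0);

          if (priv == MAP_FAILED || shrd == MAP_FAILED)
                  return 1;

          memcpy(priv, "private", 8);  /* stays in anonymous COW pages */
          memcpy(shrd, "shared", 7);   /* written back to the file */

          munmap(priv, 4096);
          munmap(shrd, 4096);
          close(fd);
          return 0;
  }
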
diff --git a/queue-4.14/block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch b/queue-4.14/block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch
new file mode 100644 (file)
index 0000000..a3df903
--- /dev/null
@@ -0,0 +1,37 @@
+From 3e704a4423781576fdf0d9884d17538f7523a0b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Sep 2017 13:08:51 -0700
+Subject: block/ndb: add WQ_UNBOUND to the knbd-recv workqueue
+
+From: Dan Melnic <dmm@fb.com>
+
+[ Upstream commit 2189c97cdbed630d5971ab22f05dc998774e354e ]
+
+Add WQ_UNBOUND to the knbd-recv workqueue so we're not bound
+to a single CPU that is selected at device creation time.
+
+Signed-off-by: Dan Melnic <dmm@fb.com>
+Reviewed-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/nbd.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
+index a65e4ed6c9372..14b491c5cf7b6 100644
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -2217,7 +2217,8 @@ static int __init nbd_init(void)
+       if (nbds_max > 1UL << (MINORBITS - part_shift))
+               return -EINVAL;
+       recv_workqueue = alloc_workqueue("knbd-recv",
+-                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
++                                       WQ_MEM_RECLAIM | WQ_HIGHPRI |
++                                       WQ_UNBOUND, 0);
+       if (!recv_workqueue)
+               return -ENOMEM;
+-- 
+2.20.1
+
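
For readers unfamiliar with workqueue placement: items queued on a
per-CPU workqueue execute on the CPU that queued them, while WQ_UNBOUND
hands them to a worker pool the scheduler may place anywhere. A sketch
of the difference in kernel-API terms (mirrors the hunk above,
illustration only):

  struct workqueue_struct *wq;

  /* Per-CPU (old): recv work stays pinned to whichever CPU queued it,
   * even if that CPU is busy for the lifetime of the socket. */
  wq = alloc_workqueue("knbd-recv", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);

  /* Unbound (new): the scheduler is free to run the long-lived,
   * blocking receive loop on any CPU. */
  wq = alloc_workqueue("knbd-recv",
                       WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
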
diff --git a/queue-4.14/ceph-fix-directories-inode-i_blkbits-initialization.patch b/queue-4.14/ceph-fix-directories-inode-i_blkbits-initialization.patch
new file mode 100644 (file)
index 0000000..b136d25
--- /dev/null
@@ -0,0 +1,50 @@
+From b9b8557f927affa43400874b20744c88351561e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jul 2019 16:50:20 +0100
+Subject: ceph: fix directories inode i_blkbits initialization
+
+From: Luis Henriques <lhenriques@suse.com>
+
+[ Upstream commit 750670341a24cb714e624e0fd7da30900ad93752 ]
+
+When filling an inode with info from the MDS, i_blkbits is being
+initialized using fl_stripe_unit, which contains the stripe unit in
+bytes.  Unfortunately, this doesn't make sense for directories as they
+have fl_stripe_unit set to '0'.  This means that i_blkbits will be set
+to 0xff, causing an UBSAN undefined behaviour in i_blocksize():
+
+  UBSAN: Undefined behaviour in ./include/linux/fs.h:731:12
+  shift exponent 255 is too large for 32-bit type 'int'
+
+Fix this by initializing i_blkbits to CEPH_BLOCK_SHIFT if fl_stripe_unit
+is zero.
+
+Signed-off-by: Luis Henriques <lhenriques@suse.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/inode.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
+index 9bda8c7a80a05..879bc08250931 100644
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -789,7 +789,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
+       ci->i_version = le64_to_cpu(info->version);
+       inode->i_version++;
+       inode->i_rdev = le32_to_cpu(info->rdev);
+-      inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
++      /* directories have fl_stripe_unit set to zero */
++      if (le32_to_cpu(info->layout.fl_stripe_unit))
++              inode->i_blkbits =
++                      fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
++      else
++              inode->i_blkbits = CEPH_BLOCK_SHIFT;
+       if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
+           (issued & CEPH_CAP_AUTH_EXCL) == 0) {
+-- 
+2.20.1
+
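
The arithmetic is easy to reproduce in userspace: the kernel's fls()
returns 0 for an all-zero input, so fls(0) - 1 is -1, and storing that
in the inode's 8-bit i_blkbits field yields 0xff. A standalone
demonstration (local fls() stand-in for illustration; it matches the
kernel's convention that fls(0) == 0):

  #include <stdio.h>

  /* Minimal stand-in for the kernel's fls(): index of the most
   * significant set bit, or 0 if no bits are set. */
  static int fls(unsigned int x)
  {
          int r = 0;

          while (x) {
                  x >>= 1;
                  r++;
          }
          return r;
  }

  int main(void)
  {
          unsigned int fl_stripe_unit = 0; /* what the MDS reports for dirs */
          unsigned char i_blkbits;         /* same width as inode->i_blkbits */

          i_blkbits = fls(fl_stripe_unit) - 1;   /* 0 - 1 wraps to 0xff */
          printf("i_blkbits = %u\n", i_blkbits); /* prints 255 */

          /* i_blocksize() would now compute (1 << 255): a shift far
           * beyond the width of int -- the exact UBSAN report above. */
          return 0;
  }
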
diff --git a/queue-4.14/ceph-reconnect-connection-if-session-hang-in-opening.patch b/queue-4.14/ceph-reconnect-connection-if-session-hang-in-opening.patch
new file mode 100644 (file)
index 0000000..deb16fd
--- /dev/null
@@ -0,0 +1,46 @@
+From 9e42bcc0b5499e4033726891f3275461ac5bd667 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Aug 2019 21:22:45 +0800
+Subject: ceph: reconnect connection if session hang in opening state
+
+From: Erqi Chen <chenerqi@gmail.com>
+
+[ Upstream commit 71a228bc8d65900179e37ac309e678f8c523f133 ]
+
+If a client's MDS session is evicted while in the CEPH_MDS_SESSION_OPENING
+state, the MDS won't send a session message to the client, and
+delayed_work skips sessions in the CEPH_MDS_SESSION_OPENING state, so
+the session hangs forever.
+
+Allow ceph_con_keepalive to reconnect a session in OPENING to avoid
+session hang. Also, ensure that we skip sessions in RESTARTING and
+REJECTED states since those states can't be resurrected by issuing
+a keepalive.
+
+Link: https://tracker.ceph.com/issues/41551
+Signed-off-by: Erqi Chen <chenerqi@gmail.com>
+Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/mds_client.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
+index e1ded4bd61154..b968334f841e8 100644
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -3543,7 +3543,9 @@ static void delayed_work(struct work_struct *work)
+                               pr_info("mds%d hung\n", s->s_mds);
+                       }
+               }
+-              if (s->s_state < CEPH_MDS_SESSION_OPEN) {
++              if (s->s_state == CEPH_MDS_SESSION_NEW ||
++                  s->s_state == CEPH_MDS_SESSION_RESTARTING ||
++                  s->s_state == CEPH_MDS_SESSION_REJECTED) {
+                       /* this mds is failed or recovering, just wait */
+                       ceph_put_mds_session(s);
+                       continue;
+-- 
+2.20.1
+
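
The old test relied on the numeric ordering of the session states. A
sketch of the logic change (state values are assumptions for
illustration; the real enum lives in fs/ceph/mds_client.h):

  enum session_state {
          SESSION_NEW,
          SESSION_OPENING,
          SESSION_OPEN,
          SESSION_HUNG,
          SESSION_RESTARTING,
          SESSION_REJECTED,
  };

  /* Old check: everything ordered below OPEN -- including OPENING --
   * was treated as "failed or recovering" and skipped, so an OPENING
   * session evicted by the MDS never received a keepalive. */
  static int skip_old(enum session_state s)
  {
          return s < SESSION_OPEN;
  }

  /* New check: only states a keepalive cannot resurrect are skipped;
   * OPENING now falls through to ceph_con_keepalive(). */
  static int skip_new(enum session_state s)
  {
          return s == SESSION_NEW || s == SESSION_RESTARTING ||
                 s == SESSION_REJECTED;
  }
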
diff --git a/queue-4.14/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch b/queue-4.14/drm-amdgpu-check-for-valid-number-of-registers-to-re.patch
new file mode 100644 (file)
index 0000000..dccf811
--- /dev/null
@@ -0,0 +1,39 @@
+From 0d0b92bd9fdcd1ef6c111ecfd1176ad30a7d05ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 31 Aug 2019 21:25:36 +0200
+Subject: drm/amdgpu: Check for valid number of registers to read
+
+From: Trek <trek00@inbox.ru>
+
+[ Upstream commit 73d8e6c7b841d9bf298c8928f228fb433676635c ]
+
+Do not try to allocate any amount of memory requested by the user.
+Instead limit it to 128 registers. Actually the longest series of
+consecutive allowed registers is 48, mmGB_TILE_MODE0-31 and
+mmGB_MACROTILE_MODE0-15 (0x2644-0x2673).
+
+Bug: https://bugs.freedesktop.org/show_bug.cgi?id=111273
+Signed-off-by: Trek <trek00@inbox.ru>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index e16229000a983..884ed359f2493 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -540,6 +540,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+               if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
+                       sh_num = 0xffffffff;
++              if (info->read_mmr_reg.count > 128)
++                      return -EINVAL;
++
+               regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
+               if (!regs)
+                       return -ENOMEM;
+-- 
+2.20.1
+
diff --git a/queue-4.14/drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch b/queue-4.14/drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch
new file mode 100644 (file)
index 0000000..35fa4ca
--- /dev/null
@@ -0,0 +1,57 @@
+From 77fbf1f2ce3d14f2dcd6bbf3dbbe3716f062685f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2019 15:03:27 +0100
+Subject: drm/i915/userptr: Acquire the page lock around set_page_dirty()
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+[ Upstream commit cb6d7c7dc7ff8cace666ddec66334117a6068ce2 ]
+
+set_page_dirty says:
+
+       For pages with a mapping this should be done under the page lock
+       for the benefit of asynchronous memory errors who prefer a
+       consistent dirty state. This rule can be broken in some special
+       cases, but should be better not to.
+
+Under those rules, it is only safe for us to use the plain set_page_dirty
+calls for shmemfs/anonymous memory. Userptr may be used with real
+mappings and so needs to use the locked version (set_page_dirty_lock).
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203317
+Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl")
+References: 6dcc693bc57f ("ext4: warn when page is dirtied without buffers")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20190708140327.26825-1-chris@chris-wilson.co.uk
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/i915_gem_userptr.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
+index 05ae8c4a8a1b6..9760b67dab28b 100644
+--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
++++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
+@@ -691,7 +691,15 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
+       for_each_sgt_page(page, sgt_iter, pages) {
+               if (obj->mm.dirty)
+-                      set_page_dirty(page);
++                      /*
++                       * As this may not be anonymous memory (e.g. shmem)
++                       * but exist on a real mapping, we have to lock
++                       * the page in order to dirty it -- holding
++                       * the page reference is not sufficient to
++                       * prevent the inode from being truncated.
++                       * Play safe and take the lock.
++                       */
++                      set_page_dirty_lock(page);
+               mark_page_accessed(page);
+               put_page(page);
+-- 
+2.20.1
+
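
For reference, set_page_dirty_lock() is essentially set_page_dirty()
bracketed by the page lock. A paraphrase of its shape (simplified from
mm/page-writeback.c, not the verbatim kernel source):

  int set_page_dirty_lock(struct page *page)
  {
          int ret;

          lock_page(page);             /* may sleep: process context only */
          ret = set_page_dirty(page);  /* now safe for file-backed pages */
          unlock_page(page);
          return ret;
  }
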
diff --git a/queue-4.14/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch b/queue-4.14/fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch
new file mode 100644 (file)
index 0000000..a09101b
--- /dev/null
@@ -0,0 +1,46 @@
+From d143e18e15e29aad2ccda08c1adcf5434acd59ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jul 2019 15:48:53 +0800
+Subject: fs: nfs: Fix possible null-pointer dereferences in encode_attrs()
+
+From: Jia-Ju Bai <baijiaju1990@gmail.com>
+
+[ Upstream commit e2751463eaa6f9fec8fea80abbdc62dbc487b3c5 ]
+
+In encode_attrs(), there is an if statement on line 1145 to check
+whether label is NULL:
+    if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL))
+
+When label is NULL, it is used on lines 1178-1181:
+    *p++ = cpu_to_be32(label->lfs);
+    *p++ = cpu_to_be32(label->pi);
+    *p++ = cpu_to_be32(label->len);
+    p = xdr_encode_opaque_fixed(p, label->label, label->len);
+
+To fix these bugs, label is checked before being used.
+
+These bugs were found by STCheck, a static analysis tool written by us.
+
+Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4xdr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
+index 549c916d28599..525684b0056fc 100644
+--- a/fs/nfs/nfs4xdr.c
++++ b/fs/nfs/nfs4xdr.c
+@@ -1132,7 +1132,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
+               } else
+                       *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
+       }
+-      if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
++      if (label && (bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) {
+               *p++ = cpu_to_be32(label->lfs);
+               *p++ = cpu_to_be32(label->pi);
+               *p++ = cpu_to_be32(label->len);
+-- 
+2.20.1
+
diff --git a/queue-4.14/fuse-fix-memleak-in-cuse_channel_open.patch b/queue-4.14/fuse-fix-memleak-in-cuse_channel_open.patch
new file mode 100644 (file)
index 0000000..5ba80ea
--- /dev/null
@@ -0,0 +1,39 @@
+From 95342965d7dbfd5f71d8a401a37eef6ed5ce0a96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Aug 2019 15:59:09 +0800
+Subject: fuse: fix memleak in cuse_channel_open
+
+From: zhengbin <zhengbin13@huawei.com>
+
+[ Upstream commit 9ad09b1976c562061636ff1e01bfc3a57aebe56b ]
+
+If cuse_send_init() fails, we need to call fuse_conn_put() on cc->fc.
+
+cuse_channel_open->fuse_conn_init->refcount_set(&fc->count, 1)
+                 ->fuse_dev_alloc->fuse_conn_get
+                 ->fuse_dev_free->fuse_conn_put
+
+Fixes: cc080e9e9be1 ("fuse: introduce per-instance fuse_dev structure")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: zhengbin <zhengbin13@huawei.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/cuse.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
+index e9e97803442a6..55db06c7c587e 100644
+--- a/fs/fuse/cuse.c
++++ b/fs/fuse/cuse.c
+@@ -513,6 +513,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
+       rc = cuse_send_init(cc);
+       if (rc) {
+               fuse_dev_free(fud);
++              fuse_conn_put(&cc->fc);
+               return rc;
+       }
+       file->private_data = fud;
+-- 
+2.20.1
+
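
The leak is a plain reference-count imbalance, which a toy userspace
counter makes explicit (illustrative model, not fuse's real types):

  #include <stdio.h>

  static int count;

  static void get(void) { count++; }
  static void put(void) { if (--count == 0) printf("freed\n"); }

  int main(void)
  {
          count = 1;  /* fuse_conn_init(): refcount_set(&fc->count, 1) */
          get();      /* fuse_dev_alloc() -> fuse_conn_get() */

          /* ... cuse_send_init() fails ... */
          put();      /* fuse_dev_free() -> fuse_conn_put() */

          /* Without the patch we stop here with count == 1: the
           * connection is never freed. The added fuse_conn_put(): */
          put();      /* count reaches 0, connection is released */
          return 0;
  }
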
diff --git a/queue-4.14/ima-always-return-negative-code-for-error.patch b/queue-4.14/ima-always-return-negative-code-for-error.patch
new file mode 100644 (file)
index 0000000..7f49cb0
--- /dev/null
@@ -0,0 +1,44 @@
+From 36019990b9801724454082f5d70c1235a63aba14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2019 10:00:40 +0200
+Subject: ima: always return negative code for error
+
+From: Sascha Hauer <s.hauer@pengutronix.de>
+
+[ Upstream commit f5e1040196dbfe14c77ce3dfe3b7b08d2d961e88 ]
+
+integrity_kernel_read() returns the number of bytes read. If this is
+a short read then this positive value is returned from
+ima_calc_file_hash_atfm(). Currently this is only indirectly called from
+ima_calc_file_hash() and this function only tests for the return value
+being zero or nonzero and also doesn't forward the return value.
+Nevertheless there's no point in returning a positive value as an error,
+so translate a short read into -EINVAL.
+
+Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ security/integrity/ima/ima_crypto.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
+index af680b5b678a4..06b0ee75f34fb 100644
+--- a/security/integrity/ima/ima_crypto.c
++++ b/security/integrity/ima/ima_crypto.c
+@@ -293,8 +293,11 @@ static int ima_calc_file_hash_atfm(struct file *file,
+               rbuf_len = min_t(loff_t, i_size - offset, rbuf_size[active]);
+               rc = integrity_kernel_read(file, offset, rbuf[active],
+                                          rbuf_len);
+-              if (rc != rbuf_len)
++              if (rc != rbuf_len) {
++                      if (rc >= 0)
++                              rc = -EINVAL;
+                       goto out3;
++              }
+               if (rbuf[1] && offset) {
+                       /* Using two buffers, and it is not the first
+-- 
+2.20.1
+
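
The convention being enforced -- callers expect 0 or a negative errno,
so a positive short-read count must be normalized -- can be shown with
a hypothetical userspace helper (not the kernel function):

  #include <errno.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  /* A read returning fewer bytes than requested is translated into
   * -EINVAL instead of leaking a positive byte count upward. */
  static ssize_t read_exact(int fd, void *buf, size_t len)
  {
          ssize_t rc = read(fd, buf, len);

          if (rc != (ssize_t)len)
                  return rc >= 0 ? -EINVAL : rc;
          return 0;
  }

  int main(void)
  {
          char buf[64];
          int fd = open("/dev/null", O_RDONLY);

          /* /dev/null hits EOF at once: the 0-byte short read is
           * reported as -EINVAL (-22), not as a positive count. */
          printf("%zd\n", read_exact(fd, buf, sizeof(buf)));
          close(fd);
          return 0;
  }
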
diff --git a/queue-4.14/kernel-elfcore.c-include-proper-prototypes.patch b/queue-4.14/kernel-elfcore.c-include-proper-prototypes.patch
new file mode 100644 (file)
index 0000000..f9f561d
--- /dev/null
@@ -0,0 +1,51 @@
+From 933a63f6dca66021f102b652e7f0b0afa54e40ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Sep 2019 16:45:59 -0700
+Subject: kernel/elfcore.c: include proper prototypes
+
+From: Valdis Kletnieks <valdis.kletnieks@vt.edu>
+
+[ Upstream commit 0f74914071ab7e7b78731ed62bf350e3a344e0a5 ]
+
+When building with W=1, gcc properly complains that there's no prototypes:
+
+  CC      kernel/elfcore.o
+kernel/elfcore.c:7:17: warning: no previous prototype for 'elf_core_extra_phdrs' [-Wmissing-prototypes]
+    7 | Elf_Half __weak elf_core_extra_phdrs(void)
+      |                 ^~~~~~~~~~~~~~~~~~~~
+kernel/elfcore.c:12:12: warning: no previous prototype for 'elf_core_write_extra_phdrs' [-Wmissing-prototypes]
+   12 | int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+      |            ^~~~~~~~~~~~~~~~~~~~~~~~~~
+kernel/elfcore.c:17:12: warning: no previous prototype for 'elf_core_write_extra_data' [-Wmissing-prototypes]
+   17 | int __weak elf_core_write_extra_data(struct coredump_params *cprm)
+      |            ^~~~~~~~~~~~~~~~~~~~~~~~~
+kernel/elfcore.c:22:15: warning: no previous prototype for 'elf_core_extra_data_size' [-Wmissing-prototypes]
+   22 | size_t __weak elf_core_extra_data_size(void)
+      |               ^~~~~~~~~~~~~~~~~~~~~~~~
+
+Provide the include file so gcc is happy, and we don't have potential code drift.
+
+Link: http://lkml.kernel.org/r/29875.1565224705@turing-police
+Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/elfcore.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/elfcore.c b/kernel/elfcore.c
+index fc482c8e0bd88..57fb4dcff4349 100644
+--- a/kernel/elfcore.c
++++ b/kernel/elfcore.c
+@@ -3,6 +3,7 @@
+ #include <linux/fs.h>
+ #include <linux/mm.h>
+ #include <linux/binfmts.h>
++#include <linux/elfcore.h>
+ Elf_Half __weak elf_core_extra_phdrs(void)
+ {
+-- 
+2.20.1
+
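
Beyond silencing -Wmissing-prototypes, the include lets the compiler
cross-check each definition against its declaration, so drift becomes a
hard error. A hypothetical single-file illustration (demo names, not
the real kernel symbols):

  /* What linux/elfcore.h effectively contributes -- a declaration: */
  int elf_core_extra_data_size_demo(void);

  /* The definition matches, so this compiles. Had it drifted (say, to
   * return 'long'), the compiler would reject it with "conflicting
   * types"; without the declaration in scope, the mismatch would
   * compile silently in each translation unit. */
  int elf_core_extra_data_size_demo(void)
  {
          return 0;
  }

  int main(void)
  {
          return elf_core_extra_data_size_demo();
  }
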
diff --git a/queue-4.14/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch b/queue-4.14/kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch
new file mode 100644 (file)
index 0000000..edde867
--- /dev/null
@@ -0,0 +1,86 @@
+From fcd0d40737ac47e8468b030b43bfcf9bf3e3a250 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Aug 2019 19:25:38 +0200
+Subject: KVM: PPC: Book3S HV: XIVE: Free escalation interrupts before
+ disabling the VP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater <clg@kaod.org>
+
+[ Upstream commit 237aed48c642328ff0ab19b63423634340224a06 ]
+
+When a vCPU is brought down, the XIVE VP (Virtual Processor) is first
+disabled and then the event notification queues are freed. When freeing
+the queues, we check for possible escalation interrupts and free them
+also.
+
+But when a XIVE VP is disabled, the underlying XIVE ENDs are also
+disabled in OPAL. When an END (Event Notification Descriptor) is
+disabled, its ESB pages (ESn and ESe) are disabled and loads return all
+1s, which means that any access to the ESB page of the escalation
+interrupt will return invalid values.
+
+When an interrupt is freed, the shutdown handler computes a 'saved_p'
+field from the value returned by a load in xive_do_source_set_mask().
+This value is incorrect for escalation interrupts for the reason
+described above.
+
+This has no impact on Linux/KVM today because we don't make use of it,
+but future changes will introduce a xive_get_irqchip_state() handler.
+This handler will use the 'saved_p' field to return the state of an
+interrupt, and with 'saved_p' being incorrect, a softlockup will occur.
+
+Fix the vCPU cleanup sequence by first freeing the escalation
+interrupts if any, then disabling the XIVE VP, and finally freeing the
+queues.
+
+Fixes: 90c73795afa2 ("KVM: PPC: Book3S HV: Add a new KVM device for the XIVE native exploitation mode")
+Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller")
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Cédric Le Goater <clg@kaod.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190806172538.5087-1-clg@kaod.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/kvm/book3s_xive.c | 18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
+index 3c75eee45edf9..46f99fc1901c8 100644
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -1001,20 +1001,22 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
+       /* Mask the VP IPI */
+       xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
+-      /* Disable the VP */
+-      xive_native_disable_vp(xc->vp_id);
+-
+-      /* Free the queues & associated interrupts */
++      /* Free escalations */
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+-              struct xive_q *q = &xc->queues[i];
+-
+-              /* Free the escalation irq */
+               if (xc->esc_virq[i]) {
+                       free_irq(xc->esc_virq[i], vcpu);
+                       irq_dispose_mapping(xc->esc_virq[i]);
+                       kfree(xc->esc_virq_names[i]);
+               }
+-              /* Free the queue */
++      }
++
++      /* Disable the VP */
++      xive_native_disable_vp(xc->vp_id);
++
++      /* Free the queues */
++      for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
++              struct xive_q *q = &xc->queues[i];
++
+               xive_native_disable_queue(xc->vp_id, q, i);
+               if (q->qpage) {
+                       free_pages((unsigned long)q->qpage,
+-- 
+2.20.1
+
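
A toy model of the ordering hazard (pure illustration; the P/Q bit
values are made up): once the VP is disabled, an ESB load returns all
1s, so a 'saved_p' derived from that load is garbage.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  static bool vp_enabled;

  static uint64_t esb_load(void)
  {
          return vp_enabled ? 0x2 : ~0ULL;  /* disabled ESB: all 1s */
  }

  static void free_escalation(void)
  {
          uint64_t val = esb_load();  /* shutdown derives saved_p here */

          printf("saved_p from %#llx (%s)\n", (unsigned long long)val,
                 vp_enabled ? "valid" : "INVALID: VP already disabled");
  }

  int main(void)
  {
          /* Old order: disable VP first, then free escalations */
          vp_enabled = false;
          free_escalation();

          /* Fixed order: free escalations while the VP is still live */
          vp_enabled = true;
          free_escalation();
          vp_enabled = false;  /* ...then disable the VP */
          return 0;
  }
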
diff --git a/queue-4.14/nbd-fix-crash-when-the-blksize-is-zero.patch b/queue-4.14/nbd-fix-crash-when-the-blksize-is-zero.patch
new file mode 100644 (file)
index 0000000..1891ab3
--- /dev/null
@@ -0,0 +1,87 @@
+From 1c2eb536117c97565297822c1fd33d4593f544fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2019 15:16:05 -0500
+Subject: nbd: fix crash when the blksize is zero
+
+From: Xiubo Li <xiubli@redhat.com>
+
+[ Upstream commit 553768d1169a48c0cd87c4eb4ab57534ee663415 ]
+
+This allows the blksize to be set to zero, in which case the default
+of 1024 is used.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Xiubo Li <xiubli@redhat.com>
+[fix to use goto out instead of return in genl_connect]
+Signed-off-by: Mike Christie <mchristi@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/nbd.c | 23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
+index e4b049f281f50..a65e4ed6c9372 100644
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -132,6 +132,8 @@ static struct dentry *nbd_dbg_dir;
+ #define NBD_MAGIC 0x68797548
++#define NBD_DEF_BLKSIZE 1024
++
+ static unsigned int nbds_max = 16;
+ static int max_part = 16;
+ static struct workqueue_struct *recv_workqueue;
+@@ -1216,6 +1218,14 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
+               nbd_config_put(nbd);
+ }
++static bool nbd_is_valid_blksize(unsigned long blksize)
++{
++      if (!blksize || !is_power_of_2(blksize) || blksize < 512 ||
++          blksize > PAGE_SIZE)
++              return false;
++      return true;
++}
++
+ /* Must be called with config_lock held */
+ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
+                      unsigned int cmd, unsigned long arg)
+@@ -1231,8 +1241,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
+       case NBD_SET_SOCK:
+               return nbd_add_socket(nbd, arg, false);
+       case NBD_SET_BLKSIZE:
+-              if (!arg || !is_power_of_2(arg) || arg < 512 ||
+-                  arg > PAGE_SIZE)
++              if (!arg)
++                      arg = NBD_DEF_BLKSIZE;
++              if (!nbd_is_valid_blksize(arg))
+                       return -EINVAL;
+               nbd_size_set(nbd, arg,
+                            div_s64(config->bytesize, arg));
+@@ -1312,7 +1323,7 @@ static struct nbd_config *nbd_alloc_config(void)
+       atomic_set(&config->recv_threads, 0);
+       init_waitqueue_head(&config->recv_wq);
+       init_waitqueue_head(&config->conn_wait);
+-      config->blksize = 1024;
++      config->blksize = NBD_DEF_BLKSIZE;
+       atomic_set(&config->live_connections, 0);
+       try_module_get(THIS_MODULE);
+       return config;
+@@ -1744,6 +1755,12 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
+       if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
+               u64 bsize =
+                       nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
++              if (!bsize)
++                      bsize = NBD_DEF_BLKSIZE;
++              if (!nbd_is_valid_blksize(bsize)) {
++                      ret = -EINVAL;
++                      goto out;
++              }
+               nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
+       }
+       if (info->attrs[NBD_ATTR_TIMEOUT]) {
+-- 
+2.20.1
+
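
The validation rules are easy to exercise in isolation. A userspace
mirror of the patch's logic (PAGE_SIZE assumed to be 4096 for
illustration):

  #include <stdbool.h>
  #include <stdio.h>

  #define NBD_DEF_BLKSIZE 1024
  #define PAGE_SIZE 4096  /* assumption: 4K pages */

  static bool is_power_of_2(unsigned long n)
  {
          return n != 0 && (n & (n - 1)) == 0;
  }

  /* Same conditions as the patch's nbd_is_valid_blksize() */
  static bool nbd_is_valid_blksize(unsigned long blksize)
  {
          return blksize && is_power_of_2(blksize) &&
                 blksize >= 512 && blksize <= PAGE_SIZE;
  }

  int main(void)
  {
          unsigned long args[] = { 0, 256, 512, 1000, 1024, 4096, 8192 };

          for (size_t i = 0; i < sizeof(args) / sizeof(args[0]); i++) {
                  unsigned long arg = args[i];

                  if (!arg)
                          arg = NBD_DEF_BLKSIZE;  /* 0 now means "default" */
                  printf("%5lu -> %s\n", args[i],
                         nbd_is_valid_blksize(arg) ? "ok" : "-EINVAL");
          }
          return 0;
  }
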
diff --git a/queue-4.14/nbd-fix-max-number-of-supported-devs.patch b/queue-4.14/nbd-fix-max-number-of-supported-devs.patch
new file mode 100644 (file)
index 0000000..b06d4fc
--- /dev/null
@@ -0,0 +1,163 @@
+From 7208a5ce0e992ed339e66d4ce5194b1b945d2df7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 4 Aug 2019 14:10:06 -0500
+Subject: nbd: fix max number of supported devs
+
+From: Mike Christie <mchristi@redhat.com>
+
+[ Upstream commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4 ]
+
+This fixes a bug added in 4.10 with commit:
+
+commit 9561a7ade0c205bc2ee035a2ac880478dcc1a024
+Author: Josef Bacik <jbacik@fb.com>
+Date:   Tue Nov 22 14:04:40 2016 -0500
+
+    nbd: add multi-connection support
+
+that limited the number of devices to 256. Before the patch we could
+create 1000s of devices, but the patch switched us from using our
+own thread to using a work queue which has a default limit of 256
+active works.
+
+The problem is that our recv_work function sits in a loop until
+disconnection, but only handles IO for one connection. The work is
+started when the connection is started/restarted, but if we end up
+creating 257 or more connections, the queue_work call just queues
+connection 257+'s recv_work, which then waits for one of connection
+1-256's recv_work instances to disconnect and complete.
+
+Instead of reverting to kthreads, this has us allocate a
+workqueue_struct per device, so we can block in the work.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Mike Christie <mchristi@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/nbd.c | 39 +++++++++++++++++++++++++--------------
+ 1 file changed, 25 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
+index 14b491c5cf7b6..a234600849558 100644
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -106,6 +106,7 @@ struct nbd_device {
+       struct nbd_config *config;
+       struct mutex config_lock;
+       struct gendisk *disk;
++      struct workqueue_struct *recv_workq;
+       struct list_head list;
+       struct task_struct *task_recv;
+@@ -136,7 +137,6 @@ static struct dentry *nbd_dbg_dir;
+ static unsigned int nbds_max = 16;
+ static int max_part = 16;
+-static struct workqueue_struct *recv_workqueue;
+ static int part_shift;
+ static int nbd_dev_dbg_init(struct nbd_device *nbd);
+@@ -1015,7 +1015,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
+               /* We take the tx_mutex in an error path in the recv_work, so we
+                * need to queue_work outside of the tx_mutex.
+                */
+-              queue_work(recv_workqueue, &args->work);
++              queue_work(nbd->recv_workq, &args->work);
+               atomic_inc(&config->live_connections);
+               wake_up(&config->conn_wait);
+@@ -1120,6 +1120,10 @@ static void nbd_config_put(struct nbd_device *nbd)
+               kfree(nbd->config);
+               nbd->config = NULL;
++              if (nbd->recv_workq)
++                      destroy_workqueue(nbd->recv_workq);
++              nbd->recv_workq = NULL;
++
+               nbd->tag_set.timeout = 0;
+               queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+@@ -1145,6 +1149,14 @@ static int nbd_start_device(struct nbd_device *nbd)
+               return -EINVAL;
+       }
++      nbd->recv_workq = alloc_workqueue("knbd%d-recv",
++                                        WQ_MEM_RECLAIM | WQ_HIGHPRI |
++                                        WQ_UNBOUND, 0, nbd->index);
++      if (!nbd->recv_workq) {
++              dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
++              return -ENOMEM;
++      }
++
+       blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
+       nbd->task_recv = current;
+@@ -1175,7 +1187,7 @@ static int nbd_start_device(struct nbd_device *nbd)
+               INIT_WORK(&args->work, recv_work);
+               args->nbd = nbd;
+               args->index = i;
+-              queue_work(recv_workqueue, &args->work);
++              queue_work(nbd->recv_workq, &args->work);
+       }
+       nbd_size_update(nbd);
+       return error;
+@@ -1195,8 +1207,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
+       mutex_unlock(&nbd->config_lock);
+       ret = wait_event_interruptible(config->recv_wq,
+                                        atomic_read(&config->recv_threads) == 0);
+-      if (ret)
++      if (ret) {
+               sock_shutdown(nbd);
++              flush_workqueue(nbd->recv_workq);
++      }
+       mutex_lock(&nbd->config_lock);
+       bd_set_size(bdev, 0);
+       /* user requested, ignore socket errors */
+@@ -1836,6 +1850,12 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
+       mutex_lock(&nbd->config_lock);
+       nbd_disconnect(nbd);
+       mutex_unlock(&nbd->config_lock);
++      /*
++       * Make sure recv thread has finished, so it does not drop the last
++       * config ref and try to destroy the workqueue from inside the work
++       * queue.
++       */
++      flush_workqueue(nbd->recv_workq);
+       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+                              &nbd->config->runtime_flags))
+               nbd_config_put(nbd);
+@@ -2216,20 +2236,12 @@ static int __init nbd_init(void)
+       if (nbds_max > 1UL << (MINORBITS - part_shift))
+               return -EINVAL;
+-      recv_workqueue = alloc_workqueue("knbd-recv",
+-                                       WQ_MEM_RECLAIM | WQ_HIGHPRI |
+-                                       WQ_UNBOUND, 0);
+-      if (!recv_workqueue)
+-              return -ENOMEM;
+-      if (register_blkdev(NBD_MAJOR, "nbd")) {
+-              destroy_workqueue(recv_workqueue);
++      if (register_blkdev(NBD_MAJOR, "nbd"))
+               return -EIO;
+-      }
+       if (genl_register_family(&nbd_genl_family)) {
+               unregister_blkdev(NBD_MAJOR, "nbd");
+-              destroy_workqueue(recv_workqueue);
+               return -EINVAL;
+       }
+       nbd_dbg_init();
+@@ -2271,7 +2283,6 @@ static void __exit nbd_cleanup(void)
+       idr_destroy(&nbd_index_idr);
+       genl_unregister_family(&nbd_genl_family);
+-      destroy_workqueue(recv_workqueue);
+       unregister_blkdev(NBD_MAJOR, "nbd");
+ }
+-- 
+2.20.1
+
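
The cap described above comes from the workqueue default: passing 0 for
max_active selects WQ_DFL_ACTIVE, which was 256 in
include/linux/workqueue.h at the time of this patch. Since each
recv_work blocks for the life of its connection, one shared queue
silently limits the whole driver to 256 live connections; the
per-device queue in the hunk above lifts that shared cap:

  /* One workqueue per nbd device (mirrors the patch); the 256-item
   * concurrency default now applies per device, not driver-wide. */
  nbd->recv_workq = alloc_workqueue("knbd%d-recv",
                                    WQ_MEM_RECLAIM | WQ_HIGHPRI |
                                    WQ_UNBOUND, 0, nbd->index);
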
diff --git a/queue-4.14/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch b/queue-4.14/netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch
new file mode 100644 (file)
index 0000000..98b1037
--- /dev/null
@@ -0,0 +1,107 @@
+From 7d77324e0bcf4f616cfea96435b435961057ab93 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 Sep 2019 16:56:44 +0200
+Subject: netfilter: nf_tables: allow lookups in dynamic sets
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit acab713177377d9e0889c46bac7ff0cfb9a90c4d ]
+
+This un-breaks lookups in sets that have the 'dynamic' flag set.
+Given this active example configuration:
+
+table filter {
+  set set1 {
+    type ipv4_addr
+    size 64
+    flags dynamic,timeout
+    timeout 1m
+  }
+
+  chain input {
+     type filter hook input priority 0; policy accept;
+  }
+}
+
+... this works:
+nft add rule ip filter input add @set1 { ip saddr }
+
+-> whenever rule is triggered, the source ip address is inserted
+into the set (if it did not exist).
+
+This won't work:
+nft add rule ip filter input ip saddr @set1 counter
+Error: Could not process rule: Operation not supported
+
+In other words, we can add entries to the set, but then can't make
+matching decisions based on that set.
+
+That is just wrong -- all set backends support lookups (else they would
+not be very useful).
+The failure comes from an explicit rejection in nft_lookup.c.
+
+Looking at the history, it seems like NFT_SET_EVAL used to mean
+'set contains expressions' (aka. "is a meter"), for instance something like
+
+ nft add rule ip filter input meter example { ip saddr limit rate 10/second }
+ or
+ nft add rule ip filter input meter example { ip saddr counter }
+
+The actual meaning of NFT_SET_EVAL however, is
+'set can be updated from the packet path'.
+
+'meters' and packet-path insertions into sets, such as
+'add @set { ip saddr }' use exactly the same kernel code (nft_dynset.c)
+and thus require a set backend that provides the ->update() function.
+
+The only set that provides this also is the only one that has the
+NFT_SET_EVAL feature flag.
+
+Removing the wrong check makes the above example work.
+While at it, also fix the flag check during set instantiation to
+allow supported combinations only.
+
+Fixes: 8aeff920dcc9b3f ("netfilter: nf_tables: add stateful object reference to set elements")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 7 +++++--
+ net/netfilter/nft_lookup.c    | 3 ---
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index b149a72190846..7ef126489d4ed 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3131,8 +3131,11 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
+                             NFT_SET_OBJECT))
+                       return -EINVAL;
+               /* Only one of these operations is supported */
+-              if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ==
+-                           (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT))
++              if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
++                           (NFT_SET_MAP | NFT_SET_OBJECT))
++                      return -EOPNOTSUPP;
++              if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
++                           (NFT_SET_EVAL | NFT_SET_OBJECT))
+                       return -EOPNOTSUPP;
+       }
+diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
+index 475570e89ede7..44015a151ad69 100644
+--- a/net/netfilter/nft_lookup.c
++++ b/net/netfilter/nft_lookup.c
+@@ -76,9 +76,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
+       if (IS_ERR(set))
+               return PTR_ERR(set);
+-      if (set->flags & NFT_SET_EVAL)
+-              return -EOPNOTSUPP;
+-
+       priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
+       err = nft_validate_register_load(priv->sreg, set->klen);
+       if (err < 0)
+-- 
+2.20.1
+
diff --git a/queue-4.14/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch b/queue-4.14/perf-build-add-detection-of-java-11-openjdk-devel-pa.patch
new file mode 100644 (file)
index 0000000..4636e10
--- /dev/null
@@ -0,0 +1,62 @@
+From fa3565062a568d46b913585901246c2049aafc61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Sep 2019 13:41:16 +0200
+Subject: perf build: Add detection of java-11-openjdk-devel package
+
+From: Thomas Richter <tmricht@linux.ibm.com>
+
+[ Upstream commit 815c1560bf8fd522b8d93a1d727868b910c1cc24 ]
+
+With Java 11 there is no separate JRE anymore.
+
+Details:
+
+  https://coderanch.com/t/701603/java/JRE-JDK
+
+Therefore the detection of the JRE needs to be adapted.
+
+This change works for s390 and x86.  I have not tested other platforms.
+
+Committer testing:
+
+Continues to work with the OpenJDK 8:
+
+  $ rm -f ~acme/lib64/libperf-jvmti.so
+  $ rpm -qa | grep jdk-devel
+  java-1.8.0-openjdk-devel-1.8.0.222.b10-0.fc30.x86_64
+  $ git log --oneline -1
+  a51937170f33 (HEAD -> perf/core) perf build: Add detection of java-11-openjdk-devel package
+  $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; make -C tools/perf O=/tmp/build/perf install > /dev/null 2>1
+  $ ls -la ~acme/lib64/libperf-jvmti.so
+  -rwxr-xr-x. 1 acme acme 230744 Sep 24 16:46 /home/acme/lib64/libperf-jvmti.so
+  $
+
+Suggested-by: Andreas Krebbel <krebbel@linux.ibm.com>
+Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Hendrik Brueckner <brueckner@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Link: http://lore.kernel.org/lkml/20190909114116.50469-4-tmricht@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/Makefile.config | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
+index f362ee46506ad..b97e31498ff76 100644
+--- a/tools/perf/Makefile.config
++++ b/tools/perf/Makefile.config
+@@ -795,7 +795,7 @@ ifndef NO_JVMTI
+     JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
+   else
+     ifneq (,$(wildcard /usr/sbin/alternatives))
+-      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
++      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g')
+     endif
+   endif
+   ifndef JDIR
+-- 
+2.20.1
+
diff --git a/queue-4.14/perf-stat-fix-a-segmentation-fault-when-using-repeat.patch b/queue-4.14/perf-stat-fix-a-segmentation-fault-when-using-repeat.patch
new file mode 100644 (file)
index 0000000..0b8d662
--- /dev/null
@@ -0,0 +1,108 @@
+From 20132c0c5ade1135dbf2fab094bd82e512a6029c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Sep 2019 15:17:38 +0530
+Subject: perf stat: Fix a segmentation fault when using repeat forever
+
+From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+
+[ Upstream commit 443f2d5ba13d65ccfd879460f77941875159d154 ]
+
+Observe a segmentation fault when 'perf stat' is asked to repeat forever
+with the interval option.
+
+Without fix:
+
+  # perf stat -r 0 -I 5000 -e cycles -a sleep 10
+  #           time             counts unit events
+       5.000211692  3,13,89,82,34,157      cycles
+      10.000380119  1,53,98,52,22,294      cycles
+      10.040467280       17,16,79,265      cycles
+  Segmentation fault
+
+This problem was only observed with the forever option (-r 0); it
+works fine with a limited number of repeats. Calling print_counter
+with ts set to NULL is not correct when interval is set. Hence avoid
+print_counter(NULL, ..) if interval is set.
+
+With fix:
+
+  # perf stat -r 0 -I 5000 -e cycles -a sleep 10
+   #           time             counts unit events
+       5.019866622  3,15,14,43,08,697      cycles
+      10.039865756  3,15,16,31,95,261      cycles
+      10.059950628     1,26,05,47,158      cycles
+       5.009902655  3,14,52,62,33,932      cycles
+      10.019880228  3,14,52,22,89,154      cycles
+      10.030543876       66,90,18,333      cycles
+       5.009848281  3,14,51,98,25,437      cycles
+      10.029854402  3,15,14,93,04,918      cycles
+       5.009834177  3,14,51,95,92,316      cycles
+
+Committer notes:
+
+Did the 'git bisect' to find the cset introducing the problem to add the
+Fixes tag below, and at that time the problem reproduced as:
+
+  (gdb) run stat -r0 -I500 sleep 1
+  <SNIP>
+  Program received signal SIGSEGV, Segmentation fault.
+  print_interval (prefix=prefix@entry=0x7fffffffc8d0 "", ts=ts@entry=0x0) at builtin-stat.c:866
+  866          sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
+  (gdb) bt
+  #0  print_interval (prefix=prefix@entry=0x7fffffffc8d0 "", ts=ts@entry=0x0) at builtin-stat.c:866
+  #1  0x000000000041860a in print_counters (ts=ts@entry=0x0, argc=argc@entry=2, argv=argv@entry=0x7fffffffd640) at builtin-stat.c:938
+  #2  0x0000000000419a7f in cmd_stat (argc=2, argv=0x7fffffffd640, prefix=<optimized out>) at builtin-stat.c:1411
+  #3  0x000000000045c65a in run_builtin (p=p@entry=0x6291b8 <commands+216>, argc=argc@entry=5, argv=argv@entry=0x7fffffffd640) at perf.c:370
+  #4  0x000000000045c893 in handle_internal_command (argc=5, argv=0x7fffffffd640) at perf.c:429
+  #5  0x000000000045c8f1 in run_argv (argcp=argcp@entry=0x7fffffffd4ac, argv=argv@entry=0x7fffffffd4a0) at perf.c:473
+  #6  0x000000000045cac9 in main (argc=<optimized out>, argv=<optimized out>) at perf.c:588
+  (gdb)
+
+Mostly the same as just before this patch:
+
+  Program received signal SIGSEGV, Segmentation fault.
+  0x00000000005874a7 in print_interval (config=0xa1f2a0 <stat_config>, evlist=0xbc9b90, prefix=0x7fffffffd1c0 "`", ts=0x0) at util/stat-display.c:964
+  964          sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+  (gdb) bt
+  #0  0x00000000005874a7 in print_interval (config=0xa1f2a0 <stat_config>, evlist=0xbc9b90, prefix=0x7fffffffd1c0 "`", ts=0x0) at util/stat-display.c:964
+  #1  0x0000000000588047 in perf_evlist__print_counters (evlist=0xbc9b90, config=0xa1f2a0 <stat_config>, _target=0xa1f0c0 <target>, ts=0x0, argc=2, argv=0x7fffffffd670)
+      at util/stat-display.c:1172
+  #2  0x000000000045390f in print_counters (ts=0x0, argc=2, argv=0x7fffffffd670) at builtin-stat.c:656
+  #3  0x0000000000456bb5 in cmd_stat (argc=2, argv=0x7fffffffd670) at builtin-stat.c:1960
+  #4  0x00000000004dd2e0 in run_builtin (p=0xa30e00 <commands+288>, argc=5, argv=0x7fffffffd670) at perf.c:310
+  #5  0x00000000004dd54d in handle_internal_command (argc=5, argv=0x7fffffffd670) at perf.c:362
+  #6  0x00000000004dd694 in run_argv (argcp=0x7fffffffd4cc, argv=0x7fffffffd4c0) at perf.c:406
+  #7  0x00000000004dda11 in main (argc=5, argv=0x7fffffffd670) at perf.c:531
+  (gdb)
+
+Fixes: d4f63a4741a8 ("perf stat: Introduce print_counters function")
+Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Cc: stable@vger.kernel.org # v4.2+
+Link: http://lore.kernel.org/lkml/20190904094738.9558-3-srikar@linux.vnet.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-stat.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
+index 52486c90ab936..b6c1c9939c2f1 100644
+--- a/tools/perf/builtin-stat.c
++++ b/tools/perf/builtin-stat.c
+@@ -2770,7 +2770,7 @@ int cmd_stat(int argc, const char **argv)
+                               run_idx + 1);
+               status = run_perf_stat(argc, argv);
+-              if (forever && status != -1) {
++              if (forever && status != -1 && !interval) {
+                       print_counters(NULL, argc, argv);
+                       perf_stat__reset_stats();
+               }
+-- 
+2.20.1
+
diff --git a/queue-4.14/perf-stat-reset-previous-counts-on-repeat-with-inter.patch b/queue-4.14/perf-stat-reset-previous-counts-on-repeat-with-inter.patch
new file mode 100644 (file)
index 0000000..a696f1e
--- /dev/null
@@ -0,0 +1,168 @@
+From 3c519160ce059fbf89152de799f8b173e5c0ecbf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Sep 2019 15:17:37 +0530
+Subject: perf stat: Reset previous counts on repeat with interval
+
+From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+
+[ Upstream commit b63fd11cced17fcb8e133def29001b0f6aaa5e06 ]
+
+When using 'perf stat' with the repeat and interval options, it shows wrong
+values for events.
+
+The wrong values will be shown for the first interval on the second and
+subsequent repetitions.
+
+Without the fix:
+
+  # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5
+
+     2.000282489                 53      faults
+     2.000282489                513      sched:sched_switch
+     4.005478208              3,721      faults
+     4.005478208              2,666      sched:sched_switch
+     5.025470933                395      faults
+     5.025470933              1,307      sched:sched_switch
+     2.009602825 1,84,46,74,40,73,70,95,47,520      faults             <------
+     2.009602825 1,84,46,74,40,73,70,95,49,568      sched:sched_switch  <------
+     4.019612206              4,730      faults
+     4.019612206              2,746      sched:sched_switch
+     5.039615484              3,953      faults
+     5.039615484              1,496      sched:sched_switch
+     2.000274620 1,84,46,74,40,73,70,95,47,520      faults             <------
+     2.000274620 1,84,46,74,40,73,70,95,47,520      sched:sched_switch <------
+     4.000480342              4,282      faults
+     4.000480342              2,303      sched:sched_switch
+     5.000916811              1,322      faults
+     5.000916811              1,064      sched:sched_switch
+  #
+
+prev_raw_counts is allocated when using intervals. This is used when
+calculating the difference in the counts of events when using interval.
+
+The current counts are stored in prev_raw_counts to calculate the
+differences in the next iteration.
+
+On the first interval of the second and subsequent repetitions,
+prev_raw_counts would be the values stored in the last interval of the
+previous repetitions, while the current counts will only be for the
+first interval of the current repetition.
+
+Hence there is a possibility of events showing up as huge numbers.
+
+Fix this by resetting prev_raw_counts whenever perf stat repeats the
+command.
+
+With the fix:
+
+  # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5
+
+     2.019349347              2,597      faults
+     2.019349347              2,753      sched:sched_switch
+     4.019577372              3,098      faults
+     4.019577372              2,532      sched:sched_switch
+     5.019415481              1,879      faults
+     5.019415481              1,356      sched:sched_switch
+     2.000178813              8,468      faults
+     2.000178813              2,254      sched:sched_switch
+     4.000404621              7,440      faults
+     4.000404621              1,266      sched:sched_switch
+     5.040196079              2,458      faults
+     5.040196079                556      sched:sched_switch
+     2.000191939              6,870      faults
+     2.000191939              1,170      sched:sched_switch
+     4.000414103                541      faults
+     4.000414103                902      sched:sched_switch
+     5.000809863                450      faults
+     5.000809863                364      sched:sched_switch
+  #
+
+Committer notes:
+
+This was broken since the cset introducing the --interval feature, i.e.
+--repeat + --interval wasn't tested at that point, add the Fixes tag so
+that automatic scripts can pick this up.
+
+Fixes: 13370a9b5bb8 ("perf stat: Add interval printing")
+Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: stable@vger.kernel.org # v3.9+
+Link: http://lore.kernel.org/lkml/20190904094738.9558-2-srikar@linux.vnet.ibm.com
+[ Fixed up conflicts with libperf, i.e. some perf_{evsel,evlist} lost the 'perf' prefix ]
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-stat.c |  3 +++
+ tools/perf/util/stat.c    | 17 +++++++++++++++++
+ tools/perf/util/stat.h    |  1 +
+ 3 files changed, 21 insertions(+)
+
+diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
+index b6c1c9939c2f1..0801e0ffba4ae 100644
+--- a/tools/perf/builtin-stat.c
++++ b/tools/perf/builtin-stat.c
+@@ -2769,6 +2769,9 @@ int cmd_stat(int argc, const char **argv)
+                       fprintf(output, "[ perf stat: executing run #%d ... ]\n",
+                               run_idx + 1);
++              if (run_idx != 0)
++                      perf_evlist__reset_prev_raw_counts(evsel_list);
++
+               status = run_perf_stat(argc, argv);
+               if (forever && status != -1 && !interval) {
+                       print_counters(NULL, argc, argv);
+diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
+index c9bae5fb8b479..d028c2786802e 100644
+--- a/tools/perf/util/stat.c
++++ b/tools/perf/util/stat.c
+@@ -154,6 +154,15 @@ static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+       evsel->prev_raw_counts = NULL;
+ }
++static void perf_evsel__reset_prev_raw_counts(struct perf_evsel *evsel)
++{
++      if (evsel->prev_raw_counts) {
++              evsel->prev_raw_counts->aggr.val = 0;
++              evsel->prev_raw_counts->aggr.ena = 0;
++              evsel->prev_raw_counts->aggr.run = 0;
++       }
++}
++
+ static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+ {
+       int ncpus = perf_evsel__nr_cpus(evsel);
+@@ -204,6 +213,14 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist)
+       }
+ }
++void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist)
++{
++      struct perf_evsel *evsel;
++
++      evlist__for_each_entry(evlist, evsel)
++              perf_evsel__reset_prev_raw_counts(evsel);
++}
++
+ static void zero_per_pkg(struct perf_evsel *counter)
+ {
+       if (counter->per_pkg_mask)
+diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
+index 96326b1f94438..bdfbed8e2df28 100644
+--- a/tools/perf/util/stat.h
++++ b/tools/perf/util/stat.h
+@@ -100,6 +100,7 @@ void perf_stat__collect_metric_expr(struct perf_evlist *);
+ int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
+ void perf_evlist__free_stats(struct perf_evlist *evlist);
+ void perf_evlist__reset_stats(struct perf_evlist *evlist);
++void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist);
+ int perf_stat_process_counter(struct perf_stat_config *config,
+                             struct perf_evsel *counter);
+-- 
+2.20.1
+
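
The tell-tale garbage values above are not random: 18446744073709547520
is 2^64 - 4096, i.e. an unsigned 64-bit subtraction that wrapped
because prev_raw_counts still held the larger, final counts of the
previous run. A demonstration with illustrative counts chosen to
reproduce the exact wrap:

  #include <inttypes.h>
  #include <stdio.h>

  int main(void)
  {
          /* final count of the previous run, left in prev_raw_counts */
          uint64_t prev_run_final = 1316795246;
          /* smaller count from the fresh run's first interval */
          uint64_t new_run_first  = 1316791150;

          /* the interval delta: current - prev wraps in u64 math */
          printf("%" PRIu64 "\n", new_run_first - prev_run_final);
          /* prints 18446744073709547520 == 2^64 - 4096, matching the
           * bogus counts in the log above */
          return 0;
  }
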
diff --git a/queue-4.14/perf-tools-fix-segfault-in-cpu_cache_level__read.patch b/queue-4.14/perf-tools-fix-segfault-in-cpu_cache_level__read.patch
new file mode 100644 (file)
index 0000000..f2fa23b
--- /dev/null
@@ -0,0 +1,63 @@
+From 8902bc1fb81aa7b694fba14590f0d893c5efb32a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Sep 2019 12:52:35 +0200
+Subject: perf tools: Fix segfault in cpu_cache_level__read()
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+[ Upstream commit 0216234c2eed1367a318daeb9f4a97d8217412a0 ]
+
+We release the wrong pointer on the error path in the
+cpu_cache_level__read() function, leading to a segfault:
+
+  (gdb) r record ls
+  Starting program: /root/perf/tools/perf/perf record ls
+  ...
+  [ perf record: Woken up 1 times to write data ]
+  double free or corruption (out)
+
+  Thread 1 "perf" received signal SIGABRT, Aborted.
+  0x00007ffff7463798 in raise () from /lib64/power9/libc.so.6
+  (gdb) bt
+  #0  0x00007ffff7463798 in raise () from /lib64/power9/libc.so.6
+  #1  0x00007ffff7443bac in abort () from /lib64/power9/libc.so.6
+  #2  0x00007ffff74af8bc in __libc_message () from /lib64/power9/libc.so.6
+  #3  0x00007ffff74b92b8 in malloc_printerr () from /lib64/power9/libc.so.6
+  #4  0x00007ffff74bb874 in _int_free () from /lib64/power9/libc.so.6
+  #5  0x0000000010271260 in __zfree (ptr=0x7fffffffa0b0) at ../../lib/zalloc..
+  #6  0x0000000010139340 in cpu_cache_level__read (cache=0x7fffffffa090, cac..
+  #7  0x0000000010143c90 in build_caches (cntp=0x7fffffffa118, size=<optimiz..
+  ...
+
+Release the proper pointer instead.
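+
+As a minimal sketch of the rule this follows (hypothetical names, not
+the perf code itself): on each error path, free only the buffers that
+the earlier, successful reads have allocated.
+
+  #include <stdlib.h>
+
+  struct level { char *size, *type, *map; };
+
+  /* assumed helper: allocates *out on success, like sysfs__read_str() */
+  extern int read_str(const char *file, char **out);
+
+  int level_read(struct level *l)
+  {
+          if (read_str("size", &l->size))
+                  return -1;
+          if (read_str("type", &l->type)) {
+                  free(l->size);
+                  return -1;
+          }
+          if (read_str("map", &l->map)) {
+                  free(l->size);  /* not l->map: it was never allocated */
+                  free(l->type);
+                  return -1;
+          }
+          return 0;
+  }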
+
+Fixes: 720e98b5faf1 ("perf tools: Add perf data cache feature")
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Michael Petlan <mpetlan@redhat.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org # v4.6+
+Link: http://lore.kernel.org/lkml/20190912105235.10689-1-jolsa@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/header.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
+index e1fe446f65daa..c892bb4f26c31 100644
+--- a/tools/perf/util/header.c
++++ b/tools/perf/util/header.c
+@@ -1063,7 +1063,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev
+       scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+       if (sysfs__read_str(file, &cache->map, &len)) {
+-              free(cache->map);
++              free(cache->size);
+               free(cache->type);
+               return -1;
+       }
+-- 
+2.20.1
+
diff --git a/queue-4.14/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch b/queue-4.14/perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch
new file mode 100644 (file)
index 0000000..0b0a135
--- /dev/null
@@ -0,0 +1,51 @@
+From 055b8cc0e20deba0cf1a12fe581b9c202c16fbe6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Sep 2019 14:36:48 -0300
+Subject: perf unwind: Fix libunwind build failure on i386 systems
+
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+
+[ Upstream commit 26acf400d2dcc72c7e713e1f55db47ad92010cc2 ]
+
+Naresh Kamboju reported that on i386 builds pr_err() doesn't get
+defined properly due to header ordering:
+
+  perf-in.o: In function `libunwind__x86_reg_id':
+  tools/perf/util/libunwind/../../arch/x86/util/unwind-libunwind.c:109:
+  undefined reference to `pr_err'
+
+Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: David Ahern <dsahern@gmail.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/arch/x86/util/unwind-libunwind.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
+index 05920e3edf7a7..47357973b55b2 100644
+--- a/tools/perf/arch/x86/util/unwind-libunwind.c
++++ b/tools/perf/arch/x86/util/unwind-libunwind.c
+@@ -1,11 +1,11 @@
+ // SPDX-License-Identifier: GPL-2.0
+ #include <errno.h>
++#include "../../util/debug.h"
+ #ifndef REMOTE_UNWIND_LIBUNWIND
+ #include <libunwind.h>
+ #include "perf_regs.h"
+ #include "../../util/unwind.h"
+-#include "../../util/debug.h"
+ #endif
+ #ifdef HAVE_ARCH_X86_64_SUPPORT
+-- 
+2.20.1
+
diff --git a/queue-4.14/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch b/queue-4.14/pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch
new file mode 100644 (file)
index 0000000..b739682
--- /dev/null
@@ -0,0 +1,48 @@
+From 01e1fa662c38b168c522c19dead65b629e89cc11 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Sep 2019 07:23:40 -0400
+Subject: pNFS: Ensure we do clear the return-on-close layout stateid on fatal
+ errors
+
+From: Trond Myklebust <trondmy@gmail.com>
+
+[ Upstream commit 9c47b18cf722184f32148784189fca945a7d0561 ]
+
+If the server rejected our layout return with a state error such as
+NFS4ERR_BAD_STATEID, or even a stale inode error, then we do want
+to clear out all the remaining layout segments and mark that stateid
+as invalid.
+
+Fixes: 1c5bd76d17cca ("pNFS: Enable layoutreturn operation for...")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/pnfs.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
+index 96867fb159bf7..ec04cce31814b 100644
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1319,10 +1319,15 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
+       const nfs4_stateid *res_stateid = NULL;
+       struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
+-      if (ret == 0) {
+-              arg_stateid = &args->stateid;
++      switch (ret) {
++      case -NFS4ERR_NOMATCHING_LAYOUT:
++              break;
++      case 0:
+               if (res->lrs_present)
+                       res_stateid = &res->stateid;
++              /* Fallthrough */
++      default:
++              arg_stateid = &args->stateid;
+       }
+       pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
+                       res_stateid);
+-- 
+2.20.1
+
diff --git a/queue-4.14/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch b/queue-4.14/powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch
new file mode 100644 (file)
index 0000000..8a3cb70
--- /dev/null
@@ -0,0 +1,176 @@
+From d3624668bbdb89b47fe8d2722ff73e3d59d7e390 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 May 2019 13:15:52 +0530
+Subject: powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt()
+
+From: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
+
+[ Upstream commit c784be435d5dae28d3b03db31753dd7a18733f0c ]
+
+The calls to arch_add_memory()/arch_remove_memory() are always made
+with the read-side cpu_hotplug_lock acquired via memory_hotplug_begin().
+On pSeries, arch_add_memory()/arch_remove_memory() eventually call
+resize_hpt() which in turn calls stop_machine() which acquires the
+read-side cpu_hotplug_lock again, thereby resulting in the recursive
+acquisition of this lock.
+
+In the absence of CONFIG_PROVE_LOCKING, we hadn't observed a system
+lockup during a memory hotplug operation because cpus_read_lock() is a
+per-cpu rwsem read: in the fast path (when there is no writer, which in
+our case would be a CPU-hotplug operation) it simply increments the
+read_count on the semaphore. Thus a recursive read in the fast path
+doesn't cause any problems.
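+
+Roughly, the read-side fast path looks like this (a simplified sketch
+with hypothetical names, not the exact kernel implementation):
+
+  static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+  {
+          preempt_disable();
+          if (likely(!writer_pending(sem)))
+                  /* fast path: per-cpu increment, never blocks */
+                  __this_cpu_inc(*sem->read_count);
+          else
+                  /* slow path: may sleep behind a pending writer */
+                  percpu_down_read_slowpath(sem);
+          preempt_enable();
+  }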
+
+However, we can hit this problem in practice if there is a concurrent
+CPU-hotplug operation in progress which is waiting to acquire the
+write-side of the lock. This causes the second recursive read to block
+until the writer finishes, while the writer itself is blocked since the
+first read still holds the lock. Thus both the reader and the writer
+fail to make any progress, blocking both CPU-hotplug and memory-hotplug
+operations.
+
+Memory-Hotplug                         CPU-Hotplug
+CPU 0                                  CPU 1
+------                                  ------
+
+1. down_read(cpu_hotplug_lock.rw_sem)
+   [memory_hotplug_begin]
+                                       2. down_write(cpu_hotplug_lock.rw_sem)
+                                       [cpu_up/cpu_down]
+3. down_read(cpu_hotplug_lock.rw_sem)
+   [stop_machine()]
+
+Lockdep complains as follows in these code-paths.
+
+ swapper/0/1 is trying to acquire lock:
+ (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: stop_machine+0x2c/0x60
+
+but task is already holding lock:
+(____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50
+
+ other info that might help us debug this:
+  Possible unsafe locking scenario:
+
+        CPU0
+        ----
+   lock(cpu_hotplug_lock.rw_sem);
+   lock(cpu_hotplug_lock.rw_sem);
+
+  *** DEADLOCK ***
+
+  May be due to missing lock nesting notation
+
+ 3 locks held by swapper/0/1:
+  #0: (____ptrval____) (&dev->mutex){....}, at: __driver_attach+0x12c/0x1b0
+  #1: (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50
+  #2: (____ptrval____) (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x54/0x1a0
+
+stack backtrace:
+ CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc5-58373-gbc99402235f3-dirty #166
+ Call Trace:
+   dump_stack+0xe8/0x164 (unreliable)
+   __lock_acquire+0x1110/0x1c70
+   lock_acquire+0x240/0x290
+   cpus_read_lock+0x64/0xf0
+   stop_machine+0x2c/0x60
+   pseries_lpar_resize_hpt+0x19c/0x2c0
+   resize_hpt_for_hotplug+0x70/0xd0
+   arch_add_memory+0x58/0xfc
+   devm_memremap_pages+0x5e8/0x8f0
+   pmem_attach_disk+0x764/0x830
+   nvdimm_bus_probe+0x118/0x240
+   really_probe+0x230/0x4b0
+   driver_probe_device+0x16c/0x1e0
+   __driver_attach+0x148/0x1b0
+   bus_for_each_dev+0x90/0x130
+   driver_attach+0x34/0x50
+   bus_add_driver+0x1a8/0x360
+   driver_register+0x108/0x170
+   __nd_driver_register+0xd0/0xf0
+   nd_pmem_driver_init+0x34/0x48
+   do_one_initcall+0x1e0/0x45c
+   kernel_init_freeable+0x540/0x64c
+   kernel_init+0x2c/0x160
+   ret_from_kernel_thread+0x5c/0x68
+
+Fix this issue by
+  1) Requiring that all calls to pseries_lpar_resize_hpt() be made
+     with cpu_hotplug_lock held.
+
+  2) Invoking stop_machine_cpuslocked() in pseries_lpar_resize_hpt(),
+     as a consequence of 1).
+
+  3) To satisfy 1), calling mmu_hash_ops.resize_hpt() in
+     hpt_order_set() with cpu_hotplug_lock held.
+
+Fixes: dbcf929c0062 ("powerpc/pseries: Add support for hash table resizing")
+Cc: stable@vger.kernel.org # v4.11+
+Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/1557906352-29048-1-git-send-email-ego@linux.vnet.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/hash_utils_64.c       | 9 ++++++++-
+ arch/powerpc/platforms/pseries/lpar.c | 8 ++++++--
+ 2 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
+index 87687e46b48bb..58c14749bb0c1 100644
+--- a/arch/powerpc/mm/hash_utils_64.c
++++ b/arch/powerpc/mm/hash_utils_64.c
+@@ -35,6 +35,7 @@
+ #include <linux/memblock.h>
+ #include <linux/context_tracking.h>
+ #include <linux/libfdt.h>
++#include <linux/cpu.h>
+ #include <asm/debugfs.h>
+ #include <asm/processor.h>
+@@ -1852,10 +1853,16 @@ static int hpt_order_get(void *data, u64 *val)
+ static int hpt_order_set(void *data, u64 val)
+ {
++      int ret;
++
+       if (!mmu_hash_ops.resize_hpt)
+               return -ENODEV;
+-      return mmu_hash_ops.resize_hpt(val);
++      cpus_read_lock();
++      ret = mmu_hash_ops.resize_hpt(val);
++      cpus_read_unlock();
++
++      return ret;
+ }
+ DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
+index 55e97565ed2dd..eb738ef577926 100644
+--- a/arch/powerpc/platforms/pseries/lpar.c
++++ b/arch/powerpc/platforms/pseries/lpar.c
+@@ -643,7 +643,10 @@ static int pseries_lpar_resize_hpt_commit(void *data)
+       return 0;
+ }
+-/* Must be called in user context */
++/*
++ * Must be called in process context. The caller must hold the
++ * cpus_lock.
++ */
+ static int pseries_lpar_resize_hpt(unsigned long shift)
+ {
+       struct hpt_resize_state state = {
+@@ -699,7 +702,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
+       t1 = ktime_get();
+-      rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
++      rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
++                                   &state, NULL);
+       t2 = ktime_get();
+-- 
+2.20.1
+
diff --git a/queue-4.14/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch b/queue-4.14/pwm-stm32-lp-add-check-in-case-requested-period-cann.patch
new file mode 100644 (file)
index 0000000..88d1c74
--- /dev/null
@@ -0,0 +1,49 @@
+From 83b439a68c69a2f8bc84072638076ede5b9df7e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Sep 2019 16:54:21 +0200
+Subject: pwm: stm32-lp: Add check in case requested period cannot be achieved
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Fabrice Gasnier <fabrice.gasnier@st.com>
+
+[ Upstream commit c91e3234c6035baf5a79763cb4fcd5d23ce75c2b ]
+
+Depending on the clock tree configuration, the LPTimer may use a 32 kHz
+clock for counting, in which case the PWM output frequency range is
+limited. Although unlikely, nothing prevents the user from requesting a
+PWM frequency above the counting clock (32 kHz, for instance):
+- This causes (prd - 1) = 0xffff to be written to the ARR register later
+in the apply() routine.
+This results in a badly configured PWM period (and also duty_cycle).
+Add a check to report an error in such a case.
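+
+As a worked example (assuming a 32768 Hz counting clock and a requested
+period of 10000 ns, i.e. a 100 kHz PWM):
+
+  div = 32768 * 10000;          /* clk_get_rate(priv->clk) * state->period */
+  do_div(div, NSEC_PER_SEC);    /* 327680000 / 1000000000 = 0 */
+  prd = div;                    /* 0, so (prd - 1) wraps to 0xffff in ARR */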
+
+Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
+Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pwm/pwm-stm32-lp.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
+index 9793b296108ff..3f2e4ef695d75 100644
+--- a/drivers/pwm/pwm-stm32-lp.c
++++ b/drivers/pwm/pwm-stm32-lp.c
+@@ -59,6 +59,12 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+       /* Calculate the period and prescaler value */
+       div = (unsigned long long)clk_get_rate(priv->clk) * state->period;
+       do_div(div, NSEC_PER_SEC);
++      if (!div) {
++              /* Clock is too slow to achieve requested period. */
++              dev_dbg(priv->chip.dev, "Can't reach %u ns\n",  state->period);
++              return -EINVAL;
++      }
++
+       prd = div;
+       while (div > STM32_LPTIM_MAX_ARR) {
+               presc++;
+-- 
+2.20.1
+
diff --git a/queue-4.14/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch b/queue-4.14/sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch
new file mode 100644 (file)
index 0000000..396ea0e
--- /dev/null
@@ -0,0 +1,85 @@
+From 86ce63e06a0a63589e5e143c211b717751914e2d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Sep 2019 06:53:28 +0000
+Subject: sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr()
+
+From: KeMeng Shi <shikemeng@huawei.com>
+
+[ Upstream commit 714e501e16cd473538b609b3e351b2cc9f7f09ed ]
+
+An oops can be triggered in the scheduler when running qemu on arm64:
+
+ Unable to handle kernel paging request at virtual address ffff000008effe40
+ Internal error: Oops: 96000007 [#1] SMP
+ Process migration/0 (pid: 12, stack limit = 0x00000000084e3736)
+ pstate: 20000085 (nzCv daIf -PAN -UAO)
+ pc : __ll_sc___cmpxchg_case_acq_4+0x4/0x20
+ lr : move_queued_task.isra.21+0x124/0x298
+ ...
+ Call trace:
+  __ll_sc___cmpxchg_case_acq_4+0x4/0x20
+  __migrate_task+0xc8/0xe0
+  migration_cpu_stop+0x170/0x180
+  cpu_stopper_thread+0xec/0x178
+  smpboot_thread_fn+0x1ac/0x1e8
+  kthread+0x134/0x138
+  ret_from_fork+0x10/0x18
+
+__set_cpus_allowed_ptr() will choose an active dest_cpu from the
+affinity mask to migrate the process to if the process is not currently
+running on any of the CPUs specified in the mask. However, it will
+choose an invalid dest_cpu (dest_cpu >= nr_cpu_ids, 1024 in my virtual
+machine) if the CPUs in the affinity mask are deactivated by cpu_down()
+after the cpumask_intersects() check. The subsequent cpumask_test_cpu()
+of dest_cpu then reads past the end of the mask and may pass if the
+corresponding bit is coincidentally set. As a consequence, the kernel
+will access an invalid rq address associated with the invalid CPU in
+migration_cpu_stop->__migrate_task->move_queued_task and the oops occurs.
+
+To reproduce the crash:
+
+  1) A process repeatedly binds itself to cpu0 and cpu1 in turn by calling
+  sched_setaffinity.
+
+  2) A shell script repeatedly does "echo 0 > /sys/devices/system/cpu/cpu1/online"
+  and "echo 1 > /sys/devices/system/cpu/cpu1/online" in turn.
+
+  3) The oops appears if the bit for the invalid CPU happens to be set
+  in the memory just past the tested cpumask.
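+
+A minimal sketch of step 1 (an assumed reproducer, not the original
+test program):
+
+  #define _GNU_SOURCE
+  #include <sched.h>
+
+  int main(void)
+  {
+          cpu_set_t set;
+          int cpu = 0;
+
+          for (;;) {
+                  CPU_ZERO(&set);
+                  CPU_SET(cpu, &set);
+                  /* races with cpu1 going offline via the shell script */
+                  sched_setaffinity(0, sizeof(set), &set);
+                  cpu ^= 1;
+          }
+  }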
+
+Signed-off-by: KeMeng Shi <shikemeng@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lkml.kernel.org/r/1568616808-16808-1-git-send-email-shikemeng@huawei.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 3d24d401b9d42..32ba789c544ca 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1111,7 +1111,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
+       if (cpumask_equal(&p->cpus_allowed, new_mask))
+               goto out;
+-      if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
++      dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
++      if (dest_cpu >= nr_cpu_ids) {
+               ret = -EINVAL;
+               goto out;
+       }
+@@ -1132,7 +1133,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
+       if (cpumask_test_cpu(task_cpu(p), new_mask))
+               goto out;
+-      dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+       if (task_running(rq, p) || p->state == TASK_WAKING) {
+               struct migration_arg arg = { p, dest_cpu };
+               /* Need help from migration thread: drop lock and wait. */
+-- 
+2.20.1
+
index 16af8392f67cf8f388fac59267f2407b25b35393..969babd083cf9c56223ef7d5d8b5cb85c5b68482 100644 (file)
@@ -25,3 +25,31 @@ revert-locking-pvqspinlock-don-t-wait-if-vcpu-is-preempted.patch
 xen-xenbus-fix-self-deadlock-after-killing-user-process.patch
 ieee802154-atusb-fix-use-after-free-at-disconnect.patch
 cfg80211-initialize-on-stack-chandefs.patch
+ima-always-return-negative-code-for-error.patch
+fs-nfs-fix-possible-null-pointer-dereferences-in-enc.patch
+9p-avoid-attaching-writeback_fid-on-mmap-with-type-p.patch
+xen-pci-reserve-mcfg-areas-earlier.patch
+ceph-fix-directories-inode-i_blkbits-initialization.patch
+ceph-reconnect-connection-if-session-hang-in-opening.patch
+watchdog-aspeed-add-support-for-ast2600.patch
+netfilter-nf_tables-allow-lookups-in-dynamic-sets.patch
+drm-amdgpu-check-for-valid-number-of-registers-to-re.patch
+pnfs-ensure-we-do-clear-the-return-on-close-layout-s.patch
+pwm-stm32-lp-add-check-in-case-requested-period-cann.patch
+thermal-fix-use-after-free-when-unregistering-therma.patch
+fuse-fix-memleak-in-cuse_channel_open.patch
+sched-core-fix-migration-to-invalid-cpu-in-__set_cpu.patch
+perf-build-add-detection-of-java-11-openjdk-devel-pa.patch
+kernel-elfcore.c-include-proper-prototypes.patch
+perf-unwind-fix-libunwind-build-failure-on-i386-syst.patch
+kvm-ppc-book3s-hv-xive-free-escalation-interrupts-be.patch
+nbd-fix-crash-when-the-blksize-is-zero.patch
+block-ndb-add-wq_unbound-to-the-knbd-recv-workqueue.patch
+nbd-fix-max-number-of-supported-devs.patch
+powerpc-pseries-fix-cpu_hotplug_lock-acquisition-in-.patch
+tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch
+tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch
+perf-tools-fix-segfault-in-cpu_cache_level__read.patch
+perf-stat-fix-a-segmentation-fault-when-using-repeat.patch
+perf-stat-reset-previous-counts-on-repeat-with-inter.patch
+drm-i915-userptr-acquire-the-page-lock-around-set_pa.patch
diff --git a/queue-4.14/thermal-fix-use-after-free-when-unregistering-therma.patch b/queue-4.14/thermal-fix-use-after-free-when-unregistering-therma.patch
new file mode 100644 (file)
index 0000000..349dc71
--- /dev/null
@@ -0,0 +1,134 @@
+From bb8c8d7dfaf61897a792df9759cb79a298556a23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2019 13:14:52 +0300
+Subject: thermal: Fix use-after-free when unregistering thermal zone device
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 1851799e1d2978f68eea5d9dff322e121dcf59c1 ]
+
+thermal_zone_device_unregister() cancels the delayed work that polls the
+thermal zone, but it does not wait for it to finish. This is racy with
+respect to the freeing of the thermal zone device, which can result in a
+use-after-free [1].
+
+Fix this by waiting for the delayed work to finish before freeing the
+thermal zone device. Note that thermal_zone_device_set_polling() is
+never invoked from an atomic context, so it is safe to call
+cancel_delayed_work_sync(), which can block.
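+
+The rule this enforces, as a minimal sketch (hypothetical driver
+object, not the thermal core itself): delayed work that dereferences an
+object must be cancelled synchronously before the object is freed.
+
+  struct obj {
+          struct delayed_work poll_queue;
+          /* ... */
+  };
+
+  static void obj_teardown(struct obj *o)
+  {
+          /* waits for a concurrently running work callback to finish */
+          cancel_delayed_work_sync(&o->poll_queue);
+          kfree(o);  /* now safe: no callback can touch *o anymore */
+  }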
+
+[1]
+[  +0.002221] ==================================================================
+[  +0.000064] BUG: KASAN: use-after-free in __mutex_lock+0x1076/0x11c0
+[  +0.000016] Read of size 8 at addr ffff8881e48e0450 by task kworker/1:0/17
+
+[  +0.000023] CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 5.2.0-rc6-custom-02495-g8e73ca3be4af #1701
+[  +0.000010] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016
+[  +0.000016] Workqueue: events_freezable_power_ thermal_zone_device_check
+[  +0.000012] Call Trace:
+[  +0.000021]  dump_stack+0xa9/0x10e
+[  +0.000020]  print_address_description.cold.2+0x9/0x25e
+[  +0.000018]  __kasan_report.cold.3+0x78/0x9d
+[  +0.000016]  kasan_report+0xe/0x20
+[  +0.000016]  __mutex_lock+0x1076/0x11c0
+[  +0.000014]  step_wise_throttle+0x72/0x150
+[  +0.000018]  handle_thermal_trip+0x167/0x760
+[  +0.000019]  thermal_zone_device_update+0x19e/0x5f0
+[  +0.000019]  process_one_work+0x969/0x16f0
+[  +0.000017]  worker_thread+0x91/0xc40
+[  +0.000014]  kthread+0x33d/0x400
+[  +0.000015]  ret_from_fork+0x3a/0x50
+
+[  +0.000020] Allocated by task 1:
+[  +0.000015]  save_stack+0x19/0x80
+[  +0.000015]  __kasan_kmalloc.constprop.4+0xc1/0xd0
+[  +0.000014]  kmem_cache_alloc_trace+0x152/0x320
+[  +0.000015]  thermal_zone_device_register+0x1b4/0x13a0
+[  +0.000015]  mlxsw_thermal_init+0xc92/0x23d0
+[  +0.000014]  __mlxsw_core_bus_device_register+0x659/0x11b0
+[  +0.000013]  mlxsw_core_bus_device_register+0x3d/0x90
+[  +0.000013]  mlxsw_pci_probe+0x355/0x4b0
+[  +0.000014]  local_pci_probe+0xc3/0x150
+[  +0.000013]  pci_device_probe+0x280/0x410
+[  +0.000013]  really_probe+0x26a/0xbb0
+[  +0.000013]  driver_probe_device+0x208/0x2e0
+[  +0.000013]  device_driver_attach+0xfe/0x140
+[  +0.000013]  __driver_attach+0x110/0x310
+[  +0.000013]  bus_for_each_dev+0x14b/0x1d0
+[  +0.000013]  driver_register+0x1c0/0x400
+[  +0.000015]  mlxsw_sp_module_init+0x5d/0xd3
+[  +0.000014]  do_one_initcall+0x239/0x4dd
+[  +0.000013]  kernel_init_freeable+0x42b/0x4e8
+[  +0.000012]  kernel_init+0x11/0x18b
+[  +0.000013]  ret_from_fork+0x3a/0x50
+
+[  +0.000015] Freed by task 581:
+[  +0.000013]  save_stack+0x19/0x80
+[  +0.000014]  __kasan_slab_free+0x125/0x170
+[  +0.000013]  kfree+0xf3/0x310
+[  +0.000013]  thermal_release+0xc7/0xf0
+[  +0.000014]  device_release+0x77/0x200
+[  +0.000014]  kobject_put+0x1a8/0x4c0
+[  +0.000014]  device_unregister+0x38/0xc0
+[  +0.000014]  thermal_zone_device_unregister+0x54e/0x6a0
+[  +0.000014]  mlxsw_thermal_fini+0x184/0x35a
+[  +0.000014]  mlxsw_core_bus_device_unregister+0x10a/0x640
+[  +0.000013]  mlxsw_devlink_core_bus_device_reload+0x92/0x210
+[  +0.000015]  devlink_nl_cmd_reload+0x113/0x1f0
+[  +0.000014]  genl_family_rcv_msg+0x700/0xee0
+[  +0.000013]  genl_rcv_msg+0xca/0x170
+[  +0.000013]  netlink_rcv_skb+0x137/0x3a0
+[  +0.000012]  genl_rcv+0x29/0x40
+[  +0.000013]  netlink_unicast+0x49b/0x660
+[  +0.000013]  netlink_sendmsg+0x755/0xc90
+[  +0.000013]  __sys_sendto+0x3de/0x430
+[  +0.000013]  __x64_sys_sendto+0xe2/0x1b0
+[  +0.000013]  do_syscall_64+0xa4/0x4d0
+[  +0.000013]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+[  +0.000017] The buggy address belongs to the object at ffff8881e48e0008
+               which belongs to the cache kmalloc-2k of size 2048
+[  +0.000012] The buggy address is located 1096 bytes inside of
+               2048-byte region [ffff8881e48e0008, ffff8881e48e0808)
+[  +0.000007] The buggy address belongs to the page:
+[  +0.000012] page:ffffea0007923800 refcount:1 mapcount:0 mapping:ffff88823680d0c0 index:0x0 compound_mapcount: 0
+[  +0.000020] flags: 0x200000000010200(slab|head)
+[  +0.000019] raw: 0200000000010200 ffffea0007682008 ffffea00076ab808 ffff88823680d0c0
+[  +0.000016] raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000
+[  +0.000007] page dumped because: kasan: bad access detected
+
+[  +0.000012] Memory state around the buggy address:
+[  +0.000012]  ffff8881e48e0300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  +0.000012]  ffff8881e48e0380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  +0.000012] >ffff8881e48e0400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  +0.000008]                                                  ^
+[  +0.000012]  ffff8881e48e0480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  +0.000012]  ffff8881e48e0500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  +0.000007] ==================================================================
+
+Fixes: b1569e99c795 ("ACPI: move thermal trip handling to generic thermal layer")
+Reported-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/thermal/thermal_core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
+index 17d6079c76429..456ef213dc141 100644
+--- a/drivers/thermal/thermal_core.c
++++ b/drivers/thermal/thermal_core.c
+@@ -299,7 +299,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
+               mod_delayed_work(system_freezable_wq, &tz->poll_queue,
+                                msecs_to_jiffies(delay));
+       else
+-              cancel_delayed_work(&tz->poll_queue);
++              cancel_delayed_work_sync(&tz->poll_queue);
+ }
+ static void monitor_thermal_zone(struct thermal_zone_device *tz)
+-- 
+2.20.1
+
diff --git a/queue-4.14/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch b/queue-4.14/tick-broadcast-hrtimer-fix-a-race-in-bc_set_next.patch
new file mode 100644 (file)
index 0000000..f445fb4
--- /dev/null
@@ -0,0 +1,173 @@
+From dc9408099d688618574fa6311308a663d0bd60b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Sep 2019 15:51:01 +0200
+Subject: tick: broadcast-hrtimer: Fix a race in bc_set_next
+
+From: Balasubramani Vivekanandan <balasubramani_vivekanandan@mentor.com>
+
+[ Upstream commit b9023b91dd020ad7e093baa5122b6968c48cc9e0 ]
+
+When a cpu requests broadcasting, before starting the tick broadcast
+hrtimer, bc_set_next() checks if the timer callback (bc_handler) is
+active using hrtimer_try_to_cancel(). But hrtimer_try_to_cancel() does
+not provide the required synchronization when the callback is active on
+another core.
+
+The callback could already have executed tick_handle_oneshot_broadcast()
+and returned, but there is still a small time window where
+hrtimer_try_to_cancel() returns -1. In that case bc_set_next() returns
+without doing anything, even though the next_event of the tick broadcast
+clock device has already been set to a timeout value.
+
+In the race condition diagram below, CPU #1 is running the timer callback
+and CPU #2 is entering idle state and so calls bc_set_next().
+
+In the worst case, the next_event will contain an expiry time, but the
+hrtimer will not be started; this happens when the racing callback
+returns HRTIMER_NORESTART. The hrtimer might never recover if all
+further requests from the CPUs to subscribe to tick broadcast have
+timeouts greater than the next_event of the tick broadcast clock
+device. This leads to a cascade of failures, finally noticed as RCU
+stall warnings.
+
+Here is a depiction of the race condition:
+
+CPU #1 (Running timer callback)                   CPU #2 (Enter idle
+                                                  and subscribe to
+                                                  tick broadcast)
+---------------------                             ---------------------
+
+__run_hrtimer()                                   tick_broadcast_enter()
+
+  bc_handler()                                      __tick_broadcast_oneshot_control()
+
+    tick_handle_oneshot_broadcast()
+
+      raw_spin_lock(&tick_broadcast_lock);
+
+      dev->next_event = KTIME_MAX;                  //wait for tick_broadcast_lock
+      //next_event for tick broadcast clock
+      set to KTIME_MAX since no other cores
+      subscribed to tick broadcasting
+
+      raw_spin_unlock(&tick_broadcast_lock);
+
+    if (dev->next_event == KTIME_MAX)
+      return HRTIMER_NORESTART
+    // callback function exits without
+       restarting the hrtimer                      //tick_broadcast_lock acquired
+                                                   raw_spin_lock(&tick_broadcast_lock);
+
+                                                   tick_broadcast_set_event()
+
+                                                     clockevents_program_event()
+
+                                                       dev->next_event = expires;
+
+                                                       bc_set_next()
+
+                                                         hrtimer_try_to_cancel()
+                                                         //returns -1 since the timer
+                                                         callback is active. Exits without
+                                                         restarting the timer
+  cpu_base->running = NULL;
+
+The comment saying that the hrtimer cannot be armed from within the
+callback is wrong: it is fine to start the hrtimer from within the
+callback. It is also safe to start the hrtimer from the enter/exit idle
+code while the broadcast handler is active, because the enter/exit idle
+code and the broadcast handler are synchronized via tick_broadcast_lock.
+So there is no need for the existing try-to-cancel logic; all of it can
+be removed, which eliminates the race condition as well.
+
+Fixes: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast")
+Originally-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Balasubramani Vivekanandan <balasubramani_vivekanandan@mentor.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20190926135101.12102-2-balasubramani_vivekanandan@mentor.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/tick-broadcast-hrtimer.c | 57 ++++++++++++++--------------
+ 1 file changed, 29 insertions(+), 28 deletions(-)
+
+diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
+index 58045eb976c38..c750c80570e88 100644
+--- a/kernel/time/tick-broadcast-hrtimer.c
++++ b/kernel/time/tick-broadcast-hrtimer.c
+@@ -44,34 +44,39 @@ static int bc_shutdown(struct clock_event_device *evt)
+  */
+ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
+ {
+-      int bc_moved;
+       /*
+-       * We try to cancel the timer first. If the callback is on
+-       * flight on some other cpu then we let it handle it. If we
+-       * were able to cancel the timer nothing can rearm it as we
+-       * own broadcast_lock.
++       * This is called either from enter/exit idle code or from the
++       * broadcast handler. In all cases tick_broadcast_lock is held.
+        *
+-       * However we can also be called from the event handler of
+-       * ce_broadcast_hrtimer itself when it expires. We cannot
+-       * restart the timer because we are in the callback, but we
+-       * can set the expiry time and let the callback return
+-       * HRTIMER_RESTART.
++       * hrtimer_cancel() cannot be called here neither from the
++       * broadcast handler nor from the enter/exit idle code. The idle
++       * code can run into the problem described in bc_shutdown() and the
++       * broadcast handler cannot wait for itself to complete for obvious
++       * reasons.
+        *
+-       * Since we are in the idle loop at this point and because
+-       * hrtimer_{start/cancel} functions call into tracing,
+-       * calls to these functions must be bound within RCU_NONIDLE.
++       * Each caller tries to arm the hrtimer on its own CPU, but if the
++       * hrtimer callback function is currently running, then
++       * hrtimer_start() cannot move it and the timer stays on the CPU on
++       * which it is assigned at the moment.
++       *
++       * As this can be called from idle code, the hrtimer_start()
++       * invocation has to be wrapped with RCU_NONIDLE() as
++       * hrtimer_start() can call into tracing.
+        */
+-      RCU_NONIDLE({
+-                      bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
+-                      if (bc_moved)
+-                              hrtimer_start(&bctimer, expires,
+-                                            HRTIMER_MODE_ABS_PINNED);});
+-      if (bc_moved) {
+-              /* Bind the "device" to the cpu */
+-              bc->bound_on = smp_processor_id();
+-      } else if (bc->bound_on == smp_processor_id()) {
+-              hrtimer_set_expires(&bctimer, expires);
+-      }
++      RCU_NONIDLE( {
++              hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
++              /*
++               * The core tick broadcast mode expects bc->bound_on to be set
++               * correctly to prevent a CPU which has the broadcast hrtimer
++               * armed from going deep idle.
++               *
++               * As tick_broadcast_lock is held, nothing can change the cpu
++               * base which was just established in hrtimer_start() above. So
++               * the below access is safe even without holding the hrtimer
++               * base lock.
++               */
++              bc->bound_on = bctimer.base->cpu_base->cpu;
++      } );
+       return 0;
+ }
+@@ -97,10 +102,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
+ {
+       ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
+-      if (clockevent_state_oneshot(&ce_broadcast_hrtimer))
+-              if (ce_broadcast_hrtimer.next_event != KTIME_MAX)
+-                      return HRTIMER_RESTART;
+-
+       return HRTIMER_NORESTART;
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.14/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch b/queue-4.14/tools-lib-traceevent-do-not-free-tep-cmdlines-in-add.patch
new file mode 100644 (file)
index 0000000..8b094bf
--- /dev/null
@@ -0,0 +1,57 @@
+From 0878dafe40bdef001fe83a72d4fdeef36f2016c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Aug 2019 15:05:28 -0400
+Subject: tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on
+ failure
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+[ Upstream commit e0d2615856b2046c2e8d5bfd6933f37f69703b0b ]
+
+If the re-allocation of tep->cmdlines succeeds, then the previous
+allocation of tep->cmdlines is freed by realloc(). If we later fail in
+add_new_comm(), we must not free cmdlines, and we should also assign
+tep->cmdlines to the new allocation. Otherwise, when freeing tep,
+tep->cmdlines will be pointing to garbage.
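+
+The general rule, as a minimal sketch (hypothetical names): once
+realloc() succeeds the old buffer is gone, so the owning pointer must
+be updated immediately, and later error paths must not free the new
+buffer that the owner now holds.
+
+  #include <stdlib.h>
+  #include <string.h>
+
+  struct table { struct entry { char *name; } *items; int count; };
+
+  int table_add(struct table *t, const char *name)
+  {
+          struct entry *items;
+
+          items = realloc(t->items, (t->count + 1) * sizeof(*items));
+          if (!items)
+                  return -1;       /* t->items is still valid here */
+          t->items = items;        /* update the owner right away */
+
+          items[t->count].name = strdup(name);
+          if (!items[t->count].name)
+                  return -1;       /* do NOT free(items): t owns it now */
+
+          t->count++;
+          return 0;
+  }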
+
+Fixes: a6d2a61ac653a ("tools lib traceevent: Remove some die() calls")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linux-trace-devel@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/20190828191819.970121417@goodmis.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/lib/traceevent/event-parse.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
+index 7989dd6289e7a..8211e8010e09b 100644
+--- a/tools/lib/traceevent/event-parse.c
++++ b/tools/lib/traceevent/event-parse.c
+@@ -268,10 +268,10 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid)
+               errno = ENOMEM;
+               return -1;
+       }
++      pevent->cmdlines = cmdlines;
+       cmdlines[pevent->cmdline_count].comm = strdup(comm);
+       if (!cmdlines[pevent->cmdline_count].comm) {
+-              free(cmdlines);
+               errno = ENOMEM;
+               return -1;
+       }
+@@ -282,7 +282,6 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid)
+               pevent->cmdline_count++;
+       qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
+-      pevent->cmdlines = cmdlines;
+       return 0;
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.14/watchdog-aspeed-add-support-for-ast2600.patch b/queue-4.14/watchdog-aspeed-add-support-for-ast2600.patch
new file mode 100644 (file)
index 0000000..ff4e2dd
--- /dev/null
@@ -0,0 +1,47 @@
+From 385d941e8b0c07d54c00f1d50cca929fa8fa9e87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Aug 2019 14:47:38 +0930
+Subject: watchdog: aspeed: Add support for AST2600
+
+From: Ryan Chen <ryan_chen@aspeedtech.com>
+
+[ Upstream commit b3528b4874480818e38e4da019d655413c233e6a ]
+
+The ast2600 can be supported by the same code as the ast2500.
+
+Signed-off-by: Ryan Chen <ryan_chen@aspeedtech.com>
+Signed-off-by: Joel Stanley <joel@jms.id.au>
+Reviewed-by: Guenter Roeck <linux@roeck-us.net>
+Link: https://lore.kernel.org/r/20190819051738.17370-3-joel@jms.id.au
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/watchdog/aspeed_wdt.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c
+index fd91007b4e41b..cee7334b2a000 100644
+--- a/drivers/watchdog/aspeed_wdt.c
++++ b/drivers/watchdog/aspeed_wdt.c
+@@ -38,6 +38,7 @@ static const struct aspeed_wdt_config ast2500_config = {
+ static const struct of_device_id aspeed_wdt_of_table[] = {
+       { .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config },
+       { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config },
++      { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config },
+       { },
+ };
+ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
+@@ -257,7 +258,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
+               set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
+       }
+-      if (of_device_is_compatible(np, "aspeed,ast2500-wdt")) {
++      if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) ||
++              (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) {
+               u32 reg = readl(wdt->base + WDT_RESET_WIDTH);
+               reg &= config->ext_pulse_width_mask;
+-- 
+2.20.1
+
diff --git a/queue-4.14/xen-pci-reserve-mcfg-areas-earlier.patch b/queue-4.14/xen-pci-reserve-mcfg-areas-earlier.patch
new file mode 100644 (file)
index 0000000..329f68c
--- /dev/null
@@ -0,0 +1,90 @@
+From b58cc84c5ff1892218406df05892f930e8162053 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Sep 2019 19:31:51 +0100
+Subject: xen/pci: reserve MCFG areas earlier
+
+From: Igor Druzhinin <igor.druzhinin@citrix.com>
+
+[ Upstream commit a4098bc6eed5e31e0391bcc068e61804c98138df ]
+
+If the MCFG area is not reserved in E820, Xen by default will defer its
+usage until Dom0 registers it explicitly after the ACPI parser
+recognizes it as a reserved resource in the DSDT. Having it reserved in
+E820 is not mandatory according to "PCI Firmware Specification, rev 3.2"
+(par. 4.1.2), and firmware is free to keep a hole in E820 in that place.
+Xen doesn't know what exactly is inside this hole since it lacks a full
+ACPI view of the platform, so it is potentially harmful to access the
+MCFG region without additional checks, as some machines are known to
+provide inconsistent information on the size of the region.
+
+Now xen_mcfg_late() runs after acpi_init(), which is too late, as some
+basic PCI enumeration starts exactly there as well. Trying to register
+a device prior to MCFG reservation causes multiple problems with PCIe
+extended capability initializations in Xen (e.g. SR-IOV VF BAR sizing).
+There are no convenient hooks for us to subscribe to, so register MCFG
+areas earlier, upon the first invocation of xen_add_device(). It should
+be safe to do this once, since all the boot-time buses must have their
+MCFG areas in the MCFG table already and we don't support PCI bus
+hot-plug.
+
+Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/xen/pci.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
+index 7494dbeb4409c..db58aaa4dc598 100644
+--- a/drivers/xen/pci.c
++++ b/drivers/xen/pci.c
+@@ -29,6 +29,8 @@
+ #include "../pci/pci.h"
+ #ifdef CONFIG_PCI_MMCONFIG
+ #include <asm/pci_x86.h>
++
++static int xen_mcfg_late(void);
+ #endif
+ static bool __read_mostly pci_seg_supported = true;
+@@ -40,7 +42,18 @@ static int xen_add_device(struct device *dev)
+ #ifdef CONFIG_PCI_IOV
+       struct pci_dev *physfn = pci_dev->physfn;
+ #endif
+-
++#ifdef CONFIG_PCI_MMCONFIG
++      static bool pci_mcfg_reserved = false;
++      /*
++       * Reserve MCFG areas in Xen on first invocation due to this being
++       * potentially called from inside of acpi_init immediately after
++       * MCFG table has been finally parsed.
++       */
++      if (!pci_mcfg_reserved) {
++              xen_mcfg_late();
++              pci_mcfg_reserved = true;
++      }
++#endif
+       if (pci_seg_supported) {
+               struct {
+                       struct physdev_pci_device_add add;
+@@ -213,7 +226,7 @@ static int __init register_xen_pci_notifier(void)
+ arch_initcall(register_xen_pci_notifier);
+ #ifdef CONFIG_PCI_MMCONFIG
+-static int __init xen_mcfg_late(void)
++static int xen_mcfg_late(void)
+ {
+       struct pci_mmcfg_region *cfg;
+       int rc;
+@@ -252,8 +265,4 @@ static int __init xen_mcfg_late(void)
+       }
+       return 0;
+ }
+-/*
+- * Needs to be done after acpi_init which are subsys_initcall.
+- */
+-subsys_initcall_sync(xen_mcfg_late);
+ #endif
+-- 
+2.20.1
+