]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.7
authorSasha Levin <sashal@kernel.org>
Sat, 4 Jul 2020 16:29:56 +0000 (12:29 -0400)
committerSasha Levin <sashal@kernel.org>
Sat, 4 Jul 2020 16:29:56 +0000 (12:29 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
31 files changed:
queue-5.7/drm-amdgpu-disable-ras-query-and-iject-during-gpu-re.patch [new file with mode: 0644]
queue-5.7/drm-amdgpu-fix-kernel-page-fault-issue-by-ras-recove.patch [new file with mode: 0644]
queue-5.7/drm-amdgpu-fix-non-pointer-dereference-for-non-ras-s.patch [new file with mode: 0644]
queue-5.7/drm-i915-gt-mark-timeline-cacheline-as-destroyed-aft.patch [new file with mode: 0644]
queue-5.7/io_uring-fix-current-mm-null-dereference-on-exit.patch [new file with mode: 0644]
queue-5.7/io_uring-fix-io_sq_thread-no-schedule-when-busy.patch [new file with mode: 0644]
queue-5.7/io_uring-fix-sq-io-poll-with-unsupported-opcodes.patch [new file with mode: 0644]
queue-5.7/kgdb-avoid-suspicious-rcu-usage-warning.patch [new file with mode: 0644]
queue-5.7/mm-dump_page-do-not-crash-with-invalid-mapping-point.patch [new file with mode: 0644]
queue-5.7/mm-slub-fix-stack-overruns-with-slub_stats.patch [new file with mode: 0644]
queue-5.7/mm-slub.c-fix-corrupted-freechain-in-deactivate_slab.patch [new file with mode: 0644]
queue-5.7/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch [new file with mode: 0644]
queue-5.7/nvme-multipath-fix-bogus-request-queue-reference-put.patch [new file with mode: 0644]
queue-5.7/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch [new file with mode: 0644]
queue-5.7/nvme-multipath-fix-deadlock-due-to-head-lock.patch [new file with mode: 0644]
queue-5.7/nvme-multipath-set-bdi-capabilities-once.patch [new file with mode: 0644]
queue-5.7/powerpc-book3s64-kvm-fix-secondary-page-table-walk-w.patch [new file with mode: 0644]
queue-5.7/powerpc-kvm-book3s-add-helper-to-walk-partition-scop.patch [new file with mode: 0644]
queue-5.7/rxrpc-fix-race-between-incoming-ack-parser-and-retra.patch [new file with mode: 0644]
queue-5.7/s390-debug-avoid-kernel-warning-on-too-large-number-.patch [new file with mode: 0644]
queue-5.7/sched-debug-make-sd-flags-sysctl-read-only.patch [new file with mode: 0644]
queue-5.7/seg6-fix-seg6_validate_srh-to-avoid-slab-out-of-boun.patch [new file with mode: 0644]
queue-5.7/series
queue-5.7/soc-ti-omap-prm-use-atomic-iopoll-instead-of-sleepin.patch [new file with mode: 0644]
queue-5.7/tipc-add-test-for-nagle-algorithm-effectiveness.patch [new file with mode: 0644]
queue-5.7/tipc-fix-kernel-warning-in-tipc_msg_append.patch [new file with mode: 0644]
queue-5.7/tipc-fix-null-pointer-dereference-in-__tipc_sendstre.patch [new file with mode: 0644]
queue-5.7/tools-lib-traceevent-add-append-function-helper-for-.patch [new file with mode: 0644]
queue-5.7/tools-lib-traceevent-handle-__attribute__-user-in-fi.patch [new file with mode: 0644]
queue-5.7/usb-usbtest-fix-missing-kfree-dev-buf-in-usbtest_dis.patch [new file with mode: 0644]
queue-5.7/usbnet-smsc95xx-fix-use-after-free-after-removal.patch [new file with mode: 0644]

diff --git a/queue-5.7/drm-amdgpu-disable-ras-query-and-iject-during-gpu-re.patch b/queue-5.7/drm-amdgpu-disable-ras-query-and-iject-during-gpu-re.patch
new file mode 100644 (file)
index 0000000..c55ef7e
--- /dev/null
@@ -0,0 +1,129 @@
+From 2d3b41695e0b3b40306466929f2bdf5ad9edea01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Mar 2020 16:01:14 +0800
+Subject: drm/amdgpu: disable ras query and iject during gpu reset
+
+From: John Clements <john.clements@amd.com>
+
+[ Upstream commit 61380faa4b4cc577df8a7ff5db5859bac6b351f7 ]
+
+added flag to ras context to indicate if ras query functionality is ready
+
+Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
+Signed-off-by: John Clements <john.clements@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 24 +++++++++++++++++++---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h    |  4 ++++
+ 3 files changed, 28 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index affde2de2a0db..59288653412db 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -4091,6 +4091,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+       need_full_reset = job_signaled = false;
+       INIT_LIST_HEAD(&device_list);
++      amdgpu_ras_set_error_query_ready(adev, false);
++
+       dev_info(adev->dev, "GPU %s begin!\n",
+               (in_ras_intr && !use_baco) ? "jobs stop":"reset");
+@@ -4147,6 +4149,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+       /* block all schedulers and reset given job's ring */
+       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+               if (tmp_adev != adev) {
++                      amdgpu_ras_set_error_query_ready(tmp_adev, false);
+                       amdgpu_device_lock_adev(tmp_adev, false);
+                       if (!amdgpu_sriov_vf(tmp_adev))
+                                       amdgpu_amdkfd_pre_reset(tmp_adev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index ab379b44679cc..aa6148d12d5a4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -80,6 +80,20 @@ atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+                               uint64_t addr);
++void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
++{
++      if (adev)
++              amdgpu_ras_get_context(adev)->error_query_ready = ready;
++}
++
++bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
++{
++      if (adev)
++              return amdgpu_ras_get_context(adev)->error_query_ready;
++
++      return false;
++}
++
+ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
+                                       size_t size, loff_t *pos)
+ {
+@@ -281,7 +295,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
+       struct ras_debug_if data;
+       int ret = 0;
+-      if (amdgpu_ras_intr_triggered()) {
++      if (!amdgpu_ras_get_error_query_ready(adev)) {
+               DRM_WARN("RAS WARN: error injection currently inaccessible\n");
+               return size;
+       }
+@@ -399,7 +413,7 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
+               .head = obj->head,
+       };
+-      if (amdgpu_ras_intr_triggered())
++      if (!amdgpu_ras_get_error_query_ready(obj->adev))
+               return snprintf(buf, PAGE_SIZE,
+                               "Query currently inaccessible\n");
+@@ -1896,8 +1910,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
+       }
+       /* in resume phase, no need to create ras fs node */
+-      if (adev->in_suspend || adev->in_gpu_reset)
++      if (adev->in_suspend || adev->in_gpu_reset) {
++              amdgpu_ras_set_error_query_ready(adev, true);
+               return 0;
++      }
+       if (ih_info->cb) {
+               r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
+@@ -1909,6 +1925,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
+       if (r)
+               goto sysfs;
++      amdgpu_ras_set_error_query_ready(adev, true);
++
+       return 0;
+ cleanup:
+       amdgpu_ras_sysfs_remove(adev, ras_block);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+index 55c3eceb390d4..e7df5d8429f82 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+@@ -334,6 +334,8 @@ struct amdgpu_ras {
+       uint32_t flags;
+       bool reboot;
+       struct amdgpu_ras_eeprom_control eeprom_control;
++
++      bool error_query_ready;
+ };
+ struct ras_fs_data {
+@@ -629,4 +631,6 @@ static inline void amdgpu_ras_intr_cleared(void)
+ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
++void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
++
+ #endif
+-- 
+2.25.1
+
diff --git a/queue-5.7/drm-amdgpu-fix-kernel-page-fault-issue-by-ras-recove.patch b/queue-5.7/drm-amdgpu-fix-kernel-page-fault-issue-by-ras-recove.patch
new file mode 100644 (file)
index 0000000..11d1c58
--- /dev/null
@@ -0,0 +1,50 @@
+From 5a211368384eb937b650d165da935777996f9977 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Apr 2020 23:41:07 +0800
+Subject: drm/amdgpu: fix kernel page fault issue by ras recovery on sGPU
+
+From: Guchun Chen <guchun.chen@amd.com>
+
+[ Upstream commit 12c17b9d62663c14a5343d6742682b3e67280754 ]
+
+When running ras uncorrectable error injection and triggering GPU
+reset on sGPU, below issue is observed. It's caused by the list
+uninitialized when accessing.
+
+[   80.047227] BUG: unable to handle page fault for address: ffffffffc0f4f750
+[   80.047300] #PF: supervisor write access in kernel mode
+[   80.047351] #PF: error_code(0x0003) - permissions violation
+[   80.047404] PGD 12c20e067 P4D 12c20e067 PUD 12c210067 PMD 41c4ee067 PTE 404316061
+[   80.047477] Oops: 0003 [#1] SMP PTI
+[   80.047516] CPU: 7 PID: 377 Comm: kworker/7:2 Tainted: G           OE     5.4.0-rc7-guchchen #1
+[   80.047594] Hardware name: System manufacturer System Product Name/TUF Z370-PLUS GAMING II, BIOS 0411 09/21/2018
+[   80.047888] Workqueue: events amdgpu_ras_do_recovery [amdgpu]
+
+Signed-off-by: Guchun Chen <guchun.chen@amd.com>
+Reviewed-by: John Clements <John.Clements@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index b0aa4e1ed4df7..cd18596b47d33 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -1444,9 +1444,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
+       struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, false);
+       /* Build list of devices to query RAS related errors */
+-      if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
++      if  (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+               device_list_handle = &hive->device_list;
+-      } else {
++      else {
++              INIT_LIST_HEAD(&device_list);
+               list_add_tail(&adev->gmc.xgmi.head, &device_list);
+               device_list_handle = &device_list;
+       }
+-- 
+2.25.1
+
diff --git a/queue-5.7/drm-amdgpu-fix-non-pointer-dereference-for-non-ras-s.patch b/queue-5.7/drm-amdgpu-fix-non-pointer-dereference-for-non-ras-s.patch
new file mode 100644 (file)
index 0000000..5f72d34
--- /dev/null
@@ -0,0 +1,69 @@
+From 45e2259f997932f1ffe6344d8edc124c3562ec48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Mar 2020 15:39:06 +0800
+Subject: drm/amdgpu: fix non-pointer dereference for non-RAS supported
+
+From: Evan Quan <evan.quan@amd.com>
+
+[ Upstream commit a9d82d2f91297679cfafd7e61c4bccdca6cd550d ]
+
+Backtrace on gpu recover test on Navi10.
+
+[ 1324.516681] RIP: 0010:amdgpu_ras_set_error_query_ready+0x15/0x20 [amdgpu]
+[ 1324.523778] Code: 4c 89 f7 e8 cd a2 a0 d8 e9 99 fe ff ff 45 31 ff e9 91 fe ff ff 0f 1f 44 00 00 55 48 85 ff 48 89 e5 74 0e 48 8b 87 d8 2b 01 00 <40> 88 b0 38 01 00 00 5d c3 66 90 0f 1f 44 00 00 55 31 c0 48 85 ff
+[ 1324.543452] RSP: 0018:ffffaa1040e4bd28 EFLAGS: 00010286
+[ 1324.549025] RAX: 0000000000000000 RBX: ffff911198b20000 RCX: 0000000000000000
+[ 1324.556217] RDX: 00000000000c0a01 RSI: 0000000000000000 RDI: ffff911198b20000
+[ 1324.563514] RBP: ffffaa1040e4bd28 R08: 0000000000001000 R09: ffff91119d0028c0
+[ 1324.570804] R10: ffffffff9a606b40 R11: 0000000000000000 R12: 0000000000000000
+[ 1324.578413] R13: ffffaa1040e4bd70 R14: ffff911198b20000 R15: 0000000000000000
+[ 1324.586464] FS:  00007f4441cbf540(0000) GS:ffff91119ed80000(0000) knlGS:0000000000000000
+[ 1324.595434] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 1324.601345] CR2: 0000000000000138 CR3: 00000003fcdf8004 CR4: 00000000003606e0
+[ 1324.608694] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 1324.616303] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 1324.623678] Call Trace:
+[ 1324.626270]  amdgpu_device_gpu_recover+0x6e7/0xc50 [amdgpu]
+[ 1324.632018]  ? seq_printf+0x4e/0x70
+[ 1324.636652]  amdgpu_debugfs_gpu_recover+0x50/0x80 [amdgpu]
+[ 1324.643371]  seq_read+0xda/0x420
+[ 1324.647601]  full_proxy_read+0x5c/0x90
+[ 1324.652426]  __vfs_read+0x1b/0x40
+[ 1324.656734]  vfs_read+0x8e/0x130
+[ 1324.660981]  ksys_read+0xa7/0xe0
+[ 1324.665201]  __x64_sys_read+0x1a/0x20
+[ 1324.669907]  do_syscall_64+0x57/0x1c0
+[ 1324.674517]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 1324.680654] RIP: 0033:0x7f44417cf081
+
+Signed-off-by: Evan Quan <evan.quan@amd.com>
+Reviewed-by: John Clements <John.Clements@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index aa6148d12d5a4..b0aa4e1ed4df7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -82,13 +82,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
+ {
+-      if (adev)
++      if (adev && amdgpu_ras_get_context(adev))
+               amdgpu_ras_get_context(adev)->error_query_ready = ready;
+ }
+ bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
+ {
+-      if (adev)
++      if (adev && amdgpu_ras_get_context(adev))
+               return amdgpu_ras_get_context(adev)->error_query_ready;
+       return false;
+-- 
+2.25.1
+
diff --git a/queue-5.7/drm-i915-gt-mark-timeline-cacheline-as-destroyed-aft.patch b/queue-5.7/drm-i915-gt-mark-timeline-cacheline-as-destroyed-aft.patch
new file mode 100644 (file)
index 0000000..375a649
--- /dev/null
@@ -0,0 +1,86 @@
+From a43143a8d4bebae62e9222ed3e35be96fefcfa12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Mar 2020 09:28:34 +0000
+Subject: drm/i915/gt: Mark timeline->cacheline as destroyed after rcu grace
+ period
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+[ Upstream commit 8e87e0139aff59c5961347ab1ef06814f092c439 ]
+
+Since we take advantage of RCU for some i915_active objects, like the
+intel_timeline_cacheline, we need to delay the i915_active_fini until
+after the RCU grace period and we perform the kfree -- that is until
+after all RCU protected readers.
+
+<3> [108.204873] ODEBUG: assert_init not available (active state 0) object type: i915_active hint: __cacheline_active+0x0/0x80 [i915]
+<4> [108.207377] WARNING: CPU: 3 PID: 2342 at lib/debugobjects.c:488 debug_print_object+0x67/0x90
+<4> [108.207400] Modules linked in: vgem snd_hda_codec_hdmi x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_intel_dspcfg snd_hda_codec ax88179_178a snd_hwdep usbnet btusb snd_hda_core btrtl mii btbcm btintel snd_pcm bluetooth ecdh_generic ecc i915 i2c_hid pinctrl_sunrisepoint pinctrl_intel intel_lpss_pci prime_numbers
+<4> [108.207587] CPU: 3 PID: 2342 Comm: gem_exec_parall Tainted: G     U            5.6.0-rc6-CI-Patchwork_17047+ #1
+<4> [108.207609] Hardware name: Google Soraka/Soraka, BIOS MrChromebox-4.10 08/25/2019
+<4> [108.207639] RIP: 0010:debug_print_object+0x67/0x90
+<4> [108.207668] Code: 83 c2 01 8b 4b 14 4c 8b 45 00 89 15 87 d2 8a 02 8b 53 10 4c 89 e6 48 c7 c7 38 2b 32 82 48 8b 14 d5 80 2f 07 82 e8 49 d5 b7 ff <0f> 0b 5b 83 05 c3 f6 22 01 01 5d 41 5c c3 83 05 b8 f6 22 01 01 c3
+<4> [108.207692] RSP: 0018:ffffc90000e7f890 EFLAGS: 00010282
+<4> [108.207723] RAX: 0000000000000000 RBX: ffffc90000e7f8b0 RCX: 0000000000000001
+<4> [108.207747] RDX: 0000000080000001 RSI: ffff88817ada8cb8 RDI: 00000000ffffffff
+<4> [108.207770] RBP: ffffffffa0341cc0 R08: ffff88816b5a8948 R09: 0000000000000000
+<4> [108.207792] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff82322d54
+<4> [108.207814] R13: ffffffffa0341cc0 R14: ffffffff83df9568 R15: ffff88816064f400
+<4> [108.207839] FS:  00007f437d753700(0000) GS:ffff88817ad80000(0000) knlGS:0000000000000000
+<4> [108.207863] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+<4> [108.207887] CR2: 00007f2ad1fb5000 CR3: 00000001725d8004 CR4: 00000000003606e0
+<4> [108.207907] Call Trace:
+<4> [108.207959]  debug_object_assert_init+0x15c/0x180
+<4> [108.208475]  ? i915_active_acquire_if_busy+0x10/0x50 [i915]
+<4> [108.208513]  ? rcu_read_lock_held+0x4d/0x60
+<4> [108.208970]  i915_active_acquire_if_busy+0x10/0x50 [i915]
+<4> [108.209380]  intel_timeline_read_hwsp+0x81/0x540 [i915]
+<4> [108.210262]  __emit_semaphore_wait+0x45/0x1b0 [i915]
+<4> [108.210726]  ? i915_request_await_dma_fence+0x143/0x560 [i915]
+<4> [108.211156]  i915_request_await_dma_fence+0x28a/0x560 [i915]
+<4> [108.211633]  i915_request_await_object+0x24a/0x3f0 [i915]
+<4> [108.212102]  eb_submit.isra.47+0x58f/0x920 [i915]
+<4> [108.212622]  i915_gem_do_execbuffer+0x1706/0x2c70 [i915]
+<4> [108.213071]  ? i915_gem_execbuffer2_ioctl+0xc0/0x470 [i915]
+
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Reviewed-by: Matthew Auld <matthew.auld@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20200323092841.22240-1-chris@chris-wilson.co.uk
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_timeline.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
+index 08b56d7ab4f45..92da746f01c1e 100644
+--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
++++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
+@@ -119,6 +119,15 @@ static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
+       spin_unlock_irqrestore(&gt->hwsp_lock, flags);
+ }
++static void __rcu_cacheline_free(struct rcu_head *rcu)
++{
++      struct intel_timeline_cacheline *cl =
++              container_of(rcu, typeof(*cl), rcu);
++
++      i915_active_fini(&cl->active);
++      kfree(cl);
++}
++
+ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
+ {
+       GEM_BUG_ON(!i915_active_is_idle(&cl->active));
+@@ -127,8 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
+       i915_vma_put(cl->hwsp->vma);
+       __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
+-      i915_active_fini(&cl->active);
+-      kfree_rcu(cl, rcu);
++      call_rcu(&cl->rcu, __rcu_cacheline_free);
+ }
+ __i915_active_call
+-- 
+2.25.1
+
diff --git a/queue-5.7/io_uring-fix-current-mm-null-dereference-on-exit.patch b/queue-5.7/io_uring-fix-current-mm-null-dereference-on-exit.patch
new file mode 100644 (file)
index 0000000..aff1918
--- /dev/null
@@ -0,0 +1,80 @@
+From d40214fc74233df6621c6c3b1248aa1bec2dbcf2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2020 12:37:11 +0300
+Subject: io_uring: fix current->mm NULL dereference on exit
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ Upstream commit d60b5fbc1ce8210759b568da49d149b868e7c6d3 ]
+
+Don't reissue requests from io_iopoll_reap_events(), the task may not
+have mm, which ends up with NULL. It's better to kill everything off on
+exit anyway.
+
+[  677.734670] RIP: 0010:io_iopoll_complete+0x27e/0x630
+...
+[  677.734679] Call Trace:
+[  677.734695]  ? __send_signal+0x1f2/0x420
+[  677.734698]  ? _raw_spin_unlock_irqrestore+0x24/0x40
+[  677.734699]  ? send_signal+0xf5/0x140
+[  677.734700]  io_iopoll_getevents+0x12f/0x1a0
+[  677.734702]  io_iopoll_reap_events.part.0+0x5e/0xa0
+[  677.734703]  io_ring_ctx_wait_and_kill+0x132/0x1c0
+[  677.734704]  io_uring_release+0x20/0x30
+[  677.734706]  __fput+0xcd/0x230
+[  677.734707]  ____fput+0xe/0x10
+[  677.734709]  task_work_run+0x67/0xa0
+[  677.734710]  do_exit+0x35d/0xb70
+[  677.734712]  do_group_exit+0x43/0xa0
+[  677.734713]  get_signal+0x140/0x900
+[  677.734715]  do_signal+0x37/0x780
+[  677.734717]  ? enqueue_hrtimer+0x41/0xb0
+[  677.734718]  ? recalibrate_cpu_khz+0x10/0x10
+[  677.734720]  ? ktime_get+0x3e/0xa0
+[  677.734721]  ? lapic_next_deadline+0x26/0x30
+[  677.734723]  ? tick_program_event+0x4d/0x90
+[  677.734724]  ? __hrtimer_get_next_event+0x4d/0x80
+[  677.734726]  __prepare_exit_to_usermode+0x126/0x1c0
+[  677.734741]  prepare_exit_to_usermode+0x9/0x40
+[  677.734742]  idtentry_exit_cond_rcu+0x4c/0x60
+[  677.734743]  sysvec_reschedule_ipi+0x92/0x160
+[  677.734744]  ? asm_sysvec_reschedule_ipi+0xa/0x20
+[  677.734745]  asm_sysvec_reschedule_ipi+0x12/0x20
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 63a456921903e..71d281f68ed83 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -858,6 +858,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
+                                struct io_uring_files_update *ip,
+                                unsigned nr_args);
+ static int io_grab_files(struct io_kiocb *req);
++static void io_complete_rw_common(struct kiocb *kiocb, long res);
+ static void io_cleanup_req(struct io_kiocb *req);
+ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
+                      int fd, struct file **out_file, bool fixed);
+@@ -1697,6 +1698,14 @@ static void io_iopoll_queue(struct list_head *again)
+       do {
+               req = list_first_entry(again, struct io_kiocb, list);
+               list_del(&req->list);
++
++              /* shouldn't happen unless io_uring is dying, cancel reqs */
++              if (unlikely(!current->mm)) {
++                      io_complete_rw_common(&req->rw.kiocb, -EAGAIN);
++                      io_put_req(req);
++                      continue;
++              }
++
+               refcount_inc(&req->refs);
+               io_queue_async_work(req);
+       } while (!list_empty(again));
+-- 
+2.25.1
+
diff --git a/queue-5.7/io_uring-fix-io_sq_thread-no-schedule-when-busy.patch b/queue-5.7/io_uring-fix-io_sq_thread-no-schedule-when-busy.patch
new file mode 100644 (file)
index 0000000..6d8c8ef
--- /dev/null
@@ -0,0 +1,54 @@
+From 4facbc6278d7946f8f884a73a288270194e98c6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jun 2020 19:34:06 +0800
+Subject: io_uring: fix io_sq_thread no schedule when busy
+
+From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+
+[ Upstream commit b772f07add1c0b22e02c0f1e96f647560679d3a9 ]
+
+When the user consumes and generates sqe at a fast rate,
+io_sqring_entries can always get sqe, and ret will not be equal to -EBUSY,
+so that io_sq_thread will never call cond_resched or schedule, and then
+we will get the following system error prompt:
+
+rcu: INFO: rcu_sched self-detected stall on CPU
+or
+watchdog: BUG: soft lockup-CPU#23 stuck for 112s! [io_uring-sq:1863]
+
+This patch checks whether need to call cond_resched() by checking
+the need_resched() function every cycle.
+
+Suggested-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index bb74e45941af2..63a456921903e 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -6084,7 +6084,7 @@ static int io_sq_thread(void *data)
+                * If submit got -EBUSY, flag us as needing the application
+                * to enter the kernel to reap and flush events.
+                */
+-              if (!to_submit || ret == -EBUSY) {
++              if (!to_submit || ret == -EBUSY || need_resched()) {
+                       /*
+                        * Drop cur_mm before scheduling, we can't hold it for
+                        * long periods (or over schedule()). Do this before
+@@ -6100,7 +6100,7 @@ static int io_sq_thread(void *data)
+                        * more IO, we should wait for the application to
+                        * reap events and wake us up.
+                        */
+-                      if (!list_empty(&ctx->poll_list) ||
++                      if (!list_empty(&ctx->poll_list) || need_resched() ||
+                           (!time_after(jiffies, timeout) && ret != -EBUSY &&
+                           !percpu_ref_is_dying(&ctx->refs))) {
+                               if (current->task_works)
+-- 
+2.25.1
+
diff --git a/queue-5.7/io_uring-fix-sq-io-poll-with-unsupported-opcodes.patch b/queue-5.7/io_uring-fix-sq-io-poll-with-unsupported-opcodes.patch
new file mode 100644 (file)
index 0000000..6d550e2
--- /dev/null
@@ -0,0 +1,128 @@
+From 63b7431c62d87074ff975d1d32dcd2915d679727 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jun 2020 18:03:22 +0300
+Subject: io_uring: fix {SQ,IO}POLL with unsupported opcodes
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ Upstream commit 3232dd02af65f2d01be641120d2a710176b0c7a7 ]
+
+IORING_SETUP_IOPOLL is defined only for read/write, other opcodes should
+be disallowed, otherwise it'll get an error as below. Also refuse
+open/close with SQPOLL, as the polling thread wouldn't know which file
+table to use.
+
+RIP: 0010:io_iopoll_getevents+0x111/0x5a0
+Call Trace:
+ ? _raw_spin_unlock_irqrestore+0x24/0x40
+ ? do_send_sig_info+0x64/0x90
+ io_iopoll_reap_events.part.0+0x5e/0xa0
+ io_ring_ctx_wait_and_kill+0x132/0x1c0
+ io_uring_release+0x20/0x30
+ __fput+0xcd/0x230
+ ____fput+0xe/0x10
+ task_work_run+0x67/0xa0
+ do_exit+0x353/0xb10
+ ? handle_mm_fault+0xd4/0x200
+ ? syscall_trace_enter+0x18c/0x2c0
+ do_group_exit+0x43/0xa0
+ __x64_sys_exit_group+0x18/0x20
+ do_syscall_64+0x60/0x1e0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+[axboe: allow provide/remove buffers and files update]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/io_uring.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/fs/io_uring.c b/fs/io_uring.c
+index 4ab1728de247c..bb74e45941af2 100644
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -2748,6 +2748,8 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       if (req->flags & REQ_F_NEED_CLEANUP)
+               return 0;
++      if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++              return -EINVAL;
+       sp->file_in = NULL;
+       sp->off_in = READ_ONCE(sqe->splice_off_in);
+@@ -2910,6 +2912,8 @@ static int io_fallocate_prep(struct io_kiocb *req,
+ {
+       if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
+               return -EINVAL;
++      if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++              return -EINVAL;
+       req->sync.off = READ_ONCE(sqe->off);
+       req->sync.len = READ_ONCE(sqe->addr);
+@@ -2935,6 +2939,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       const char __user *fname;
+       int ret;
++      if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
++              return -EINVAL;
+       if (sqe->ioprio || sqe->buf_index)
+               return -EINVAL;
+       if (req->flags & REQ_F_FIXED_FILE)
+@@ -2968,6 +2974,8 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       size_t len;
+       int ret;
++      if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
++              return -EINVAL;
+       if (sqe->ioprio || sqe->buf_index)
+               return -EINVAL;
+       if (req->flags & REQ_F_FIXED_FILE)
+@@ -3207,6 +3215,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
+ #if defined(CONFIG_EPOLL)
+       if (sqe->ioprio || sqe->buf_index)
+               return -EINVAL;
++      if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++              return -EINVAL;
+       req->epoll.epfd = READ_ONCE(sqe->fd);
+       req->epoll.op = READ_ONCE(sqe->len);
+@@ -3251,6 +3261,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
+       if (sqe->ioprio || sqe->buf_index || sqe->off)
+               return -EINVAL;
++      if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++              return -EINVAL;
+       req->madvise.addr = READ_ONCE(sqe->addr);
+       req->madvise.len = READ_ONCE(sqe->len);
+@@ -3285,6 +3297,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ {
+       if (sqe->ioprio || sqe->buf_index || sqe->addr)
+               return -EINVAL;
++      if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++              return -EINVAL;
+       req->fadvise.offset = READ_ONCE(sqe->off);
+       req->fadvise.len = READ_ONCE(sqe->len);
+@@ -3322,6 +3336,8 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       unsigned lookup_flags;
+       int ret;
++      if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
++              return -EINVAL;
+       if (sqe->ioprio || sqe->buf_index)
+               return -EINVAL;
+       if (req->flags & REQ_F_FIXED_FILE)
+@@ -3402,6 +3418,8 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+        */
+       req->work.flags |= IO_WQ_WORK_NO_CANCEL;
++      if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
++              return -EINVAL;
+       if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
+           sqe->rw_flags || sqe->buf_index)
+               return -EINVAL;
+-- 
+2.25.1
+
diff --git a/queue-5.7/kgdb-avoid-suspicious-rcu-usage-warning.patch b/queue-5.7/kgdb-avoid-suspicious-rcu-usage-warning.patch
new file mode 100644 (file)
index 0000000..1c8932f
--- /dev/null
@@ -0,0 +1,109 @@
+From 85cb1d24dc2d267f0a1d12045fcc9c39b4931703 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jun 2020 15:47:39 -0700
+Subject: kgdb: Avoid suspicious RCU usage warning
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit 440ab9e10e2e6e5fd677473ee6f9e3af0f6904d6 ]
+
+At times when I'm using kgdb I see a splat on my console about
+suspicious RCU usage.  I managed to come up with a case that could
+reproduce this that looked like this:
+
+  WARNING: suspicious RCU usage
+  5.7.0-rc4+ #609 Not tainted
+  -----------------------------
+  kernel/pid.c:395 find_task_by_pid_ns() needs rcu_read_lock() protection!
+
+  other info that might help us debug this:
+
+    rcu_scheduler_active = 2, debug_locks = 1
+  3 locks held by swapper/0/1:
+   #0: ffffff81b6b8e988 (&dev->mutex){....}-{3:3}, at: __device_attach+0x40/0x13c
+   #1: ffffffd01109e9e8 (dbg_master_lock){....}-{2:2}, at: kgdb_cpu_enter+0x20c/0x7ac
+   #2: ffffffd01109ea90 (dbg_slave_lock){....}-{2:2}, at: kgdb_cpu_enter+0x3ec/0x7ac
+
+  stack backtrace:
+  CPU: 7 PID: 1 Comm: swapper/0 Not tainted 5.7.0-rc4+ #609
+  Hardware name: Google Cheza (rev3+) (DT)
+  Call trace:
+   dump_backtrace+0x0/0x1b8
+   show_stack+0x1c/0x24
+   dump_stack+0xd4/0x134
+   lockdep_rcu_suspicious+0xf0/0x100
+   find_task_by_pid_ns+0x5c/0x80
+   getthread+0x8c/0xb0
+   gdb_serial_stub+0x9d4/0xd04
+   kgdb_cpu_enter+0x284/0x7ac
+   kgdb_handle_exception+0x174/0x20c
+   kgdb_brk_fn+0x24/0x30
+   call_break_hook+0x6c/0x7c
+   brk_handler+0x20/0x5c
+   do_debug_exception+0x1c8/0x22c
+   el1_sync_handler+0x3c/0xe4
+   el1_sync+0x7c/0x100
+   rpmh_rsc_probe+0x38/0x420
+   platform_drv_probe+0x94/0xb4
+   really_probe+0x134/0x300
+   driver_probe_device+0x68/0x100
+   __device_attach_driver+0x90/0xa8
+   bus_for_each_drv+0x84/0xcc
+   __device_attach+0xb4/0x13c
+   device_initial_probe+0x18/0x20
+   bus_probe_device+0x38/0x98
+   device_add+0x38c/0x420
+
+If I understand properly we should just be able to blanket kgdb under
+one big RCU read lock and the problem should go away.  We'll add it to
+the beast-of-a-function known as kgdb_cpu_enter().
+
+With this I no longer get any splats and things seem to work fine.
+
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Link: https://lore.kernel.org/r/20200602154729.v2.1.I70e0d4fd46d5ed2aaf0c98a355e8e1b7a5bb7e4e@changeid
+Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/debug/debug_core.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
+index d47c7d6656cd3..9be6accf8fe3d 100644
+--- a/kernel/debug/debug_core.c
++++ b/kernel/debug/debug_core.c
+@@ -577,6 +577,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
+               arch_kgdb_ops.disable_hw_break(regs);
+ acquirelock:
++      rcu_read_lock();
+       /*
+        * Interrupts will be restored by the 'trap return' code, except when
+        * single stepping.
+@@ -636,6 +637,7 @@ return_normal:
+                       atomic_dec(&slaves_in_kgdb);
+                       dbg_touch_watchdogs();
+                       local_irq_restore(flags);
++                      rcu_read_unlock();
+                       return 0;
+               }
+               cpu_relax();
+@@ -654,6 +656,7 @@ return_normal:
+               raw_spin_unlock(&dbg_master_lock);
+               dbg_touch_watchdogs();
+               local_irq_restore(flags);
++              rcu_read_unlock();
+               goto acquirelock;
+       }
+@@ -777,6 +780,7 @@ kgdb_restore:
+       raw_spin_unlock(&dbg_master_lock);
+       dbg_touch_watchdogs();
+       local_irq_restore(flags);
++      rcu_read_unlock();
+       return kgdb_info[cpu].ret_state;
+ }
+-- 
+2.25.1
+
diff --git a/queue-5.7/mm-dump_page-do-not-crash-with-invalid-mapping-point.patch b/queue-5.7/mm-dump_page-do-not-crash-with-invalid-mapping-point.patch
new file mode 100644 (file)
index 0000000..5a26952
--- /dev/null
@@ -0,0 +1,168 @@
+From a5f36522bacc5ad37ab1f9e7f5261a922a922ffb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jun 2020 21:46:03 -0700
+Subject: mm, dump_page(): do not crash with invalid mapping pointer
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+[ Upstream commit 002ae7057069538aa3afd500f6f60a429cb948b2 ]
+
+We have seen a following problem on a RPi4 with 1G RAM:
+
+    BUG: Bad page state in process systemd-hwdb  pfn:35601
+    page:ffff7e0000d58040 refcount:15 mapcount:131221 mapping:efd8fe765bc80080 index:0x1 compound_mapcount: -32767
+    Unable to handle kernel paging request at virtual address efd8fe765bc80080
+    Mem abort info:
+      ESR = 0x96000004
+      Exception class = DABT (current EL), IL = 32 bits
+      SET = 0, FnV = 0
+      EA = 0, S1PTW = 0
+    Data abort info:
+      ISV = 0, ISS = 0x00000004
+      CM = 0, WnR = 0
+    [efd8fe765bc80080] address between user and kernel address ranges
+    Internal error: Oops: 96000004 [#1] SMP
+    Modules linked in: btrfs libcrc32c xor xor_neon zlib_deflate raid6_pq mmc_block xhci_pci xhci_hcd usbcore sdhci_iproc sdhci_pltfm sdhci mmc_core clk_raspberrypi gpio_raspberrypi_exp pcie_brcmstb bcm2835_dma gpio_regulator phy_generic fixed sg scsi_mod efivarfs
+    Supported: No, Unreleased kernel
+    CPU: 3 PID: 408 Comm: systemd-hwdb Not tainted 5.3.18-8-default #1 SLE15-SP2 (unreleased)
+    Hardware name: raspberrypi rpi/rpi, BIOS 2020.01 02/21/2020
+    pstate: 40000085 (nZcv daIf -PAN -UAO)
+    pc : __dump_page+0x268/0x368
+    lr : __dump_page+0xc4/0x368
+    sp : ffff000012563860
+    x29: ffff000012563860 x28: ffff80003ddc4300
+    x27: 0000000000000010 x26: 000000000000003f
+    x25: ffff7e0000d58040 x24: 000000000000000f
+    x23: efd8fe765bc80080 x22: 0000000000020095
+    x21: efd8fe765bc80080 x20: ffff000010ede8b0
+    x19: ffff7e0000d58040 x18: ffffffffffffffff
+    x17: 0000000000000001 x16: 0000000000000007
+    x15: ffff000011689708 x14: 3030386362353637
+    x13: 6566386466653a67 x12: 6e697070616d2031
+    x11: 32323133313a746e x10: 756f6370616d2035
+    x9 : ffff00001168a840 x8 : ffff00001077a670
+    x7 : 000000000000013d x6 : ffff0000118a43b5
+    x5 : 0000000000000001 x4 : ffff80003dd9e2c8
+    x3 : ffff80003dd9e2c8 x2 : 911c8d7c2f483500
+    x1 : dead000000000100 x0 : efd8fe765bc80080
+    Call trace:
+     __dump_page+0x268/0x368
+     bad_page+0xd4/0x168
+     check_new_page_bad+0x80/0xb8
+     rmqueue_bulk.constprop.26+0x4d8/0x788
+     get_page_from_freelist+0x4d4/0x1228
+     __alloc_pages_nodemask+0x134/0xe48
+     alloc_pages_vma+0x198/0x1c0
+     do_anonymous_page+0x1a4/0x4d8
+     __handle_mm_fault+0x4e8/0x560
+     handle_mm_fault+0x104/0x1e0
+     do_page_fault+0x1e8/0x4c0
+     do_translation_fault+0xb0/0xc0
+     do_mem_abort+0x50/0xb0
+     el0_da+0x24/0x28
+    Code: f9401025 8b8018a0 9a851005 17ffffca (f94002a0)
+
+Besides the underlying issue with page->mapping containing a bogus value
+for some reason, we can see that __dump_page() crashed by trying to read
+the pointer at mapping->host, turning a recoverable warning into full
+Oops.
+
+It can be expected that when page is reported as bad state for some
+reason, the pointers there should not be trusted blindly.
+
+So this patch treats all data in __dump_page() that depends on
+page->mapping as lava, using probe_kernel_read_strict().  Ideally this
+would include the dentry->d_parent recursively, but that would mean
+changing printk handler for %pd.  Chances of reaching the dentry
+printing part with an initially bogus mapping pointer should be rather
+low, though.
+
+Also prefix printing mapping->a_ops with a description of what is being
+printed.  In case the value is bogus, %ps will print raw value instead
+of the symbol name and then it's not obvious at all that it's printing
+a_ops.
+
+Reported-by: Petr Tesarik <ptesarik@suse.cz>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Link: http://lkml.kernel.org/r/20200331165454.12263-1-vbabka@suse.cz
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/debug.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 50 insertions(+), 6 deletions(-)
+
+diff --git a/mm/debug.c b/mm/debug.c
+index 2189357f09871..f2ede2df585a9 100644
+--- a/mm/debug.c
++++ b/mm/debug.c
+@@ -110,13 +110,57 @@ void __dump_page(struct page *page, const char *reason)
+       else if (PageAnon(page))
+               type = "anon ";
+       else if (mapping) {
+-              if (mapping->host && mapping->host->i_dentry.first) {
+-                      struct dentry *dentry;
+-                      dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
+-                      pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry);
+-              } else
+-                      pr_warn("%ps\n", mapping->a_ops);
++              const struct inode *host;
++              const struct address_space_operations *a_ops;
++              const struct hlist_node *dentry_first;
++              const struct dentry *dentry_ptr;
++              struct dentry dentry;
++
++              /*
++               * mapping can be invalid pointer and we don't want to crash
++               * accessing it, so probe everything depending on it carefully
++               */
++              if (probe_kernel_read_strict(&host, &mapping->host,
++                                              sizeof(struct inode *)) ||
++                  probe_kernel_read_strict(&a_ops, &mapping->a_ops,
++                              sizeof(struct address_space_operations *))) {
++                      pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n");
++                      goto out_mapping;
++              }
++
++              if (!host) {
++                      pr_warn("mapping->a_ops:%ps\n", a_ops);
++                      goto out_mapping;
++              }
++
++              if (probe_kernel_read_strict(&dentry_first,
++                      &host->i_dentry.first, sizeof(struct hlist_node *))) {
++                      pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n",
++                              a_ops, host);
++                      goto out_mapping;
++              }
++
++              if (!dentry_first) {
++                      pr_warn("mapping->a_ops:%ps\n", a_ops);
++                      goto out_mapping;
++              }
++
++              dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
++              if (probe_kernel_read_strict(&dentry, dentry_ptr,
++                                                      sizeof(struct dentry))) {
++                      pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n",
++                              a_ops, dentry_ptr);
++              } else {
++                      /*
++                       * if dentry is corrupted, the %pd handler may still
++                       * crash, but it's unlikely that we reach here with a
++                       * corrupted struct page
++                       */
++                      pr_warn("mapping->aops:%ps dentry name:\"%pd\"\n",
++                                                              a_ops, &dentry);
++              }
+       }
++out_mapping:
+       BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
+       pr_warn("%sflags: %#lx(%pGp)%s\n", type, page->flags, &page->flags,
+-- 
+2.25.1
+
diff --git a/queue-5.7/mm-slub-fix-stack-overruns-with-slub_stats.patch b/queue-5.7/mm-slub-fix-stack-overruns-with-slub_stats.patch
new file mode 100644 (file)
index 0000000..859c4ef
--- /dev/null
@@ -0,0 +1,90 @@
+From 1e0dc7359392d6722e92a9b7f6ceb6ba715a3f58 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jun 2020 21:45:57 -0700
+Subject: mm/slub: fix stack overruns with SLUB_STATS
+
+From: Qian Cai <cai@lca.pw>
+
+[ Upstream commit a68ee0573991e90af2f1785db309206408bad3e5 ]
+
+There is no need to copy SLUB_STATS items from root memcg cache to new
+memcg cache copies.  Doing so could result in stack overruns because the
+store function only accepts 0 to clear the stat and returns an error for
+everything else while the show method would print out the whole stat.
+
+Then, the mismatch of the lengths returns from show and store methods
+happens in memcg_propagate_slab_attrs():
+
+       else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
+               buf = mbuf;
+
+max_attr_size is only 2 from slab_attr_store(), then, it uses mbuf[64]
+in show_stat() later where a bounch of sprintf() would overrun the stack
+variable.  Fix it by always allocating a page of buffer to be used in
+show_stat() if SLUB_STATS=y which should only be used for debug purpose.
+
+  # echo 1 > /sys/kernel/slab/fs_cache/shrink
+  BUG: KASAN: stack-out-of-bounds in number+0x421/0x6e0
+  Write of size 1 at addr ffffc900256cfde0 by task kworker/76:0/53251
+
+  Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019
+  Workqueue: memcg_kmem_cache memcg_kmem_cache_create_func
+  Call Trace:
+    number+0x421/0x6e0
+    vsnprintf+0x451/0x8e0
+    sprintf+0x9e/0xd0
+    show_stat+0x124/0x1d0
+    alloc_slowpath_show+0x13/0x20
+    __kmem_cache_create+0x47a/0x6b0
+
+  addr ffffc900256cfde0 is located in stack of task kworker/76:0/53251 at offset 0 in frame:
+   process_one_work+0x0/0xb90
+
+  this frame has 1 object:
+   [32, 72) 'lockdep_map'
+
+  Memory state around the buggy address:
+   ffffc900256cfc80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+   ffffc900256cfd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+  >ffffc900256cfd80: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
+                                                         ^
+   ffffc900256cfe00: 00 00 00 00 00 f2 f2 f2 00 00 00 00 00 00 00 00
+   ffffc900256cfe80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+  ==================================================================
+  Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: __kmem_cache_create+0x6ac/0x6b0
+  Workqueue: memcg_kmem_cache memcg_kmem_cache_create_func
+  Call Trace:
+    __kmem_cache_create+0x6ac/0x6b0
+
+Fixes: 107dab5c92d5 ("slub: slub-specific propagation changes")
+Signed-off-by: Qian Cai <cai@lca.pw>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Glauber Costa <glauber@scylladb.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Link: http://lkml.kernel.org/r/20200429222356.4322-1-cai@lca.pw
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/slub.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index 63f372366ec59..660f4324c0972 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -5681,7 +5681,8 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
+                */
+               if (buffer)
+                       buf = buffer;
+-              else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
++              else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
++                       !IS_ENABLED(CONFIG_SLUB_STATS))
+                       buf = mbuf;
+               else {
+                       buffer = (char *) get_zeroed_page(GFP_KERNEL);
+-- 
+2.25.1
+
diff --git a/queue-5.7/mm-slub.c-fix-corrupted-freechain-in-deactivate_slab.patch b/queue-5.7/mm-slub.c-fix-corrupted-freechain-in-deactivate_slab.patch
new file mode 100644 (file)
index 0000000..e50f0f2
--- /dev/null
@@ -0,0 +1,115 @@
+From 8e1c4be9a4c25e4bbeda3ed8146345d3314fa410 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jun 2020 21:45:47 -0700
+Subject: mm/slub.c: fix corrupted freechain in deactivate_slab()
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+[ Upstream commit 52f23478081ae0dcdb95d1650ea1e7d52d586829 ]
+
+The slub_debug is able to fix the corrupted slab freelist/page.
+However, alloc_debug_processing() only checks the validity of current
+and next freepointer during allocation path.  As a result, once some
+objects have their freepointers corrupted, deactivate_slab() may lead to
+page fault.
+
+Below is from a test kernel module when 'slub_debug=PUF,kmalloc-128
+slub_nomerge'.  The test kernel corrupts the freepointer of one free
+object on purpose.  Unfortunately, deactivate_slab() does not detect it
+when iterating the freechain.
+
+  BUG: unable to handle page fault for address: 00000000123456f8
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 0 P4D 0
+  Oops: 0000 [#1] SMP PTI
+  ... ...
+  RIP: 0010:deactivate_slab.isra.92+0xed/0x490
+  ... ...
+  Call Trace:
+   ___slab_alloc+0x536/0x570
+   __slab_alloc+0x17/0x30
+   __kmalloc+0x1d9/0x200
+   ext4_htree_store_dirent+0x30/0xf0
+   htree_dirblock_to_tree+0xcb/0x1c0
+   ext4_htree_fill_tree+0x1bc/0x2d0
+   ext4_readdir+0x54f/0x920
+   iterate_dir+0x88/0x190
+   __x64_sys_getdents+0xa6/0x140
+   do_syscall_64+0x49/0x170
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Therefore, this patch adds extra consistency check in deactivate_slab().
+Once an object's freepointer is corrupted, all following objects
+starting at this object are isolated.
+
+[akpm@linux-foundation.org: fix build with CONFIG_SLAB_DEBUG=n]
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Joe Jin <joe.jin@oracle.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Pekka Enberg <penberg@kernel.org>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Link: http://lkml.kernel.org/r/20200331031450.12182-1-dongli.zhang@oracle.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/slub.c | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index 63bd39c476431..63f372366ec59 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -679,6 +679,20 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
+       va_end(args);
+ }
++static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
++                             void *freelist, void *nextfree)
++{
++      if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
++          !check_valid_pointer(s, page, nextfree)) {
++              object_err(s, page, freelist, "Freechain corrupt");
++              freelist = NULL;
++              slab_fix(s, "Isolate corrupted freechain");
++              return true;
++      }
++
++      return false;
++}
++
+ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
+ {
+       unsigned int off;       /* Offset of last byte */
+@@ -1410,6 +1424,11 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
+ static inline void dec_slabs_node(struct kmem_cache *s, int node,
+                                                       int objects) {}
++static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
++                             void *freelist, void *nextfree)
++{
++      return false;
++}
+ #endif /* CONFIG_SLUB_DEBUG */
+ /*
+@@ -2093,6 +2112,14 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+               void *prior;
+               unsigned long counters;
++              /*
++               * If 'nextfree' is invalid, it is possible that the object at
++               * 'freelist' is already corrupted.  So isolate all objects
++               * starting at 'freelist'.
++               */
++              if (freelist_corrupted(s, page, freelist, nextfree))
++                      break;
++
+               do {
+                       prior = page->freelist;
+                       counters = page->counters;
+-- 
+2.25.1
+
diff --git a/queue-5.7/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch b/queue-5.7/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch
new file mode 100644 (file)
index 0000000..1ed4554
--- /dev/null
@@ -0,0 +1,124 @@
+From 887bad9523741466ddcf7a4f4a7235d0d08437d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jun 2020 01:53:08 -0700
+Subject: nvme: fix possible deadlock when I/O is blocked
+
+From: Sagi Grimberg <sagi@grimberg.me>
+
+[ Upstream commit 3b4b19721ec652ad2c4fe51dfbe5124212b5f581 ]
+
+Revert fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk
+in nvme_validate_ns")
+
+When adding a new namespace to the head disk (via nvme_mpath_set_live)
+we will see partition scan which triggers I/O on the mpath device node.
+This process will usually be triggered from the scan_work which holds
+the scan_lock. If I/O blocks (if we got ana change currently have only
+available paths but none are accessible) this can deadlock on the head
+disk bd_mutex as both partition scan I/O takes it, and head disk revalidation
+takes it to check for resize (also triggered from scan_work on a different
+path). See trace [1].
+
+The mpath disk revalidation was originally added to detect online disk
+size change, but this is no longer needed since commit cb224c3af4df
+("nvme: Convert to use set_capacity_revalidate_and_notify") which already
+updates resize info without unnecessarily revalidating the disk (the
+mpath disk doesn't even implement .revalidate_disk fop).
+
+[1]:
+--
+kernel: INFO: task kworker/u65:9:494 blocked for more than 241 seconds.
+kernel:       Tainted: G           OE     5.3.5-050305-generic #201910071830
+kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+kernel: kworker/u65:9   D    0   494      2 0x80004000
+kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core]
+kernel: Call Trace:
+kernel:  __schedule+0x2b9/0x6c0
+kernel:  schedule+0x42/0xb0
+kernel:  schedule_preempt_disabled+0xe/0x10
+kernel:  __mutex_lock.isra.0+0x182/0x4f0
+kernel:  __mutex_lock_slowpath+0x13/0x20
+kernel:  mutex_lock+0x2e/0x40
+kernel:  revalidate_disk+0x63/0xa0
+kernel:  __nvme_revalidate_disk+0xfe/0x110 [nvme_core]
+kernel:  nvme_revalidate_disk+0xa4/0x160 [nvme_core]
+kernel:  ? evict+0x14c/0x1b0
+kernel:  revalidate_disk+0x2b/0xa0
+kernel:  nvme_validate_ns+0x49/0x940 [nvme_core]
+kernel:  ? blk_mq_free_request+0xd2/0x100
+kernel:  ? __nvme_submit_sync_cmd+0xbe/0x1e0 [nvme_core]
+kernel:  nvme_scan_work+0x24f/0x380 [nvme_core]
+kernel:  process_one_work+0x1db/0x380
+kernel:  worker_thread+0x249/0x400
+kernel:  kthread+0x104/0x140
+kernel:  ? process_one_work+0x380/0x380
+kernel:  ? kthread_park+0x80/0x80
+kernel:  ret_from_fork+0x1f/0x40
+...
+kernel: INFO: task kworker/u65:1:2630 blocked for more than 241 seconds.
+kernel:       Tainted: G           OE     5.3.5-050305-generic #201910071830
+kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+kernel: kworker/u65:1   D    0  2630      2 0x80004000
+kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core]
+kernel: Call Trace:
+kernel:  __schedule+0x2b9/0x6c0
+kernel:  schedule+0x42/0xb0
+kernel:  io_schedule+0x16/0x40
+kernel:  do_read_cache_page+0x438/0x830
+kernel:  ? __switch_to_asm+0x34/0x70
+kernel:  ? file_fdatawait_range+0x30/0x30
+kernel:  read_cache_page+0x12/0x20
+kernel:  read_dev_sector+0x27/0xc0
+kernel:  read_lba+0xc1/0x220
+kernel:  ? kmem_cache_alloc_trace+0x19c/0x230
+kernel:  efi_partition+0x1e6/0x708
+kernel:  ? vsnprintf+0x39e/0x4e0
+kernel:  ? snprintf+0x49/0x60
+kernel:  check_partition+0x154/0x244
+kernel:  rescan_partitions+0xae/0x280
+kernel:  __blkdev_get+0x40f/0x560
+kernel:  blkdev_get+0x3d/0x140
+kernel:  __device_add_disk+0x388/0x480
+kernel:  device_add_disk+0x13/0x20
+kernel:  nvme_mpath_set_live+0x119/0x140 [nvme_core]
+kernel:  nvme_update_ns_ana_state+0x5c/0x60 [nvme_core]
+kernel:  nvme_set_ns_ana_state+0x1e/0x30 [nvme_core]
+kernel:  nvme_parse_ana_log+0xa1/0x180 [nvme_core]
+kernel:  ? nvme_update_ns_ana_state+0x60/0x60 [nvme_core]
+kernel:  nvme_mpath_add_disk+0x47/0x90 [nvme_core]
+kernel:  nvme_validate_ns+0x396/0x940 [nvme_core]
+kernel:  ? blk_mq_free_request+0xd2/0x100
+kernel:  nvme_scan_work+0x24f/0x380 [nvme_core]
+kernel:  process_one_work+0x1db/0x380
+kernel:  worker_thread+0x249/0x400
+kernel:  kthread+0x104/0x140
+kernel:  ? process_one_work+0x380/0x380
+kernel:  ? kthread_park+0x80/0x80
+kernel:  ret_from_fork+0x1f/0x40
+--
+
+Fixes: fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk
+in nvme_validate_ns")
+Signed-off-by: Anton Eidelman <anton@lightbitslabs.com>
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 887139f8fa53b..85ce6c682849e 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -1910,7 +1910,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
+       if (ns->head->disk) {
+               nvme_update_disk_info(ns->head->disk, ns, id);
+               blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+-              revalidate_disk(ns->head->disk);
+       }
+ #endif
+ }
+-- 
+2.25.1
+
diff --git a/queue-5.7/nvme-multipath-fix-bogus-request-queue-reference-put.patch b/queue-5.7/nvme-multipath-fix-bogus-request-queue-reference-put.patch
new file mode 100644 (file)
index 0000000..c855d3c
--- /dev/null
@@ -0,0 +1,84 @@
+From 7a92228148b921abe3ffd8966dfd49413aafea67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jun 2020 01:53:12 -0700
+Subject: nvme-multipath: fix bogus request queue reference put
+
+From: Sagi Grimberg <sagi@grimberg.me>
+
+[ Upstream commit c31244669f57963b6ce133a5555b118fc50aec95 ]
+
+The mpath disk node takes a reference on the request mpath
+request queue when adding live path to the mpath gendisk.
+However if we connected to an inaccessible path device_add_disk
+is not called, so if we disconnect and remove the mpath gendisk
+we endup putting an reference on the request queue that was
+never taken [1].
+
+Fix that to check if we ever added a live path (using
+NVME_NS_HEAD_HAS_DISK flag) and if not, clear the disk->queue
+reference.
+
+[1]:
+------------[ cut here ]------------
+refcount_t: underflow; use-after-free.
+WARNING: CPU: 1 PID: 1372 at lib/refcount.c:28 refcount_warn_saturate+0xa6/0xf0
+CPU: 1 PID: 1372 Comm: nvme Tainted: G           O      5.7.0-rc2+ #3
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1 04/01/2014
+RIP: 0010:refcount_warn_saturate+0xa6/0xf0
+RSP: 0018:ffffb29e8053bdc0 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffff8b7a2f4fc060 RCX: 0000000000000007
+RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8b7a3ec99980
+RBP: ffff8b7a2f4fc000 R08: 00000000000002e1 R09: 0000000000000004
+R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
+R13: fffffffffffffff2 R14: ffffb29e8053bf08 R15: ffff8b7a320e2da0
+FS:  00007f135d4ca800(0000) GS:ffff8b7a3ec80000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00005651178c0c30 CR3: 000000003b650005 CR4: 0000000000360ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ disk_release+0xa2/0xc0
+ device_release+0x28/0x80
+ kobject_put+0xa5/0x1b0
+ nvme_put_ns_head+0x26/0x70 [nvme_core]
+ nvme_put_ns+0x30/0x60 [nvme_core]
+ nvme_remove_namespaces+0x9b/0xe0 [nvme_core]
+ nvme_do_delete_ctrl+0x43/0x5c [nvme_core]
+ nvme_sysfs_delete.cold+0x8/0xd [nvme_core]
+ kernfs_fop_write+0xc1/0x1a0
+ vfs_write+0xb6/0x1a0
+ ksys_write+0x5f/0xe0
+ do_syscall_64+0x52/0x1a0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported-by: Anton Eidelman <anton@lightbitslabs.com>
+Tested-by: Anton Eidelman <anton@lightbitslabs.com>
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/multipath.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index d1cb65698288b..03bc3aba09871 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -691,6 +691,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+       kblockd_schedule_work(&head->requeue_work);
+       flush_work(&head->requeue_work);
+       blk_cleanup_queue(head->disk->queue);
++      if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
++              /*
++               * if device_add_disk wasn't called, prevent
++               * disk release to put a bogus reference on the
++               * request queue
++               */
++              head->disk->queue = NULL;
++      }
+       put_disk(head->disk);
+ }
+-- 
+2.25.1
+
diff --git a/queue-5.7/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch b/queue-5.7/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch
new file mode 100644 (file)
index 0000000..5a970f1
--- /dev/null
@@ -0,0 +1,134 @@
+From a78bc3d6bec05bec167b99ed2fdf2c05e57c98fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jun 2020 01:53:09 -0700
+Subject: nvme-multipath: fix deadlock between ana_work and scan_work
+
+From: Anton Eidelman <anton@lightbitslabs.com>
+
+[ Upstream commit 489dd102a2c7c94d783a35f9412eb085b8da1aa4 ]
+
+When scan_work calls nvme_mpath_add_disk() this holds ana_lock
+and invokes nvme_parse_ana_log(), which may issue IO
+in device_add_disk() and hang waiting for an accessible path.
+While nvme_mpath_set_live() is only called when nvme_state_is_live(),
+a transition may cause NVME_SC_ANA_TRANSITION and requeue the IO.
+
+In order to recover and complete the IO ana_work on the same ctrl
+should be able to update the path state and remove NVME_NS_ANA_PENDING.
+
+The deadlock occurs because scan_work keeps holding ana_lock,
+so ana_work hangs [1].
+
+Fix:
+Now nvme_mpath_add_disk() uses nvme_parse_ana_log() to obtain a copy
+of the ANA group desc, and then calls nvme_update_ns_ana_state() without
+holding ana_lock.
+
+[1]:
+kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core]
+kernel: Call Trace:
+kernel:  __schedule+0x2b9/0x6c0
+kernel:  schedule+0x42/0xb0
+kernel:  io_schedule+0x16/0x40
+kernel:  do_read_cache_page+0x438/0x830
+kernel:  read_cache_page+0x12/0x20
+kernel:  read_dev_sector+0x27/0xc0
+kernel:  read_lba+0xc1/0x220
+kernel:  efi_partition+0x1e6/0x708
+kernel:  check_partition+0x154/0x244
+kernel:  rescan_partitions+0xae/0x280
+kernel:  __blkdev_get+0x40f/0x560
+kernel:  blkdev_get+0x3d/0x140
+kernel:  __device_add_disk+0x388/0x480
+kernel:  device_add_disk+0x13/0x20
+kernel:  nvme_mpath_set_live+0x119/0x140 [nvme_core]
+kernel:  nvme_update_ns_ana_state+0x5c/0x60 [nvme_core]
+kernel:  nvme_set_ns_ana_state+0x1e/0x30 [nvme_core]
+kernel:  nvme_parse_ana_log+0xa1/0x180 [nvme_core]
+kernel:  nvme_mpath_add_disk+0x47/0x90 [nvme_core]
+kernel:  nvme_validate_ns+0x396/0x940 [nvme_core]
+kernel:  nvme_scan_work+0x24f/0x380 [nvme_core]
+kernel:  process_one_work+0x1db/0x380
+kernel:  worker_thread+0x249/0x400
+kernel:  kthread+0x104/0x140
+
+kernel: Workqueue: nvme-wq nvme_ana_work [nvme_core]
+kernel: Call Trace:
+kernel:  __schedule+0x2b9/0x6c0
+kernel:  schedule+0x42/0xb0
+kernel:  schedule_preempt_disabled+0xe/0x10
+kernel:  __mutex_lock.isra.0+0x182/0x4f0
+kernel:  ? __switch_to_asm+0x34/0x70
+kernel:  ? select_task_rq_fair+0x1aa/0x5c0
+kernel:  ? kvm_sched_clock_read+0x11/0x20
+kernel:  ? sched_clock+0x9/0x10
+kernel:  __mutex_lock_slowpath+0x13/0x20
+kernel:  mutex_lock+0x2e/0x40
+kernel:  nvme_read_ana_log+0x3a/0x100 [nvme_core]
+kernel:  nvme_ana_work+0x15/0x20 [nvme_core]
+kernel:  process_one_work+0x1db/0x380
+kernel:  worker_thread+0x4d/0x400
+kernel:  kthread+0x104/0x140
+kernel:  ? process_one_work+0x380/0x380
+kernel:  ? kthread_park+0x80/0x80
+kernel:  ret_from_fork+0x35/0x40
+
+Fixes: 0d0b660f214d ("nvme: add ANA support")
+Signed-off-by: Anton Eidelman <anton@lightbitslabs.com>
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/multipath.c | 24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index 91a8b1ce5a3a2..f4287d8550a9f 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -639,26 +639,34 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
+ }
+ DEVICE_ATTR_RO(ana_state);
+-static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
++static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
+               struct nvme_ana_group_desc *desc, void *data)
+ {
+-      struct nvme_ns *ns = data;
++      struct nvme_ana_group_desc *dst = data;
+-      if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
+-              nvme_update_ns_ana_state(desc, ns);
+-              return -ENXIO; /* just break out of the loop */
+-      }
++      if (desc->grpid != dst->grpid)
++              return 0;
+-      return 0;
++      *dst = *desc;
++      return -ENXIO; /* just break out of the loop */
+ }
+ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
+ {
+       if (nvme_ctrl_use_ana(ns->ctrl)) {
++              struct nvme_ana_group_desc desc = {
++                      .grpid = id->anagrpid,
++                      .state = 0,
++              };
++
+               mutex_lock(&ns->ctrl->ana_lock);
+               ns->ana_grpid = le32_to_cpu(id->anagrpid);
+-              nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
++              nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
+               mutex_unlock(&ns->ctrl->ana_lock);
++              if (desc.state) {
++                      /* found the group desc: update */
++                      nvme_update_ns_ana_state(&desc, ns);
++              }
+       } else {
+               ns->ana_state = NVME_ANA_OPTIMIZED; 
+               nvme_mpath_set_live(ns);
+-- 
+2.25.1
+
diff --git a/queue-5.7/nvme-multipath-fix-deadlock-due-to-head-lock.patch b/queue-5.7/nvme-multipath-fix-deadlock-due-to-head-lock.patch
new file mode 100644 (file)
index 0000000..42ae4b0
--- /dev/null
@@ -0,0 +1,124 @@
+From d10f0b086190b4e1bbd3e0ee93e02bd9235e3df1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jun 2020 01:53:11 -0700
+Subject: nvme-multipath: fix deadlock due to head->lock
+
+From: Anton Eidelman <anton@lightbitslabs.com>
+
+[ Upstream commit d8a22f85609fadb46ba699e0136cc3ebdeebff79 ]
+
+In the following scenario scan_work and ana_work will deadlock:
+
+When scan_work calls nvme_mpath_add_disk() this holds ana_lock
+and invokes nvme_parse_ana_log(), which may issue IO
+in device_add_disk() and hang waiting for an accessible path.
+
+While nvme_mpath_set_live() is only called when nvme_state_is_live(),
+a transition may cause NVME_SC_ANA_TRANSITION and requeue the IO.
+
+Since nvme_mpath_set_live() holds ns->head->lock, an ana_work on
+ANY ctrl will not be able to complete nvme_mpath_set_live()
+on the same ns->head, which is required in order to update
+the new accessible path and remove NVME_NS_ANA_PENDING.
+Therefore IO never completes: deadlock [1].
+
+Fix:
+Move device_add_disk out of the head->lock and protect it with an
+atomic test_and_set for a new NVME_NS_HEAD_HAS_DISK bit.
+
+[1]:
+kernel: INFO: task kworker/u8:2:160 blocked for more than 120 seconds.
+kernel:       Tainted: G           OE     5.3.5-050305-generic #201910071830
+kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+kernel: kworker/u8:2    D    0   160      2 0x80004000
+kernel: Workqueue: nvme-wq nvme_ana_work [nvme_core]
+kernel: Call Trace:
+kernel:  __schedule+0x2b9/0x6c0
+kernel:  schedule+0x42/0xb0
+kernel:  schedule_preempt_disabled+0xe/0x10
+kernel:  __mutex_lock.isra.0+0x182/0x4f0
+kernel:  __mutex_lock_slowpath+0x13/0x20
+kernel:  mutex_lock+0x2e/0x40
+kernel:  nvme_update_ns_ana_state+0x22/0x60 [nvme_core]
+kernel:  nvme_update_ana_state+0xca/0xe0 [nvme_core]
+kernel:  nvme_parse_ana_log+0xa1/0x180 [nvme_core]
+kernel:  nvme_read_ana_log+0x76/0x100 [nvme_core]
+kernel:  nvme_ana_work+0x15/0x20 [nvme_core]
+kernel:  process_one_work+0x1db/0x380
+kernel:  worker_thread+0x4d/0x400
+kernel:  kthread+0x104/0x140
+kernel:  ret_from_fork+0x35/0x40
+kernel: INFO: task kworker/u8:4:439 blocked for more than 120 seconds.
+kernel:       Tainted: G           OE     5.3.5-050305-generic #201910071830
+kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+kernel: kworker/u8:4    D    0   439      2 0x80004000
+kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core]
+kernel: Call Trace:
+kernel:  __schedule+0x2b9/0x6c0
+kernel:  schedule+0x42/0xb0
+kernel:  io_schedule+0x16/0x40
+kernel:  do_read_cache_page+0x438/0x830
+kernel:  read_cache_page+0x12/0x20
+kernel:  read_dev_sector+0x27/0xc0
+kernel:  read_lba+0xc1/0x220
+kernel:  efi_partition+0x1e6/0x708
+kernel:  check_partition+0x154/0x244
+kernel:  rescan_partitions+0xae/0x280
+kernel:  __blkdev_get+0x40f/0x560
+kernel:  blkdev_get+0x3d/0x140
+kernel:  __device_add_disk+0x388/0x480
+kernel:  device_add_disk+0x13/0x20
+kernel:  nvme_mpath_set_live+0x119/0x140 [nvme_core]
+kernel:  nvme_update_ns_ana_state+0x5c/0x60 [nvme_core]
+kernel:  nvme_mpath_add_disk+0xbe/0x100 [nvme_core]
+kernel:  nvme_validate_ns+0x396/0x940 [nvme_core]
+kernel:  nvme_scan_work+0x256/0x390 [nvme_core]
+kernel:  process_one_work+0x1db/0x380
+kernel:  worker_thread+0x4d/0x400
+kernel:  kthread+0x104/0x140
+kernel:  ret_from_fork+0x35/0x40
+
+Fixes: 0d0b660f214d ("nvme: add ANA support")
+Signed-off-by: Anton Eidelman <anton@lightbitslabs.com>
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/multipath.c | 4 ++--
+ drivers/nvme/host/nvme.h      | 2 ++
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index f4287d8550a9f..d1cb65698288b 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -413,11 +413,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
+       if (!head->disk)
+               return;
+-      mutex_lock(&head->lock);
+-      if (!(head->disk->flags & GENHD_FL_UP))
++      if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+               device_add_disk(&head->subsys->dev, head->disk,
+                               nvme_ns_id_attr_groups);
++      mutex_lock(&head->lock);
+       if (nvme_path_is_optimized(ns)) {
+               int node, srcu_idx;
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index 2e04a36296d95..719342600be62 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -359,6 +359,8 @@ struct nvme_ns_head {
+       spinlock_t              requeue_lock;
+       struct work_struct      requeue_work;
+       struct mutex            lock;
++      unsigned long           flags;
++#define NVME_NSHEAD_DISK_LIVE 0
+       struct nvme_ns __rcu    *current_path[];
+ #endif
+ };
+-- 
+2.25.1
+
diff --git a/queue-5.7/nvme-multipath-set-bdi-capabilities-once.patch b/queue-5.7/nvme-multipath-set-bdi-capabilities-once.patch
new file mode 100644 (file)
index 0000000..939da10
--- /dev/null
@@ -0,0 +1,70 @@
+From 8e16bee27179da297be319641e3f3d39830b6fc0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Apr 2020 09:09:04 -0700
+Subject: nvme-multipath: set bdi capabilities once
+
+From: Keith Busch <kbusch@kernel.org>
+
+[ Upstream commit b2ce4d90690bd29ce5b554e203cd03682dd59697 ]
+
+The queues' backing device info capabilities don't change with each
+namespace revalidation. Set it only when each path's request_queue
+is initially added to a multipath queue.
+
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c      | 7 -------
+ drivers/nvme/host/multipath.c | 8 ++++++++
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 7b4cbe2c69541..887139f8fa53b 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -1910,13 +1910,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
+       if (ns->head->disk) {
+               nvme_update_disk_info(ns->head->disk, ns, id);
+               blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+-              if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
+-                      struct backing_dev_info *info =
+-                              ns->head->disk->queue->backing_dev_info;
+-
+-                        info->capabilities |= BDI_CAP_STABLE_WRITES;
+-              }
+-
+               revalidate_disk(ns->head->disk);
+       }
+ #endif
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index 17f172cf456ad..91a8b1ce5a3a2 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -3,6 +3,7 @@
+  * Copyright (c) 2017-2018 Christoph Hellwig.
+  */
++#include <linux/backing-dev.h>
+ #include <linux/moduleparam.h>
+ #include <trace/events/block.h>
+ #include "nvme.h"
+@@ -662,6 +663,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
+               ns->ana_state = NVME_ANA_OPTIMIZED; 
+               nvme_mpath_set_live(ns);
+       }
++
++      if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
++              struct backing_dev_info *info =
++                                      ns->head->disk->queue->backing_dev_info;
++
++              info->capabilities |= BDI_CAP_STABLE_WRITES;
++      }
+ }
+ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+-- 
+2.25.1
+
diff --git a/queue-5.7/powerpc-book3s64-kvm-fix-secondary-page-table-walk-w.patch b/queue-5.7/powerpc-book3s64-kvm-fix-secondary-page-table-walk-w.patch
new file mode 100644 (file)
index 0000000..e14cd0c
--- /dev/null
@@ -0,0 +1,129 @@
+From 715060350a8a13be53857cdbbc06ba460da8e4d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 May 2020 13:34:56 +0530
+Subject: powerpc/book3s64/kvm: Fix secondary page table walk warning during
+ migration
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit bf8036a4098d1548cdccf9ed5c523ef4e83e3c68 ]
+
+This patch fixes the below warning reported during migration:
+
+  find_kvm_secondary_pte called with kvm mmu_lock not held
+  CPU: 23 PID: 5341 Comm: qemu-system-ppc Tainted: G        W         5.7.0-rc5-kvm-00211-g9ccf10d6d088 #432
+  NIP:  c008000000fe848c LR: c008000000fe8488 CTR: 0000000000000000
+  REGS: c000001e19f077e0 TRAP: 0700   Tainted: G        W          (5.7.0-rc5-kvm-00211-g9ccf10d6d088)
+  MSR:  9000000000029033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 42222422  XER: 20040000
+  CFAR: c00000000012f5ac IRQMASK: 0
+  GPR00: c008000000fe8488 c000001e19f07a70 c008000000ffe200 0000000000000039
+  GPR04: 0000000000000001 c000001ffc8b4900 0000000000018840 0000000000000007
+  GPR08: 0000000000000003 0000000000000001 0000000000000007 0000000000000001
+  GPR12: 0000000000002000 c000001fff6d9400 000000011f884678 00007fff70b70000
+  GPR16: 00007fff7137cb90 00007fff7dcb4410 0000000000000001 0000000000000000
+  GPR20: 000000000ffe0000 0000000000000000 0000000000000001 0000000000000000
+  GPR24: 8000000000000000 0000000000000001 c000001e1f67e600 c000001e1fd82410
+  GPR28: 0000000000001000 c000001e2e410000 0000000000000fff 0000000000000ffe
+  NIP [c008000000fe848c] kvmppc_hv_get_dirty_log_radix+0x2e4/0x340 [kvm_hv]
+  LR [c008000000fe8488] kvmppc_hv_get_dirty_log_radix+0x2e0/0x340 [kvm_hv]
+  Call Trace:
+  [c000001e19f07a70] [c008000000fe8488] kvmppc_hv_get_dirty_log_radix+0x2e0/0x340 [kvm_hv] (unreliable)
+  [c000001e19f07b50] [c008000000fd42e4] kvm_vm_ioctl_get_dirty_log_hv+0x33c/0x3c0 [kvm_hv]
+  [c000001e19f07be0] [c008000000eea878] kvm_vm_ioctl_get_dirty_log+0x30/0x50 [kvm]
+  [c000001e19f07c00] [c008000000edc818] kvm_vm_ioctl+0x2b0/0xc00 [kvm]
+  [c000001e19f07d50] [c00000000046e148] ksys_ioctl+0xf8/0x150
+  [c000001e19f07da0] [c00000000046e1c8] sys_ioctl+0x28/0x80
+  [c000001e19f07dc0] [c00000000003652c] system_call_exception+0x16c/0x240
+  [c000001e19f07e20] [c00000000000d070] system_call_common+0xf0/0x278
+  Instruction dump:
+  7d3a512a 4200ffd0 7ffefb78 4bfffdc4 60000000 3c820000 e8848468 3c620000
+  e86384a8 38840010 4800673d e8410018 <0fe00000> 4bfffdd4 60000000 60000000
+
+Reported-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20200528080456.87797-1-aneesh.kumar@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/kvm_book3s_64.h | 10 +++++++
+ arch/powerpc/kvm/book3s_64_mmu_radix.c   | 35 ++++++++++++++++++++----
+ 2 files changed, 39 insertions(+), 6 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
+index 2c2635967d6e0..0431db7b82af7 100644
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -635,6 +635,16 @@ extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+                               unsigned long gpa, unsigned long hpa,
+                               unsigned long nbytes);
++static inline pte_t *
++find_kvm_secondary_pte_unlocked(struct kvm *kvm, unsigned long ea,
++                              unsigned *hshift)
++{
++      pte_t *pte;
++
++      pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);
++      return pte;
++}
++
+ static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea,
+                                           unsigned *hshift)
+ {
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index e9b3622405b1d..d4e532a63f08e 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -1052,7 +1052,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+ {
+       unsigned long gfn = memslot->base_gfn + pagenum;
+       unsigned long gpa = gfn << PAGE_SHIFT;
+-      pte_t *ptep;
++      pte_t *ptep, pte;
+       unsigned int shift;
+       int ret = 0;
+       unsigned long old, *rmapp;
+@@ -1060,12 +1060,35 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+       if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+               return ret;
+-      ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+-      if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
+-              ret = 1;
+-              if (shift)
+-                      ret = 1 << (shift - PAGE_SHIFT);
++      /*
++       * For performance reasons we don't hold kvm->mmu_lock while walking the
++       * partition scoped table.
++       */
++      ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
++      if (!ptep)
++              return 0;
++
++      pte = READ_ONCE(*ptep);
++      if (pte_present(pte) && pte_dirty(pte)) {
+               spin_lock(&kvm->mmu_lock);
++              /*
++               * Recheck the pte again
++               */
++              if (pte_val(pte) != pte_val(*ptep)) {
++                      /*
++                       * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can
++                       * only find PAGE_SIZE pte entries here. We can continue
++                       * to use the pte addr returned by above page table
++                       * walk.
++                       */
++                      if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
++                              spin_unlock(&kvm->mmu_lock);
++                              return 0;
++                      }
++              }
++
++              ret = 1;
++              VM_BUG_ON(shift);
+               old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
+                                             gpa, shift);
+               kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
+-- 
+2.25.1
+
diff --git a/queue-5.7/powerpc-kvm-book3s-add-helper-to-walk-partition-scop.patch b/queue-5.7/powerpc-kvm-book3s-add-helper-to-walk-partition-scop.patch
new file mode 100644 (file)
index 0000000..f256d14
--- /dev/null
@@ -0,0 +1,125 @@
+From 68c2647311131bdcce3df8eddb7fbc8e0c4147ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 May 2020 12:47:16 +0530
+Subject: powerpc/kvm/book3s: Add helper to walk partition scoped linux page
+ table.
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit 4b99412ed6972cc77c1f16009e1d00323fcef9ab ]
+
+The locking rules for walking partition scoped table is different from process
+scoped table. Hence add a helper for secondary linux page table walk and also
+add check whether we are holding the right locks.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20200505071729.54912-10-aneesh.kumar@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/kvm_book3s_64.h | 13 +++++++++++++
+ arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 ++++++------
+ arch/powerpc/kvm/book3s_hv_nested.c      |  2 +-
+ 3 files changed, 20 insertions(+), 7 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
+index 04b2b927bb5ae..2c2635967d6e0 100644
+--- a/arch/powerpc/include/asm/kvm_book3s_64.h
++++ b/arch/powerpc/include/asm/kvm_book3s_64.h
+@@ -14,6 +14,7 @@
+ #include <asm/book3s/64/mmu-hash.h>
+ #include <asm/cpu_has_feature.h>
+ #include <asm/ppc-opcode.h>
++#include <asm/pte-walk.h>
+ #ifdef CONFIG_PPC_PSERIES
+ static inline bool kvmhv_on_pseries(void)
+@@ -634,6 +635,18 @@ extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+                               unsigned long gpa, unsigned long hpa,
+                               unsigned long nbytes);
++static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea,
++                                          unsigned *hshift)
++{
++      pte_t *pte;
++
++      VM_WARN(!spin_is_locked(&kvm->mmu_lock),
++              "%s called with kvm mmu_lock not held \n", __func__);
++      pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);
++
++      return pte;
++}
++
+ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+ #endif /* __ASM_KVM_BOOK3S_64_H__ */
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index bc6c1aa3d0e92..e9b3622405b1d 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -993,11 +993,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+               return 0;
+       }
+-      ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
++      ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+       if (ptep && pte_present(*ptep))
+               kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+                                kvm->arch.lpid);
+-      return 0;                               
++      return 0;
+ }
+ /* Called with kvm->mmu_lock held */
+@@ -1013,7 +1013,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+       if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+               return ref;
+-      ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
++      ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+       if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
+               old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
+                                             gpa, shift);
+@@ -1040,7 +1040,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+       if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+               return ref;
+-      ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
++      ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+       if (ptep && pte_present(*ptep) && pte_young(*ptep))
+               ref = 1;
+       return ref;
+@@ -1060,7 +1060,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+       if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+               return ret;
+-      ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
++      ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+       if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
+               ret = 1;
+               if (shift)
+@@ -1121,7 +1121,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
+       gpa = memslot->base_gfn << PAGE_SHIFT;
+       spin_lock(&kvm->mmu_lock);
+       for (n = memslot->npages; n; --n) {
+-              ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
++              ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+               if (ptep && pte_present(*ptep))
+                       kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+                                        kvm->arch.lpid);
+diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
+index dc97e5be76f61..7f1fc5db13eab 100644
+--- a/arch/powerpc/kvm/book3s_hv_nested.c
++++ b/arch/powerpc/kvm/book3s_hv_nested.c
+@@ -1362,7 +1362,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run,
+       /* See if can find translation in our partition scoped tables for L1 */
+       pte = __pte(0);
+       spin_lock(&kvm->mmu_lock);
+-      pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
++      pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
+       if (!shift)
+               shift = PAGE_SHIFT;
+       if (pte_p)
+-- 
+2.25.1
+
diff --git a/queue-5.7/rxrpc-fix-race-between-incoming-ack-parser-and-retra.patch b/queue-5.7/rxrpc-fix-race-between-incoming-ack-parser-and-retra.patch
new file mode 100644 (file)
index 0000000..780c78b
--- /dev/null
@@ -0,0 +1,104 @@
+From 60f551918cd97f8d5f48bca2cb08cf0da61dd9ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Jun 2020 21:57:00 +0100
+Subject: rxrpc: Fix race between incoming ACK parser and retransmitter
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 2ad6691d988c0c611362ddc2aad89e0fb50e3261 ]
+
+There's a race between the retransmission code and the received ACK parser.
+The problem is that the retransmission loop has to drop the lock under
+which it is iterating through the transmission buffer in order to transmit
+a packet, but whilst the lock is dropped, the ACK parser can crank the Tx
+window round and discard the packets from the buffer.
+
+The retransmission code then updated the annotations for the wrong packet
+and a later retransmission thought it had to retransmit a packet that
+wasn't there, leading to a NULL pointer dereference.
+
+Fix this by:
+
+ (1) Moving the annotation change to before we drop the lock prior to
+     transmission.  This means we can't vary the annotation depending on
+     the outcome of the transmission, but that's fine - we'll retransmit
+     again later if it failed now.
+
+ (2) Skipping the packet if the skb pointer is NULL.
+
+The following oops was seen:
+
+       BUG: kernel NULL pointer dereference, address: 000000000000002d
+       Workqueue: krxrpcd rxrpc_process_call
+       RIP: 0010:rxrpc_get_skb+0x14/0x8a
+       ...
+       Call Trace:
+        rxrpc_resend+0x331/0x41e
+        ? get_vtime_delta+0x13/0x20
+        rxrpc_process_call+0x3c0/0x4ac
+        process_one_work+0x18f/0x27f
+        worker_thread+0x1a3/0x247
+        ? create_worker+0x17d/0x17d
+        kthread+0xe6/0xeb
+        ? kthread_delayed_work_timer_fn+0x83/0x83
+        ret_from_fork+0x1f/0x30
+
+Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/call_event.c | 29 +++++++++++------------------
+ 1 file changed, 11 insertions(+), 18 deletions(-)
+
+diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
+index 2a65ac41055f5..985fb89202d0c 100644
+--- a/net/rxrpc/call_event.c
++++ b/net/rxrpc/call_event.c
+@@ -248,7 +248,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+               if (anno_type != RXRPC_TX_ANNO_RETRANS)
+                       continue;
++              /* We need to reset the retransmission state, but we need to do
++               * so before we drop the lock as a new ACK/NAK may come in and
++               * confuse things
++               */
++              annotation &= ~RXRPC_TX_ANNO_MASK;
++              annotation |= RXRPC_TX_ANNO_RESENT;
++              call->rxtx_annotations[ix] = annotation;
++
+               skb = call->rxtx_buffer[ix];
++              if (!skb)
++                      continue;
++
+               rxrpc_get_skb(skb, rxrpc_skb_got);
+               spin_unlock_bh(&call->lock);
+@@ -262,24 +273,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+               rxrpc_free_skb(skb, rxrpc_skb_freed);
+               spin_lock_bh(&call->lock);
+-
+-              /* We need to clear the retransmit state, but there are two
+-               * things we need to be aware of: A new ACK/NAK might have been
+-               * received and the packet might have been hard-ACK'd (in which
+-               * case it will no longer be in the buffer).
+-               */
+-              if (after(seq, call->tx_hard_ack)) {
+-                      annotation = call->rxtx_annotations[ix];
+-                      anno_type = annotation & RXRPC_TX_ANNO_MASK;
+-                      if (anno_type == RXRPC_TX_ANNO_RETRANS ||
+-                          anno_type == RXRPC_TX_ANNO_NAK) {
+-                              annotation &= ~RXRPC_TX_ANNO_MASK;
+-                              annotation |= RXRPC_TX_ANNO_UNACK;
+-                      }
+-                      annotation |= RXRPC_TX_ANNO_RESENT;
+-                      call->rxtx_annotations[ix] = annotation;
+-              }
+-
+               if (after(call->tx_hard_ack, seq))
+                       seq = call->tx_hard_ack;
+       }
+-- 
+2.25.1
+
diff --git a/queue-5.7/s390-debug-avoid-kernel-warning-on-too-large-number-.patch b/queue-5.7/s390-debug-avoid-kernel-warning-on-too-large-number-.patch
new file mode 100644 (file)
index 0000000..f3dd5f4
--- /dev/null
@@ -0,0 +1,41 @@
+From 3f022741052cd78f4ad6856fcf7930c4c0c6615c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 31 Mar 2020 05:57:23 -0400
+Subject: s390/debug: avoid kernel warning on too large number of pages
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+[ Upstream commit 827c4913923e0b441ba07ba4cc41e01181102303 ]
+
+When specifying insanely large debug buffers a kernel warning is
+printed. The debug code does handle the error gracefully, though.
+Instead of duplicating the check let us silence the warning to
+avoid crashes when panic_on_warn is used.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/debug.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
+index 6d321f5f101d6..7184d55d87aae 100644
+--- a/arch/s390/kernel/debug.c
++++ b/arch/s390/kernel/debug.c
+@@ -198,9 +198,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas)
+       if (!areas)
+               goto fail_malloc_areas;
+       for (i = 0; i < nr_areas; i++) {
++              /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */
+               areas[i] = kmalloc_array(pages_per_area,
+                                        sizeof(debug_entry_t *),
+-                                       GFP_KERNEL);
++                                       GFP_KERNEL | __GFP_NOWARN);
+               if (!areas[i])
+                       goto fail_malloc_areas2;
+               for (j = 0; j < pages_per_area; j++) {
+-- 
+2.25.1
+
diff --git a/queue-5.7/sched-debug-make-sd-flags-sysctl-read-only.patch b/queue-5.7/sched-debug-make-sd-flags-sysctl-read-only.patch
new file mode 100644 (file)
index 0000000..9ff23b9
--- /dev/null
@@ -0,0 +1,48 @@
+From 063606d9898dc86bbbf7e0a61c30a0b0d05c8c4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Apr 2020 22:05:05 +0100
+Subject: sched/debug: Make sd->flags sysctl read-only
+
+From: Valentin Schneider <valentin.schneider@arm.com>
+
+[ Upstream commit 9818427c6270a9ce8c52c8621026fe9cebae0f92 ]
+
+Writing to the sysctl of a sched_domain->flags directly updates the value of
+the field, and goes nowhere near update_top_cache_domain(). This means that
+the cached domain pointers can end up containing stale data (e.g. the
+domain pointed to doesn't have the relevant flag set anymore).
+
+Explicit domain walks that check for flags will be affected by
+the write, but this won't be in sync with the cached pointers which will
+still point to the domains that were cached at the last sched_domain
+build.
+
+In other words, writing to this interface is playing a dangerous game. It
+could be made to trigger an update of the cached sched_domain pointers when
+written to, but this does not seem to be worth the trouble. Make it
+read-only.
+
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20200415210512.805-3-valentin.schneider@arm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/debug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index 239970b991c03..0f4aaad236a9d 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -258,7 +258,7 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
+       set_table_entry(&table[2], "busy_factor",         &sd->busy_factor,         sizeof(int),  0644, proc_dointvec_minmax);
+       set_table_entry(&table[3], "imbalance_pct",       &sd->imbalance_pct,       sizeof(int),  0644, proc_dointvec_minmax);
+       set_table_entry(&table[4], "cache_nice_tries",    &sd->cache_nice_tries,    sizeof(int),  0644, proc_dointvec_minmax);
+-      set_table_entry(&table[5], "flags",               &sd->flags,               sizeof(int),  0644, proc_dointvec_minmax);
++      set_table_entry(&table[5], "flags",               &sd->flags,               sizeof(int),  0444, proc_dointvec_minmax);
+       set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
+       set_table_entry(&table[7], "name",                sd->name,            CORENAME_MAX_SIZE, 0444, proc_dostring);
+       /* &table[8] is terminator */
+-- 
+2.25.1
+
diff --git a/queue-5.7/seg6-fix-seg6_validate_srh-to-avoid-slab-out-of-boun.patch b/queue-5.7/seg6-fix-seg6_validate_srh-to-avoid-slab-out-of-boun.patch
new file mode 100644 (file)
index 0000000..5eb4dff
--- /dev/null
@@ -0,0 +1,169 @@
+From 2fee62416154243ada38b173bf0d55dfcf5a14a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jun 2020 06:54:42 +0000
+Subject: seg6: fix seg6_validate_srh() to avoid slab-out-of-bounds
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ahmed Abdelsalam <ahabdels@gmail.com>
+
+[ Upstream commit bb986a50421a11bf31a81afb15b9b8f45a4a3a11 ]
+
+The seg6_validate_srh() is used to validate SRH for three cases:
+
+case1: SRH of data-plane SRv6 packets to be processed by the Linux kernel.
+Case2: SRH of the netlink message received  from user-space (iproute2)
+Case3: SRH injected into packets through setsockopt
+
+In case1, the SRH can be encoded in the Reduced way (i.e., first SID is
+carried in DA only and not represented as SID in the SRH) and the
+seg6_validate_srh() now handles this case correctly.
+
+In case2 and case3, the SRH shouldn’t be encoded in the Reduced way
+otherwise we lose the first segment (i.e., the first hop).
+
+The current implementation of the seg6_validate_srh() allow SRH of case2
+and case3 to be encoded in the Reduced way. This leads a slab-out-of-bounds
+problem.
+
+This patch verifies SRH of case1, case2 and case3. Allowing case1 to be
+reduced while preventing SRH of case2 and case3 from being reduced .
+
+Reported-by: syzbot+e8c028b62439eac42073@syzkaller.appspotmail.com
+Reported-by: YueHaibing <yuehaibing@huawei.com>
+Fixes: 0cb7498f234e ("seg6: fix SRH processing to comply with RFC8754")
+Signed-off-by: Ahmed Abdelsalam <ahabdels@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/seg6.h       |  2 +-
+ net/core/filter.c        |  2 +-
+ net/ipv6/ipv6_sockglue.c |  2 +-
+ net/ipv6/seg6.c          | 16 ++++++++++------
+ net/ipv6/seg6_iptunnel.c |  2 +-
+ net/ipv6/seg6_local.c    |  6 +++---
+ 6 files changed, 17 insertions(+), 13 deletions(-)
+
+diff --git a/include/net/seg6.h b/include/net/seg6.h
+index 640724b352731..9d19c15e8545c 100644
+--- a/include/net/seg6.h
++++ b/include/net/seg6.h
+@@ -57,7 +57,7 @@ extern void seg6_iptunnel_exit(void);
+ extern int seg6_local_init(void);
+ extern void seg6_local_exit(void);
+-extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
++extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced);
+ extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+                            int proto);
+ extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 9512a9772d691..45fa65a289833 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -4920,7 +4920,7 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
+       int err;
+       struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
+-      if (!seg6_validate_srh(srh, len))
++      if (!seg6_validate_srh(srh, len, false))
+               return -EINVAL;
+       switch (type) {
+diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
+index 5af97b4f5df30..ff187fd2083ff 100644
+--- a/net/ipv6/ipv6_sockglue.c
++++ b/net/ipv6/ipv6_sockglue.c
+@@ -458,7 +458,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+                               struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)
+                                                         opt->srcrt;
+-                              if (!seg6_validate_srh(srh, optlen))
++                              if (!seg6_validate_srh(srh, optlen, false))
+                                       goto sticky_done;
+                               break;
+                       }
+diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
+index 37b434293bda3..d2f8138e5a73a 100644
+--- a/net/ipv6/seg6.c
++++ b/net/ipv6/seg6.c
+@@ -25,7 +25,7 @@
+ #include <net/seg6_hmac.h>
+ #endif
+-bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
++bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced)
+ {
+       unsigned int tlv_offset;
+       int max_last_entry;
+@@ -37,13 +37,17 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
+       if (((srh->hdrlen + 1) << 3) != len)
+               return false;
+-      max_last_entry = (srh->hdrlen / 2) - 1;
+-
+-      if (srh->first_segment > max_last_entry)
++      if (!reduced && srh->segments_left > srh->first_segment) {
+               return false;
++      } else {
++              max_last_entry = (srh->hdrlen / 2) - 1;
+-      if (srh->segments_left > srh->first_segment + 1)
+-              return false;
++              if (srh->first_segment > max_last_entry)
++                      return false;
++
++              if (srh->segments_left > srh->first_segment + 1)
++                      return false;
++      }
+       tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
+diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
+index c7cbfeae94f5e..e0e9f48ab14fe 100644
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -426,7 +426,7 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
+       }
+       /* verify that SRH is consistent */
+-      if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
++      if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
+               return -EINVAL;
+       newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
+diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
+index 52493423f3299..eba23279912df 100644
+--- a/net/ipv6/seg6_local.c
++++ b/net/ipv6/seg6_local.c
+@@ -87,7 +87,7 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
+        */
+       srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+-      if (!seg6_validate_srh(srh, len))
++      if (!seg6_validate_srh(srh, len, true))
+               return NULL;
+       return srh;
+@@ -495,7 +495,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
+                       return false;
+               srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
+-              if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))
++              if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
+                       return false;
+               srh_state->valid = true;
+@@ -670,7 +670,7 @@ static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+       if (len < sizeof(*srh) + sizeof(struct in6_addr))
+               return -EINVAL;
+-      if (!seg6_validate_srh(srh, len))
++      if (!seg6_validate_srh(srh, len, false))
+               return -EINVAL;
+       slwt->srh = kmemdup(srh, len, GFP_KERNEL);
+-- 
+2.25.1
+
index a3da2fbfb46a744255553c555d9a251be4a6150e..d63d36da25c23d03d7b54ed18dca3222088c35b0 100644 (file)
@@ -8,3 +8,33 @@ btrfs-fix-race-between-block-group-removal-and-block.patch
 mm-fix-swap-cache-node-allocation-mask.patch
 drm-amd-display-fix-incorrectly-pruned-modes-with-de.patch
 drm-amd-display-fix-ineffective-setting-of-max-bpc-p.patch
+seg6-fix-seg6_validate_srh-to-avoid-slab-out-of-boun.patch
+tipc-add-test-for-nagle-algorithm-effectiveness.patch
+tipc-fix-kernel-warning-in-tipc_msg_append.patch
+usbnet-smsc95xx-fix-use-after-free-after-removal.patch
+tipc-fix-null-pointer-dereference-in-__tipc_sendstre.patch
+drm-i915-gt-mark-timeline-cacheline-as-destroyed-aft.patch
+drm-amdgpu-disable-ras-query-and-iject-during-gpu-re.patch
+drm-amdgpu-fix-non-pointer-dereference-for-non-ras-s.patch
+drm-amdgpu-fix-kernel-page-fault-issue-by-ras-recove.patch
+sched-debug-make-sd-flags-sysctl-read-only.patch
+soc-ti-omap-prm-use-atomic-iopoll-instead-of-sleepin.patch
+powerpc-kvm-book3s-add-helper-to-walk-partition-scop.patch
+powerpc-book3s64-kvm-fix-secondary-page-table-walk-w.patch
+mm-slub.c-fix-corrupted-freechain-in-deactivate_slab.patch
+mm-slub-fix-stack-overruns-with-slub_stats.patch
+mm-dump_page-do-not-crash-with-invalid-mapping-point.patch
+io_uring-fix-sq-io-poll-with-unsupported-opcodes.patch
+rxrpc-fix-race-between-incoming-ack-parser-and-retra.patch
+usb-usbtest-fix-missing-kfree-dev-buf-in-usbtest_dis.patch
+tools-lib-traceevent-add-append-function-helper-for-.patch
+tools-lib-traceevent-handle-__attribute__-user-in-fi.patch
+s390-debug-avoid-kernel-warning-on-too-large-number-.patch
+io_uring-fix-io_sq_thread-no-schedule-when-busy.patch
+nvme-multipath-set-bdi-capabilities-once.patch
+nvme-fix-possible-deadlock-when-i-o-is-blocked.patch
+nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch
+nvme-multipath-fix-deadlock-due-to-head-lock.patch
+nvme-multipath-fix-bogus-request-queue-reference-put.patch
+io_uring-fix-current-mm-null-dereference-on-exit.patch
+kgdb-avoid-suspicious-rcu-usage-warning.patch
diff --git a/queue-5.7/soc-ti-omap-prm-use-atomic-iopoll-instead-of-sleepin.patch b/queue-5.7/soc-ti-omap-prm-use-atomic-iopoll-instead-of-sleepin.patch
new file mode 100644 (file)
index 0000000..1d9c369
--- /dev/null
@@ -0,0 +1,45 @@
+From 1c988ce76f672c8249dd8c22c939b4d04b72c55f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 May 2020 10:37:18 +0300
+Subject: soc: ti: omap-prm: use atomic iopoll instead of sleeping one
+
+From: Tero Kristo <t-kristo@ti.com>
+
+[ Upstream commit 98ece19f247159a51003796ede7112fef2df5d7f ]
+
+The reset handling APIs for omap-prm can be invoked PM runtime which
+runs in atomic context. For this to work properly, switch to atomic
+iopoll version instead of the current which can sleep. Otherwise,
+this throws a "BUG: scheduling while atomic" warning. Issue is seen
+rather easily when CONFIG_PREEMPT is enabled.
+
+Signed-off-by: Tero Kristo <t-kristo@ti.com>
+Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/ti/omap_prm.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/soc/ti/omap_prm.c b/drivers/soc/ti/omap_prm.c
+index 96c6f777519c0..c9b3f9ebf0bbf 100644
+--- a/drivers/soc/ti/omap_prm.c
++++ b/drivers/soc/ti/omap_prm.c
+@@ -256,10 +256,10 @@ static int omap_reset_deassert(struct reset_controller_dev *rcdev,
+               goto exit;
+       /* wait for the status to be set */
+-      ret = readl_relaxed_poll_timeout(reset->prm->base +
+-                                       reset->prm->data->rstst,
+-                                       v, v & BIT(st_bit), 1,
+-                                       OMAP_RESET_MAX_WAIT);
++      ret = readl_relaxed_poll_timeout_atomic(reset->prm->base +
++                                               reset->prm->data->rstst,
++                                               v, v & BIT(st_bit), 1,
++                                               OMAP_RESET_MAX_WAIT);
+       if (ret)
+               pr_err("%s: timedout waiting for %s:%lu\n", __func__,
+                      reset->prm->data->name, id);
+-- 
+2.25.1
+
diff --git a/queue-5.7/tipc-add-test-for-nagle-algorithm-effectiveness.patch b/queue-5.7/tipc-add-test-for-nagle-algorithm-effectiveness.patch
new file mode 100644 (file)
index 0000000..f1b859b
--- /dev/null
@@ -0,0 +1,282 @@
+From afcf3e9b57000d467f2a99e77ebdc09eca724d64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 May 2020 16:38:38 +0700
+Subject: tipc: add test for Nagle algorithm effectiveness
+
+From: Tuong Lien <tuong.t.lien@dektech.com.au>
+
+[ Upstream commit 0a3e060f340dbe232ffa290c40f879b7f7db595b ]
+
+When streaming in Nagle mode, we try to bundle small messages from user
+as many as possible if there is one outstanding buffer, i.e. not ACK-ed
+by the receiving side, which helps boost up the overall throughput. So,
+the algorithm's effectiveness really depends on when Nagle ACK comes or
+what the specific network latency (RTT) is, compared to the user's
+message sending rate.
+
+In a bad case, the user's sending rate is low or the network latency is
+small, there will not be many bundles, so making a Nagle ACK or waiting
+for it is not meaningful.
+For example: a user sends its messages every 100ms and the RTT is 50ms,
+then for each messages, we require one Nagle ACK but then there is only
+one user message sent without any bundles.
+
+In a better case, even if we have a few bundles (e.g. the RTT = 300ms),
+but now the user sends messages in medium size, then there will not be
+any difference at all, that says 3 x 1000-byte data messages if bundled
+will still result in 3 bundles with MTU = 1500.
+
+When Nagle is ineffective, the delay in user message sending is clearly
+wasted instead of sending directly.
+
+Besides, adding Nagle ACKs will consume some processor load on both the
+sending and receiving sides.
+
+This commit adds a test on the effectiveness of the Nagle algorithm for
+an individual connection in the network on which it actually runs.
+Particularly, upon receipt of a Nagle ACK we will compare the number of
+bundles in the backlog queue to the number of user messages which would
+be sent directly without Nagle. If the ratio is good (e.g. >= 2), Nagle
+mode will be kept for further message sending. Otherwise, we will leave
+Nagle and put a 'penalty' on the connection, so it will have to spend
+more 'one-way' messages before being able to re-enter Nagle.
+
+In addition, the 'ack-required' bit is only set when really needed that
+the number of Nagle ACKs will be reduced during Nagle mode.
+
+Testing with benchmark showed that with the patch, there was not much
+difference in throughput for small messages since the tool continuously
+sends messages without a break, so Nagle would still take in effect.
+
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/msg.c    |  3 ---
+ net/tipc/msg.h    | 14 +++++++++--
+ net/tipc/socket.c | 64 ++++++++++++++++++++++++++++++++++++++---------
+ 3 files changed, 64 insertions(+), 17 deletions(-)
+
+diff --git a/net/tipc/msg.c b/net/tipc/msg.c
+index 3ad411884e6c0..93966321f8929 100644
+--- a/net/tipc/msg.c
++++ b/net/tipc/msg.c
+@@ -235,9 +235,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
+                       msg_set_size(hdr, MIN_H_SIZE);
+                       __skb_queue_tail(txq, skb);
+                       total += 1;
+-                      if (prev)
+-                              msg_set_ack_required(buf_msg(prev), 0);
+-                      msg_set_ack_required(hdr, 1);
+               }
+               hdr = buf_msg(skb);
+               curr = msg_blocks(hdr);
+diff --git a/net/tipc/msg.h b/net/tipc/msg.h
+index 871feadbbc191..a4e2029170b1b 100644
+--- a/net/tipc/msg.h
++++ b/net/tipc/msg.h
+@@ -321,9 +321,19 @@ static inline int msg_ack_required(struct tipc_msg *m)
+       return msg_bits(m, 0, 18, 1);
+ }
+-static inline void msg_set_ack_required(struct tipc_msg *m, u32 d)
++static inline void msg_set_ack_required(struct tipc_msg *m)
+ {
+-      msg_set_bits(m, 0, 18, 1, d);
++      msg_set_bits(m, 0, 18, 1, 1);
++}
++
++static inline int msg_nagle_ack(struct tipc_msg *m)
++{
++      return msg_bits(m, 0, 18, 1);
++}
++
++static inline void msg_set_nagle_ack(struct tipc_msg *m)
++{
++      msg_set_bits(m, 0, 18, 1, 1);
+ }
+ static inline bool msg_is_rcast(struct tipc_msg *m)
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index e370ad0edd768..d6b67d07d22ec 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -48,6 +48,8 @@
+ #include "group.h"
+ #include "trace.h"
++#define NAGLE_START_INIT      4
++#define NAGLE_START_MAX               1024
+ #define CONN_TIMEOUT_DEFAULT    8000    /* default connect timeout = 8s */
+ #define CONN_PROBING_INTV     msecs_to_jiffies(3600000)  /* [ms] => 1 h */
+ #define TIPC_FWD_MSG          1
+@@ -119,7 +121,10 @@ struct tipc_sock {
+       struct rcu_head rcu;
+       struct tipc_group *group;
+       u32 oneway;
++      u32 nagle_start;
+       u16 snd_backlog;
++      u16 msg_acc;
++      u16 pkt_cnt;
+       bool expect_ack;
+       bool nodelay;
+       bool group_is_open;
+@@ -143,7 +148,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk);
+ static void tipc_sk_remove(struct tipc_sock *tsk);
+ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
+ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
+-static void tipc_sk_push_backlog(struct tipc_sock *tsk);
++static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+ static const struct proto_ops packet_ops;
+ static const struct proto_ops stream_ops;
+@@ -474,6 +479,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
+       tsk = tipc_sk(sk);
+       tsk->max_pkt = MAX_PKT_DEFAULT;
+       tsk->maxnagle = 0;
++      tsk->nagle_start = NAGLE_START_INIT;
+       INIT_LIST_HEAD(&tsk->publications);
+       INIT_LIST_HEAD(&tsk->cong_links);
+       msg = &tsk->phdr;
+@@ -541,7 +547,7 @@ static void __tipc_shutdown(struct socket *sock, int error)
+                                           !tsk_conn_cong(tsk)));
+       /* Push out delayed messages if in Nagle mode */
+-      tipc_sk_push_backlog(tsk);
++      tipc_sk_push_backlog(tsk, false);
+       /* Remove pending SYN */
+       __skb_queue_purge(&sk->sk_write_queue);
+@@ -1252,14 +1258,37 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
+ /* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
+  *                         when socket is in Nagle mode
+  */
+-static void tipc_sk_push_backlog(struct tipc_sock *tsk)
++static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack)
+ {
+       struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
++      struct sk_buff *skb = skb_peek_tail(txq);
+       struct net *net = sock_net(&tsk->sk);
+       u32 dnode = tsk_peer_node(tsk);
+-      struct sk_buff *skb = skb_peek(txq);
+       int rc;
++      if (nagle_ack) {
++              tsk->pkt_cnt += skb_queue_len(txq);
++              if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) {
++                      tsk->oneway = 0;
++                      if (tsk->nagle_start < NAGLE_START_MAX)
++                              tsk->nagle_start *= 2;
++                      tsk->expect_ack = false;
++                      pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n",
++                               tsk->portid, tsk->msg_acc, tsk->pkt_cnt,
++                               tsk->nagle_start);
++              } else {
++                      tsk->nagle_start = NAGLE_START_INIT;
++                      if (skb) {
++                              msg_set_ack_required(buf_msg(skb));
++                              tsk->expect_ack = true;
++                      } else {
++                              tsk->expect_ack = false;
++                      }
++              }
++              tsk->msg_acc = 0;
++              tsk->pkt_cnt = 0;
++      }
++
+       if (!skb || tsk->cong_link_cnt)
+               return;
+@@ -1267,9 +1296,10 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk)
+       if (msg_is_syn(buf_msg(skb)))
+               return;
++      if (tsk->msg_acc)
++              tsk->pkt_cnt += skb_queue_len(txq);
+       tsk->snt_unacked += tsk->snd_backlog;
+       tsk->snd_backlog = 0;
+-      tsk->expect_ack = true;
+       rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
+       if (rc == -ELINKCONG)
+               tsk->cong_link_cnt = 1;
+@@ -1322,8 +1352,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+               return;
+       } else if (mtyp == CONN_ACK) {
+               was_cong = tsk_conn_cong(tsk);
+-              tsk->expect_ack = false;
+-              tipc_sk_push_backlog(tsk);
++              tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr));
+               tsk->snt_unacked -= msg_conn_ack(hdr);
+               if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
+                       tsk->snd_win = msg_adv_win(hdr);
+@@ -1516,6 +1545,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
+       struct tipc_sock *tsk = tipc_sk(sk);
+       struct tipc_msg *hdr = &tsk->phdr;
+       struct net *net = sock_net(sk);
++      struct sk_buff *skb;
+       u32 dnode = tsk_peer_node(tsk);
+       int maxnagle = tsk->maxnagle;
+       int maxpkt = tsk->max_pkt;
+@@ -1544,17 +1574,25 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
+                       break;
+               send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
+               blocks = tsk->snd_backlog;
+-              if (tsk->oneway++ >= 4 && send <= maxnagle) {
++              if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) {
+                       rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
+                       if (unlikely(rc < 0))
+                               break;
+                       blocks += rc;
++                      tsk->msg_acc++;
+                       if (blocks <= 64 && tsk->expect_ack) {
+                               tsk->snd_backlog = blocks;
+                               sent += send;
+                               break;
++                      } else if (blocks > 64) {
++                              tsk->pkt_cnt += skb_queue_len(txq);
++                      } else {
++                              skb = skb_peek_tail(txq);
++                              msg_set_ack_required(buf_msg(skb));
++                              tsk->expect_ack = true;
++                              tsk->msg_acc = 0;
++                              tsk->pkt_cnt = 0;
+                       }
+-                      tsk->expect_ack = true;
+               } else {
+                       rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
+                       if (unlikely(rc != send))
+@@ -2091,7 +2129,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
+               smp_wmb();
+               tsk->cong_link_cnt--;
+               wakeup = true;
+-              tipc_sk_push_backlog(tsk);
++              tipc_sk_push_backlog(tsk, false);
+               break;
+       case GROUP_PROTOCOL:
+               tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
+@@ -2180,7 +2218,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
+               return false;
+       case TIPC_ESTABLISHED:
+               if (!skb_queue_empty(&sk->sk_write_queue))
+-                      tipc_sk_push_backlog(tsk);
++                      tipc_sk_push_backlog(tsk, false);
+               /* Accept only connection-based messages sent by peer */
+               if (likely(con_msg && !err && pport == oport &&
+                          pnode == onode)) {
+@@ -2188,8 +2226,10 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
+                               struct sk_buff *skb;
+                               skb = tipc_sk_build_ack(tsk);
+-                              if (skb)
++                              if (skb) {
++                                      msg_set_nagle_ack(buf_msg(skb));
+                                       __skb_queue_tail(xmitq, skb);
++                              }
+                       }
+                       return true;
+               }
+-- 
+2.25.1
+
diff --git a/queue-5.7/tipc-fix-kernel-warning-in-tipc_msg_append.patch b/queue-5.7/tipc-fix-kernel-warning-in-tipc_msg_append.patch
new file mode 100644 (file)
index 0000000..300ffb4
--- /dev/null
@@ -0,0 +1,80 @@
+From 188dba631a2392fe38321d576a8e5d0d098545bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Jun 2020 17:07:35 +0700
+Subject: tipc: fix kernel WARNING in tipc_msg_append()
+
+From: Tuong Lien <tuong.t.lien@dektech.com.au>
+
+[ Upstream commit c9aa81faf19115fc2e732e7f210b37bb316987ff ]
+
+syzbot found the following issue:
+
+WARNING: CPU: 0 PID: 6808 at include/linux/thread_info.h:150 check_copy_size include/linux/thread_info.h:150 [inline]
+WARNING: CPU: 0 PID: 6808 at include/linux/thread_info.h:150 copy_from_iter include/linux/uio.h:144 [inline]
+WARNING: CPU: 0 PID: 6808 at include/linux/thread_info.h:150 tipc_msg_append+0x49a/0x5e0 net/tipc/msg.c:242
+Kernel panic - not syncing: panic_on_warn set ...
+
+This happens after commit 5e9eeccc58f3 ("tipc: fix NULL pointer
+dereference in streaming") that tried to build at least one buffer even
+when the message data length is zero... However, it now exposes another
+bug that the 'mss' can be zero and the 'cpy' will be negative, thus the
+above kernel WARNING will appear!
+The zero value of 'mss' is never expected because it means Nagle is not
+enabled for the socket (actually the socket type was 'SOCK_SEQPACKET'),
+so the function 'tipc_msg_append()' must not be called at all. But that
+was in this particular case since the message data length was zero, and
+the 'send <= maxnagle' check became true.
+
+We resolve the issue by explicitly checking if Nagle is enabled for the
+socket, i.e. 'maxnagle != 0' before calling the 'tipc_msg_append()'. We
+also reinforce the function to against such a negative values if any.
+
+Reported-by: syzbot+75139a7d2605236b0b7f@syzkaller.appspotmail.com
+Fixes: c0bceb97db9e ("tipc: add smart nagle feature")
+Acked-by: Jon Maloy <jmaloy@redhat.com>
+Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/msg.c    | 4 ++--
+ net/tipc/socket.c | 3 ++-
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/tipc/msg.c b/net/tipc/msg.c
+index 93966321f8929..560d7a4c0ffff 100644
+--- a/net/tipc/msg.c
++++ b/net/tipc/msg.c
+@@ -239,14 +239,14 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
+               hdr = buf_msg(skb);
+               curr = msg_blocks(hdr);
+               mlen = msg_size(hdr);
+-              cpy = min_t(int, rem, mss - mlen);
++              cpy = min_t(size_t, rem, mss - mlen);
+               if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
+                       return -EFAULT;
+               msg_set_size(hdr, mlen + cpy);
+               skb_put(skb, cpy);
+               rem -= cpy;
+               total += msg_blocks(hdr) - curr;
+-      } while (rem);
++      } while (rem > 0);
+       return total - accounted;
+ }
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index d6b67d07d22ec..62fc871a8d673 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -1574,7 +1574,8 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
+                       break;
+               send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
+               blocks = tsk->snd_backlog;
+-              if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) {
++              if (tsk->oneway++ >= tsk->nagle_start && maxnagle &&
++                  send <= maxnagle) {
+                       rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
+                       if (unlikely(rc < 0))
+                               break;
+-- 
+2.25.1
+
diff --git a/queue-5.7/tipc-fix-null-pointer-dereference-in-__tipc_sendstre.patch b/queue-5.7/tipc-fix-null-pointer-dereference-in-__tipc_sendstre.patch
new file mode 100644 (file)
index 0000000..1346fbc
--- /dev/null
@@ -0,0 +1,44 @@
+From ed1a8378cb5dee5efa09e36fe297b4e4c31faa94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 May 2020 22:34:07 +0800
+Subject: tipc: Fix NULL pointer dereference in __tipc_sendstream()
+
+From: YueHaibing <yuehaibing@huawei.com>
+
+[ Upstream commit 4c21daae3dbc9f8536cc18e6e53627821fa2c90c ]
+
+tipc_sendstream() may send zero length packet, then tipc_msg_append()
+do not alloc skb, skb_peek_tail() will get NULL, msg_set_ack_required
+will trigger NULL pointer dereference.
+
+Reported-by: syzbot+8eac6d030e7807c21d32@syzkaller.appspotmail.com
+Fixes: 0a3e060f340d ("tipc: add test for Nagle algorithm effectiveness")
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/socket.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index 62fc871a8d673..f02f2abf6e3c0 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -1589,8 +1589,12 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
+                               tsk->pkt_cnt += skb_queue_len(txq);
+                       } else {
+                               skb = skb_peek_tail(txq);
+-                              msg_set_ack_required(buf_msg(skb));
+-                              tsk->expect_ack = true;
++                              if (skb) {
++                                      msg_set_ack_required(buf_msg(skb));
++                                      tsk->expect_ack = true;
++                              } else {
++                                      tsk->expect_ack = false;
++                              }
+                               tsk->msg_acc = 0;
+                               tsk->pkt_cnt = 0;
+                       }
+-- 
+2.25.1
+
diff --git a/queue-5.7/tools-lib-traceevent-add-append-function-helper-for-.patch b/queue-5.7/tools-lib-traceevent-add-append-function-helper-for-.patch
new file mode 100644 (file)
index 0000000..0acf9ff
--- /dev/null
@@ -0,0 +1,243 @@
+From c51c0cfe5f766b271052a2b6d003ca9d41ce4701 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Mar 2020 16:08:46 -0400
+Subject: tools lib traceevent: Add append() function helper for appending
+ strings
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+[ Upstream commit 27d4d336f2872193e90ee5450559e1699fae0f6d ]
+
+There's several locations that open code realloc and strcat() to append
+text to strings. Add an append() function that takes a delimiter and a
+string to append to another string.
+
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Jaewon Lim <jaewon31.kim@samsung.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Kees Kook <keescook@chromium.org>
+Cc: linux-mm@kvack.org
+Cc: linux-trace-devel@vger.kernel.org
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Link: http://lore.kernel.org/lkml/20200324200956.515118403@goodmis.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/lib/traceevent/event-parse.c | 98 ++++++++++++------------------
+ 1 file changed, 40 insertions(+), 58 deletions(-)
+
+diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
+index e1bd2a93c6db8..eec96c31ea9e5 100644
+--- a/tools/lib/traceevent/event-parse.c
++++ b/tools/lib/traceevent/event-parse.c
+@@ -1425,6 +1425,19 @@ static unsigned int type_size(const char *name)
+       return 0;
+ }
++static int append(char **buf, const char *delim, const char *str)
++{
++      char *new_buf;
++
++      new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1);
++      if (!new_buf)
++              return -1;
++      strcat(new_buf, delim);
++      strcat(new_buf, str);
++      *buf = new_buf;
++      return 0;
++}
++
+ static int event_read_fields(struct tep_event *event, struct tep_format_field **fields)
+ {
+       struct tep_format_field *field = NULL;
+@@ -1432,6 +1445,7 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+       char *token;
+       char *last_token;
+       int count = 0;
++      int ret;
+       do {
+               unsigned int size_dynamic = 0;
+@@ -1490,24 +1504,15 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+                                       field->flags |= TEP_FIELD_IS_POINTER;
+                               if (field->type) {
+-                                      char *new_type;
+-                                      new_type = realloc(field->type,
+-                                                         strlen(field->type) +
+-                                                         strlen(last_token) + 2);
+-                                      if (!new_type) {
+-                                              free(last_token);
+-                                              goto fail;
+-                                      }
+-                                      field->type = new_type;
+-                                      strcat(field->type, " ");
+-                                      strcat(field->type, last_token);
++                                      ret = append(&field->type, " ", last_token);
+                                       free(last_token);
++                                      if (ret < 0)
++                                              goto fail;
+                               } else
+                                       field->type = last_token;
+                               last_token = token;
+                               continue;
+                       }
+-
+                       break;
+               }
+@@ -1523,8 +1528,6 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+               if (strcmp(token, "[") == 0) {
+                       enum tep_event_type last_type = type;
+                       char *brackets = token;
+-                      char *new_brackets;
+-                      int len;
+                       field->flags |= TEP_FIELD_IS_ARRAY;
+@@ -1536,29 +1539,27 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+                               field->arraylen = 0;
+                       while (strcmp(token, "]") != 0) {
++                              const char *delim;
++
+                               if (last_type == TEP_EVENT_ITEM &&
+                                   type == TEP_EVENT_ITEM)
+-                                      len = 2;
++                                      delim = " ";
+                               else
+-                                      len = 1;
++                                      delim = "";
++
+                               last_type = type;
+-                              new_brackets = realloc(brackets,
+-                                                     strlen(brackets) +
+-                                                     strlen(token) + len);
+-                              if (!new_brackets) {
++                              ret = append(&brackets, delim, token);
++                              if (ret < 0) {
+                                       free(brackets);
+                                       goto fail;
+                               }
+-                              brackets = new_brackets;
+-                              if (len == 2)
+-                                      strcat(brackets, " ");
+-                              strcat(brackets, token);
+                               /* We only care about the last token */
+                               field->arraylen = strtoul(token, NULL, 0);
+                               free_token(token);
+                               type = read_token(&token);
+                               if (type == TEP_EVENT_NONE) {
++                                      free(brackets);
+                                       do_warning_event(event, "failed to find token");
+                                       goto fail;
+                               }
+@@ -1566,13 +1567,11 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+                       free_token(token);
+-                      new_brackets = realloc(brackets, strlen(brackets) + 2);
+-                      if (!new_brackets) {
++                      ret = append(&brackets, "", "]");
++                      if (ret < 0) {
+                               free(brackets);
+                               goto fail;
+                       }
+-                      brackets = new_brackets;
+-                      strcat(brackets, "]");
+                       /* add brackets to type */
+@@ -1582,34 +1581,23 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+                        * the format: type [] item;
+                        */
+                       if (type == TEP_EVENT_ITEM) {
+-                              char *new_type;
+-                              new_type = realloc(field->type,
+-                                                 strlen(field->type) +
+-                                                 strlen(field->name) +
+-                                                 strlen(brackets) + 2);
+-                              if (!new_type) {
++                              ret = append(&field->type, " ", field->name);
++                              if (ret < 0) {
+                                       free(brackets);
+                                       goto fail;
+                               }
+-                              field->type = new_type;
+-                              strcat(field->type, " ");
+-                              strcat(field->type, field->name);
++                              ret = append(&field->type, "", brackets);
++
+                               size_dynamic = type_size(field->name);
+                               free_token(field->name);
+-                              strcat(field->type, brackets);
+                               field->name = field->alias = token;
+                               type = read_token(&token);
+                       } else {
+-                              char *new_type;
+-                              new_type = realloc(field->type,
+-                                                 strlen(field->type) +
+-                                                 strlen(brackets) + 1);
+-                              if (!new_type) {
++                              ret = append(&field->type, "", brackets);
++                              if (ret < 0) {
+                                       free(brackets);
+                                       goto fail;
+                               }
+-                              field->type = new_type;
+-                              strcat(field->type, brackets);
+                       }
+                       free(brackets);
+               }
+@@ -2046,19 +2034,16 @@ process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+               /* could just be a type pointer */
+               if ((strcmp(arg->op.op, "*") == 0) &&
+                   type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) {
+-                      char *new_atom;
++                      int ret;
+                       if (left->type != TEP_PRINT_ATOM) {
+                               do_warning_event(event, "bad pointer type");
+                               goto out_free;
+                       }
+-                      new_atom = realloc(left->atom.atom,
+-                                          strlen(left->atom.atom) + 3);
+-                      if (!new_atom)
++                      ret = append(&left->atom.atom, " ", "*");
++                      if (ret < 0)
+                               goto out_warn_free;
+-                      left->atom.atom = new_atom;
+-                      strcat(left->atom.atom, " *");
+                       free(arg->op.op);
+                       *arg = *left;
+                       free(left);
+@@ -3151,18 +3136,15 @@ process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
+               }
+               /* atoms can be more than one token long */
+               while (type == TEP_EVENT_ITEM) {
+-                      char *new_atom;
+-                      new_atom = realloc(atom,
+-                                         strlen(atom) + strlen(token) + 2);
+-                      if (!new_atom) {
++                      int ret;
++
++                      ret = append(&atom, " ", token);
++                      if (ret < 0) {
+                               free(atom);
+                               *tok = NULL;
+                               free_token(token);
+                               return TEP_EVENT_ERROR;
+                       }
+-                      atom = new_atom;
+-                      strcat(atom, " ");
+-                      strcat(atom, token);
+                       free_token(token);
+                       type = read_token_item(&token);
+               }
+-- 
+2.25.1
+
diff --git a/queue-5.7/tools-lib-traceevent-handle-__attribute__-user-in-fi.patch b/queue-5.7/tools-lib-traceevent-handle-__attribute__-user-in-fi.patch
new file mode 100644 (file)
index 0000000..a6f1edc
--- /dev/null
@@ -0,0 +1,98 @@
+From 21d1f87463af7f12d314b5fdd491dc6e0604f65e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Mar 2020 16:08:47 -0400
+Subject: tools lib traceevent: Handle __attribute__((user)) in field names
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+[ Upstream commit 74621d929d944529a5e2878a84f48bfa6fb69a66 ]
+
+Commit c61f13eaa1ee1 ("gcc-plugins: Add structleak for more stack
+initialization") added "__attribute__((user))" to the user when
+stackleak detector is enabled. This now appears in the field format of
+system call trace events for system calls that have user buffers. The
+"__attribute__((user))" breaks the parsing in libtraceevent. That needs
+to be handled.
+
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Jaewon Kim <jaewon31.kim@samsung.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Kees Kook <keescook@chromium.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: linux-mm@kvack.org
+Cc: linux-trace-devel@vger.kernel.org
+Link: http://lore.kernel.org/lkml/20200324200956.663647256@goodmis.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/lib/traceevent/event-parse.c | 39 +++++++++++++++++++++++++++++-
+ 1 file changed, 38 insertions(+), 1 deletion(-)
+
+diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
+index eec96c31ea9e5..010e60d5a0817 100644
+--- a/tools/lib/traceevent/event-parse.c
++++ b/tools/lib/traceevent/event-parse.c
+@@ -1444,6 +1444,7 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+       enum tep_event_type type;
+       char *token;
+       char *last_token;
++      char *delim = " ";
+       int count = 0;
+       int ret;
+@@ -1504,13 +1505,49 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
+                                       field->flags |= TEP_FIELD_IS_POINTER;
+                               if (field->type) {
+-                                      ret = append(&field->type, " ", last_token);
++                                      ret = append(&field->type, delim, last_token);
+                                       free(last_token);
+                                       if (ret < 0)
+                                               goto fail;
+                               } else
+                                       field->type = last_token;
+                               last_token = token;
++                              delim = " ";
++                              continue;
++                      }
++
++                      /* Handle __attribute__((user)) */
++                      if ((type == TEP_EVENT_DELIM) &&
++                          strcmp("__attribute__", last_token) == 0 &&
++                          token[0] == '(') {
++                              int depth = 1;
++                              int ret;
++
++                              ret = append(&field->type, " ", last_token);
++                              ret |= append(&field->type, "", "(");
++                              if (ret < 0)
++                                      goto fail;
++
++                              delim = " ";
++                              while ((type = read_token(&token)) != TEP_EVENT_NONE) {
++                                      if (type == TEP_EVENT_DELIM) {
++                                              if (token[0] == '(')
++                                                      depth++;
++                                              else if (token[0] == ')')
++                                                      depth--;
++                                              if (!depth)
++                                                      break;
++                                              ret = append(&field->type, "", token);
++                                              delim = "";
++                                      } else {
++                                              ret = append(&field->type, delim, token);
++                                              delim = " ";
++                                      }
++                                      if (ret < 0)
++                                              goto fail;
++                                      free(last_token);
++                                      last_token = token;
++                              }
+                               continue;
+                       }
+                       break;
+-- 
+2.25.1
+
diff --git a/queue-5.7/usb-usbtest-fix-missing-kfree-dev-buf-in-usbtest_dis.patch b/queue-5.7/usb-usbtest-fix-missing-kfree-dev-buf-in-usbtest_dis.patch
new file mode 100644 (file)
index 0000000..310b670
--- /dev/null
@@ -0,0 +1,69 @@
+From 5ac3bf12c59f90852a2546e5f39915615364a1a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Jun 2020 11:52:10 +0800
+Subject: usb: usbtest: fix missing kfree(dev->buf) in usbtest_disconnect
+
+From: Zqiang <qiang.zhang@windriver.com>
+
+[ Upstream commit 28ebeb8db77035e058a510ce9bd17c2b9a009dba ]
+
+BUG: memory leak
+unreferenced object 0xffff888055046e00 (size 256):
+  comm "kworker/2:9", pid 2570, jiffies 4294942129 (age 1095.500s)
+  hex dump (first 32 bytes):
+    00 70 04 55 80 88 ff ff 18 bb 5a 81 ff ff ff ff  .p.U......Z.....
+    f5 96 78 81 ff ff ff ff 37 de 8e 81 ff ff ff ff  ..x.....7.......
+  backtrace:
+    [<00000000d121dccf>] kmemleak_alloc_recursive
+include/linux/kmemleak.h:43 [inline]
+    [<00000000d121dccf>] slab_post_alloc_hook mm/slab.h:586 [inline]
+    [<00000000d121dccf>] slab_alloc_node mm/slub.c:2786 [inline]
+    [<00000000d121dccf>] slab_alloc mm/slub.c:2794 [inline]
+    [<00000000d121dccf>] kmem_cache_alloc_trace+0x15e/0x2d0 mm/slub.c:2811
+    [<000000005c3c3381>] kmalloc include/linux/slab.h:555 [inline]
+    [<000000005c3c3381>] usbtest_probe+0x286/0x19d0
+drivers/usb/misc/usbtest.c:2790
+    [<000000001cec6910>] usb_probe_interface+0x2bd/0x870
+drivers/usb/core/driver.c:361
+    [<000000007806c118>] really_probe+0x48d/0x8f0 drivers/base/dd.c:551
+    [<00000000a3308c3e>] driver_probe_device+0xfc/0x2a0 drivers/base/dd.c:724
+    [<000000003ef66004>] __device_attach_driver+0x1b6/0x240
+drivers/base/dd.c:831
+    [<00000000eee53e97>] bus_for_each_drv+0x14e/0x1e0 drivers/base/bus.c:431
+    [<00000000bb0648d0>] __device_attach+0x1f9/0x350 drivers/base/dd.c:897
+    [<00000000838b324a>] device_initial_probe+0x1a/0x20 drivers/base/dd.c:944
+    [<0000000030d501c1>] bus_probe_device+0x1e1/0x280 drivers/base/bus.c:491
+    [<000000005bd7adef>] device_add+0x131d/0x1c40 drivers/base/core.c:2504
+    [<00000000a0937814>] usb_set_configuration+0xe84/0x1ab0
+drivers/usb/core/message.c:2030
+    [<00000000e3934741>] generic_probe+0x6a/0xe0 drivers/usb/core/generic.c:210
+    [<0000000098ade0f1>] usb_probe_device+0x90/0xd0
+drivers/usb/core/driver.c:266
+    [<000000007806c118>] really_probe+0x48d/0x8f0 drivers/base/dd.c:551
+    [<00000000a3308c3e>] driver_probe_device+0xfc/0x2a0 drivers/base/dd.c:724
+
+Acked-by: Alan Stern <stern@rowland.harvard.edu>
+Reported-by: Kyungtae Kim <kt0755@gmail.com>
+Signed-off-by: Zqiang <qiang.zhang@windriver.com>
+Link: https://lore.kernel.org/r/20200612035210.20494-1-qiang.zhang@windriver.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/misc/usbtest.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
+index 98ada1a3425c6..bae88893ee8e3 100644
+--- a/drivers/usb/misc/usbtest.c
++++ b/drivers/usb/misc/usbtest.c
+@@ -2873,6 +2873,7 @@ static void usbtest_disconnect(struct usb_interface *intf)
+       usb_set_intfdata(intf, NULL);
+       dev_dbg(&intf->dev, "disconnect\n");
++      kfree(dev->buf);
+       kfree(dev);
+ }
+-- 
+2.25.1
+
diff --git a/queue-5.7/usbnet-smsc95xx-fix-use-after-free-after-removal.patch b/queue-5.7/usbnet-smsc95xx-fix-use-after-free-after-removal.patch
new file mode 100644 (file)
index 0000000..d5963ba
--- /dev/null
@@ -0,0 +1,49 @@
+From f4bdf789d33e4cc05c955a5ce8d549b1abf97364 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Jun 2020 13:43:26 +0300
+Subject: usbnet: smsc95xx: Fix use-after-free after removal
+
+From: Tuomas Tynkkynen <tuomas.tynkkynen@iki.fi>
+
+[ Upstream commit b835a71ef64a61383c414d6bf2896d2c0161deca ]
+
+Syzbot reports an use-after-free in workqueue context:
+
+BUG: KASAN: use-after-free in mutex_unlock+0x19/0x40 kernel/locking/mutex.c:737
+ mutex_unlock+0x19/0x40 kernel/locking/mutex.c:737
+ __smsc95xx_mdio_read drivers/net/usb/smsc95xx.c:217 [inline]
+ smsc95xx_mdio_read+0x583/0x870 drivers/net/usb/smsc95xx.c:278
+ check_carrier+0xd1/0x2e0 drivers/net/usb/smsc95xx.c:644
+ process_one_work+0x777/0xf90 kernel/workqueue.c:2274
+ worker_thread+0xa8f/0x1430 kernel/workqueue.c:2420
+ kthread+0x2df/0x300 kernel/kthread.c:255
+
+It looks like that smsc95xx_unbind() is freeing the structures that are
+still in use by the concurrently running workqueue callback. Thus switch
+to using cancel_delayed_work_sync() to ensure the work callback really
+is no longer active.
+
+Reported-by: syzbot+29dc7d4ae19b703ff947@syzkaller.appspotmail.com
+Signed-off-by: Tuomas Tynkkynen <tuomas.tynkkynen@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc95xx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index 355be77f42418..3cf4dc3433f91 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -1324,7 +1324,7 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
+       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+       if (pdata) {
+-              cancel_delayed_work(&pdata->carrier_check);
++              cancel_delayed_work_sync(&pdata->carrier_check);
+               netif_dbg(dev, ifdown, dev->net, "free pdata\n");
+               kfree(pdata);
+               pdata = NULL;
+-- 
+2.25.1
+