git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Jun 2021 13:09:19 +0000 (15:09 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Jun 2021 13:09:19 +0000 (15:09 +0200)
added patches:
mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
revert-drm-add-a-locked-version-of-drm_is_current_master.patch

queue-5.12/mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch [new file with mode: 0644]
queue-5.12/revert-drm-add-a-locked-version-of-drm_is_current_master.patch [new file with mode: 0644]
queue-5.12/series

diff --git a/queue-5.12/mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch b/queue-5.12/mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
new file mode 100644 (file)
index 0000000..8e96fcd
--- /dev/null
@@ -0,0 +1,185 @@
+From ea6d0630100b285f059d0a8d8e86f38a46407536 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Date: Thu, 24 Jun 2021 18:40:01 -0700
+Subject: mm/hwpoison: do not lock page again when me_huge_page() successfully recovers
+
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+
+commit ea6d0630100b285f059d0a8d8e86f38a46407536 upstream.
+
+Currently me_huge_page() temporarily unlocks the page to perform some
+actions, then locks it again later.  My testcase (which calls hard-offline
+on some tail page in a hugetlb, then accesses the address of the hugetlb
+range) showed that the page allocation code detects this page lock on a
+buddy page and prints out a "BUG: Bad page state" message.
+
+check_new_page_bad() does not consider a page with __PG_HWPOISON a bad
+page, so this flag works as a kind of filter, but this filtering doesn't
+work in this case because the "bad page" is not the actual hwpoisoned
+page.  So stop locking the page again.  Actions to be taken depend on the
+page type of the error, so page unlocking should be done in ->action()
+callbacks.  So let's make that the rule and change all existing callbacks
+to follow it.
+
+Link: https://lkml.kernel.org/r/20210609072029.74645-1-nao.horiguchi@gmail.com
+Fixes: 78bb920344b8 ("mm: hwpoison: dissolve in-use hugepage in unrecoverable memory error")
+Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |   44 ++++++++++++++++++++++++++++++--------------
+ 1 file changed, 30 insertions(+), 14 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -658,6 +658,7 @@ static int truncate_error_page(struct pa
+  */
+ static int me_kernel(struct page *p, unsigned long pfn)
+ {
++      unlock_page(p);
+       return MF_IGNORED;
+ }
+@@ -667,6 +668,7 @@ static int me_kernel(struct page *p, uns
+ static int me_unknown(struct page *p, unsigned long pfn)
+ {
+       pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
++      unlock_page(p);
+       return MF_FAILED;
+ }
+@@ -675,6 +677,7 @@ static int me_unknown(struct page *p, un
+  */
+ static int me_pagecache_clean(struct page *p, unsigned long pfn)
+ {
++      int ret;
+       struct address_space *mapping;
+       delete_from_lru_cache(p);
+@@ -683,8 +686,10 @@ static int me_pagecache_clean(struct pag
+        * For anonymous pages we're done the only reference left
+        * should be the one m_f() holds.
+        */
+-      if (PageAnon(p))
+-              return MF_RECOVERED;
++      if (PageAnon(p)) {
++              ret = MF_RECOVERED;
++              goto out;
++      }
+       /*
+        * Now truncate the page in the page cache. This is really
+@@ -698,7 +703,8 @@ static int me_pagecache_clean(struct pag
+               /*
+                * Page has been teared down in the meanwhile
+                */
+-              return MF_FAILED;
++              ret = MF_FAILED;
++              goto out;
+       }
+       /*
+@@ -706,7 +712,10 @@ static int me_pagecache_clean(struct pag
+        *
+        * Open: to take i_mutex or not for this? Right now we don't.
+        */
+-      return truncate_error_page(p, pfn, mapping);
++      ret = truncate_error_page(p, pfn, mapping);
++out:
++      unlock_page(p);
++      return ret;
+ }
+ /*
+@@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct pag
+  */
+ static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+ {
++      int ret;
++
+       ClearPageDirty(p);
+       /* Trigger EIO in shmem: */
+       ClearPageUptodate(p);
+-      if (!delete_from_lru_cache(p))
+-              return MF_DELAYED;
+-      else
+-              return MF_FAILED;
++      ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
++      unlock_page(p);
++      return ret;
+ }
+ static int me_swapcache_clean(struct page *p, unsigned long pfn)
+ {
++      int ret;
++
+       delete_from_swap_cache(p);
+-      if (!delete_from_lru_cache(p))
+-              return MF_RECOVERED;
+-      else
+-              return MF_FAILED;
++      ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
++      unlock_page(p);
++      return ret;
+ }
+ /*
+@@ -820,6 +831,7 @@ static int me_huge_page(struct page *p,
+       mapping = page_mapping(hpage);
+       if (mapping) {
+               res = truncate_error_page(hpage, pfn, mapping);
++              unlock_page(hpage);
+       } else {
+               res = MF_FAILED;
+               unlock_page(hpage);
+@@ -834,7 +846,6 @@ static int me_huge_page(struct page *p,
+                       page_ref_inc(p);
+                       res = MF_RECOVERED;
+               }
+-              lock_page(hpage);
+       }
+       return res;
+@@ -866,6 +877,8 @@ static struct page_state {
+       unsigned long mask;
+       unsigned long res;
+       enum mf_action_page_type type;
++
++      /* Callback ->action() has to unlock the relevant page inside it. */
+       int (*action)(struct page *p, unsigned long pfn);
+ } error_states[] = {
+       { reserved,     reserved,       MF_MSG_KERNEL,  me_kernel },
+@@ -929,6 +942,7 @@ static int page_action(struct page_state
+       int result;
+       int count;
++      /* page p should be unlocked after returning from ps->action().  */
+       result = ps->action(p, pfn);
+       count = page_count(p) - 1;
+@@ -1313,7 +1327,7 @@ static int memory_failure_hugetlb(unsign
+               goto out;
+       }
+-      res = identify_page_state(pfn, p, page_flags);
++      return identify_page_state(pfn, p, page_flags);
+ out:
+       unlock_page(head);
+       return res;
+@@ -1595,6 +1609,8 @@ try_again:
+ identify_page_state:
+       res = identify_page_state(pfn, p, page_flags);
++      mutex_unlock(&mf_mutex);
++      return res;
+ unlock_page:
+       unlock_page(p);
+ unlock_mutex:
diff --git a/queue-5.12/revert-drm-add-a-locked-version-of-drm_is_current_master.patch b/queue-5.12/revert-drm-add-a-locked-version-of-drm_is_current_master.patch
new file mode 100644 (file)
index 0000000..6f1b73d
--- /dev/null
@@ -0,0 +1,166 @@
+From f54b3ca7ea1e5e02f481cf4ca54568e57bd66086 Mon Sep 17 00:00:00 2001
+From: Daniel Vetter <daniel.vetter@ffwll.ch>
+Date: Tue, 22 Jun 2021 09:54:09 +0200
+Subject: Revert "drm: add a locked version of drm_is_current_master"
+
+From: Daniel Vetter <daniel.vetter@ffwll.ch>
+
+commit f54b3ca7ea1e5e02f481cf4ca54568e57bd66086 upstream.
+
+This reverts commit 1815d9c86e3090477fbde066ff314a7e9721ee0f.
+
+Unfortunately this inverts the locking hierarchy, so back to the
+drawing board. Full lockdep splat below:
+
+======================================================
+WARNING: possible circular locking dependency detected
+5.13.0-rc7-CI-CI_DRM_10254+ #1 Not tainted
+------------------------------------------------------
+kms_frontbuffer/1087 is trying to acquire lock:
+ffff88810dcd01a8 (&dev->master_mutex){+.+.}-{3:3}, at: drm_is_current_master+0x1b/0x40
+but task is already holding lock:
+ffff88810dcd0488 (&dev->mode_config.mutex){+.+.}-{3:3}, at: drm_mode_getconnector+0x1c6/0x4a0
+which lock already depends on the new lock.
+the existing dependency chain (in reverse order) is:
+-> #2 (&dev->mode_config.mutex){+.+.}-{3:3}:
+       __mutex_lock+0xab/0x970
+       drm_client_modeset_probe+0x22e/0xca0
+       __drm_fb_helper_initial_config_and_unlock+0x42/0x540
+       intel_fbdev_initial_config+0xf/0x20 [i915]
+       async_run_entry_fn+0x28/0x130
+       process_one_work+0x26d/0x5c0
+       worker_thread+0x37/0x380
+       kthread+0x144/0x170
+       ret_from_fork+0x1f/0x30
+-> #1 (&client->modeset_mutex){+.+.}-{3:3}:
+       __mutex_lock+0xab/0x970
+       drm_client_modeset_commit_locked+0x1c/0x180
+       drm_client_modeset_commit+0x1c/0x40
+       __drm_fb_helper_restore_fbdev_mode_unlocked+0x88/0xb0
+       drm_fb_helper_set_par+0x34/0x40
+       intel_fbdev_set_par+0x11/0x40 [i915]
+       fbcon_init+0x270/0x4f0
+       visual_init+0xc6/0x130
+       do_bind_con_driver+0x1e5/0x2d0
+       do_take_over_console+0x10e/0x180
+       do_fbcon_takeover+0x53/0xb0
+       register_framebuffer+0x22d/0x310
+       __drm_fb_helper_initial_config_and_unlock+0x36c/0x540
+       intel_fbdev_initial_config+0xf/0x20 [i915]
+       async_run_entry_fn+0x28/0x130
+       process_one_work+0x26d/0x5c0
+       worker_thread+0x37/0x380
+       kthread+0x144/0x170
+       ret_from_fork+0x1f/0x30
+-> #0 (&dev->master_mutex){+.+.}-{3:3}:
+       __lock_acquire+0x151e/0x2590
+       lock_acquire+0xd1/0x3d0
+       __mutex_lock+0xab/0x970
+       drm_is_current_master+0x1b/0x40
+       drm_mode_getconnector+0x37e/0x4a0
+       drm_ioctl_kernel+0xa8/0xf0
+       drm_ioctl+0x1e8/0x390
+       __x64_sys_ioctl+0x6a/0xa0
+       do_syscall_64+0x39/0xb0
+       entry_SYSCALL_64_after_hwframe+0x44/0xae
+other info that might help us debug this:
+Chain exists of: &dev->master_mutex --> &client->modeset_mutex --> &dev->mode_config.mutex
+ Possible unsafe locking scenario:
+       CPU0                    CPU1
+       ----                    ----
+  lock(&dev->mode_config.mutex);
+                               lock(&client->modeset_mutex);
+                               lock(&dev->mode_config.mutex);
+  lock(&dev->master_mutex);
+---
+ drivers/gpu/drm/drm_auth.c |   51 ++++++++++++++++-----------------------------
+ 1 file changed, 19 insertions(+), 32 deletions(-)
+
+--- a/drivers/gpu/drm/drm_auth.c
++++ b/drivers/gpu/drm/drm_auth.c
+@@ -61,35 +61,6 @@
+  * trusted clients.
+  */
+-static bool drm_is_current_master_locked(struct drm_file *fpriv)
+-{
+-      lockdep_assert_held_once(&fpriv->master->dev->master_mutex);
+-
+-      return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master;
+-}
+-
+-/**
+- * drm_is_current_master - checks whether @priv is the current master
+- * @fpriv: DRM file private
+- *
+- * Checks whether @fpriv is current master on its device. This decides whether a
+- * client is allowed to run DRM_MASTER IOCTLs.
+- *
+- * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting
+- * - the current master is assumed to own the non-shareable display hardware.
+- */
+-bool drm_is_current_master(struct drm_file *fpriv)
+-{
+-      bool ret;
+-
+-      mutex_lock(&fpriv->master->dev->master_mutex);
+-      ret = drm_is_current_master_locked(fpriv);
+-      mutex_unlock(&fpriv->master->dev->master_mutex);
+-
+-      return ret;
+-}
+-EXPORT_SYMBOL(drm_is_current_master);
+-
+ int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv)
+ {
+       struct drm_auth *auth = data;
+@@ -252,7 +223,7 @@ int drm_setmaster_ioctl(struct drm_devic
+       if (ret)
+               goto out_unlock;
+-      if (drm_is_current_master_locked(file_priv))
++      if (drm_is_current_master(file_priv))
+               goto out_unlock;
+       if (dev->master) {
+@@ -301,7 +272,7 @@ int drm_dropmaster_ioctl(struct drm_devi
+       if (ret)
+               goto out_unlock;
+-      if (!drm_is_current_master_locked(file_priv)) {
++      if (!drm_is_current_master(file_priv)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+@@ -350,7 +321,7 @@ void drm_master_release(struct drm_file
+       if (file_priv->magic)
+               idr_remove(&file_priv->master->magic_map, file_priv->magic);
+-      if (!drm_is_current_master_locked(file_priv))
++      if (!drm_is_current_master(file_priv))
+               goto out;
+       drm_legacy_lock_master_cleanup(dev, master);
+@@ -372,6 +343,22 @@ out:
+ }
+ /**
++ * drm_is_current_master - checks whether @priv is the current master
++ * @fpriv: DRM file private
++ *
++ * Checks whether @fpriv is current master on its device. This decides whether a
++ * client is allowed to run DRM_MASTER IOCTLs.
++ *
++ * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting
++ * - the current master is assumed to own the non-shareable display hardware.
++ */
++bool drm_is_current_master(struct drm_file *fpriv)
++{
++      return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master;
++}
++EXPORT_SYMBOL(drm_is_current_master);
++
++/**
+  * drm_master_get - reference a master pointer
+  * @master: &struct drm_master
+  *
index e178456e0d397c38af3c9f2f034976c6791c5bd6..fdd408b840b4c968bb6fe83e1ba48c497f02f73e 100644 (file)
@@ -101,3 +101,5 @@ mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch
 kvm-svm-call-sev-guest-decommission-if-asid-binding-fails.patch
 swiotlb-manipulate-orig_addr-when-tlb_addr-has-offset.patch
 netfs-fix-test-for-whether-we-can-skip-read-when-writing-beyond-eof.patch
+mm-hwpoison-do-not-lock-page-again-when-me_huge_page-successfully-recovers.patch
+revert-drm-add-a-locked-version-of-drm_is_current_master.patch