5.19-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sun, 2 Oct 2022 10:29:02 +0000 (12:29 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sun, 2 Oct 2022 10:29:02 +0000 (12:29 +0200)
added patches:
mm-bring-back-update_mmu_cache-to-finish_fault.patch
mm-fix-dereferencing-possible-err_ptr.patch
mm-fix-madivse_pageout-mishandling-on-non-lru-page.patch
mm-hugetlb-correct-demote-page-offset-logic.patch
mm-hwpoison-check-mm-when-killing-accessing-process.patch
mm-migrate_device.c-add-missing-flush_cache_page.patch
mm-migrate_device.c-copy-pte-dirty-bit-to-page.patch
mm-migrate_device.c-flush-tlb-while-holding-ptl.patch

queue-5.19/mm-bring-back-update_mmu_cache-to-finish_fault.patch [new file with mode: 0644]
queue-5.19/mm-fix-dereferencing-possible-err_ptr.patch [new file with mode: 0644]
queue-5.19/mm-fix-madivse_pageout-mishandling-on-non-lru-page.patch [new file with mode: 0644]
queue-5.19/mm-hugetlb-correct-demote-page-offset-logic.patch [new file with mode: 0644]
queue-5.19/mm-hwpoison-check-mm-when-killing-accessing-process.patch [new file with mode: 0644]
queue-5.19/mm-migrate_device.c-add-missing-flush_cache_page.patch [new file with mode: 0644]
queue-5.19/mm-migrate_device.c-copy-pte-dirty-bit-to-page.patch [new file with mode: 0644]
queue-5.19/mm-migrate_device.c-flush-tlb-while-holding-ptl.patch [new file with mode: 0644]
queue-5.19/series

diff --git a/queue-5.19/mm-bring-back-update_mmu_cache-to-finish_fault.patch b/queue-5.19/mm-bring-back-update_mmu_cache-to-finish_fault.patch
new file mode 100644
index 0000000..4db8a05
--- /dev/null
@@ -0,0 +1,92 @@
+From 70427f6e9ecfc8c5f977b21dd9f846b3bda02500 Mon Sep 17 00:00:00 2001
+From: Sergei Antonov <saproj@gmail.com>
+Date: Thu, 8 Sep 2022 23:48:09 +0300
+Subject: mm: bring back update_mmu_cache() to finish_fault()
+
+From: Sergei Antonov <saproj@gmail.com>
+
+commit 70427f6e9ecfc8c5f977b21dd9f846b3bda02500 upstream.
+
+Running this test program on ARMv4 a few times (sometimes just once)
+reproduces the bug.
+
+int main()
+{
+        unsigned i;
+        char paragon[SIZE];
+        void* ptr;
+
+        memset(paragon, 0xAA, SIZE);
+        ptr = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
+                   MAP_ANON | MAP_SHARED, -1, 0);
+        if (ptr == MAP_FAILED) return 1;
+        printf("ptr = %p\n", ptr);
+        for (i=0;i<10000;i++){
+                memset(ptr, 0xAA, SIZE);
+                if (memcmp(ptr, paragon, SIZE)) {
+                        printf("Unexpected bytes on iteration %u!!!\n", i);
+                        break;
+                }
+        }
+        munmap(ptr, SIZE);
+}
+
+In the "ptr" buffer there appear runs of zero bytes which are aligned
+by 16 and whose lengths are multiples of 16.
+
+Linux v5.11 does not have the bug, "git bisect" finds the first bad commit:
+f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths")
+
+Before the commit update_mmu_cache() was called during a call to
+filemap_map_pages() as well as finish_fault(). After the commit
+finish_fault() lacks it.
+
+Bring back update_mmu_cache() to finish_fault() to fix the bug.
+Also call update_mmu_tlb() only when returning VM_FAULT_NOPAGE to more
+closely reproduce the code of alloc_set_pte() function that existed before
+the commit.
+
+On many platforms update_mmu_cache() is nop:
+ x86, see arch/x86/include/asm/pgtable
+ ARMv6+, see arch/arm/include/asm/tlbflush.h
+So, it seems, few users ran into this bug.
+
+Link: https://lkml.kernel.org/r/20220908204809.2012451-1-saproj@gmail.com
+Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths")
+Signed-off-by: Sergei Antonov <saproj@gmail.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4378,14 +4378,20 @@ vm_fault_t finish_fault(struct vm_fault
+       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+                                     vmf->address, &vmf->ptl);
+-      ret = 0;
++
+       /* Re-check under ptl */
+-      if (likely(!vmf_pte_changed(vmf)))
++      if (likely(!vmf_pte_changed(vmf))) {
+               do_set_pte(vmf, page, vmf->address);
+-      else
++
++              /* no need to invalidate: a not-present page won't be cached */
++              update_mmu_cache(vma, vmf->address, vmf->pte);
++
++              ret = 0;
++      } else {
++              update_mmu_tlb(vma, vmf->address, vmf->pte);
+               ret = VM_FAULT_NOPAGE;
++      }
+-      update_mmu_tlb(vma, vmf->address, vmf->pte);
+       pte_unmap_unlock(vmf->pte, vmf->ptl);
+       return ret;
+ }
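
The reproducer quoted in the changelog above is not self-contained as pasted:
the header includes and the SIZE constant are omitted.  A minimal compilable
sketch follows; the #include lines and the SIZE value are assumptions added
here for completeness, not part of the original report.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define SIZE (64 * 1024)	/* assumed value; the snippet above leaves SIZE undefined */

int main(void)
{
	unsigned i;
	char paragon[SIZE];
	void *ptr;

	memset(paragon, 0xAA, SIZE);
	ptr = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_SHARED, -1, 0);
	if (ptr == MAP_FAILED)
		return 1;
	printf("ptr = %p\n", ptr);
	for (i = 0; i < 10000; i++) {
		memset(ptr, 0xAA, SIZE);
		if (memcmp(ptr, paragon, SIZE)) {
			printf("Unexpected bytes on iteration %u!!!\n", i);
			break;
		}
	}
	munmap(ptr, SIZE);
	return 0;
}
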
diff --git a/queue-5.19/mm-fix-dereferencing-possible-err_ptr.patch b/queue-5.19/mm-fix-dereferencing-possible-err_ptr.patch
new file mode 100644
index 0000000..4ab1d8b
--- /dev/null
@@ -0,0 +1,39 @@
+From 4eb5bbde3ccb710d3b85bfb13466612e56393369 Mon Sep 17 00:00:00 2001
+From: Binyi Han <dantengknight@gmail.com>
+Date: Sun, 4 Sep 2022 00:46:47 -0700
+Subject: mm: fix dereferencing possible ERR_PTR
+
+From: Binyi Han <dantengknight@gmail.com>
+
+commit 4eb5bbde3ccb710d3b85bfb13466612e56393369 upstream.
+
+Smatch checker complains that 'secretmem_mnt' dereferencing possible
+ERR_PTR().  Let the function return if 'secretmem_mnt' is ERR_PTR, to
+avoid dereferencing it.
+
+Link: https://lkml.kernel.org/r/20220904074647.GA64291@cloud-MacBookPro
+Fixes: 1507f51255c9f ("mm: introduce memfd_secret system call to create "secret" memory areas")
+Signed-off-by: Binyi Han <dantengknight@gmail.com>
+Reviewed-by: Andrew Morton <akpm@linux-foudation.org>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+Cc: Hagen Paul Pfeifer <hagen@jauu.net>
+Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/secretmem.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -283,7 +283,7 @@ static int secretmem_init(void)
+       secretmem_mnt = kern_mount(&secretmem_fs);
+       if (IS_ERR(secretmem_mnt))
+-              ret = PTR_ERR(secretmem_mnt);
++              return PTR_ERR(secretmem_mnt);
+       /* prevent secretmem mappings from ever getting PROT_EXEC */
+       secretmem_mnt->mnt_flags |= MNT_NOEXEC;
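
For context on the convention this fix relies on: kern_mount() returns either
a valid mount or an error encoded in the pointer value itself, so the result
has to be tested with IS_ERR() before any field access, and the errno is
recovered with PTR_ERR().  The sketch below is a rough, self-contained
illustration of that idiom; the helpers and toy_kern_mount() are simplified
stand-ins, not the kernel's real <linux/err.h> or mount code.

#include <errno.h>

#define MAX_ERRNO 4095

/* simplified stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() */
static void *err_ptr(long error) { return (void *)error; }
static long ptr_err(const void *ptr) { return (long)ptr; }
static int is_err(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct toy_mount { int flags; };

/* pretend mount helper that can fail, standing in for kern_mount() */
static struct toy_mount *toy_kern_mount(int fail)
{
	static struct toy_mount mnt;

	return fail ? err_ptr(-ENOMEM) : &mnt;
}

int main(void)
{
	struct toy_mount *mnt = toy_kern_mount(1);

	if (is_err(mnt))
		return (int)-ptr_err(mnt);	/* bail out before touching mnt */

	mnt->flags |= 1;			/* only reached on a valid pointer */
	return 0;
}
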
diff --git a/queue-5.19/mm-fix-madivse_pageout-mishandling-on-non-lru-page.patch b/queue-5.19/mm-fix-madivse_pageout-mishandling-on-non-lru-page.patch
new file mode 100644
index 0000000..a9fef58
--- /dev/null
@@ -0,0 +1,55 @@
+From 58d426a7ba92870d489686dfdb9d06b66815a2ab Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Thu, 8 Sep 2022 08:12:04 -0700
+Subject: mm: fix madivse_pageout mishandling on non-LRU page
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit 58d426a7ba92870d489686dfdb9d06b66815a2ab upstream.
+
+MADV_PAGEOUT tries to isolate non-LRU pages and gets a warning from
+isolate_lru_page below.
+
+Fix it by checking PageLRU in advance.
+
+------------[ cut here ]------------
+trying to isolate tail page
+WARNING: CPU: 0 PID: 6175 at mm/folio-compat.c:158 isolate_lru_page+0x130/0x140
+Modules linked in:
+CPU: 0 PID: 6175 Comm: syz-executor.0 Not tainted 5.18.12 #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
+RIP: 0010:isolate_lru_page+0x130/0x140
+
+Link: https://lore.kernel.org/linux-mm/485f8c33.2471b.182d5726afb.Coremail.hantianshuo@iie.ac.cn/
+Link: https://lkml.kernel.org/r/20220908151204.762596-1-minchan@kernel.org
+Fixes: 1a4e58cce84e ("mm: introduce MADV_PAGEOUT")
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Reported-by: 韩天硕 <hantianshuo@iie.ac.cn>
+Suggested-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/madvise.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -451,8 +451,11 @@ regular_page:
+                       continue;
+               }
+-              /* Do not interfere with other mappings of this page */
+-              if (page_mapcount(page) != 1)
++              /*
++               * Do not interfere with other mappings of this page and
++               * non-LRU page.
++               */
++              if (!PageLRU(page) || page_mapcount(page) != 1)
+                       continue;
+               VM_BUG_ON_PAGE(PageTransCompound(page), page);
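
For context, the path patched above is driven from userspace via
madvise(MADV_PAGEOUT).  The sketch below only illustrates that entry point;
it is not the syzkaller reproducer mentioned in the report, and the mapping
size is arbitrary.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21		/* value from the kernel UAPI headers */
#endif

#define LEN (2UL * 1024 * 1024)

int main(void)
{
	void *buf = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;

	memset(buf, 0x5a, LEN);		/* fault the pages in */

	/* ask the kernel to reclaim this range immediately */
	if (madvise(buf, LEN, MADV_PAGEOUT))
		perror("madvise(MADV_PAGEOUT)");

	munmap(buf, LEN);
	return 0;
}
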
diff --git a/queue-5.19/mm-hugetlb-correct-demote-page-offset-logic.patch b/queue-5.19/mm-hugetlb-correct-demote-page-offset-logic.patch
new file mode 100644
index 0000000..e9d5aea
--- /dev/null
@@ -0,0 +1,67 @@
+From 317314527d173e1f139ceaf8cb87cb1746abf240 Mon Sep 17 00:00:00 2001
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 14 Sep 2022 12:09:17 -0700
+Subject: mm/hugetlb: correct demote page offset logic
+
+From: Doug Berger <opendmb@gmail.com>
+
+commit 317314527d173e1f139ceaf8cb87cb1746abf240 upstream.
+
+With gigantic pages it may not be true that struct page structures are
+contiguous across the entire gigantic page.  The nth_page macro is used
+here in place of direct pointer arithmetic to correct for this.
+
+Mike said:
+
+: This error could cause addressing exceptions.  However, this is only
+: possible in configurations where CONFIG_SPARSEMEM &&
+: !CONFIG_SPARSEMEM_VMEMMAP.  Such a configuration option is rare and
+: unknown to be the default anywhere.
+
+Link: https://lkml.kernel.org/r/20220914190917.3517663-1-opendmb@gmail.com
+Fixes: 8531fc6f52f5 ("hugetlb: add hugetlb demote page support")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3418,6 +3418,7 @@ static int demote_free_huge_page(struct
+ {
+       int i, nid = page_to_nid(page);
+       struct hstate *target_hstate;
++      struct page *subpage;
+       int rc = 0;
+       target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
+@@ -3451,15 +3452,16 @@ static int demote_free_huge_page(struct
+       mutex_lock(&target_hstate->resize_lock);
+       for (i = 0; i < pages_per_huge_page(h);
+                               i += pages_per_huge_page(target_hstate)) {
++              subpage = nth_page(page, i);
+               if (hstate_is_gigantic(target_hstate))
+-                      prep_compound_gigantic_page_for_demote(page + i,
++                      prep_compound_gigantic_page_for_demote(subpage,
+                                                       target_hstate->order);
+               else
+-                      prep_compound_page(page + i, target_hstate->order);
+-              set_page_private(page + i, 0);
+-              set_page_refcounted(page + i);
+-              prep_new_huge_page(target_hstate, page + i, nid);
+-              put_page(page + i);
++                      prep_compound_page(subpage, target_hstate->order);
++              set_page_private(subpage, 0);
++              set_page_refcounted(subpage);
++              prep_new_huge_page(target_hstate, subpage, nid);
++              put_page(subpage);
+       }
+       mutex_unlock(&target_hstate->resize_lock);
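
The reasoning above hinges on the fact that "page + i" assumes struct pages
are laid out contiguously, which need not hold across a gigantic page under
CONFIG_SPARSEMEM without CONFIG_SPARSEMEM_VMEMMAP; nth_page() steps via the
pfn instead.  Below is a toy userspace model of the difference; the types and
helpers are illustrative only, not the kernel's.

#include <stdio.h>
#include <stdlib.h>

/*
 * Toy model of SPARSEMEM without VMEMMAP: every "section" of page frames has
 * its own separately allocated array of page descriptors, so the arrays of
 * adjacent sections are not contiguous in memory.
 */
#define PAGES_PER_SECTION 8

struct toy_page { unsigned long pfn; };

static struct toy_page *section_map[2];

static struct toy_page *toy_pfn_to_page(unsigned long pfn)
{
	return &section_map[pfn / PAGES_PER_SECTION][pfn % PAGES_PER_SECTION];
}

/* like nth_page(): step by pfn rather than by pointer arithmetic */
static struct toy_page *toy_nth_page(struct toy_page *page, unsigned long n)
{
	return toy_pfn_to_page(page->pfn + n);
}

int main(void)
{
	unsigned long s, j;
	struct toy_page *first;

	for (s = 0; s < 2; s++) {
		section_map[s] = calloc(PAGES_PER_SECTION, sizeof(struct toy_page));
		for (j = 0; j < PAGES_PER_SECTION; j++)
			section_map[s][j].pfn = s * PAGES_PER_SECTION + j;
	}

	first = toy_pfn_to_page(0);

	/* pfn 12 lives in the second, separately allocated array ... */
	printf("toy_nth_page(first, 12)->pfn = %lu\n", toy_nth_page(first, 12)->pfn);
	/* ... whereas "first + 12" would run past the end of the first array */
	return 0;
}
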
diff --git a/queue-5.19/mm-hwpoison-check-mm-when-killing-accessing-process.patch b/queue-5.19/mm-hwpoison-check-mm-when-killing-accessing-process.patch
new file mode 100644
index 0000000..f87265e
--- /dev/null
@@ -0,0 +1,53 @@
+From 77677cdbc2aa4b5d5d839562793d3d126201d18d Mon Sep 17 00:00:00 2001
+From: Shuai Xue <xueshuai@linux.alibaba.com>
+Date: Wed, 14 Sep 2022 14:49:35 +0800
+Subject: mm,hwpoison: check mm when killing accessing process
+
+From: Shuai Xue <xueshuai@linux.alibaba.com>
+
+commit 77677cdbc2aa4b5d5d839562793d3d126201d18d upstream.
+
+The GHES code calls memory_failure_queue() from IRQ context to queue work
+into workqueue and schedule it on the current CPU.  Then the work is
+processed in memory_failure_work_func() by kworker and calls
+memory_failure().
+
+When a page is already poisoned, commit a3f5d80ea401 ("mm,hwpoison: send
+SIGBUS with error virutal address") made memory_failure() call
+kill_accessing_process() that:
+
+    - holds mmap locking of current->mm
+    - does pagetable walk to find the error virtual address
+    - and sends SIGBUS to the current process with error info.
+
+However, the mm of kworker is not valid, resulting in a null-pointer
+dereference.  So check mm when killing the accessing process.
+
+[akpm@linux-foundation.org: remove unrelated whitespace alteration]
+Link: https://lkml.kernel.org/r/20220914064935.7851-1-xueshuai@linux.alibaba.com
+Fixes: a3f5d80ea401 ("mm,hwpoison: send SIGBUS with error virutal address")
+Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
+Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Huang Ying <ying.huang@intel.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Bixuan Cui <cuibixuan@linux.alibaba.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -697,6 +697,9 @@ static int kill_accessing_process(struct
+       };
+       priv.tk.tsk = p;
++      if (!p->mm)
++              return -EFAULT;
++
+       mmap_read_lock(p->mm);
+       ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+                             (void *)&priv);
diff --git a/queue-5.19/mm-migrate_device.c-add-missing-flush_cache_page.patch b/queue-5.19/mm-migrate_device.c-add-missing-flush_cache_page.patch
new file mode 100644
index 0000000..5720ab5
--- /dev/null
@@ -0,0 +1,51 @@
+From a3589e1d5fe39c3d9fdd291b111524b93d08bc32 Mon Sep 17 00:00:00 2001
+From: Alistair Popple <apopple@nvidia.com>
+Date: Fri, 2 Sep 2022 10:35:52 +1000
+Subject: mm/migrate_device.c: add missing flush_cache_page()
+
+From: Alistair Popple <apopple@nvidia.com>
+
+commit a3589e1d5fe39c3d9fdd291b111524b93d08bc32 upstream.
+
+Currently we only call flush_cache_page() for the anon_exclusive case;
+however, in both cases we clear the pte, so we should flush the cache.
+
+Link: https://lkml.kernel.org/r/5676f30436ab71d1a587ac73f835ed8bd2113ff5.1662078528.git-series.apopple@nvidia.com
+Fixes: 8c3328f1f36a ("mm/migrate: migrate_vma() unmap page from vma while collecting pages")
+Signed-off-by: Alistair Popple <apopple@nvidia.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Cc: Alex Sierra <alex.sierra@amd.com>
+Cc: Ben Skeggs <bskeggs@redhat.com>
+Cc: Felix Kuehling <Felix.Kuehling@amd.com>
+Cc: huang ying <huang.ying.caritas@gmail.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Karol Herbst <kherbst@redhat.com>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Paul Mackerras <paulus@ozlabs.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate_device.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/migrate_device.c
++++ b/mm/migrate_device.c
+@@ -187,9 +187,9 @@ again:
+                       bool anon_exclusive;
+                       pte_t swp_pte;
++                      flush_cache_page(vma, addr, pte_pfn(*ptep));
+                       anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
+                       if (anon_exclusive) {
+-                              flush_cache_page(vma, addr, pte_pfn(*ptep));
+                               ptep_clear_flush(vma, addr, ptep);
+                               if (page_try_share_anon_rmap(page)) {
diff --git a/queue-5.19/mm-migrate_device.c-copy-pte-dirty-bit-to-page.patch b/queue-5.19/mm-migrate_device.c-copy-pte-dirty-bit-to-page.patch
new file mode 100644
index 0000000..5cfc154
--- /dev/null
@@ -0,0 +1,91 @@
+From fd35ca3d12cc9922d7d9a35f934e72132dbc4853 Mon Sep 17 00:00:00 2001
+From: Alistair Popple <apopple@nvidia.com>
+Date: Fri, 2 Sep 2022 10:35:53 +1000
+Subject: mm/migrate_device.c: copy pte dirty bit to page
+
+From: Alistair Popple <apopple@nvidia.com>
+
+commit fd35ca3d12cc9922d7d9a35f934e72132dbc4853 upstream.
+
+migrate_vma_setup() has a fast path in migrate_vma_collect_pmd() that
+installs migration entries directly if it can lock the migrating page.
+When removing a dirty pte the dirty bit is supposed to be carried over to
+the underlying page to prevent it being lost.
+
+Currently migrate_vma_*() can only be used for private anonymous mappings.
+That means loss of the dirty bit usually doesn't result in data loss
+because these pages are typically not file-backed.  However pages may be
+backed by swap storage which can result in data loss if an attempt is made
+to migrate a dirty page that doesn't yet have the PageDirty flag set.
+
+In this case migration will fail due to unexpected references but the
+dirty pte bit will be lost.  If the page is subsequently reclaimed data
+won't be written back to swap storage as it is considered uptodate,
+resulting in data loss if the page is subsequently accessed.
+
+Prevent this by copying the dirty bit to the page when removing the pte to
+match what try_to_migrate_one() does.
+
+Link: https://lkml.kernel.org/r/dd48e4882ce859c295c1a77612f66d198b0403f9.1662078528.git-series.apopple@nvidia.com
+Fixes: 8c3328f1f36a ("mm/migrate: migrate_vma() unmap page from vma while collecting pages")
+Signed-off-by: Alistair Popple <apopple@nvidia.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Reported-by: "Huang, Ying" <ying.huang@intel.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Alex Sierra <alex.sierra@amd.com>
+Cc: Ben Skeggs <bskeggs@redhat.com>
+Cc: Felix Kuehling <Felix.Kuehling@amd.com>
+Cc: huang ying <huang.ying.caritas@gmail.com>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Karol Herbst <kherbst@redhat.com>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Paul Mackerras <paulus@ozlabs.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate_device.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/mm/migrate_device.c
++++ b/mm/migrate_device.c
+@@ -7,6 +7,7 @@
+ #include <linux/export.h>
+ #include <linux/memremap.h>
+ #include <linux/migrate.h>
++#include <linux/mm.h>
+ #include <linux/mm_inline.h>
+ #include <linux/mmu_notifier.h>
+ #include <linux/oom.h>
+@@ -190,7 +191,7 @@ again:
+                       flush_cache_page(vma, addr, pte_pfn(*ptep));
+                       anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
+                       if (anon_exclusive) {
+-                              ptep_clear_flush(vma, addr, ptep);
++                              pte = ptep_clear_flush(vma, addr, ptep);
+                               if (page_try_share_anon_rmap(page)) {
+                                       set_pte_at(mm, addr, ptep, pte);
+@@ -200,11 +201,15 @@ again:
+                                       goto next;
+                               }
+                       } else {
+-                              ptep_get_and_clear(mm, addr, ptep);
++                              pte = ptep_get_and_clear(mm, addr, ptep);
+                       }
+                       migrate->cpages++;
++                      /* Set the dirty flag on the folio now the pte is gone. */
++                      if (pte_dirty(pte))
++                              folio_mark_dirty(page_folio(page));
++
+                       /* Setup special migration page table entry */
+                       if (mpfn & MIGRATE_PFN_WRITE)
+                               entry = make_writable_migration_entry(
diff --git a/queue-5.19/mm-migrate_device.c-flush-tlb-while-holding-ptl.patch b/queue-5.19/mm-migrate_device.c-flush-tlb-while-holding-ptl.patch
new file mode 100644
index 0000000..e8e56fd
--- /dev/null
@@ -0,0 +1,74 @@
+From 60bae73708963de4a17231077285bd9ff2f41c44 Mon Sep 17 00:00:00 2001
+From: Alistair Popple <apopple@nvidia.com>
+Date: Fri, 2 Sep 2022 10:35:51 +1000
+Subject: mm/migrate_device.c: flush TLB while holding PTL
+
+From: Alistair Popple <apopple@nvidia.com>
+
+commit 60bae73708963de4a17231077285bd9ff2f41c44 upstream.
+
+When clearing a PTE the TLB should be flushed whilst still holding the PTL
+to avoid a potential race with madvise/munmap/etc.  For example consider
+the following sequence:
+
+  CPU0                          CPU1
+  ----                          ----
+
+  migrate_vma_collect_pmd()
+  pte_unmap_unlock()
+                                madvise(MADV_DONTNEED)
+                                -> zap_pte_range()
+                                pte_offset_map_lock()
+                                [ PTE not present, TLB not flushed ]
+                                pte_unmap_unlock()
+                                [ page is still accessible via stale TLB ]
+  flush_tlb_range()
+
+In this case the page may still be accessed via the stale TLB entry after
+madvise returns.  Fix this by flushing the TLB while holding the PTL.
+
+Fixes: 8c3328f1f36a ("mm/migrate: migrate_vma() unmap page from vma while collecting pages")
+Link: https://lkml.kernel.org/r/9f801e9d8d830408f2ca27821f606e09aa856899.1662078528.git-series.apopple@nvidia.com
+Signed-off-by: Alistair Popple <apopple@nvidia.com>
+Reported-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Cc: Alex Sierra <alex.sierra@amd.com>
+Cc: Ben Skeggs <bskeggs@redhat.com>
+Cc: Felix Kuehling <Felix.Kuehling@amd.com>
+Cc: huang ying <huang.ying.caritas@gmail.com>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Karol Herbst <kherbst@redhat.com>
+Cc: Logan Gunthorpe <logang@deltatee.com>
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Paul Mackerras <paulus@ozlabs.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate_device.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/migrate_device.c
++++ b/mm/migrate_device.c
+@@ -248,13 +248,14 @@ next:
+               migrate->dst[migrate->npages] = 0;
+               migrate->src[migrate->npages++] = mpfn;
+       }
+-      arch_leave_lazy_mmu_mode();
+-      pte_unmap_unlock(ptep - 1, ptl);
+       /* Only flush the TLB if we actually modified any entries */
+       if (unmapped)
+               flush_tlb_range(walk->vma, start, end);
++      arch_leave_lazy_mmu_mode();
++      pte_unmap_unlock(ptep - 1, ptl);
++
+       return 0;
+ }
diff --git a/queue-5.19/series b/queue-5.19/series
index 4caeb932b4377b1ff879aeb35747ec13275a776e..5bf92397224485328fc49ce95df8a2b73b93b7f9 100644
@@ -36,3 +36,11 @@ mm-gup-fix-the-fast-gup-race-against-thp-collapse.patch
 mm-page_alloc-fix-race-condition-between-build_all_zonelists-and-page-allocation.patch
 mm-prevent-page_frag_alloc-from-corrupting-the-memory.patch
 mm-page_isolation-fix-isolate_single_pageblock-isolation-behavior.patch
+mm-fix-dereferencing-possible-err_ptr.patch
+mm-migrate_device.c-flush-tlb-while-holding-ptl.patch
+mm-migrate_device.c-add-missing-flush_cache_page.patch
+mm-migrate_device.c-copy-pte-dirty-bit-to-page.patch
+mm-fix-madivse_pageout-mishandling-on-non-lru-page.patch
+mm-bring-back-update_mmu_cache-to-finish_fault.patch
+mm-hugetlb-correct-demote-page-offset-logic.patch
+mm-hwpoison-check-mm-when-killing-accessing-process.patch