git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 4 Jan 2021 12:55:43 +0000 (13:55 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 4 Jan 2021 12:55:43 +0000 (13:55 +0100)
added patches:
asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch
asm-generic-tlb-avoid-potential-double-flush.patch
asm-generic-tlb-track-freeing-of-page-table-directories-in-struct-mmu_gather.patch
asm-generic-tlb-track-which-levels-of-the-page-tables-have-been-cleared.patch
mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch
powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch

queue-4.19/asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch [new file with mode: 0644]
queue-4.19/asm-generic-tlb-avoid-potential-double-flush.patch [new file with mode: 0644]
queue-4.19/asm-generic-tlb-track-freeing-of-page-table-directories-in-struct-mmu_gather.patch [new file with mode: 0644]
queue-4.19/asm-generic-tlb-track-which-levels-of-the-page-tables-have-been-cleared.patch [new file with mode: 0644]
queue-4.19/mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch [new file with mode: 0644]
queue-4.19/powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch b/queue-4.19/asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch
new file mode 100644 (file)
index 0000000..9168c47
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Mon Jan  4 01:45:29 PM CET 2021
+From: Santosh Sivaraj <santosh@fossix.org>
+Date: Thu, 12 Mar 2020 18:57:37 +0530
+Subject: asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE
+To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, Peter Zijlstra <peterz@infradead.org>
+Message-ID: <20200312132740.225241-4-santosh@fossix.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 96bc9567cbe112e9320250f01b9c060c882e8619 upstream.
+
+Make issuing a TLB invalidate for page-table pages the normal case.
+
+The reason is twofold:
+
+ - too many invalidates is safer than too few,
+ - most architectures use the linux page-tables natively
+   and would thus require this.
+
+Make it an opt-out, instead of an opt-in.
+
+No change in behavior intended.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@vger.kernel.org> # 4.19
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+[santosh: prerequisite for upcoming tlbflush backports]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/Kconfig         |    2 +-
+ arch/powerpc/Kconfig |    1 +
+ arch/sparc/Kconfig   |    1 +
+ arch/x86/Kconfig     |    1 -
+ mm/memory.c          |    2 +-
+ 5 files changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -363,7 +363,7 @@ config HAVE_ARCH_JUMP_LABEL
+ config HAVE_RCU_TABLE_FREE
+       bool
+-config HAVE_RCU_TABLE_INVALIDATE
++config HAVE_RCU_TABLE_NO_INVALIDATE
+       bool
+ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -217,6 +217,7 @@ config PPC
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_RCU_TABLE_FREE              if SMP
++      select HAVE_RCU_TABLE_NO_INVALIDATE     if HAVE_RCU_TABLE_FREE
+       select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_RELIABLE_STACKTRACE         if PPC64 && CPU_LITTLE_ENDIAN
+       select HAVE_SYSCALL_TRACEPOINTS
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -64,6 +64,7 @@ config SPARC64
+       select HAVE_KRETPROBES
+       select HAVE_KPROBES
+       select HAVE_RCU_TABLE_FREE if SMP
++      select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
+       select HAVE_MEMBLOCK_NODE_MAP
+       select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       select HAVE_DYNAMIC_FTRACE
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -181,7 +181,6 @@ config X86
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_RCU_TABLE_FREE              if PARAVIRT
+-      select HAVE_RCU_TABLE_INVALIDATE        if HAVE_RCU_TABLE_FREE
+       select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_RELIABLE_STACKTRACE         if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
+       select HAVE_STACKPROTECTOR              if CC_HAS_SANE_STACKPROTECTOR
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -339,7 +339,7 @@ bool __tlb_remove_page_size(struct mmu_g
+  */
+ static inline void tlb_table_invalidate(struct mmu_gather *tlb)
+ {
+-#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
++#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
+       /*
+        * Invalidate page-table caches used by hardware walkers. Then we still
+        * need to RCU-sched wait while freeing the pages because software
diff --git a/queue-4.19/asm-generic-tlb-avoid-potential-double-flush.patch b/queue-4.19/asm-generic-tlb-avoid-potential-double-flush.patch
new file mode 100644 (file)
index 0000000..ce119ad
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Mon Jan  4 01:45:29 PM CET 2021
+From: Santosh Sivaraj <santosh@fossix.org>
+Date: Thu, 12 Mar 2020 18:57:40 +0530
+Subject: asm-generic/tlb: avoid potential double flush
+To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, Peter Zijlstra <peterz@infradead.org>, "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
+Message-ID: <20200312132740.225241-7-santosh@fossix.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0758cd8304942292e95a0f750c374533db378b32 upstream.
+
+Aneesh reported that:
+
+       tlb_flush_mmu()
+         tlb_flush_mmu_tlbonly()
+           tlb_flush()                 <-- #1
+         tlb_flush_mmu_free()
+           tlb_table_flush()
+             tlb_table_invalidate()
+               tlb_flush_mmu_tlbonly()
+                 tlb_flush()           <-- #2
+
+does two TLBIs when tlb->fullmm, because __tlb_reset_range() will not
+clear tlb->end in that case.
+
+Observe that any caller to __tlb_adjust_range() also sets at least one of
+the tlb->freed_tables || tlb->cleared_p* bits, and those are
+unconditionally cleared by __tlb_reset_range().
+
+Change the condition for actually issuing TLBI to having one of those bits
+set, as opposed to having tlb->end != 0.
+
+Link: http://lkml.kernel.org/r/20200116064531.483522-4-aneesh.kumar@linux.ibm.com
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Reported-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Cc: <stable@vger.kernel.org>  # 4.19
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+[santosh: backported to 4.19 stable]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/tlb.h |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -179,7 +179,12 @@ static inline void __tlb_reset_range(str
+ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+ {
+-      if (!tlb->end)
++      /*
++       * Anything calling __tlb_adjust_range() also sets at least one of
++       * these bits.
++       */
++      if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
++            tlb->cleared_puds || tlb->cleared_p4ds))
+               return;
+       tlb_flush(tlb);
diff --git a/queue-4.19/asm-generic-tlb-track-freeing-of-page-table-directories-in-struct-mmu_gather.patch b/queue-4.19/asm-generic-tlb-track-freeing-of-page-table-directories-in-struct-mmu_gather.patch
new file mode 100644 (file)
index 0000000..ff5176d
--- /dev/null
@@ -0,0 +1,106 @@
+From foo@baz Mon Jan  4 01:45:29 PM CET 2021
+From: Santosh Sivaraj <santosh@fossix.org>
+Date: Thu, 12 Mar 2020 18:57:35 +0530
+Subject: asm-generic/tlb: Track freeing of page-table directories in struct mmu_gather
+To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, Peter Zijlstra <peterz@infradead.org>, Will Deacon <will.deacon@arm.com>
+Message-ID: <20200312132740.225241-2-santosh@fossix.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 22a61c3c4f1379ef8b0ce0d5cb78baf3178950e2 upstream
+
+Some architectures require different TLB invalidation instructions
+depending on whether it is only the last-level of page table being
+changed, or whether there are also changes to the intermediate
+(directory) entries higher up the tree.
+
+Add a new bit to the flags bitfield in struct mmu_gather so that the
+architecture code can operate accordingly if it's the intermediate
+levels being invalidated.
+
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: <stable@vger.kernel.org> # 4.19
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+[santosh: prerequisite for tlbflush backports]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/tlb.h |   31 +++++++++++++++++++++++--------
+ 1 file changed, 23 insertions(+), 8 deletions(-)
+
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -97,12 +97,22 @@ struct mmu_gather {
+ #endif
+       unsigned long           start;
+       unsigned long           end;
+-      /* we are in the middle of an operation to clear
+-       * a full mm and can make some optimizations */
+-      unsigned int            fullmm : 1,
+-      /* we have performed an operation which
+-       * requires a complete flush of the tlb */
+-                              need_flush_all : 1;
++      /*
++       * we are in the middle of an operation to clear
++       * a full mm and can make some optimizations
++       */
++      unsigned int            fullmm : 1;
++
++      /*
++       * we have performed an operation which
++       * requires a complete flush of the tlb
++       */
++      unsigned int            need_flush_all : 1;
++
++      /*
++       * we have removed page directories
++       */
++      unsigned int            freed_tables : 1;
+       struct mmu_gather_batch *active;
+       struct mmu_gather_batch local;
+@@ -137,6 +147,7 @@ static inline void __tlb_reset_range(str
+               tlb->start = TASK_SIZE;
+               tlb->end = 0;
+       }
++      tlb->freed_tables = 0;
+ }
+ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+@@ -278,6 +289,7 @@ static inline void tlb_remove_check_page
+ #define pte_free_tlb(tlb, ptep, address)                      \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
++              tlb->freed_tables = 1;                  \
+               __pte_free_tlb(tlb, ptep, address);             \
+       } while (0)
+ #endif
+@@ -285,7 +297,8 @@ static inline void tlb_remove_check_page
+ #ifndef pmd_free_tlb
+ #define pmd_free_tlb(tlb, pmdp, address)                      \
+       do {                                                    \
+-              __tlb_adjust_range(tlb, address, PAGE_SIZE);            \
++              __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
++              tlb->freed_tables = 1;                  \
+               __pmd_free_tlb(tlb, pmdp, address);             \
+       } while (0)
+ #endif
+@@ -295,6 +308,7 @@ static inline void tlb_remove_check_page
+ #define pud_free_tlb(tlb, pudp, address)                      \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
++              tlb->freed_tables = 1;                  \
+               __pud_free_tlb(tlb, pudp, address);             \
+       } while (0)
+ #endif
+@@ -304,7 +318,8 @@ static inline void tlb_remove_check_page
+ #ifndef p4d_free_tlb
+ #define p4d_free_tlb(tlb, pudp, address)                      \
+       do {                                                    \
+-              __tlb_adjust_range(tlb, address, PAGE_SIZE);            \
++              __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
++              tlb->freed_tables = 1;                  \
+               __p4d_free_tlb(tlb, pudp, address);             \
+       } while (0)
+ #endif
diff --git a/queue-4.19/asm-generic-tlb-track-which-levels-of-the-page-tables-have-been-cleared.patch b/queue-4.19/asm-generic-tlb-track-which-levels-of-the-page-tables-have-been-cleared.patch
new file mode 100644 (file)
index 0000000..0154e64
--- /dev/null
@@ -0,0 +1,184 @@
+From foo@baz Mon Jan  4 01:45:29 PM CET 2021
+From: Santosh Sivaraj <santosh@fossix.org>
+Date: Thu, 12 Mar 2020 18:57:36 +0530
+Subject: asm-generic/tlb: Track which levels of the page tables have been cleared
+To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, Will Deacon <will.deacon@arm.com>
+Message-ID: <20200312132740.225241-3-santosh@fossix.org>
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit a6d60245d6d9b1caf66b0d94419988c4836980af upstream
+
+It is common for architectures with hugepage support to require only a
+single TLB invalidation operation per hugepage during unmap(), rather than
+iterating through the mapping at a PAGE_SIZE increment. Currently,
+however, the level in the page table where the unmap() operation occurs
+is not stored in the mmu_gather structure, therefore forcing
+architectures to issue additional TLB invalidation operations or to give
+up and over-invalidate by e.g. invalidating the entire TLB.
+
+Ideally, we could add an interval rbtree to the mmu_gather structure,
+which would allow us to associate the correct mapping granule with the
+various sub-mappings within the range being invalidated. However, this
+is costly in terms of book-keeping and memory management, so instead we
+approximate by keeping track of the page table levels that are cleared
+and provide a means to query the smallest granule required for invalidation.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Cc: <stable@vger.kernel.org> # 4.19
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+[santosh: prerequisite for upcoming tlbflush backports]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/tlb.h |   58 +++++++++++++++++++++++++++++++++++++++-------
+ mm/memory.c               |    4 ++-
+ 2 files changed, 53 insertions(+), 9 deletions(-)
+
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -114,6 +114,14 @@ struct mmu_gather {
+        */
+       unsigned int            freed_tables : 1;
++      /*
++       * at which levels have we cleared entries?
++       */
++      unsigned int            cleared_ptes : 1;
++      unsigned int            cleared_pmds : 1;
++      unsigned int            cleared_puds : 1;
++      unsigned int            cleared_p4ds : 1;
++
+       struct mmu_gather_batch *active;
+       struct mmu_gather_batch local;
+       struct page             *__pages[MMU_GATHER_BUNDLE];
+@@ -148,6 +156,10 @@ static inline void __tlb_reset_range(str
+               tlb->end = 0;
+       }
+       tlb->freed_tables = 0;
++      tlb->cleared_ptes = 0;
++      tlb->cleared_pmds = 0;
++      tlb->cleared_puds = 0;
++      tlb->cleared_p4ds = 0;
+ }
+ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+@@ -197,6 +209,25 @@ static inline void tlb_remove_check_page
+ }
+ #endif
++static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
++{
++      if (tlb->cleared_ptes)
++              return PAGE_SHIFT;
++      if (tlb->cleared_pmds)
++              return PMD_SHIFT;
++      if (tlb->cleared_puds)
++              return PUD_SHIFT;
++      if (tlb->cleared_p4ds)
++              return P4D_SHIFT;
++
++      return PAGE_SHIFT;
++}
++
++static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
++{
++      return 1UL << tlb_get_unmap_shift(tlb);
++}
++
+ /*
+  * In the case of tlb vma handling, we can optimise these away in the
+  * case where we're doing a full MM flush.  When we're doing a munmap,
+@@ -230,13 +261,19 @@ static inline void tlb_remove_check_page
+ #define tlb_remove_tlb_entry(tlb, ptep, address)              \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
++              tlb->cleared_ptes = 1;                          \
+               __tlb_remove_tlb_entry(tlb, ptep, address);     \
+       } while (0)
+-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)           \
+-      do {                                                         \
+-              __tlb_adjust_range(tlb, address, huge_page_size(h)); \
+-              __tlb_remove_tlb_entry(tlb, ptep, address);          \
++#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)      \
++      do {                                                    \
++              unsigned long _sz = huge_page_size(h);          \
++              __tlb_adjust_range(tlb, address, _sz);          \
++              if (_sz == PMD_SIZE)                            \
++                      tlb->cleared_pmds = 1;                  \
++              else if (_sz == PUD_SIZE)                       \
++                      tlb->cleared_puds = 1;                  \
++              __tlb_remove_tlb_entry(tlb, ptep, address);     \
+       } while (0)
+ /**
+@@ -250,6 +287,7 @@ static inline void tlb_remove_check_page
+ #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)                  \
+       do {                                                            \
+               __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);       \
++              tlb->cleared_pmds = 1;                                  \
+               __tlb_remove_pmd_tlb_entry(tlb, pmdp, address);         \
+       } while (0)
+@@ -264,6 +302,7 @@ static inline void tlb_remove_check_page
+ #define tlb_remove_pud_tlb_entry(tlb, pudp, address)                  \
+       do {                                                            \
+               __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);       \
++              tlb->cleared_puds = 1;                                  \
+               __tlb_remove_pud_tlb_entry(tlb, pudp, address);         \
+       } while (0)
+@@ -289,7 +328,8 @@ static inline void tlb_remove_check_page
+ #define pte_free_tlb(tlb, ptep, address)                      \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
+-              tlb->freed_tables = 1;                  \
++              tlb->freed_tables = 1;                          \
++              tlb->cleared_pmds = 1;                          \
+               __pte_free_tlb(tlb, ptep, address);             \
+       } while (0)
+ #endif
+@@ -298,7 +338,8 @@ static inline void tlb_remove_check_page
+ #define pmd_free_tlb(tlb, pmdp, address)                      \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
+-              tlb->freed_tables = 1;                  \
++              tlb->freed_tables = 1;                          \
++              tlb->cleared_puds = 1;                          \
+               __pmd_free_tlb(tlb, pmdp, address);             \
+       } while (0)
+ #endif
+@@ -308,7 +349,8 @@ static inline void tlb_remove_check_page
+ #define pud_free_tlb(tlb, pudp, address)                      \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
+-              tlb->freed_tables = 1;                  \
++              tlb->freed_tables = 1;                          \
++              tlb->cleared_p4ds = 1;                          \
+               __pud_free_tlb(tlb, pudp, address);             \
+       } while (0)
+ #endif
+@@ -319,7 +361,7 @@ static inline void tlb_remove_check_page
+ #define p4d_free_tlb(tlb, pudp, address)                      \
+       do {                                                    \
+               __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
+-              tlb->freed_tables = 1;                  \
++              tlb->freed_tables = 1;                          \
+               __p4d_free_tlb(tlb, pudp, address);             \
+       } while (0)
+ #endif
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -279,8 +279,10 @@ void arch_tlb_finish_mmu(struct mmu_gath
+ {
+       struct mmu_gather_batch *batch, *next;
+-      if (force)
++      if (force) {
++              __tlb_reset_range(tlb);
+               __tlb_adjust_range(tlb, start, end - start);
++      }
+       tlb_flush_mmu(tlb);
diff --git a/queue-4.19/mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch b/queue-4.19/mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch
new file mode 100644 (file)
index 0000000..9fcad81
--- /dev/null
@@ -0,0 +1,170 @@
+From foo@baz Mon Jan  4 01:45:29 PM CET 2021
+From: Santosh Sivaraj <santosh@fossix.org>
+Date: Thu, 12 Mar 2020 18:57:39 +0530
+Subject: mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush
+To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, Peter Zijlstra <peterz@infradead.org>, "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
+Message-ID: <20200312132740.225241-6-santosh@fossix.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0ed1325967ab5f7a4549a2641c6ebe115f76e228 upstream.
+
+Architectures for which we have hardware walkers of Linux page table
+should flush TLB on mmu gather batch allocation failures and batch flush.
+Some architectures like POWER support multiple translation modes (hash
+and radix) and in the case of POWER only radix translation mode needs the
+above TLBI.  This is because for hash translation mode kernel wants to
+avoid this extra flush since there are no hardware walkers of linux page
+table.  With radix translation, the hardware also walks linux page table
+and with that, kernel needs to make sure to TLB invalidate page walk cache
+before page table pages are freed.
+
+More details in commit d86564a2f085 ("mm/tlb, x86/mm: Support invalidating
+TLB caches for RCU_TABLE_FREE")
+
+The changes to sparc are to make sure we keep the old behavior since we
+are now removing HAVE_RCU_TABLE_NO_INVALIDATE.  The default value for
+tlb_needs_table_invalidate is to always force an invalidate and sparc can
+avoid the table invalidate.  Hence we define tlb_needs_table_invalidate to
+false for sparc architecture.
+
+Link: http://lkml.kernel.org/r/20200116064531.483522-3-aneesh.kumar@linux.ibm.com
+Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: <stable@vger.kernel.org>  # 4.19
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+[santosh: backported to 4.19 stable]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/Kconfig                    |    3 ---
+ arch/powerpc/Kconfig            |    1 -
+ arch/powerpc/include/asm/tlb.h  |   11 +++++++++++
+ arch/sparc/Kconfig              |    1 -
+ arch/sparc/include/asm/tlb_64.h |    9 +++++++++
+ include/asm-generic/tlb.h       |   15 +++++++++++++++
+ mm/memory.c                     |   16 ++++++++--------
+ 7 files changed, 43 insertions(+), 13 deletions(-)
+
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -363,9 +363,6 @@ config HAVE_ARCH_JUMP_LABEL
+ config HAVE_RCU_TABLE_FREE
+       bool
+-config HAVE_RCU_TABLE_NO_INVALIDATE
+-      bool
+-
+ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+       bool
+       help
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -217,7 +217,6 @@ config PPC
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_RCU_TABLE_FREE
+-      select HAVE_RCU_TABLE_NO_INVALIDATE     if HAVE_RCU_TABLE_FREE
+       select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_RELIABLE_STACKTRACE         if PPC64 && CPU_LITTLE_ENDIAN
+       select HAVE_SYSCALL_TRACEPOINTS
+--- a/arch/powerpc/include/asm/tlb.h
++++ b/arch/powerpc/include/asm/tlb.h
+@@ -30,6 +30,17 @@
+ #define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+ extern void tlb_flush(struct mmu_gather *tlb);
++/*
++ * book3s:
++ * Hash does not use the linux page-tables, so we can avoid
++ * the TLB invalidate for page-table freeing, Radix otoh does use the
++ * page-tables and needs the TLBI.
++ *
++ * nohash:
++ * We still do TLB invalidate in the __pte_free_tlb routine before we
++ * add the page table pages to mmu gather table batch.
++ */
++#define tlb_needs_table_invalidate()  radix_enabled()
+ /* Get the generic bits... */
+ #include <asm-generic/tlb.h>
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -64,7 +64,6 @@ config SPARC64
+       select HAVE_KRETPROBES
+       select HAVE_KPROBES
+       select HAVE_RCU_TABLE_FREE if SMP
+-      select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
+       select HAVE_MEMBLOCK_NODE_MAP
+       select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       select HAVE_DYNAMIC_FTRACE
+--- a/arch/sparc/include/asm/tlb_64.h
++++ b/arch/sparc/include/asm/tlb_64.h
+@@ -28,6 +28,15 @@ void flush_tlb_pending(void);
+ #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+ #define tlb_flush(tlb)        flush_tlb_pending()
++/*
++ * SPARC64's hardware TLB fill does not use the Linux page-tables
++ * and therefore we don't need a TLBI when freeing page-table pages.
++ */
++
++#ifdef CONFIG_HAVE_RCU_TABLE_FREE
++#define tlb_needs_table_invalidate()  (false)
++#endif
++
+ #include <asm-generic/tlb.h>
+ #endif /* _SPARC64_TLB_H */
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -61,8 +61,23 @@ struct mmu_table_batch {
+ extern void tlb_table_flush(struct mmu_gather *tlb);
+ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
++/*
++ * This allows an architecture that does not use the linux page-tables for
++ * hardware to skip the TLBI when freeing page tables.
++ */
++#ifndef tlb_needs_table_invalidate
++#define tlb_needs_table_invalidate() (true)
++#endif
++
++#else
++
++#ifdef tlb_needs_table_invalidate
++#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE
+ #endif
++#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
++
++
+ /*
+  * If we can't allocate a page to make a big batch of page pointers
+  * to work on, then just handle a few from the on-stack structure.
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -339,14 +339,14 @@ bool __tlb_remove_page_size(struct mmu_g
+  */
+ static inline void tlb_table_invalidate(struct mmu_gather *tlb)
+ {
+-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
+-      /*
+-       * Invalidate page-table caches used by hardware walkers. Then we still
+-       * need to RCU-sched wait while freeing the pages because software
+-       * walkers can still be in-flight.
+-       */
+-      tlb_flush_mmu_tlbonly(tlb);
+-#endif
++      if (tlb_needs_table_invalidate()) {
++              /*
++               * Invalidate page-table caches used by hardware walkers. Then
++               * we still need to RCU-sched wait while freeing the pages
++               * because software walkers can still be in-flight.
++               */
++              tlb_flush_mmu_tlbonly(tlb);
++      }
+ }
+ static void tlb_remove_table_smp_sync(void *arg)
diff --git a/queue-4.19/powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch b/queue-4.19/powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch
new file mode 100644 (file)
index 0000000..317bd3d
--- /dev/null
@@ -0,0 +1,188 @@
+From foo@baz Mon Jan  4 01:45:29 PM CET 2021
+From: Santosh Sivaraj <santosh@fossix.org>
+Date: Thu, 12 Mar 2020 18:57:38 +0530
+Subject: powerpc/mmu_gather: enable RCU_TABLE_FREE even for !SMP case
+To: <stable@vger.kernel.org>, linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Greg KH <greg@kroah.com>, Sasha Levin <sashal@kernel.org>, "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Message-ID: <20200312132740.225241-5-santosh@fossix.org>
+
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+
+commit 12e4d53f3f04e81f9e83d6fc10edc7314ab9f6b9 upstream.
+
+Patch series "Fixup page directory freeing", v4.
+
+This is a repost of patch series from Peter with the arch specific changes
+except ppc64 dropped.  ppc64 changes are added here because we are redoing
+the patch series on top of ppc64 changes.  This makes it easy to backport
+these changes.  Only the first 2 patches need to be backported to stable.
+
+The thing is, on anything SMP, freeing page directories should observe the
+exact same order as normal page freeing:
+
+ 1) unhook page/directory
+ 2) TLB invalidate
+ 3) free page/directory
+
+Without this, any concurrent page-table walk could end up with a
+use-after-free.  This is especially easy to hit on anything that has
+software page-table walkers (HAVE_FAST_GUP / software TLB fill) or whose
+hardware caches partial page-walks (i.e. caches page directories).
+
+Even on UP this might give issues since mmu_gather is preemptible these
+days.  An interrupt or preempted task accessing user pages might stumble
+into the free page if the hardware caches page directories.
+
+This patch series fixes ppc64 and adds generic MMU_GATHER changes to
+support the conversion of other architectures.  I haven't added patches
+for the other architectures because they are yet to be acked.
+
+This patch (of 9):
+
+A followup patch is going to make sure we correctly invalidate the page
+walk cache before we free page table pages.  In order to keep things
+simple, enable RCU_TABLE_FREE even for !SMP so that we don't have to fix
+up the !SMP case differently in the followup patch.
+
+The !SMP case is currently broken for radix translation w.r.t. the page
+walk cache flush.  We can get interrupted in between page table free and
+that would imply we have page walk cache entries pointing to tables
+which got freed already.  Michael said "both our platforms that run on
+Power9 force SMP on in Kconfig, so the !SMP case is unlikely to be a
+problem for anyone in practice, unless they've hacked their kernel to
+build it !SMP."
+
+Link: http://lkml.kernel.org/r/20200116064531.483522-2-aneesh.kumar@linux.ibm.com
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # 4.19
+Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
+[santosh: backported for 4.19 stable]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/Kconfig                         |    2 +-
+ arch/powerpc/include/asm/book3s/32/pgalloc.h |    8 --------
+ arch/powerpc/include/asm/book3s/64/pgalloc.h |    2 --
+ arch/powerpc/include/asm/nohash/32/pgalloc.h |    8 --------
+ arch/powerpc/include/asm/nohash/64/pgalloc.h |    9 +--------
+ arch/powerpc/mm/pgtable-book3s64.c           |    7 -------
+ 6 files changed, 2 insertions(+), 34 deletions(-)
+
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -216,7 +216,7 @@ config PPC
+       select HAVE_HARDLOCKUP_DETECTOR_PERF    if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
+-      select HAVE_RCU_TABLE_FREE              if SMP
++      select HAVE_RCU_TABLE_FREE
+       select HAVE_RCU_TABLE_NO_INVALIDATE     if HAVE_RCU_TABLE_FREE
+       select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_RELIABLE_STACKTRACE         if PPC64 && CPU_LITTLE_ENDIAN
+--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
++++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
+@@ -110,7 +110,6 @@ static inline void pgtable_free(void *ta
+ #define check_pgt_cache()     do { } while (0)
+ #define get_hugepd_cache_index(x)  (x)
+-#ifdef CONFIG_SMP
+ static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+                                   void *table, int shift)
+ {
+@@ -127,13 +126,6 @@ static inline void __tlb_remove_table(vo
+       pgtable_free(table, shift);
+ }
+-#else
+-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+-                                  void *table, int shift)
+-{
+-      pgtable_free(table, shift);
+-}
+-#endif
+ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+                                 unsigned long address)
+--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
++++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
+@@ -47,9 +47,7 @@ extern pmd_t *pmd_fragment_alloc(struct
+ extern void pte_fragment_free(unsigned long *, int);
+ extern void pmd_fragment_free(unsigned long *);
+ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+-#ifdef CONFIG_SMP
+ extern void __tlb_remove_table(void *_table);
+-#endif
+ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
+ {
+--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
++++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
+@@ -111,7 +111,6 @@ static inline void pgtable_free(void *ta
+ #define check_pgt_cache()     do { } while (0)
+ #define get_hugepd_cache_index(x)     (x)
+-#ifdef CONFIG_SMP
+ static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+                                   void *table, int shift)
+ {
+@@ -128,13 +127,6 @@ static inline void __tlb_remove_table(vo
+       pgtable_free(table, shift);
+ }
+-#else
+-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+-                                  void *table, int shift)
+-{
+-      pgtable_free(table, shift);
+-}
+-#endif
+ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+                                 unsigned long address)
+--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
++++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
+@@ -142,7 +142,7 @@ static inline void pgtable_free(void *ta
+ }
+ #define get_hugepd_cache_index(x)     (x)
+-#ifdef CONFIG_SMP
++
+ static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+ {
+       unsigned long pgf = (unsigned long)table;
+@@ -160,13 +160,6 @@ static inline void __tlb_remove_table(vo
+       pgtable_free(table, shift);
+ }
+-#else
+-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+-{
+-      pgtable_free(table, shift);
+-}
+-#endif
+-
+ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+                                 unsigned long address)
+ {
+--- a/arch/powerpc/mm/pgtable-book3s64.c
++++ b/arch/powerpc/mm/pgtable-book3s64.c
+@@ -432,7 +432,6 @@ static inline void pgtable_free(void *ta
+       }
+ }
+-#ifdef CONFIG_SMP
+ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
+ {
+       unsigned long pgf = (unsigned long)table;
+@@ -449,12 +448,6 @@ void __tlb_remove_table(void *_table)
+       return pgtable_free(table, index);
+ }
+-#else
+-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
+-{
+-      return pgtable_free(table, index);
+-}
+-#endif
+ #ifdef CONFIG_PROC_FS
+ atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
index 66eee5ed355e9a5b92e96699fbf77e665debcaa9..eabe06211bae8fc012ad7879fb2d97bee71ac8b4 100644 (file)
@@ -12,3 +12,9 @@ powerpc-bitops-fix-possible-undefined-behaviour-with.patch
 xen-gntdev.c-mark-pages-as-dirty.patch
 null_blk-fix-zone-size-initialization.patch
 of-fix-linker-section-match-table-corruption.patch
+asm-generic-tlb-track-freeing-of-page-table-directories-in-struct-mmu_gather.patch
+asm-generic-tlb-track-which-levels-of-the-page-tables-have-been-cleared.patch
+asm-generic-tlb-arch-invert-config_have_rcu_table_invalidate.patch
+powerpc-mmu_gather-enable-rcu_table_free-even-for-smp-case.patch
+mm-mmu_gather-invalidate-tlb-correctly-on-batch-allocation-failure-and-flush.patch
+asm-generic-tlb-avoid-potential-double-flush.patch