5.9-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 11 Dec 2020 15:52:00 +0000 (16:52 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 11 Dec 2020 15:52:00 +0000 (16:52 +0100)
added patches:
kprobes-remove-nmi-context-check.patch
kprobes-tell-lockdep-about-kprobe-nesting.patch
mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch

queue-5.9/kprobes-remove-nmi-context-check.patch [new file with mode: 0644]
queue-5.9/kprobes-tell-lockdep-about-kprobe-nesting.patch [new file with mode: 0644]
queue-5.9/mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch [new file with mode: 0644]
queue-5.9/series

diff --git a/queue-5.9/kprobes-remove-nmi-context-check.patch b/queue-5.9/kprobes-remove-nmi-context-check.patch
new file mode 100644 (file)
index 0000000..7c91d64
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Fri Dec 11 03:46:57 PM CET 2020
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 11 Dec 2020 00:30:58 +0900
+Subject: kprobes: Remove NMI context check
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>, x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>, linux-kernel@vger.kernel.org, "Naveen N . Rao" <naveen.n.rao@linux.ibm.com>, Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, "David S . Miller" <davem@davemloft.net>, Masami Hiramatsu <mhiramat@kernel.org>, Solar Designer <solar@openwall.com>, Eddy_Wu@trendmicro.com, Peter Zijlstra <peterz@infradead.org>
+Message-ID: <160761425763.3585575.15837172081484340228.stgit@devnote2>
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit e03b4a084ea6b0a18b0e874baec439e69090c168 upstream.
+
+The in_nmi() check in pre_handler_kretprobe() is meant to avoid
+recursion, and blindly assumes that anything NMI is recursive.
+
+However, since commit:
+
+  9b38cc704e84 ("kretprobe: Prevent triggering kretprobe from within kprobe_flush_task")
+
+there is a better way to detect and avoid actual recursion.
+
+By setting a dummy kprobe, any actual exceptions will terminate early
+(by trying to handle the dummy kprobe), and recursion will not happen.
+
+Employ this to avoid the kretprobe_table_lock() recursion, replacing
+the over-eager in_nmi() check.
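+
+For illustration, a minimal sketch of the pattern (the walker function
+name is hypothetical; kprobe_busy_begin()/kprobe_busy_end() are the
+helpers this patch uses):
+
+  #include <linux/kprobes.h>
+
+  /* Walk kretprobe data without risking recursion from NMI context. */
+  static void kretprobe_walk_sketch(void)
+  {
+          /*
+           * Mark this CPU as already "inside" a kprobe: any kprobe that
+           * fires here (e.g. from an NMI) is caught by the reentrancy
+           * check and rejected, so it cannot take kretprobe_table_lock()
+           * again.
+           */
+          kprobe_busy_begin();
+
+          /* ... kretprobe_table_lock(), walk the table, unlock ... */
+
+          kprobe_busy_end();              /* clear the dummy kprobe */
+  }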
+
+Cc: stable@vger.kernel.org # 5.9.x
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lkml.kernel.org/r/159870615628.1229682.6087311596892125907.stgit@devnote2
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kprobes.c |   16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -1359,7 +1359,8 @@ static void cleanup_rp_inst(struct kretp
+       struct hlist_node *next;
+       struct hlist_head *head;
+-      /* No race here */
++      /* To avoid recursive kretprobe by NMI, set kprobe busy here */
++      kprobe_busy_begin();
+       for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
+               kretprobe_table_lock(hash, &flags);
+               head = &kretprobe_inst_table[hash];
+@@ -1369,6 +1370,8 @@ static void cleanup_rp_inst(struct kretp
+               }
+               kretprobe_table_unlock(hash, &flags);
+       }
++      kprobe_busy_end();
++
+       free_rp_inst(rp);
+ }
+ NOKPROBE_SYMBOL(cleanup_rp_inst);
+@@ -1937,17 +1940,6 @@ static int pre_handler_kretprobe(struct
+       unsigned long hash, flags = 0;
+       struct kretprobe_instance *ri;
+-      /*
+-       * To avoid deadlocks, prohibit return probing in NMI contexts,
+-       * just skip the probe and increase the (inexact) 'nmissed'
+-       * statistical counter, so that the user is informed that
+-       * something happened:
+-       */
+-      if (unlikely(in_nmi())) {
+-              rp->nmissed++;
+-              return 0;
+-      }
+-
+       /* TODO: consider to only swap the RA after the last pre_handler fired */
+       hash = hash_ptr(current, KPROBE_HASH_BITS);
+       raw_spin_lock_irqsave(&rp->lock, flags);
diff --git a/queue-5.9/kprobes-tell-lockdep-about-kprobe-nesting.patch b/queue-5.9/kprobes-tell-lockdep-about-kprobe-nesting.patch
new file mode 100644 (file)
index 0000000..d7cefad
--- /dev/null
@@ -0,0 +1,85 @@
+From foo@baz Fri Dec 11 03:46:57 PM CET 2020
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 11 Dec 2020 00:31:09 +0900
+Subject: kprobes: Tell lockdep about kprobe nesting
+To: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>, x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>, linux-kernel@vger.kernel.org, "Naveen N . Rao" <naveen.n.rao@linux.ibm.com>, Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, "David S . Miller" <davem@davemloft.net>, Masami Hiramatsu <mhiramat@kernel.org>, Solar Designer <solar@openwall.com>, Eddy_Wu@trendmicro.com, Peter Zijlstra <peterz@infradead.org>
+Message-ID: <160761426940.3585575.9968752396885952490.stgit@devnote2>
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 645f224e7ba2f4200bf163153d384ceb0de5462e upstream.
+
+The kprobe handlers have protection that prohibits other handlers from
+executing in other contexts (for example, if an NMI comes in while
+processing a kprobe and hits the same kprobe, it will fail with a "busy"
+return), but lockdep is unaware of this protection. Use lockdep's
+nesting API to differentiate between locks taken in INT3 context and in
+other contexts, and so suppress the false warnings.
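+
+A minimal sketch of the annotation (the lock and function here are
+hypothetical stand-ins; raw_spin_lock_irqsave_nested() and in_nmi() are
+the interfaces this patch uses):
+
+  #include <linux/spinlock.h>
+  #include <linux/hardirq.h>
+
+  static DEFINE_RAW_SPINLOCK(demo_lock);
+
+  static void demo_take_lock(void)
+  {
+          unsigned long flags;
+
+          /*
+           * Same lock class, but subclass 1 when taken from NMI/INT3
+           * context and subclass 0 otherwise, so lockdep tracks the two
+           * acquisitions separately instead of reporting a false
+           * self-deadlock.
+           */
+          raw_spin_lock_irqsave_nested(&demo_lock, flags, !!in_nmi());
+          /* ... critical section ... */
+          raw_spin_unlock_irqrestore(&demo_lock, flags);
+  }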
+
+Link: https://lore.kernel.org/r/20201102160234.fa0ae70915ad9e2b21c08b85@kernel.org
+
+Cc: stable@vger.kernel.org # 5.9.x
+Cc: Peter Zijlstra <peterz@infradead.org>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kprobes.c |   25 +++++++++++++++++++++----
+ 1 file changed, 21 insertions(+), 4 deletions(-)
+
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -1250,7 +1250,13 @@ __acquires(hlist_lock)
+       *head = &kretprobe_inst_table[hash];
+       hlist_lock = kretprobe_table_lock_ptr(hash);
+-      raw_spin_lock_irqsave(hlist_lock, *flags);
++      /*
++       * Nested is a workaround that will soon not be needed.
++       * There's other protections that make sure the same lock
++       * is not taken on the same CPU that lockdep is unaware of.
++       * Differentiate when it is taken in NMI context.
++       */
++      raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
+ }
+ NOKPROBE_SYMBOL(kretprobe_hash_lock);
+@@ -1259,7 +1265,13 @@ static void kretprobe_table_lock(unsigne
+ __acquires(hlist_lock)
+ {
+       raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+-      raw_spin_lock_irqsave(hlist_lock, *flags);
++      /*
++       * Nested is a workaround that will soon not be needed.
++       * There's other protections that make sure the same lock
++       * is not taken on the same CPU that lockdep is unaware of.
++       * Differentiate when it is taken in NMI context.
++       */
++      raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
+ }
+ NOKPROBE_SYMBOL(kretprobe_table_lock);
+@@ -1942,7 +1954,12 @@ static int pre_handler_kretprobe(struct
+       /* TODO: consider to only swap the RA after the last pre_handler fired */
+       hash = hash_ptr(current, KPROBE_HASH_BITS);
+-      raw_spin_lock_irqsave(&rp->lock, flags);
++      /*
++       * Nested is a workaround that will soon not be needed.
++       * There's other protections that make sure the same lock
++       * is not taken on the same CPU that lockdep is unaware of.
++       */
++      raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
+       if (!hlist_empty(&rp->free_instances)) {
+               ri = hlist_entry(rp->free_instances.first,
+                               struct kretprobe_instance, hlist);
+@@ -1953,7 +1970,7 @@ static int pre_handler_kretprobe(struct
+               ri->task = current;
+               if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+-                      raw_spin_lock_irqsave(&rp->lock, flags);
++                      raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
+                       hlist_add_head(&ri->hlist, &rp->free_instances);
+                       raw_spin_unlock_irqrestore(&rp->lock, flags);
+                       return 0;
diff --git a/queue-5.9/mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch b/queue-5.9/mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch
new file mode 100644 (file)
index 0000000..144e74a
--- /dev/null
@@ -0,0 +1,182 @@
+From e91d8d78237de8d7120c320b3645b7100848f24d Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Sat, 5 Dec 2020 22:14:51 -0800
+Subject: mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit e91d8d78237de8d7120c320b3645b7100848f24d upstream.
+
+While I was doing zram testing, I found that decompression sometimes
+failed because the compression buffer was corrupted.  On investigation,
+I found that the commit named in the Fixes: tag below calls
+cond_resched() unconditionally, so it can cause a problem in atomic
+context if the task is rescheduled.
+
+  BUG: sleeping function called from invalid context at mm/vmalloc.c:108
+  in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 946, name: memhog
+  3 locks held by memhog/946:
+   #0: ffff9d01d4b193e8 (&mm->mmap_lock#2){++++}-{4:4}, at: __mm_populate+0x103/0x160
+   #1: ffffffffa3d53de0 (fs_reclaim){+.+.}-{0:0}, at: __alloc_pages_slowpath.constprop.0+0xa98/0x1160
+   #2: ffff9d01d56b8110 (&zspage->lock){.+.+}-{3:3}, at: zs_map_object+0x8e/0x1f0
+  CPU: 0 PID: 946 Comm: memhog Not tainted 5.9.3-00011-gc5bfc0287345-dirty #316
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+  Call Trace:
+    unmap_kernel_range_noflush+0x2eb/0x350
+    unmap_kernel_range+0x14/0x30
+    zs_unmap_object+0xd5/0xe0
+    zram_bvec_rw.isra.0+0x38c/0x8e0
+    zram_rw_page+0x90/0x101
+    bdev_write_page+0x92/0xe0
+    __swap_writepage+0x94/0x4a0
+    pageout+0xe3/0x3a0
+    shrink_page_list+0xb94/0xd60
+    shrink_inactive_list+0x158/0x460
+
+We can fix this by removing the ZSMALLOC_PGTABLE_MAPPING feature (which
+contains the offending calling code) from zsmalloc.
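+
+For illustration only (hypothetical names, and a plain spinlock standing
+in for the zsmalloc mapping lock), the splat above is the classic
+pattern of reaching a might-sleep call while holding a spinning lock:
+
+  #include <linux/spinlock.h>
+  #include <linux/sched.h>
+
+  static DEFINE_SPINLOCK(obj_lock);
+
+  static void buggy_unmap_sketch(void)
+  {
+          spin_lock(&obj_lock);           /* enter atomic context */
+          /*
+           * cond_resched() may sleep; with CONFIG_DEBUG_ATOMIC_SLEEP it
+           * triggers the "BUG: sleeping function called from invalid
+           * context" report shown above.
+           */
+          cond_resched();
+          spin_unlock(&obj_lock);
+  }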
+
+Even though this option showed some improvement (e.g., 30%) on some
+arm32 platforms, it has been a headache to maintain because it abuses
+APIs[1] (e.g., unmap_kernel_range in atomic context).
+
+Since 32-bit machines are on their way to being deprecated, the config
+option has only been available for builtin builds since v5.8, and it is
+not the default option in zsmalloc, it is time to drop the option for
+easier maintenance.
+
+[1] http://lore.kernel.org/linux-mm/20201105170249.387069-1-minchan@kernel.org
+
+Fixes: e47110e90584 ("mm/vunmap: add cond_resched() in vunmap_pmd_range")
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Tony Lindgren <tony@atomide.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Harish Sriram <harish@linux.ibm.com>
+Cc: Uladzislau Rezki <urezki@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20201117202916.GA3856507@google.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/configs/omap2plus_defconfig |    1 -
+ include/linux/zsmalloc.h             |    1 -
+ mm/Kconfig                           |   13 ---------
+ mm/zsmalloc.c                        |   48 -----------------------------------
+ 4 files changed, 63 deletions(-)
+
+--- a/arch/arm/configs/omap2plus_defconfig
++++ b/arch/arm/configs/omap2plus_defconfig
+@@ -81,7 +81,6 @@ CONFIG_PARTITION_ADVANCED=y
+ CONFIG_BINFMT_MISC=y
+ CONFIG_CMA=y
+ CONFIG_ZSMALLOC=m
+-CONFIG_ZSMALLOC_PGTABLE_MAPPING=y
+ CONFIG_NET=y
+ CONFIG_PACKET=y
+ CONFIG_UNIX=y
+--- a/include/linux/zsmalloc.h
++++ b/include/linux/zsmalloc.h
+@@ -20,7 +20,6 @@
+  * zsmalloc mapping modes
+  *
+  * NOTE: These only make a difference when a mapped object spans pages.
+- * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected.
+  */
+ enum zs_mapmode {
+       ZS_MM_RW, /* normal read-write mapping */
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -706,19 +706,6 @@ config ZSMALLOC
+         returned by an alloc().  This handle must be mapped in order to
+         access the allocated space.
+-config ZSMALLOC_PGTABLE_MAPPING
+-      bool "Use page table mapping to access object in zsmalloc"
+-      depends on ZSMALLOC=y
+-      help
+-        By default, zsmalloc uses a copy-based object mapping method to
+-        access allocations that span two pages. However, if a particular
+-        architecture (ex, ARM) performs VM mapping faster than copying,
+-        then you should select this. This causes zsmalloc to use page table
+-        mapping rather than copying for object mapping.
+-
+-        You can check speed with zsmalloc benchmark:
+-        https://github.com/spartacus06/zsmapbench
+-
+ config ZSMALLOC_STAT
+       bool "Export zsmalloc statistics"
+       depends on ZSMALLOC
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -293,11 +293,7 @@ struct zspage {
+ };
+ struct mapping_area {
+-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
+-      struct vm_struct *vm; /* vm area for mapping object that span pages */
+-#else
+       char *vm_buf; /* copy buffer for objects that span pages */
+-#endif
+       char *vm_addr; /* address of kmap_atomic()'ed pages */
+       enum zs_mapmode vm_mm; /* mapping mode */
+ };
+@@ -1113,48 +1109,6 @@ static struct zspage *find_get_zspage(st
+       return zspage;
+ }
+-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
+-static inline int __zs_cpu_up(struct mapping_area *area)
+-{
+-      /*
+-       * Make sure we don't leak memory if a cpu UP notification
+-       * and zs_init() race and both call zs_cpu_up() on the same cpu
+-       */
+-      if (area->vm)
+-              return 0;
+-      area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
+-      if (!area->vm)
+-              return -ENOMEM;
+-      return 0;
+-}
+-
+-static inline void __zs_cpu_down(struct mapping_area *area)
+-{
+-      if (area->vm)
+-              free_vm_area(area->vm);
+-      area->vm = NULL;
+-}
+-
+-static inline void *__zs_map_object(struct mapping_area *area,
+-                              struct page *pages[2], int off, int size)
+-{
+-      unsigned long addr = (unsigned long)area->vm->addr;
+-
+-      BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0);
+-      area->vm_addr = area->vm->addr;
+-      return area->vm_addr + off;
+-}
+-
+-static inline void __zs_unmap_object(struct mapping_area *area,
+-                              struct page *pages[2], int off, int size)
+-{
+-      unsigned long addr = (unsigned long)area->vm_addr;
+-
+-      unmap_kernel_range(addr, PAGE_SIZE * 2);
+-}
+-
+-#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
+-
+ static inline int __zs_cpu_up(struct mapping_area *area)
+ {
+       /*
+@@ -1235,8 +1189,6 @@ out:
+       pagefault_enable();
+ }
+-#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
+-
+ static int zs_cpu_prepare(unsigned int cpu)
+ {
+       struct mapping_area *area;
diff --git a/queue-5.9/series b/queue-5.9/series
index 4689d40df82a97b58339d0e58a237b92ad6dad35..d4c63438ce9a22188fa5e82d17024c10a89304ea 100644 (file)
@@ -1 +1,4 @@
 kbuild-do-not-emit-debug-info-for-assembly-with-llvm_ias-1.patch
+mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch
+kprobes-remove-nmi-context-check.patch
+kprobes-tell-lockdep-about-kprobe-nesting.patch