From: Greg Kroah-Hartman Date: Mon, 14 Dec 2020 15:47:39 +0000 (+0100) Subject: 5.4-stable patches X-Git-Tag: v5.10.1~9 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b30003460753640a5b318091366e4b8aad8fa4c0;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch --- diff --git a/queue-5.4/mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch b/queue-5.4/mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch new file mode 100644 index 00000000000..15aeb27fe56 --- /dev/null +++ b/queue-5.4/mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch @@ -0,0 +1,170 @@ +From e91d8d78237de8d7120c320b3645b7100848f24d Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Sat, 5 Dec 2020 22:14:51 -0800 +Subject: mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING + +From: Minchan Kim + +commit e91d8d78237de8d7120c320b3645b7100848f24d upstream. + +While I was doing zram testing, I found that decompression sometimes failed +since the compression buffer was corrupted. With investigation, I found that +the commit below calls cond_resched unconditionally, so it could cause a +problem in atomic context if the task is rescheduled. 
+ + BUG: sleeping function called from invalid context at mm/vmalloc.c:108 + in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 946, name: memhog + 3 locks held by memhog/946: + #0: ffff9d01d4b193e8 (&mm->mmap_lock#2){++++}-{4:4}, at: __mm_populate+0x103/0x160 + #1: ffffffffa3d53de0 (fs_reclaim){+.+.}-{0:0}, at: __alloc_pages_slowpath.constprop.0+0xa98/0x1160 + #2: ffff9d01d56b8110 (&zspage->lock){.+.+}-{3:3}, at: zs_map_object+0x8e/0x1f0 + CPU: 0 PID: 946 Comm: memhog Not tainted 5.9.3-00011-gc5bfc0287345-dirty #316 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 + Call Trace: + unmap_kernel_range_noflush+0x2eb/0x350 + unmap_kernel_range+0x14/0x30 + zs_unmap_object+0xd5/0xe0 + zram_bvec_rw.isra.0+0x38c/0x8e0 + zram_rw_page+0x90/0x101 + bdev_write_page+0x92/0xe0 + __swap_writepage+0x94/0x4a0 + pageout+0xe3/0x3a0 + shrink_page_list+0xb94/0xd60 + shrink_inactive_list+0x158/0x460 + +We can fix this by removing the ZSMALLOC_PGTABLE_MAPPING feature (which +contains the offending calling code) from zsmalloc. + +Even though this option showed some improvement (e.g., 30%) on +some arm32 platforms, it has been a headache to maintain since it has +abused APIs[1] (e.g., unmap_kernel_range in atomic context). + +Since we are moving toward deprecating 32-bit machines, the config option +has been available only for built-in builds since v5.8, and it has +not been the default option in zsmalloc, it's time to drop the option +for better maintenance. 
+ +[1] http://lore.kernel.org/linux-mm/20201105170249.387069-1-minchan@kernel.org + +Fixes: e47110e90584 ("mm/vunmap: add cond_resched() in vunmap_pmd_range") +Signed-off-by: Minchan Kim +Signed-off-by: Andrew Morton +Reviewed-by: Sergey Senozhatsky +Cc: Tony Lindgren +Cc: Christoph Hellwig +Cc: Harish Sriram +Cc: Uladzislau Rezki +Cc: +Link: https://lkml.kernel.org/r/20201117202916.GA3856507@google.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + + +--- + include/linux/zsmalloc.h | 1 - + mm/Kconfig | 13 ------------- + mm/zsmalloc.c | 46 ---------------------------------------------- + 3 files changed, 60 deletions(-) + +--- a/include/linux/zsmalloc.h ++++ b/include/linux/zsmalloc.h +@@ -20,7 +20,6 @@ + * zsmalloc mapping modes + * + * NOTE: These only make a difference when a mapped object spans pages. +- * They also have no effect when PGTABLE_MAPPING is selected. + */ + enum zs_mapmode { + ZS_MM_RW, /* normal read-write mapping */ +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -576,19 +576,6 @@ config ZSMALLOC + returned by an alloc(). This handle must be mapped in order to + access the allocated space. + +-config PGTABLE_MAPPING +- bool "Use page table mapping to access object in zsmalloc" +- depends on ZSMALLOC +- help +- By default, zsmalloc uses a copy-based object mapping method to +- access allocations that span two pages. However, if a particular +- architecture (ex, ARM) performs VM mapping faster than copying, +- then you should select this. This causes zsmalloc to use page table +- mapping rather than copying for object mapping. 
+- +- You can check speed with zsmalloc benchmark: +- https://github.com/spartacus06/zsmapbench +- + config ZSMALLOC_STAT + bool "Export zsmalloc statistics" + depends on ZSMALLOC +--- a/mm/zsmalloc.c ++++ b/mm/zsmalloc.c +@@ -293,11 +293,7 @@ struct zspage { + }; + + struct mapping_area { +-#ifdef CONFIG_PGTABLE_MAPPING +- struct vm_struct *vm; /* vm area for mapping object that span pages */ +-#else + char *vm_buf; /* copy buffer for objects that span pages */ +-#endif + char *vm_addr; /* address of kmap_atomic()'ed pages */ + enum zs_mapmode vm_mm; /* mapping mode */ + }; +@@ -1113,46 +1109,6 @@ static struct zspage *find_get_zspage(st + return zspage; + } + +-#ifdef CONFIG_PGTABLE_MAPPING +-static inline int __zs_cpu_up(struct mapping_area *area) +-{ +- /* +- * Make sure we don't leak memory if a cpu UP notification +- * and zs_init() race and both call zs_cpu_up() on the same cpu +- */ +- if (area->vm) +- return 0; +- area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL); +- if (!area->vm) +- return -ENOMEM; +- return 0; +-} +- +-static inline void __zs_cpu_down(struct mapping_area *area) +-{ +- if (area->vm) +- free_vm_area(area->vm); +- area->vm = NULL; +-} +- +-static inline void *__zs_map_object(struct mapping_area *area, +- struct page *pages[2], int off, int size) +-{ +- BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); +- area->vm_addr = area->vm->addr; +- return area->vm_addr + off; +-} +- +-static inline void __zs_unmap_object(struct mapping_area *area, +- struct page *pages[2], int off, int size) +-{ +- unsigned long addr = (unsigned long)area->vm_addr; +- +- unmap_kernel_range(addr, PAGE_SIZE * 2); +-} +- +-#else /* CONFIG_PGTABLE_MAPPING */ +- + static inline int __zs_cpu_up(struct mapping_area *area) + { + /* +@@ -1233,8 +1189,6 @@ out: + pagefault_enable(); + } + +-#endif /* CONFIG_PGTABLE_MAPPING */ +- + static int zs_cpu_prepare(unsigned int cpu) + { + struct mapping_area *area; diff --git a/queue-5.4/series b/queue-5.4/series index 
b8b4ff96821..b06671514ec 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -34,3 +34,4 @@ x86-membarrier-get-rid-of-a-dubious-optimization.patch x86-apic-vector-fix-ordering-in-vector-assignment.patch dm-raid-fix-discard-limits-for-raid1-and-raid10.patch md-change-mddev-chunk_sectors-from-int-to-unsigned.patch +mm-zsmalloc.c-drop-zsmalloc_pgtable_mapping.patch