From: Greg Kroah-Hartman Date: Wed, 14 Mar 2018 10:53:24 +0000 (+0100) Subject: move page_alloc patch to pending X-Git-Tag: v4.14.27~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dde106e5b03a399ee62eceeb56ce967cd2c54673;p=thirdparty%2Fkernel%2Fstable-queue.git move page_alloc patch to pending --- diff --git a/queue-4.15/mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch b/pending/mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch similarity index 100% rename from queue-4.15/mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch rename to pending/mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch diff --git a/queue-4.14/mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch b/queue-4.14/mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch deleted file mode 100644 index 5f10ae6d19f..00000000000 --- a/queue-4.14/mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 864b75f9d6b0100bb24fdd9a20d156e7cda9b5ae Mon Sep 17 00:00:00 2001 -From: Daniel Vacek -Date: Fri, 9 Mar 2018 15:51:13 -0800 -Subject: mm/page_alloc: fix memmap_init_zone pageblock alignment - -From: Daniel Vacek - -commit 864b75f9d6b0100bb24fdd9a20d156e7cda9b5ae upstream. - -Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns -where possible") introduced a bug where move_freepages() triggers a -VM_BUG_ON() on uninitialized page structure due to pageblock alignment. -To fix this, simply align the skipped pfns in memmap_init_zone() the -same way as in move_freepages_block(). - -Seen in one of the RHEL reports: - - crash> log | grep -e BUG -e RIP -e Call.Trace -e move_freepages_block -e rmqueue -e freelist -A1 - kernel BUG at mm/page_alloc.c:1389! 
- invalid opcode: 0000 [#1] SMP - -- - RIP: 0010:[] [] move_freepages+0x15e/0x160 - RSP: 0018:ffff88054d727688 EFLAGS: 00010087 - -- - Call Trace: - [] move_freepages_block+0x73/0x80 - [] __rmqueue+0x263/0x460 - [] get_page_from_freelist+0x7e1/0x9e0 - [] __alloc_pages_nodemask+0x176/0x420 - -- - RIP [] move_freepages+0x15e/0x160 - RSP - - crash> page_init_bug -v | grep RAM - 1000 - 9bfff System RAM (620.00 KiB) - 100000 - 430bffff System RAM ( 1.05 GiB = 1071.75 MiB = 1097472.00 KiB) - 4b0c8000 - 4bf9cfff System RAM ( 14.83 MiB = 15188.00 KiB) - 4bfac000 - 646b1fff System RAM (391.02 MiB = 400408.00 KiB) - 7b788000 - 7b7fffff System RAM (480.00 KiB) - 100000000 - 67fffffff System RAM ( 22.00 GiB) - - crash> page_init_bug | head -6 - 7b788000 - 7b7fffff System RAM (480.00 KiB) - 1fffff00000000 0 1 DMA32 4096 1048575 - 505736 505344 505855 - 0 0 0 DMA 1 4095 - 1fffff00000400 0 1 DMA32 4096 1048575 - BUG, zones differ! - -Note that this range follows two not populated sections -68000000-77ffffff in this zone. 7b788000-7b7fffff is the first one -after a gap. This makes memmap_init_zone() skip all the pfns up to the -beginning of this range. But this range is not pageblock (2M) aligned. -In fact no range has to be. - - crash> kmem -p 77fff000 78000000 7b5ff000 7b600000 7b787000 7b788000 - PAGE PHYSICAL MAPPING INDEX CNT FLAGS - ffffea0001e00000 78000000 0 0 0 0 - ffffea0001ed7fc0 7b5ff000 0 0 0 0 - ffffea0001ed8000 7b600000 0 0 0 0 <<<< - ffffea0001ede1c0 7b787000 0 0 0 0 - ffffea0001ede200 7b788000 0 0 1 1fffff00000000 - -Top part of page flags should contain nodeid and zonenr, which is not -the case for page ffffea0001ed8000 here (<<<<). 
- - crash> log | grep -o fffea0001ed[^\ ]* | sort -u - fffea0001ed8000 - fffea0001eded20 - fffea0001edffc0 - - crash> bt -r | grep -o fffea0001ed[^\ ]* | sort -u - fffea0001ed8000 - fffea0001eded00 - fffea0001eded20 - fffea0001edffc0 - -Initialization of the whole beginning of the section is skipped up to -the start of the range due to the commit b92df1de5d28. Now any code -calling move_freepages_block() (like reusing the page from a freelist as -in this example) with a page from the beginning of the range will get -the page rounded down to start_page ffffea0001ed8000 and passed to -move_freepages() which crashes on assertion getting wrong zonenr. - - > VM_BUG_ON(page_zone(start_page) != page_zone(end_page)); - -Note, page_zone() derives the zone from page flags here. - -From a similar machine before commit b92df1de5d28: - - crash> kmem -p 77fff000 78000000 7b5ff000 7b600000 7b7fe000 7b7ff000 - PAGE PHYSICAL MAPPING INDEX CNT FLAGS - fffff73941e00000 78000000 0 0 1 1fffff00000000 - fffff73941ed7fc0 7b5ff000 0 0 1 1fffff00000000 - fffff73941ed8000 7b600000 0 0 1 1fffff00000000 - fffff73941edff80 7b7fe000 0 0 1 1fffff00000000 - fffff73941edffc0 7b7ff000 ffff8e67e04d3ae0 ad84 1 1fffff00020068 uptodate,lru,active,mappedtodisk - -All the pages since the beginning of the section are initialized. -move_freepages() is not going to blow up. - -The same machine with this fix applied: - - crash> kmem -p 77fff000 78000000 7b5ff000 7b600000 7b7fe000 7b7ff000 - PAGE PHYSICAL MAPPING INDEX CNT FLAGS - ffffea0001e00000 78000000 0 0 0 0 - ffffea0001e00000 7b5ff000 0 0 0 0 - ffffea0001ed8000 7b600000 0 0 1 1fffff00000000 - ffffea0001edff80 7b7fe000 0 0 1 1fffff00000000 - ffffea0001edffc0 7b7ff000 ffff88017fb13720 8 2 1fffff00020068 uptodate,lru,active,mappedtodisk - -At least the bare minimum of pages is initialized preventing the crash -as well. - -Customers started to report this as soon as 7.4 (where b92df1de5d28 was -merged in RHEL) was released. 
I remember reports from -September/October-ish times. It's not easily reproduced and happens on -a handful of machines only. I guess that's why. But that does not make -it less serious, I think. - -Though there actually is a report here: - https://bugzilla.kernel.org/show_bug.cgi?id=196443 - -And there are reports for Fedora from July: - https://bugzilla.redhat.com/show_bug.cgi?id=1473242 -and CentOS: - https://bugs.centos.org/view.php?id=13964 -and we internally track several dozen reports for the RHEL bug - https://bugzilla.redhat.com/show_bug.cgi?id=1525121 - -Link: http://lkml.kernel.org/r/0485727b2e82da7efbce5f6ba42524b429d0391a.1520011945.git.neelx@redhat.com -Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible") -Signed-off-by: Daniel Vacek -Cc: Mel Gorman -Cc: Michal Hocko -Cc: Paul Burton -Cc: Pavel Tatashin -Cc: Vlastimil Babka -Cc: -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -Signed-off-by: Greg Kroah-Hartman - ---- - mm/page_alloc.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -5307,9 +5307,14 @@ void __meminit memmap_init_zone(unsigned - /* - * Skip to the pfn preceding the next valid one (or - * end_pfn), such that we hit a valid pfn (or end_pfn) -- * on our next iteration of the loop. -+ * on our next iteration of the loop. Note that it needs -+ * to be pageblock aligned even when the region itself -+ * is not. move_freepages_block() can shift ahead of -+ * the valid region but still depends on correct page -+ * metadata. 
- */ -- pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1; -+ pfn = (memblock_next_valid_pfn(pfn, end_pfn) & -+ ~(pageblock_nr_pages-1)) - 1; - #endif - continue; - } diff --git a/queue-4.14/series b/queue-4.14/series index 51d96237f7c..a18753fd2da 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -82,7 +82,6 @@ pci-dwc-fix-enumeration-end-when-reaching-root-subordinate.patch input-matrix_keypad-fix-race-when-disabling-interrupts.patch lib-bug.c-exclude-non-bug-warn-exceptions-from-report_bug.patch mm-memblock.c-hardcode-the-end_pfn-being-1.patch -mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch documentation-sphinx-fix-directive-import-error.patch loop-fix-lost-writes-caused-by-missing-flag.patch virtio_ring-fix-num_free-handling-in-error-case.patch diff --git a/queue-4.15/series b/queue-4.15/series index 0a57b377e6b..64fc17ab48a 100644 --- a/queue-4.15/series +++ b/queue-4.15/series @@ -46,7 +46,6 @@ revert-input-synaptics-lenovo-thinkpad-t460p-devices-should-use-rmi.patch bug-use-pb-in-bug-and-stack-protector-failure.patch lib-bug.c-exclude-non-bug-warn-exceptions-from-report_bug.patch mm-memblock.c-hardcode-the-end_pfn-being-1.patch -mm-page_alloc-fix-memmap_init_zone-pageblock-alignment.patch documentation-sphinx-fix-directive-import-error.patch loop-fix-lost-writes-caused-by-missing-flag.patch virtio_ring-fix-num_free-handling-in-error-case.patch