From: Greg Kroah-Hartman Date: Wed, 17 Oct 2018 07:51:49 +0000 (+0200) Subject: 4.18-stable patches X-Git-Tag: v4.9.134~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2c1b72d10aad54a67bfc0e30d81b00cfff1902b7;p=thirdparty%2Fkernel%2Fstable-queue.git 4.18-stable patches added patches: kvm-ppc-book3s-hv-avoid-crash-from-thp-collapse-during-radix-page-fault.patch --- diff --git a/queue-4.18/add-tests-for-memory.oom.group.patch b/queue-4.18/add-tests-for-memory.oom.group.patch deleted file mode 100644 index b3146cefcdb..00000000000 --- a/queue-4.18/add-tests-for-memory.oom.group.patch +++ /dev/null @@ -1,296 +0,0 @@ -From foo@baz Tue Oct 16 11:10:21 CEST 2018 -From: Jay Kamat -Date: Fri, 7 Sep 2018 14:34:05 -0700 -Subject: Add tests for memory.oom.group - -From: Jay Kamat - -[ Upstream commit a987785dcd6c8ae2915460582aebd6481c81eb67 ] - -Add tests for memory.oom.group for the following cases: -- Killing all processes in a leaf cgroup, but leaving the - parent untouched -- Killing all processes in a parent and leaf cgroup -- Keeping processes marked by OOM_SCORE_ADJ_MIN alive when considered - for being killed by the group oom killer. - -Signed-off-by: Jay Kamat -Acked-by: Roman Gushchin -Signed-off-by: Shuah Khan (Samsung OSG) -Signed-off-by: Sasha Levin -Signed-off-by: Greg Kroah-Hartman ---- - tools/testing/selftests/cgroup/cgroup_util.c | 21 ++ - tools/testing/selftests/cgroup/cgroup_util.h | 1 - tools/testing/selftests/cgroup/test_memcontrol.c | 205 +++++++++++++++++++++++ - 3 files changed, 227 insertions(+) - ---- a/tools/testing/selftests/cgroup/cgroup_util.c -+++ b/tools/testing/selftests/cgroup/cgroup_util.c -@@ -340,3 +340,24 @@ int is_swap_enabled(void) - - return cnt > 1; - } -+ -+int set_oom_adj_score(int pid, int score) -+{ -+ char path[PATH_MAX]; -+ int fd, len; -+ -+ sprintf(path, "/proc/%d/oom_score_adj", pid); -+ -+ fd = open(path, O_WRONLY | O_APPEND); -+ if (fd < 0) -+ return fd; -+ -+ len = dprintf(fd, "%d", score); -+ if (len < 0) { -+ close(fd); -+ return len; -+ } -+ -+ close(fd); -+ return 0; -+} ---- a/tools/testing/selftests/cgroup/cgroup_util.h -+++ b/tools/testing/selftests/cgroup/cgroup_util.h -@@ -39,3 +39,4 @@ extern int get_temp_fd(void); - extern int alloc_pagecache(int fd, size_t size); - extern int alloc_anon(const char *cgroup, void *arg); - extern int is_swap_enabled(void); -+extern int set_oom_adj_score(int pid, int score); ---- a/tools/testing/selftests/cgroup/test_memcontrol.c -+++ b/tools/testing/selftests/cgroup/test_memcontrol.c -@@ -2,6 +2,7 @@ - #define _GNU_SOURCE - - #include -+#include - #include - #include - #include -@@ -202,6 +203,36 @@ static int alloc_pagecache_50M_noexit(co - return 0; - } - -+static int alloc_anon_noexit(const char *cgroup, void *arg) -+{ -+ int ppid = getppid(); -+ -+ if (alloc_anon(cgroup, arg)) -+ return -1; -+ -+ while (getppid() == ppid) -+ sleep(1); -+ -+ return 0; -+} -+ -+/* -+ * Wait until processes are killed asynchronously by the OOM killer -+ * If we exceed a timeout, fail. -+ */ -+static int cg_test_proc_killed(const char *cgroup) -+{ -+ int limit; -+ -+ for (limit = 10; limit > 0; limit--) { -+ if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) -+ return 0; -+ -+ usleep(100000); -+ } -+ return -1; -+} -+ - /* - * First, this test creates the following hierarchy: - * A memory.min = 50M, memory.max = 200M -@@ -964,6 +995,177 @@ cleanup: - return ret; - } - -+/* -+ * This test disables swapping and tries to allocate anonymous memory -+ * up to OOM with memory.group.oom set. Then it checks that all -+ * processes in the leaf (but not the parent) were killed. -+ */ -+static int test_memcg_oom_group_leaf_events(const char *root) -+{ -+ int ret = KSFT_FAIL; -+ char *parent, *child; -+ -+ parent = cg_name(root, "memcg_test_0"); -+ child = cg_name(root, "memcg_test_0/memcg_test_1"); -+ -+ if (!parent || !child) -+ goto cleanup; -+ -+ if (cg_create(parent)) -+ goto cleanup; -+ -+ if (cg_create(child)) -+ goto cleanup; -+ -+ if (cg_write(parent, "cgroup.subtree_control", "+memory")) -+ goto cleanup; -+ -+ if (cg_write(child, "memory.max", "50M")) -+ goto cleanup; -+ -+ if (cg_write(child, "memory.swap.max", "0")) -+ goto cleanup; -+ -+ if (cg_write(child, "memory.oom.group", "1")) -+ goto cleanup; -+ -+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); -+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); -+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); -+ if (!cg_run(child, alloc_anon, (void *)MB(100))) -+ goto cleanup; -+ -+ if (cg_test_proc_killed(child)) -+ goto cleanup; -+ -+ if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) -+ goto cleanup; -+ -+ if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0) -+ goto cleanup; -+ -+ ret = KSFT_PASS; -+ -+cleanup: -+ if (child) -+ cg_destroy(child); -+ if (parent) -+ cg_destroy(parent); -+ free(child); -+ free(parent); -+ -+ return ret; -+} -+ -+/* -+ * This test disables swapping and tries to allocate anonymous memory -+ * up to OOM with memory.group.oom set. Then it checks that all -+ * processes in the parent and leaf were killed. -+ */ -+static int test_memcg_oom_group_parent_events(const char *root) -+{ -+ int ret = KSFT_FAIL; -+ char *parent, *child; -+ -+ parent = cg_name(root, "memcg_test_0"); -+ child = cg_name(root, "memcg_test_0/memcg_test_1"); -+ -+ if (!parent || !child) -+ goto cleanup; -+ -+ if (cg_create(parent)) -+ goto cleanup; -+ -+ if (cg_create(child)) -+ goto cleanup; -+ -+ if (cg_write(parent, "memory.max", "80M")) -+ goto cleanup; -+ -+ if (cg_write(parent, "memory.swap.max", "0")) -+ goto cleanup; -+ -+ if (cg_write(parent, "memory.oom.group", "1")) -+ goto cleanup; -+ -+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); -+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); -+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); -+ -+ if (!cg_run(child, alloc_anon, (void *)MB(100))) -+ goto cleanup; -+ -+ if (cg_test_proc_killed(child)) -+ goto cleanup; -+ if (cg_test_proc_killed(parent)) -+ goto cleanup; -+ -+ ret = KSFT_PASS; -+ -+cleanup: -+ if (child) -+ cg_destroy(child); -+ if (parent) -+ cg_destroy(parent); -+ free(child); -+ free(parent); -+ -+ return ret; -+} -+ -+/* -+ * This test disables swapping and tries to allocate anonymous memory -+ * up to OOM with memory.group.oom set. Then it checks that all -+ * processes were killed except those set with OOM_SCORE_ADJ_MIN -+ */ -+static int test_memcg_oom_group_score_events(const char *root) -+{ -+ int ret = KSFT_FAIL; -+ char *memcg; -+ int safe_pid; -+ -+ memcg = cg_name(root, "memcg_test_0"); -+ -+ if (!memcg) -+ goto cleanup; -+ -+ if (cg_create(memcg)) -+ goto cleanup; -+ -+ if (cg_write(memcg, "memory.max", "50M")) -+ goto cleanup; -+ -+ if (cg_write(memcg, "memory.swap.max", "0")) -+ goto cleanup; -+ -+ if (cg_write(memcg, "memory.oom.group", "1")) -+ goto cleanup; -+ -+ safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); -+ if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) -+ goto cleanup; -+ -+ cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); -+ if (!cg_run(memcg, alloc_anon, (void *)MB(100))) -+ goto cleanup; -+ -+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) -+ goto cleanup; -+ -+ if (kill(safe_pid, SIGKILL)) -+ goto cleanup; -+ -+ ret = KSFT_PASS; -+ -+cleanup: -+ if (memcg) -+ cg_destroy(memcg); -+ free(memcg); -+ -+ return ret; -+} -+ -+ - #define T(x) { x, #x } - struct memcg_test { - int (*fn)(const char *root); -@@ -978,6 +1180,9 @@ struct memcg_test { - T(test_memcg_oom_events), - T(test_memcg_swap_max), - T(test_memcg_sock), -+ T(test_memcg_oom_group_leaf_events), -+ T(test_memcg_oom_group_parent_events), -+ T(test_memcg_oom_group_score_events), - }; - #undef T - diff --git a/queue-4.18/kvm-ppc-book3s-hv-avoid-crash-from-thp-collapse-during-radix-page-fault.patch b/queue-4.18/kvm-ppc-book3s-hv-avoid-crash-from-thp-collapse-during-radix-page-fault.patch new file mode 100644 index 00000000000..bca7f51bc9a --- /dev/null +++ b/queue-4.18/kvm-ppc-book3s-hv-avoid-crash-from-thp-collapse-during-radix-page-fault.patch @@ -0,0 +1,55 @@ +From 6579804c431712d56956a63b1a01509441cc6800 Mon Sep 17 00:00:00 2001 +From: Paul Mackerras +Date: Thu, 4 Oct 2018 14:51:11 +1000 +Subject: KVM: PPC: Book3S HV: Avoid crash from THP collapse during radix page fault + +From: Paul Mackerras + +commit 6579804c431712d56956a63b1a01509441cc6800 upstream. + +Commit 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to +determine host mapping size", 2018-09-11) added a call to +__find_linux_pte() and a dereference of the returned PTE pointer to the +radix page fault path in the common case where the page is normal +system memory. Previously, __find_linux_pte() was only called for +mappings to physical addresses which don't have a page struct (e.g. +memory-mapped I/O) or where the page struct is marked as reserved +memory. + +This exposes us to the possibility that the returned PTE pointer +could be NULL, for example in the case of a concurrent THP collapse +operation. Dereferencing the returned NULL pointer causes a host +crash. + +To fix this, we check for NULL, and if it is NULL, we retry the +operation by returning to the guest, with the expectation that it +will generate the same page fault again (unless of course it has +been fixed up by another CPU in the meantime). + +Fixes: 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size") +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_64_mmu_radix.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c ++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c +@@ -659,6 +659,16 @@ int kvmppc_book3s_radix_page_fault(struc + */ + local_irq_disable(); + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); ++ /* ++ * If the PTE disappeared temporarily due to a THP ++ * collapse, just return and let the guest try again. ++ */ ++ if (!ptep) { ++ local_irq_enable(); ++ if (page) ++ put_page(page); ++ return RESUME_GUEST; ++ } + pte = *ptep; + local_irq_enable(); + diff --git a/queue-4.18/series b/queue-4.18/series index 0fba4f69374..e5a422c1d0f 100644 --- a/queue-4.18/series +++ b/queue-4.18/series @@ -76,7 +76,6 @@ asoc-rsnd-adg-care-clock-frequency-size.patch asoc-rsnd-don-t-fallback-to-pio-mode-when-eprobe_defer.patch hwmon-nct6775-fix-access-to-fan-pulse-registers.patch fix-cg_read_strcmp.patch -add-tests-for-memory.oom.group.patch asoc-amd-ensure-reset-bit-is-cleared-before-configuring.patch drm-pl111-make-sure-of_device_id-tables-are-null-terminated.patch bluetooth-smp-fix-trying-to-use-non-existent-local-oob-data.patch @@ -133,3 +132,4 @@ mm-thp-fix-call-to-mmu_notifier-in-set_pmd_migration_entry-v2.patch filesystem-dax-fix-dax_layout_busy_page-livelock.patch mm-preserve-_page_devmap-across-mprotect-calls.patch i2c-i2c-scmi-fix-for-i2c_smbus_write_block_data.patch +kvm-ppc-book3s-hv-avoid-crash-from-thp-collapse-during-radix-page-fault.patch