From: Greg Kroah-Hartman Date: Mon, 11 Apr 2022 14:57:28 +0000 (+0200) Subject: 5.15-stable patches X-Git-Tag: v4.9.310~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=469d0d8891baf8670bce41da40bba96280bbcd69;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch --- diff --git a/queue-5.15/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch b/queue-5.15/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch new file mode 100644 index 00000000000..5e1717011d5 --- /dev/null +++ b/queue-5.15/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch @@ -0,0 +1,178 @@ +From 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 22 Mar 2022 14:42:15 -0700 +Subject: mm: don't skip swap entry even if zap_details specified + +From: Peter Xu + +commit 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 upstream. + +Patch series "mm: Rework zap ptes on swap entries", v5. + +Patch 1 should fix a long standing bug for zap_pte_range() on +zap_details usage. The risk is we could have some swap entries skipped +while we should have zapped them. + +Migration entries are not the major concern because file backed memory +is always zapped in the pattern that "first time without page lock, then +re-zap with page lock" hence the 2nd zap will always make sure all +migration entries are already recovered. + +However there can be issues with real swap entries being skipped +erroneously. There's a reproducer provided in the commit message of patch +1 for that. + +Patch 2-4 are cleanups that are based on patch 1. After the whole +patchset is applied, we should have a very clean view of zap_pte_range(). 
+ +Only patch 1 needs to be backported to stable if necessary. + +This patch (of 4): + +The "details" pointer shouldn't be the token to decide whether we should +skip swap entries. + +For example, when the callers specified details->zap_mapping==NULL, it +means the user wants to zap all the pages (including COWed pages), then +we need to look into swap entries because there can be private COWed +pages that were swapped out. + +Skipping some swap entries when details is non-NULL may lead to wrongly +leaving some of the swap entries while we should have zapped them. + +A reproducer of the problem: + +===8<=== + #define _GNU_SOURCE /* See feature_test_macros(7) */ + #include <stdio.h> + #include <assert.h> + #include <unistd.h> + #include <sys/mman.h> + #include <sys/types.h> + + int page_size; + int shmem_fd; + char *buffer; + + void main(void) + { + int ret; + char val; + + page_size = getpagesize(); + shmem_fd = memfd_create("test", 0); + assert(shmem_fd >= 0); + + ret = ftruncate(shmem_fd, page_size * 2); + assert(ret == 0); + + buffer = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE, shmem_fd, 0); + assert(buffer != MAP_FAILED); + + /* Write private page, swap it out */ + buffer[page_size] = 1; + madvise(buffer, page_size * 2, MADV_PAGEOUT); + + /* This should drop private buffer[page_size] already */ + ret = ftruncate(shmem_fd, page_size); + assert(ret == 0); + /* Recover the size */ + ret = ftruncate(shmem_fd, page_size * 2); + assert(ret == 0); + + /* Re-read the data, it should be all zero */ + val = buffer[page_size]; + if (val == 0) + printf("Good\n"); + else + printf("BUG\n"); + } +===8<=== + +We don't need to touch up the pmd path, because pmd never had an issue with +swap entries. For example, shmem pmd migration will always be split into +pte level, and the same applies to swapping of anonymous memory. + +Add another helper should_zap_cows() so that we can also check whether we +should zap private mappings when there's no page pointer specified. + +This patch drops that trick, so we handle swap ptes coherently. 
Meanwhile +we should do the same check upon migration entry, hwpoison entry and +genuine swap entries too. + +To be explicit, we should still remember to keep the private entries if +even_cows==false, and always zap them when even_cows==true. + +The issue seems to exist starting from the initial commit of git. + +[peterx@redhat.com: comment tweaks] + Link: https://lkml.kernel.org/r/20220217060746.71256-2-peterx@redhat.com + +Link: https://lkml.kernel.org/r/20220217060746.71256-1-peterx@redhat.com +Link: https://lkml.kernel.org/r/20220216094810.60572-1-peterx@redhat.com +Link: https://lkml.kernel.org/r/20220216094810.60572-2-peterx@redhat.com +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Peter Xu +Reviewed-by: John Hubbard +Cc: David Hildenbrand +Cc: Hugh Dickins +Cc: Alistair Popple +Cc: Andrea Arcangeli +Cc: "Kirill A . Shutemov" +Cc: Matthew Wilcox +Cc: Vlastimil Babka +Cc: Yang Shi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1301,6 +1301,17 @@ copy_page_range(struct vm_area_struct *d + return ret; + } + ++/* Whether we should zap all COWed (private) pages too */ ++static inline bool should_zap_cows(struct zap_details *details) ++{ ++ /* By default, zap all pages */ ++ if (!details) ++ return true; ++ ++ /* Or, we zap COWed pages only if the caller wants to */ ++ return !details->check_mapping; ++} ++ + static unsigned long zap_pte_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, +@@ -1396,16 +1407,18 @@ again: + continue; + } + +- /* If details->check_mapping, we leave swap entries. 
*/ +- if (unlikely(details)) +- continue; +- +- if (!non_swap_entry(entry)) ++ if (!non_swap_entry(entry)) { ++ /* Genuine swap entry, hence a private anon page */ ++ if (!should_zap_cows(details)) ++ continue; + rss[MM_SWAPENTS]--; +- else if (is_migration_entry(entry)) { ++ } else if (is_migration_entry(entry)) { + struct page *page; + + page = pfn_swap_entry_to_page(entry); ++ if (details && details->check_mapping && ++ details->check_mapping != page_rmapping(page)) ++ continue; + rss[mm_counter(page)]--; + } + if (unlikely(!free_swap_and_cache(entry))) diff --git a/queue-5.15/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch b/queue-5.15/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch new file mode 100644 index 00000000000..2f7aec97df8 --- /dev/null +++ b/queue-5.15/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch @@ -0,0 +1,34 @@ +From b09c2baa56347ae65795350dfcc633dedb1c2970 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:29 -1000 +Subject: selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit b09c2baa56347ae65795350dfcc633dedb1c2970 upstream. + +0644 is an odd perm to create a cgroup which is a directory. Use the regular +0755 instead. This is necessary for euid switching test case. 
+ +Reviewed-by: Michal Koutný +Signed-off-by: Tejun Heo +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/cgroup/cgroup_util.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/cgroup/cgroup_util.c ++++ b/tools/testing/selftests/cgroup/cgroup_util.c +@@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, siz + + int cg_create(const char *cgroup) + { +- return mkdir(cgroup, 0644); ++ return mkdir(cgroup, 0755); + } + + int cg_wait_for_proc_count(const char *cgroup, int count) diff --git a/queue-5.15/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch b/queue-5.15/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch new file mode 100644 index 00000000000..cb7ebfc0930 --- /dev/null +++ b/queue-5.15/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch @@ -0,0 +1,148 @@ +From bf35a7879f1dfb0d050fe779168bcf25c7de66f5 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:29 -1000 +Subject: selftests: cgroup: Test open-time cgroup namespace usage for migration checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit bf35a7879f1dfb0d050fe779168bcf25c7de66f5 upstream. + +When a task is writing to an fd opened by a different task, the perm check +should use the cgroup namespace of the latter task. Add a test for it. 
+ +Tested-by: Michal Koutný +Signed-off-by: Tejun Heo +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/cgroup/test_core.c | 97 +++++++++++++++++++++++++++++ + 1 file changed, 97 insertions(+) + +--- a/tools/testing/selftests/cgroup/test_core.c ++++ b/tools/testing/selftests/cgroup/test_core.c +@@ -1,11 +1,14 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + ++#define _GNU_SOURCE + #include ++#include + #include + #include + #include + #include + #include ++#include + #include + #include + #include +@@ -741,6 +744,99 @@ cleanup: + return ret; + } + ++struct lesser_ns_open_thread_arg { ++ const char *path; ++ int fd; ++ int err; ++}; ++ ++static int lesser_ns_open_thread_fn(void *arg) ++{ ++ struct lesser_ns_open_thread_arg *targ = arg; ++ ++ targ->fd = open(targ->path, O_RDWR); ++ targ->err = errno; ++ return 0; ++} ++ ++/* ++ * cgroup migration permission check should be performed based on the cgroup ++ * namespace at the time of open instead of write. 
++ */ ++static int test_cgcore_lesser_ns_open(const char *root) ++{ ++ static char stack[65536]; ++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ ++ int ret = KSFT_FAIL; ++ char *cg_test_a = NULL, *cg_test_b = NULL; ++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; ++ int cg_test_b_procs_fd = -1; ++ struct lesser_ns_open_thread_arg targ = { .fd = -1 }; ++ pid_t pid; ++ int status; ++ ++ cg_test_a = cg_name(root, "cg_test_a"); ++ cg_test_b = cg_name(root, "cg_test_b"); ++ ++ if (!cg_test_a || !cg_test_b) ++ goto cleanup; ++ ++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); ++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); ++ ++ if (!cg_test_a_procs || !cg_test_b_procs) ++ goto cleanup; ++ ++ if (cg_create(cg_test_a) || cg_create(cg_test_b)) ++ goto cleanup; ++ ++ if (cg_enter_current(cg_test_b)) ++ goto cleanup; ++ ++ if (chown(cg_test_a_procs, test_euid, -1) || ++ chown(cg_test_b_procs, test_euid, -1)) ++ goto cleanup; ++ ++ targ.path = cg_test_b_procs; ++ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), ++ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, ++ &targ); ++ if (pid < 0) ++ goto cleanup; ++ ++ if (waitpid(pid, &status, 0) < 0) ++ goto cleanup; ++ ++ if (!WIFEXITED(status)) ++ goto cleanup; ++ ++ cg_test_b_procs_fd = targ.fd; ++ if (cg_test_b_procs_fd < 0) ++ goto cleanup; ++ ++ if (cg_enter_current(cg_test_a)) ++ goto cleanup; ++ ++ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) ++ goto cleanup; ++ ++ ret = KSFT_PASS; ++ ++cleanup: ++ cg_enter_current(root); ++ if (cg_test_b_procs_fd >= 0) ++ close(cg_test_b_procs_fd); ++ if (cg_test_b) ++ cg_destroy(cg_test_b); ++ if (cg_test_a) ++ cg_destroy(cg_test_a); ++ free(cg_test_b_procs); ++ free(cg_test_a_procs); ++ free(cg_test_b); ++ free(cg_test_a); ++ return ret; ++} ++ + #define T(x) { x, #x } + struct corecg_test { + int (*fn)(const char *root); +@@ -757,6 +853,7 @@ struct corecg_test { + 
T(test_cgcore_thread_migration), + T(test_cgcore_destroy), + T(test_cgcore_lesser_euid_open), ++ T(test_cgcore_lesser_ns_open), + }; + #undef T + diff --git a/queue-5.15/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch b/queue-5.15/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch new file mode 100644 index 00000000000..52e05470e95 --- /dev/null +++ b/queue-5.15/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch @@ -0,0 +1,107 @@ +From 613e040e4dc285367bff0f8f75ea59839bc10947 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:29 -1000 +Subject: selftests: cgroup: Test open-time credential usage for migration checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit 613e040e4dc285367bff0f8f75ea59839bc10947 upstream. + +When a task is writing to an fd opened by a different task, the perm check +should use the credentials of the latter task. Add a test for it. + +Tested-by: Michal Koutný +Signed-off-by: Tejun Heo +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/cgroup/test_core.c | 68 +++++++++++++++++++++++++++++ + 1 file changed, 68 insertions(+) + +--- a/tools/testing/selftests/cgroup/test_core.c ++++ b/tools/testing/selftests/cgroup/test_core.c +@@ -674,6 +674,73 @@ cleanup: + return ret; + } + ++/* ++ * cgroup migration permission check should be performed based on the ++ * credentials at the time of open instead of write. 
++ */ ++static int test_cgcore_lesser_euid_open(const char *root) ++{ ++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ ++ int ret = KSFT_FAIL; ++ char *cg_test_a = NULL, *cg_test_b = NULL; ++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; ++ int cg_test_b_procs_fd = -1; ++ uid_t saved_uid; ++ ++ cg_test_a = cg_name(root, "cg_test_a"); ++ cg_test_b = cg_name(root, "cg_test_b"); ++ ++ if (!cg_test_a || !cg_test_b) ++ goto cleanup; ++ ++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); ++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); ++ ++ if (!cg_test_a_procs || !cg_test_b_procs) ++ goto cleanup; ++ ++ if (cg_create(cg_test_a) || cg_create(cg_test_b)) ++ goto cleanup; ++ ++ if (cg_enter_current(cg_test_a)) ++ goto cleanup; ++ ++ if (chown(cg_test_a_procs, test_euid, -1) || ++ chown(cg_test_b_procs, test_euid, -1)) ++ goto cleanup; ++ ++ saved_uid = geteuid(); ++ if (seteuid(test_euid)) ++ goto cleanup; ++ ++ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); ++ ++ if (seteuid(saved_uid)) ++ goto cleanup; ++ ++ if (cg_test_b_procs_fd < 0) ++ goto cleanup; ++ ++ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) ++ goto cleanup; ++ ++ ret = KSFT_PASS; ++ ++cleanup: ++ cg_enter_current(root); ++ if (cg_test_b_procs_fd >= 0) ++ close(cg_test_b_procs_fd); ++ if (cg_test_b) ++ cg_destroy(cg_test_b); ++ if (cg_test_a) ++ cg_destroy(cg_test_a); ++ free(cg_test_b_procs); ++ free(cg_test_a_procs); ++ free(cg_test_b); ++ free(cg_test_a); ++ return ret; ++} ++ + #define T(x) { x, #x } + struct corecg_test { + int (*fn)(const char *root); +@@ -689,6 +756,7 @@ struct corecg_test { + T(test_cgcore_proc_migration), + T(test_cgcore_thread_migration), + T(test_cgcore_destroy), ++ T(test_cgcore_lesser_euid_open), + }; + #undef T + diff --git a/queue-5.15/series b/queue-5.15/series index c6092a08360..c9a792cba11 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -261,3 +261,7 @@ 
dmaengine-revert-dmaengine-shdma-fix-runtime-pm-imbalance-on-error.patch kvm-avoid-null-pointer-dereference-in-kvm_dirty_ring_push.patch revert-net-mlx5-accept-devlink-user-input-after-driver-initialization-complete.patch ubsan-remove-config_ubsan_object_size.patch +selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch +selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch +selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch +mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch