]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 14:57:28 +0000 (16:57 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 14:57:28 +0000 (16:57 +0200)
added patches:
mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch
selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch
selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch
selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch

queue-5.15/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch [new file with mode: 0644]
queue-5.15/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch [new file with mode: 0644]
queue-5.15/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch [new file with mode: 0644]
queue-5.15/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch b/queue-5.15/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch
new file mode 100644 (file)
index 0000000..5e17170
--- /dev/null
@@ -0,0 +1,178 @@
+From 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Tue, 22 Mar 2022 14:42:15 -0700
+Subject: mm: don't skip swap entry even if zap_details specified
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 upstream.
+
+Patch series "mm: Rework zap ptes on swap entries", v5.
+
+Patch 1 should fix a long standing bug for zap_pte_range() on
+zap_details usage.  The risk is we could have some swap entries skipped
+while we should have zapped them.
+
+Migration entries are not the major concern because file-backed memory
+is always zapped in the pattern "first time without page lock, then
+re-zap with page lock", hence the 2nd zap will always make sure all
+migration entries are already recovered.
+
+However, there can be issues when real swap entries get skipped
+erroneously.  There's a reproducer provided in the commit message of
+patch 1 for that.
+
+Patch 2-4 are cleanups that are based on patch 1.  After the whole
+patchset applied, we should have a very clean view of zap_pte_range().
+
+Only patch 1 needs to be backported to stable if necessary.
+
+This patch (of 4):
+
+The "details" pointer shouldn't be the token to decide whether we should
+skip swap entries.
+
+For example, when the caller specifies details->zap_mapping==NULL, it
+means the user wants to zap all the pages (including COWed pages), so
+we need to look into swap entries because there can be private COWed
+pages that were swapped out.
+
+Skipping some swap entries when details is non-NULL may lead to wrongly
+leaving some of the swap entries while we should have zapped them.
+
+A reproducer of the problem:
+
+===8<===
+        #define _GNU_SOURCE         /* See feature_test_macros(7) */
+        #include <stdio.h>
+        #include <assert.h>
+        #include <unistd.h>
+        #include <sys/mman.h>
+        #include <sys/types.h>
+
+        int page_size;
+        int shmem_fd;
+        char *buffer;
+
+        void main(void)
+        {
+                int ret;
+                char val;
+
+                page_size = getpagesize();
+                shmem_fd = memfd_create("test", 0);
+                assert(shmem_fd >= 0);
+
+                ret = ftruncate(shmem_fd, page_size * 2);
+                assert(ret == 0);
+
+                buffer = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+                                MAP_PRIVATE, shmem_fd, 0);
+                assert(buffer != MAP_FAILED);
+
+                /* Write private page, swap it out */
+                buffer[page_size] = 1;
+                madvise(buffer, page_size * 2, MADV_PAGEOUT);
+
+                /* This should drop private buffer[page_size] already */
+                ret = ftruncate(shmem_fd, page_size);
+                assert(ret == 0);
+                /* Recover the size */
+                ret = ftruncate(shmem_fd, page_size * 2);
+                assert(ret == 0);
+
+                /* Re-read the data, it should be all zero */
+                val = buffer[page_size];
+                if (val == 0)
+                        printf("Good\n");
+                else
+                        printf("BUG\n");
+        }
+===8<===
+
+We don't need to touch the pmd path, because pmd never had an issue with
+swap entries.  For example, shmem pmd migration will always be split into
+pte level, and the same applies to swapping of anonymous memory.
+
+Add another helper should_zap_cows() so that we can also check whether we
+should zap private mappings when there's no page pointer specified.
+
+This patch drops the old trick of skipping every swap entry whenever
+details is non-NULL, so we handle swap ptes coherently.  Meanwhile we do
+the same check for migration, hwpoison and genuine swap entries too.
+
+To be explicit, we should still remember to keep the private entries if
+even_cows==false, and always zap them when even_cows==true.
+
+The issue seems to exist starting from the initial commit of git.
+
+[peterx@redhat.com: comment tweaks]
+  Link: https://lkml.kernel.org/r/20220217060746.71256-2-peterx@redhat.com
+
+Link: https://lkml.kernel.org/r/20220217060746.71256-1-peterx@redhat.com
+Link: https://lkml.kernel.org/r/20220216094810.60572-1-peterx@redhat.com
+Link: https://lkml.kernel.org/r/20220216094810.60572-2-peterx@redhat.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1301,6 +1301,17 @@ copy_page_range(struct vm_area_struct *d
+       return ret;
+ }
++/* Whether we should zap all COWed (private) pages too */
++static inline bool should_zap_cows(struct zap_details *details)
++{
++      /* By default, zap all pages */
++      if (!details)
++              return true;
++
++      /* Or, we zap COWed pages only if the caller wants to */
++      return !details->check_mapping;
++}
++
+ static unsigned long zap_pte_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+@@ -1396,16 +1407,18 @@ again:
+                       continue;
+               }
+-              /* If details->check_mapping, we leave swap entries. */
+-              if (unlikely(details))
+-                      continue;
+-
+-              if (!non_swap_entry(entry))
++              if (!non_swap_entry(entry)) {
++                      /* Genuine swap entry, hence a private anon page */
++                      if (!should_zap_cows(details))
++                              continue;
+                       rss[MM_SWAPENTS]--;
+-              else if (is_migration_entry(entry)) {
++              } else if (is_migration_entry(entry)) {
+                       struct page *page;
+                       page = pfn_swap_entry_to_page(entry);
++                      if (details && details->check_mapping &&
++                          details->check_mapping != page_rmapping(page))
++                              continue;
+                       rss[mm_counter(page)]--;
+               }
+               if (unlikely(!free_swap_and_cache(entry)))
diff --git a/queue-5.15/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch b/queue-5.15/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch
new file mode 100644 (file)
index 0000000..2f7aec9
--- /dev/null
@@ -0,0 +1,34 @@
+From b09c2baa56347ae65795350dfcc633dedb1c2970 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 6 Jan 2022 11:02:29 -1000
+Subject: selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+commit b09c2baa56347ae65795350dfcc633dedb1c2970 upstream.
+
+0644 is an odd perm to create a cgroup which is a directory. Use the regular
+0755 instead. This is necessary for euid switching test case.
+
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/cgroup/cgroup_util.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/cgroup/cgroup_util.c
++++ b/tools/testing/selftests/cgroup/cgroup_util.c
+@@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, siz
+ int cg_create(const char *cgroup)
+ {
+-      return mkdir(cgroup, 0644);
++      return mkdir(cgroup, 0755);
+ }
+ int cg_wait_for_proc_count(const char *cgroup, int count)
diff --git a/queue-5.15/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch b/queue-5.15/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch
new file mode 100644 (file)
index 0000000..cb7ebfc
--- /dev/null
@@ -0,0 +1,148 @@
+From bf35a7879f1dfb0d050fe779168bcf25c7de66f5 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 6 Jan 2022 11:02:29 -1000
+Subject: selftests: cgroup: Test open-time cgroup namespace usage for migration checks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+commit bf35a7879f1dfb0d050fe779168bcf25c7de66f5 upstream.
+
+When a task is writing to an fd opened by a different task, the perm check
+should use the cgroup namespace of the latter task. Add a test for it.
+
+Tested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/cgroup/test_core.c |   97 +++++++++++++++++++++++++++++
+ 1 file changed, 97 insertions(+)
+
+--- a/tools/testing/selftests/cgroup/test_core.c
++++ b/tools/testing/selftests/cgroup/test_core.c
+@@ -1,11 +1,14 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
++#define _GNU_SOURCE
+ #include <linux/limits.h>
++#include <linux/sched.h>
+ #include <sys/types.h>
+ #include <sys/mman.h>
+ #include <sys/wait.h>
+ #include <unistd.h>
+ #include <fcntl.h>
++#include <sched.h>
+ #include <stdio.h>
+ #include <errno.h>
+ #include <signal.h>
+@@ -741,6 +744,99 @@ cleanup:
+       return ret;
+ }
++struct lesser_ns_open_thread_arg {
++      const char      *path;
++      int             fd;
++      int             err;
++};
++
++static int lesser_ns_open_thread_fn(void *arg)
++{
++      struct lesser_ns_open_thread_arg *targ = arg;
++
++      targ->fd = open(targ->path, O_RDWR);
++      targ->err = errno;
++      return 0;
++}
++
++/*
++ * cgroup migration permission check should be performed based on the cgroup
++ * namespace at the time of open instead of write.
++ */
++static int test_cgcore_lesser_ns_open(const char *root)
++{
++      static char stack[65536];
++      const uid_t test_euid = 65534;  /* usually nobody, any !root is fine */
++      int ret = KSFT_FAIL;
++      char *cg_test_a = NULL, *cg_test_b = NULL;
++      char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
++      int cg_test_b_procs_fd = -1;
++      struct lesser_ns_open_thread_arg targ = { .fd = -1 };
++      pid_t pid;
++      int status;
++
++      cg_test_a = cg_name(root, "cg_test_a");
++      cg_test_b = cg_name(root, "cg_test_b");
++
++      if (!cg_test_a || !cg_test_b)
++              goto cleanup;
++
++      cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
++      cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
++
++      if (!cg_test_a_procs || !cg_test_b_procs)
++              goto cleanup;
++
++      if (cg_create(cg_test_a) || cg_create(cg_test_b))
++              goto cleanup;
++
++      if (cg_enter_current(cg_test_b))
++              goto cleanup;
++
++      if (chown(cg_test_a_procs, test_euid, -1) ||
++          chown(cg_test_b_procs, test_euid, -1))
++              goto cleanup;
++
++      targ.path = cg_test_b_procs;
++      pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
++                  CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
++                  &targ);
++      if (pid < 0)
++              goto cleanup;
++
++      if (waitpid(pid, &status, 0) < 0)
++              goto cleanup;
++
++      if (!WIFEXITED(status))
++              goto cleanup;
++
++      cg_test_b_procs_fd = targ.fd;
++      if (cg_test_b_procs_fd < 0)
++              goto cleanup;
++
++      if (cg_enter_current(cg_test_a))
++              goto cleanup;
++
++      if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
++              goto cleanup;
++
++      ret = KSFT_PASS;
++
++cleanup:
++      cg_enter_current(root);
++      if (cg_test_b_procs_fd >= 0)
++              close(cg_test_b_procs_fd);
++      if (cg_test_b)
++              cg_destroy(cg_test_b);
++      if (cg_test_a)
++              cg_destroy(cg_test_a);
++      free(cg_test_b_procs);
++      free(cg_test_a_procs);
++      free(cg_test_b);
++      free(cg_test_a);
++      return ret;
++}
++
+ #define T(x) { x, #x }
+ struct corecg_test {
+       int (*fn)(const char *root);
+@@ -757,6 +853,7 @@ struct corecg_test {
+       T(test_cgcore_thread_migration),
+       T(test_cgcore_destroy),
+       T(test_cgcore_lesser_euid_open),
++      T(test_cgcore_lesser_ns_open),
+ };
+ #undef T
diff --git a/queue-5.15/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch b/queue-5.15/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch
new file mode 100644 (file)
index 0000000..52e0547
--- /dev/null
@@ -0,0 +1,107 @@
+From 613e040e4dc285367bff0f8f75ea59839bc10947 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 6 Jan 2022 11:02:29 -1000
+Subject: selftests: cgroup: Test open-time credential usage for migration checks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 613e040e4dc285367bff0f8f75ea59839bc10947 upstream.
+
+When a task is writing to an fd opened by a different task, the perm check
+should use the credentials of the latter task. Add a test for it.
+
+Tested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/cgroup/test_core.c |   68 +++++++++++++++++++++++++++++
+ 1 file changed, 68 insertions(+)
+
+--- a/tools/testing/selftests/cgroup/test_core.c
++++ b/tools/testing/selftests/cgroup/test_core.c
+@@ -674,6 +674,73 @@ cleanup:
+       return ret;
+ }
++/*
++ * cgroup migration permission check should be performed based on the
++ * credentials at the time of open instead of write.
++ */
++static int test_cgcore_lesser_euid_open(const char *root)
++{
++      const uid_t test_euid = 65534;  /* usually nobody, any !root is fine */
++      int ret = KSFT_FAIL;
++      char *cg_test_a = NULL, *cg_test_b = NULL;
++      char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
++      int cg_test_b_procs_fd = -1;
++      uid_t saved_uid;
++
++      cg_test_a = cg_name(root, "cg_test_a");
++      cg_test_b = cg_name(root, "cg_test_b");
++
++      if (!cg_test_a || !cg_test_b)
++              goto cleanup;
++
++      cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
++      cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
++
++      if (!cg_test_a_procs || !cg_test_b_procs)
++              goto cleanup;
++
++      if (cg_create(cg_test_a) || cg_create(cg_test_b))
++              goto cleanup;
++
++      if (cg_enter_current(cg_test_a))
++              goto cleanup;
++
++      if (chown(cg_test_a_procs, test_euid, -1) ||
++          chown(cg_test_b_procs, test_euid, -1))
++              goto cleanup;
++
++      saved_uid = geteuid();
++      if (seteuid(test_euid))
++              goto cleanup;
++
++      cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
++
++      if (seteuid(saved_uid))
++              goto cleanup;
++
++      if (cg_test_b_procs_fd < 0)
++              goto cleanup;
++
++      if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
++              goto cleanup;
++
++      ret = KSFT_PASS;
++
++cleanup:
++      cg_enter_current(root);
++      if (cg_test_b_procs_fd >= 0)
++              close(cg_test_b_procs_fd);
++      if (cg_test_b)
++              cg_destroy(cg_test_b);
++      if (cg_test_a)
++              cg_destroy(cg_test_a);
++      free(cg_test_b_procs);
++      free(cg_test_a_procs);
++      free(cg_test_b);
++      free(cg_test_a);
++      return ret;
++}
++
+ #define T(x) { x, #x }
+ struct corecg_test {
+       int (*fn)(const char *root);
+@@ -689,6 +756,7 @@ struct corecg_test {
+       T(test_cgcore_proc_migration),
+       T(test_cgcore_thread_migration),
+       T(test_cgcore_destroy),
++      T(test_cgcore_lesser_euid_open),
+ };
+ #undef T
index c6092a08360100c728cab0bef383cc81d4de5dfd..c9a792cba118a4ef55de2a889ec24c2db23d2087 100644 (file)
@@ -261,3 +261,7 @@ dmaengine-revert-dmaengine-shdma-fix-runtime-pm-imbalance-on-error.patch
 kvm-avoid-null-pointer-dereference-in-kvm_dirty_ring_push.patch
 revert-net-mlx5-accept-devlink-user-input-after-driver-initialization-complete.patch
 ubsan-remove-config_ubsan_object_size.patch
+selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch
+selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch
+selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch
+mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch