From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 12 Mar 2021 10:15:29 +0000 (+0100)
Subject: 5.4-stable patches
X-Git-Tag: v4.4.262~118
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8d906a1a7a6b0d4b130566d6bf38717524237229;p=thirdparty%2Fkernel%2Fstable-queue.git

5.4-stable patches

added patches:
	cifs-return-proper-error-code-in-statfs-2.patch
	mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch
	revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch
---

diff --git a/queue-5.4/cifs-return-proper-error-code-in-statfs-2.patch b/queue-5.4/cifs-return-proper-error-code-in-statfs-2.patch
new file mode 100644
index 00000000000..1616675b133
--- /dev/null
+++ b/queue-5.4/cifs-return-proper-error-code-in-statfs-2.patch
@@ -0,0 +1,35 @@
+From 14302ee3301b3a77b331cc14efb95bf7184c73cc Mon Sep 17 00:00:00 2001
+From: Paulo Alcantara <pc@cjr.nz>
+Date: Mon, 8 Mar 2021 12:00:49 -0300
+Subject: cifs: return proper error code in statfs(2)
+
+From: Paulo Alcantara <pc@cjr.nz>
+
+commit 14302ee3301b3a77b331cc14efb95bf7184c73cc upstream.
+
+In cifs_statfs(), if server->ops->queryfs is not NULL, then we should
+use its return value rather than always returning 0.  Instead, use rc
+variable as it is properly set to 0 in case there is no
+server->ops->queryfs.
+
+Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Reviewed-by: Aurelien Aptel <aaptel@suse.com>
+Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
+CC: <stable@vger.kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/cifsfs.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -278,7 +278,7 @@ cifs_statfs(struct dentry *dentry, struc
+ 		rc = server->ops->queryfs(xid, tcon, buf);
+ 
+ 	free_xid(xid);
+-	return 0;
++	return rc;
+ }
+ 
+ static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
diff --git a/queue-5.4/mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch b/queue-5.4/mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch
new file mode 100644
index 00000000000..85fa4173b6c
--- /dev/null
+++ b/queue-5.4/mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch
@@ -0,0 +1,321 @@
+From ee2e3f50629f17b0752b55b2566c15ce8dafb557 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <christian.brauner@ubuntu.com>
+Date: Sat, 6 Mar 2021 11:10:10 +0100
+Subject: mount: fix mounting of detached mounts onto targets that reside on shared mounts
+
+From: Christian Brauner <christian.brauner@ubuntu.com>
+
+commit ee2e3f50629f17b0752b55b2566c15ce8dafb557 upstream.
+
+Creating a series of detached mounts, attaching them to the filesystem,
+and unmounting them can be used to trigger an integer overflow in
+ns->mounts causing the kernel to block any new mounts in count_mounts()
+and returning ENOSPC because it falsely assumes that the maximum number
+of mounts in the mount namespace has been reached, i.e. it thinks it
+can't fit the new mounts into the mount namespace anymore.
+
+Depending on the number of mounts in your system, this can be reproduced
+on any kernel that supports open_tree() and move_mount() by compiling
+and running the following program:
+
+  /* SPDX-License-Identifier: LGPL-2.1+ */
+
+  #define _GNU_SOURCE
+  #include <errno.h>
+  #include <fcntl.h>
+  #include <getopt.h>
+  #include <limits.h>
+  #include <stdbool.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <string.h>
+  #include <sys/mount.h>
+  #include <sys/stat.h>
+  #include <sys/syscall.h>
+  #include <sys/types.h>
+  #include <unistd.h>
+
+  /* open_tree() */
+  #ifndef OPEN_TREE_CLONE
+  #define OPEN_TREE_CLONE 1
+  #endif
+
+  #ifndef OPEN_TREE_CLOEXEC
+  #define OPEN_TREE_CLOEXEC O_CLOEXEC
+  #endif
+
+  #ifndef __NR_open_tree
+          #if defined __alpha__
+                  #define __NR_open_tree 538
+          #elif defined _MIPS_SIM
+                  #if _MIPS_SIM == _MIPS_SIM_ABI32        /* o32 */
+                          #define __NR_open_tree 4428
+                  #endif
+                  #if _MIPS_SIM == _MIPS_SIM_NABI32       /* n32 */
+                          #define __NR_open_tree 6428
+                  #endif
+                  #if _MIPS_SIM == _MIPS_SIM_ABI64        /* n64 */
+                          #define __NR_open_tree 5428
+                  #endif
+          #elif defined __ia64__
+                  #define __NR_open_tree (428 + 1024)
+          #else
+                  #define __NR_open_tree 428
+          #endif
+  #endif
+
+  /* move_mount() */
+  #ifndef MOVE_MOUNT_F_EMPTY_PATH
+  #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+  #endif
+
+  #ifndef __NR_move_mount
+          #if defined __alpha__
+                  #define __NR_move_mount 539
+          #elif defined _MIPS_SIM
+                  #if _MIPS_SIM == _MIPS_SIM_ABI32        /* o32 */
+                          #define __NR_move_mount 4429
+                  #endif
+                  #if _MIPS_SIM == _MIPS_SIM_NABI32       /* n32 */
+                          #define __NR_move_mount 6429
+                  #endif
+                  #if _MIPS_SIM == _MIPS_SIM_ABI64        /* n64 */
+                          #define __NR_move_mount 5429
+                  #endif
+          #elif defined __ia64__
+                  #define __NR_move_mount (428 + 1024)
+          #else
+                  #define __NR_move_mount 429
+          #endif
+  #endif
+
+  static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+  {
+          return syscall(__NR_open_tree, dfd, filename, flags);
+  }
+
+  static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
+                                   const char *to_pathname, unsigned int flags)
+  {
+          return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
+  }
+
+  static bool is_shared_mountpoint(const char *path)
+  {
+          bool shared = false;
+          FILE *f = NULL;
+          char *line = NULL;
+          int i;
+          size_t len = 0;
+
+          f = fopen("/proc/self/mountinfo", "re");
+          if (!f)
+                  return 0;
+
+          while (getline(&line, &len, f) > 0) {
+                  char *slider1, *slider2;
+
+                  for (slider1 = line, i = 0; slider1 && i < 4; i++)
+                          slider1 = strchr(slider1 + 1, ' ');
+
+                  if (!slider1)
+                          continue;
+
+                  slider2 = strchr(slider1 + 1, ' ');
+                  if (!slider2)
+                          continue;
+
+                  *slider2 = '\0';
+                  if (strcmp(slider1 + 1, path) == 0) {
+                          /* This is the path. Is it shared? */
+                          slider1 = strchr(slider2 + 1, ' ');
+                          if (slider1 && strstr(slider1, "shared:")) {
+                                  shared = true;
+                                  break;
+                          }
+                  }
+          }
+          fclose(f);
+          free(line);
+
+          return shared;
+  }
+
+  static void usage(void)
+  {
+          const char *text = "mount-new [--recursive] <base-dir>\n";
+          fprintf(stderr, "%s", text);
+          _exit(EXIT_SUCCESS);
+  }
+
+  #define exit_usage(format, ...)                              \
+          ({                                                   \
+                  fprintf(stderr, format "\n", ##__VA_ARGS__); \
+                  usage();                                     \
+          })
+
+  #define exit_log(format, ...)                                \
+          ({                                                   \
+                  fprintf(stderr, format "\n", ##__VA_ARGS__); \
+                  exit(EXIT_FAILURE);                          \
+          })
+
+  static const struct option longopts[] = {
+          {"help",        no_argument,            0,      'a'},
+          { NULL,         no_argument,            0,       0 },
+  };
+
+  int main(int argc, char *argv[])
+  {
+          int exit_code = EXIT_SUCCESS, index = 0;
+          int dfd, fd_tree, new_argc, ret;
+          char *base_dir;
+          char *const *new_argv;
+          char target[PATH_MAX];
+
+          while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
+                  switch (ret) {
+                  case 'a':
+                          /* fallthrough */
+                  default:
+                          usage();
+                  }
+          }
+
+          new_argv = &argv[optind];
+          new_argc = argc - optind;
+          if (new_argc < 1)
+                  exit_usage("Missing base directory\n");
+          base_dir = new_argv[0];
+
+          if (*base_dir != '/')
+                  exit_log("Please specify an absolute path");
+
+          /* Ensure that target is a shared mountpoint. */
+          if (!is_shared_mountpoint(base_dir))
+                  exit_log("Please ensure that \"%s\" is a shared mountpoint", base_dir);
+
+          dfd = open(base_dir, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+          if (dfd < 0)
+                  exit_log("%m - Failed to open base directory \"%s\"", base_dir);
+
+          ret = mkdirat(dfd, "detached-move-mount", 0755);
+          if (ret < 0)
+                  exit_log("%m - Failed to create required temporary directories");
+
+          ret = snprintf(target, sizeof(target), "%s/detached-move-mount", base_dir);
+          if (ret < 0 || (size_t)ret >= sizeof(target))
+                  exit_log("%m - Failed to assemble target path");
+
+          /*
+           * Having a mount table with 10000 mounts is already quite excessive
+           * and should account even for weird test systems.
+           */
+          for (size_t i = 0; i < 10000; i++) {
+                  fd_tree = sys_open_tree(dfd, "detached-move-mount",
+                                          OPEN_TREE_CLONE |
+                                          OPEN_TREE_CLOEXEC |
+                                          AT_EMPTY_PATH);
+                  if (fd_tree < 0) {
+                          fprintf(stderr, "%m - Failed to open %d(detached-move-mount)", dfd);
+                          exit_code = EXIT_FAILURE;
+                          break;
+                  }
+
+                  ret = sys_move_mount(fd_tree, "", dfd, "detached-move-mount", MOVE_MOUNT_F_EMPTY_PATH);
+                  if (ret < 0) {
+                          if (errno == ENOSPC)
+                                  fprintf(stderr, "%m - Buggy mount counting");
+                          else
+                                  fprintf(stderr, "%m - Failed to attach mount to %d(detached-move-mount)", dfd);
+                          exit_code = EXIT_FAILURE;
+                          break;
+                  }
+                  close(fd_tree);
+
+                  ret = umount2(target, MNT_DETACH);
+                  if (ret < 0) {
+                          fprintf(stderr, "%m - Failed to unmount %s", target);
+                          exit_code = EXIT_FAILURE;
+                          break;
+                  }
+          }
+
+          (void)unlinkat(dfd, "detached-move-mount", AT_REMOVEDIR);
+          close(dfd);
+
+          exit(exit_code);
+  }
+
+and wait for the kernel to refuse any new mounts by returning ENOSPC.
+How many iterations are needed depends on the number of mounts in your
+system. Assuming you have something like 50 mounts on a standard system
+it should be almost instantaneous.
+
+The root cause of this is that detached mounts aren't handled correctly
+when source and target mount are identical and reside on a shared mount
+causing a broken mount tree where the detached source itself is
+propagated, which propagation prevents for regular bind-mounts and new
+mounts. This ultimately leads to a miscalculation of the number of
+mounts in the mount namespace.
+
+Detached mounts created via
+open_tree(fd, path, OPEN_TREE_CLONE)
+are essentially like an unattached new mount, or an unattached
+bind-mount. They can then later on be attached to the filesystem via
+move_mount() which calls into attach_recursive_mount(). Part of
+attaching it to the filesystem is making sure that mounts get correctly
+propagated in case the destination mountpoint is MS_SHARED, i.e. is a
+shared mountpoint. This is done by calling into propagate_mnt() which
+walks the list of peers calling propagate_one() on each mount in this
+list making sure it receives the propagation event.
+The propagate_one() function thereby skips both new mounts and bind
+mounts to not propagate them "into themselves". Both are identified by
+checking whether the mount is already attached to any mount namespace in
+mnt->mnt_ns. This is what the IS_MNT_NEW() helper is responsible for.
+
+However, detached mounts have an anonymous mount namespace attached to
+them stashed in mnt->mnt_ns which means that IS_MNT_NEW() doesn't
+realize they need to be skipped causing the mount to propagate "into
+itself" breaking the mount table and causing a disconnect between the
+number of mounts recorded as being beneath or reachable from the target
+mountpoint and the number of mounts actually recorded/counted in
+ns->mounts ultimately causing an overflow which in turn prevents any new
+mounts via the ENOSPC issue.
+
+So teach propagation to handle detached mounts by making it aware of
+them. I've been tracking this issue down for the last couple of days and
+then verifying that the fix is correct by
+unmounting everything in my current mount table leaving only /proc and
+/sys mounted and running the reproducer above overnight verifying the
+number of mounts counted in ns->mounts. With this fix the counts are
+correct and the ENOSPC issue can't be reproduced.
+
+This change will only have an effect on mounts created with the new
+mount API since detached mounts cannot be created with the old mount API
+so regressions are extremely unlikely.
+
+Link: https://lore.kernel.org/r/20210306101010.243666-1-christian.brauner@ubuntu.com
+Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around")
+Cc: David Howells <dhowells@redhat.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: linux-fsdevel@vger.kernel.org
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/pnode.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -12,7 +12,7 @@
+ 
+ #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
+ #define IS_MNT_SLAVE(m) ((m)->mnt_master)
+-#define IS_MNT_NEW(m)  (!(m)->mnt_ns)
++#define IS_MNT_NEW(m)  (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns))
+ #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
+ #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
+ #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
diff --git a/queue-5.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch b/queue-5.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch
new file mode 100644
index 00000000000..95e4e728025
--- /dev/null
+++ b/queue-5.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch
@@ -0,0 +1,58 @@
+From 9b1ea29bc0d7b94d420f96a0f4121403efc3dd85 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 10 Mar 2021 10:18:04 -0800
+Subject: Revert "mm, slub: consider rest of partial list if acquire_slab() fails"
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 9b1ea29bc0d7b94d420f96a0f4121403efc3dd85 upstream.
+
+This reverts commit 8ff60eb052eeba95cfb3efe16b08c9199f8121cf.
+
+The kernel test robot reports a huge performance regression due to the
+commit, and the reason seems fairly straightforward: when there is
+contention on the page list (which is what causes acquire_slab() to
+fail), we do _not_ want to just loop and try again, because that will
+transfer the contention to the 'n->list_lock' spinlock we hold, and
+just make things even worse.
+
+This is admittedly likely a problem only on big machines - the kernel
+test robot report comes from a 96-thread dual socket Intel Xeon Gold
+6252 setup, but the regression there really is quite noticeable:
+
+   -47.9% regression of stress-ng.rawpkt.ops_per_sec
+
+and the commit that was marked as being fixed (7ced37197196: "slub:
+Acquire_slab() avoid loop") actually did the loop exit early very
+intentionally (the hint being that "avoid loop" part of that commit
+message), exactly to avoid this issue.
+
+The correct thing to do may be to pick some kind of reasonable middle
+ground: instead of breaking out of the loop on the very first sign of
+contention, or trying over and over and over again, the right thing may
+be to re-try _once_, and then give up on the second failure (or pick
+your favorite value for "once"..).
+
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Link: https://lore.kernel.org/lkml/20210301080404.GF12822@xsang-OptiPlex-9020/
+Cc: Jann Horn <jannh@google.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Acked-by: Christoph Lameter <cl@linux.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/slub.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1887,7 +1887,7 @@ static void *get_partial_node(struct kme
+ 
+ 		t = acquire_slab(s, n, page, object == NULL, &objects);
+ 		if (!t)
+-			continue; /* cmpxchg raced */
++			break;
+ 
+ 		available += objects;
+ 		if (!object) {
diff --git a/queue-5.4/series b/queue-5.4/series
index 091499e66e0..35c414effe9 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -21,3 +21,6 @@ samples-bpf-add-missing-munmap-in-xdpsock.patch
 ibmvnic-always-store-valid-mac-address.patch
 mt76-dma-do-not-report-truncated-frames-to-mac80211.patch
 powerpc-603-fix-protection-of-user-pages-mapped-with-prot_none.patch
+mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch
+cifs-return-proper-error-code-in-statfs-2.patch
+revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch