From: Greg Kroah-Hartman Date: Fri, 12 Mar 2021 10:15:29 +0000 (+0100) Subject: 5.4-stable patches X-Git-Tag: v4.4.262~118 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8d906a1a7a6b0d4b130566d6bf38717524237229;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: cifs-return-proper-error-code-in-statfs-2.patch mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch --- diff --git a/queue-5.4/cifs-return-proper-error-code-in-statfs-2.patch b/queue-5.4/cifs-return-proper-error-code-in-statfs-2.patch new file mode 100644 index 00000000000..1616675b133 --- /dev/null +++ b/queue-5.4/cifs-return-proper-error-code-in-statfs-2.patch @@ -0,0 +1,35 @@ +From 14302ee3301b3a77b331cc14efb95bf7184c73cc Mon Sep 17 00:00:00 2001 +From: Paulo Alcantara +Date: Mon, 8 Mar 2021 12:00:49 -0300 +Subject: cifs: return proper error code in statfs(2) + +From: Paulo Alcantara + +commit 14302ee3301b3a77b331cc14efb95bf7184c73cc upstream. + +In cifs_statfs(), if server->ops->queryfs is not NULL, then we should +use its return value rather than always returning 0. Instead, use rc +variable as it is properly set to 0 in case there is no +server->ops->queryfs. 
+ +Signed-off-by: Paulo Alcantara (SUSE) +Reviewed-by: Aurelien Aptel +Reviewed-by: Ronnie Sahlberg +CC: +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/cifs/cifsfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/cifs/cifsfs.c ++++ b/fs/cifs/cifsfs.c +@@ -278,7 +278,7 @@ cifs_statfs(struct dentry *dentry, struc + rc = server->ops->queryfs(xid, tcon, buf); + + free_xid(xid); +- return 0; ++ return rc; + } + + static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) diff --git a/queue-5.4/mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch b/queue-5.4/mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch new file mode 100644 index 00000000000..85fa4173b6c --- /dev/null +++ b/queue-5.4/mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch @@ -0,0 +1,321 @@ +From ee2e3f50629f17b0752b55b2566c15ce8dafb557 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Sat, 6 Mar 2021 11:10:10 +0100 +Subject: mount: fix mounting of detached mounts onto targets that reside on shared mounts + +From: Christian Brauner + +commit ee2e3f50629f17b0752b55b2566c15ce8dafb557 upstream. + +Creating a series of detached mounts, attaching them to the filesystem, +and unmounting them can be used to trigger an integer overflow in +ns->mounts causing the kernel to block any new mounts in count_mounts() +and returning ENOSPC because it falsely assumes that the maximum number +of mounts in the mount namespace has been reached, i.e. it thinks it +can't fit the new mounts into the mount namespace anymore. 
+ +Depending on the number of mounts in your system, this can be reproduced +on any kernel that supports open_tree() and move_mount() by compiling +and running the following program: + + /* SPDX-License-Identifier: LGPL-2.1+ */ + + #define _GNU_SOURCE + #include <errno.h> + #include <fcntl.h> + #include <getopt.h> + #include <limits.h> + #include <stdbool.h> + #include <stdio.h> + #include <stdlib.h> + #include <string.h> + #include <sys/mount.h> + #include <sys/stat.h> + #include <sys/syscall.h> + #include <sys/types.h> + #include <unistd.h> + + /* open_tree() */ + #ifndef OPEN_TREE_CLONE + #define OPEN_TREE_CLONE 1 + #endif + + #ifndef OPEN_TREE_CLOEXEC + #define OPEN_TREE_CLOEXEC O_CLOEXEC + #endif + + #ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif + #endif + + /* move_mount() */ + #ifndef MOVE_MOUNT_F_EMPTY_PATH + #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ + #endif + + #ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #elif defined __ia64__ + #define __NR_move_mount (428 + 1024) + #else + #define __NR_move_mount 429 + #endif + #endif + + static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) + { + return syscall(__NR_open_tree, dfd, filename, flags); + } + + static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, + const char *to_pathname, unsigned int flags) + { + return syscall(__NR_move_mount, from_dfd, 
from_pathname, to_dfd, to_pathname, flags); + } + + static bool is_shared_mountpoint(const char *path) + { + bool shared = false; + FILE *f = NULL; + char *line = NULL; + int i; + size_t len = 0; + + f = fopen("/proc/self/mountinfo", "re"); + if (!f) + return 0; + + while (getline(&line, &len, f) > 0) { + char *slider1, *slider2; + + for (slider1 = line, i = 0; slider1 && i < 4; i++) + slider1 = strchr(slider1 + 1, ' '); + + if (!slider1) + continue; + + slider2 = strchr(slider1 + 1, ' '); + if (!slider2) + continue; + + *slider2 = '\0'; + if (strcmp(slider1 + 1, path) == 0) { + /* This is the path. Is it shared? */ + slider1 = strchr(slider2 + 1, ' '); + if (slider1 && strstr(slider1, "shared:")) { + shared = true; + break; + } + } + } + fclose(f); + free(line); + + return shared; + } + + static void usage(void) + { + const char *text = "mount-new [--recursive] \n"; + fprintf(stderr, "%s", text); + _exit(EXIT_SUCCESS); + } + + #define exit_usage(format, ...) \ + ({ \ + fprintf(stderr, format "\n", ##__VA_ARGS__); \ + usage(); \ + }) + + #define exit_log(format, ...) \ + ({ \ + fprintf(stderr, format "\n", ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + }) + + static const struct option longopts[] = { + {"help", no_argument, 0, 'a'}, + { NULL, no_argument, 0, 0 }, + }; + + int main(int argc, char *argv[]) + { + int exit_code = EXIT_SUCCESS, index = 0; + int dfd, fd_tree, new_argc, ret; + char *base_dir; + char *const *new_argv; + char target[PATH_MAX]; + + while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) { + switch (ret) { + case 'a': + /* fallthrough */ + default: + usage(); + } + } + + new_argv = &argv[optind]; + new_argc = argc - optind; + if (new_argc < 1) + exit_usage("Missing base directory\n"); + base_dir = new_argv[0]; + + if (*base_dir != '/') + exit_log("Please specify an absolute path"); + + /* Ensure that target is a shared mountpoint. 
*/ + if (!is_shared_mountpoint(base_dir)) + exit_log("Please ensure that \"%s\" is a shared mountpoint", base_dir); + + dfd = open(base_dir, O_RDONLY | O_DIRECTORY | O_CLOEXEC); + if (dfd < 0) + exit_log("%m - Failed to open base directory \"%s\"", base_dir); + + ret = mkdirat(dfd, "detached-move-mount", 0755); + if (ret < 0) + exit_log("%m - Failed to create required temporary directories"); + + ret = snprintf(target, sizeof(target), "%s/detached-move-mount", base_dir); + if (ret < 0 || (size_t)ret >= sizeof(target)) + exit_log("%m - Failed to assemble target path"); + + /* + * Having a mount table with 10000 mounts is already quite excessive + * and should account even for weird test systems. + */ + for (size_t i = 0; i < 10000; i++) { + fd_tree = sys_open_tree(dfd, "detached-move-mount", + OPEN_TREE_CLONE | + OPEN_TREE_CLOEXEC | + AT_EMPTY_PATH); + if (fd_tree < 0) { + fprintf(stderr, "%m - Failed to open %d(detached-move-mount)", dfd); + exit_code = EXIT_FAILURE; + break; + } + + ret = sys_move_mount(fd_tree, "", dfd, "detached-move-mount", MOVE_MOUNT_F_EMPTY_PATH); + if (ret < 0) { + if (errno == ENOSPC) + fprintf(stderr, "%m - Buggy mount counting"); + else + fprintf(stderr, "%m - Failed to attach mount to %d(detached-move-mount)", dfd); + exit_code = EXIT_FAILURE; + break; + } + close(fd_tree); + + ret = umount2(target, MNT_DETACH); + if (ret < 0) { + fprintf(stderr, "%m - Failed to unmount %s", target); + exit_code = EXIT_FAILURE; + break; + } + } + + (void)unlinkat(dfd, "detached-move-mount", AT_REMOVEDIR); + close(dfd); + + exit(exit_code); + } + +and wait for the kernel to refuse any new mounts by returning ENOSPC. +How many iterations are needed depends on the number of mounts in your +system. Assuming you have something like 50 mounts on a standard system +it should be almost instantaneous. 
+ +The root cause of this is that detached mounts aren't handled correctly +when source and target mount are identical and reside on a shared mount +causing a broken mount tree where the detached source itself is +propagated, which propagation prevents for regular bind-mounts and new +mounts. This ultimately leads to a miscalculation of the number of +mounts in the mount namespace. + +Detached mounts created via +open_tree(fd, path, OPEN_TREE_CLONE) +are essentially like an unattached new mount, or an unattached +bind-mount. They can then later on be attached to the filesystem via +move_mount() which calls into attach_recursive_mount(). Part of +attaching it to the filesystem is making sure that mounts get correctly +propagated in case the destination mountpoint is MS_SHARED, i.e. is a +shared mountpoint. This is done by calling into propagate_mnt() which +walks the list of peers calling propagate_one() on each mount in this +list making sure it receives the propagation event. +The propagate_one() function thereby skips both new mounts and bind +mounts to not propagate them "into themselves". Both are identified by +checking whether the mount is already attached to any mount namespace in +mnt->mnt_ns. This is what the IS_MNT_NEW() helper is responsible for. + +However, detached mounts have an anonymous mount namespace attached to +them stashed in mnt->mnt_ns which means that IS_MNT_NEW() doesn't +realize they need to be skipped causing the mount to propagate "into +itself" breaking the mount table and causing a disconnect between the +number of mounts recorded as being beneath or reachable from the target +mountpoint and the number of mounts actually recorded/counted in +ns->mounts ultimately causing an overflow which in turn prevents any new +mounts via the ENOSPC issue. + +So teach propagation to handle detached mounts by making it aware of +them. 
I've been tracking this issue down for the last couple of days and +then verifying that the fix is correct by +unmounting everything in my current mount table leaving only /proc and +/sys mounted and running the reproducer above overnight verifying the +number of mounts counted in ns->mounts. With this fix the counts are +correct and the ENOSPC issue can't be reproduced. + +This change will only have an effect on mounts created with the new +mount API since detached mounts cannot be created with the old mount API +so regressions are extremely unlikely. + +Link: https://lore.kernel.org/r/20210306101010.243666-1-christian.brauner@ubuntu.com +Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around") +Cc: David Howells +Cc: Al Viro +Cc: linux-fsdevel@vger.kernel.org +Cc: +Reviewed-by: Christoph Hellwig +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/pnode.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/pnode.h ++++ b/fs/pnode.h +@@ -12,7 +12,7 @@ + + #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) + #define IS_MNT_SLAVE(m) ((m)->mnt_master) +-#define IS_MNT_NEW(m) (!(m)->mnt_ns) ++#define IS_MNT_NEW(m) (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns)) + #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) + #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) + #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) diff --git a/queue-5.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch b/queue-5.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch new file mode 100644 index 00000000000..95e4e728025 --- /dev/null +++ b/queue-5.4/revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch @@ -0,0 +1,58 @@ +From 9b1ea29bc0d7b94d420f96a0f4121403efc3dd85 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Wed, 10 Mar 2021 10:18:04 -0800 +Subject: Revert "mm, slub: consider rest of partial list if acquire_slab() 
fails" + +From: Linus Torvalds + +commit 9b1ea29bc0d7b94d420f96a0f4121403efc3dd85 upstream. + +This reverts commit 8ff60eb052eeba95cfb3efe16b08c9199f8121cf. + +The kernel test robot reports a huge performance regression due to the +commit, and the reason seems fairly straightforward: when there is +contention on the page list (which is what causes acquire_slab() to +fail), we do _not_ want to just loop and try again, because that will +transfer the contention to the 'n->list_lock' spinlock we hold, and +just make things even worse. + +This is admittedly likely a problem only on big machines - the kernel +test robot report comes from a 96-thread dual socket Intel Xeon Gold +6252 setup, but the regression there really is quite noticeable: + + -47.9% regression of stress-ng.rawpkt.ops_per_sec + +and the commit that was marked as being fixed (7ced37197196: "slub: +Acquire_slab() avoid loop") actually did the loop exit early very +intentionally (the hint being that "avoid loop" part of that commit +message), exactly to avoid this issue. + +The correct thing to do may be to pick some kind of reasonable middle +ground: instead of breaking out of the loop on the very first sign of +contention, or trying over and over and over again, the right thing may +be to re-try _once_, and then give up on the second failure (or pick +your favorite value for "once"..). 
+ +Reported-by: kernel test robot +Link: https://lore.kernel.org/lkml/20210301080404.GF12822@xsang-OptiPlex-9020/ +Cc: Jann Horn +Cc: David Rientjes +Cc: Joonsoo Kim +Acked-by: Christoph Lameter +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/slub.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1887,7 +1887,7 @@ static void *get_partial_node(struct kme + + t = acquire_slab(s, n, page, object == NULL, &objects); + if (!t) +- continue; /* cmpxchg raced */ ++ break; + + available += objects; + if (!object) { diff --git a/queue-5.4/series b/queue-5.4/series index 091499e66e0..35c414effe9 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -21,3 +21,6 @@ samples-bpf-add-missing-munmap-in-xdpsock.patch ibmvnic-always-store-valid-mac-address.patch mt76-dma-do-not-report-truncated-frames-to-mac80211.patch powerpc-603-fix-protection-of-user-pages-mapped-with-prot_none.patch +mount-fix-mounting-of-detached-mounts-onto-targets-that-reside-on-shared-mounts.patch +cifs-return-proper-error-code-in-statfs-2.patch +revert-mm-slub-consider-rest-of-partial-list-if-acquire_slab-fails.patch