mount: separate the flags accessed only under namespace_sem

author Al Viro <viro@zeniv.linux.org.uk>

Sat, 21 Jun 2025 22:06:19 +0000 (18:06 -0400)

committer Al Viro <viro@zeniv.linux.org.uk>

Sun, 29 Jun 2025 23:03:29 +0000 (19:03 -0400)
author Al Viro <viro@zeniv.linux.org.uk>
Sat, 21 Jun 2025 22:06:19 +0000 (18:06 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Sun, 29 Jun 2025 23:03:29 +0000 (19:03 -0400)
diff --git a/Documentation/filesystems/propagate_umount.txt b/Documentation/filesystems/propagate_umount.txt

index 6906903a8aa29f0073882bbf8853ac4d3e6ed40e..c90349e5b889fbc1e16742f1c4d64da8650496c7 100644 (file)
--- a/Documentation/filesystems/propagate_umount.txt
+++ b/Documentation/filesystems/propagate_umount.txt
@@ -453,11 +453,11 @@ original set.
  So let's go for
         * original set ("set").  Linkage via mnt_list
         * undecided candidates ("candidates").  Subset of a list,
-consisting of all its elements marked with a new flag (MNT_UMOUNT_CANDIDATE).
+consisting of all its elements marked with a new flag (T_UMOUNT_CANDIDATE).
  Initially all elements of the list will be marked that way; in the
  end the list will become empty and no mounts will remain marked with
  that flag.
-       * Reuse MNT_MARKED for "has been already seen by trim_ancestors()".
+       * Reuse T_MARKED for "has been already seen by trim_ancestors()".
         * anything in U that hadn't been in the original set - elements of
  candidates will gradually be either discarded or moved there.  In other
  words, it's the candidates we have already decided to unmount. Its role
@@ -465,13 +465,13 @@ is reasonably close to the old "to_umount", so let's use that name.
  Linkage via mnt_list.
  
  For gather_candidates() we'll need to maintain both candidates (S -
-set) and intersection of S with set.  Use MNT_UMOUNT_CANDIDATE for
+set) and intersection of S with set.  Use T_UMOUNT_CANDIDATE for
  all elements we encounter, putting the ones not already in the original
  set into the list of candidates.  When we are done, strip that flag from
  all elements of the original set.  That gives a cheap way to check
  if element belongs to S (in gather_candidates) and to candidates
  itself (at later stages).  Call that predicate is_candidate(); it would
-be m->mnt_flags & MNT_UMOUNT_CANDIDATE.
+be m->mnt_t_flags & T_UMOUNT_CANDIDATE.
  
  All elements of the original set are marked with MNT_UMOUNT and we'll
  need the same for elements added when joining the contents of to_umount
@@ -480,5 +480,5 @@ to to_umount; that's close to what the old 'umount_one' is doing, so
  let's keep that name.  It also gives us another predicate we need -
  "belongs to union of set and to_umount"; will_be_unmounted() for now.
  
-Removals from the candidates list should strip both MNT_MARKED and
-MNT_UMOUNT_CANDIDATE; call it remove_from_candidates_list().
+Removals from the candidates list should strip both T_MARKED and
+T_UMOUNT_CANDIDATE; call it remove_from_candidate_list().
diff --git a/fs/mount.h b/fs/mount.h

index 4355c482a841cebedb3170a8f7f2120f01f2695b..f299dc85446d70117fd943343a9409d0970f8626 100644 (file)
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -84,6 +84,7 @@ struct mount {
         struct list_head to_notify;     /* need to queue notification */
         struct mnt_namespace *prev_ns;  /* previous namespace (NULL if none) */
  #endif
+       int mnt_t_flags;                /* namespace_sem-protected flags */
         int mnt_id;                     /* mount identifier, reused */
         u64 mnt_id_unique;              /* mount ID unique until reboot */
         int mnt_group_id;               /* peer group identifier */
@@ -93,6 +94,22 @@ struct mount {
         struct mount *overmount;        /* mounted on ->mnt_root */
  } __randomize_layout;
  
+enum {
+       T_SHARED                = 1, /* mount is shared */
+       T_UNBINDABLE            = 2, /* mount is unbindable */
+       T_MARKED                = 4, /* internal mark for propagate_... */
+       T_UMOUNT_CANDIDATE      = 8, /* for propagate_umount */
+
+       /*
+        * T_SHARED_MASK is the set of flags that should be cleared when a
+        * mount becomes shared.  Currently, this is only the flag that says a
+        * mount cannot be bind mounted, since bind mounting is how we create
+        * a mount that shares events with another mount.  If you add a new
+        * T_* flag, consider how it interacts with shared mounts.
+        */
+       T_SHARED_MASK   = T_UNBINDABLE,
+};
+
  #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
  
  static inline struct mount *real_mount(struct vfsmount *mnt)
diff --git a/fs/namespace.c b/fs/namespace.c

index 4bdf6a6e75cab02486897cfc63cea240d00c6953..da27365418a5a6a76961444e2c8b6745f92dba9e 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2917,10 +2917,8 @@ static int do_change_type(struct path *path, int ms_flags)
                         goto out_unlock;
         }
  
-       lock_mount_hash();
         for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
                 change_mnt_propagation(m, type);
-       unlock_mount_hash();
  
   out_unlock:
         namespace_unlock();
@@ -3409,9 +3407,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
         if (IS_MNT_SHARED(from)) {
                 to->mnt_group_id = from->mnt_group_id;
                 list_add(&to->mnt_share, &from->mnt_share);
-               lock_mount_hash();
                 set_mnt_shared(to);
-               unlock_mount_hash();
         }
  
         err = 0;
diff --git a/fs/pnode.c b/fs/pnode.c

index 827d71736ac5bdb7b849af5812870bc194107fa3..b997663de6d0b987f54fb690da08b60ba55679ee 100644 (file)
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -112,7 +112,7 @@ static int do_make_slave(struct mount *mnt)
  }
  
  /*
- * vfsmount lock must be held for write
+ * EXCL[namespace_sem]
   */
  void change_mnt_propagation(struct mount *mnt, int type)
  {
@@ -125,9 +125,9 @@ void change_mnt_propagation(struct mount *mnt, int type)
                 list_del_init(&mnt->mnt_slave);
                 mnt->mnt_master = NULL;
                 if (type == MS_UNBINDABLE)
-                       mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
+                       mnt->mnt_t_flags |= T_UNBINDABLE;
                 else
-                       mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
+                       mnt->mnt_t_flags &= ~T_UNBINDABLE;
         }
  }
  
@@ -263,9 +263,9 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
                 return PTR_ERR(child);
         read_seqlock_excl(&mount_lock);
         mnt_set_mountpoint(m, dest_mp, child);
+       read_sequnlock_excl(&mount_lock);
         if (m->mnt_master != dest_master)
                 SET_MNT_MARK(m->mnt_master);
-       read_sequnlock_excl(&mount_lock);
         last_dest = m;
         last_source = child;
         hlist_add_head(&child->mnt_hash, list);
@@ -322,13 +322,11 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
                 } while (n != m);
         }
  out:
-       read_seqlock_excl(&mount_lock);
         hlist_for_each_entry(n, tree_list, mnt_hash) {
                 m = n->mnt_parent;
                 if (m->mnt_master != dest_mnt->mnt_master)
                         CLEAR_MNT_MARK(m->mnt_master);
         }
-       read_sequnlock_excl(&mount_lock);
         return ret;
  }
  
@@ -447,7 +445,7 @@ void propagate_mount_unlock(struct mount *mnt)
  
  static inline bool is_candidate(struct mount *m)
  {
-       return m->mnt.mnt_flags & MNT_UMOUNT_CANDIDATE;
+       return m->mnt_t_flags & T_UMOUNT_CANDIDATE;
  }
  
  static inline bool will_be_unmounted(struct mount *m)
@@ -464,7 +462,7 @@ static void umount_one(struct mount *m, struct list_head *to_umount)
  
  static void remove_from_candidate_list(struct mount *m)
  {
-       m->mnt.mnt_flags &= ~(MNT_MARKED | MNT_UMOUNT_CANDIDATE);
+       m->mnt_t_flags &= ~(T_MARKED | T_UMOUNT_CANDIDATE);
         list_del_init(&m->mnt_list);
  }
  
@@ -476,7 +474,7 @@ static void gather_candidates(struct list_head *set,
         list_for_each_entry(m, set, mnt_list) {
                 if (is_candidate(m))
                         continue;
-               m->mnt.mnt_flags |= MNT_UMOUNT_CANDIDATE;
+               m->mnt_t_flags |= T_UMOUNT_CANDIDATE;
                 p = m->mnt_parent;
                 q = propagation_next(p, p);
                 while (q) {
@@ -494,7 +492,7 @@ static void gather_candidates(struct list_head *set,
                                         q = skip_propagation_subtree(q, p);
                                         continue;
                                 }
-                               child->mnt.mnt_flags |= MNT_UMOUNT_CANDIDATE;
+                               child->mnt_t_flags |= T_UMOUNT_CANDIDATE;
                                 if (!will_be_unmounted(child))
                                         list_add(&child->mnt_list, candidates);
                         }
@@ -502,7 +500,7 @@ static void gather_candidates(struct list_head *set,
                 }
         }
         list_for_each_entry(m, set, mnt_list)
-               m->mnt.mnt_flags &= ~MNT_UMOUNT_CANDIDATE;
+               m->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
  }
  
  /*
@@ -519,7 +517,7 @@ static void trim_ancestors(struct mount *m)
                         return;
                 SET_MNT_MARK(m);
                 if (m != p->overmount)
-                       p->mnt.mnt_flags &= ~MNT_UMOUNT_CANDIDATE;
+                       p->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
         }
  }
  
diff --git a/fs/pnode.h b/fs/pnode.h

index 04f1ac53aa49793b86f4f6de8216d2112db5439d..507e30e7a420407f2b4fdbb52ea09da6bcf91256 100644 (file)
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -10,14 +10,14 @@
  #include <linux/list.h>
  #include "mount.h"
  
-#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
+#define IS_MNT_SHARED(m) ((m)->mnt_t_flags & T_SHARED)
  #define IS_MNT_SLAVE(m) ((m)->mnt_master)
  #define IS_MNT_NEW(m) (!(m)->mnt_ns)
-#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
-#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
-#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
-#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
-#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
+#define CLEAR_MNT_SHARED(m) ((m)->mnt_t_flags &= ~T_SHARED)
+#define IS_MNT_UNBINDABLE(m) ((m)->mnt_t_flags & T_UNBINDABLE)
+#define IS_MNT_MARKED(m) ((m)->mnt_t_flags & T_MARKED)
+#define SET_MNT_MARK(m) ((m)->mnt_t_flags |= T_MARKED)
+#define CLEAR_MNT_MARK(m) ((m)->mnt_t_flags &= ~T_MARKED)
  #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
  
  #define CL_EXPIRE              0x01
@@ -28,10 +28,13 @@
  #define CL_SHARED_TO_SLAVE     0x20
  #define CL_COPY_MNT_NS_FILE    0x40
  
+/*
+ * EXCL[namespace_sem]
+ */
  static inline void set_mnt_shared(struct mount *mnt)
  {
-       mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
-       mnt->mnt.mnt_flags |= MNT_SHARED;
+       mnt->mnt_t_flags &= ~T_SHARED_MASK;
+       mnt->mnt_t_flags |= T_SHARED;
  }
  
  static inline bool peers(const struct mount *m1, const struct mount *m2)
diff --git a/include/linux/mount.h b/include/linux/mount.h

index 65fa8442c00ac7f2db52514f89f8b71bfb35e0ec..5f9c053b08971a94815ebe8ee23c040a929e2310 100644 (file)
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -35,12 +35,8 @@ enum mount_flags {
         MNT_SHRINKABLE  = 0x100,
         MNT_WRITE_HOLD  = 0x200,
  
-       MNT_SHARED      = 0x1000, /* if the vfsmount is a shared mount */
-       MNT_UNBINDABLE  = 0x2000, /* if the vfsmount is a unbindable mount */
-
         MNT_INTERNAL    = 0x4000,
  
-       MNT_UMOUNT_CANDIDATE    = 0x020000,
         MNT_LOCK_ATIME          = 0x040000,
         MNT_LOCK_NOEXEC         = 0x080000,
         MNT_LOCK_NOSUID         = 0x100000,
@@ -49,25 +45,15 @@ enum mount_flags {
         MNT_LOCKED              = 0x800000,
         MNT_DOOMED              = 0x1000000,
         MNT_SYNC_UMOUNT         = 0x2000000,
-       MNT_MARKED              = 0x4000000,
         MNT_UMOUNT              = 0x8000000,
  
-       /*
-        * MNT_SHARED_MASK is the set of flags that should be cleared when a
-        * mount becomes shared.  Currently, this is only the flag that says a
-        * mount cannot be bind mounted, since this is how we create a mount
-        * that shares events with another mount.  If you add a new MNT_*
-        * flag, consider how it interacts with shared mounts.
-        */
-       MNT_SHARED_MASK = MNT_UNBINDABLE,
         MNT_USER_SETTABLE_MASK  = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC
                                   | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME
                                   | MNT_READONLY | MNT_NOSYMFOLLOW,
         MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
  
-       MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
-                            MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED |
-                            MNT_LOCKED | MNT_UMOUNT_CANDIDATE,
+       MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED |
+                            MNT_SYNC_UMOUNT | MNT_LOCKED
  };
  
  struct vfsmount {
author	Al Viro <viro@zeniv.linux.org.uk>
	Sat, 21 Jun 2025 22:06:19 +0000 (18:06 -0400)
committer	Al Viro <viro@zeniv.linux.org.uk>
	Sun, 29 Jun 2025 23:03:29 +0000 (19:03 -0400)
Documentation/filesystems/propagate_umount.txt		patch \| blob \| blame \| history
fs/mount.h		patch \| blob \| blame \| history
fs/namespace.c		patch \| blob \| blame \| history
fs/pnode.c		patch \| blob \| blame \| history
fs/pnode.h		patch \| blob \| blame \| history
include/linux/mount.h		patch \| blob \| blame \| history