struct mount: relocate MNT_WRITE_HOLD bit

author Al Viro <viro@zeniv.linux.org.uk>
Wed, 27 Aug 2025 17:37:12 +0000 (13:37 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Wed, 17 Sep 2025 19:58:29 +0000 (15:58 -0400)

diff --git a/fs/mount.h b/fs/mount.h

index b208f69f69d79eb62bf80270a72b5de70e03b6fd..40cf16544317fdb1398963d6cffa6316ab0ae380 100644 (file)
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -66,7 +66,8 @@ struct mount {
         struct list_head mnt_child;     /* and going through their mnt_child */
         struct mount *mnt_next_for_sb;  /* the next two fields are hlist_node, */
         struct mount * __aligned(1) *mnt_pprev_for_sb;
-                                       /* except that LSB of pprev will be stolen */
+                                       /* except that LSB of pprev is stolen */
+#define WRITE_HOLD 1                   /* ... for use by mnt_hold_writers() */
         const char *mnt_devname;        /* Name of device e.g. /dev/dsk/hda1 */
         struct list_head mnt_list;
         struct list_head mnt_expire;    /* link in fs-specific expiry list */
@@ -244,4 +245,26 @@ static inline struct mount *topmost_overmount(struct mount *m)
         return m;
  }
  
+static inline bool __test_write_hold(struct mount * __aligned(1) *val)
+{
+       return (unsigned long)val & WRITE_HOLD;
+}
+
+static inline bool test_write_hold(const struct mount *m)
+{
+       return __test_write_hold(m->mnt_pprev_for_sb);
+}
+
+static inline void set_write_hold(struct mount *m)
+{
+       m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
+                                      | WRITE_HOLD);
+}
+
+static inline void clear_write_hold(struct mount *m)
+{
+       m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
+                                      & ~WRITE_HOLD);
+}
+
  struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
diff --git a/fs/namespace.c b/fs/namespace.c

index 342dfd882b136f6d4c6af5628236c714f4a6cec0..714e159ed9cd54062d75d136a345f8ea91f3d2a4 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -509,20 +509,20 @@ int mnt_get_write_access(struct vfsmount *m)
         mnt_inc_writers(mnt);
         /*
          * The store to mnt_inc_writers must be visible before we pass
-        * MNT_WRITE_HOLD loop below, so that the slowpath can see our
-        * incremented count after it has set MNT_WRITE_HOLD.
+        * WRITE_HOLD loop below, so that the slowpath can see our
+        * incremented count after it has set WRITE_HOLD.
          */
         smp_mb();
         might_lock(&mount_lock.lock);
-       while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+       while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) {
                 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
                         cpu_relax();
                 } else {
                         /*
                          * This prevents priority inversion, if the task
-                        * setting MNT_WRITE_HOLD got preempted on a remote
+                        * setting WRITE_HOLD got preempted on a remote
                          * CPU, and it prevents life lock if the task setting
-                        * MNT_WRITE_HOLD has a lower priority and is bound to
+                        * WRITE_HOLD has a lower priority and is bound to
                          * the same CPU as the task that is spinning here.
                          */
                         preempt_enable();
@@ -533,7 +533,7 @@ int mnt_get_write_access(struct vfsmount *m)
         }
         /*
          * The barrier pairs with the barrier sb_start_ro_state_change() making
-        * sure that if we see MNT_WRITE_HOLD cleared, we will also see
+        * sure that if we see WRITE_HOLD cleared, we will also see
          * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
          * mnt_is_readonly() and bail in case we are racing with remount
          * read-only.
@@ -672,15 +672,15 @@ EXPORT_SYMBOL(mnt_drop_write_file);
   * @mnt.
   *
   * Context: This function expects lock_mount_hash() to be held serializing
- *          setting MNT_WRITE_HOLD.
+ *          setting WRITE_HOLD.
   * Return: On success 0 is returned.
   *        On error, -EBUSY is returned.
   */
  static inline int mnt_hold_writers(struct mount *mnt)
  {
-       mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
+       set_write_hold(mnt);
         /*
-        * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+        * After storing WRITE_HOLD, we'll read the counters. This store
          * should be visible before we do.
          */
         smp_mb();
@@ -696,9 +696,9 @@ static inline int mnt_hold_writers(struct mount *mnt)
          * sum up each counter, if we read a counter before it is incremented,
          * but then read another CPU's count which it has been subsequently
          * decremented from -- we would see more decrements than we should.
-        * MNT_WRITE_HOLD protects against this scenario, because
+        * WRITE_HOLD protects against this scenario, because
          * mnt_want_write first increments count, then smp_mb, then spins on
-        * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+        * WRITE_HOLD, so it can't be decremented by another CPU while
          * we're counting up here.
          */
         if (mnt_get_writers(mnt) > 0)
@@ -720,14 +720,14 @@ static inline int mnt_hold_writers(struct mount *mnt)
   */
  static inline void mnt_unhold_writers(struct mount *mnt)
  {
-       if (!(mnt->mnt.mnt_flags & MNT_WRITE_HOLD))
+       if (!test_write_hold(mnt))
                 return;
         /*
-        * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+        * MNT_READONLY must become visible before ~WRITE_HOLD, so writers
          * that become unheld will see MNT_READONLY.
          */
         smp_wmb();
-       mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+       clear_write_hold(mnt);
  }
  
  static inline void mnt_del_instance(struct mount *m)
@@ -766,7 +766,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
  {
         int err = 0;
  
-       /* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
+       /* Racy optimization.  Recheck the counter under WRITE_HOLD */
         if (atomic_long_read(&sb->s_remove_count))
                 return -EBUSY;
  
@@ -784,8 +784,8 @@ int sb_prepare_remount_readonly(struct super_block *sb)
         if (!err)
                 sb_start_ro_state_change(sb);
         for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
-               if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
-                       m->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+               if (test_write_hold(m))
+                       clear_write_hold(m);
         }
         unlock_mount_hash();
  
diff --git a/include/linux/mount.h b/include/linux/mount.h

index 18e4b97f8a98d5e888562e127d93ba390542e4fb..85e97b9340ff5ba29763200281d451fc8dd9071d 100644 (file)
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -33,7 +33,6 @@ enum mount_flags {
         MNT_NOSYMFOLLOW = 0x80,
  
         MNT_SHRINKABLE  = 0x100,
-       MNT_WRITE_HOLD  = 0x200,
  
         MNT_INTERNAL    = 0x4000,
  
@@ -52,7 +51,7 @@ enum mount_flags {
                                   | MNT_READONLY | MNT_NOSYMFOLLOW,
         MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
  
-       MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED |
+       MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED |
                              MNT_SYNC_UMOUNT | MNT_LOCKED
  };
author	Al Viro <viro@zeniv.linux.org.uk>
	Wed, 27 Aug 2025 17:37:12 +0000 (13:37 -0400)
committer	Al Viro <viro@zeniv.linux.org.uk>
	Wed, 17 Sep 2025 19:58:29 +0000 (15:58 -0400)
fs/mount.h		patch \| blob \| blame \| history
fs/namespace.c		patch \| blob \| blame \| history
include/linux/mount.h		patch \| blob \| blame \| history