From: Junxuan Liao Date: Mon, 23 Jun 2025 04:01:32 +0000 (-0500) Subject: docs/vfs: update references to i_mutex to i_rwsem X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2773d282cd56464f62e9b4703c41d2f733a67842;p=thirdparty%2Flinux.git docs/vfs: update references to i_mutex to i_rwsem VFS has been using i_rwsem for ten years now (9902af79c01a: parallel lookups actual switch to rwsem), but the VFS documentation and comments still have references to i_mutex. Signed-off-by: Junxuan Liao Link: https://lore.kernel.org/72223729-5471-474a-af3c-f366691fba82@cs.wisc.edu Signed-off-by: Christian Brauner --- diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index fd32a9a17bfb3..dd9da7e04a993 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -758,8 +758,9 @@ process is more complicated and uses write_begin/write_end or dirty_folio to write data into the address_space, and writepages to writeback data to storage. -Adding and removing pages to/from an address_space is protected by the -inode's i_mutex. +Removing pages from an address_space requires holding the inode's i_rwsem +exclusively, while adding pages to the address_space requires holding the +inode's i_mapping->invalidate_lock exclusively. When data is written to a page, the PG_Dirty flag should be set. It typically remains set until writepages asks for it to be written. This diff --git a/fs/attr.c b/fs/attr.c index 9caf63d20d03e..5425c1dbbff92 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -230,7 +230,7 @@ EXPORT_SYMBOL(setattr_prepare); * @inode: the inode to be truncated * @offset: the new size to assign to the inode * - * inode_newsize_ok must be called with i_mutex held. + * inode_newsize_ok must be called with i_rwsem held exclusively. * * inode_newsize_ok will check filesystem limits and ulimits to check that the * new inode size is within limits. 
inode_newsize_ok will also send SIGXFSZ @@ -318,7 +318,7 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr) * @inode: the inode to be updated * @attr: the new attributes * - * setattr_copy must be called with i_mutex held. + * setattr_copy must be called with i_rwsem held exclusively. * * setattr_copy updates the inode's metadata with that specified * in attr on idmapped mounts. Necessary permission checks to determine @@ -403,13 +403,13 @@ EXPORT_SYMBOL(may_setattr); * @attr: new attributes * @delegated_inode: returns inode, if the inode is delegated * - * The caller must hold the i_mutex on the affected object. + * The caller must hold the i_rwsem exclusively on the affected object. * * If notify_change discovers a delegation in need of breaking, * it will return -EWOULDBLOCK and return a reference to the inode in * delegated_inode. The caller should then break the delegation and * retry. Because breaking a delegation may take a long time, the - * caller should drop the i_mutex before doing so. + * caller should drop the i_rwsem before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not @@ -456,7 +456,7 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - /* Flag setting protected by i_mutex */ + /* Flag setting protected by i_rwsem */ if (is_sxid(attr->ia_mode)) inode->i_flags &= ~S_NOSEC; } diff --git a/fs/buffer.c b/fs/buffer.c index a14d281c6a746..1d34200f69c8f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2609,7 +2609,7 @@ EXPORT_SYMBOL(cont_write_begin); * holes and correct delalloc and unwritten extent mapping on filesystems that * support these features. 
* - * We are not allowed to take the i_mutex here so we have to play games to + * We are not allowed to take the i_rwsem here so we have to play games to * protect against truncate races as the page could now be beyond EOF. Because * truncate writes the inode size before removing pages, once we have the * page lock we can determine safely if the page is beyond EOF. If it is not diff --git a/fs/dcache.c b/fs/dcache.c index 03d58b2d4fa34..ab8465ae9cad8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2774,10 +2774,10 @@ static void copy_name(struct dentry *dentry, struct dentry *target) * @target: new dentry * @exchange: exchange the two dentries * - * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. Caller must hold - * rename_lock, the i_mutex of the source and target directories, - * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). + * Update the dcache to reflect the move of a file name. Negative dcache + * entries should not be moved in this way. Caller must hold rename_lock, the + * i_rwsem of the source and target directories (exclusively), and the sb-> + * s_vfs_rename_mutex if they differ. See lock_rename(). */ static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) @@ -2923,7 +2923,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex, and rename_lock + * dentry->d_parent->d_inode->i_rwsem, and rename_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... 
diff --git a/fs/direct-io.c b/fs/direct-io.c index bbd05f1a21453..1694ee9a93820 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1083,8 +1083,8 @@ static inline int drop_refcount(struct dio *dio) * The locking rules are governed by the flags parameter: * - if the flags value contains DIO_LOCKING we use a fancy locking * scheme for dumb filesystems. - * For writes this function is called under i_mutex and returns with - * i_mutex held, for reads, i_mutex is not held on entry, but it is + * For writes this function is called under i_rwsem and returns with + * i_rwsem held, for reads, i_rwsem is not held on entry, but it is * taken and dropped again before returning. * - if the flags value does NOT contain DIO_LOCKING we don't use any * internal locking but rather rely on the filesystem to synchronize @@ -1094,7 +1094,7 @@ static inline int drop_refcount(struct dio *dio) * counter before starting direct I/O, and decrement it once we are done. * Truncate can wait for it to reach zero to provide exclusion. It is * expected that filesystem provide exclusion between new direct I/O - * and truncates. For DIO_LOCKING filesystems this is done by i_mutex, + * and truncates. For DIO_LOCKING filesystems this is done by i_rwsem, * but other filesystems need to take care of this on their own. * * NOTE: if you pass "sdio" to anything by pointer make sure that function @@ -1279,7 +1279,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, /* * All block lookups have been performed. For READ requests - * we can let i_mutex go now that its achieved its purpose + * we can let i_rwsem go now that its achieved its purpose * of protecting us from looking up uninitialized blocks. 
*/ if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING)) diff --git a/fs/inode.c b/fs/inode.c index 99318b157a9a1..a0150e2ef22a3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1158,9 +1158,8 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode) /* Set new key only if filesystem hasn't already changed it */ if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { /* - * ensure nobody is actually holding i_mutex + * ensure nobody is actually holding i_rwsem */ - // mutex_destroy(&inode->i_mutex); init_rwsem(&inode->i_rwsem); lockdep_set_class(&inode->i_rwsem, &type->i_mutex_dir_key); @@ -2615,7 +2614,7 @@ EXPORT_SYMBOL(inode_dio_finished); * proceed with a truncate or equivalent operation. * * Must be called under a lock that serializes taking new references - * to i_dio_count, usually by inode->i_mutex. + * to i_dio_count, usually by inode->i_rwsem. */ void inode_dio_wait(struct inode *inode) { @@ -2633,7 +2632,7 @@ EXPORT_SYMBOL(inode_dio_wait_interruptible); /* * inode_set_flags - atomically set some inode flags * - * Note: the caller should be holding i_mutex, or else be sure that + * Note: the caller should be holding i_rwsem exclusively, or else be sure that * they have exclusive access to the inode structure (i.e., while the * inode is being instantiated). The reason for the cmpxchg() loop * --- which wouldn't be necessary if all code paths which modify @@ -2641,7 +2640,7 @@ EXPORT_SYMBOL(inode_dio_wait_interruptible); * code path which doesn't today so we use cmpxchg() out of an abundance * of caution. * - * In the long run, i_mutex is overkill, and we should probably look + * In the long run, i_rwsem is overkill, and we should probably look * at using the i_lock spinlock to protect i_flags, and then make sure * it is so documented in include/linux/fs.h and that all code follows * the locking convention!! 
diff --git a/fs/libfs.c b/fs/libfs.c index 9ea0ecc325a81..4d1862f589e87 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -946,7 +946,8 @@ EXPORT_SYMBOL(simple_write_begin); * simple_write_end does the minimum needed for updating a folio after * writing is done. It has the same API signature as the .write_end of * address_space_operations vector. So it can just be set onto .write_end for - * FSes that don't need any other processing. i_mutex is assumed to be held. + * FSes that don't need any other processing. i_rwsem is assumed to be held + * exclusively. * Block based filesystems should use generic_write_end(). * NOTE: Even though i_size might get updated by this function, mark_inode_dirty * is not called, so a filesystem that actually does store data in .write_inode @@ -973,7 +974,7 @@ static int simple_write_end(struct file *file, struct address_space *mapping, } /* * No need to use i_size_read() here, the i_size - * cannot change under us because we hold the i_mutex. + * cannot change under us because we hold the i_rwsem. */ if (last_pos > inode->i_size) i_size_write(inode, last_pos); diff --git a/fs/locks.c b/fs/locks.c index f96024feab176..559f02aa41722 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1794,7 +1794,7 @@ generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **pr /* * In the delegation case we need mutual exclusion with - * a number of operations that take the i_mutex. We trylock + * a number of operations that take the i_rwsem. 
We trylock * because delegations are an optional optimization, and if * there's some chance of a conflict--we'd rather not * bother, maybe that's a sign this just isn't a good file to diff --git a/fs/namei.c b/fs/namei.c index 981da44e12919..f5c157290ce2e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1469,7 +1469,7 @@ static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped, int ret = 0; while (flags & DCACHE_MANAGED_DENTRY) { - /* Allow the filesystem to manage the transit without i_mutex + /* Allow the filesystem to manage the transit without i_rwsem * being held. */ if (flags & DCACHE_MANAGE_TRANSIT) { ret = path->dentry->d_op->d_manage(path, false); @@ -2945,7 +2945,7 @@ EXPORT_SYMBOL(try_lookup_noperm); * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. * - * The caller must hold base->i_mutex. + * The caller must hold base->i_rwsem. */ struct dentry *lookup_noperm(struct qstr *name, struct dentry *base) { @@ -2971,7 +2971,7 @@ EXPORT_SYMBOL(lookup_noperm); * * This can be used for in-kernel filesystem clients such as file servers. * - * The caller must hold base->i_mutex. + * The caller must hold base->i_rwsem. */ struct dentry *lookup_one(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base) @@ -4542,13 +4542,13 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * @dentry: victim * @delegated_inode: returns victim inode, if the inode is delegated. * - * The caller must hold dir->i_mutex. + * The caller must hold dir->i_rwsem exclusively. * * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and * return a reference to the inode in delegated_inode. The caller * should then break the delegation on that inode and retry. Because * breaking a delegation may take a long time, the caller should drop - * dir->i_mutex before doing so. + * dir->i_rwsem before doing so. 
* * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not @@ -4607,7 +4607,7 @@ EXPORT_SYMBOL(vfs_unlink); /* * Make sure that the actual truncation of the file will occur outside its - * directory's i_mutex. Truncate can take a long time if there is a lot of + * directory's i_rwsem. Truncate can take a long time if there is a lot of * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ @@ -4785,13 +4785,13 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn * @new_dentry: where to create the new link * @delegated_inode: returns inode needing a delegation break * - * The caller must hold dir->i_mutex + * The caller must hold dir->i_rwsem exclusively. * * If vfs_link discovers a delegation on the to-be-linked file in need * of breaking, it will return -EWOULDBLOCK and return a reference to the * inode in delegated_inode. The caller should then break the delegation * and retry. Because breaking a delegation may take a long time, the - * caller should drop the i_mutex before doing so. + * caller should drop the i_rwsem before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not @@ -4987,7 +4987,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * c) we may have to lock up to _four_ objects - parents and victim (if it exists), * and source (if it's a non-directory or a subdirectory that moves to * different parent). - * And that - after we got ->i_mutex on parents (until then we don't know + * And that - after we got ->i_rwsem on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. 
We rely on the fact that tree topology may change * only under ->s_vfs_rename_mutex _and_ that parent of the object we @@ -4999,9 +4999,9 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * has no more than 1 dentry. If "hybrid" objects will ever appear, * we'd better make sure that there's no link(2) for them. * d) conversion from fhandle to dentry may come in the wrong moment - when - * we are removing the target. Solution: we will have to grab ->i_mutex + * we are removing the target. Solution: we will have to grab ->i_rwsem * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on - * ->i_mutex on parents, which works but leads to some truly excessive + * ->i_rwsem on parents, which works but leads to some truly excessive * locking]. */ int vfs_rename(struct renamedata *rd) diff --git a/fs/namespace.c b/fs/namespace.c index e13d9ab4f5649..8a1bfdf862f89 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2053,7 +2053,7 @@ out: * detach_mounts allows lazily unmounting those mounts instead of * leaking them. * - * The caller may hold dentry->d_inode->i_mutex. + * The caller may hold dentry->d_inode->i_rwsem. */ void __detach_mounts(struct dentry *dentry) { diff --git a/fs/stack.c b/fs/stack.c index f189201199443..d8c782e064e3e 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -3,7 +3,7 @@ #include #include -/* does _NOT_ require i_mutex to be held. +/* does _NOT_ require i_rwsem to be held. * * This function cannot be inlined since i_size_{read,write} is rather * heavy-weight on 32-bit systems @@ -41,7 +41,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) * If CONFIG_SMP or CONFIG_PREEMPTION on 32-bit, it's vital for * fsstack_copy_inode_size() to hold some lock around * i_size_write(), otherwise i_size_read() may spin forever (see - * include/linux/fs.h). We don't necessarily hold i_mutex when this + * include/linux/fs.h). 
We don't necessarily hold i_rwsem when this * is called, so take i_lock for that case. * * And if on 32-bit, continue our effort to keep the two halves of diff --git a/fs/xattr.c b/fs/xattr.c index 8ec5b0204bfdc..c32e7d56a5d34 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -215,7 +215,7 @@ EXPORT_SYMBOL(__vfs_setxattr); * * returns the result of the internal setxattr or setsecurity operations. * - * This function requires the caller to lock the inode's i_mutex before it + * This function requires the caller to lock the inode's i_rwsem before it * is executed. It also assumes that the caller will make the appropriate * permission checks. */ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 25c4a5afbd443..cfb0dd1ea49c7 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -230,7 +230,7 @@ struct handle_to_path_ctx { * directory. The name should be stored in the @name (with the * understanding that it is already pointing to a %NAME_MAX+1 sized * buffer. get_name() should return %0 on success, a negative error code - * or error. @get_name will be called without @parent->i_mutex held. + * or error. @get_name will be called without @parent->i_rwsem held. * * get_parent: * @get_parent should find the parent directory for the given @child which @@ -247,7 +247,7 @@ struct handle_to_path_ctx { * @commit_metadata should commit metadata changes to stable storage. 
* * Locking rules: - * get_parent is called with child->d_inode->i_mutex down + * get_parent is called with child->d_inode->i_rwsem down * get_name is not (which is possibly inconsistent) */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 1d9586a78041b..09e3e80b0528a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -837,7 +837,7 @@ static inline void inode_fake_hash(struct inode *inode) } /* - * inode->i_mutex nesting subclasses for the lock validator: + * inode->i_rwsem nesting subclasses for the lock validator: * * 0: the object of the current VFS operation * 1: parent @@ -989,7 +989,7 @@ static inline loff_t i_size_read(const struct inode *inode) /* * NOTE: unlike i_size_read(), i_size_write() does need locking around it - * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount + * (normally i_rwsem), otherwise on 32bit/SMP an update of i_size_seqcount * can be lost, resulting in subsequent i_size_read() calls spinning forever. */ static inline void i_size_write(struct inode *inode, loff_t i_size) @@ -1921,7 +1921,7 @@ static inline void sb_end_intwrite(struct super_block *sb) * freeze protection should be the outermost lock. In particular, we have: * * sb_start_write - * -> i_mutex (write path, truncate, directory ops, ...) + * -> i_rwsem (write path, truncate, directory ops, ...) * -> s_umount (freeze_super, thaw_super) */ static inline void sb_start_write(struct super_block *sb) diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h index 2b1f74b240707..0cc2fa283305b 100644 --- a/include/linux/fs_stack.h +++ b/include/linux/fs_stack.h @@ -3,7 +3,7 @@ #define _LINUX_FS_STACK_H /* This file defines generic functions used primarily by stackable - * filesystems; none of these functions require i_mutex to be held. + * filesystems; none of these functions require i_rwsem to be held. 
*/ #include diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 06cc8888199e8..c334f82ed385a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -19,7 +19,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) return &sb->s_dquot; } -/* i_mutex must being held */ +/* i_rwsem must be held */ static inline bool is_quota_modification(struct mnt_idmap *idmap, struct inode *inode, struct iattr *ia) {