In some setups directories can have many (usually negative) dentries.
Hence __fsnotify_update_child_dentry_flags() function can take a
significant amount of time. Since the bulk of this function happens
under inode->i_lock this causes a significant contention on the lock
when we remove the watch from the directory as the
__fsnotify_update_child_dentry_flags() call from fsnotify_recalc_mask()
races with __fsnotify_update_child_dentry_flags() calls from
__fsnotify_parent() happening on children. This can lead upto softlockup
reports reported by users.
Fix the problem by calling fsnotify_update_children_dentry_flags() to
set PARENT_WATCHED flags only when parent starts watching children.
When parent stops watching children, clear false positive PARENT_WATCHED
flags lazily in __fsnotify_parent() for each accessed child.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
* parent cares. Thus when an event happens on a child it can quickly tell
* if there is a need to find a parent and send the event to the parent.
*/
-void __fsnotify_update_child_dentry_flags(struct inode *inode)
+void fsnotify_set_children_dentry_flags(struct inode *inode)
{
struct dentry *alias;
- int watched;
if (!S_ISDIR(inode->i_mode))
return;
- /* determine if the children should tell inode about their events */
- watched = fsnotify_inode_watches_children(inode);
-
spin_lock(&inode->i_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
continue;
spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
- if (watched)
- child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
- else
- child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+ child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
spin_unlock(&alias->d_lock);
spin_unlock(&inode->i_lock);
}
+/*
+ * Lazily clear false positive PARENT_WATCHED flag for child whose parent had
+ * stopped watching children.
+ */
+static void fsnotify_clear_child_dentry_flag(struct inode *pinode,
+ struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ /*
+ * d_lock is a sufficient barrier to prevent observing a non-watched
+ * parent state from before the fsnotify_set_children_dentry_flags()
+ * or fsnotify_update_flags() call that had set PARENT_WATCHED.
+ */
+ if (!fsnotify_inode_watches_children(pinode))
+ dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+ spin_unlock(&dentry->d_lock);
+}
+
/* Are inode/sb/mount interested in parent and name info with this event? */
static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask,
__u32 mask)
p_inode = parent->d_inode;
p_mask = fsnotify_inode_watches_children(p_inode);
if (unlikely(parent_watched && !p_mask))
- __fsnotify_update_child_dentry_flags(p_inode);
+ fsnotify_clear_child_dentry_flag(p_inode, dentry);
/*
* Include parent/name in notification either if some notification
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
*/
-extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
+extern void fsnotify_set_children_dentry_flags(struct inode *inode);
extern struct kmem_cache *fsnotify_mark_connector_cachep;
return fsnotify_update_iref(conn, want_iref);
}
+static bool fsnotify_conn_watches_children(
+ struct fsnotify_mark_connector *conn)
+{
+ if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
+ return false;
+
+ return fsnotify_inode_watches_children(fsnotify_conn_inode(conn));
+}
+
+static void fsnotify_conn_set_children_dentry_flags(
+ struct fsnotify_mark_connector *conn)
+{
+ if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
+ return;
+
+ fsnotify_set_children_dentry_flags(fsnotify_conn_inode(conn));
+}
+
/*
* Calculate mask of events for a list of marks. The caller must make sure
* connector and connector->obj cannot disappear under us. Callers achieve
*/
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
+ bool update_children;
+
if (!conn)
return;
spin_lock(&conn->lock);
+ update_children = !fsnotify_conn_watches_children(conn);
__fsnotify_recalc_mask(conn);
+ update_children &= fsnotify_conn_watches_children(conn);
spin_unlock(&conn->lock);
- if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
- __fsnotify_update_child_dentry_flags(
- fsnotify_conn_inode(conn));
+ /*
+ * Set children's PARENT_WATCHED flags only if parent started watching.
+ * When parent stops watching, we clear false positive PARENT_WATCHED
+ * flags lazily in __fsnotify_parent().
+ */
+ if (update_children)
+ fsnotify_conn_set_children_dentry_flags(conn);
}
/* Free all connectors queued for freeing once SRCU period ends */
static inline int fsnotify_inode_watches_children(struct inode *inode)
{
+ __u32 parent_mask = READ_ONCE(inode->i_fsnotify_mask);
+
/* FS_EVENT_ON_CHILD is set if the inode may care */
- if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD))
+ if (!(parent_mask & FS_EVENT_ON_CHILD))
return 0;
/* this inode might care about child events, does it care about the
* specific set of events that can happen on a child? */
- return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD;
+ return parent_mask & FS_EVENTS_POSS_ON_CHILD;
}
/*
/*
* Serialisation of setting PARENT_WATCHED on the dentries is provided
* by d_lock. If inotify_inode_watched changes after we have taken
- * d_lock, the following __fsnotify_update_child_dentry_flags call will
+ * d_lock, the following fsnotify_set_children_dentry_flags call will
* find our entry, so it will spin until we complete here, and update
* us with the new state.
*/