]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs: initialise attr fork on inode create
authorDave Chinner <dchinner@redhat.com>
Wed, 30 Jun 2021 22:28:13 +0000 (18:28 -0400)
committerEric Sandeen <sandeen@sandeen.net>
Wed, 30 Jun 2021 22:28:13 +0000 (18:28 -0400)
Source kernel commit: e6a688c3323840f3e388ba28fd2db86675b79917

When we allocate a new inode, we often need to add an attribute to
the inode as part of the create. This can happen as a result of
needing to add default ACLs or security labels before the inode is
made visible to userspace.

This is highly inefficient right now. We do the create transaction
to allocate the inode, then we do an "add attr fork" transaction to
modify the just created empty inode to set the inode fork offset to
allow attributes to be stored, then we go and do the attribute
creation.

This means 3 transactions instead of 1 to allocate an inode, and
this greatly increases the load on the CIL commit code, resulting in
excessive contention on the CIL spin locks and performance
degradation:

18.99%  [kernel]                [k] __pv_queued_spin_lock_slowpath
3.57%  [kernel]                [k] do_raw_spin_lock
2.51%  [kernel]                [k] __raw_callee_save___pv_queued_spin_unlock
2.48%  [kernel]                [k] memcpy
2.34%  [kernel]                [k] xfs_log_commit_cil

The typical profile resulting from running fsmark on a selinux enabled
filesytem is adds this overhead to the create path:

- 15.30% xfs_init_security
- 15.23% security_inode_init_security
- 13.05% xfs_initxattrs
- 12.94% xfs_attr_set
- 6.75% xfs_bmap_add_attrfork
- 5.51% xfs_trans_commit
- 5.48% __xfs_trans_commit
- 5.35% xfs_log_commit_cil
- 3.86% _raw_spin_lock
- do_raw_spin_lock
__pv_queued_spin_lock_slowpath
- 0.70% xfs_trans_alloc
0.52% xfs_trans_reserve
- 5.41% xfs_attr_set_args
- 5.39% xfs_attr_set_shortform.constprop.0
- 4.46% xfs_trans_commit
- 4.46% __xfs_trans_commit
- 4.33% xfs_log_commit_cil
- 2.74% _raw_spin_lock
- do_raw_spin_lock
__pv_queued_spin_lock_slowpath
0.60% xfs_inode_item_format
0.90% xfs_attr_try_sf_addname
- 1.99% selinux_inode_init_security
- 1.02% security_sid_to_context_force
- 1.00% security_sid_to_context_core
- 0.92% sidtab_entry_to_string
- 0.90% sidtab_sid2str_get
0.59% sidtab_sid2str_put.part.0
- 0.82% selinux_determine_inode_label
- 0.77% security_transition_sid
0.70% security_compute_sid.part.0

And fsmark creation rate performance drops by ~25%. The key point to
note here is that half the additional overhead comes from adding the
attribute fork to the newly created inode. That's crazy, considering
we can do this same thing at inode create time with a couple of
lines of code and no extra overhead.

So, if we know we are going to add an attribute immediately after
creating the inode, let's just initialise the attribute fork inside
the create transaction and chop that whole chunk of code out of
the create fast path. This completely removes the performance
drop caused by enabling SELinux, and the profile looks like:

- 8.99% xfs_init_security
- 9.00% security_inode_init_security
- 6.43% xfs_initxattrs
- 6.37% xfs_attr_set
- 5.45% xfs_attr_set_args
- 5.42% xfs_attr_set_shortform.constprop.0
- 4.51% xfs_trans_commit
- 4.54% __xfs_trans_commit
- 4.59% xfs_log_commit_cil
- 2.67% _raw_spin_lock
- 3.28% do_raw_spin_lock
3.08% __pv_queued_spin_lock_slowpath
0.66% xfs_inode_item_format
- 0.90% xfs_attr_try_sf_addname
- 0.60% xfs_trans_alloc
- 2.35% selinux_inode_init_security
- 1.25% security_sid_to_context_force
- 1.21% security_sid_to_context_core
- 1.19% sidtab_entry_to_string
- 1.20% sidtab_sid2str_get
- 0.86% sidtab_sid2str_put.part.0
- 0.62% _raw_spin_lock_irqsave
- 0.77% do_raw_spin_lock
__pv_queued_spin_lock_slowpath
- 0.84% selinux_determine_inode_label
- 0.83% security_transition_sid
0.86% security_compute_sid.part.0

Which indicates the XFS overhead of creating the selinux xattr has
been halved. This doesn't fix the CIL lock contention problem, just
means it's not a limiting factor for this workload. Lock contention
in the security subsystems is going to be an issue soon, though...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[djwong: fix compilation error when CONFIG_SECURITY=n]
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
libxfs/xfs_bmap.c
libxfs/xfs_inode_fork.c
libxfs/xfs_inode_fork.h

index 52e680dec6d494c6cb1e9c685f81ff9cfc278460..34f72001c767767b80d7019684c116f18fc698c6 100644 (file)
@@ -1020,7 +1020,9 @@ xfs_bmap_add_attrfork_local(
        return -EFSCORRUPTED;
 }
 
-/* Set an inode attr fork off based on the format */
+/*
+ * Set an inode attr fork offset based on the format of the data fork.
+ */
 int
 xfs_bmap_set_attrforkoff(
        struct xfs_inode        *ip,
@@ -1085,10 +1087,7 @@ xfs_bmap_add_attrfork(
                goto trans_cancel;
        ASSERT(ip->i_afp == NULL);
 
-       ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone,
-                                     GFP_KERNEL | __GFP_NOFAIL);
-
-       ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
+       ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
        ip->i_afp->if_flags = XFS_IFEXTENTS;
        logflags = 0;
        switch (ip->i_df.if_format) {
index cdf099385a0cecd57a679cd022d45594651fe3f6..731c5109bae5bd64585aa82913819517dc17306e 100644 (file)
@@ -280,6 +280,19 @@ xfs_dfork_attr_shortform_size(
        return be16_to_cpu(atp->hdr.totsize);
 }
 
+struct xfs_ifork *
+xfs_ifork_alloc(
+       enum xfs_dinode_fmt     format,
+       xfs_extnum_t            nextents)
+{
+       struct xfs_ifork        *ifp;
+
+       ifp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL);
+       ifp->if_format = format;
+       ifp->if_nextents = nextents;
+       return ifp;
+}
+
 int
 xfs_iformat_attr_fork(
        struct xfs_inode        *ip,
@@ -291,11 +304,8 @@ xfs_iformat_attr_fork(
         * Initialize the extent count early, as the per-format routines may
         * depend on it.
         */
-       ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL);
-       ip->i_afp->if_format = dip->di_aformat;
-       if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */
-               ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
-       ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents);
+       ip->i_afp = xfs_ifork_alloc(dip->di_aformat,
+                               be16_to_cpu(dip->di_anextents));
 
        switch (ip->i_afp->if_format) {
        case XFS_DINODE_FMT_LOCAL:
index 9e2137cd73724b965c4dca278ec3114d8a13b632..a0717ab0e5c574422fe800d7480ff7c555b21041 100644 (file)
@@ -141,6 +141,8 @@ static inline int8_t xfs_ifork_format(struct xfs_ifork *ifp)
        return ifp->if_format;
 }
 
+struct xfs_ifork *xfs_ifork_alloc(enum xfs_dinode_fmt format,
+                               xfs_extnum_t nextents);
 struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
 
 int            xfs_iformat_data_fork(struct xfs_inode *, struct xfs_dinode *);