// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "libxfs_priv.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode.h"
#include "xfs_trans.h"

struct kmem_cache	*xfs_buf_item_cache;
struct kmem_cache	*xfs_ili_cache;		/* inode log item cache */

/*
 * Following functions from fs/xfs/xfs_trans_buf.c
 */

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.
 */
struct xfs_buf *
xfs_trans_buf_item_match(
	xfs_trans_t		*tp,
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps)
{
	struct xfs_log_item	*lip;
	struct xfs_buf_log_item	*blip;
	int			len = 0;
	int			i;

	for (i = 0; i < nmaps; i++)
		len += map[i].bm_len;

	list_for_each_entry(lip, &tp->t_items, li_trans) {
		blip = (struct xfs_buf_log_item *)lip;
		if (blip->bli_item.li_type == XFS_LI_BUF &&
		    blip->bli_buf->b_target->bt_bdev == btp->bt_bdev &&
		    xfs_buf_daddr(blip->bli_buf) == map[0].bm_bn &&
		    blip->bli_buf->b_length == len) {
			ASSERT(blip->bli_buf->b_map_count == nmaps);
			return blip->bli_buf;
		}
	}

	return NULL;
}
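
/*
 * Illustrative sketch, not part of the original file: a trans_get_buf()-style
 * caller would typically probe the transaction with xfs_trans_buf_item_match()
 * before reading the buffer again, so a buffer that is already joined to the
 * transaction is simply reused.  The helper name and fallback path below are
 * assumptions for illustration only and are guarded out of the build.
 */
#if 0
static struct xfs_buf *
example_trans_get_buf(
	struct xfs_trans	*tp,
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps)
{
	struct xfs_buf		*bp;

	/* Reuse a buffer this transaction has already picked up. */
	bp = xfs_trans_buf_item_match(tp, btp, map, nmaps);
	if (bp != NULL)
		return bp;

	/*
	 * Otherwise the real caller would read or allocate the buffer and
	 * attach a buf log item to it via xfs_buf_item_init().
	 */
	return NULL;
}
#endif
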
/*
 * The following are from fs/xfs/xfs_buf_item.c
 */

static const struct xfs_item_ops xfs_buf_item_ops = {
};

/*
 * Allocate a new buf log item to go with the given buffer.
 * Set the buffer's b_log_item field to point to the new
 * buf log item.  If there are other items attached to the
 * buffer (see xfs_buf_attach_iodone() below), then put the
 * buf log item at the front.
 */
void
xfs_buf_item_init(
	struct xfs_buf		*bp,
	xfs_mount_t		*mp)
{
	xfs_log_item_t		*lip;
	xfs_buf_log_item_t	*bip;

#ifdef LI_DEBUG
	fprintf(stderr, "buf_item_init for buffer %p\n", bp);
#endif

	/*
	 * Check to see if there is already a buf log item for
	 * this buffer.  If there is, it is guaranteed to be
	 * the first.  If we do already have one, there is
	 * nothing to do here so return.
	 */
	if (bp->b_log_item != NULL) {
		lip = bp->b_log_item;
		if (lip->li_type == XFS_LI_BUF) {
#ifdef LI_DEBUG
			fprintf(stderr,
				"reused buf item %p for pre-logged buffer %p\n",
				lip, bp);
#endif
			return;
		}
	}

	bip = kmem_cache_zalloc(xfs_buf_item_cache, 0);
#ifdef LI_DEBUG
	fprintf(stderr, "adding buf item %p for not-logged buffer %p\n",
		bip, bp);
#endif
	xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
	bip->bli_buf = bp;
	bip->__bli_format.blf_type = XFS_LI_BUF;
	bip->__bli_format.blf_blkno = (int64_t)xfs_buf_daddr(bp);
	bip->__bli_format.blf_len = (unsigned short)bp->b_length;
	bp->b_log_item = bip;
}


/*
 * Mark bytes first through last inclusive as dirty in the buf
 * item's bitmap.
 */
void
xfs_buf_item_log(
	xfs_buf_log_item_t	*bip,
	uint			first,
	uint			last)
{
	/*
	 * Mark the item as having some dirty data for
	 * quick reference in xfs_buf_item_dirty.
	 */
	bip->bli_flags |= XFS_BLI_DIRTY;
}
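
/*
 * Illustrative sketch, not part of the original file: the two helpers above
 * are normally driven from a trans_log_buf()-style path, which makes sure the
 * buffer carries a buf log item and then marks the logged byte range dirty.
 * The wrapper name below is hypothetical and the code is guarded out of the
 * build; it only shows the intended call order.
 */
#if 0
static void
example_trans_log_buf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,
	uint			first,
	uint			last)
{
	/* Attach a buf log item to bp if it does not already have one. */
	xfs_buf_item_init(bp, tp->t_mountp);

	/* Record that bytes first..last of the buffer are now dirty. */
	xfs_buf_item_log(bp->b_log_item, first, last);
}
#endif
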

static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_inode_log_item, ili_item);
}

static uint64_t
xfs_inode_item_sort(
	struct xfs_log_item	*lip)
{
	return INODE_ITEM(lip)->ili_inode->i_ino;
}

/*
 * Prior to finally logging the inode, we have to ensure that all the
 * per-modification inode state changes are applied. This includes VFS inode
 * state updates, format conversions, verifier state synchronisation and
 * ensuring the inode buffer remains in memory whilst the inode is dirty.
 *
 * We have to be careful when we grab the inode cluster buffer due to lock
 * ordering constraints. The unlinked inode modifications (xfs_iunlink_item)
 * require AGI -> inode cluster buffer lock order. The inode cluster buffer is
 * not locked until ->precommit, so it happens after everything else has been
 * modified.
 *
 * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we
 * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we
 * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because
 * it can be called on an inode (e.g. via bumplink/droplink) before we take the
 * AGF lock modifying directory blocks.
 *
 * Rather than force a complete rework of all the transactions to call
 * xfs_trans_log_inode() once and once only at the end of every transaction, we
 * move the pinning of the inode cluster buffer to a ->precommit operation. This
 * matches how the xfs_iunlink_item locks the inode cluster buffer, and it
 * ensures that the inode cluster buffer locking is always done last in a
 * transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode
 * cluster buffer.
 *
 * If we return the inode number as the precommit sort key then we'll also
 * guarantee that the order of all inode cluster buffer locking is the same for
 * all the inodes and unlink items in the transaction.
 */
static int
xfs_inode_item_precommit(
	struct xfs_trans	*tp,
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct inode		*inode = VFS_I(ip);
	unsigned int		flags = iip->ili_dirty_flags;

	/*
	 * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
	 * don't matter - we either will need an extra transaction in 24 hours
	 * to log the timestamps, or will clear already cleared fields in the
	 * worst case.
	 */
	if (inode->i_state & I_DIRTY_TIME) {
		spin_lock(&inode->i_lock);
		inode->i_state &= ~I_DIRTY_TIME;
		spin_unlock(&inode->i_lock);
	}

	/*
	 * If we're updating the inode core or the timestamps and it's possible
	 * to upgrade this inode to bigtime format, do so now.
	 */
	if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
	    xfs_has_bigtime(ip->i_mount) &&
	    !xfs_inode_has_bigtime(ip)) {
		ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
		flags |= XFS_ILOG_CORE;
	}

	/*
	 * Inode verifiers do not check that the extent size hint is an integer
	 * multiple of the rt extent size on a directory with both rtinherit
	 * and extszinherit flags set. If we're logging a directory that is
	 * misconfigured in this way, clear the hint.
	 */
	if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
	    (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
	    (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
		ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
				   XFS_DIFLAG_EXTSZINHERIT);
		ip->i_extsize = 0;
		flags |= XFS_ILOG_CORE;
	}

	/*
	 * Record the specific change for fdatasync optimisation. This allows
	 * fdatasync to skip log forces for inodes that are only timestamp
	 * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it
	 * to XFS_ILOG_CORE so that the actual on-disk dirty tracking
	 * (ili_fields) correctly tracks that the version has changed.
	 */
	spin_lock(&iip->ili_lock);
	iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION);
	if (flags & XFS_ILOG_IVERSION)
		flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);

	if (!iip->ili_item.li_buf) {
		struct xfs_buf	*bp;
		int		error;

		/*
		 * We hold the ILOCK here, so this inode is not going to be
		 * flushed while we are here. Further, because there is no
		 * buffer attached to the item, we know that there is no IO in
		 * progress, so nothing will clear the ili_fields while we read
		 * in the buffer. Hence we can safely drop the spin lock and
		 * read the buffer knowing that the state will not change from
		 * here.
		 */
		spin_unlock(&iip->ili_lock);
		error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
		if (error)
			return error;

		/*
		 * We need an explicit buffer reference for the log item but
		 * don't want the buffer to remain attached to the transaction.
		 * Hold the buffer but release the transaction reference once
		 * we've attached the inode log item to the buffer log item
		 * list.
		 */
		xfs_buf_hold(bp);
		spin_lock(&iip->ili_lock);
		iip->ili_item.li_buf = bp;
		bp->b_flags |= _XBF_INODES;
		list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
		xfs_trans_brelse(tp, bp);
	}

	/*
	 * Always OR in the bits from the ili_last_fields field. This is to
	 * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
	 * in the eventual clearing of the ili_fields bits. See the big comment
	 * in xfs_iflush() for an explanation of this coordination mechanism.
	 */
	iip->ili_fields |= (flags | iip->ili_last_fields);
	spin_unlock(&iip->ili_lock);

	/*
	 * We are done with the log item transaction dirty state, so clear it
	 * so that it doesn't pollute future transactions.
	 */
	iip->ili_dirty_flags = 0;
	return 0;
}

static const struct xfs_item_ops xfs_inode_item_ops = {
	.iop_sort	= xfs_inode_item_sort,
	.iop_precommit	= xfs_inode_item_precommit,
};
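
/*
 * Illustrative sketch, not part of the original file: at commit time the
 * transaction code walks tp->t_items, orders the items by their ->iop_sort()
 * key (the inode number for inode items, see xfs_inode_item_sort() above) and
 * then runs ->iop_precommit() on each of them.  That ordering is what
 * guarantees the AGI -> AGF -> inode cluster buffer lock order described in
 * the comment before xfs_inode_item_precommit().  The helper below is a
 * simplified, hypothetical rendering of that flow, guarded out of the build.
 */
#if 0
static int
example_run_precommits(
	struct xfs_trans	*tp)
{
	struct xfs_log_item	*lip;
	int			error;

	/*
	 * The real code first sorts tp->t_items by the ->iop_sort() key so
	 * that inode cluster buffers are always locked in ascending inode
	 * number order.  The sort itself is omitted from this sketch.
	 */
	list_for_each_entry(lip, &tp->t_items, li_trans) {
		if (!lip->li_ops->iop_precommit)
			continue;
		error = lip->li_ops->iop_precommit(tp, lip);
		if (error)
			return error;
	}
	return 0;
}
#endif
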

/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
	xfs_inode_t		*ip,
	xfs_mount_t		*mp)
{
	struct xfs_inode_log_item *iip;

	ASSERT(ip->i_itemp == NULL);
	iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, 0);
#ifdef LI_DEBUG
	fprintf(stderr, "inode_item_init for inode %llu, iip=%p\n",
		ip->i_ino, iip);
#endif

	spin_lock_init(&iip->ili_lock);

	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
			  &xfs_inode_item_ops);
	iip->ili_inode = ip;
}
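
/*
 * Illustrative sketch, not part of the original file: callers normally do not
 * invoke xfs_inode_item_init() directly.  A trans_ijoin()-style helper creates
 * the log item the first time an inode is joined to a transaction and then
 * attaches ip->i_itemp to the transaction's item list.  The wrapper below is
 * hypothetical and guarded out of the build; it only shows the intended call
 * order.
 */
#if 0
static void
example_trans_ijoin(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	/* Lazily create the inode log item on first use. */
	if (ip->i_itemp == NULL)
		xfs_inode_item_init(ip, ip->i_mount);

	/*
	 * The real helper would now add &ip->i_itemp->ili_item to
	 * tp->t_items so that ->iop_precommit() runs for it at commit time.
	 */
}
#endif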