]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2bd0ea18 | 2 | /* |
da23017d NS |
3 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. |
4 | * All Rights Reserved. | |
2bd0ea18 NS |
5 | */ |
6 | ||
9c799827 | 7 | #include "libxfs_priv.h" |
b626fb59 DC |
8 | #include "xfs_fs.h" |
9 | #include "xfs_shared.h" | |
10 | #include "xfs_format.h" | |
11 | #include "xfs_log_format.h" | |
12 | #include "xfs_trans_resv.h" | |
13 | #include "xfs_mount.h" | |
14 | #include "xfs_inode_buf.h" | |
15 | #include "xfs_inode_fork.h" | |
16 | #include "xfs_inode.h" | |
17 | #include "xfs_trans.h" | |
2bd0ea18 | 18 | |
2e1394fc DW |
/* Allocation caches for the two log item types created in this file. */
struct kmem_cache	*xfs_buf_item_cache;	/* buffer log item cache */
struct kmem_cache	*xfs_ili_cache;		/* inode log item cache */
2bd0ea18 | 21 | |
5e656dbb BN |
22 | /* |
23 | * Following functions from fs/xfs/xfs_trans_buf.c | |
24 | */ | |
25 | ||
2bd0ea18 NS |
26 | /* |
27 | * Check to see if a buffer matching the given parameters is already | |
c40bdaa2 | 28 | * a part of the given transaction. |
2bd0ea18 | 29 | */ |
167137fe | 30 | struct xfs_buf * |
2bd0ea18 | 31 | xfs_trans_buf_item_match( |
5bfc0742 | 32 | xfs_trans_t *tp, |
75c8b434 | 33 | struct xfs_buftarg *btp, |
a2ceac1f DC |
34 | struct xfs_buf_map *map, |
35 | int nmaps) | |
2bd0ea18 | 36 | { |
2fdd378a DC |
37 | struct xfs_log_item *lip; |
38 | struct xfs_buf_log_item *blip; | |
a2ceac1f DC |
39 | int len = 0; |
40 | int i; | |
41 | ||
42 | for (i = 0; i < nmaps; i++) | |
43 | len += map[i].bm_len; | |
c40bdaa2 | 44 | |
2fdd378a DC |
45 | list_for_each_entry(lip, &tp->t_items, li_trans) { |
46 | blip = (struct xfs_buf_log_item *)lip; | |
47 | if (blip->bli_item.li_type == XFS_LI_BUF && | |
ab434d12 | 48 | blip->bli_buf->b_target->bt_bdev == btp->bt_bdev && |
d4aaa66b | 49 | xfs_buf_daddr(blip->bli_buf) == map[0].bm_bn && |
c0594dd6 | 50 | blip->bli_buf->b_length == len) { |
a2ceac1f | 51 | ASSERT(blip->bli_buf->b_map_count == nmaps); |
2fdd378a | 52 | return blip->bli_buf; |
a2ceac1f | 53 | } |
2fdd378a | 54 | } |
c40bdaa2 | 55 | |
2fdd378a | 56 | return NULL; |
2bd0ea18 | 57 | } |
5e656dbb BN |
58 | /* |
59 | * The following are from fs/xfs/xfs_buf_item.c | |
60 | */ | |
61 | ||
05a3a389 DW |
62 | static const struct xfs_item_ops xfs_buf_item_ops = { |
63 | }; | |
64 | ||
2bd0ea18 NS |
65 | /* |
66 | * Allocate a new buf log item to go with the given buffer. | |
37d086ca | 67 | * Set the buffer's b_log_item field to point to the new |
2bd0ea18 NS |
68 | * buf log item. If there are other item's attached to the |
69 | * buffer (see xfs_buf_attach_iodone() below), then put the | |
70 | * buf log item at the front. | |
71 | */ | |
72 | void | |
73 | xfs_buf_item_init( | |
167137fe | 74 | struct xfs_buf *bp, |
5bfc0742 | 75 | xfs_mount_t *mp) |
2bd0ea18 NS |
76 | { |
77 | xfs_log_item_t *lip; | |
78 | xfs_buf_log_item_t *bip; | |
79 | ||
80 | #ifdef LI_DEBUG | |
81 | fprintf(stderr, "buf_item_init for buffer %p\n", bp); | |
82 | #endif | |
83 | ||
84 | /* | |
85 | * Check to see if there is already a buf log item for | |
5000d01d | 86 | * this buffer. If there is, it is guaranteed to be |
2bd0ea18 NS |
87 | * the first. If we do already have one, there is |
88 | * nothing to do here so return. | |
89 | */ | |
37d086ca CM |
90 | if (bp->b_log_item != NULL) { |
91 | lip = bp->b_log_item; | |
2bd0ea18 NS |
92 | if (lip->li_type == XFS_LI_BUF) { |
93 | #ifdef LI_DEBUG | |
94 | fprintf(stderr, | |
95 | "reused buf item %p for pre-logged buffer %p\n", | |
96 | lip, bp); | |
97 | #endif | |
98 | return; | |
99 | } | |
100 | } | |
101 | ||
2e1394fc | 102 | bip = kmem_cache_zalloc(xfs_buf_item_cache, 0); |
2bd0ea18 NS |
103 | #ifdef LI_DEBUG |
104 | fprintf(stderr, "adding buf item %p for not-logged buffer %p\n", | |
105 | bip, bp); | |
106 | #endif | |
05a3a389 | 107 | xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); |
2bd0ea18 | 108 | bip->bli_buf = bp; |
cebe02e2 | 109 | bip->__bli_format.blf_type = XFS_LI_BUF; |
d4aaa66b | 110 | bip->__bli_format.blf_blkno = (int64_t)xfs_buf_daddr(bp); |
c0594dd6 | 111 | bip->__bli_format.blf_len = (unsigned short)bp->b_length; |
37d086ca | 112 | bp->b_log_item = bip; |
2bd0ea18 NS |
113 | } |
114 | ||
115 | ||
116 | /* | |
117 | * Mark bytes first through last inclusive as dirty in the buf | |
118 | * item's bitmap. | |
119 | */ | |
120 | void | |
121 | xfs_buf_item_log( | |
122 | xfs_buf_log_item_t *bip, | |
123 | uint first, | |
124 | uint last) | |
125 | { | |
126 | /* | |
127 | * Mark the item as having some dirty data for | |
128 | * quick reference in xfs_buf_item_dirty. | |
129 | */ | |
130 | bip->bli_flags |= XFS_BLI_DIRTY; | |
131 | } | |
132 | ||
a565e345 DC |
133 | static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) |
134 | { | |
135 | return container_of(lip, struct xfs_inode_log_item, ili_item); | |
136 | } | |
137 | ||
138 | static uint64_t | |
139 | xfs_inode_item_sort( | |
140 | struct xfs_log_item *lip) | |
141 | { | |
142 | return INODE_ITEM(lip)->ili_inode->i_ino; | |
143 | } | |
144 | ||
145 | /* | |
146 | * Prior to finally logging the inode, we have to ensure that all the | |
147 | * per-modification inode state changes are applied. This includes VFS inode | |
148 | * state updates, format conversions, verifier state synchronisation and | |
149 | * ensuring the inode buffer remains in memory whilst the inode is dirty. | |
150 | * | |
151 | * We have to be careful when we grab the inode cluster buffer due to lock | |
152 | * ordering constraints. The unlinked inode modifications (xfs_iunlink_item) | |
153 | * require AGI -> inode cluster buffer lock order. The inode cluster buffer is | |
154 | * not locked until ->precommit, so it happens after everything else has been | |
155 | * modified. | |
156 | * | |
157 | * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we | |
158 | * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we | |
159 | * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because | |
160 | * it can be called on a inode (e.g. via bumplink/droplink) before we take the | |
161 | * AGF lock modifying directory blocks. | |
162 | * | |
163 | * Rather than force a complete rework of all the transactions to call | |
164 | * xfs_trans_log_inode() once and once only at the end of every transaction, we | |
165 | * move the pinning of the inode cluster buffer to a ->precommit operation. This | |
166 | * matches how the xfs_iunlink_item locks the inode cluster buffer, and it | |
167 | * ensures that the inode cluster buffer locking is always done last in a | |
168 | * transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode | |
169 | * cluster buffer. | |
170 | * | |
171 | * If we return the inode number as the precommit sort key then we'll also | |
172 | * guarantee that the order all inode cluster buffer locking is the same all the | |
173 | * inodes and unlink items in the transaction. | |
174 | */ | |
175 | static int | |
176 | xfs_inode_item_precommit( | |
177 | struct xfs_trans *tp, | |
178 | struct xfs_log_item *lip) | |
179 | { | |
180 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | |
181 | struct xfs_inode *ip = iip->ili_inode; | |
182 | struct inode *inode = VFS_I(ip); | |
183 | unsigned int flags = iip->ili_dirty_flags; | |
184 | ||
185 | /* | |
186 | * Don't bother with i_lock for the I_DIRTY_TIME check here, as races | |
187 | * don't matter - we either will need an extra transaction in 24 hours | |
188 | * to log the timestamps, or will clear already cleared fields in the | |
189 | * worst case. | |
190 | */ | |
191 | if (inode->i_state & I_DIRTY_TIME) { | |
192 | spin_lock(&inode->i_lock); | |
193 | inode->i_state &= ~I_DIRTY_TIME; | |
194 | spin_unlock(&inode->i_lock); | |
195 | } | |
196 | ||
197 | /* | |
198 | * If we're updating the inode core or the timestamps and it's possible | |
199 | * to upgrade this inode to bigtime format, do so now. | |
200 | */ | |
201 | if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) && | |
202 | xfs_has_bigtime(ip->i_mount) && | |
203 | !xfs_inode_has_bigtime(ip)) { | |
204 | ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME; | |
205 | flags |= XFS_ILOG_CORE; | |
206 | } | |
207 | ||
208 | /* | |
209 | * Inode verifiers do not check that the extent size hint is an integer | |
210 | * multiple of the rt extent size on a directory with both rtinherit | |
211 | * and extszinherit flags set. If we're logging a directory that is | |
212 | * misconfigured in this way, clear the hint. | |
213 | */ | |
214 | if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && | |
215 | (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && | |
216 | (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { | |
217 | ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | | |
218 | XFS_DIFLAG_EXTSZINHERIT); | |
219 | ip->i_extsize = 0; | |
220 | flags |= XFS_ILOG_CORE; | |
221 | } | |
222 | ||
223 | /* | |
224 | * Record the specific change for fdatasync optimisation. This allows | |
225 | * fdatasync to skip log forces for inodes that are only timestamp | |
226 | * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it | |
227 | * to XFS_ILOG_CORE so that the actual on-disk dirty tracking | |
228 | * (ili_fields) correctly tracks that the version has changed. | |
229 | */ | |
230 | spin_lock(&iip->ili_lock); | |
231 | iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION); | |
232 | if (flags & XFS_ILOG_IVERSION) | |
233 | flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE); | |
234 | ||
235 | if (!iip->ili_item.li_buf) { | |
236 | struct xfs_buf *bp; | |
237 | int error; | |
238 | ||
239 | /* | |
240 | * We hold the ILOCK here, so this inode is not going to be | |
241 | * flushed while we are here. Further, because there is no | |
242 | * buffer attached to the item, we know that there is no IO in | |
243 | * progress, so nothing will clear the ili_fields while we read | |
244 | * in the buffer. Hence we can safely drop the spin lock and | |
245 | * read the buffer knowing that the state will not change from | |
246 | * here. | |
247 | */ | |
248 | spin_unlock(&iip->ili_lock); | |
249 | error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp); | |
250 | if (error) | |
251 | return error; | |
252 | ||
253 | /* | |
254 | * We need an explicit buffer reference for the log item but | |
255 | * don't want the buffer to remain attached to the transaction. | |
256 | * Hold the buffer but release the transaction reference once | |
257 | * we've attached the inode log item to the buffer log item | |
258 | * list. | |
259 | */ | |
260 | xfs_buf_hold(bp); | |
261 | spin_lock(&iip->ili_lock); | |
262 | iip->ili_item.li_buf = bp; | |
263 | bp->b_flags |= _XBF_INODES; | |
264 | list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list); | |
265 | xfs_trans_brelse(tp, bp); | |
266 | } | |
267 | ||
268 | /* | |
269 | * Always OR in the bits from the ili_last_fields field. This is to | |
270 | * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines | |
271 | * in the eventual clearing of the ili_fields bits. See the big comment | |
272 | * in xfs_iflush() for an explanation of this coordination mechanism. | |
273 | */ | |
274 | iip->ili_fields |= (flags | iip->ili_last_fields); | |
275 | spin_unlock(&iip->ili_lock); | |
276 | ||
277 | /* | |
278 | * We are done with the log item transaction dirty state, so clear it so | |
279 | * that it doesn't pollute future transactions. | |
280 | */ | |
281 | iip->ili_dirty_flags = 0; | |
282 | return 0; | |
283 | } | |
284 | ||
05a3a389 | 285 | static const struct xfs_item_ops xfs_inode_item_ops = { |
a565e345 DC |
286 | .iop_sort = xfs_inode_item_sort, |
287 | .iop_precommit = xfs_inode_item_precommit, | |
05a3a389 DW |
288 | }; |
289 | ||
2bd0ea18 NS |
290 | /* |
291 | * Initialize the inode log item for a newly allocated (in-core) inode. | |
292 | */ | |
293 | void | |
294 | xfs_inode_item_init( | |
ed8d09e1 CH |
295 | xfs_inode_t *ip, |
296 | xfs_mount_t *mp) | |
2bd0ea18 | 297 | { |
ed8d09e1 | 298 | struct xfs_inode_log_item *iip; |
2bd0ea18 NS |
299 | |
300 | ASSERT(ip->i_itemp == NULL); | |
2e1394fc | 301 | iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, 0); |
2bd0ea18 NS |
302 | #ifdef LI_DEBUG |
303 | fprintf(stderr, "inode_item_init for inode %llu, iip=%p\n", | |
304 | ip->i_ino, iip); | |
305 | #endif | |
306 | ||
686bddf9 DC |
307 | spin_lock_init(&iip->ili_lock); |
308 | ||
05a3a389 DW |
309 | xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, |
310 | &xfs_inode_item_ops); | |
2bd0ea18 | 311 | iip->ili_inode = ip; |
2bd0ea18 | 312 | } |