// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "libxfs_priv.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_trace.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"

static struct kmem_cache	*xfs_defer_pending_cache;

/*
 * Deferred Operations in XFS
 *
 * Due to the way locking rules work in XFS, certain transactions (block
 * mapping and unmapping, typically) have permanent reservations so that
 * we can roll the transaction to adhere to AG locking order rules and
 * to unlock buffers between metadata updates.  Prior to rmap/reflink,
 * the mapping code had a mechanism to perform these deferrals for
 * extents that were going to be freed; this code makes that facility
 * more generic.
 *
 * When adding the reverse mapping and reflink features, it became
 * necessary to perform complex remapping multi-transactions to comply
 * with AG locking order rules, and to be able to spread a single
 * refcount update operation (an operation on an n-block extent can
 * update as many as n records!) among multiple transactions.  XFS can
 * roll a transaction to facilitate this, but using this facility
 * requires us to log "intent" items in case log recovery needs to
 * redo the operation, and to log "done" items to indicate that redo
 * is not necessary.
 *
 * Deferred work is tracked in xfs_defer_pending items.  Each pending
 * item tracks one type of deferred work.  Incoming work items (which
 * have not yet had an intent logged) are attached to a pending item
 * on the dop_intake list, where they wait for the caller to finish
 * the deferred operations.
 *
 * Finishing a set of deferred operations is an involved process.  To
 * start, we define "rolling a deferred-op transaction" as follows:
 *
 * > For each xfs_defer_pending item on the dop_intake list,
 *   - Sort the work items in AG order.  XFS locking
 *     order rules require us to lock buffers in AG order.
 *   - Create a log intent item for that type.
 *   - Attach it to the pending item.
 *   - Move the pending item from the dop_intake list to the
 *     dop_pending list.
 * > Roll the transaction.
 *
 * NOTE: To avoid exceeding the transaction reservation, we limit the
 * number of items that we attach to a given xfs_defer_pending.
 *
 * The actual finishing process looks like this:
 *
 * > For each xfs_defer_pending in the dop_pending list,
 *   - Roll the deferred-op transaction as above.
 *   - Create a log done item for that type, and attach it to the
 *     log intent item.
 *   - For each work item attached to the log intent item,
 *     * Perform the described action.
 *     * Attach the work item to the log done item.
 *     * If the result of doing the work was -EAGAIN, ->finish_item
 *       wants a new transaction.  See the "Requesting a Fresh
 *       Transaction while Finishing Deferred Work" section below for
 *       details.
 *
 * The key here is that we must log an intent item for all pending
 * work items every time we roll the transaction, and that we must log
 * a done item as soon as the work is completed.  With this mechanism
 * we can perform complex remapping operations, chaining intent items
 * as needed.
 *
 * Requesting a Fresh Transaction while Finishing Deferred Work
 *
 * If ->finish_item decides that it needs a fresh transaction to
 * finish the work, it must ask its caller (xfs_defer_finish) for a
 * continuation.  The most likely cause of this circumstance is the
 * refcount adjust functions deciding that they've logged enough items
 * to be at risk of exceeding the transaction reservation.
 *
 * To get a fresh transaction, we want to log the existing log done
 * item to prevent the log intent item from replaying, immediately log
 * a new log intent item with the unfinished work items, roll the
 * transaction, and re-call ->finish_item wherever it left off.  The
 * log done item and the new log intent item must be in the same
 * transaction or atomicity cannot be guaranteed; defer_finish ensures
 * that this happens.
 *
 * This requires some coordination between ->finish_item and
 * defer_finish.  Upon deciding to request a new transaction,
 * ->finish_item should update the current work item to reflect the
 * unfinished work.  Next, it should reset the log done item's list
 * count to the number of items finished, and return -EAGAIN.
 * defer_finish sees the -EAGAIN, logs the new log intent item
 * with the remaining work items, and leaves the xfs_defer_pending
 * item at the head of the dop_work queue.  Then it rolls the
 * transaction and picks up processing where it left off.
 * ->finish_item must be careful to leave enough transaction
 * reservation to fit the new log intent item.
 *
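 * Sketched as pseudocode (the steps in angle brackets are descriptions,
 * not real helpers; the refcount adjust code is the actual in-tree user
 * of this protocol), a continuation-requesting ->finish_item looks like:
 *
 *	done = 0;
 *	while (<work remains> && <reservation still has room>) {
 *		<finish one unit of work>;
 *		done++;
 *	}
 *	if (<work remains>) {
 *		<trim the work item down to the unfinished part>;
 *		<set the done item's list count to done>;
 *		return -EAGAIN;
 *	}
 *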
 * This is an example of remapping the extent (E, E+B) into file X at
 * offset A and dealing with the extent (C, C+B) already being mapped
 * there:
 * +-------------------------------------------------+
 * | Unmap file X startblock C offset A length B     | t0
 * | Intent to reduce refcount for extent (C, B)     |
 * | Intent to remove rmap (X, C, A, B)              |
 * | Intent to free extent (D, 1) (bmbt block)       |
 * | Intent to map (X, A, B) at startblock E         |
 * +-------------------------------------------------+
 * | Map file X startblock E offset A length B       | t1
 * | Done mapping (X, E, A, B)                       |
 * | Intent to increase refcount for extent (E, B)   |
 * | Intent to add rmap (X, E, A, B)                 |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C, B)               | t2
 * | Done reducing refcount for extent (C, 9)        |
 * | Intent to reduce refcount for extent (C+9, B-9) |
 * | (ran out of space after 9 refcount updates)     |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C+9, B-9)           | t3
 * | Done reducing refcount for extent (C+9, B-9)    |
 * | Increase refcount for extent (E, B)             |
 * | Done increasing refcount for extent (E, B)      |
 * | Intent to free extent (C, B)                    |
 * | Intent to free extent (F, 1) (refcountbt block) |
 * | Intent to remove rmap (F, 1, REFC)              |
 * +-------------------------------------------------+
 * | Remove rmap (X, C, A, B)                        | t4
 * | Done removing rmap (X, C, A, B)                 |
 * | Add rmap (X, E, A, B)                           |
 * | Done adding rmap (X, E, A, B)                   |
 * | Remove rmap (F, 1, REFC)                        |
 * | Done removing rmap (F, 1, REFC)                 |
 * +-------------------------------------------------+
 * | Free extent (C, B)                              | t5
 * | Done freeing extent (C, B)                      |
 * | Free extent (D, 1)                              |
 * | Done freeing extent (D, 1)                      |
 * | Free extent (F, 1)                              |
 * | Done freeing extent (F, 1)                      |
 * +-------------------------------------------------+
 *
 * If we should crash before t2 commits, log recovery replays
 * the following intent items:
 *
 * - Intent to reduce refcount for extent (C, B)
 * - Intent to remove rmap (X, C, A, B)
 * - Intent to free extent (D, 1) (bmbt block)
 * - Intent to increase refcount for extent (E, B)
 * - Intent to add rmap (X, E, A, B)
 *
 * In the process of recovering, it should also generate and take care
 * of these intent items:
 *
 * - Intent to free extent (C, B)
 * - Intent to free extent (F, 1) (refcountbt block)
 * - Intent to remove rmap (F, 1, REFC)
 *
 * Note that the continuation requested between t2 and t3 is likely to
 * recur.
 */
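
/*
 * Example usage, as a rough sketch (this block is illustrative and
 * abridged; the extent-free work item, its fields, and its cache live
 * with the extent-free code, not here):
 *
 *	struct xfs_extent_free_item	*xefi;
 *	int				error;
 *
 *	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
 *			GFP_NOFS | __GFP_NOFAIL);
 *	xefi->xefi_startblock = bno;
 *	xefi->xefi_blockcount = len;
 *
 *	(queue the work; an intent item is logged when the dfops roll)
 *	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
 *
 *	(...make more updates against @tp...)
 *
 *	(log intents, roll, and finish everything queued so far)
 *	error = xfs_defer_finish(&tp);
 */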

static const struct xfs_defer_op_type *defer_op_types[] = {
	[XFS_DEFER_OPS_TYPE_BMAP]	= &xfs_bmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_REFCOUNT]	= &xfs_refcount_update_defer_type,
	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
	[XFS_DEFER_OPS_TYPE_ATTR]	= &xfs_attr_defer_type,
};
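
/*
 * Rough shape of one of the vtables above (the member names match the
 * ops used in this file; the handlers in angle brackets are
 * descriptions, not real function names):
 *
 *	const struct xfs_defer_op_type xfs_extent_free_defer_type = {
 *		.max_items	= <cap on work items per intent>,
 *		.create_intent	= <log an intent item for queued work>,
 *		.abort_intent	= <release an intent at abort time>,
 *		.create_done	= <log a done item for an intent>,
 *		.finish_item	= <do one work item; may return -EAGAIN>,
 *		.finish_cleanup	= <tear down ->finish_item state>,
 *		.cancel_item	= <throw away one queued work item>,
 *	};
 */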

/*
 * Ensure there's a log intent item associated with this deferred work item if
 * the operation must be restarted on crash.  Returns 1 if there's a log item;
 * 0 if there isn't; or a negative errno.
 */
static int
xfs_defer_create_intent(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp,
	bool				sort)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
	struct xfs_log_item		*lip;

	if (dfp->dfp_intent)
		return 1;

	lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort);
	if (!lip)
		return 0;
	if (IS_ERR(lip))
		return PTR_ERR(lip);

	dfp->dfp_intent = lip;
	return 1;
}

/*
 * For each pending item in the intake list, log its intent item and the
 * associated extents, then add the entire intake list to the end of
 * the pending list.
 *
 * Returns 1 if at least one log item was associated with the deferred work;
 * 0 if there are no log items; or a negative errno.
 */
static int
xfs_defer_create_intents(
	struct xfs_trans		*tp)
{
	struct xfs_defer_pending	*dfp;
	int				ret = 0;

	list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
		int			ret2;

		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
		ret2 = xfs_defer_create_intent(tp, dfp, true);
		if (ret2 < 0)
			return ret2;
		ret |= ret2;
	}
	return ret;
}

STATIC void
xfs_defer_pending_abort(
	struct xfs_mount		*mp,
	struct list_head		*dop_list)
{
	struct xfs_defer_pending	*dfp;
	const struct xfs_defer_op_type	*ops;

	/* Abort intent items that don't have a done item. */
	list_for_each_entry(dfp, dop_list, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_pending_abort(mp, dfp);
		if (dfp->dfp_intent && !dfp->dfp_done) {
			ops->abort_intent(dfp->dfp_intent);
			dfp->dfp_intent = NULL;
		}
	}
}

/* Abort all the intents that were committed. */
STATIC void
xfs_defer_trans_abort(
	struct xfs_trans		*tp,
	struct list_head		*dop_pending)
{
	trace_xfs_defer_trans_abort(tp, _RET_IP_);
	xfs_defer_pending_abort(tp->t_mountp, dop_pending);
}

/*
 * Capture resources that the caller said not to release ("held") when the
 * transaction commits.  Caller is responsible for zero-initializing @dres.
 */
static int
xfs_defer_save_resources(
	struct xfs_defer_resources	*dres,
	struct xfs_trans		*tp)
{
	struct xfs_buf_log_item		*bli;
	struct xfs_inode_log_item	*ili;
	struct xfs_log_item		*lip;

	BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS);

	list_for_each_entry(lip, &tp->t_items, li_trans) {
		switch (lip->li_type) {
		case XFS_LI_BUF:
			bli = container_of(lip, struct xfs_buf_log_item,
					   bli_item);
			if (bli->bli_flags & XFS_BLI_HOLD) {
				if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) {
					ASSERT(0);
					return -EFSCORRUPTED;
				}
				if (bli->bli_flags & XFS_BLI_ORDERED)
					dres->dr_ordered |=
							(1U << dres->dr_bufs);
				else
					xfs_trans_dirty_buf(tp, bli->bli_buf);
				dres->dr_bp[dres->dr_bufs++] = bli->bli_buf;
			}
			break;
		case XFS_LI_INODE:
			ili = container_of(lip, struct xfs_inode_log_item,
					   ili_item);
			if (ili->ili_lock_flags == 0) {
				if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) {
					ASSERT(0);
					return -EFSCORRUPTED;
				}
				xfs_trans_log_inode(tp, ili->ili_inode,
						    XFS_ILOG_CORE);
				dres->dr_ip[dres->dr_inos++] = ili->ili_inode;
			}
			break;
		default:
			break;
		}
	}

	return 0;
}

/* Attach the held resources to the transaction. */
static void
xfs_defer_restore_resources(
	struct xfs_trans		*tp,
	struct xfs_defer_resources	*dres)
{
	unsigned short			i;

	/* Rejoin the joined inodes. */
	for (i = 0; i < dres->dr_inos; i++)
		xfs_trans_ijoin(tp, dres->dr_ip[i], 0);

	/* Rejoin the buffers and dirty them so the log moves forward. */
	for (i = 0; i < dres->dr_bufs; i++) {
		xfs_trans_bjoin(tp, dres->dr_bp[i]);
		if (dres->dr_ordered & (1U << i))
			xfs_trans_ordered_buf(tp, dres->dr_bp[i]);
		xfs_trans_bhold(tp, dres->dr_bp[i]);
	}
}

/* Roll a transaction so we can do some deferred op processing. */
STATIC int
xfs_defer_trans_roll(
	struct xfs_trans		**tpp)
{
	struct xfs_defer_resources	dres = { };
	int				error;

	error = xfs_defer_save_resources(&dres, *tpp);
	if (error)
		return error;

	trace_xfs_defer_trans_roll(*tpp, _RET_IP_);

	/*
	 * Roll the transaction.  Rolling always gives a new transaction (even
	 * if committing the old one fails!) to hand back to the caller, so we
	 * join the held resources to the new transaction so that we always
	 * return with the held resources joined to @tpp, no matter what
	 * happened.
	 */
	error = xfs_trans_roll(tpp);

	xfs_defer_restore_resources(*tpp, &dres);

	if (error)
		trace_xfs_defer_trans_roll_error(*tpp, error);
	return error;
}

/*
 * Free up any items left in the list.
 */
static void
xfs_defer_cancel_list(
	struct xfs_mount		*mp,
	struct list_head		*dop_list)
{
	struct xfs_defer_pending	*dfp;
	struct xfs_defer_pending	*pli;
	struct list_head		*pwi;
	struct list_head		*n;
	const struct xfs_defer_op_type	*ops;

	/*
	 * Free the pending items.  Caller should already have arranged
	 * for the intent items to be released.
	 */
	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_cancel_list(mp, dfp);
		list_del(&dfp->dfp_list);
		list_for_each_safe(pwi, n, &dfp->dfp_work) {
			list_del(pwi);
			dfp->dfp_count--;
			trace_xfs_defer_cancel_item(mp, dfp, pwi);
			ops->cancel_item(pwi);
		}
		ASSERT(dfp->dfp_count == 0);
		kmem_cache_free(xfs_defer_pending_cache, dfp);
	}
}

/*
 * Prevent a log intent item from pinning the tail of the log by logging a
 * done item to release the intent item; and then log a new intent item.
 * The caller should provide a fresh transaction and roll it after we're done.
 */
static int
xfs_defer_relog(
	struct xfs_trans		**tpp,
	struct list_head		*dfops)
{
	struct xlog			*log = (*tpp)->t_mountp->m_log;
	struct xfs_defer_pending	*dfp;
	xfs_lsn_t			threshold_lsn = NULLCOMMITLSN;

	ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	list_for_each_entry(dfp, dfops, dfp_list) {
		/*
		 * If the log intent item for this deferred op is not a part of
		 * the current log checkpoint, relog the intent item to keep
		 * the log tail moving forward.  We're ok with this being racy
		 * because an incorrect decision means we'll be a little slower
		 * at pushing the tail.
		 */
		if (dfp->dfp_intent == NULL ||
		    xfs_log_item_in_current_chkpt(dfp->dfp_intent))
			continue;

		/*
		 * Figure out where we need the tail to be in order to maintain
		 * the minimum required free space in the log.  Only sample
		 * the log threshold once per call.
		 */
		if (threshold_lsn == NULLCOMMITLSN) {
			threshold_lsn = xlog_grant_push_threshold(log, 0);
			if (threshold_lsn == NULLCOMMITLSN)
				break;
		}
		if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
			continue;

		trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
		XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
		dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
	}

	if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
		return xfs_defer_trans_roll(tpp);
	return 0;
}

/*
 * Log an intent-done item for the first pending intent, and finish the work
 * items.
 */
static int
xfs_defer_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
	struct xfs_btree_cur		*state = NULL;
	struct list_head		*li, *n;
	int				error;

	trace_xfs_defer_pending_finish(tp->t_mountp, dfp);

	dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
	list_for_each_safe(li, n, &dfp->dfp_work) {
		list_del(li);
		dfp->dfp_count--;
		trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
		error = ops->finish_item(tp, dfp->dfp_done, li, &state);
		if (error == -EAGAIN) {
			int		ret;

			/*
			 * Caller wants a fresh transaction; put the work item
			 * back on the list and log a new log intent item to
			 * replace the old one.  See "Requesting a Fresh
			 * Transaction while Finishing Deferred Work" above.
			 */
			list_add(li, &dfp->dfp_work);
			dfp->dfp_count++;
			dfp->dfp_done = NULL;
			dfp->dfp_intent = NULL;
			ret = xfs_defer_create_intent(tp, dfp, false);
			if (ret < 0)
				error = ret;
		}

		if (error)
			goto out;
	}

	/* Done with the dfp, free it. */
	list_del(&dfp->dfp_list);
	kmem_cache_free(xfs_defer_pending_cache, dfp);
out:
	if (ops->finish_cleanup)
		ops->finish_cleanup(tp, state, error);
	return error;
}

/*
 * Finish all the pending work.  This involves logging intent items for
 * any work items that wandered in since the last transaction roll (if
 * one has even happened), rolling the transaction, and finishing the
 * work items in the first item on the logged-and-pending list.
 *
 * Resources that the caller holds on the transaction (inodes and
 * buffers) are rejoined to each new transaction as it rolls.
 */
int
xfs_defer_finish_noroll(
	struct xfs_trans		**tp)
{
	struct xfs_defer_pending	*dfp = NULL;
	int				error = 0;
	LIST_HEAD(dop_pending);

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	trace_xfs_defer_finish(*tp, _RET_IP_);

	/* Until we run out of pending work to finish... */
	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
		/*
		 * Deferred items that are created in the process of finishing
		 * other deferred work items should be queued at the head of
		 * the pending list, which puts them ahead of the deferred work
		 * that was created by the caller.  This keeps the number of
		 * pending work items to a minimum, which decreases the amount
		 * of time that any one intent item can stick around in memory,
		 * pinning the log tail.
		 */
		int has_intents = xfs_defer_create_intents(*tp);

		list_splice_init(&(*tp)->t_dfops, &dop_pending);

		if (has_intents < 0) {
			error = has_intents;
			goto out_shutdown;
		}
		if (has_intents || dfp) {
			error = xfs_defer_trans_roll(tp);
			if (error)
				goto out_shutdown;

			/* Relog intent items to keep the log moving. */
			error = xfs_defer_relog(tp, &dop_pending);
			if (error)
				goto out_shutdown;
		}

		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
				       dfp_list);
		error = xfs_defer_finish_one(*tp, dfp);
		if (error && error != -EAGAIN)
			goto out_shutdown;
	}

	trace_xfs_defer_finish_done(*tp, _RET_IP_);
	return 0;

out_shutdown:
	xfs_defer_trans_abort(*tp, &dop_pending);
	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
	trace_xfs_defer_finish_error(*tp, error);
	xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
	xfs_defer_cancel(*tp);
	return error;
}

int
xfs_defer_finish(
	struct xfs_trans	**tp)
{
	int			error;

	/*
	 * Finish and roll the transaction once more to avoid returning to the
	 * caller with a dirty transaction.
	 */
	error = xfs_defer_finish_noroll(tp);
	if (error)
		return error;
	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
		error = xfs_defer_trans_roll(tp);
		if (error) {
			xfs_force_shutdown((*tp)->t_mountp,
					   SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
	}

	/* Reset LOWMODE now that we've finished all the dfops. */
	ASSERT(list_empty(&(*tp)->t_dfops));
	(*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
	return 0;
}

void
xfs_defer_cancel(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;

	trace_xfs_defer_cancel(tp, _RET_IP_);
	xfs_defer_cancel_list(mp, &tp->t_dfops);
}

/* Add an item for later deferred processing. */
void
xfs_defer_add(
	struct xfs_trans		*tp,
	enum xfs_defer_ops_type		type,
	struct list_head		*li)
{
	struct xfs_defer_pending	*dfp = NULL;
	const struct xfs_defer_op_type	*ops = defer_op_types[type];

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);

	/*
	 * Add the item to a pending item at the end of the intake list.
	 * If the last pending item has the same type, reuse it.  Else,
	 * create a new pending item at the end of the intake list.
	 */
	if (!list_empty(&tp->t_dfops)) {
		dfp = list_last_entry(&tp->t_dfops,
				      struct xfs_defer_pending, dfp_list);
		if (dfp->dfp_type != type ||
		    (ops->max_items && dfp->dfp_count >= ops->max_items))
			dfp = NULL;
	}
	if (!dfp) {
		dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
					GFP_NOFS | __GFP_NOFAIL);
		dfp->dfp_type = type;
		dfp->dfp_intent = NULL;
		dfp->dfp_done = NULL;
		dfp->dfp_count = 0;
		INIT_LIST_HEAD(&dfp->dfp_work);
		list_add_tail(&dfp->dfp_list, &tp->t_dfops);
	}

	list_add_tail(li, &dfp->dfp_work);
	trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
	dfp->dfp_count++;
}

/*
 * Move deferred ops from one transaction to another and reset the source to
 * initial state.  This is primarily used to carry state forward across
 * transaction rolls with pending dfops.
 */
void
xfs_defer_move(
	struct xfs_trans	*dtp,
	struct xfs_trans	*stp)
{
	list_splice_init(&stp->t_dfops, &dtp->t_dfops);

	/*
	 * Low free space mode was historically controlled by a dfops field.
	 * This meant that low mode state potentially carried across multiple
	 * transaction rolls.  Transfer low mode on a dfops move to preserve
	 * that behavior.
	 */
	dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
	stp->t_flags &= ~XFS_TRANS_LOWMODE;
}

/*
 * Prepare a chain of fresh deferred ops work items to be completed later.  Log
 * recovery requires the ability to put off until later the actual finishing
 * work so that it can process unfinished items recovered from the log in
 * correct order.
 *
 * Create and log intent items for all the work that we're capturing so that we
 * can be assured that the items will get replayed if the system goes down
 * before log recovery gets a chance to finish the work it put off.  The entire
 * deferred ops state is transferred to the capture structure and the
 * transaction is then ready for the caller to commit it.  If there are no
 * intent items to capture, this function returns NULL.
 *
 * The capture structure obtains its own references to any inodes and buffers
 * held by the transaction.
 */
static struct xfs_defer_capture *
xfs_defer_ops_capture(
	struct xfs_trans		*tp)
{
	struct xfs_defer_capture	*dfc;
	unsigned short			i;
	int				error;

	if (list_empty(&tp->t_dfops))
		return NULL;

	error = xfs_defer_create_intents(tp);
	if (error < 0)
		return ERR_PTR(error);

	/* Create an object to capture the defer ops. */
	dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
	INIT_LIST_HEAD(&dfc->dfc_list);
	INIT_LIST_HEAD(&dfc->dfc_dfops);

	/* Move the dfops chain and transaction state to the capture struct. */
	list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
	dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
	tp->t_flags &= ~XFS_TRANS_LOWMODE;

	/* Capture the remaining block reservations along with the dfops. */
	dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
	dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;

	/* Preserve the log reservation size. */
	dfc->dfc_logres = tp->t_log_res;

	error = xfs_defer_save_resources(&dfc->dfc_held, tp);
	if (error) {
		/*
		 * Resource capture should never fail, but if it does, we
		 * still have to shut down the log and release things
		 * properly.
		 */
		xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
	}

	/*
	 * Grab extra references to the inodes and buffers because callers are
	 * expected to release their held references after we commit the
	 * transaction.
	 */
	for (i = 0; i < dfc->dfc_held.dr_inos; i++) {
		ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL));
		ihold(VFS_I(dfc->dfc_held.dr_ip[i]));
	}

	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
		xfs_buf_hold(dfc->dfc_held.dr_bp[i]);

	return dfc;
}

/* Release all resources that we used to capture deferred ops. */
void
xfs_defer_ops_capture_abort(
	struct xfs_mount		*mp,
	struct xfs_defer_capture	*dfc)
{
	unsigned short			i;

	xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);

	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
		xfs_buf_relse(dfc->dfc_held.dr_bp[i]);

	for (i = 0; i < dfc->dfc_held.dr_inos; i++)
		xfs_irele(dfc->dfc_held.dr_ip[i]);

	kmem_free(dfc);
}

/*
 * Capture any deferred ops and commit the transaction.  This is the last step
 * needed to finish a log intent item that we recovered from the log.  If any
 * of the deferred ops operate on an inode, the caller must hold ILOCK_EXCL
 * on that inode; the capture structure takes its own references to the held
 * inodes and buffers, so the caller releases its references after we commit
 * the transaction.
 */
int
xfs_defer_ops_capture_and_commit(
	struct xfs_trans		*tp,
	struct list_head		*capture_list)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_defer_capture	*dfc;
	int				error;

	/* If we don't capture anything, commit transaction and exit. */
	dfc = xfs_defer_ops_capture(tp);
	if (IS_ERR(dfc)) {
		xfs_trans_cancel(tp);
		return PTR_ERR(dfc);
	}
	if (!dfc)
		return xfs_trans_commit(tp);

	/* Commit the transaction and add the capture structure to the list. */
	error = xfs_trans_commit(tp);
	if (error) {
		xfs_defer_ops_capture_abort(mp, dfc);
		return error;
	}

	list_add_tail(&dfc->dfc_list, capture_list);
	return 0;
}

/*
 * Attach a chain of captured deferred ops to a new transaction and free the
 * capture structure.  Any captured inodes are passed back to the caller with
 * ILOCK_EXCL held and joined to the transaction with lockflags == 0.  The
 * caller now owns the inode references.
 */
void
xfs_defer_ops_continue(
	struct xfs_defer_capture	*dfc,
	struct xfs_trans		*tp,
	struct xfs_defer_resources	*dres)
{
	unsigned int			i;

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));

	/* Lock the captured resources to the new transaction. */
	if (dfc->dfc_held.dr_inos == 2)
		xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
				    dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
	else if (dfc->dfc_held.dr_inos == 1)
		xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);

	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
		xfs_buf_lock(dfc->dfc_held.dr_bp[i]);

	/* Join the captured resources to the new transaction. */
	xfs_defer_restore_resources(tp, &dfc->dfc_held);
	memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
	dres->dr_bufs = 0;

	/* Move captured dfops chain and state to the transaction. */
	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
	tp->t_flags |= dfc->dfc_tpflags;

	kmem_free(dfc);
}

/* Release the resources captured and continued during recovery. */
void
xfs_defer_resources_rele(
	struct xfs_defer_resources	*dres)
{
	unsigned short			i;

	for (i = 0; i < dres->dr_inos; i++) {
		xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL);
		xfs_irele(dres->dr_ip[i]);
		dres->dr_ip[i] = NULL;
	}

	for (i = 0; i < dres->dr_bufs; i++) {
		xfs_buf_relse(dres->dr_bp[i]);
		dres->dr_bp[i] = NULL;
	}

	dres->dr_inos = 0;
	dres->dr_bufs = 0;
	dres->dr_ordered = 0;
}

static inline int __init
xfs_defer_init_cache(void)
{
	xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending",
			sizeof(struct xfs_defer_pending),
			0, 0, NULL);

	return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM;
}

static inline void
xfs_defer_destroy_cache(void)
{
	kmem_cache_destroy(xfs_defer_pending_cache);
	xfs_defer_pending_cache = NULL;
}

/* Set up caches for deferred work items. */
int __init
xfs_defer_init_item_caches(void)
{
	int			error;

	error = xfs_defer_init_cache();
	if (error)
		return error;
	error = xfs_rmap_intent_init_cache();
	if (error)
		goto err;
	error = xfs_refcount_intent_init_cache();
	if (error)
		goto err;
	error = xfs_bmap_intent_init_cache();
	if (error)
		goto err;
	error = xfs_extfree_intent_init_cache();
	if (error)
		goto err;
	error = xfs_attr_intent_init_cache();
	if (error)
		goto err;
	return 0;
err:
	xfs_defer_destroy_item_caches();
	return error;
}

/* Destroy all the deferred work item caches, if they've been allocated. */
void
xfs_defer_destroy_item_caches(void)
{
	xfs_attr_intent_destroy_cache();
	xfs_extfree_intent_destroy_cache();
	xfs_bmap_intent_destroy_cache();
	xfs_refcount_intent_destroy_cache();
	xfs_rmap_intent_destroy_cache();
	xfs_defer_destroy_cache();
}