37b3b4d6 1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2016 Oracle. All Rights Reserved.
bc859611 4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "libxfs_priv.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_sb.h"
13#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_btree.h"
16#include "xfs_bmap.h"
17#include "xfs_refcount_btree.h"
18#include "xfs_alloc.h"
56d3fc2b 19#include "xfs_errortag.h"
20#include "xfs_trace.h"
21#include "xfs_cksum.h"
22#include "xfs_trans.h"
23#include "xfs_bit.h"
24#include "xfs_refcount.h"
10e65503 25#include "xfs_rmap.h"
bc859611 26
27/* Allowable refcount adjustment amounts. */
28enum xfs_refc_adjust_op {
29 XFS_REFCOUNT_ADJUST_INCREASE = 1,
30 XFS_REFCOUNT_ADJUST_DECREASE = -1,
31 XFS_REFCOUNT_ADJUST_COW_ALLOC = 0,
32 XFS_REFCOUNT_ADJUST_COW_FREE = -1,
33};
34
35STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
36 xfs_agblock_t agbno, xfs_extlen_t aglen,
37 struct xfs_defer_ops *dfops);
38STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
39 xfs_agblock_t agbno, xfs_extlen_t aglen,
40 struct xfs_defer_ops *dfops);
41
42/*
43 * Look up the first record less than or equal to [bno, len] in the btree
44 * given by cur.
45 */
46int
47xfs_refcount_lookup_le(
48 struct xfs_btree_cur *cur,
49 xfs_agblock_t bno,
50 int *stat)
51{
52 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
53 XFS_LOOKUP_LE);
54 cur->bc_rec.rc.rc_startblock = bno;
55 cur->bc_rec.rc.rc_blockcount = 0;
56 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
57}
58
59/*
60 * Look up the first record greater than or equal to [bno, len] in the btree
61 * given by cur.
62 */
63int
64xfs_refcount_lookup_ge(
65 struct xfs_btree_cur *cur,
66 xfs_agblock_t bno,
67 int *stat)
68{
69 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
70 XFS_LOOKUP_GE);
71 cur->bc_rec.rc.rc_startblock = bno;
72 cur->bc_rec.rc.rc_blockcount = 0;
73 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
74}
75
76/*
77 * Look up the first record equal to [bno, len] in the btree
78 * given by cur.
79 */
80int
81xfs_refcount_lookup_eq(
82 struct xfs_btree_cur *cur,
83 xfs_agblock_t bno,
84 int *stat)
85{
86 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
87 XFS_LOOKUP_LE);
88 cur->bc_rec.rc.rc_startblock = bno;
89 cur->bc_rec.rc.rc_blockcount = 0;
90 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
91}
92
10e65503 93/* Convert on-disk record to in-core format. */
aa465192 94void
95xfs_refcount_btrec_to_irec(
96 union xfs_btree_rec *rec,
97 struct xfs_refcount_irec *irec)
98{
99 irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
100 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
101 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
102}
103
104/*
105 * Get the data from the pointed-to record.
106 */
107int
108xfs_refcount_get_rec(
109 struct xfs_btree_cur *cur,
110 struct xfs_refcount_irec *irec,
111 int *stat)
112{
113 struct xfs_mount *mp = cur->bc_mp;
114 xfs_agnumber_t agno = cur->bc_private.a.agno;
115 union xfs_btree_rec *rec;
116 int error;
ec291989 117 xfs_agblock_t realstart;
118
119 error = xfs_btree_get_rec(cur, &rec, stat);
120 if (error || !*stat)
121 return error;
122
123 xfs_refcount_btrec_to_irec(rec, irec);
124
125 agno = cur->bc_private.a.agno;
126 if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
127 goto out_bad_rec;
128
129 /* handle special COW-staging state */
130 realstart = irec->rc_startblock;
131 if (realstart & XFS_REFC_COW_START) {
132 if (irec->rc_refcount != 1)
133 goto out_bad_rec;
134 realstart &= ~XFS_REFC_COW_START;
135 } else if (irec->rc_refcount < 2) {
136 goto out_bad_rec;
bc859611 137 }
138
139 /* check for valid extent range, including overflow */
140 if (!xfs_verify_agbno(mp, agno, realstart))
141 goto out_bad_rec;
142 if (realstart > realstart + irec->rc_blockcount)
143 goto out_bad_rec;
144 if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1))
145 goto out_bad_rec;
146
147 if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
148 goto out_bad_rec;
149
150 trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec);
151 return 0;
152
153out_bad_rec:
154 xfs_warn(mp,
155 "Refcount BTree record corruption in AG %d detected!", agno);
156 xfs_warn(mp,
157 "Start block 0x%x, block count 0x%x, references 0x%x",
158 irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
159 return -EFSCORRUPTED;
160}
161
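/*
 * Illustrative sketch, not part of libxfs: the lookup helpers above only
 * position the btree cursor, so callers pair them with
 * xfs_refcount_get_rec() and test the "found"/"stat" flag before trusting
 * the record.  The helper below is hypothetical and exists only to show
 * that pattern; it reports whether a record covers @agbno.
 */
static int
example_refcount_rec_covering(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	struct xfs_refcount_irec	*irec,
	bool				*covers)
{
	int				found;
	int				error;

	*covers = false;

	/* Position at the last record starting at or before agbno. */
	error = xfs_refcount_lookup_le(cur, agbno, &found);
	if (error || !found)
		return error;

	/* Read the record the cursor now points at. */
	error = xfs_refcount_get_rec(cur, irec, &found);
	if (error || !found)
		return error;

	/* Does [rc_startblock, rc_startblock + rc_blockcount) include agbno? */
	*covers = agbno < irec->rc_startblock + irec->rc_blockcount;
	return 0;
}
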
162/*
163 * Update the record referred to by cur to the value given
164 * by [bno, len, refcount].
165 * This either works (return 0) or gets an EFSCORRUPTED error.
166 */
167STATIC int
168xfs_refcount_update(
169 struct xfs_btree_cur *cur,
170 struct xfs_refcount_irec *irec)
171{
172 union xfs_btree_rec rec;
173 int error;
174
175 trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
176 rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
177 rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
178 rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
179 error = xfs_btree_update(cur, &rec);
180 if (error)
181 trace_xfs_refcount_update_error(cur->bc_mp,
182 cur->bc_private.a.agno, error, _RET_IP_);
183 return error;
184}
185
186/*
187 * Insert the record referred to by cur to the value given
188 * by [bno, len, refcount].
189 * This either works (return 0) or gets an EFSCORRUPTED error.
190 */
aa465192 191int
192xfs_refcount_insert(
193 struct xfs_btree_cur *cur,
194 struct xfs_refcount_irec *irec,
195 int *i)
196{
197 int error;
198
199 trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
200 cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
201 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
202 cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
203 error = xfs_btree_insert(cur, i);
204 if (error)
205 goto out_error;
bc859611 206 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
02afaae0 207
208out_error:
209 if (error)
210 trace_xfs_refcount_insert_error(cur->bc_mp,
211 cur->bc_private.a.agno, error, _RET_IP_);
212 return error;
213}
214
215/*
216 * Remove the record referred to by cur, then set the pointer to the spot
217 * where the record could be re-inserted, in case we want to increment or
218 * decrement the cursor.
219 * This either works (return 0) or gets an EFSCORRUPTED error.
220 */
221STATIC int
222xfs_refcount_delete(
223 struct xfs_btree_cur *cur,
224 int *i)
225{
226 struct xfs_refcount_irec irec;
227 int found_rec;
228 int error;
229
230 error = xfs_refcount_get_rec(cur, &irec, &found_rec);
231 if (error)
232 goto out_error;
233 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
234 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
235 error = xfs_btree_delete(cur, i);
236 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
237 if (error)
238 goto out_error;
239 error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
240out_error:
241 if (error)
242 trace_xfs_refcount_delete_error(cur->bc_mp,
243 cur->bc_private.a.agno, error, _RET_IP_);
244 return error;
245}
246
247/*
248 * Adjusting the Reference Count
249 *
250 * As stated elsewhere, the reference count btree (refcbt) stores
251 * >1 reference counts for extents of physical blocks. In this
252 * operation, we're either raising or lowering the reference count of
253 * some subrange stored in the tree:
254 *
255 * <------ adjustment range ------>
256 * ----+ +---+-----+ +--+--------+---------
257 * 2 | | 3 | 4 | |17| 55 | 10
258 * ----+ +---+-----+ +--+--------+---------
259 * X axis is physical blocks number;
260 * reference counts are the numbers inside the rectangles
261 *
262 * The first thing we need to do is to ensure that there are no
263 * refcount extents crossing either boundary of the range to be
264 * adjusted. For any extent that does cross a boundary, split it into
265 * two extents so that we can increment the refcount of one of the
266 * pieces later:
267 *
268 * <------ adjustment range ------>
269 * ----+ +---+-----+ +--+--------+----+----
270 * 2 | | 3 | 2 | |17| 55 | 10 | 10
271 * ----+ +---+-----+ +--+--------+----+----
272 *
273 * For this next step, let's assume that all the physical blocks in
274 * the adjustment range are mapped to a file and are therefore in use
275 * at least once. Therefore, we can infer that any gap in the
276 * refcount tree within the adjustment range represents a physical
277 * extent with refcount == 1:
278 *
279 * <------ adjustment range ------>
280 * ----+---+---+-----+-+--+--------+----+----
281 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10
282 * ----+---+---+-----+-+--+--------+----+----
283 * ^
284 *
285 * For each extent that falls within the interval range, figure out
286 * which extent is to the left or the right of that extent. Now we
287 * have a left, current, and right extent. If the new reference count
288 * of the center extent enables us to merge left, center, and right
289 * into one record covering all three, do so. If the center extent is
290 * at the left end of the range, abuts the left extent, and its new
291 * reference count matches the left extent's record, then merge them.
292 * If the center extent is at the right end of the range, abuts the
293 * right extent, and the reference counts match, merge those. In the
294 * example, we can left merge (assuming an increment operation):
295 *
296 * <------ adjustment range ------>
297 * --------+---+-----+-+--+--------+----+----
298 * 2 | 3 | 2 |1|17| 55 | 10 | 10
299 * --------+---+-----+-+--+--------+----+----
300 * ^
301 *
302 * For all other extents within the range, adjust the reference count
303 * or delete it if the refcount falls below 2. If we were
304 * incrementing, the end result looks like this:
305 *
306 * <------ adjustment range ------>
307 * --------+---+-----+-+--+--------+----+----
308 * 2 | 4 | 3 |2|18| 56 | 11 | 10
309 * --------+---+-----+-+--+--------+----+----
310 *
311 * The result of a decrement operation looks as such:
312 *
313 * <------ adjustment range ------>
314 * ----+ +---+ +--+--------+----+----
315 * 2 | | 2 | |16| 54 | 9 | 10
316 * ----+ +---+ +--+--------+----+----
317 * DDDD 111111DD
318 *
319 * The blocks marked "D" are freed; the blocks marked "1" are only
320 * referenced once and therefore the record is removed from the
321 * refcount btree.
322 */
323
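/*
 * Standalone illustration, not libxfs code: apply the increment rules from
 * the comment above to a toy, already-split record list.  Gaps inside the
 * adjustment range stand for an implicit refcount of 1, counts that remain
 * >= 2 stay in the tree, and counts that drop below 2 mean the record goes
 * away (and, on a decrement to zero, the blocks are freed).  All names and
 * numbers here are made up for the example; it compiles on its own.
 */
#include <stdio.h>

struct toy_rcext {
	unsigned int	start;		/* first block of the extent */
	unsigned int	len;		/* number of blocks */
	unsigned int	refcount;	/* shared reference count, >= 2 */
};

int
main(void)
{
	/* Records overlapping the adjustment range [10, 30). */
	struct toy_rcext	recs[] = {
		{ 10, 4, 3 },
		{ 14, 6, 2 },
		/* gap at [20, 22): implicit refcount == 1 */
		{ 22, 8, 17 },
	};
	unsigned int		nrecs = sizeof(recs) / sizeof(recs[0]);
	unsigned int		bno = 10, end = 30, i = 0;
	int			adj = 1;	/* an increment operation */

	while (bno < end) {
		unsigned int	next, len, refcount;

		if (i < nrecs && recs[i].start == bno) {
			len = recs[i].len;
			refcount = recs[i].refcount + adj;
			i++;
		} else {
			/* Hole in the tree: synthesize a refcount-1 extent. */
			next = (i < nrecs && recs[i].start < end) ?
					recs[i].start : end;
			len = next - bno;
			refcount = 1 + adj;
		}
		if (refcount >= 2)
			printf("[%u, %u): refcount %u\n",
					bno, bno + len, refcount);
		else
			printf("[%u, %u): no record kept (refcount %u)\n",
					bno, bno + len, refcount);
		bno += len;
	}
	return 0;
}
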
324/* Next block after this extent. */
325static inline xfs_agblock_t
326xfs_refc_next(
327 struct xfs_refcount_irec *rc)
328{
329 return rc->rc_startblock + rc->rc_blockcount;
330}
331
332/*
333 * Split a refcount extent that crosses agbno.
334 */
335STATIC int
336xfs_refcount_split_extent(
337 struct xfs_btree_cur *cur,
338 xfs_agblock_t agbno,
339 bool *shape_changed)
340{
341 struct xfs_refcount_irec rcext, tmp;
342 int found_rec;
343 int error;
344
345 *shape_changed = false;
346 error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
347 if (error)
348 goto out_error;
349 if (!found_rec)
350 return 0;
351
352 error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
353 if (error)
354 goto out_error;
355 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
356 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
357 return 0;
358
359 *shape_changed = true;
360 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
361 &rcext, agbno);
362
363 /* Establish the right extent. */
364 tmp = rcext;
365 tmp.rc_startblock = agbno;
366 tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
367 error = xfs_refcount_update(cur, &tmp);
368 if (error)
369 goto out_error;
370
371 /* Insert the left extent. */
372 tmp = rcext;
373 tmp.rc_blockcount = agbno - rcext.rc_startblock;
374 error = xfs_refcount_insert(cur, &tmp, &found_rec);
375 if (error)
376 goto out_error;
377 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
378 return error;
379
380out_error:
381 trace_xfs_refcount_split_extent_error(cur->bc_mp,
382 cur->bc_private.a.agno, error, _RET_IP_);
383 return error;
384}
385
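/*
 * Standalone sketch, not libxfs code, of the arithmetic performed above:
 * a toy record that crosses @agbno is turned into a left piece ending at
 * agbno and a right piece starting there, both keeping the original
 * refcount.  In the real function the right piece overwrites the existing
 * record and the left piece is inserted as a new one.
 */
struct toy_split_rec {
	unsigned int	start;
	unsigned int	len;
	unsigned int	refcount;
};

static void
toy_split_extent(
	const struct toy_split_rec	*rec,	/* record crossing agbno */
	unsigned int			agbno,
	struct toy_split_rec		*left,
	struct toy_split_rec		*right)
{
	*left = *rec;
	*right = *rec;
	left->len = agbno - rec->start;		/* covers [start, agbno) */
	right->start = agbno;			/* covers [agbno, start + len) */
	right->len = rec->len - left->len;
}
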
386/*
387 * Merge the left, center, and right extents.
388 */
389STATIC int
390xfs_refcount_merge_center_extents(
391 struct xfs_btree_cur *cur,
392 struct xfs_refcount_irec *left,
393 struct xfs_refcount_irec *center,
394 struct xfs_refcount_irec *right,
395 unsigned long long extlen,
396 xfs_extlen_t *aglen)
397{
398 int error;
399 int found_rec;
400
401 trace_xfs_refcount_merge_center_extents(cur->bc_mp,
402 cur->bc_private.a.agno, left, center, right);
403
404 /*
405 * Make sure the center and right extents are not in the btree.
406 * If the center extent was synthesized, the first delete call
407 * removes the right extent and we skip the second deletion.
408 * If center and right were in the btree, then the first delete
409 * call removes the center and the second one removes the right
410 * extent.
411 */
412 error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
413 &found_rec);
414 if (error)
415 goto out_error;
416 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
417
418 error = xfs_refcount_delete(cur, &found_rec);
419 if (error)
420 goto out_error;
421 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
422
423 if (center->rc_refcount > 1) {
424 error = xfs_refcount_delete(cur, &found_rec);
425 if (error)
426 goto out_error;
427 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
428 out_error);
429 }
430
431 /* Enlarge the left extent. */
432 error = xfs_refcount_lookup_le(cur, left->rc_startblock,
433 &found_rec);
434 if (error)
435 goto out_error;
436 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
437
438 left->rc_blockcount = extlen;
439 error = xfs_refcount_update(cur, left);
440 if (error)
441 goto out_error;
442
443 *aglen = 0;
444 return error;
445
446out_error:
447 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
448 cur->bc_private.a.agno, error, _RET_IP_);
449 return error;
450}
451
452/*
453 * Merge with the left extent.
454 */
455STATIC int
456xfs_refcount_merge_left_extent(
457 struct xfs_btree_cur *cur,
458 struct xfs_refcount_irec *left,
459 struct xfs_refcount_irec *cleft,
460 xfs_agblock_t *agbno,
461 xfs_extlen_t *aglen)
462{
463 int error;
464 int found_rec;
465
466 trace_xfs_refcount_merge_left_extent(cur->bc_mp,
467 cur->bc_private.a.agno, left, cleft);
468
469 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
470 if (cleft->rc_refcount > 1) {
471 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
472 &found_rec);
473 if (error)
474 goto out_error;
475 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
476 out_error);
477
478 error = xfs_refcount_delete(cur, &found_rec);
479 if (error)
480 goto out_error;
481 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
482 out_error);
483 }
484
485 /* Enlarge the left extent. */
486 error = xfs_refcount_lookup_le(cur, left->rc_startblock,
487 &found_rec);
488 if (error)
489 goto out_error;
490 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
491
492 left->rc_blockcount += cleft->rc_blockcount;
493 error = xfs_refcount_update(cur, left);
494 if (error)
495 goto out_error;
496
497 *agbno += cleft->rc_blockcount;
498 *aglen -= cleft->rc_blockcount;
499 return error;
500
501out_error:
502 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
503 cur->bc_private.a.agno, error, _RET_IP_);
504 return error;
505}
506
507/*
508 * Merge with the right extent.
509 */
510STATIC int
511xfs_refcount_merge_right_extent(
512 struct xfs_btree_cur *cur,
513 struct xfs_refcount_irec *right,
514 struct xfs_refcount_irec *cright,
515 xfs_extlen_t *aglen)
516{
517 int error;
518 int found_rec;
519
520 trace_xfs_refcount_merge_right_extent(cur->bc_mp,
521 cur->bc_private.a.agno, cright, right);
522
523 /*
524 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
525 * remove it.
526 */
527 if (cright->rc_refcount > 1) {
528 error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
529 &found_rec);
530 if (error)
531 goto out_error;
532 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
533 out_error);
534
535 error = xfs_refcount_delete(cur, &found_rec);
536 if (error)
537 goto out_error;
538 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
539 out_error);
540 }
541
542 /* Enlarge the right extent. */
543 error = xfs_refcount_lookup_le(cur, right->rc_startblock,
544 &found_rec);
545 if (error)
546 goto out_error;
547 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
548
549 right->rc_startblock -= cright->rc_blockcount;
550 right->rc_blockcount += cright->rc_blockcount;
551 error = xfs_refcount_update(cur, right);
552 if (error)
553 goto out_error;
554
555 *aglen -= cright->rc_blockcount;
556 return error;
557
558out_error:
559 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
560 cur->bc_private.a.agno, error, _RET_IP_);
561 return error;
562}
563
564#define XFS_FIND_RCEXT_SHARED 1
565#define XFS_FIND_RCEXT_COW 2
566/*
567 * Find the left extent and the one after it (cleft). This function assumes
568 * that we've already split any extent crossing agbno.
569 */
570STATIC int
571xfs_refcount_find_left_extents(
572 struct xfs_btree_cur *cur,
573 struct xfs_refcount_irec *left,
574 struct xfs_refcount_irec *cleft,
575 xfs_agblock_t agbno,
576 xfs_extlen_t aglen,
577 int flags)
578{
579 struct xfs_refcount_irec tmp;
580 int error;
581 int found_rec;
582
583 left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
584 error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
585 if (error)
586 goto out_error;
587 if (!found_rec)
588 return 0;
589
590 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
591 if (error)
592 goto out_error;
593 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
594
595 if (xfs_refc_next(&tmp) != agbno)
596 return 0;
597 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
598 return 0;
599 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
600 return 0;
601 /* We have a left extent; retrieve (or invent) the next right one */
602 *left = tmp;
603
604 error = xfs_btree_increment(cur, 0, &found_rec);
605 if (error)
606 goto out_error;
607 if (found_rec) {
608 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
609 if (error)
610 goto out_error;
611 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
612 out_error);
613
614 /* if tmp starts at the end of our range, just use that */
615 if (tmp.rc_startblock == agbno)
616 *cleft = tmp;
617 else {
618 /*
619 * There's a gap in the refcntbt at the start of the
620 * range we're interested in (refcount == 1) so
621 * synthesize the implied extent and pass it back.
622 * We assume here that the agbno/aglen range was
623 * passed in from a data fork extent mapping and
624 * therefore is allocated to exactly one owner.
625 */
626 cleft->rc_startblock = agbno;
627 cleft->rc_blockcount = min(aglen,
628 tmp.rc_startblock - agbno);
629 cleft->rc_refcount = 1;
630 }
631 } else {
632 /*
633 * No extents, so pretend that there's one covering the whole
634 * range.
635 */
636 cleft->rc_startblock = agbno;
637 cleft->rc_blockcount = aglen;
638 cleft->rc_refcount = 1;
639 }
640 trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
641 left, cleft, agbno);
642 return error;
643
644out_error:
645 trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
646 cur->bc_private.a.agno, error, _RET_IP_);
647 return error;
648}
649
650/*
651 * Find the right extent and the one before it (cright). This function
652 * assumes that we've already split any extents crossing agbno + aglen.
653 */
654STATIC int
655xfs_refcount_find_right_extents(
656 struct xfs_btree_cur *cur,
657 struct xfs_refcount_irec *right,
658 struct xfs_refcount_irec *cright,
659 xfs_agblock_t agbno,
660 xfs_extlen_t aglen,
661 int flags)
662{
663 struct xfs_refcount_irec tmp;
664 int error;
665 int found_rec;
666
667 right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
668 error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
669 if (error)
670 goto out_error;
671 if (!found_rec)
672 return 0;
673
674 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
675 if (error)
676 goto out_error;
677 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
678
679 if (tmp.rc_startblock != agbno + aglen)
680 return 0;
681 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
682 return 0;
683 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
684 return 0;
685 /* We have a right extent; retrieve (or invent) the next left one */
686 *right = tmp;
687
688 error = xfs_btree_decrement(cur, 0, &found_rec);
689 if (error)
690 goto out_error;
691 if (found_rec) {
692 error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
693 if (error)
694 goto out_error;
695 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
696 out_error);
697
698 /* if tmp ends at the end of our range, just use that */
699 if (xfs_refc_next(&tmp) == agbno + aglen)
700 *cright = tmp;
701 else {
702 /*
703 * There's a gap in the refcntbt at the end of the
704 * range we're interested in (refcount == 1) so
705 * create the implied extent and pass it back.
706 * We assume here that the agbno/aglen range was
707 * passed in from a data fork extent mapping and
708 * therefore is allocated to exactly one owner.
709 */
710 cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
711 cright->rc_blockcount = right->rc_startblock -
712 cright->rc_startblock;
713 cright->rc_refcount = 1;
714 }
715 } else {
716 /*
717 * No extents, so pretend that there's one covering the whole
718 * range.
719 */
720 cright->rc_startblock = agbno;
721 cright->rc_blockcount = aglen;
722 cright->rc_refcount = 1;
723 }
724 trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
725 cright, right, agbno + aglen);
726 return error;
727
728out_error:
729 trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
730 cur->bc_private.a.agno, error, _RET_IP_);
731 return error;
732}
733
734/* Is this extent valid? */
735static inline bool
736xfs_refc_valid(
737 struct xfs_refcount_irec *rc)
738{
739 return rc->rc_startblock != NULLAGBLOCK;
740}
741
742/*
743 * Try to merge with any extents on the boundaries of the adjustment range.
744 */
745STATIC int
746xfs_refcount_merge_extents(
747 struct xfs_btree_cur *cur,
748 xfs_agblock_t *agbno,
749 xfs_extlen_t *aglen,
750 enum xfs_refc_adjust_op adjust,
10e65503 751 int flags,
752 bool *shape_changed)
753{
754 struct xfs_refcount_irec left = {0}, cleft = {0};
755 struct xfs_refcount_irec cright = {0}, right = {0};
756 int error;
757 unsigned long long ulen;
758 bool cequal;
759
760 *shape_changed = false;
761 /*
762 * Find the extent just below agbno [left], just above agbno [cleft],
763 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
764 * [right].
765 */
766 error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
10e65503 767 *aglen, flags);
768 if (error)
769 return error;
770 error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
10e65503 771 *aglen, flags);
772 if (error)
773 return error;
774
775 /* No left or right extent to merge; exit. */
776 if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
777 return 0;
778
779 cequal = (cleft.rc_startblock == cright.rc_startblock) &&
780 (cleft.rc_blockcount == cright.rc_blockcount);
781
782 /* Try to merge left, cleft, and right. cleft must == cright. */
783 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
784 right.rc_blockcount;
785 if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
786 xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
787 left.rc_refcount == cleft.rc_refcount + adjust &&
788 right.rc_refcount == cleft.rc_refcount + adjust &&
789 ulen < MAXREFCEXTLEN) {
790 *shape_changed = true;
791 return xfs_refcount_merge_center_extents(cur, &left, &cleft,
1421de38 792 &right, ulen, aglen);
793 }
794
795 /* Try to merge left and cleft. */
796 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
797 if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
798 left.rc_refcount == cleft.rc_refcount + adjust &&
799 ulen < MAXREFCEXTLEN) {
800 *shape_changed = true;
801 error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
802 agbno, aglen);
803 if (error)
804 return error;
805
806 /*
807 * If we just merged left + cleft and cleft == cright,
808 * we no longer have a cright to merge with right. We're done.
809 */
810 if (cequal)
811 return 0;
812 }
813
814 /* Try to merge cright and right. */
815 ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
816 if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
817 right.rc_refcount == cright.rc_refcount + adjust &&
818 ulen < MAXREFCEXTLEN) {
819 *shape_changed = true;
820 return xfs_refcount_merge_right_extent(cur, &right, &cright,
1421de38 821 aglen);
822 }
823
824 return error;
825}
826
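/*
 * Illustrative restatement, not libxfs code, of the merge test used above:
 * a neighbour can absorb the centre only if its refcount already equals
 * what the centre's refcount will become after the adjustment, and only if
 * the combined extent length stays below MAXREFCEXTLEN.
 */
static bool
toy_can_merge(
	unsigned int		neighbour_refcount,
	unsigned int		center_refcount,
	int			adjust,		/* +1 or -1 */
	unsigned long long	combined_len,
	unsigned long long	max_len)	/* e.g. MAXREFCEXTLEN */
{
	return (long long)neighbour_refcount ==
			(long long)center_refcount + adjust &&
	       combined_len < max_len;
}
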
827/*
828 * XXX: This is a pretty hand-wavy estimate. The penalty for guessing
829 * true incorrectly is a shutdown FS; the penalty for guessing false
830 * incorrectly is more transaction rolls than might be necessary.
831 * Be conservative here.
832 */
833static bool
834xfs_refcount_still_have_space(
835 struct xfs_btree_cur *cur)
836{
837 unsigned long overhead;
838
839 overhead = cur->bc_private.a.priv.refc.shape_changes *
840 xfs_allocfree_log_count(cur->bc_mp, 1);
841 overhead *= cur->bc_mp->m_sb.sb_blocksize;
842
843 /*
844 * Only allow 2 refcount extent updates per transaction if the
845 * refcount continue update "error" has been injected.
846 */
847 if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
848 XFS_TEST_ERROR(false, cur->bc_mp,
e2a190dd 849 XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
850 return false;
851
852 if (cur->bc_private.a.priv.refc.nr_ops == 0)
853 return true;
854 else if (overhead > cur->bc_tp->t_log_res)
855 return false;
856 return cur->bc_tp->t_log_res - overhead >
594956fa 857 cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
858}
859
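/*
 * Purely illustrative sketch of the check above with the filesystem-derived
 * quantities turned into plain parameters; the real code pulls them from
 * the cursor, the transaction reservation, and xfs_allocfree_log_count().
 * The error-injection branch is omitted and every name here is hypothetical.
 */
static bool
example_still_have_space(
	unsigned long	shape_changes,		/* splits/merges so far */
	unsigned long	blocks_per_shape_change, /* worst-case blocks logged */
	unsigned long	blocksize,		/* fs block size in bytes */
	unsigned long	nr_ops,			/* record updates so far */
	unsigned long	per_op_overhead,	/* e.g. XFS_REFCOUNT_ITEM_OVERHEAD */
	unsigned long	log_res)		/* transaction log reservation */
{
	unsigned long	overhead;

	overhead = shape_changes * blocks_per_shape_change * blocksize;

	if (nr_ops == 0)
		return true;
	if (overhead > log_res)
		return false;
	return log_res - overhead > nr_ops * per_op_overhead;
}
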
860/*
861 * Adjust the refcounts of middle extents. At this point we should have
862 * split extents that crossed the adjustment range; merged with adjacent
863 * extents; and updated agbno/aglen to reflect the merges. Therefore,
864 * all we have to do is update the extents inside [agbno, agbno + aglen].
865 */
866STATIC int
867xfs_refcount_adjust_extents(
868 struct xfs_btree_cur *cur,
869 xfs_agblock_t *agbno,
870 xfs_extlen_t *aglen,
871 enum xfs_refc_adjust_op adj,
872 struct xfs_defer_ops *dfops,
873 struct xfs_owner_info *oinfo)
874{
875 struct xfs_refcount_irec ext, tmp;
876 int error;
877 int found_rec, found_tmp;
878 xfs_fsblock_t fsbno;
879
880 /* Merging did all the work already. */
881 if (*aglen == 0)
882 return 0;
883
884 error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
885 if (error)
886 goto out_error;
887
888 while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
889 error = xfs_refcount_get_rec(cur, &ext, &found_rec);
890 if (error)
891 goto out_error;
892 if (!found_rec) {
893 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
894 ext.rc_blockcount = 0;
895 ext.rc_refcount = 0;
896 }
897
898 /*
899 * Deal with a hole in the refcount tree; if a file maps to
900 * these blocks and there's no refcountbt record, pretend that
901 * there is one with refcount == 1.
902 */
903 if (ext.rc_startblock != *agbno) {
904 tmp.rc_startblock = *agbno;
905 tmp.rc_blockcount = min(*aglen,
906 ext.rc_startblock - *agbno);
907 tmp.rc_refcount = 1 + adj;
908 trace_xfs_refcount_modify_extent(cur->bc_mp,
909 cur->bc_private.a.agno, &tmp);
910
911 /*
912 * Either cover the hole (increment) or
913 * delete the range (decrement).
914 */
915 if (tmp.rc_refcount) {
916 error = xfs_refcount_insert(cur, &tmp,
917 &found_tmp);
918 if (error)
919 goto out_error;
920 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
921 found_tmp == 1, out_error);
922 cur->bc_private.a.priv.refc.nr_ops++;
923 } else {
924 fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
925 cur->bc_private.a.agno,
926 tmp.rc_startblock);
927 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
928 tmp.rc_blockcount, oinfo);
929 }
930
931 (*agbno) += tmp.rc_blockcount;
932 (*aglen) -= tmp.rc_blockcount;
933
934 error = xfs_refcount_lookup_ge(cur, *agbno,
935 &found_rec);
936 if (error)
937 goto out_error;
938 }
939
940 /* Stop if there's nothing left to modify */
941 if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
942 break;
943
944 /*
945 * Adjust the reference count and either update the tree
946 * (incr) or free the blocks (decr).
947 */
948 if (ext.rc_refcount == MAXREFCOUNT)
949 goto skip;
950 ext.rc_refcount += adj;
951 trace_xfs_refcount_modify_extent(cur->bc_mp,
952 cur->bc_private.a.agno, &ext);
953 if (ext.rc_refcount > 1) {
954 error = xfs_refcount_update(cur, &ext);
955 if (error)
956 goto out_error;
957 cur->bc_private.a.priv.refc.nr_ops++;
958 } else if (ext.rc_refcount == 1) {
959 error = xfs_refcount_delete(cur, &found_rec);
960 if (error)
961 goto out_error;
962 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
963 found_rec == 1, out_error);
964 cur->bc_private.a.priv.refc.nr_ops++;
965 goto advloop;
966 } else {
967 fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
968 cur->bc_private.a.agno,
969 ext.rc_startblock);
970 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
971 ext.rc_blockcount, oinfo);
972 }
973
974skip:
975 error = xfs_btree_increment(cur, 0, &found_rec);
976 if (error)
977 goto out_error;
978
979advloop:
980 (*agbno) += ext.rc_blockcount;
981 (*aglen) -= ext.rc_blockcount;
982 }
983
984 return error;
985out_error:
986 trace_xfs_refcount_modify_extent_error(cur->bc_mp,
987 cur->bc_private.a.agno, error, _RET_IP_);
988 return error;
989}
990
991/* Adjust the reference count of a range of AG blocks. */
992STATIC int
993xfs_refcount_adjust(
994 struct xfs_btree_cur *cur,
995 xfs_agblock_t agbno,
996 xfs_extlen_t aglen,
997 xfs_agblock_t *new_agbno,
998 xfs_extlen_t *new_aglen,
999 enum xfs_refc_adjust_op adj,
1000 struct xfs_defer_ops *dfops,
1001 struct xfs_owner_info *oinfo)
1002{
1003 bool shape_changed;
1004 int shape_changes = 0;
1005 int error;
1006
1007 *new_agbno = agbno;
1008 *new_aglen = aglen;
1009 if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
1010 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
1011 agbno, aglen);
1012 else
1013 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
1014 agbno, aglen);
1015
1016 /*
1017 * Ensure that no rcextents cross the boundary of the adjustment range.
1018 */
1019 error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
1020 if (error)
1021 goto out_error;
1022 if (shape_changed)
1023 shape_changes++;
1024
1025 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
1026 if (error)
1027 goto out_error;
1028 if (shape_changed)
1029 shape_changes++;
1030
1031 /*
1032 * Try to merge with the left or right extents of the range.
1033 */
1034 error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
10e65503 1035 XFS_FIND_RCEXT_SHARED, &shape_changed);
1036 if (error)
1037 goto out_error;
1038 if (shape_changed)
1039 shape_changes++;
1040 if (shape_changes)
1041 cur->bc_private.a.priv.refc.shape_changes++;
1042
1043 /* Now that we've taken care of the ends, adjust the middle extents */
1044 error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
1045 adj, dfops, oinfo);
1046 if (error)
1047 goto out_error;
1048
1049 return 0;
1050
1051out_error:
1052 trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
1053 error, _RET_IP_);
1054 return error;
1055}
1056
1057/* Clean up after calling xfs_refcount_finish_one. */
1058void
1059xfs_refcount_finish_one_cleanup(
1060 struct xfs_trans *tp,
1061 struct xfs_btree_cur *rcur,
1062 int error)
1063{
1064 struct xfs_buf *agbp;
1065
1066 if (rcur == NULL)
1067 return;
1068 agbp = rcur->bc_private.a.agbp;
1069 xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1070 if (error)
1071 xfs_trans_brelse(tp, agbp);
1072}
1073
1074/*
1075 * Process one of the deferred refcount operations. We pass back the
1076 * btree cursor to maintain our lock on the btree between calls.
1077 * This saves time and eliminates a buffer deadlock between the
1078 * superblock and the AGF because we'll always grab them in the same
1079 * order.
1080 */
1081int
1082xfs_refcount_finish_one(
1083 struct xfs_trans *tp,
1084 struct xfs_defer_ops *dfops,
1085 enum xfs_refcount_intent_type type,
1086 xfs_fsblock_t startblock,
1087 xfs_extlen_t blockcount,
1088 xfs_fsblock_t *new_fsb,
1089 xfs_extlen_t *new_len,
1090 struct xfs_btree_cur **pcur)
1091{
1092 struct xfs_mount *mp = tp->t_mountp;
1093 struct xfs_btree_cur *rcur;
1094 struct xfs_buf *agbp = NULL;
1095 int error = 0;
1096 xfs_agnumber_t agno;
1097 xfs_agblock_t bno;
1098 xfs_agblock_t new_agbno;
1099 unsigned long nr_ops = 0;
1100 int shape_changes = 0;
1101
1102 agno = XFS_FSB_TO_AGNO(mp, startblock);
1103 ASSERT(agno != NULLAGNUMBER);
1104 bno = XFS_FSB_TO_AGBNO(mp, startblock);
1105
1106 trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
1107 type, XFS_FSB_TO_AGBNO(mp, startblock),
1108 blockcount);
1109
1110 if (XFS_TEST_ERROR(false, mp,
e2a190dd 1111 XFS_ERRTAG_REFCOUNT_FINISH_ONE))
1112 return -EIO;
1113
1114 /*
1115 * If we haven't gotten a cursor or the cursor AG doesn't match
1116 * the startblock, get one now.
1117 */
1118 rcur = *pcur;
1119 if (rcur != NULL && rcur->bc_private.a.agno != agno) {
1120 nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
1121 shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
1122 xfs_refcount_finish_one_cleanup(tp, rcur, 0);
1123 rcur = NULL;
1124 *pcur = NULL;
1125 }
1126 if (rcur == NULL) {
1127 error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
1128 XFS_ALLOC_FLAG_FREEING, &agbp);
1129 if (error)
1130 return error;
1131 if (!agbp)
1132 return -EFSCORRUPTED;
1133
5ff5ced0 1134 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
1135 if (!rcur) {
1136 error = -ENOMEM;
1137 goto out_cur;
1138 }
1139 rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
1140 rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
1141 }
1142 *pcur = rcur;
1143
1144 switch (type) {
1145 case XFS_REFCOUNT_INCREASE:
1146 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
1147 new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL);
1148 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
1149 break;
1150 case XFS_REFCOUNT_DECREASE:
1151 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
1152 new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL);
1153 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
1154 break;
1155 case XFS_REFCOUNT_ALLOC_COW:
1156 *new_fsb = startblock + blockcount;
1157 *new_len = 0;
1158 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops);
1159 break;
1160 case XFS_REFCOUNT_FREE_COW:
1161 *new_fsb = startblock + blockcount;
1162 *new_len = 0;
1163 error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops);
1164 break;
1165 default:
1166 ASSERT(0);
1167 error = -EFSCORRUPTED;
1168 }
1169 if (!error && *new_len > 0)
1170 trace_xfs_refcount_finish_one_leftover(mp, agno, type,
1171 bno, blockcount, new_agbno, *new_len);
1172 return error;
1173
1174out_cur:
1175 xfs_trans_brelse(tp, agbp);
1176
1177 return error;
1178}
1179
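/*
 * Illustrative sketch of the cursor hand-off contract, not libxfs code: a
 * hypothetical caller processes a batch of queued intents with the same
 * cursor slot and releases it exactly once at the end.  The real deferred
 * operation machinery also requeues leftover work reported via new_fsb and
 * new_len, which this simplified sketch ignores.
 */
static int
example_finish_refcount_intents(
	struct xfs_trans		*tp,
	struct xfs_defer_ops		*dfops,
	struct list_head		*intents)
{
	struct xfs_refcount_intent	*ri;
	struct xfs_refcount_intent	*n;
	struct xfs_btree_cur		*rcur = NULL;
	xfs_fsblock_t			new_fsb;
	xfs_extlen_t			new_len;
	int				error = 0;

	list_for_each_entry_safe(ri, n, intents, ri_list) {
		error = xfs_refcount_finish_one(tp, dfops, ri->ri_type,
				ri->ri_startblock, ri->ri_blockcount,
				&new_fsb, &new_len, &rcur);
		if (error)
			break;
	}

	/* Drop the cached AGF buffer and btree cursor exactly once. */
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	return error;
}
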
1180/*
1181 * Record a refcount intent for later processing.
1182 */
1183static int
1184__xfs_refcount_add(
1185 struct xfs_mount *mp,
1186 struct xfs_defer_ops *dfops,
1187 enum xfs_refcount_intent_type type,
1188 xfs_fsblock_t startblock,
1189 xfs_extlen_t blockcount)
1190{
1191 struct xfs_refcount_intent *ri;
1192
1193 trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock),
1194 type, XFS_FSB_TO_AGBNO(mp, startblock),
1195 blockcount);
1196
1197 ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
1198 KM_SLEEP | KM_NOFS);
1199 INIT_LIST_HEAD(&ri->ri_list);
1200 ri->ri_type = type;
1201 ri->ri_startblock = startblock;
1202 ri->ri_blockcount = blockcount;
1203
1204 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
1205 return 0;
1206}
1207
1208/*
1209 * Increase the reference count of the blocks backing a file's extent.
1210 */
1211int
1212xfs_refcount_increase_extent(
1213 struct xfs_mount *mp,
1214 struct xfs_defer_ops *dfops,
1215 struct xfs_bmbt_irec *PREV)
1216{
1217 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1218 return 0;
1219
1220 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE,
1221 PREV->br_startblock, PREV->br_blockcount);
1222}
1223
1224/*
1225 * Decrease the reference count of the blocks backing a file's extent.
1226 */
1227int
1228xfs_refcount_decrease_extent(
1229 struct xfs_mount *mp,
1230 struct xfs_defer_ops *dfops,
1231 struct xfs_bmbt_irec *PREV)
1232{
1233 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1234 return 0;
1235
1236 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE,
1237 PREV->br_startblock, PREV->br_blockcount);
1238}
1239
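/*
 * Illustrative usage sketch, not libxfs code: a hypothetical caller that has
 * just shared a data fork mapping queues a deferred refcount bump for it;
 * the intent is replayed later through xfs_refcount_finish_one() when the
 * transaction's deferred operations are finished.
 */
static int
example_queue_refcount_bump(
	struct xfs_mount	*mp,
	struct xfs_defer_ops	*dfops,
	struct xfs_bmbt_irec	*irec)
{
	/* No-op without reflink support; otherwise log the intent. */
	return xfs_refcount_increase_extent(mp, dfops, irec);
}
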
1240/*
1241 * Given an AG extent, find the lowest-numbered run of shared blocks
1242 * within that range and return the range in fbno/flen. If
1243 * find_end_of_shared is set, return the longest contiguous extent of
1244 * shared blocks; if not, just return the first extent we find. If no
1245 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
1246 * and 0, respectively.
1247 */
1248int
1249xfs_refcount_find_shared(
1250 struct xfs_btree_cur *cur,
1251 xfs_agblock_t agbno,
1252 xfs_extlen_t aglen,
1253 xfs_agblock_t *fbno,
1254 xfs_extlen_t *flen,
1255 bool find_end_of_shared)
1256{
1257 struct xfs_refcount_irec tmp;
1258 int i;
1259 int have;
1260 int error;
1261
1262 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno,
1263 agbno, aglen);
1264
1265 /* By default, skip the whole range */
1266 *fbno = NULLAGBLOCK;
1267 *flen = 0;
1268
1269 /* Try to find a refcount extent that crosses the start */
1270 error = xfs_refcount_lookup_le(cur, agbno, &have);
1271 if (error)
1272 goto out_error;
1273 if (!have) {
1274 /* No left extent, look at the next one */
1275 error = xfs_btree_increment(cur, 0, &have);
1276 if (error)
1277 goto out_error;
1278 if (!have)
1279 goto done;
1280 }
1281 error = xfs_refcount_get_rec(cur, &tmp, &i);
1282 if (error)
1283 goto out_error;
1284 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
1285
1286 /* If the extent ends before the start, look at the next one */
1287 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
1288 error = xfs_btree_increment(cur, 0, &have);
1289 if (error)
1290 goto out_error;
1291 if (!have)
1292 goto done;
1293 error = xfs_refcount_get_rec(cur, &tmp, &i);
1294 if (error)
1295 goto out_error;
1296 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
1297 }
1298
1299 /* If the extent starts after the range we want, bail out */
1300 if (tmp.rc_startblock >= agbno + aglen)
1301 goto done;
1302
1303 /* We found the start of a shared extent! */
1304 if (tmp.rc_startblock < agbno) {
1305 tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
1306 tmp.rc_startblock = agbno;
1307 }
1308
1309 *fbno = tmp.rc_startblock;
1310 *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
1311 if (!find_end_of_shared)
1312 goto done;
1313
1314 /* Otherwise, find the end of this shared extent */
1315 while (*fbno + *flen < agbno + aglen) {
1316 error = xfs_btree_increment(cur, 0, &have);
1317 if (error)
1318 goto out_error;
1319 if (!have)
1320 break;
1321 error = xfs_refcount_get_rec(cur, &tmp, &i);
1322 if (error)
1323 goto out_error;
1324 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
1325 if (tmp.rc_startblock >= agbno + aglen ||
1326 tmp.rc_startblock != *fbno + *flen)
1327 break;
1328 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
1329 }
1330
1331done:
1332 trace_xfs_refcount_find_shared_result(cur->bc_mp,
1333 cur->bc_private.a.agno, *fbno, *flen);
1334
1335out_error:
1336 if (error)
1337 trace_xfs_refcount_find_shared_error(cur->bc_mp,
1338 cur->bc_private.a.agno, error, _RET_IP_);
1339 return error;
1340}
1341
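/*
 * Illustrative sketch, not libxfs code, of the fbno/flen contract above:
 * scan a toy sorted list of shared-extent records and report the first
 * shared run inside [agbno, agbno + aglen), optionally extending it across
 * contiguous records.  Every name here is hypothetical.
 */
struct toy_shared_ext {
	unsigned int	start;
	unsigned int	len;
};

static void
toy_find_shared(
	const struct toy_shared_ext	*recs,
	unsigned int			nrecs,
	unsigned int			agbno,
	unsigned int			aglen,
	unsigned int			*fbno,	/* ~0U if nothing is shared */
	unsigned int			*flen,
	bool				find_end_of_shared)
{
	unsigned int			end = agbno + aglen;
	unsigned int			i;

	*fbno = ~0U;
	*flen = 0;

	for (i = 0; i < nrecs; i++) {
		unsigned int	rstart = recs[i].start;
		unsigned int	rend = rstart + recs[i].len;

		if (rend <= agbno)
			continue;	/* ends before our range */
		if (rstart >= end)
			break;		/* starts after our range */

		if (*fbno == ~0U) {
			/* First shared block in the range. */
			*fbno = rstart > agbno ? rstart : agbno;
			*flen = (rend < end ? rend : end) - *fbno;
			if (!find_end_of_shared)
				return;
		} else if (rstart == *fbno + *flen) {
			/* Contiguous with the run so far; extend it. */
			*flen = (rend < end ? rend : end) - *fbno;
		} else {
			/* A gap ends the run of shared blocks. */
			break;
		}
	}
}
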
1342/*
1343 * Recovering CoW Blocks After a Crash
1344 *
1345 * Due to the way that the copy on write mechanism works, there's a window of
1346 * opportunity in which we can lose track of allocated blocks during a crash.
1347 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
1348 * causes blocks to be allocated and stored in the CoW fork. The blocks are
1349 * no longer in the free space btree but are not otherwise recorded anywhere
1350 * until the write completes and the blocks are mapped into the file. A crash
1351 * in between allocation and remapping results in the replacement blocks being
1352 * lost. This situation is exacerbated by the CoW extent size hint because
 1353 * allocations can hang around for a long time.
1354 *
1355 * However, there is a place where we can record these allocations before they
1356 * become mappings -- the reference count btree. The btree does not record
1357 * extents with refcount == 1, so we can record allocations with a refcount of
1358 * 1. Blocks being used for CoW writeout cannot be shared, so there should be
1359 * no conflict with shared block records. These mappings should be created
1360 * when we allocate blocks to the CoW fork and deleted when they're removed
1361 * from the CoW fork.
1362 *
1363 * Minor nit: records for in-progress CoW allocations and records for shared
1364 * extents must never be merged, to preserve the property that (except for CoW
1365 * allocations) there are no refcount btree entries with refcount == 1. The
1366 * only time this could potentially happen is when unsharing a block that's
1367 * adjacent to CoW allocations, so we must be careful to avoid this.
1368 *
1369 * At mount time we recover lost CoW allocations by searching the refcount
1370 * btree for these refcount == 1 mappings. These represent CoW allocations
1371 * that were in progress at the time the filesystem went down, so we can free
1372 * them to get the space back.
1373 *
1374 * This mechanism is superior to creating EFIs for unmapped CoW extents for
1375 * several reasons -- first, EFIs pin the tail of the log and would have to be
1376 * periodically relogged to avoid filling up the log. Second, CoW completions
1377 * will have to file an EFD and create new EFIs for whatever remains in the
1378 * CoW fork; this partially takes care of (1) but extent-size reservations
1379 * will have to periodically relog even if there's no writeout in progress.
1380 * This can happen if the CoW extent size hint is set, which you really want.
1381 * Third, EFIs cannot currently be automatically relogged into newer
1382 * transactions to advance the log tail. Fourth, stuffing the log full of
1383 * EFIs places an upper bound on the number of CoW allocations that can be
1384 * held filesystem-wide at any given time. Recording them in the refcount
1385 * btree doesn't require us to maintain any state in memory and doesn't pin
1386 * the log.
1387 */
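
/*
 * Illustrative sketch, with hypothetical helper names: CoW staging records
 * are kept apart from shared-extent records by biasing the record's
 * startblock with XFS_REFC_COW_START, which is why xfs_refcount_get_rec()
 * masks the flag off, xfs_refcount_adjust_cow() adds it before operating,
 * and the recovery scan below queries the keyspace at and above
 * XFS_REFC_COW_START.
 */
static inline xfs_agblock_t
example_cow_keyspace_start(xfs_agblock_t agbno)
{
	/* Encode: shift a real AG block number into the CoW keyspace. */
	return agbno + XFS_REFC_COW_START;
}

static inline xfs_agblock_t
example_cow_keyspace_agbno(xfs_agblock_t rc_startblock)
{
	/* Decode: strip the flag to recover the real AG block number. */
	return rc_startblock & ~XFS_REFC_COW_START;
}
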
1388/*
1389 * Adjust the refcounts of CoW allocations. These allocations are "magic"
1390 * in that they're not referenced anywhere else in the filesystem, so we
1391 * stash them in the refcount btree with a refcount of 1 until either file
1392 * remapping (or CoW cancellation) happens.
1393 */
1394STATIC int
1395xfs_refcount_adjust_cow_extents(
1396 struct xfs_btree_cur *cur,
1397 xfs_agblock_t agbno,
1398 xfs_extlen_t aglen,
1421de38 1399 enum xfs_refc_adjust_op adj)
1400{
1401 struct xfs_refcount_irec ext, tmp;
1402 int error;
1403 int found_rec, found_tmp;
1404
1405 if (aglen == 0)
1406 return 0;
1407
1408 /* Find any overlapping refcount records */
1409 error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
1410 if (error)
1411 goto out_error;
1412 error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1413 if (error)
1414 goto out_error;
1415 if (!found_rec) {
1416 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
1417 XFS_REFC_COW_START;
1418 ext.rc_blockcount = 0;
1419 ext.rc_refcount = 0;
1420 }
1421
1422 switch (adj) {
1423 case XFS_REFCOUNT_ADJUST_COW_ALLOC:
1424 /* Adding a CoW reservation, there should be nothing here. */
1425 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1426 ext.rc_startblock >= agbno + aglen, out_error);
1427
1428 tmp.rc_startblock = agbno;
1429 tmp.rc_blockcount = aglen;
1430 tmp.rc_refcount = 1;
1431 trace_xfs_refcount_modify_extent(cur->bc_mp,
1432 cur->bc_private.a.agno, &tmp);
1433
1434 error = xfs_refcount_insert(cur, &tmp,
1435 &found_tmp);
1436 if (error)
1437 goto out_error;
1438 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1439 found_tmp == 1, out_error);
1440 break;
1441 case XFS_REFCOUNT_ADJUST_COW_FREE:
1442 /* Removing a CoW reservation, there should be one extent. */
1443 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1444 ext.rc_startblock == agbno, out_error);
1445 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1446 ext.rc_blockcount == aglen, out_error);
1447 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1448 ext.rc_refcount == 1, out_error);
1449
1450 ext.rc_refcount = 0;
1451 trace_xfs_refcount_modify_extent(cur->bc_mp,
1452 cur->bc_private.a.agno, &ext);
1453 error = xfs_refcount_delete(cur, &found_rec);
1454 if (error)
1455 goto out_error;
1456 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1457 found_rec == 1, out_error);
1458 break;
1459 default:
1460 ASSERT(0);
1461 }
1462
1463 return error;
1464out_error:
1465 trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1466 cur->bc_private.a.agno, error, _RET_IP_);
1467 return error;
1468}
1469
1470/*
1471 * Add or remove refcount btree entries for CoW reservations.
1472 */
1473STATIC int
1474xfs_refcount_adjust_cow(
1475 struct xfs_btree_cur *cur,
1476 xfs_agblock_t agbno,
1477 xfs_extlen_t aglen,
1421de38 1478 enum xfs_refc_adjust_op adj)
1479{
1480 bool shape_changed;
1481 int error;
1482
1483 agbno += XFS_REFC_COW_START;
1484
1485 /*
1486 * Ensure that no rcextents cross the boundary of the adjustment range.
1487 */
1488 error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
1489 if (error)
1490 goto out_error;
1491
1492 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
1493 if (error)
1494 goto out_error;
1495
1496 /*
1497 * Try to merge with the left or right extents of the range.
1498 */
1499 error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
1500 XFS_FIND_RCEXT_COW, &shape_changed);
1501 if (error)
1502 goto out_error;
1503
1504 /* Now that we've taken care of the ends, adjust the middle extents */
1421de38 1505 error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
1506 if (error)
1507 goto out_error;
1508
1509 return 0;
1510
1511out_error:
1512 trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
1513 error, _RET_IP_);
1514 return error;
1515}
1516
1517/*
1518 * Record a CoW allocation in the refcount btree.
1519 */
1520STATIC int
1521__xfs_refcount_cow_alloc(
1522 struct xfs_btree_cur *rcur,
1523 xfs_agblock_t agbno,
1524 xfs_extlen_t aglen,
1525 struct xfs_defer_ops *dfops)
1526{
1527 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
1528 agbno, aglen);
1529
1530 /* Add refcount btree reservation */
a44b23a0 1531 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1421de38 1532 XFS_REFCOUNT_ADJUST_COW_ALLOC);
1533}
1534
1535/*
1536 * Remove a CoW allocation from the refcount btree.
1537 */
1538STATIC int
1539__xfs_refcount_cow_free(
1540 struct xfs_btree_cur *rcur,
1541 xfs_agblock_t agbno,
1542 xfs_extlen_t aglen,
1543 struct xfs_defer_ops *dfops)
1544{
1545 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
1546 agbno, aglen);
1547
1548 /* Remove refcount btree reservation */
a44b23a0 1549 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1421de38 1550 XFS_REFCOUNT_ADJUST_COW_FREE);
1551}
1552
1553/* Record a CoW staging extent in the refcount btree. */
1554int
1555xfs_refcount_alloc_cow_extent(
1556 struct xfs_mount *mp,
1557 struct xfs_defer_ops *dfops,
1558 xfs_fsblock_t fsb,
1559 xfs_extlen_t len)
1560{
1561 int error;
1562
1563 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1564 return 0;
1565
a44b23a0 1566 error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
10e65503 1567 fsb, len);
1568 if (error)
1569 return error;
1570
1571 /* Add rmap entry */
1572 return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1573 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1574}
1575
1576/* Forget a CoW staging event in the refcount btree. */
1577int
1578xfs_refcount_free_cow_extent(
1579 struct xfs_mount *mp,
1580 struct xfs_defer_ops *dfops,
1581 xfs_fsblock_t fsb,
1582 xfs_extlen_t len)
1583{
1584 int error;
1585
1586 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1587 return 0;
1588
1589 /* Remove rmap entry */
1590 error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1591 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1592 if (error)
1593 return error;
1594
1595 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
1596 fsb, len);
1597}
1598
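/*
 * Illustrative usage sketch, not libxfs code: a hypothetical writeback path
 * records a CoW staging extent as soon as the blocks are allocated, pairing
 * the refcount staging record with an XFS_RMAP_OWN_COW rmap entry as done
 * above; xfs_refcount_free_cow_extent() undoes both once the blocks are
 * remapped into the file or the reservation is cancelled.
 */
static int
example_stage_cow_allocation(
	struct xfs_mount	*mp,
	struct xfs_defer_ops	*dfops,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	return xfs_refcount_alloc_cow_extent(mp, dfops, fsb, len);
}
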
1599struct xfs_refcount_recovery {
1600 struct list_head rr_list;
1601 struct xfs_refcount_irec rr_rrec;
1602};
1603
1604/* Stuff an extent on the recovery list. */
1605STATIC int
1606xfs_refcount_recover_extent(
1421de38 1607 struct xfs_btree_cur *cur,
1608 union xfs_btree_rec *rec,
1609 void *priv)
1610{
1611 struct list_head *debris = priv;
1612 struct xfs_refcount_recovery *rr;
1613
1614 if (be32_to_cpu(rec->refc.rc_refcount) != 1)
1615 return -EFSCORRUPTED;
1616
1617 rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
1618 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
1619 list_add_tail(&rr->rr_list, debris);
1620
1621 return 0;
1622}
1623
1624/* Find and remove leftover CoW reservations. */
1625int
1626xfs_refcount_recover_cow_leftovers(
1627 struct xfs_mount *mp,
1628 xfs_agnumber_t agno)
1629{
1630 struct xfs_trans *tp;
1631 struct xfs_btree_cur *cur;
1632 struct xfs_buf *agbp;
1633 struct xfs_refcount_recovery *rr, *n;
1634 struct list_head debris;
1635 union xfs_btree_irec low;
1636 union xfs_btree_irec high;
1637 struct xfs_defer_ops dfops;
1638 xfs_fsblock_t fsb;
1639 xfs_agblock_t agbno;
1640 int error;
1641
1642 if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
1643 return -EOPNOTSUPP;
1644
1645 INIT_LIST_HEAD(&debris);
1646
1647 /*
1648 * In this first part, we use an empty transaction to gather up
1649 * all the leftover CoW extents so that we can subsequently
1650 * delete them. The empty transaction is used to avoid
1651 * a buffer lock deadlock if there happens to be a loop in the
1652 * refcountbt because we're allowed to re-grab a buffer that is
1653 * already attached to our transaction. When we're done
1654 * recording the CoW debris we cancel the (empty) transaction
1655 * and everything goes away cleanly.
1656 */
1657 error = xfs_trans_alloc_empty(mp, &tp);
1658 if (error)
1659 return error;
1660
1661 error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
1662 if (error)
1663 goto out_trans;
1664 if (!agbp) {
1665 error = -ENOMEM;
1666 goto out_trans;
1667 }
5ff5ced0 1668 cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
1669
1670 /* Find all the leftover CoW staging extents. */
1671 memset(&low, 0, sizeof(low));
1672 memset(&high, 0, sizeof(high));
1673 low.rc.rc_startblock = XFS_REFC_COW_START;
1674 high.rc.rc_startblock = -1U;
1675 error = xfs_btree_query_range(cur, &low, &high,
1676 xfs_refcount_recover_extent, &debris);
1677 if (error)
22a69322 1678 goto out_cursor;
10e65503 1679 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1680 xfs_trans_brelse(tp, agbp);
1681 xfs_trans_cancel(tp);
1682
1683 /* Now iterate the list to free the leftovers */
d4e8eb2e 1684 list_for_each_entry_safe(rr, n, &debris, rr_list) {
1685 /* Set up transaction. */
1686 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
1687 if (error)
1688 goto out_free;
1689
1690 trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);
1691
1692 /* Free the orphan record */
5cebfb23 1693 xfs_defer_init(tp, &dfops);
1694 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
1695 fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
f50f99f4 1696 error = xfs_refcount_free_cow_extent(mp, tp->t_dfops, fsb,
1697 rr->rr_rrec.rc_blockcount);
1698 if (error)
1699 goto out_defer;
1700
1701 /* Free the block. */
f50f99f4 1702 xfs_bmap_add_free(mp, tp->t_dfops, fsb,
1703 rr->rr_rrec.rc_blockcount, NULL);
1704
f50f99f4 1705 error = xfs_defer_finish(&tp, tp->t_dfops);
1706 if (error)
1707 goto out_defer;
1708
1709 error = xfs_trans_commit(tp);
1710 if (error)
22a69322 1711 goto out_free;
1712
1713 list_del(&rr->rr_list);
1714 kmem_free(rr);
10e65503 1715 }
10e65503 1716
1717 return error;
1718out_defer:
f50f99f4 1719 xfs_defer_cancel(tp->t_dfops);
1720out_trans:
1721 xfs_trans_cancel(tp);
1722out_free:
1723 /* Free the leftover list */
1724 list_for_each_entry_safe(rr, n, &debris, rr_list) {
1725 list_del(&rr->rr_list);
1726 kmem_free(rr);
1727 }
1728 return error;
1729
22a69322 1730out_cursor:
10e65503 1731 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1732 xfs_trans_brelse(tp, agbp);
1733 goto out_trans;
10e65503 1734}
1735
1736/* Is there a record covering a given extent? */
1737int
1738xfs_refcount_has_record(
1739 struct xfs_btree_cur *cur,
1740 xfs_agblock_t bno,
1741 xfs_extlen_t len,
1742 bool *exists)
1743{
1744 union xfs_btree_irec low;
1745 union xfs_btree_irec high;
1746
1747 memset(&low, 0, sizeof(low));
1748 low.rc.rc_startblock = bno;
1749 memset(&high, 0xFF, sizeof(high));
1750 high.rc.rc_startblock = bno + len - 1;
1751
1752 return xfs_btree_has_record(cur, &low, &high, exists);
1753}
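
/*
 * Illustrative usage sketch, not libxfs code: a hypothetical cross-check
 * (for example, from a scrubber) that expects a range to be completely
 * unreferenced and treats any overlapping refcount record as corruption.
 */
static int
example_expect_no_refcount_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len)
{
	bool			exists;
	int			error;

	error = xfs_refcount_has_record(cur, bno, len, &exists);
	if (error)
		return error;
	return exists ? -EFSCORRUPTED : 0;
}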