]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
xfs: null out bma->prev if no previous extent
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
37b3b4d6 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
5e656dbb 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 4 * All Rights Reserved.
2bd0ea18 5 */
9c799827 6#include "libxfs_priv.h"
b626fb59
DC
7#include "xfs_fs.h"
8#include "xfs_format.h"
9#include "xfs_log_format.h"
10#include "xfs_shared.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
f944d3d0 15#include "xfs_defer.h"
b626fb59 16#include "xfs_btree.h"
631ac87a 17#include "xfs_rmap.h"
b626fb59
DC
18#include "xfs_alloc_btree.h"
19#include "xfs_alloc.h"
b626fb59
DC
20#include "xfs_trace.h"
21#include "xfs_trans.h"
cf8ce220 22#include "xfs_ag_resv.h"
d5c1b462
BF
23#include "xfs_bmap.h"
24
25extern kmem_zone_t *xfs_bmap_free_item_zone;
2bd0ea18 26
ff105f75
DC
27struct workqueue_struct *xfs_alloc_wq;
28
2bd0ea18 29#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
30
31#define XFSA_FIXUP_BNO_OK 1
32#define XFSA_FIXUP_CNT_OK 2
33
5e656dbb
BN
34STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
35STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
36STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
2bd0ea18 37
b8165508
DC
38/*
39 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
40 * the beginning of the block for a proper header with the location information
41 * and CRC.
42 */
43unsigned int
44xfs_agfl_size(
45 struct xfs_mount *mp)
46{
47 unsigned int size = mp->m_sb.sb_sectsize;
48
49 if (xfs_sb_version_hascrc(&mp->m_sb))
50 size -= sizeof(struct xfs_agfl);
51
52 return size / sizeof(xfs_agblock_t);
53}
54
2a96beb9
DW
55unsigned int
56xfs_refc_block(
57 struct xfs_mount *mp)
58{
59 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
60 return XFS_RMAP_BLOCK(mp) + 1;
61 if (xfs_sb_version_hasfinobt(&mp->m_sb))
62 return XFS_FIBT_BLOCK(mp) + 1;
63 return XFS_IBT_BLOCK(mp) + 1;
64}
65
ef5340cd
DW
66xfs_extlen_t
67xfs_prealloc_blocks(
68 struct xfs_mount *mp)
69{
2a96beb9
DW
70 if (xfs_sb_version_hasreflink(&mp->m_sb))
71 return xfs_refc_block(mp) + 1;
ef5340cd
DW
72 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
73 return XFS_RMAP_BLOCK(mp) + 1;
74 if (xfs_sb_version_hasfinobt(&mp->m_sb))
75 return XFS_FIBT_BLOCK(mp) + 1;
76 return XFS_IBT_BLOCK(mp) + 1;
77}
78
b8a8d6e5
DW
79/*
80 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
81 * AGF buffer (PV 947395), we place constraints on the relationship among
82 * actual allocations for data blocks, freelist blocks, and potential file data
83 * bmap btree blocks. However, these restrictions may result in no actual space
84 * allocated for a delayed extent, for example, a data block in a certain AG is
85 * allocated but there is no additional block for the additional bmap btree
86 * block due to a split of the bmap btree of the file. The result of this may
87 * lead to an infinite loop when the file gets flushed to disk and all delayed
88 * extents need to be actually allocated. To get around this, we explicitly set
89 * aside a few blocks which will not be reserved in delayed allocation.
90 *
cf8ce220
DW
91 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
92 * potential split of the file's bmap btree.
b8a8d6e5
DW
93 */
94unsigned int
95xfs_alloc_set_aside(
96 struct xfs_mount *mp)
97{
8eeb15ea 98 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
b8a8d6e5
DW
99}
100
101/*
102 * When deciding how much space to allocate out of an AG, we limit the
103 * allocation maximum size to the size the AG. However, we cannot use all the
104 * blocks in the AG - some are permanently used by metadata. These
105 * blocks are generally:
106 * - the AG superblock, AGF, AGI and AGFL
107 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
108 * the AGI free inode and rmap btree root blocks.
109 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
110 * - the rmapbt root block
111 *
112 * The AG headers are sector sized, so the amount of space they take up is
113 * dependent on filesystem geometry. The others are all single blocks.
114 */
115unsigned int
116xfs_alloc_ag_max_usable(
117 struct xfs_mount *mp)
118{
119 unsigned int blocks;
120
121 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
122 blocks += XFS_ALLOC_AGFL_RESERVE;
123 blocks += 3; /* AGF, AGI btree root blocks */
124 if (xfs_sb_version_hasfinobt(&mp->m_sb))
125 blocks++; /* finobt root block */
126 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
127 blocks++; /* rmap root block */
868c70e3
DW
128 if (xfs_sb_version_hasreflink(&mp->m_sb))
129 blocks++; /* refcount root block */
b8a8d6e5
DW
130
131 return mp->m_sb.sb_agblocks - blocks;
132}
133
b194c7d8
BN
134/*
135 * Lookup the record equal to [bno, len] in the btree given by cur.
136 */
137STATIC int /* error */
138xfs_alloc_lookup_eq(
139 struct xfs_btree_cur *cur, /* btree cursor */
140 xfs_agblock_t bno, /* starting block of extent */
141 xfs_extlen_t len, /* length of extent */
142 int *stat) /* success/failure */
143{
7777d8c6
BF
144 int error;
145
b194c7d8
BN
146 cur->bc_rec.a.ar_startblock = bno;
147 cur->bc_rec.a.ar_blockcount = len;
7777d8c6
BF
148 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
149 cur->bc_private.a.priv.abt.active = (*stat == 1);
150 return error;
b194c7d8
BN
151}
152
153/*
154 * Lookup the first record greater than or equal to [bno, len]
155 * in the btree given by cur.
156 */
a2ceac1f 157int /* error */
b194c7d8
BN
158xfs_alloc_lookup_ge(
159 struct xfs_btree_cur *cur, /* btree cursor */
160 xfs_agblock_t bno, /* starting block of extent */
161 xfs_extlen_t len, /* length of extent */
162 int *stat) /* success/failure */
163{
7777d8c6
BF
164 int error;
165
b194c7d8
BN
166 cur->bc_rec.a.ar_startblock = bno;
167 cur->bc_rec.a.ar_blockcount = len;
7777d8c6
BF
168 error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
169 cur->bc_private.a.priv.abt.active = (*stat == 1);
170 return error;
b194c7d8
BN
171}
172
173/*
174 * Lookup the first record less than or equal to [bno, len]
175 * in the btree given by cur.
176 */
1fe41a73 177int /* error */
b194c7d8
BN
178xfs_alloc_lookup_le(
179 struct xfs_btree_cur *cur, /* btree cursor */
180 xfs_agblock_t bno, /* starting block of extent */
181 xfs_extlen_t len, /* length of extent */
182 int *stat) /* success/failure */
183{
7777d8c6 184 int error;
b194c7d8
BN
185 cur->bc_rec.a.ar_startblock = bno;
186 cur->bc_rec.a.ar_blockcount = len;
7777d8c6
BF
187 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
188 cur->bc_private.a.priv.abt.active = (*stat == 1);
189 return error;
190}
191
192static inline bool
193xfs_alloc_cur_active(
194 struct xfs_btree_cur *cur)
195{
196 return cur && cur->bc_private.a.priv.abt.active;
b194c7d8
BN
197}
198
199/*
200 * Update the record referred to by cur to the value given
201 * by [bno, len].
202 * This either works (return 0) or gets an EFSCORRUPTED error.
203 */
204STATIC int /* error */
205xfs_alloc_update(
206 struct xfs_btree_cur *cur, /* btree cursor */
207 xfs_agblock_t bno, /* starting block of extent */
208 xfs_extlen_t len) /* length of extent */
209{
210 union xfs_btree_rec rec;
211
212 rec.alloc.ar_startblock = cpu_to_be32(bno);
213 rec.alloc.ar_blockcount = cpu_to_be32(len);
214 return xfs_btree_update(cur, &rec);
215}
216
217/*
218 * Get the data from the pointed-to record.
219 */
a2ceac1f 220int /* error */
b194c7d8
BN
221xfs_alloc_get_rec(
222 struct xfs_btree_cur *cur, /* btree cursor */
223 xfs_agblock_t *bno, /* output: starting block of extent */
224 xfs_extlen_t *len, /* output: length of extent */
225 int *stat) /* output: success/failure */
226{
ec291989
DC
227 struct xfs_mount *mp = cur->bc_mp;
228 xfs_agnumber_t agno = cur->bc_private.a.agno;
b194c7d8
BN
229 union xfs_btree_rec *rec;
230 int error;
231
232 error = xfs_btree_get_rec(cur, &rec, stat);
3741f971
DW
233 if (error || !(*stat))
234 return error;
3741f971
DW
235
236 *bno = be32_to_cpu(rec->alloc.ar_startblock);
237 *len = be32_to_cpu(rec->alloc.ar_blockcount);
238
c25e4305
CM
239 if (*len == 0)
240 goto out_bad_rec;
241
ec291989
DC
242 /* check for valid extent range, including overflow */
243 if (!xfs_verify_agbno(mp, agno, *bno))
244 goto out_bad_rec;
245 if (*bno > *bno + *len)
246 goto out_bad_rec;
247 if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
248 goto out_bad_rec;
249
250 return 0;
251
252out_bad_rec:
253 xfs_warn(mp,
254 "%s Freespace BTree record corruption in AG %d detected!",
255 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
256 xfs_warn(mp,
257 "start block 0x%x block count 0x%x", *bno, *len);
258 return -EFSCORRUPTED;
b194c7d8
BN
259}
260
2bd0ea18
NS
261/*
262 * Compute aligned version of the found extent.
263 * Takes alignment and min length into account.
264 */
cd80de04 265STATIC bool
2bd0ea18 266xfs_alloc_compute_aligned(
a2ceac1f 267 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
268 xfs_agblock_t foundbno, /* starting block in found extent */
269 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18 270 xfs_agblock_t *resbno, /* result block number */
cd80de04
CH
271 xfs_extlen_t *reslen, /* result length */
272 unsigned *busy_gen)
2bd0ea18 273{
cd80de04
CH
274 xfs_agblock_t bno = foundbno;
275 xfs_extlen_t len = foundlen;
ff3263dd 276 xfs_extlen_t diff;
cd80de04 277 bool busy;
2bd0ea18 278
a2ceac1f 279 /* Trim busy sections out of found extent */
cd80de04 280 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
a2ceac1f 281
ff3263dd
BF
282 /*
283 * If we have a largish extent that happens to start before min_agbno,
284 * see if we can shift it into range...
285 */
286 if (bno < args->min_agbno && bno + len > args->min_agbno) {
287 diff = args->min_agbno - bno;
288 if (len > diff) {
289 bno += diff;
290 len -= diff;
291 }
292 }
293
a2ceac1f
DC
294 if (args->alignment > 1 && len >= args->minlen) {
295 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
ff3263dd
BF
296
297 diff = aligned_bno - bno;
a2ceac1f
DC
298
299 *resbno = aligned_bno;
300 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 301 } else {
a2ceac1f
DC
302 *resbno = bno;
303 *reslen = len;
2bd0ea18 304 }
cd80de04
CH
305
306 return busy;
2bd0ea18
NS
307}
308
309/*
310 * Compute best start block and diff for "near" allocations.
311 * freelen >= wantlen already checked by caller.
312 */
313STATIC xfs_extlen_t /* difference value (absolute) */
314xfs_alloc_compute_diff(
315 xfs_agblock_t wantbno, /* target starting block */
316 xfs_extlen_t wantlen, /* target length */
317 xfs_extlen_t alignment, /* target alignment */
1fccd5c8 318 int datatype, /* are we allocating data? */
2bd0ea18
NS
319 xfs_agblock_t freebno, /* freespace's starting block */
320 xfs_extlen_t freelen, /* freespace's length */
321 xfs_agblock_t *newbnop) /* result: best start block from free */
322{
323 xfs_agblock_t freeend; /* end of freespace extent */
324 xfs_agblock_t newbno1; /* return block number */
325 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
326 xfs_extlen_t newlen1=0; /* length with newbno1 */
327 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18 328 xfs_agblock_t wantend; /* end of target extent */
a85522b6 329 bool userdata = datatype & XFS_ALLOC_USERDATA;
2bd0ea18
NS
330
331 ASSERT(freelen >= wantlen);
332 freeend = freebno + freelen;
333 wantend = wantbno + wantlen;
84a62eea
DC
334 /*
335 * We want to allocate from the start of a free extent if it is past
336 * the desired block or if we are allocating user data and the free
337 * extent is before desired block. The second case is there to allow
338 * for contiguous allocation from the remaining free space if the file
339 * grows in the short term.
340 */
341 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
342 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
343 newbno1 = NULLAGBLOCK;
344 } else if (freeend >= wantend && alignment > 1) {
345 newbno1 = roundup(wantbno, alignment);
346 newbno2 = newbno1 - alignment;
347 if (newbno1 >= freeend)
348 newbno1 = NULLAGBLOCK;
349 else
350 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
351 if (newbno2 < freebno)
352 newbno2 = NULLAGBLOCK;
353 else
354 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
355 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
356 if (newlen1 < newlen2 ||
357 (newlen1 == newlen2 &&
358 XFS_ABSDIFF(newbno1, wantbno) >
359 XFS_ABSDIFF(newbno2, wantbno)))
360 newbno1 = newbno2;
361 } else if (newbno2 != NULLAGBLOCK)
362 newbno1 = newbno2;
363 } else if (freeend >= wantend) {
364 newbno1 = wantbno;
365 } else if (alignment > 1) {
366 newbno1 = roundup(freeend - wantlen, alignment);
367 if (newbno1 > freeend - wantlen &&
368 newbno1 - alignment >= freebno)
369 newbno1 -= alignment;
370 else if (newbno1 >= freeend)
371 newbno1 = NULLAGBLOCK;
372 } else
373 newbno1 = freeend - wantlen;
374 *newbnop = newbno1;
375 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
376}
377
378/*
379 * Fix up the length, based on mod and prod.
380 * len should be k * prod + mod for some k.
381 * If len is too small it is returned unchanged.
382 * If len hits maxlen it is left alone.
383 */
384STATIC void
385xfs_alloc_fix_len(
dfc130f3 386 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
387{
388 xfs_extlen_t k;
389 xfs_extlen_t rlen;
390
391 ASSERT(args->mod < args->prod);
392 rlen = args->len;
393 ASSERT(rlen >= args->minlen);
394 ASSERT(rlen <= args->maxlen);
395 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
396 (args->mod == 0 && rlen < args->prod))
397 return;
398 k = rlen % args->prod;
399 if (k == args->mod)
400 return;
ff105f75
DC
401 if (k > args->mod)
402 rlen = rlen - (k - args->mod);
403 else
404 rlen = rlen - args->prod + (args->mod - k);
19ebedcf 405 /* casts to (int) catch length underflows */
ff105f75
DC
406 if ((int)rlen < (int)args->minlen)
407 return;
408 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
409 ASSERT(rlen % args->prod == args->mod);
2c003dc2
CH
410 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
411 rlen + args->minleft);
2bd0ea18
NS
412 args->len = rlen;
413}
414
2bd0ea18
NS
415/*
416 * Update the two btrees, logically removing from freespace the extent
417 * starting at rbno, rlen blocks. The extent is contained within the
418 * actual (current) free extent fbno for flen blocks.
419 * Flags are passed in indicating whether the cursors are set to the
420 * relevant records.
421 */
422STATIC int /* error code */
423xfs_alloc_fixup_trees(
dfc130f3
RC
424 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
425 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
426 xfs_agblock_t fbno, /* starting block of free extent */
427 xfs_extlen_t flen, /* length of free extent */
428 xfs_agblock_t rbno, /* starting block of returned extent */
429 xfs_extlen_t rlen, /* length of returned extent */
430 int flags) /* flags, XFSA_FIXUP_... */
431{
432 int error; /* error code */
433 int i; /* operation results */
434 xfs_agblock_t nfbno1; /* first new free startblock */
435 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
436 xfs_extlen_t nflen1=0; /* first new free length */
437 xfs_extlen_t nflen2=0; /* second new free length */
19ebedcf
DC
438 struct xfs_mount *mp;
439
440 mp = cnt_cur->bc_mp;
2bd0ea18
NS
441
442 /*
443 * Look up the record in the by-size tree if necessary.
444 */
445 if (flags & XFSA_FIXUP_CNT_OK) {
446#ifdef DEBUG
0e266570 447 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 448 return error;
19ebedcf 449 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
450 i == 1 && nfbno1 == fbno && nflen1 == flen);
451#endif
452 } else {
0e266570 453 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18 454 return error;
19ebedcf 455 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
456 }
457 /*
458 * Look up the record in the by-block tree if necessary.
459 */
460 if (flags & XFSA_FIXUP_BNO_OK) {
461#ifdef DEBUG
0e266570 462 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 463 return error;
19ebedcf 464 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
465 i == 1 && nfbno1 == fbno && nflen1 == flen);
466#endif
467 } else {
0e266570 468 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18 469 return error;
19ebedcf 470 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18 471 }
b3563c19 472
2bd0ea18 473#ifdef DEBUG
b3563c19
BN
474 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
475 struct xfs_btree_block *bnoblock;
476 struct xfs_btree_block *cntblock;
477
478 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
479 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 480
19ebedcf 481 XFS_WANT_CORRUPTED_RETURN(mp,
b3563c19 482 bnoblock->bb_numrecs == cntblock->bb_numrecs);
2bd0ea18
NS
483 }
484#endif
b3563c19 485
2bd0ea18
NS
486 /*
487 * Deal with all four cases: the allocated record is contained
488 * within the freespace record, so we can have new freespace
489 * at either (or both) end, or no freespace remaining.
490 */
491 if (rbno == fbno && rlen == flen)
492 nfbno1 = nfbno2 = NULLAGBLOCK;
493 else if (rbno == fbno) {
494 nfbno1 = rbno + rlen;
495 nflen1 = flen - rlen;
496 nfbno2 = NULLAGBLOCK;
497 } else if (rbno + rlen == fbno + flen) {
498 nfbno1 = fbno;
499 nflen1 = flen - rlen;
500 nfbno2 = NULLAGBLOCK;
501 } else {
502 nfbno1 = fbno;
503 nflen1 = rbno - fbno;
504 nfbno2 = rbno + rlen;
505 nflen2 = (fbno + flen) - nfbno2;
506 }
507 /*
508 * Delete the entry from the by-size btree.
509 */
b194c7d8 510 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 511 return error;
19ebedcf 512 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
513 /*
514 * Add new by-size btree entry(s).
515 */
516 if (nfbno1 != NULLAGBLOCK) {
0e266570 517 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18 518 return error;
19ebedcf 519 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 520 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 521 return error;
19ebedcf 522 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
523 }
524 if (nfbno2 != NULLAGBLOCK) {
0e266570 525 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18 526 return error;
19ebedcf 527 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 528 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 529 return error;
19ebedcf 530 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
531 }
532 /*
533 * Fix up the by-block btree entry(s).
534 */
535 if (nfbno1 == NULLAGBLOCK) {
536 /*
537 * No remaining freespace, just delete the by-block tree entry.
538 */
b194c7d8 539 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 540 return error;
19ebedcf 541 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
542 } else {
543 /*
544 * Update the by-block entry to start later|be shorter.
545 */
0e266570 546 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
547 return error;
548 }
549 if (nfbno2 != NULLAGBLOCK) {
550 /*
551 * 2 resulting free entries, need to add one.
552 */
0e266570 553 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18 554 return error;
19ebedcf 555 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 556 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 557 return error;
19ebedcf 558 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
559 }
560 return 0;
561}
562
bc01119d 563static xfs_failaddr_t
a2ceac1f
DC
564xfs_agfl_verify(
565 struct xfs_buf *bp)
566{
7861ef77 567 struct xfs_mount *mp = bp->b_mount;
a2ceac1f 568 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
a2ceac1f
DC
569 int i;
570
95d9582b
DW
571 /*
572 * There is no verification of non-crc AGFLs because mkfs does not
573 * initialise the AGFL to zero or NULL. Hence the only valid part of the
574 * AGFL is what the AGF says is active. We can't get to the AGF, so we
575 * can't verify just those entries are valid.
576 */
577 if (!xfs_sb_version_hascrc(&mp->m_sb))
578 return NULL;
579
68dbe77f 580 if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
bc01119d 581 return __this_address;
68dbe77f 582 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
bc01119d 583 return __this_address;
dd5b876e
DC
584 /*
585 * during growfs operations, the perag is not fully initialised,
586 * so we can't use it for any useful checking. growfs ensures we can't
587 * use it by using uncached buffers that don't have the perag attached
588 * so we can detect and avoid this problem.
589 */
590 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
bc01119d 591 return __this_address;
dd5b876e 592
b8165508 593 for (i = 0; i < xfs_agfl_size(mp); i++) {
dd5b876e 594 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
a2ceac1f 595 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
bc01119d 596 return __this_address;
a2ceac1f 597 }
a65d8d29 598
bc01119d
DW
599 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
600 return __this_address;
601 return NULL;
dd5b876e
DC
602}
603
604static void
605xfs_agfl_read_verify(
606 struct xfs_buf *bp)
607{
7861ef77 608 struct xfs_mount *mp = bp->b_mount;
1e697959 609 xfs_failaddr_t fa;
dd5b876e
DC
610
611 /*
612 * There is no verification of non-crc AGFLs because mkfs does not
613 * initialise the AGFL to zero or NULL. Hence the only valid part of the
614 * AGFL is what the AGF says is active. We can't get to the AGF, so we
615 * can't verify just those entries are valid.
616 */
617 if (!xfs_sb_version_hascrc(&mp->m_sb))
618 return;
619
45922933 620 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
1e697959
DW
621 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
622 else {
623 fa = xfs_agfl_verify(bp);
624 if (fa)
625 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
626 }
a2ceac1f
DC
627}
628
629static void
630xfs_agfl_write_verify(
631 struct xfs_buf *bp)
632{
7861ef77 633 struct xfs_mount *mp = bp->b_mount;
37d086ca 634 struct xfs_buf_log_item *bip = bp->b_log_item;
1e697959 635 xfs_failaddr_t fa;
a2ceac1f 636
dd5b876e
DC
637 /* no verification of non-crc AGFLs */
638 if (!xfs_sb_version_hascrc(&mp->m_sb))
639 return;
640
1e697959
DW
641 fa = xfs_agfl_verify(bp);
642 if (fa) {
643 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
644 return;
645 }
646
647 if (bip)
648 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
649
43b5aeed 650 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
a2ceac1f
DC
651}
652
653const struct xfs_buf_ops xfs_agfl_buf_ops = {
a3fac935 654 .name = "xfs_agfl",
68dbe77f 655 .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
a2ceac1f
DC
656 .verify_read = xfs_agfl_read_verify,
657 .verify_write = xfs_agfl_write_verify,
95d9582b 658 .verify_struct = xfs_agfl_verify,
a2ceac1f
DC
659};
660
2bd0ea18
NS
661/*
662 * Read in the allocation group free block array.
663 */
50bb67d6 664int /* error */
2bd0ea18
NS
665xfs_alloc_read_agfl(
666 xfs_mount_t *mp, /* mount point structure */
667 xfs_trans_t *tp, /* transaction pointer */
668 xfs_agnumber_t agno, /* allocation group number */
669 xfs_buf_t **bpp) /* buffer for the ag free block array */
670{
671 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
672 int error;
673
674 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
675 error = xfs_trans_read_buf(
676 mp, tp, mp->m_ddev_targp,
677 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 678 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 679 if (error)
2bd0ea18 680 return error;
a2ceac1f 681 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
682 *bpp = bp;
683 return 0;
684}
685
a2ceac1f
DC
686STATIC int
687xfs_alloc_update_counters(
688 struct xfs_trans *tp,
689 struct xfs_perag *pag,
690 struct xfs_buf *agbp,
691 long len)
692{
693 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
694
695 pag->pagf_freeblks += len;
696 be32_add_cpu(&agf->agf_freeblks, len);
697
698 xfs_trans_agblocks_delta(tp, len);
699 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
a0264b73
DW
700 be32_to_cpu(agf->agf_length))) {
701 xfs_buf_corruption_error(agbp);
12b53197 702 return -EFSCORRUPTED;
a0264b73 703 }
a2ceac1f
DC
704
705 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
706 return 0;
707}
708
2bd0ea18 709/*
2950989c 710 * Block allocation algorithm and data structures.
2bd0ea18 711 */
2950989c
BF
712struct xfs_alloc_cur {
713 struct xfs_btree_cur *cnt; /* btree cursors */
714 struct xfs_btree_cur *bnolt;
715 struct xfs_btree_cur *bnogt;
4d66edb1 716 xfs_extlen_t cur_len;/* current search length */
19fe42e3
BF
717 xfs_agblock_t rec_bno;/* extent startblock */
718 xfs_extlen_t rec_len;/* extent length */
719 xfs_agblock_t bno; /* alloc bno */
720 xfs_extlen_t len; /* alloc len */
721 xfs_extlen_t diff; /* diff from search bno */
e055d59e
BF
722 unsigned int busy_gen;/* busy state */
723 bool busy;
2950989c
BF
724};
725
726/*
727 * Set up cursors, etc. in the extent allocation cursor. This function can be
728 * called multiple times to reset an initialized structure without having to
729 * reallocate cursors.
730 */
731static int
732xfs_alloc_cur_setup(
733 struct xfs_alloc_arg *args,
734 struct xfs_alloc_cur *acur)
735{
736 int error;
737 int i;
738
739 ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
740
4d66edb1 741 acur->cur_len = args->maxlen;
19fe42e3
BF
742 acur->rec_bno = 0;
743 acur->rec_len = 0;
744 acur->bno = 0;
745 acur->len = 0;
04ea0ac1 746 acur->diff = -1;
e055d59e
BF
747 acur->busy = false;
748 acur->busy_gen = 0;
749
2950989c
BF
750 /*
751 * Perform an initial cntbt lookup to check for availability of maxlen
752 * extents. If this fails, we'll return -ENOSPC to signal the caller to
753 * attempt a small allocation.
754 */
755 if (!acur->cnt)
756 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
757 args->agbp, args->agno, XFS_BTNUM_CNT);
758 error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
759 if (error)
760 return error;
761
762 /*
763 * Allocate the bnobt left and right search cursors.
764 */
765 if (!acur->bnolt)
766 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
767 args->agbp, args->agno, XFS_BTNUM_BNO);
768 if (!acur->bnogt)
769 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
770 args->agbp, args->agno, XFS_BTNUM_BNO);
771 return i == 1 ? 0 : -ENOSPC;
772}
773
774static void
775xfs_alloc_cur_close(
776 struct xfs_alloc_cur *acur,
777 bool error)
778{
779 int cur_error = XFS_BTREE_NOERROR;
780
781 if (error)
782 cur_error = XFS_BTREE_ERROR;
783
784 if (acur->cnt)
785 xfs_btree_del_cursor(acur->cnt, cur_error);
786 if (acur->bnolt)
787 xfs_btree_del_cursor(acur->bnolt, cur_error);
788 if (acur->bnogt)
789 xfs_btree_del_cursor(acur->bnogt, cur_error);
790 acur->cnt = acur->bnolt = acur->bnogt = NULL;
791}
2bd0ea18 792
04ea0ac1
BF
793/*
794 * Check an extent for allocation and track the best available candidate in the
795 * allocation structure. The cursor is deactivated if it has entered an out of
796 * range state based on allocation arguments. Optionally return the extent
797 * extent geometry and allocation status if requested by the caller.
798 */
799static int
800xfs_alloc_cur_check(
801 struct xfs_alloc_arg *args,
802 struct xfs_alloc_cur *acur,
803 struct xfs_btree_cur *cur,
804 int *new)
805{
806 int error, i;
807 xfs_agblock_t bno, bnoa, bnew;
808 xfs_extlen_t len, lena, diff = -1;
809 bool busy;
810 unsigned busy_gen = 0;
811 bool deactivate = false;
3ca39168 812 bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
04ea0ac1
BF
813
814 *new = 0;
815
816 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
817 if (error)
818 return error;
819 XFS_WANT_CORRUPTED_RETURN(args->mp, i == 1);
820
821 /*
822 * Check minlen and deactivate a cntbt cursor if out of acceptable size
823 * range (i.e., walking backwards looking for a minlen extent).
824 */
825 if (len < args->minlen) {
3ca39168 826 deactivate = !isbnobt;
04ea0ac1
BF
827 goto out;
828 }
829
830 busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
831 &busy_gen);
832 acur->busy |= busy;
833 if (busy)
834 acur->busy_gen = busy_gen;
835 /* deactivate a bnobt cursor outside of locality range */
3ca39168
BF
836 if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
837 deactivate = isbnobt;
04ea0ac1 838 goto out;
3ca39168 839 }
04ea0ac1
BF
840 if (lena < args->minlen)
841 goto out;
842
843 args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
844 xfs_alloc_fix_len(args);
845 ASSERT(args->len >= args->minlen);
846 if (args->len < acur->len)
847 goto out;
848
849 /*
850 * We have an aligned record that satisfies minlen and beats or matches
851 * the candidate extent size. Compare locality for near allocation mode.
852 */
853 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
854 diff = xfs_alloc_compute_diff(args->agbno, args->len,
855 args->alignment, args->datatype,
856 bnoa, lena, &bnew);
857 if (bnew == NULLAGBLOCK)
858 goto out;
3ca39168
BF
859
860 /*
861 * Deactivate a bnobt cursor with worse locality than the current best.
862 */
863 if (diff > acur->diff) {
864 deactivate = isbnobt;
04ea0ac1 865 goto out;
3ca39168 866 }
04ea0ac1
BF
867
868 ASSERT(args->len > acur->len ||
869 (args->len == acur->len && diff <= acur->diff));
870 acur->rec_bno = bno;
871 acur->rec_len = len;
872 acur->bno = bnew;
873 acur->len = args->len;
874 acur->diff = diff;
875 *new = 1;
876
4f2eee5a
BF
877 /*
878 * We're done if we found a perfect allocation. This only deactivates
879 * the current cursor, but this is just an optimization to terminate a
880 * cntbt search that otherwise runs to the edge of the tree.
881 */
882 if (acur->diff == 0 && acur->len == args->maxlen)
883 deactivate = true;
04ea0ac1
BF
884out:
885 if (deactivate)
886 cur->bc_private.a.priv.abt.active = false;
887 trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
888 *new);
889 return 0;
890}
891
dacde37d
BF
892/*
893 * Complete an allocation of a candidate extent. Remove the extent from both
894 * trees and update the args structure.
895 */
896STATIC int
897xfs_alloc_cur_finish(
898 struct xfs_alloc_arg *args,
899 struct xfs_alloc_cur *acur)
900{
901 int error;
902
903 ASSERT(acur->cnt && acur->bnolt);
904 ASSERT(acur->bno >= acur->rec_bno);
905 ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
906 ASSERT(acur->rec_bno + acur->rec_len <=
907 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
908
909 error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
910 acur->rec_len, acur->bno, acur->len, 0);
911 if (error)
912 return error;
913
914 args->agbno = acur->bno;
915 args->len = acur->len;
916 args->wasfromfl = 0;
917
918 trace_xfs_alloc_cur(args);
919 return 0;
920}
921
4d66edb1
BF
922/*
923 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
924 * bno optimized lookup to search for extents with ideal size and locality.
925 */
926STATIC int
927xfs_alloc_cntbt_iter(
928 struct xfs_alloc_arg *args,
929 struct xfs_alloc_cur *acur)
930{
931 struct xfs_btree_cur *cur = acur->cnt;
932 xfs_agblock_t bno;
933 xfs_extlen_t len, cur_len;
934 int error;
935 int i;
936
937 if (!xfs_alloc_cur_active(cur))
938 return 0;
939
940 /* locality optimized lookup */
941 cur_len = acur->cur_len;
942 error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
943 if (error)
944 return error;
945 if (i == 0)
946 return 0;
947 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
948 if (error)
949 return error;
950
951 /* check the current record and update search length from it */
952 error = xfs_alloc_cur_check(args, acur, cur, &i);
953 if (error)
954 return error;
955 ASSERT(len >= acur->cur_len);
956 acur->cur_len = len;
957
958 /*
959 * We looked up the first record >= [agbno, len] above. The agbno is a
960 * secondary key and so the current record may lie just before or after
961 * agbno. If it is past agbno, check the previous record too so long as
962 * the length matches as it may be closer. Don't check a smaller record
963 * because that could deactivate our cursor.
964 */
965 if (bno > args->agbno) {
966 error = xfs_btree_decrement(cur, 0, &i);
967 if (!error && i) {
968 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
969 if (!error && i && len == acur->cur_len)
970 error = xfs_alloc_cur_check(args, acur, cur,
971 &i);
972 }
973 if (error)
974 return error;
975 }
976
977 /*
978 * Increment the search key until we find at least one allocation
979 * candidate or if the extent we found was larger. Otherwise, double the
980 * search key to optimize the search. Efficiency is more important here
981 * than absolute best locality.
982 */
983 cur_len <<= 1;
984 if (!acur->len || acur->cur_len >= cur_len)
985 acur->cur_len++;
986 else
987 acur->cur_len = cur_len;
988
989 return error;
990}
991
ba02381c
BF
992/*
993 * Deal with the case where only small freespaces remain. Either return the
994 * contents of the last freespace record, or allocate space from the freelist if
995 * there is nothing in the tree.
996 */
997STATIC int /* error */
998xfs_alloc_ag_vextent_small(
999 struct xfs_alloc_arg *args, /* allocation argument structure */
1000 struct xfs_btree_cur *ccur, /* optional by-size cursor */
1001 xfs_agblock_t *fbnop, /* result block number */
1002 xfs_extlen_t *flenp, /* result length */
1003 int *stat) /* status: 0-freelist, 1-normal/none */
1004{
1005 int error = 0;
1006 xfs_agblock_t fbno = NULLAGBLOCK;
1007 xfs_extlen_t flen = 0;
9e1862f0 1008 int i = 0;
ba02381c 1009
9e1862f0
BF
1010 /*
1011 * If a cntbt cursor is provided, try to allocate the largest record in
1012 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1013 * allocation. Make sure to respect minleft even when pulling from the
1014 * freelist.
1015 */
1016 if (ccur)
1017 error = xfs_btree_decrement(ccur, 0, &i);
ba02381c
BF
1018 if (error)
1019 goto error;
1020 if (i) {
1021 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1022 if (error)
1023 goto error;
1024 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
1025 goto out;
1026 }
1027
1028 if (args->minlen != 1 || args->alignment != 1 ||
1029 args->resv == XFS_AG_RESV_AGFL ||
1030 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
1031 args->minleft))
1032 goto out;
1033
1034 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1035 if (error)
1036 goto error;
1037 if (fbno == NULLAGBLOCK)
1038 goto out;
1039
1040 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
a85522b6 1041 (args->datatype & XFS_ALLOC_NOBUSY));
ba02381c 1042
a85522b6 1043 if (args->datatype & XFS_ALLOC_USERDATA) {
ba02381c
BF
1044 struct xfs_buf *bp;
1045
1046 bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
1047 if (!bp) {
a0264b73 1048 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, args->mp);
ba02381c
BF
1049 error = -EFSCORRUPTED;
1050 goto error;
1051 }
1052 xfs_trans_binval(args->tp, bp);
1053 }
2d7ea81f
BF
1054 *fbnop = args->agbno = fbno;
1055 *flenp = args->len = 1;
ba02381c
BF
1056 XFS_WANT_CORRUPTED_GOTO(args->mp,
1057 fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1058 error);
1059 args->wasfromfl = 1;
1060 trace_xfs_alloc_small_freelist(args);
1061
1062 /*
1063 * If we're feeding an AGFL block to something that doesn't live in the
1064 * free space, we need to clear out the OWN_AG rmap.
1065 */
1066 error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
1067 &XFS_RMAP_OINFO_AG);
1068 if (error)
1069 goto error;
1070
1071 *stat = 0;
1072 return 0;
1073
1074out:
1075 /*
1076 * Can't do the allocation, give up.
1077 */
1078 if (flen < args->minlen) {
1079 args->agbno = NULLAGBLOCK;
1080 trace_xfs_alloc_small_notenough(args);
1081 flen = 0;
1082 }
1083 *fbnop = fbno;
1084 *flenp = flen;
1085 *stat = 1;
1086 trace_xfs_alloc_small_done(args);
1087 return 0;
1088
1089error:
1090 trace_xfs_alloc_small_error(args);
1091 return error;
1092}
1093
2bd0ea18
NS
1094/*
1095 * Allocate a variable extent in the allocation group agno.
1096 * Type and bno are used to determine where in the allocation group the
1097 * extent will start.
1098 * Extent's length (returned in *len) will be between minlen and maxlen,
1099 * and of the form k * prod + mod unless there's nothing that large.
1100 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1101 */
1102STATIC int /* error */
1103xfs_alloc_ag_vextent(
dfc130f3 1104 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 1105{
0e266570 1106 int error=0;
2bd0ea18
NS
1107
1108 ASSERT(args->minlen > 0);
1109 ASSERT(args->maxlen > 0);
1110 ASSERT(args->minlen <= args->maxlen);
1111 ASSERT(args->mod < args->prod);
1112 ASSERT(args->alignment > 0);
cf8ce220 1113
2bd0ea18
NS
1114 /*
1115 * Branch to correct routine based on the type.
1116 */
1117 args->wasfromfl = 0;
1118 switch (args->type) {
1119 case XFS_ALLOCTYPE_THIS_AG:
1120 error = xfs_alloc_ag_vextent_size(args);
1121 break;
1122 case XFS_ALLOCTYPE_NEAR_BNO:
1123 error = xfs_alloc_ag_vextent_near(args);
1124 break;
1125 case XFS_ALLOCTYPE_THIS_BNO:
1126 error = xfs_alloc_ag_vextent_exact(args);
1127 break;
1128 default:
1129 ASSERT(0);
1130 /* NOTREACHED */
1131 }
a2ceac1f
DC
1132
1133 if (error || args->agbno == NULLAGBLOCK)
2bd0ea18 1134 return error;
2bd0ea18 1135
a2ceac1f
DC
1136 ASSERT(args->len >= args->minlen);
1137 ASSERT(args->len <= args->maxlen);
9760cac2 1138 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
a2ceac1f
DC
1139 ASSERT(args->agbno % args->alignment == 0);
1140
631ac87a 1141 /* if not file data, insert new block into the reverse map btree */
3ee858aa 1142 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
631ac87a
DW
1143 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
1144 args->agbno, args->len, &args->oinfo);
1145 if (error)
1146 return error;
1147 }
1148
a2ceac1f
DC
1149 if (!args->wasfromfl) {
1150 error = xfs_alloc_update_counters(args->tp, args->pag,
1151 args->agbp,
1152 -((long)(args->len)));
1153 if (error)
1154 return error;
1155
1156 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
1157 args->agbno, args->len));
2bd0ea18 1158 }
a2ceac1f 1159
cf8ce220 1160 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
a2ceac1f 1161
79896434
BD
1162 XFS_STATS_INC(args->mp, xs_allocx);
1163 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
a2ceac1f 1164 return error;
2bd0ea18
NS
1165}
1166
1167/*
1168 * Allocate a variable extent at exactly agno/bno.
1169 * Extent's length (returned in *len) will be between minlen and maxlen,
1170 * and of the form k * prod + mod unless there's nothing that large.
1171 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1172 */
1173STATIC int /* error */
1174xfs_alloc_ag_vextent_exact(
dfc130f3 1175 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1176{
dfc130f3
RC
1177 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
1178 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
1179 int error;
1180 xfs_agblock_t fbno; /* start block of found extent */
2bd0ea18 1181 xfs_extlen_t flen; /* length of found extent */
cd80de04
CH
1182 xfs_agblock_t tbno; /* start block of busy extent */
1183 xfs_extlen_t tlen; /* length of busy extent */
1184 xfs_agblock_t tend; /* end block of busy extent */
2bd0ea18 1185 int i; /* success/failure of operation */
cd80de04 1186 unsigned busy_gen;
2bd0ea18
NS
1187
1188 ASSERT(args->alignment == 1);
a2ceac1f 1189
2bd0ea18
NS
1190 /*
1191 * Allocate/initialize a cursor for the by-number freespace btree.
1192 */
b194c7d8 1193 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
56b2de80
DC
1194 args->agno, XFS_BTNUM_BNO);
1195
2bd0ea18
NS
1196 /*
1197 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
1198 * Look for the closest free block <= bno, it must contain bno
1199 * if any free block does.
1200 */
56b2de80
DC
1201 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
1202 if (error)
2bd0ea18 1203 goto error0;
56b2de80
DC
1204 if (!i)
1205 goto not_found;
1206
2bd0ea18
NS
1207 /*
1208 * Grab the freespace record.
1209 */
56b2de80
DC
1210 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
1211 if (error)
2bd0ea18 1212 goto error0;
19ebedcf 1213 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18 1214 ASSERT(fbno <= args->agbno);
56b2de80 1215
5000d01d 1216 /*
a2ceac1f
DC
1217 * Check for overlapping busy extents.
1218 */
cd80de04
CH
1219 tbno = fbno;
1220 tlen = flen;
1221 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
a2ceac1f
DC
1222
1223 /*
1224 * Give up if the start of the extent is busy, or the freespace isn't
1225 * long enough for the minimum request.
2bd0ea18 1226 */
a2ceac1f
DC
1227 if (tbno > args->agbno)
1228 goto not_found;
1229 if (tlen < args->minlen)
1230 goto not_found;
1231 tend = tbno + tlen;
1232 if (tend < args->agbno + args->minlen)
56b2de80
DC
1233 goto not_found;
1234
2bd0ea18
NS
1235 /*
1236 * End of extent will be smaller of the freespace end and the
1237 * maximal requested end.
56b2de80 1238 *
2bd0ea18
NS
1239 * Fix the length according to mod and prod if given.
1240 */
a2ceac1f
DC
1241 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
1242 - args->agbno;
2bd0ea18 1243 xfs_alloc_fix_len(args);
a2ceac1f 1244 ASSERT(args->agbno + args->len <= tend);
56b2de80 1245
2bd0ea18 1246 /*
a2ceac1f 1247 * We are allocating agbno for args->len
2bd0ea18
NS
1248 * Allocate/initialize a cursor for the by-size btree.
1249 */
b194c7d8
BN
1250 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1251 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1252 ASSERT(args->agbno + args->len <=
6e3140c7 1253 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
56b2de80
DC
1254 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
1255 args->len, XFSA_FIXUP_BNO_OK);
1256 if (error) {
2bd0ea18
NS
1257 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1258 goto error0;
1259 }
a2ceac1f 1260
2bd0ea18
NS
1261 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1262 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 1263
2bd0ea18 1264 args->wasfromfl = 0;
56b2de80
DC
1265 trace_xfs_alloc_exact_done(args);
1266 return 0;
1267
1268not_found:
1269 /* Didn't find it, return null. */
1270 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1271 args->agbno = NULLAGBLOCK;
1272 trace_xfs_alloc_exact_notfound(args);
2bd0ea18
NS
1273 return 0;
1274
1275error0:
1276 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
56b2de80
DC
1277 trace_xfs_alloc_exact_error(args);
1278 return error;
1279}
1280
1281/*
4f2eee5a
BF
1282 * Search a given number of btree records in a given direction. Check each
1283 * record against the good extent we've already found.
56b2de80
DC
1284 */
1285STATIC int
4f2eee5a 1286xfs_alloc_walk_iter(
3ca39168
BF
1287 struct xfs_alloc_arg *args,
1288 struct xfs_alloc_cur *acur,
1289 struct xfs_btree_cur *cur,
4f2eee5a
BF
1290 bool increment,
1291 bool find_one, /* quit on first candidate */
1292 int count, /* rec count (-1 for infinite) */
1293 int *stat)
56b2de80 1294{
56b2de80
DC
1295 int error;
1296 int i;
56b2de80 1297
4f2eee5a
BF
1298 *stat = 0;
1299
56b2de80 1300 /*
3ca39168
BF
1301 * Search so long as the cursor is active or we find a better extent.
1302 * The cursor is deactivated if it extends beyond the range of the
1303 * current allocation candidate.
56b2de80 1304 */
4f2eee5a 1305 while (xfs_alloc_cur_active(cur) && count) {
3ca39168 1306 error = xfs_alloc_cur_check(args, acur, cur, &i);
56b2de80 1307 if (error)
3ca39168 1308 return error;
4f2eee5a
BF
1309 if (i == 1) {
1310 *stat = 1;
1311 if (find_one)
1312 break;
1313 }
3ca39168
BF
1314 if (!xfs_alloc_cur_active(cur))
1315 break;
56b2de80 1316
3ca39168
BF
1317 if (increment)
1318 error = xfs_btree_increment(cur, 0, &i);
56b2de80 1319 else
3ca39168 1320 error = xfs_btree_decrement(cur, 0, &i);
56b2de80 1321 if (error)
3ca39168
BF
1322 return error;
1323 if (i == 0)
1324 cur->bc_private.a.priv.abt.active = false;
4f2eee5a
BF
1325
1326 if (count > 0)
1327 count--;
3ca39168 1328 }
56b2de80 1329
56b2de80 1330 return 0;
2bd0ea18
NS
1331}
1332
d0ebd9ee 1333/*
4d66edb1
BF
1334 * Search the by-bno and by-size btrees in parallel in search of an extent with
1335 * ideal locality based on the NEAR mode ->agbno locality hint.
d0ebd9ee
BF
1336 */
1337STATIC int
4d66edb1 1338xfs_alloc_ag_vextent_locality(
d0ebd9ee
BF
1339 struct xfs_alloc_arg *args,
1340 struct xfs_alloc_cur *acur,
1341 int *stat)
1342{
1343 struct xfs_btree_cur *fbcur = NULL;
1344 int error;
1345 int i;
1346 bool fbinc;
1347
1348 ASSERT(acur->len == 0);
1349 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
1350
1351 *stat = 0;
1352
4d66edb1
BF
1353 error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1354 if (error)
1355 return error;
d0ebd9ee
BF
1356 error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
1357 if (error)
1358 return error;
1359 error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
1360 if (error)
1361 return error;
1362
1363 /*
4d66edb1
BF
1364 * Search the bnobt and cntbt in parallel. Search the bnobt left and
1365 * right and lookup the closest extent to the locality hint for each
1366 * extent size key in the cntbt. The entire search terminates
1367 * immediately on a bnobt hit because that means we've found best case
1368 * locality. Otherwise the search continues until the cntbt cursor runs
1369 * off the end of the tree. If no allocation candidate is found at this
1370 * point, give up on locality, walk backwards from the end of the cntbt
1371 * and take the first available extent.
1372 *
1373 * The parallel tree searches balance each other out to provide fairly
1374 * consistent performance for various situations. The bnobt search can
1375 * have pathological behavior in the worst case scenario of larger
1376 * allocation requests and fragmented free space. On the other hand, the
1377 * bnobt is able to satisfy most smaller allocation requests much more
1378 * quickly than the cntbt. The cntbt search can sift through fragmented
1379 * free space and sets of free extents for larger allocation requests
1380 * more quickly than the bnobt. Since the locality hint is just a hint
1381 * and we don't want to scan the entire bnobt for perfect locality, the
1382 * cntbt search essentially bounds the bnobt search such that we can
1383 * find good enough locality at reasonable performance in most cases.
d0ebd9ee
BF
1384 */
1385 while (xfs_alloc_cur_active(acur->bnolt) ||
4d66edb1
BF
1386 xfs_alloc_cur_active(acur->bnogt) ||
1387 xfs_alloc_cur_active(acur->cnt)) {
1388
1389 trace_xfs_alloc_cur_lookup(args);
1390
1391 /*
1392 * Search the bnobt left and right. In the case of a hit, finish
1393 * the search in the opposite direction and we're done.
1394 */
d0ebd9ee
BF
1395 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
1396 true, 1, &i);
1397 if (error)
1398 return error;
1399 if (i == 1) {
1400 trace_xfs_alloc_cur_left(args);
1401 fbcur = acur->bnogt;
1402 fbinc = true;
1403 break;
1404 }
d0ebd9ee
BF
1405 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
1406 1, &i);
1407 if (error)
1408 return error;
1409 if (i == 1) {
1410 trace_xfs_alloc_cur_right(args);
1411 fbcur = acur->bnolt;
1412 fbinc = false;
1413 break;
1414 }
4d66edb1
BF
1415
1416 /*
1417 * Check the extent with best locality based on the current
1418 * extent size search key and keep track of the best candidate.
1419 */
1420 error = xfs_alloc_cntbt_iter(args, acur);
1421 if (error)
1422 return error;
1423 if (!xfs_alloc_cur_active(acur->cnt)) {
1424 trace_xfs_alloc_cur_lookup_done(args);
1425 break;
1426 }
1427 }
1428
1429 /*
1430 * If we failed to find anything due to busy extents, return empty
1431 * handed so the caller can flush and retry. If no busy extents were
1432 * found, walk backwards from the end of the cntbt as a last resort.
1433 */
1434 if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1435 error = xfs_btree_decrement(acur->cnt, 0, &i);
1436 if (error)
1437 return error;
1438 if (i) {
1439 acur->cnt->bc_private.a.priv.abt.active = true;
1440 fbcur = acur->cnt;
1441 fbinc = false;
1442 }
d0ebd9ee
BF
1443 }
1444
4d66edb1
BF
1445 /*
1446 * Search in the opposite direction for a better entry in the case of
1447 * a bnobt hit or walk backwards from the end of the cntbt.
1448 */
d0ebd9ee
BF
1449 if (fbcur) {
1450 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
1451 &i);
1452 if (error)
1453 return error;
1454 }
1455
1456 if (acur->len)
1457 *stat = 1;
1458
1459 return 0;
1460}
1461
2bd0ea18
NS
1462/*
1463 * Allocate a variable extent near bno in the allocation group agno.
1464 * Extent's length (returned in len) will be between minlen and maxlen,
1465 * and of the form k * prod + mod unless there's nothing that large.
1466 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1467 */
2950989c 1468STATIC int
2bd0ea18 1469xfs_alloc_ag_vextent_near(
2950989c 1470 struct xfs_alloc_arg *args)
2bd0ea18 1471{
2950989c 1472 struct xfs_alloc_cur acur = {};
3ca39168
BF
1473 int error; /* error code */
1474 int i; /* result code, temporary */
3ca39168
BF
1475 xfs_agblock_t bno;
1476 xfs_extlen_t len;
6beba453 1477#ifdef DEBUG
2bd0ea18
NS
1478 /*
1479 * Randomly don't execute the first algorithm.
1480 */
2bd0ea18 1481 int dofirst; /* set to do first algorithm */
2bd0ea18 1482
49f693fa 1483 dofirst = prandom_u32() & 1;
2bd0ea18 1484#endif
a2ceac1f 1485
ff3263dd
BF
1486 /* handle unitialized agbno range so caller doesn't have to */
1487 if (!args->min_agbno && !args->max_agbno)
1488 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1489 ASSERT(args->min_agbno <= args->max_agbno);
1490
1491 /* clamp agbno to the range if it's outside */
1492 if (args->agbno < args->min_agbno)
1493 args->agbno = args->min_agbno;
1494 if (args->agbno > args->max_agbno)
1495 args->agbno = args->max_agbno;
1496
a2ceac1f 1497restart:
3ca39168 1498 len = 0;
a2ceac1f 1499
2bd0ea18 1500 /*
2950989c
BF
1501 * Set up cursors and see if there are any free extents as big as
1502 * maxlen. If not, pick the last entry in the tree unless the tree is
1503 * empty.
5000d01d 1504 */
2950989c
BF
1505 error = xfs_alloc_cur_setup(args, &acur);
1506 if (error == -ENOSPC) {
3ca39168
BF
1507 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1508 &len, &i);
2950989c
BF
1509 if (error)
1510 goto out;
3ca39168 1511 if (i == 0 || len == 0) {
a2ceac1f 1512 trace_xfs_alloc_near_noentry(args);
2950989c 1513 goto out;
2bd0ea18
NS
1514 }
1515 ASSERT(i == 1);
2950989c
BF
1516 } else if (error) {
1517 goto out;
2bd0ea18 1518 }
a2ceac1f 1519
5000d01d 1520 /*
2bd0ea18
NS
1521 * First algorithm.
1522 * If the requested extent is large wrt the freespaces available
1523 * in this a.g., then the cursor will be pointing to a btree entry
1524 * near the right edge of the tree. If it's in the last btree leaf
1525 * block, then we just examine all the entries in that block
1526 * that are big enough, and pick the best one.
1527 * This is written as a while loop so we can break out of it,
1528 * but we never loop back to the top.
1529 */
2950989c 1530 while (xfs_btree_islastblock(acur.cnt, 0)) {
6beba453
DC
1531#ifdef DEBUG
1532 if (dofirst)
2bd0ea18
NS
1533 break;
1534#endif
1535 /*
1536 * Start from the entry that lookup found, sequence through
1537 * all larger free blocks. If we're actually pointing at a
1538 * record smaller than maxlen, go to the start of this block,
1539 * and skip all those smaller than minlen.
1540 */
3ca39168 1541 if (len || args->alignment > 1) {
2950989c 1542 acur.cnt->bc_ptrs[0] = 1;
2bd0ea18 1543 do {
3ca39168
BF
1544 error = xfs_alloc_get_rec(acur.cnt, &bno, &len,
1545 &i);
2950989c
BF
1546 if (error)
1547 goto out;
1548 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, out);
3ca39168 1549 if (len >= args->minlen)
2bd0ea18 1550 break;
2950989c
BF
1551 error = xfs_btree_increment(acur.cnt, 0, &i);
1552 if (error)
1553 goto out;
2bd0ea18 1554 } while (i);
3ca39168 1555 ASSERT(len >= args->minlen);
2bd0ea18
NS
1556 if (!i)
1557 break;
1558 }
4f2eee5a
BF
1559
1560 error = xfs_alloc_walk_iter(args, &acur, acur.cnt, true, false,
1561 -1, &i);
1562 if (error)
1563 goto out;
1564
2bd0ea18
NS
1565 /*
1566 * It didn't work. We COULD be in a case where
1567 * there's a good record somewhere, so try again.
1568 */
19fe42e3 1569 if (acur.len == 0)
2bd0ea18 1570 break;
2c003dc2 1571
56b2de80 1572 trace_xfs_alloc_near_first(args);
46f7e323 1573 goto alloc;
2bd0ea18 1574 }
2950989c 1575
2bd0ea18 1576 /*
4d66edb1
BF
1577 * Second algorithm. Combined cntbt and bnobt search to find ideal
1578 * locality.
2bd0ea18 1579 */
4d66edb1 1580 error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
2950989c
BF
1581 if (error)
1582 goto out;
1583
2bd0ea18
NS
1584 /*
1585 * If we couldn't get anything, give up.
1586 */
3ca39168 1587 if (!acur.len) {
e055d59e 1588 if (acur.busy) {
a2ceac1f 1589 trace_xfs_alloc_near_busy(args);
e055d59e
BF
1590 xfs_extent_busy_flush(args->mp, args->pag,
1591 acur.busy_gen);
a2ceac1f
DC
1592 goto restart;
1593 }
56b2de80 1594 trace_xfs_alloc_size_neither(args);
2bd0ea18 1595 args->agbno = NULLAGBLOCK;
2950989c 1596 goto out;
2bd0ea18 1597 }
56b2de80 1598
46f7e323 1599alloc:
dacde37d
BF
1600 /* fix up btrees on a successful allocation */
1601 error = xfs_alloc_cur_finish(args, &acur);
56b2de80 1602
2950989c
BF
1603out:
1604 xfs_alloc_cur_close(&acur, error);
2bd0ea18
NS
1605 return error;
1606}
1607
1608/*
1609 * Allocate a variable extent anywhere in the allocation group agno.
1610 * Extent's length (returned in len) will be between minlen and maxlen,
1611 * and of the form k * prod + mod unless there's nothing that large.
1612 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1613 */
1614STATIC int /* error */
1615xfs_alloc_ag_vextent_size(
dfc130f3 1616 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1617{
dfc130f3
RC
1618 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1619 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1620 int error; /* error result */
1621 xfs_agblock_t fbno; /* start of found freespace */
1622 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1623 int i; /* temp status variable */
1624 xfs_agblock_t rbno; /* returned block number */
1625 xfs_extlen_t rlen; /* length of returned extent */
cd80de04
CH
1626 bool busy;
1627 unsigned busy_gen;
2bd0ea18 1628
a2ceac1f 1629restart:
2bd0ea18
NS
1630 /*
1631 * Allocate and initialize a cursor for the by-size btree.
1632 */
b194c7d8
BN
1633 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1634 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1635 bno_cur = NULL;
cd80de04 1636 busy = false;
a2ceac1f 1637
2bd0ea18
NS
1638 /*
1639 * Look for an entry >= maxlen+alignment-1 blocks.
1640 */
0e266570
NS
1641 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1642 args->maxlen + args->alignment - 1, &i)))
2bd0ea18 1643 goto error0;
a2ceac1f 1644
2bd0ea18 1645 /*
cd80de04
CH
1646 * If none then we have to settle for a smaller extent. In the case that
1647 * there are no large extents, this will return the last entry in the
1648 * tree unless the tree is empty. In the case that there are only busy
1649 * large extents, this will return the largest small extent unless there
a2ceac1f 1650 * are no smaller extents available.
5000d01d 1651 */
cd80de04 1652 if (!i) {
a2ceac1f
DC
1653 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1654 &fbno, &flen, &i);
1655 if (error)
2bd0ea18
NS
1656 goto error0;
1657 if (i == 0 || flen == 0) {
1658 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1659 trace_xfs_alloc_size_noentry(args);
2bd0ea18
NS
1660 return 0;
1661 }
1662 ASSERT(i == 1);
cd80de04
CH
1663 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1664 &rlen, &busy_gen);
a2ceac1f
DC
1665 } else {
1666 /*
1667 * Search for a non-busy extent that is large enough.
a2ceac1f
DC
1668 */
1669 for (;;) {
1670 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1671 if (error)
1672 goto error0;
19ebedcf 1673 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f 1674
cd80de04
CH
1675 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1676 &rbno, &rlen, &busy_gen);
a2ceac1f
DC
1677
1678 if (rlen >= args->maxlen)
1679 break;
1680
1681 error = xfs_btree_increment(cnt_cur, 0, &i);
1682 if (error)
1683 goto error0;
1684 if (i == 0) {
1685 /*
1686 * Our only valid extents must have been busy.
1687 * Make it unbusy by forcing the log out and
cd80de04 1688 * retrying.
a2ceac1f
DC
1689 */
1690 xfs_btree_del_cursor(cnt_cur,
1691 XFS_BTREE_NOERROR);
1692 trace_xfs_alloc_size_busy(args);
cd80de04
CH
1693 xfs_extent_busy_flush(args->mp,
1694 args->pag, busy_gen);
a2ceac1f
DC
1695 goto restart;
1696 }
1697 }
2bd0ea18 1698 }
a2ceac1f 1699
2bd0ea18
NS
1700 /*
1701 * In the first case above, we got the last entry in the
1702 * by-size btree. Now we check to see if the space hits maxlen
1703 * once aligned; if not, we search left for something better.
1704 * This can't happen in the second case above.
1705 */
2bd0ea18 1706 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1707 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1708 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1709 if (rlen < args->maxlen) {
1710 xfs_agblock_t bestfbno;
1711 xfs_extlen_t bestflen;
1712 xfs_agblock_t bestrbno;
1713 xfs_extlen_t bestrlen;
1714
1715 bestrlen = rlen;
1716 bestrbno = rbno;
1717 bestflen = flen;
1718 bestfbno = fbno;
1719 for (;;) {
b194c7d8 1720 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1721 goto error0;
1722 if (i == 0)
1723 break;
0e266570
NS
1724 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1725 &i)))
2bd0ea18 1726 goto error0;
19ebedcf 1727 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1728 if (flen < bestrlen)
1729 break;
cd80de04
CH
1730 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1731 &rbno, &rlen, &busy_gen);
2bd0ea18 1732 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1733 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1734 (rlen <= flen && rbno + rlen <= fbno + flen),
1735 error0);
1736 if (rlen > bestrlen) {
1737 bestrlen = rlen;
1738 bestrbno = rbno;
1739 bestflen = flen;
1740 bestfbno = fbno;
1741 if (rlen == args->maxlen)
1742 break;
1743 }
5000d01d 1744 }
0e266570
NS
1745 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1746 &i)))
2bd0ea18 1747 goto error0;
19ebedcf 1748 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1749 rlen = bestrlen;
1750 rbno = bestrbno;
1751 flen = bestflen;
1752 fbno = bestfbno;
1753 }
1754 args->wasfromfl = 0;
1755 /*
1756 * Fix up the length.
1757 */
1758 args->len = rlen;
a2ceac1f 1759 if (rlen < args->minlen) {
cd80de04 1760 if (busy) {
a2ceac1f
DC
1761 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1762 trace_xfs_alloc_size_busy(args);
cd80de04 1763 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
a2ceac1f
DC
1764 goto restart;
1765 }
1766 goto out_nominleft;
2bd0ea18 1767 }
a2ceac1f
DC
1768 xfs_alloc_fix_len(args);
1769
2bd0ea18 1770 rlen = args->len;
19ebedcf 1771 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
2bd0ea18
NS
1772 /*
1773 * Allocate and initialize a cursor for the by-block tree.
1774 */
b194c7d8
BN
1775 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1776 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1777 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1778 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1779 goto error0;
1780 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1781 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1782 cnt_cur = bno_cur = NULL;
1783 args->len = rlen;
1784 args->agbno = rbno;
19ebedcf 1785 XFS_WANT_CORRUPTED_GOTO(args->mp,
2bd0ea18 1786 args->agbno + args->len <=
6e3140c7 1787 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18 1788 error0);
56b2de80 1789 trace_xfs_alloc_size_done(args);
2bd0ea18
NS
1790 return 0;
1791
1792error0:
56b2de80 1793 trace_xfs_alloc_size_error(args);
2bd0ea18
NS
1794 if (cnt_cur)
1795 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1796 if (bno_cur)
1797 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1798 return error;
a2ceac1f
DC
1799
1800out_nominleft:
1801 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1802 trace_xfs_alloc_size_nominleft(args);
1803 args->agbno = NULLAGBLOCK;
1804 return 0;
2bd0ea18
NS
1805}
1806
2bd0ea18
NS
1807/*
1808 * Free the extent starting at agno/bno for length.
1809 */
85aec44f 1810STATIC int
2bd0ea18 1811xfs_free_ag_extent(
5837e73b
DW
1812 struct xfs_trans *tp,
1813 struct xfs_buf *agbp,
1814 xfs_agnumber_t agno,
1815 xfs_agblock_t bno,
1816 xfs_extlen_t len,
1817 const struct xfs_owner_info *oinfo,
1818 enum xfs_ag_resv_type type)
2bd0ea18 1819{
5837e73b
DW
1820 struct xfs_mount *mp;
1821 struct xfs_perag *pag;
1822 struct xfs_btree_cur *bno_cur;
1823 struct xfs_btree_cur *cnt_cur;
1824 xfs_agblock_t gtbno; /* start of right neighbor */
1825 xfs_extlen_t gtlen; /* length of right neighbor */
1826 xfs_agblock_t ltbno; /* start of left neighbor */
1827 xfs_extlen_t ltlen; /* length of left neighbor */
1828 xfs_agblock_t nbno; /* new starting block of freesp */
1829 xfs_extlen_t nlen; /* new length of freespace */
1830 int haveleft; /* have a left neighbor */
1831 int haveright; /* have a right neighbor */
1832 int i;
1833 int error;
2bd0ea18 1834
631ac87a 1835 bno_cur = cnt_cur = NULL;
2bd0ea18 1836 mp = tp->t_mountp;
631ac87a 1837
3ee858aa 1838 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
631ac87a
DW
1839 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1840 if (error)
1841 goto error0;
1842 }
1843
5000d01d 1844 /*
2bd0ea18
NS
1845 * Allocate and initialize a cursor for the by-block btree.
1846 */
b194c7d8 1847 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
5000d01d 1848 /*
2bd0ea18
NS
1849 * Look for a neighboring block on the left (lower block numbers)
1850 * that is contiguous with this space.
1851 */
0e266570 1852 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1853 goto error0;
1854 if (haveleft) {
1855 /*
1856 * There is a block to our left.
1857 */
0e266570 1858 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1859 goto error0;
19ebedcf 1860 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1861 /*
1862 * It's not contiguous, though.
1863 */
1864 if (ltbno + ltlen < bno)
1865 haveleft = 0;
1866 else {
1867 /*
1868 * If this failure happens the request to free this
1869 * space was invalid, it's (partly) already free.
1870 * Very bad.
1871 */
19ebedcf
DC
1872 XFS_WANT_CORRUPTED_GOTO(mp,
1873 ltbno + ltlen <= bno, error0);
2bd0ea18
NS
1874 }
1875 }
5000d01d 1876 /*
2bd0ea18
NS
1877 * Look for a neighboring block on the right (higher block numbers)
1878 * that is contiguous with this space.
1879 */
b194c7d8 1880 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1881 goto error0;
1882 if (haveright) {
1883 /*
1884 * There is a block to our right.
1885 */
0e266570 1886 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18 1887 goto error0;
19ebedcf 1888 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1889 /*
1890 * It's not contiguous, though.
1891 */
1892 if (bno + len < gtbno)
1893 haveright = 0;
1894 else {
1895 /*
1896 * If this failure happens the request to free this
1897 * space was invalid, it's (partly) already free.
1898 * Very bad.
1899 */
19ebedcf 1900 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
2bd0ea18
NS
1901 }
1902 }
1903 /*
1904 * Now allocate and initialize a cursor for the by-size tree.
1905 */
b194c7d8 1906 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1907 /*
1908 * Have both left and right contiguous neighbors.
1909 * Merge all three into a single free block.
1910 */
1911 if (haveleft && haveright) {
1912 /*
1913 * Delete the old by-size entry on the left.
1914 */
0e266570 1915 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1916 goto error0;
19ebedcf 1917 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1918 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1919 goto error0;
19ebedcf 1920 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1921 /*
1922 * Delete the old by-size entry on the right.
1923 */
0e266570 1924 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 1925 goto error0;
19ebedcf 1926 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1927 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1928 goto error0;
19ebedcf 1929 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1930 /*
1931 * Delete the old by-block entry for the right block.
1932 */
b194c7d8 1933 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 1934 goto error0;
19ebedcf 1935 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1936 /*
1937 * Move the by-block cursor back to the left neighbor.
1938 */
b194c7d8 1939 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1940 goto error0;
19ebedcf 1941 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1942#ifdef DEBUG
1943 /*
1944 * Check that this is the right record: delete didn't
1945 * mangle the cursor.
1946 */
1947 {
1948 xfs_agblock_t xxbno;
1949 xfs_extlen_t xxlen;
1950
0e266570
NS
1951 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1952 &i)))
2bd0ea18 1953 goto error0;
19ebedcf 1954 XFS_WANT_CORRUPTED_GOTO(mp,
2bd0ea18
NS
1955 i == 1 && xxbno == ltbno && xxlen == ltlen,
1956 error0);
1957 }
1958#endif
1959 /*
1960 * Update remaining by-block entry to the new, joined block.
1961 */
1962 nbno = ltbno;
1963 nlen = len + ltlen + gtlen;
0e266570 1964 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1965 goto error0;
1966 }
1967 /*
1968 * Have only a left contiguous neighbor.
1969 * Merge it together with the new freespace.
1970 */
1971 else if (haveleft) {
1972 /*
1973 * Delete the old by-size entry on the left.
1974 */
0e266570 1975 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1976 goto error0;
19ebedcf 1977 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1978 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1979 goto error0;
19ebedcf 1980 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1981 /*
1982 * Back up the by-block cursor to the left neighbor, and
1983 * update its length.
1984 */
b194c7d8 1985 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1986 goto error0;
19ebedcf 1987 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1988 nbno = ltbno;
1989 nlen = len + ltlen;
0e266570 1990 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1991 goto error0;
1992 }
1993 /*
1994 * Have only a right contiguous neighbor.
1995 * Merge it together with the new freespace.
1996 */
1997 else if (haveright) {
1998 /*
1999 * Delete the old by-size entry on the right.
2000 */
0e266570 2001 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 2002 goto error0;
19ebedcf 2003 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 2004 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 2005 goto error0;
19ebedcf 2006 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18 2007 /*
5000d01d 2008 * Update the starting block and length of the right
2bd0ea18
NS
2009 * neighbor in the by-block tree.
2010 */
2011 nbno = bno;
2012 nlen = len + gtlen;
0e266570 2013 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
2014 goto error0;
2015 }
2016 /*
2017 * No contiguous neighbors.
2018 * Insert the new freespace into the by-block tree.
2019 */
2020 else {
2021 nbno = bno;
2022 nlen = len;
b194c7d8 2023 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 2024 goto error0;
19ebedcf 2025 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
2026 }
2027 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2028 bno_cur = NULL;
2029 /*
2030 * In all cases we need to insert the new freespace in the by-size tree.
2031 */
0e266570 2032 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18 2033 goto error0;
19ebedcf 2034 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
b194c7d8 2035 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 2036 goto error0;
19ebedcf 2037 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
2038 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2039 cnt_cur = NULL;
a2ceac1f 2040
2bd0ea18
NS
2041 /*
2042 * Update the freespace totals in the ag and superblock.
2043 */
a2ceac1f
DC
2044 pag = xfs_perag_get(mp, agno);
2045 error = xfs_alloc_update_counters(tp, pag, agbp, len);
cf8ce220 2046 xfs_ag_resv_free_extent(pag, type, tp, len);
a2ceac1f
DC
2047 xfs_perag_put(pag);
2048 if (error)
2049 goto error0;
2050
79896434
BD
2051 XFS_STATS_INC(mp, xs_freex);
2052 XFS_STATS_ADD(mp, xs_freeb, len);
56b2de80 2053
65a15e06 2054 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
3e535bba 2055
2bd0ea18
NS
2056 return 0;
2057
2058 error0:
65a15e06 2059 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2bd0ea18
NS
2060 if (bno_cur)
2061 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2062 if (cnt_cur)
2063 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2064 return error;
2065}
2066
5000d01d 2067/*
2bd0ea18
NS
2068 * Visible (exported) allocation/free functions.
2069 * Some of these are used just by xfs_alloc_btree.c and this file.
2070 */
2071
2072/*
2073 * Compute and fill in value of m_ag_maxlevels.
2074 */
2075void
2076xfs_alloc_compute_maxlevels(
2077 xfs_mount_t *mp) /* file system mount structure */
2078{
1421de38 2079 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
730e2a19 2080 (mp->m_sb.sb_agblocks + 1) / 2);
2bd0ea18
NS
2081}
2082
56b2de80 2083/*
cf8ce220
DW
2084 * Find the length of the longest extent in an AG. The 'need' parameter
2085 * specifies how much space we're going to need for the AGFL and the
2086 * 'reserved' parameter tells us how many blocks in this AG are reserved for
2087 * other callers.
56b2de80
DC
2088 */
2089xfs_extlen_t
2090xfs_alloc_longest_free_extent(
72bda06d 2091 struct xfs_perag *pag,
cf8ce220
DW
2092 xfs_extlen_t need,
2093 xfs_extlen_t reserved)
56b2de80 2094{
72bda06d 2095 xfs_extlen_t delta = 0;
56b2de80 2096
cf8ce220
DW
2097 /*
2098 * If the AGFL needs a recharge, we'll have to subtract that from the
2099 * longest extent.
2100 */
56b2de80
DC
2101 if (need > pag->pagf_flcount)
2102 delta = need - pag->pagf_flcount;
2103
cf8ce220
DW
2104 /*
2105 * If we cannot maintain others' reservations with space from the
2106 * not-longest freesp extents, we'll have to subtract /that/ from
2107 * the longest extent too.
2108 */
2109 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2110 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2111
2112 /*
2113 * If the longest extent is long enough to satisfy all the
2114 * reservations and AGFL rules in place, we can return this extent.
2115 */
56b2de80 2116 if (pag->pagf_longest > delta)
14058d94
DC
2117 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2118 pag->pagf_longest - delta);
cf8ce220
DW
2119
2120 /* Otherwise, let the caller try for 1 block if there's space. */
56b2de80
DC
2121 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2122}
2123
de046644
DC
2124unsigned int
2125xfs_alloc_min_freelist(
2126 struct xfs_mount *mp,
2127 struct xfs_perag *pag)
2128{
2129 unsigned int min_free;
2130
2131 /* space needed by-bno freespace btree */
2132 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
2133 mp->m_ag_maxlevels);
2134 /* space needed by-size freespace btree */
2135 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
2136 mp->m_ag_maxlevels);
b8a8d6e5
DW
2137 /* space needed reverse mapping used space btree */
2138 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2139 min_free += min_t(unsigned int,
2140 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2141 mp->m_rmap_maxlevels);
de046644
DC
2142
2143 return min_free;
2144}
2145
5515b7c1
DC
2146/*
2147 * Check if the operation we are fixing up the freelist for should go ahead or
2148 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2149 * is dependent on whether the size and shape of free space available will
2150 * permit the requested allocation to take place.
2151 */
2152static bool
2153xfs_alloc_space_available(
2154 struct xfs_alloc_arg *args,
2155 xfs_extlen_t min_free,
2156 int flags)
2157{
2158 struct xfs_perag *pag = args->pag;
3fe4a6dd 2159 xfs_extlen_t alloc_len, longest;
cf8ce220 2160 xfs_extlen_t reservation; /* blocks that are still reserved */
5515b7c1 2161 int available;
580a4380 2162 xfs_extlen_t agflcount;
5515b7c1
DC
2163
2164 if (flags & XFS_ALLOC_FLAG_FREEING)
2165 return true;
2166
cf8ce220
DW
2167 reservation = xfs_ag_resv_needed(pag, args->resv);
2168
5515b7c1 2169 /* do we have enough contiguous free space for the allocation? */
3fe4a6dd 2170 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
1421de38 2171 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
3fe4a6dd 2172 if (longest < alloc_len)
5515b7c1
DC
2173 return false;
2174
580a4380
BF
2175 /*
2176 * Do we have enough free space remaining for the allocation? Don't
2177 * account extra agfl blocks because we are about to defer free them,
2178 * making them unavailable until the current transaction commits.
2179 */
2180 agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2181 available = (int)(pag->pagf_freeblks + agflcount -
2c003dc2 2182 reservation - min_free - args->minleft);
3fe4a6dd 2183 if (available < (int)max(args->total, alloc_len))
5515b7c1
DC
2184 return false;
2185
2c003dc2
CH
2186 /*
2187 * Clamp maxlen to the amount of free space available for the actual
2188 * extent allocation.
2189 */
2190 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2191 args->maxlen = available;
2192 ASSERT(args->maxlen > 0);
2193 ASSERT(args->maxlen >= args->minlen);
2194 }
2195
5515b7c1
DC
2196 return true;
2197}
2198
30c8be8a
BF
2199int
2200xfs_free_agfl_block(
2201 struct xfs_trans *tp,
2202 xfs_agnumber_t agno,
2203 xfs_agblock_t agbno,
2204 struct xfs_buf *agbp,
2205 struct xfs_owner_info *oinfo)
2206{
2207 int error;
2208 struct xfs_buf *bp;
2209
2210 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2211 XFS_AG_RESV_AGFL);
2212 if (error)
2213 return error;
2214
4aa01a59 2215 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
a0264b73
DW
2216 if (!bp) {
2217 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp);
30c8be8a 2218 return -EFSCORRUPTED;
a0264b73 2219 }
30c8be8a
BF
2220 xfs_trans_binval(tp, bp);
2221
2222 return 0;
2223}
2224
8dbee8f5
BF
2225/*
2226 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2227 * is to detect an agfl header padding mismatch between current and early v5
2228 * kernels. This problem manifests as a 1-slot size difference between the
2229 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2230 * may also catch variants of agfl count corruption unrelated to padding. Either
2231 * way, we'll reset the agfl and warn the user.
2232 *
2233 * Return true if a reset is required before the agfl can be used, false
2234 * otherwise.
2235 */
2236static bool
2237xfs_agfl_needs_reset(
2238 struct xfs_mount *mp,
2239 struct xfs_agf *agf)
2240{
2241 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2242 uint32_t l = be32_to_cpu(agf->agf_fllast);
2243 uint32_t c = be32_to_cpu(agf->agf_flcount);
2244 int agfl_size = xfs_agfl_size(mp);
2245 int active;
2246
2247 /* no agfl header on v4 supers */
2248 if (!xfs_sb_version_hascrc(&mp->m_sb))
2249 return false;
2250
2251 /*
2252 * The agf read verifier catches severe corruption of these fields.
2253 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2254 * the verifier allows it.
2255 */
2256 if (f >= agfl_size || l >= agfl_size)
2257 return true;
2258 if (c > agfl_size)
2259 return true;
2260
2261 /*
2262 * Check consistency between the on-disk count and the active range. An
2263 * agfl padding mismatch manifests as an inconsistent flcount.
2264 */
2265 if (c && l >= f)
2266 active = l - f + 1;
2267 else if (c)
2268 active = agfl_size - f + l + 1;
2269 else
2270 active = 0;
2271
2272 return active != c;
2273}
2274
2275/*
2276 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2277 * agfl content cannot be trusted. Warn the user that a repair is required to
2278 * recover leaked blocks.
2279 *
2280 * The purpose of this mechanism is to handle filesystems affected by the agfl
2281 * header padding mismatch problem. A reset keeps the filesystem online with a
2282 * relatively minor free space accounting inconsistency rather than suffer the
2283 * inevitable crash from use of an invalid agfl block.
2284 */
2285static void
2286xfs_agfl_reset(
2287 struct xfs_trans *tp,
2288 struct xfs_buf *agbp,
2289 struct xfs_perag *pag)
2290{
2291 struct xfs_mount *mp = tp->t_mountp;
2292 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
2293
2294 ASSERT(pag->pagf_agflreset);
2295 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2296
2297 xfs_warn(mp,
2298 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2299 "Please unmount and run xfs_repair.",
2300 pag->pag_agno, pag->pagf_flcount);
2301
2302 agf->agf_flfirst = 0;
2303 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2304 agf->agf_flcount = 0;
2305 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2306 XFS_AGF_FLCOUNT);
2307
2308 pag->pagf_flcount = 0;
2309 pag->pagf_agflreset = false;
2310}
2311
d5c1b462
BF
2312/*
2313 * Defer an AGFL block free. This is effectively equivalent to
2314 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2315 *
2316 * Deferring AGFL frees helps prevent log reservation overruns due to too many
2317 * allocation operations in a transaction. AGFL frees are prone to this problem
2318 * because for one they are always freed one at a time. Further, an immediate
2319 * AGFL block free can cause a btree join and require another block free before
2320 * the real allocation can proceed. Deferring the free disconnects freeing up
2321 * the AGFL slot from freeing the block.
2322 */
2323STATIC void
2324xfs_defer_agfl_block(
21375e5d 2325 struct xfs_trans *tp,
d5c1b462
BF
2326 xfs_agnumber_t agno,
2327 xfs_fsblock_t agbno,
2328 struct xfs_owner_info *oinfo)
2329{
21375e5d 2330 struct xfs_mount *mp = tp->t_mountp;
d5c1b462
BF
2331 struct xfs_extent_free_item *new; /* new element */
2332
2333 ASSERT(xfs_bmap_free_item_zone != NULL);
2334 ASSERT(oinfo != NULL);
2335
6cd1e6db 2336 new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
d5c1b462
BF
2337 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2338 new->xefi_blockcount = 1;
2339 new->xefi_oinfo = *oinfo;
2340
2341 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2342
21375e5d 2343 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
d5c1b462
BF
2344}
2345
2bd0ea18
NS
2346/*
2347 * Decide whether to use this allocation group for this allocation.
2348 * If so, fix up the btree freelist's size.
2bd0ea18 2349 */
ff105f75 2350int /* error */
2bd0ea18 2351xfs_alloc_fix_freelist(
c98e644e
DC
2352 struct xfs_alloc_arg *args, /* allocation argument structure */
2353 int flags) /* XFS_ALLOC_FLAG_... */
2bd0ea18 2354{
c98e644e
DC
2355 struct xfs_mount *mp = args->mp;
2356 struct xfs_perag *pag = args->pag;
2357 struct xfs_trans *tp = args->tp;
2358 struct xfs_buf *agbp = NULL;
2359 struct xfs_buf *agflbp = NULL;
2360 struct xfs_alloc_arg targs; /* local allocation arguments */
2361 xfs_agblock_t bno; /* freelist block */
2362 xfs_extlen_t need; /* total blocks needed in freelist */
fcdd428c 2363 int error = 0;
c98e644e 2364
d85d3424
BF
2365 /* deferred ops (AGFL block frees) require permanent transactions */
2366 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2367
2bd0ea18 2368 if (!pag->pagf_init) {
c98e644e
DC
2369 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2370 if (error)
2371 goto out_no_agbp;
2bd0ea18 2372 if (!pag->pagf_init) {
5e656dbb
BN
2373 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2374 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2375 goto out_agbp_relse;
2bd0ea18 2376 }
c98e644e 2377 }
34317449 2378
5e656dbb 2379 /*
c98e644e
DC
2380 * If this is a metadata preferred pag and we are user data then try
2381 * somewhere else if we are not being asked to try harder at this
2382 * point
34317449 2383 */
a85522b6 2384 if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
5e656dbb
BN
2385 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2386 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2387 goto out_agbp_relse;
34317449
NS
2388 }
2389
de046644 2390 need = xfs_alloc_min_freelist(mp, pag);
2c003dc2
CH
2391 if (!xfs_alloc_space_available(args, need, flags |
2392 XFS_ALLOC_FLAG_CHECK))
c98e644e 2393 goto out_agbp_relse;
5e656dbb 2394
2bd0ea18
NS
2395 /*
2396 * Get the a.g. freespace buffer.
2397 * Can fail if we're not blocking on locks, and it's held.
2398 */
c98e644e
DC
2399 if (!agbp) {
2400 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2401 if (error)
2402 goto out_no_agbp;
2403 if (!agbp) {
5e656dbb
BN
2404 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2405 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2406 goto out_no_agbp;
2bd0ea18
NS
2407 }
2408 }
72bda06d 2409
8dbee8f5
BF
2410 /* reset a padding mismatched agfl before final free space check */
2411 if (pag->pagf_agflreset)
2412 xfs_agfl_reset(tp, agbp, pag);
2413
72bda06d 2414 /* If there isn't enough total space or single-extent, reject it. */
de046644 2415 need = xfs_alloc_min_freelist(mp, pag);
c98e644e
DC
2416 if (!xfs_alloc_space_available(args, need, flags))
2417 goto out_agbp_relse;
5515b7c1 2418
2bd0ea18
NS
2419 /*
2420 * Make the freelist shorter if it's too long.
72bda06d 2421 *
c98e644e
DC
2422 * Note that from this point onwards, we will always release the agf and
2423 * agfl buffers on error. This handles the case where we error out and
2424 * the buffers are clean or may not have been joined to the transaction
2425 * and hence need to be released manually. If they have been joined to
2426 * the transaction, then xfs_trans_brelse() will handle them
2427 * appropriately based on the recursion count and dirty state of the
2428 * buffer.
2429 *
72bda06d
DC
2430 * XXX (dgc): When we have lots of free space, does this buy us
2431 * anything other than extra overhead when we need to put more blocks
2432 * back on the free list? Maybe we should only do this when space is
2433 * getting low or the AGFL is more than half full?
e365af6f
DW
2434 *
2435 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2436 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2437 * updating the rmapbt. Both flags are used in xfs_repair while we're
2438 * rebuilding the rmapbt, and neither are used by the kernel. They're
2439 * both required to ensure that rmaps are correctly recorded for the
2440 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2441 * repair/rmap.c in xfsprogs for details.
2bd0ea18 2442 */
e365af6f 2443 memset(&targs, 0, sizeof(targs));
007347e3 2444 /* struct copy below */
e365af6f 2445 if (flags & XFS_ALLOC_FLAG_NORMAP)
007347e3 2446 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
e365af6f 2447 else
007347e3 2448 targs.oinfo = XFS_RMAP_OINFO_AG;
e365af6f 2449 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
5e656dbb
BN
2450 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2451 if (error)
c98e644e 2452 goto out_agbp_relse;
30c8be8a 2453
6dc24128
BF
2454 /* defer agfl frees */
2455 xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
2bd0ea18 2456 }
72bda06d 2457
2bd0ea18
NS
2458 targs.tp = tp;
2459 targs.mp = mp;
2460 targs.agbp = agbp;
2461 targs.agno = args->agno;
cf8ce220 2462 targs.alignment = targs.minlen = targs.prod = 1;
2bd0ea18
NS
2463 targs.type = XFS_ALLOCTYPE_THIS_AG;
2464 targs.pag = pag;
72bda06d
DC
2465 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2466 if (error)
c98e644e 2467 goto out_agbp_relse;
72bda06d
DC
2468
2469 /* Make the freelist longer if it's too short. */
2470 while (pag->pagf_flcount < need) {
2bd0ea18 2471 targs.agbno = 0;
72bda06d 2472 targs.maxlen = need - pag->pagf_flcount;
9760cac2 2473 targs.resv = XFS_AG_RESV_AGFL;
72bda06d
DC
2474
2475 /* Allocate as many blocks as possible at once. */
2476 error = xfs_alloc_ag_vextent(&targs);
c98e644e
DC
2477 if (error)
2478 goto out_agflbp_relse;
2479
2bd0ea18 2480 /*
dfc130f3
RC
2481 * Stop if we run out. Won't happen if callers are obeying
2482 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
2483 * on a completely full ag.
2484 */
5e656dbb
BN
2485 if (targs.agbno == NULLAGBLOCK) {
2486 if (flags & XFS_ALLOC_FLAG_FREEING)
2487 break;
c98e644e 2488 goto out_agflbp_relse;
5e656dbb 2489 }
2bd0ea18
NS
2490 /*
2491 * Put each allocated block on the list.
2492 */
2493 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
2494 error = xfs_alloc_put_freelist(tp, agbp,
2495 agflbp, bno, 0);
2496 if (error)
c98e644e 2497 goto out_agflbp_relse;
2bd0ea18
NS
2498 }
2499 }
cb4deb22 2500 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
2501 args->agbp = agbp;
2502 return 0;
c98e644e
DC
2503
2504out_agflbp_relse:
2505 xfs_trans_brelse(tp, agflbp);
2506out_agbp_relse:
2507 if (agbp)
2508 xfs_trans_brelse(tp, agbp);
2509out_no_agbp:
2510 args->agbp = NULL;
2511 return error;
2bd0ea18
NS
2512}
2513
2514/*
2515 * Get a block from the freelist.
2516 * Returns with the buffer for the block gotten.
2517 */
2518int /* error */
2519xfs_alloc_get_freelist(
2520 xfs_trans_t *tp, /* transaction pointer */
2521 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
2522 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2523 int btreeblk) /* destination is a AGF btree */
2bd0ea18
NS
2524{
2525 xfs_agf_t *agf; /* a.g. freespace structure */
2bd0ea18
NS
2526 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2527 xfs_agblock_t bno; /* block number returned */
dd5b876e 2528 __be32 *agfl_bno;
2bd0ea18 2529 int error;
cdded3d8 2530 int logflags;
dd5b876e 2531 xfs_mount_t *mp = tp->t_mountp;
2bd0ea18
NS
2532 xfs_perag_t *pag; /* per allocation group data */
2533
2bd0ea18
NS
2534 /*
2535 * Freelist is empty, give up.
2536 */
dd5b876e 2537 agf = XFS_BUF_TO_AGF(agbp);
46eca962 2538 if (!agf->agf_flcount) {
2bd0ea18
NS
2539 *bnop = NULLAGBLOCK;
2540 return 0;
2541 }
2542 /*
2543 * Read the array of free blocks.
2544 */
dd5b876e
DC
2545 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2546 &agflbp);
2547 if (error)
2bd0ea18 2548 return error;
dd5b876e
DC
2549
2550
2bd0ea18
NS
2551 /*
2552 * Get the block number and update the data structures.
2553 */
dd5b876e
DC
2554 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2555 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
5e656dbb 2556 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 2557 xfs_trans_brelse(tp, agflbp);
b8165508 2558 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
46eca962 2559 agf->agf_flfirst = 0;
56b2de80
DC
2560
2561 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
8dbee8f5 2562 ASSERT(!pag->pagf_agflreset);
5e656dbb 2563 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
2564 xfs_trans_agflist_delta(tp, -1);
2565 pag->pagf_flcount--;
cdded3d8
DC
2566
2567 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2568 if (btreeblk) {
5e656dbb 2569 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
2570 pag->pagf_btreeblks++;
2571 logflags |= XFS_AGF_BTREEBLKS;
2572 }
8648192f 2573 xfs_perag_put(pag);
cdded3d8 2574
cdded3d8 2575 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 2576 *bnop = bno;
3e535bba 2577
2bd0ea18
NS
2578 return 0;
2579}
2580
2581/*
2582 * Log the given fields from the agf structure.
2583 */
2584void
2585xfs_alloc_log_agf(
2586 xfs_trans_t *tp, /* transaction pointer */
2587 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 2588 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
2589{
2590 int first; /* first byte offset */
2591 int last; /* last byte offset */
2592 static const short offsets[] = {
2593 offsetof(xfs_agf_t, agf_magicnum),
2594 offsetof(xfs_agf_t, agf_versionnum),
2595 offsetof(xfs_agf_t, agf_seqno),
2596 offsetof(xfs_agf_t, agf_length),
2597 offsetof(xfs_agf_t, agf_roots[0]),
2598 offsetof(xfs_agf_t, agf_levels[0]),
2599 offsetof(xfs_agf_t, agf_flfirst),
2600 offsetof(xfs_agf_t, agf_fllast),
2601 offsetof(xfs_agf_t, agf_flcount),
2602 offsetof(xfs_agf_t, agf_freeblks),
2603 offsetof(xfs_agf_t, agf_longest),
cdded3d8 2604 offsetof(xfs_agf_t, agf_btreeblks),
dd5b876e 2605 offsetof(xfs_agf_t, agf_uuid),
8511b71a 2606 offsetof(xfs_agf_t, agf_rmap_blocks),
bc859611
DW
2607 offsetof(xfs_agf_t, agf_refcount_blocks),
2608 offsetof(xfs_agf_t, agf_refcount_root),
2609 offsetof(xfs_agf_t, agf_refcount_level),
8511b71a
DW
2610 /* needed so that we don't log the whole rest of the structure: */
2611 offsetof(xfs_agf_t, agf_spare64),
2bd0ea18
NS
2612 sizeof(xfs_agf_t)
2613 };
2614
56b2de80
DC
2615 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2616
bdc16ee5 2617 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
dd5b876e 2618
2bd0ea18
NS
2619 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2620 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2621}
2622
2623/*
2624 * Interface for inode allocation to force the pag data to be initialized.
2625 */
2626int /* error */
2627xfs_alloc_pagf_init(
2628 xfs_mount_t *mp, /* file system mount structure */
2629 xfs_trans_t *tp, /* transaction pointer */
2630 xfs_agnumber_t agno, /* allocation group number */
2631 int flags) /* XFS_ALLOC_FLAGS_... */
2632{
7a3bffe4 2633 xfs_buf_t *bp;
2bd0ea18
NS
2634 int error;
2635
0e266570 2636 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2637 return error;
2638 if (bp)
2639 xfs_trans_brelse(tp, bp);
2640 return 0;
2641}
2642
2643/*
2644 * Put the block on the freelist for the allocation group.
2645 */
2646int /* error */
2647xfs_alloc_put_freelist(
2648 xfs_trans_t *tp, /* transaction pointer */
2649 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2650 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2651 xfs_agblock_t bno, /* block being freed */
2652 int btreeblk) /* block came from a AGF btree */
2bd0ea18
NS
2653{
2654 xfs_agf_t *agf; /* a.g. freespace structure */
5e656dbb 2655 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2656 int error;
cdded3d8 2657 int logflags;
2bd0ea18
NS
2658 xfs_mount_t *mp; /* mount structure */
2659 xfs_perag_t *pag; /* per allocation group data */
dd5b876e
DC
2660 __be32 *agfl_bno;
2661 int startoff;
2bd0ea18
NS
2662
2663 agf = XFS_BUF_TO_AGF(agbp);
2664 mp = tp->t_mountp;
2665
2666 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2667 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18 2668 return error;
5e656dbb 2669 be32_add_cpu(&agf->agf_fllast, 1);
b8165508 2670 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
46eca962 2671 agf->agf_fllast = 0;
56b2de80
DC
2672
2673 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
8dbee8f5 2674 ASSERT(!pag->pagf_agflreset);
5e656dbb 2675 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2676 xfs_trans_agflist_delta(tp, 1);
2677 pag->pagf_flcount++;
cdded3d8
DC
2678
2679 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2680 if (btreeblk) {
5e656dbb 2681 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2682 pag->pagf_btreeblks--;
2683 logflags |= XFS_AGF_BTREEBLKS;
2684 }
56b2de80 2685 xfs_perag_put(pag);
cdded3d8 2686
5e656dbb
BN
2687 xfs_alloc_log_agf(tp, agbp, logflags);
2688
b8165508 2689 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
dd5b876e
DC
2690
2691 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2692 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2693 *blockp = cpu_to_be32(bno);
dd5b876e
DC
2694 startoff = (char *)blockp - (char *)agflbp->b_addr;
2695
cdded3d8 2696 xfs_alloc_log_agf(tp, agbp, logflags);
dd5b876e 2697
bdc16ee5 2698 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
dd5b876e
DC
2699 xfs_trans_log_buf(tp, agflbp, startoff,
2700 startoff + sizeof(xfs_agblock_t) - 1);
2bd0ea18
NS
2701 return 0;
2702}
2703
bc01119d 2704static xfs_failaddr_t
a2ceac1f 2705xfs_agf_verify(
95d9582b
DW
2706 struct xfs_buf *bp)
2707{
7861ef77 2708 struct xfs_mount *mp = bp->b_mount;
95d9582b 2709 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
a2ceac1f 2710
a65d8d29
BF
2711 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2712 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
bc01119d 2713 return __this_address;
a65d8d29
BF
2714 if (!xfs_log_check_lsn(mp,
2715 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
bc01119d 2716 return __this_address;
a65d8d29 2717 }
a2ceac1f 2718
68dbe77f
BF
2719 if (!xfs_verify_magic(bp, agf->agf_magicnum))
2720 return __this_address;
2721
2722 if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
dd5b876e 2723 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
b8165508
DC
2724 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2725 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2726 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
bc01119d 2727 return __this_address;
a2ceac1f 2728
00795aae
DW
2729 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2730 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2731 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
5a35bf2c 2732 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
bc01119d 2733 return __this_address;
5a35bf2c 2734
e37838e5 2735 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
00795aae
DW
2736 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2737 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
bc01119d 2738 return __this_address;
e37838e5 2739
a2ceac1f
DC
2740 /*
2741 * during growfs operations, the perag is not fully initialised,
2742 * so we can't use it for any useful checking. growfs ensures we can't
2743 * use it by using uncached buffers that don't have the perag attached
2744 * so we can detect and avoid this problem.
2745 */
dd5b876e 2746 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
bc01119d 2747 return __this_address;
a2ceac1f 2748
dd5b876e
DC
2749 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2750 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
bc01119d 2751 return __this_address;
dd5b876e 2752
88ce0792 2753 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
00795aae
DW
2754 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2755 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
bc01119d 2756 return __this_address;
88ce0792 2757
bc01119d 2758 return NULL;
a2ceac1f 2759
a2ceac1f
DC
2760}
2761
2762static void
2763xfs_agf_read_verify(
2764 struct xfs_buf *bp)
2765{
7861ef77 2766 struct xfs_mount *mp = bp->b_mount;
1e697959 2767 xfs_failaddr_t fa;
dd5b876e 2768
45922933
DC
2769 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2770 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
1e697959
DW
2771 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2772 else {
95d9582b 2773 fa = xfs_agf_verify(bp);
1e697959
DW
2774 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2775 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2776 }
a2ceac1f
DC
2777}
2778
2779static void
2780xfs_agf_write_verify(
2781 struct xfs_buf *bp)
2782{
7861ef77 2783 struct xfs_mount *mp = bp->b_mount;
37d086ca 2784 struct xfs_buf_log_item *bip = bp->b_log_item;
1e697959 2785 xfs_failaddr_t fa;
dd5b876e 2786
95d9582b 2787 fa = xfs_agf_verify(bp);
1e697959
DW
2788 if (fa) {
2789 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
2790 return;
2791 }
2792
2793 if (!xfs_sb_version_hascrc(&mp->m_sb))
2794 return;
2795
2796 if (bip)
2797 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2798
43b5aeed 2799 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
a2ceac1f
DC
2800}
2801
2802const struct xfs_buf_ops xfs_agf_buf_ops = {
a3fac935 2803 .name = "xfs_agf",
68dbe77f 2804 .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
a2ceac1f
DC
2805 .verify_read = xfs_agf_read_verify,
2806 .verify_write = xfs_agf_write_verify,
95d9582b 2807 .verify_struct = xfs_agf_verify,
a2ceac1f
DC
2808};
2809
2bd0ea18
NS
2810/*
2811 * Read in the allocation group header (free/alloc section).
2812 */
2813int /* error */
56b2de80
DC
2814xfs_read_agf(
2815 struct xfs_mount *mp, /* mount point structure */
2816 struct xfs_trans *tp, /* transaction pointer */
2817 xfs_agnumber_t agno, /* allocation group number */
2818 int flags, /* XFS_BUF_ */
2819 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2820{
9440d84d 2821 int error;
2bd0ea18 2822
ff105f75
DC
2823 trace_xfs_read_agf(mp, agno);
2824
2bd0ea18 2825 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
2826 error = xfs_trans_read_buf(
2827 mp, tp, mp->m_ddev_targp,
2828 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2829 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
9440d84d 2830 if (error)
2bd0ea18 2831 return error;
56b2de80 2832 if (!*bpp)
2bd0ea18 2833 return 0;
56b2de80 2834
a2ceac1f
DC
2835 ASSERT(!(*bpp)->b_error);
2836 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2837 return 0;
2838}
2839
2840/*
2841 * Read in the allocation group header (free/alloc section).
2842 */
2843int /* error */
2844xfs_alloc_read_agf(
2845 struct xfs_mount *mp, /* mount point structure */
2846 struct xfs_trans *tp, /* transaction pointer */
2847 xfs_agnumber_t agno, /* allocation group number */
2848 int flags, /* XFS_ALLOC_FLAG_... */
2849 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2850{
2851 struct xfs_agf *agf; /* ag freelist header */
2852 struct xfs_perag *pag; /* per allocation group data */
2853 int error;
2854
ff105f75 2855 trace_xfs_alloc_read_agf(mp, agno);
56b2de80 2856
ff105f75 2857 ASSERT(agno != NULLAGNUMBER);
56b2de80
DC
2858 error = xfs_read_agf(mp, tp, agno,
2859 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2860 bpp);
2861 if (error)
2862 return error;
2863 if (!*bpp)
2864 return 0;
a2ceac1f 2865 ASSERT(!(*bpp)->b_error);
56b2de80
DC
2866
2867 agf = XFS_BUF_TO_AGF(*bpp);
2868 pag = xfs_perag_get(mp, agno);
2bd0ea18 2869 if (!pag->pagf_init) {
6e3140c7 2870 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 2871 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
2872 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2873 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 2874 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 2875 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 2876 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 2877 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
e37838e5
DW
2878 pag->pagf_levels[XFS_BTNUM_RMAPi] =
2879 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
88ce0792 2880 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
2bd0ea18 2881 pag->pagf_init = 1;
8dbee8f5 2882 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
2bd0ea18
NS
2883 }
2884#ifdef DEBUG
2885 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 2886 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 2887 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
2888 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2889 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 2890 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 2891 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 2892 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 2893 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
2894 }
2895#endif
56b2de80 2896 xfs_perag_put(pag);
2bd0ea18
NS
2897 return 0;
2898}
2899
2900/*
2901 * Allocate an extent (variable-size).
2902 * Depending on the allocation type, we either look in a single allocation
2903 * group or loop over the allocation groups to find the result.
2904 */
2905int /* error */
2906xfs_alloc_vextent(
03aeb29d 2907 struct xfs_alloc_arg *args) /* allocation argument structure */
2bd0ea18 2908{
03aeb29d
BF
2909 xfs_agblock_t agsize; /* allocation group size */
2910 int error;
2911 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2912 struct xfs_mount *mp; /* mount structure pointer */
2913 xfs_agnumber_t sagno; /* starting allocation group number */
2914 xfs_alloctype_t type; /* input allocation type */
2915 int bump_rotor = 0;
2916 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
2917
2918 mp = args->mp;
2919 type = args->otype = args->type;
2920 args->agbno = NULLAGBLOCK;
2921 /*
2922 * Just fix this up, for the case where the last a.g. is shorter
2923 * (or there's only one a.g.) and the caller couldn't easily figure
2924 * that out (xfs_bmap_alloc).
2925 */
2926 agsize = mp->m_sb.sb_agblocks;
2927 if (args->maxlen > agsize)
2928 args->maxlen = agsize;
2929 if (args->alignment == 0)
2930 args->alignment = 1;
2931 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2932 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2933 ASSERT(args->minlen <= args->maxlen);
2934 ASSERT(args->minlen <= agsize);
2935 ASSERT(args->mod < args->prod);
2936 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2937 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2938 args->minlen > args->maxlen || args->minlen > agsize ||
2939 args->mod >= args->prod) {
2940 args->fsbno = NULLFSBLOCK;
56b2de80 2941 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
2942 return 0;
2943 }
9baa549b 2944
2bd0ea18
NS
2945 switch (type) {
2946 case XFS_ALLOCTYPE_THIS_AG:
2947 case XFS_ALLOCTYPE_NEAR_BNO:
2948 case XFS_ALLOCTYPE_THIS_BNO:
2949 /*
2950 * These three force us into a single a.g.
2951 */
2952 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 2953 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18 2954 error = xfs_alloc_fix_freelist(args, 0);
2bd0ea18 2955 if (error) {
56b2de80 2956 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2957 goto error0;
2958 }
2959 if (!args->agbp) {
56b2de80 2960 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
2961 break;
2962 }
2963 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 2964 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2965 goto error0;
2bd0ea18
NS
2966 break;
2967 case XFS_ALLOCTYPE_START_BNO:
2968 /*
2969 * Try near allocation first, then anywhere-in-ag after
2970 * the first a.g. fails.
2971 */
1fccd5c8 2972 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
34317449 2973 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
2974 args->fsbno = XFS_AGB_TO_FSB(mp,
2975 ((mp->m_agfrotor / rotorstep) %
2976 mp->m_sb.sb_agcount), 0);
34317449
NS
2977 bump_rotor = 1;
2978 }
2bd0ea18
NS
2979 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2980 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2981 /* FALLTHROUGH */
2bd0ea18
NS
2982 case XFS_ALLOCTYPE_FIRST_AG:
2983 /*
2984 * Rotate through the allocation groups looking for a winner.
2985 */
f3eda3a5 2986 if (type == XFS_ALLOCTYPE_FIRST_AG) {
2bd0ea18
NS
2987 /*
2988 * Start with allocation group given by bno.
2989 */
2990 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2991 args->type = XFS_ALLOCTYPE_THIS_AG;
2992 sagno = 0;
2993 flags = 0;
2994 } else {
2bd0ea18
NS
2995 /*
2996 * Start with the given allocation group.
2997 */
2998 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2999 flags = XFS_ALLOC_FLAG_TRYLOCK;
3000 }
3001 /*
3002 * Loop over allocation groups twice; first time with
3003 * trylock set, second time without.
3004 */
3005 for (;;) {
56b2de80 3006 args->pag = xfs_perag_get(mp, args->agno);
9baa549b 3007 error = xfs_alloc_fix_freelist(args, flags);
9baa549b 3008 if (error) {
56b2de80 3009 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
3010 goto error0;
3011 }
3012 /*
3013 * If we get a buffer back then the allocation will fly.
3014 */
3015 if (args->agbp) {
0e266570 3016 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 3017 goto error0;
2bd0ea18
NS
3018 break;
3019 }
56b2de80
DC
3020
3021 trace_xfs_alloc_vextent_loopfailed(args);
3022
2bd0ea18
NS
3023 /*
3024 * Didn't work, figure out the next iteration.
3025 */
3026 if (args->agno == sagno &&
3027 type == XFS_ALLOCTYPE_START_BNO)
3028 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
3029 /*
3030 * For the first allocation, we can try any AG to get
3031 * space. However, if we already have allocated a
3032 * block, we don't want to try AGs whose number is below
3033 * sagno. Otherwise, we may end up with out-of-order
3034 * locking of AGF, which might cause deadlock.
3035 */
3036 if (++(args->agno) == mp->m_sb.sb_agcount) {
03aeb29d 3037 if (args->tp->t_firstblock != NULLFSBLOCK)
5e656dbb
BN
3038 args->agno = sagno;
3039 else
3040 args->agno = 0;
3041 }
5000d01d 3042 /*
2bd0ea18
NS
3043 * Reached the starting a.g., must either be done
3044 * or switch to non-trylock mode.
3045 */
3046 if (args->agno == sagno) {
a3b4a951 3047 if (flags == 0) {
2bd0ea18 3048 args->agbno = NULLAGBLOCK;
56b2de80 3049 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
3050 break;
3051 }
a3b4a951
CH
3052
3053 flags = 0;
3054 if (type == XFS_ALLOCTYPE_START_BNO) {
3055 args->agbno = XFS_FSB_TO_AGBNO(mp,
3056 args->fsbno);
3057 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2bd0ea18
NS
3058 }
3059 }
56b2de80 3060 xfs_perag_put(args->pag);
2bd0ea18 3061 }
f3eda3a5 3062 if (bump_rotor) {
46eca962
NS
3063 if (args->agno == sagno)
3064 mp->m_agfrotor = (mp->m_agfrotor + 1) %
3065 (mp->m_sb.sb_agcount * rotorstep);
3066 else
3067 mp->m_agfrotor = (args->agno * rotorstep + 1) %
3068 (mp->m_sb.sb_agcount * rotorstep);
3069 }
2bd0ea18
NS
3070 break;
3071 default:
3072 ASSERT(0);
3073 /* NOTREACHED */
3074 }
3075 if (args->agbno == NULLAGBLOCK)
3076 args->fsbno = NULLFSBLOCK;
3077 else {
3078 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3079#ifdef DEBUG
3080 ASSERT(args->len >= args->minlen);
3081 ASSERT(args->len <= args->maxlen);
3082 ASSERT(args->agbno % args->alignment == 0);
3083 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
3084 args->len);
3085#endif
9542ae13 3086
2bd0ea18 3087 }
56b2de80 3088 xfs_perag_put(args->pag);
2bd0ea18
NS
3089 return 0;
3090error0:
56b2de80 3091 xfs_perag_put(args->pag);
2bd0ea18
NS
3092 return error;
3093}
3094
2a6da3b8
DC
3095/* Ensure that the freelist is at full capacity. */
3096int
3097xfs_free_extent_fix_freelist(
3098 struct xfs_trans *tp,
3099 xfs_agnumber_t agno,
3100 struct xfs_buf **agbp)
2bd0ea18 3101{
2a6da3b8
DC
3102 struct xfs_alloc_arg args;
3103 int error;
2bd0ea18 3104
2a6da3b8 3105 memset(&args, 0, sizeof(struct xfs_alloc_arg));
2bd0ea18
NS
3106 args.tp = tp;
3107 args.mp = tp->t_mountp;
2a6da3b8 3108 args.agno = agno;
a2ceac1f
DC
3109
3110 /*
3111 * validate that the block number is legal - the enables us to detect
3112 * and handle a silent filesystem corruption rather than crashing.
3113 */
a2ceac1f 3114 if (args.agno >= args.mp->m_sb.sb_agcount)
12b53197 3115 return -EFSCORRUPTED;
a2ceac1f 3116
56b2de80 3117 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
3118 ASSERT(args.pag);
3119
3120 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3121 if (error)
2a6da3b8
DC
3122 goto out;
3123
3124 *agbp = args.agbp;
3125out:
3126 xfs_perag_put(args.pag);
3127 return error;
3128}
3129
3130/*
3131 * Free an extent.
3132 * Just break up the extent address and hand off to xfs_free_ag_extent
3133 * after fixing up the freelist.
3134 */
5837e73b 3135int
3a13f959 3136__xfs_free_extent(
5837e73b
DW
3137 struct xfs_trans *tp,
3138 xfs_fsblock_t bno,
3139 xfs_extlen_t len,
3140 const struct xfs_owner_info *oinfo,
3141 enum xfs_ag_resv_type type,
3142 bool skip_discard)
2a6da3b8 3143{
5837e73b
DW
3144 struct xfs_mount *mp = tp->t_mountp;
3145 struct xfs_buf *agbp;
3146 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
3147 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
3148 int error;
3149 unsigned int busy_flags = 0;
2a6da3b8
DC
3150
3151 ASSERT(len != 0);
9760cac2 3152 ASSERT(type != XFS_AG_RESV_AGFL);
2a6da3b8 3153
a9da40de 3154 if (XFS_TEST_ERROR(false, mp,
e2a190dd 3155 XFS_ERRTAG_FREE_EXTENT))
a9da40de
DW
3156 return -EIO;
3157
2a6da3b8
DC
3158 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3159 if (error)
3160 return error;
3161
3162 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
a2ceac1f
DC
3163
3164 /* validate the extent size is legal now we have the agf locked */
2a6da3b8
DC
3165 XFS_WANT_CORRUPTED_GOTO(mp,
3166 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
3167 err);
a2ceac1f 3168
cf8ce220 3169 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2a6da3b8
DC
3170 if (error)
3171 goto err;
3172
3a13f959
BF
3173 if (skip_discard)
3174 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3175 xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
2a6da3b8
DC
3176 return 0;
3177
3178err:
3179 xfs_trans_brelse(tp, agbp);
2bd0ea18
NS
3180 return error;
3181}
b3d83fa6
DW
3182
3183struct xfs_alloc_query_range_info {
3184 xfs_alloc_query_range_fn fn;
3185 void *priv;
3186};
3187
3188/* Format btree record and pass to our callback. */
3189STATIC int
3190xfs_alloc_query_range_helper(
3191 struct xfs_btree_cur *cur,
3192 union xfs_btree_rec *rec,
3193 void *priv)
3194{
3195 struct xfs_alloc_query_range_info *query = priv;
3196 struct xfs_alloc_rec_incore irec;
3197
3198 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3199 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3200 return query->fn(cur, &irec, query->priv);
3201}
3202
3203/* Find all free space within a given range of blocks. */
3204int
3205xfs_alloc_query_range(
3206 struct xfs_btree_cur *cur,
3207 struct xfs_alloc_rec_incore *low_rec,
3208 struct xfs_alloc_rec_incore *high_rec,
3209 xfs_alloc_query_range_fn fn,
3210 void *priv)
3211{
3212 union xfs_btree_irec low_brec;
3213 union xfs_btree_irec high_brec;
3214 struct xfs_alloc_query_range_info query;
3215
3216 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3217 low_brec.a = *low_rec;
3218 high_brec.a = *high_rec;
3219 query.priv = priv;
3220 query.fn = fn;
3221 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3222 xfs_alloc_query_range_helper, &query);
3223}
7e05e856
DW
3224
3225/* Find all free space records. */
3226int
3227xfs_alloc_query_all(
3228 struct xfs_btree_cur *cur,
3229 xfs_alloc_query_range_fn fn,
3230 void *priv)
3231{
3232 struct xfs_alloc_query_range_info query;
3233
3234 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3235 query.priv = priv;
3236 query.fn = fn;
3237 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3238}
9bef6258 3239
1fe41a73
DW
3240/* Is there a record covering a given extent? */
3241int
3242xfs_alloc_has_record(
3243 struct xfs_btree_cur *cur,
3244 xfs_agblock_t bno,
3245 xfs_extlen_t len,
3246 bool *exists)
3247{
3248 union xfs_btree_irec low;
3249 union xfs_btree_irec high;
3250
3251 memset(&low, 0, sizeof(low));
3252 low.a.ar_startblock = bno;
3253 memset(&high, 0xFF, sizeof(high));
3254 high.a.ar_startblock = bno + len - 1;
3255
3256 return xfs_btree_has_record(cur, &low, &high, exists);
3257}
71a98c66
DW
3258
3259/*
3260 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
4a509d6d 3261 * error code or XFS_ITER_*.
71a98c66
DW
3262 */
3263int
3264xfs_agfl_walk(
3265 struct xfs_mount *mp,
3266 struct xfs_agf *agf,
3267 struct xfs_buf *agflbp,
3268 xfs_agfl_walk_fn walk_fn,
3269 void *priv)
3270{
3271 __be32 *agfl_bno;
3272 unsigned int i;
3273 int error;
3274
3275 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
3276 i = be32_to_cpu(agf->agf_flfirst);
3277
3278 /* Nothing to walk in an empty AGFL. */
3279 if (agf->agf_flcount == cpu_to_be32(0))
3280 return 0;
3281
3282 /* Otherwise, walk from first to last, wrapping as needed. */
3283 for (;;) {
3284 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3285 if (error)
3286 return error;
3287 if (i == be32_to_cpu(agf->agf_fllast))
3288 break;
3289 if (++i == xfs_agfl_size(mp))
3290 i = 0;
3291 }
3292
3293 return 0;
3294}