]> git.ipfire.org Git - people/ms/linux.git/blame - fs/xfs/libxfs/xfs_alloc.c
libxfs: resync with the userspace libxfs
[people/ms/linux.git] / fs / xfs / libxfs / xfs_alloc.c
CommitLineData
0b61f8a4 1// SPDX-License-Identifier: GPL-2.0
1da177e4 2/*
7b718769
NS
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
1da177e4 5 */
1da177e4 6#include "xfs.h"
a844f451 7#include "xfs_fs.h"
70a9883c 8#include "xfs_format.h"
239880ef 9#include "xfs_log_format.h"
70a9883c 10#include "xfs_shared.h"
239880ef 11#include "xfs_trans_resv.h"
a844f451 12#include "xfs_bit.h"
1da177e4 13#include "xfs_sb.h"
1da177e4 14#include "xfs_mount.h"
3ab78df2 15#include "xfs_defer.h"
1da177e4 16#include "xfs_btree.h"
673930c3 17#include "xfs_rmap.h"
a4fbe6ab 18#include "xfs_alloc_btree.h"
1da177e4 19#include "xfs_alloc.h"
efc27b52 20#include "xfs_extent_busy.h"
e9e899a2 21#include "xfs_errortag.h"
1da177e4 22#include "xfs_error.h"
0b1b213f 23#include "xfs_trace.h"
239880ef 24#include "xfs_trans.h"
4e0e6040 25#include "xfs_buf_item.h"
239880ef 26#include "xfs_log.h"
3fd129b6 27#include "xfs_ag_resv.h"
f8f2835a
BF
28#include "xfs_bmap.h"
29
30extern kmem_zone_t *xfs_bmap_free_item_zone;
1da177e4 31
c999a223 32struct workqueue_struct *xfs_alloc_wq;
1da177e4
LT
33
34#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
35
36#define XFSA_FIXUP_BNO_OK 1
37#define XFSA_FIXUP_CNT_OK 2
38
1da177e4
LT
39STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
40STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
41STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
1da177e4 42
a78ee256
DC
43/*
44 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
45 * the beginning of the block for a proper header with the location information
46 * and CRC.
47 */
48unsigned int
49xfs_agfl_size(
50 struct xfs_mount *mp)
51{
52 unsigned int size = mp->m_sb.sb_sectsize;
53
54 if (xfs_sb_version_hascrc(&mp->m_sb))
55 size -= sizeof(struct xfs_agfl);
56
57 return size / sizeof(xfs_agblock_t);
58}
59
af30dfa1
DW
60unsigned int
61xfs_refc_block(
62 struct xfs_mount *mp)
63{
64 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
65 return XFS_RMAP_BLOCK(mp) + 1;
66 if (xfs_sb_version_hasfinobt(&mp->m_sb))
67 return XFS_FIBT_BLOCK(mp) + 1;
68 return XFS_IBT_BLOCK(mp) + 1;
69}
70
8018026e
DW
71xfs_extlen_t
72xfs_prealloc_blocks(
73 struct xfs_mount *mp)
74{
af30dfa1
DW
75 if (xfs_sb_version_hasreflink(&mp->m_sb))
76 return xfs_refc_block(mp) + 1;
8018026e
DW
77 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
78 return XFS_RMAP_BLOCK(mp) + 1;
79 if (xfs_sb_version_hasfinobt(&mp->m_sb))
80 return XFS_FIBT_BLOCK(mp) + 1;
81 return XFS_IBT_BLOCK(mp) + 1;
82}
83
52548852
DW
84/*
85 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
86 * AGF buffer (PV 947395), we place constraints on the relationship among
87 * actual allocations for data blocks, freelist blocks, and potential file data
88 * bmap btree blocks. However, these restrictions may result in no actual space
89 * allocated for a delayed extent, for example, a data block in a certain AG is
90 * allocated but there is no additional block for the additional bmap btree
91 * block due to a split of the bmap btree of the file. The result of this may
92 * lead to an infinite loop when the file gets flushed to disk and all delayed
93 * extents need to be actually allocated. To get around this, we explicitly set
94 * aside a few blocks which will not be reserved in delayed allocation.
95 *
3fd129b6
DW
96 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
97 * potential split of the file's bmap btree.
52548852
DW
98 */
99unsigned int
100xfs_alloc_set_aside(
101 struct xfs_mount *mp)
102{
5149fd32 103 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
52548852
DW
104}
105
106/*
107 * When deciding how much space to allocate out of an AG, we limit the
108 * allocation maximum size to the size the AG. However, we cannot use all the
109 * blocks in the AG - some are permanently used by metadata. These
110 * blocks are generally:
111 * - the AG superblock, AGF, AGI and AGFL
112 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
113 * the AGI free inode and rmap btree root blocks.
114 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
115 * - the rmapbt root block
116 *
117 * The AG headers are sector sized, so the amount of space they take up is
118 * dependent on filesystem geometry. The others are all single blocks.
119 */
120unsigned int
121xfs_alloc_ag_max_usable(
122 struct xfs_mount *mp)
123{
124 unsigned int blocks;
125
126 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
127 blocks += XFS_ALLOC_AGFL_RESERVE;
128 blocks += 3; /* AGF, AGI btree root blocks */
129 if (xfs_sb_version_hasfinobt(&mp->m_sb))
130 blocks++; /* finobt root block */
131 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
132 blocks++; /* rmap root block */
d0e853f3
DW
133 if (xfs_sb_version_hasreflink(&mp->m_sb))
134 blocks++; /* refcount root block */
52548852
DW
135
136 return mp->m_sb.sb_agblocks - blocks;
137}
138
fe033cc8
CH
139/*
140 * Lookup the record equal to [bno, len] in the btree given by cur.
141 */
142STATIC int /* error */
143xfs_alloc_lookup_eq(
144 struct xfs_btree_cur *cur, /* btree cursor */
145 xfs_agblock_t bno, /* starting block of extent */
146 xfs_extlen_t len, /* length of extent */
147 int *stat) /* success/failure */
148{
f6b428a4
BF
149 int error;
150
fe033cc8
CH
151 cur->bc_rec.a.ar_startblock = bno;
152 cur->bc_rec.a.ar_blockcount = len;
f6b428a4
BF
153 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
154 cur->bc_private.a.priv.abt.active = (*stat == 1);
155 return error;
fe033cc8
CH
156}
157
158/*
159 * Lookup the first record greater than or equal to [bno, len]
160 * in the btree given by cur.
161 */
a66d6363 162int /* error */
fe033cc8
CH
163xfs_alloc_lookup_ge(
164 struct xfs_btree_cur *cur, /* btree cursor */
165 xfs_agblock_t bno, /* starting block of extent */
166 xfs_extlen_t len, /* length of extent */
167 int *stat) /* success/failure */
168{
f6b428a4
BF
169 int error;
170
fe033cc8
CH
171 cur->bc_rec.a.ar_startblock = bno;
172 cur->bc_rec.a.ar_blockcount = len;
f6b428a4
BF
173 error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
174 cur->bc_private.a.priv.abt.active = (*stat == 1);
175 return error;
fe033cc8
CH
176}
177
178/*
179 * Lookup the first record less than or equal to [bno, len]
180 * in the btree given by cur.
181 */
ce1d802e 182int /* error */
fe033cc8
CH
183xfs_alloc_lookup_le(
184 struct xfs_btree_cur *cur, /* btree cursor */
185 xfs_agblock_t bno, /* starting block of extent */
186 xfs_extlen_t len, /* length of extent */
187 int *stat) /* success/failure */
188{
f6b428a4 189 int error;
fe033cc8
CH
190 cur->bc_rec.a.ar_startblock = bno;
191 cur->bc_rec.a.ar_blockcount = len;
f6b428a4
BF
192 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
193 cur->bc_private.a.priv.abt.active = (*stat == 1);
194 return error;
195}
196
197static inline bool
198xfs_alloc_cur_active(
199 struct xfs_btree_cur *cur)
200{
201 return cur && cur->bc_private.a.priv.abt.active;
fe033cc8
CH
202}
203
278d0ca1
CH
204/*
205 * Update the record referred to by cur to the value given
206 * by [bno, len].
207 * This either works (return 0) or gets an EFSCORRUPTED error.
208 */
209STATIC int /* error */
210xfs_alloc_update(
211 struct xfs_btree_cur *cur, /* btree cursor */
212 xfs_agblock_t bno, /* starting block of extent */
213 xfs_extlen_t len) /* length of extent */
214{
215 union xfs_btree_rec rec;
216
217 rec.alloc.ar_startblock = cpu_to_be32(bno);
218 rec.alloc.ar_blockcount = cpu_to_be32(len);
219 return xfs_btree_update(cur, &rec);
220}
fe033cc8 221
8cc938fe
CH
222/*
223 * Get the data from the pointed-to record.
224 */
a46db608 225int /* error */
8cc938fe
CH
226xfs_alloc_get_rec(
227 struct xfs_btree_cur *cur, /* btree cursor */
228 xfs_agblock_t *bno, /* output: starting block of extent */
229 xfs_extlen_t *len, /* output: length of extent */
230 int *stat) /* output: success/failure */
231{
9e6c08d4
DC
232 struct xfs_mount *mp = cur->bc_mp;
233 xfs_agnumber_t agno = cur->bc_private.a.agno;
8cc938fe
CH
234 union xfs_btree_rec *rec;
235 int error;
236
237 error = xfs_btree_get_rec(cur, &rec, stat);
a37f7b12
DW
238 if (error || !(*stat))
239 return error;
a37f7b12
DW
240
241 *bno = be32_to_cpu(rec->alloc.ar_startblock);
242 *len = be32_to_cpu(rec->alloc.ar_blockcount);
243
efe80327
CM
244 if (*len == 0)
245 goto out_bad_rec;
246
9e6c08d4
DC
247 /* check for valid extent range, including overflow */
248 if (!xfs_verify_agbno(mp, agno, *bno))
249 goto out_bad_rec;
250 if (*bno > *bno + *len)
251 goto out_bad_rec;
252 if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
253 goto out_bad_rec;
254
255 return 0;
256
257out_bad_rec:
258 xfs_warn(mp,
259 "%s Freespace BTree record corruption in AG %d detected!",
260 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
261 xfs_warn(mp,
262 "start block 0x%x block count 0x%x", *bno, *len);
263 return -EFSCORRUPTED;
8cc938fe
CH
264}
265
1da177e4
LT
266/*
267 * Compute aligned version of the found extent.
268 * Takes alignment and min length into account.
269 */
ebf55872 270STATIC bool
1da177e4 271xfs_alloc_compute_aligned(
86fa8af6 272 xfs_alloc_arg_t *args, /* allocation argument structure */
1da177e4
LT
273 xfs_agblock_t foundbno, /* starting block in found extent */
274 xfs_extlen_t foundlen, /* length in found extent */
1da177e4 275 xfs_agblock_t *resbno, /* result block number */
ebf55872
CH
276 xfs_extlen_t *reslen, /* result length */
277 unsigned *busy_gen)
1da177e4 278{
ebf55872
CH
279 xfs_agblock_t bno = foundbno;
280 xfs_extlen_t len = foundlen;
bfe46d4e 281 xfs_extlen_t diff;
ebf55872 282 bool busy;
1da177e4 283
e26f0501 284 /* Trim busy sections out of found extent */
ebf55872 285 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
e26f0501 286
bfe46d4e
BF
287 /*
288 * If we have a largish extent that happens to start before min_agbno,
289 * see if we can shift it into range...
290 */
291 if (bno < args->min_agbno && bno + len > args->min_agbno) {
292 diff = args->min_agbno - bno;
293 if (len > diff) {
294 bno += diff;
295 len -= diff;
296 }
297 }
298
e26f0501
CH
299 if (args->alignment > 1 && len >= args->minlen) {
300 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
bfe46d4e
BF
301
302 diff = aligned_bno - bno;
e26f0501
CH
303
304 *resbno = aligned_bno;
305 *reslen = diff >= len ? 0 : len - diff;
1da177e4 306 } else {
e26f0501
CH
307 *resbno = bno;
308 *reslen = len;
1da177e4 309 }
ebf55872
CH
310
311 return busy;
1da177e4
LT
312}
313
314/*
315 * Compute best start block and diff for "near" allocations.
316 * freelen >= wantlen already checked by caller.
317 */
318STATIC xfs_extlen_t /* difference value (absolute) */
319xfs_alloc_compute_diff(
320 xfs_agblock_t wantbno, /* target starting block */
321 xfs_extlen_t wantlen, /* target length */
322 xfs_extlen_t alignment, /* target alignment */
292378ed 323 int datatype, /* are we allocating data? */
1da177e4
LT
324 xfs_agblock_t freebno, /* freespace's starting block */
325 xfs_extlen_t freelen, /* freespace's length */
326 xfs_agblock_t *newbnop) /* result: best start block from free */
327{
328 xfs_agblock_t freeend; /* end of freespace extent */
329 xfs_agblock_t newbno1; /* return block number */
330 xfs_agblock_t newbno2; /* other new block number */
331 xfs_extlen_t newlen1=0; /* length with newbno1 */
332 xfs_extlen_t newlen2=0; /* length with newbno2 */
333 xfs_agblock_t wantend; /* end of target extent */
c34d570d 334 bool userdata = datatype & XFS_ALLOC_USERDATA;
1da177e4
LT
335
336 ASSERT(freelen >= wantlen);
337 freeend = freebno + freelen;
338 wantend = wantbno + wantlen;
211d022c
JK
339 /*
340 * We want to allocate from the start of a free extent if it is past
341 * the desired block or if we are allocating user data and the free
342 * extent is before desired block. The second case is there to allow
343 * for contiguous allocation from the remaining free space if the file
344 * grows in the short term.
345 */
346 if (freebno >= wantbno || (userdata && freeend < wantend)) {
1da177e4
LT
347 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
348 newbno1 = NULLAGBLOCK;
349 } else if (freeend >= wantend && alignment > 1) {
350 newbno1 = roundup(wantbno, alignment);
351 newbno2 = newbno1 - alignment;
352 if (newbno1 >= freeend)
353 newbno1 = NULLAGBLOCK;
354 else
355 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
356 if (newbno2 < freebno)
357 newbno2 = NULLAGBLOCK;
358 else
359 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
360 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
361 if (newlen1 < newlen2 ||
362 (newlen1 == newlen2 &&
363 XFS_ABSDIFF(newbno1, wantbno) >
364 XFS_ABSDIFF(newbno2, wantbno)))
365 newbno1 = newbno2;
366 } else if (newbno2 != NULLAGBLOCK)
367 newbno1 = newbno2;
368 } else if (freeend >= wantend) {
369 newbno1 = wantbno;
370 } else if (alignment > 1) {
371 newbno1 = roundup(freeend - wantlen, alignment);
372 if (newbno1 > freeend - wantlen &&
373 newbno1 - alignment >= freebno)
374 newbno1 -= alignment;
375 else if (newbno1 >= freeend)
376 newbno1 = NULLAGBLOCK;
377 } else
378 newbno1 = freeend - wantlen;
379 *newbnop = newbno1;
380 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
381}
382
383/*
384 * Fix up the length, based on mod and prod.
385 * len should be k * prod + mod for some k.
386 * If len is too small it is returned unchanged.
387 * If len hits maxlen it is left alone.
388 */
389STATIC void
390xfs_alloc_fix_len(
391 xfs_alloc_arg_t *args) /* allocation argument structure */
392{
393 xfs_extlen_t k;
394 xfs_extlen_t rlen;
395
396 ASSERT(args->mod < args->prod);
397 rlen = args->len;
398 ASSERT(rlen >= args->minlen);
399 ASSERT(rlen <= args->maxlen);
400 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
401 (args->mod == 0 && rlen < args->prod))
402 return;
403 k = rlen % args->prod;
404 if (k == args->mod)
405 return;
30265117
JK
406 if (k > args->mod)
407 rlen = rlen - (k - args->mod);
408 else
409 rlen = rlen - args->prod + (args->mod - k);
3790a8cd 410 /* casts to (int) catch length underflows */
30265117
JK
411 if ((int)rlen < (int)args->minlen)
412 return;
413 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
414 ASSERT(rlen % args->prod == args->mod);
54fee133
CH
415 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
416 rlen + args->minleft);
1da177e4
LT
417 args->len = rlen;
418}
419
1da177e4
LT
420/*
421 * Update the two btrees, logically removing from freespace the extent
422 * starting at rbno, rlen blocks. The extent is contained within the
423 * actual (current) free extent fbno for flen blocks.
424 * Flags are passed in indicating whether the cursors are set to the
425 * relevant records.
426 */
427STATIC int /* error code */
428xfs_alloc_fixup_trees(
429 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
430 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
431 xfs_agblock_t fbno, /* starting block of free extent */
432 xfs_extlen_t flen, /* length of free extent */
433 xfs_agblock_t rbno, /* starting block of returned extent */
434 xfs_extlen_t rlen, /* length of returned extent */
435 int flags) /* flags, XFSA_FIXUP_... */
436{
437 int error; /* error code */
438 int i; /* operation results */
439 xfs_agblock_t nfbno1; /* first new free startblock */
440 xfs_agblock_t nfbno2; /* second new free startblock */
441 xfs_extlen_t nflen1=0; /* first new free length */
442 xfs_extlen_t nflen2=0; /* second new free length */
5fb5aeee
ES
443 struct xfs_mount *mp;
444
445 mp = cnt_cur->bc_mp;
1da177e4
LT
446
447 /*
448 * Look up the record in the by-size tree if necessary.
449 */
450 if (flags & XFSA_FIXUP_CNT_OK) {
451#ifdef DEBUG
452 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
453 return error;
f9e03706
DW
454 if (XFS_IS_CORRUPT(mp,
455 i != 1 ||
456 nfbno1 != fbno ||
457 nflen1 != flen))
458 return -EFSCORRUPTED;
1da177e4
LT
459#endif
460 } else {
461 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
462 return error;
f9e03706
DW
463 if (XFS_IS_CORRUPT(mp, i != 1))
464 return -EFSCORRUPTED;
1da177e4
LT
465 }
466 /*
467 * Look up the record in the by-block tree if necessary.
468 */
469 if (flags & XFSA_FIXUP_BNO_OK) {
470#ifdef DEBUG
471 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
472 return error;
f9e03706
DW
473 if (XFS_IS_CORRUPT(mp,
474 i != 1 ||
475 nfbno1 != fbno ||
476 nflen1 != flen))
477 return -EFSCORRUPTED;
1da177e4
LT
478#endif
479 } else {
480 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
481 return error;
f9e03706
DW
482 if (XFS_IS_CORRUPT(mp, i != 1))
483 return -EFSCORRUPTED;
1da177e4 484 }
7cc95a82 485
1da177e4 486#ifdef DEBUG
7cc95a82
CH
487 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
488 struct xfs_btree_block *bnoblock;
489 struct xfs_btree_block *cntblock;
490
491 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
492 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
1da177e4 493
f9e03706
DW
494 if (XFS_IS_CORRUPT(mp,
495 bnoblock->bb_numrecs !=
496 cntblock->bb_numrecs))
497 return -EFSCORRUPTED;
1da177e4
LT
498 }
499#endif
7cc95a82 500
1da177e4
LT
501 /*
502 * Deal with all four cases: the allocated record is contained
503 * within the freespace record, so we can have new freespace
504 * at either (or both) end, or no freespace remaining.
505 */
506 if (rbno == fbno && rlen == flen)
507 nfbno1 = nfbno2 = NULLAGBLOCK;
508 else if (rbno == fbno) {
509 nfbno1 = rbno + rlen;
510 nflen1 = flen - rlen;
511 nfbno2 = NULLAGBLOCK;
512 } else if (rbno + rlen == fbno + flen) {
513 nfbno1 = fbno;
514 nflen1 = flen - rlen;
515 nfbno2 = NULLAGBLOCK;
516 } else {
517 nfbno1 = fbno;
518 nflen1 = rbno - fbno;
519 nfbno2 = rbno + rlen;
520 nflen2 = (fbno + flen) - nfbno2;
521 }
522 /*
523 * Delete the entry from the by-size btree.
524 */
91cca5df 525 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 526 return error;
f9e03706
DW
527 if (XFS_IS_CORRUPT(mp, i != 1))
528 return -EFSCORRUPTED;
1da177e4
LT
529 /*
530 * Add new by-size btree entry(s).
531 */
532 if (nfbno1 != NULLAGBLOCK) {
533 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
534 return error;
f9e03706
DW
535 if (XFS_IS_CORRUPT(mp, i != 0))
536 return -EFSCORRUPTED;
4b22a571 537 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 538 return error;
f9e03706
DW
539 if (XFS_IS_CORRUPT(mp, i != 1))
540 return -EFSCORRUPTED;
1da177e4
LT
541 }
542 if (nfbno2 != NULLAGBLOCK) {
543 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
544 return error;
f9e03706
DW
545 if (XFS_IS_CORRUPT(mp, i != 0))
546 return -EFSCORRUPTED;
4b22a571 547 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 548 return error;
f9e03706
DW
549 if (XFS_IS_CORRUPT(mp, i != 1))
550 return -EFSCORRUPTED;
1da177e4
LT
551 }
552 /*
553 * Fix up the by-block btree entry(s).
554 */
555 if (nfbno1 == NULLAGBLOCK) {
556 /*
557 * No remaining freespace, just delete the by-block tree entry.
558 */
91cca5df 559 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 560 return error;
f9e03706
DW
561 if (XFS_IS_CORRUPT(mp, i != 1))
562 return -EFSCORRUPTED;
1da177e4
LT
563 } else {
564 /*
565 * Update the by-block entry to start later|be shorter.
566 */
567 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
568 return error;
569 }
570 if (nfbno2 != NULLAGBLOCK) {
571 /*
572 * 2 resulting free entries, need to add one.
573 */
574 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
575 return error;
f9e03706
DW
576 if (XFS_IS_CORRUPT(mp, i != 0))
577 return -EFSCORRUPTED;
4b22a571 578 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 579 return error;
f9e03706
DW
580 if (XFS_IS_CORRUPT(mp, i != 1))
581 return -EFSCORRUPTED;
1da177e4
LT
582 }
583 return 0;
584}
585
a6a781a5 586static xfs_failaddr_t
612cfbfe 587xfs_agfl_verify(
bb80c6d7
DC
588 struct xfs_buf *bp)
589{
dbd329f1 590 struct xfs_mount *mp = bp->b_mount;
bb80c6d7 591 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
bb80c6d7
DC
592 int i;
593
b5572597
DW
594 /*
595 * There is no verification of non-crc AGFLs because mkfs does not
596 * initialise the AGFL to zero or NULL. Hence the only valid part of the
597 * AGFL is what the AGF says is active. We can't get to the AGF, so we
598 * can't verify just those entries are valid.
599 */
600 if (!xfs_sb_version_hascrc(&mp->m_sb))
601 return NULL;
602
39708c20 603 if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
a6a781a5 604 return __this_address;
39708c20 605 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 606 return __this_address;
77c95bba
CH
607 /*
608 * during growfs operations, the perag is not fully initialised,
609 * so we can't use it for any useful checking. growfs ensures we can't
610 * use it by using uncached buffers that don't have the perag attached
611 * so we can detect and avoid this problem.
612 */
613 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
a6a781a5 614 return __this_address;
77c95bba 615
a78ee256 616 for (i = 0; i < xfs_agfl_size(mp); i++) {
77c95bba 617 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
bb80c6d7 618 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
a6a781a5 619 return __this_address;
bb80c6d7 620 }
a45086e2 621
a6a781a5
DW
622 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
623 return __this_address;
624 return NULL;
77c95bba
CH
625}
626
627static void
628xfs_agfl_read_verify(
629 struct xfs_buf *bp)
630{
dbd329f1 631 struct xfs_mount *mp = bp->b_mount;
bc1a09b8 632 xfs_failaddr_t fa;
77c95bba
CH
633
634 /*
635 * There is no verification of non-crc AGFLs because mkfs does not
636 * initialise the AGFL to zero or NULL. Hence the only valid part of the
637 * AGFL is what the AGF says is active. We can't get to the AGF, so we
638 * can't verify just those entries are valid.
639 */
640 if (!xfs_sb_version_hascrc(&mp->m_sb))
641 return;
642
ce5028cf 643 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
bc1a09b8
DW
644 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
645 else {
646 fa = xfs_agfl_verify(bp);
647 if (fa)
648 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
649 }
612cfbfe
DC
650}
651
1813dd64 652static void
612cfbfe
DC
653xfs_agfl_write_verify(
654 struct xfs_buf *bp)
655{
dbd329f1 656 struct xfs_mount *mp = bp->b_mount;
fb1755a6 657 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 658 xfs_failaddr_t fa;
612cfbfe 659
77c95bba
CH
660 /* no verification of non-crc AGFLs */
661 if (!xfs_sb_version_hascrc(&mp->m_sb))
662 return;
663
bc1a09b8
DW
664 fa = xfs_agfl_verify(bp);
665 if (fa) {
666 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
77c95bba
CH
667 return;
668 }
669
670 if (bip)
671 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
672
f1dbcd7e 673 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
bb80c6d7
DC
674}
675
1813dd64 676const struct xfs_buf_ops xfs_agfl_buf_ops = {
233135b7 677 .name = "xfs_agfl",
39708c20 678 .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
1813dd64
DC
679 .verify_read = xfs_agfl_read_verify,
680 .verify_write = xfs_agfl_write_verify,
b5572597 681 .verify_struct = xfs_agfl_verify,
1813dd64
DC
682};
683
1da177e4
LT
684/*
685 * Read in the allocation group free block array.
686 */
26788097 687int /* error */
1da177e4
LT
688xfs_alloc_read_agfl(
689 xfs_mount_t *mp, /* mount point structure */
690 xfs_trans_t *tp, /* transaction pointer */
691 xfs_agnumber_t agno, /* allocation group number */
692 xfs_buf_t **bpp) /* buffer for the ag free block array */
693{
694 xfs_buf_t *bp; /* return value */
695 int error;
696
697 ASSERT(agno != NULLAGNUMBER);
698 error = xfs_trans_read_buf(
699 mp, tp, mp->m_ddev_targp,
700 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
1813dd64 701 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
1da177e4
LT
702 if (error)
703 return error;
38f23232 704 xfs_buf_set_ref(bp, XFS_AGFL_REF);
1da177e4
LT
705 *bpp = bp;
706 return 0;
707}
708
ecb6928f
CH
709STATIC int
710xfs_alloc_update_counters(
711 struct xfs_trans *tp,
712 struct xfs_perag *pag,
713 struct xfs_buf *agbp,
714 long len)
715{
716 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
717
718 pag->pagf_freeblks += len;
719 be32_add_cpu(&agf->agf_freeblks, len);
720
721 xfs_trans_agblocks_delta(tp, len);
722 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
a5155b87
DW
723 be32_to_cpu(agf->agf_length))) {
724 xfs_buf_corruption_error(agbp);
2451337d 725 return -EFSCORRUPTED;
a5155b87 726 }
ecb6928f
CH
727
728 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
729 return 0;
730}
731
1da177e4 732/*
f5e7dbea 733 * Block allocation algorithm and data structures.
1da177e4 734 */
f5e7dbea
BF
735struct xfs_alloc_cur {
736 struct xfs_btree_cur *cnt; /* btree cursors */
737 struct xfs_btree_cur *bnolt;
738 struct xfs_btree_cur *bnogt;
dc8e69bd 739 xfs_extlen_t cur_len;/* current search length */
c62321a2
BF
740 xfs_agblock_t rec_bno;/* extent startblock */
741 xfs_extlen_t rec_len;/* extent length */
742 xfs_agblock_t bno; /* alloc bno */
743 xfs_extlen_t len; /* alloc len */
744 xfs_extlen_t diff; /* diff from search bno */
d6d3aff2
BF
745 unsigned int busy_gen;/* busy state */
746 bool busy;
f5e7dbea
BF
747};
748
749/*
750 * Set up cursors, etc. in the extent allocation cursor. This function can be
751 * called multiple times to reset an initialized structure without having to
752 * reallocate cursors.
753 */
754static int
755xfs_alloc_cur_setup(
756 struct xfs_alloc_arg *args,
757 struct xfs_alloc_cur *acur)
758{
759 int error;
760 int i;
761
762 ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
763
dc8e69bd 764 acur->cur_len = args->maxlen;
c62321a2
BF
765 acur->rec_bno = 0;
766 acur->rec_len = 0;
767 acur->bno = 0;
768 acur->len = 0;
396bbf3c 769 acur->diff = -1;
d6d3aff2
BF
770 acur->busy = false;
771 acur->busy_gen = 0;
772
f5e7dbea
BF
773 /*
774 * Perform an initial cntbt lookup to check for availability of maxlen
775 * extents. If this fails, we'll return -ENOSPC to signal the caller to
776 * attempt a small allocation.
777 */
778 if (!acur->cnt)
779 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
780 args->agbp, args->agno, XFS_BTNUM_CNT);
781 error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
782 if (error)
783 return error;
784
785 /*
786 * Allocate the bnobt left and right search cursors.
787 */
788 if (!acur->bnolt)
789 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
790 args->agbp, args->agno, XFS_BTNUM_BNO);
791 if (!acur->bnogt)
792 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
793 args->agbp, args->agno, XFS_BTNUM_BNO);
794 return i == 1 ? 0 : -ENOSPC;
795}
796
797static void
798xfs_alloc_cur_close(
799 struct xfs_alloc_cur *acur,
800 bool error)
801{
802 int cur_error = XFS_BTREE_NOERROR;
803
804 if (error)
805 cur_error = XFS_BTREE_ERROR;
806
807 if (acur->cnt)
808 xfs_btree_del_cursor(acur->cnt, cur_error);
809 if (acur->bnolt)
810 xfs_btree_del_cursor(acur->bnolt, cur_error);
811 if (acur->bnogt)
812 xfs_btree_del_cursor(acur->bnogt, cur_error);
813 acur->cnt = acur->bnolt = acur->bnogt = NULL;
814}
1da177e4 815
396bbf3c
BF
816/*
817 * Check an extent for allocation and track the best available candidate in the
818 * allocation structure. The cursor is deactivated if it has entered an out of
819 * range state based on allocation arguments. Optionally return the extent
820 * extent geometry and allocation status if requested by the caller.
821 */
822static int
823xfs_alloc_cur_check(
824 struct xfs_alloc_arg *args,
825 struct xfs_alloc_cur *acur,
826 struct xfs_btree_cur *cur,
827 int *new)
828{
829 int error, i;
830 xfs_agblock_t bno, bnoa, bnew;
831 xfs_extlen_t len, lena, diff = -1;
832 bool busy;
833 unsigned busy_gen = 0;
834 bool deactivate = false;
fec0afda 835 bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
396bbf3c
BF
836
837 *new = 0;
838
839 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
840 if (error)
841 return error;
f9e03706
DW
842 if (XFS_IS_CORRUPT(args->mp, i != 1))
843 return -EFSCORRUPTED;
396bbf3c
BF
844
845 /*
846 * Check minlen and deactivate a cntbt cursor if out of acceptable size
847 * range (i.e., walking backwards looking for a minlen extent).
848 */
849 if (len < args->minlen) {
fec0afda 850 deactivate = !isbnobt;
396bbf3c
BF
851 goto out;
852 }
853
854 busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
855 &busy_gen);
856 acur->busy |= busy;
857 if (busy)
858 acur->busy_gen = busy_gen;
859 /* deactivate a bnobt cursor outside of locality range */
fec0afda
BF
860 if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
861 deactivate = isbnobt;
396bbf3c 862 goto out;
fec0afda 863 }
396bbf3c
BF
864 if (lena < args->minlen)
865 goto out;
866
867 args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
868 xfs_alloc_fix_len(args);
869 ASSERT(args->len >= args->minlen);
870 if (args->len < acur->len)
871 goto out;
872
873 /*
874 * We have an aligned record that satisfies minlen and beats or matches
875 * the candidate extent size. Compare locality for near allocation mode.
876 */
877 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
878 diff = xfs_alloc_compute_diff(args->agbno, args->len,
879 args->alignment, args->datatype,
880 bnoa, lena, &bnew);
881 if (bnew == NULLAGBLOCK)
882 goto out;
fec0afda
BF
883
884 /*
885 * Deactivate a bnobt cursor with worse locality than the current best.
886 */
887 if (diff > acur->diff) {
888 deactivate = isbnobt;
396bbf3c 889 goto out;
fec0afda 890 }
396bbf3c
BF
891
892 ASSERT(args->len > acur->len ||
893 (args->len == acur->len && diff <= acur->diff));
894 acur->rec_bno = bno;
895 acur->rec_len = len;
896 acur->bno = bnew;
897 acur->len = args->len;
898 acur->diff = diff;
899 *new = 1;
900
78d7aabd
BF
901 /*
902 * We're done if we found a perfect allocation. This only deactivates
903 * the current cursor, but this is just an optimization to terminate a
904 * cntbt search that otherwise runs to the edge of the tree.
905 */
906 if (acur->diff == 0 && acur->len == args->maxlen)
907 deactivate = true;
396bbf3c
BF
908out:
909 if (deactivate)
910 cur->bc_private.a.priv.abt.active = false;
911 trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
912 *new);
913 return 0;
914}
915
d2968825
BF
916/*
917 * Complete an allocation of a candidate extent. Remove the extent from both
918 * trees and update the args structure.
919 */
920STATIC int
921xfs_alloc_cur_finish(
922 struct xfs_alloc_arg *args,
923 struct xfs_alloc_cur *acur)
924{
925 int error;
926
927 ASSERT(acur->cnt && acur->bnolt);
928 ASSERT(acur->bno >= acur->rec_bno);
929 ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
930 ASSERT(acur->rec_bno + acur->rec_len <=
931 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
932
933 error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
934 acur->rec_len, acur->bno, acur->len, 0);
935 if (error)
936 return error;
937
938 args->agbno = acur->bno;
939 args->len = acur->len;
940 args->wasfromfl = 0;
941
942 trace_xfs_alloc_cur(args);
943 return 0;
944}
945
dc8e69bd
BF
946/*
947 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
948 * bno optimized lookup to search for extents with ideal size and locality.
949 */
950STATIC int
951xfs_alloc_cntbt_iter(
952 struct xfs_alloc_arg *args,
953 struct xfs_alloc_cur *acur)
954{
955 struct xfs_btree_cur *cur = acur->cnt;
956 xfs_agblock_t bno;
957 xfs_extlen_t len, cur_len;
958 int error;
959 int i;
960
961 if (!xfs_alloc_cur_active(cur))
962 return 0;
963
964 /* locality optimized lookup */
965 cur_len = acur->cur_len;
966 error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
967 if (error)
968 return error;
969 if (i == 0)
970 return 0;
971 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
972 if (error)
973 return error;
974
975 /* check the current record and update search length from it */
976 error = xfs_alloc_cur_check(args, acur, cur, &i);
977 if (error)
978 return error;
979 ASSERT(len >= acur->cur_len);
980 acur->cur_len = len;
981
982 /*
983 * We looked up the first record >= [agbno, len] above. The agbno is a
984 * secondary key and so the current record may lie just before or after
985 * agbno. If it is past agbno, check the previous record too so long as
986 * the length matches as it may be closer. Don't check a smaller record
987 * because that could deactivate our cursor.
988 */
989 if (bno > args->agbno) {
990 error = xfs_btree_decrement(cur, 0, &i);
991 if (!error && i) {
992 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
993 if (!error && i && len == acur->cur_len)
994 error = xfs_alloc_cur_check(args, acur, cur,
995 &i);
996 }
997 if (error)
998 return error;
999 }
1000
1001 /*
1002 * Increment the search key until we find at least one allocation
1003 * candidate or if the extent we found was larger. Otherwise, double the
1004 * search key to optimize the search. Efficiency is more important here
1005 * than absolute best locality.
1006 */
1007 cur_len <<= 1;
1008 if (!acur->len || acur->cur_len >= cur_len)
1009 acur->cur_len++;
1010 else
1011 acur->cur_len = cur_len;
1012
1013 return error;
1014}
1015
c63cdd4f
BF
1016/*
1017 * Deal with the case where only small freespaces remain. Either return the
1018 * contents of the last freespace record, or allocate space from the freelist if
1019 * there is nothing in the tree.
1020 */
1021STATIC int /* error */
1022xfs_alloc_ag_vextent_small(
1023 struct xfs_alloc_arg *args, /* allocation argument structure */
1024 struct xfs_btree_cur *ccur, /* optional by-size cursor */
1025 xfs_agblock_t *fbnop, /* result block number */
1026 xfs_extlen_t *flenp, /* result length */
1027 int *stat) /* status: 0-freelist, 1-normal/none */
1028{
1029 int error = 0;
1030 xfs_agblock_t fbno = NULLAGBLOCK;
1031 xfs_extlen_t flen = 0;
6691cd92 1032 int i = 0;
c63cdd4f 1033
6691cd92
BF
1034 /*
1035 * If a cntbt cursor is provided, try to allocate the largest record in
1036 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1037 * allocation. Make sure to respect minleft even when pulling from the
1038 * freelist.
1039 */
1040 if (ccur)
1041 error = xfs_btree_decrement(ccur, 0, &i);
c63cdd4f
BF
1042 if (error)
1043 goto error;
1044 if (i) {
1045 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1046 if (error)
1047 goto error;
f9e03706
DW
1048 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1049 error = -EFSCORRUPTED;
1050 goto error;
1051 }
c63cdd4f
BF
1052 goto out;
1053 }
1054
1055 if (args->minlen != 1 || args->alignment != 1 ||
1056 args->resv == XFS_AG_RESV_AGFL ||
1057 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
1058 args->minleft))
1059 goto out;
1060
1061 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1062 if (error)
1063 goto error;
1064 if (fbno == NULLAGBLOCK)
1065 goto out;
1066
1067 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
c34d570d 1068 (args->datatype & XFS_ALLOC_NOBUSY));
c63cdd4f 1069
c34d570d 1070 if (args->datatype & XFS_ALLOC_USERDATA) {
c63cdd4f
BF
1071 struct xfs_buf *bp;
1072
1073 bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
a71895c5 1074 if (XFS_IS_CORRUPT(args->mp, !bp)) {
c63cdd4f
BF
1075 error = -EFSCORRUPTED;
1076 goto error;
1077 }
1078 xfs_trans_binval(args->tp, bp);
1079 }
7e36a3a6
BF
1080 *fbnop = args->agbno = fbno;
1081 *flenp = args->len = 1;
f9e03706
DW
1082 if (XFS_IS_CORRUPT(args->mp,
1083 fbno >= be32_to_cpu(
1084 XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
1085 error = -EFSCORRUPTED;
1086 goto error;
1087 }
c63cdd4f
BF
1088 args->wasfromfl = 1;
1089 trace_xfs_alloc_small_freelist(args);
1090
1091 /*
1092 * If we're feeding an AGFL block to something that doesn't live in the
1093 * free space, we need to clear out the OWN_AG rmap.
1094 */
1095 error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
1096 &XFS_RMAP_OINFO_AG);
1097 if (error)
1098 goto error;
1099
1100 *stat = 0;
1101 return 0;
1102
1103out:
1104 /*
1105 * Can't do the allocation, give up.
1106 */
1107 if (flen < args->minlen) {
1108 args->agbno = NULLAGBLOCK;
1109 trace_xfs_alloc_small_notenough(args);
1110 flen = 0;
1111 }
1112 *fbnop = fbno;
1113 *flenp = flen;
1114 *stat = 1;
1115 trace_xfs_alloc_small_done(args);
1116 return 0;
1117
1118error:
1119 trace_xfs_alloc_small_error(args);
1120 return error;
1121}
1122
1da177e4
LT
1123/*
1124 * Allocate a variable extent in the allocation group agno.
1125 * Type and bno are used to determine where in the allocation group the
1126 * extent will start.
1127 * Extent's length (returned in *len) will be between minlen and maxlen,
1128 * and of the form k * prod + mod unless there's nothing that large.
1129 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1130 */
1131STATIC int /* error */
1132xfs_alloc_ag_vextent(
1133 xfs_alloc_arg_t *args) /* argument structure for allocation */
1134{
1135 int error=0;
1da177e4
LT
1136
1137 ASSERT(args->minlen > 0);
1138 ASSERT(args->maxlen > 0);
1139 ASSERT(args->minlen <= args->maxlen);
1140 ASSERT(args->mod < args->prod);
1141 ASSERT(args->alignment > 0);
3fd129b6 1142
1da177e4
LT
1143 /*
1144 * Branch to correct routine based on the type.
1145 */
1146 args->wasfromfl = 0;
1147 switch (args->type) {
1148 case XFS_ALLOCTYPE_THIS_AG:
1149 error = xfs_alloc_ag_vextent_size(args);
1150 break;
1151 case XFS_ALLOCTYPE_NEAR_BNO:
1152 error = xfs_alloc_ag_vextent_near(args);
1153 break;
1154 case XFS_ALLOCTYPE_THIS_BNO:
1155 error = xfs_alloc_ag_vextent_exact(args);
1156 break;
1157 default:
1158 ASSERT(0);
1159 /* NOTREACHED */
1160 }
ecb6928f
CH
1161
1162 if (error || args->agbno == NULLAGBLOCK)
1da177e4 1163 return error;
ecb6928f
CH
1164
1165 ASSERT(args->len >= args->minlen);
1166 ASSERT(args->len <= args->maxlen);
0ab32086 1167 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
ecb6928f
CH
1168 ASSERT(args->agbno % args->alignment == 0);
1169
673930c3 1170 /* if not file data, insert new block into the reverse map btree */
33df3a9c 1171 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
673930c3
DW
1172 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
1173 args->agbno, args->len, &args->oinfo);
1174 if (error)
1175 return error;
1176 }
1177
ecb6928f
CH
1178 if (!args->wasfromfl) {
1179 error = xfs_alloc_update_counters(args->tp, args->pag,
1180 args->agbp,
1181 -((long)(args->len)));
1182 if (error)
1183 return error;
1184
4ecbfe63 1185 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
e26f0501 1186 args->agbno, args->len));
1da177e4 1187 }
ecb6928f 1188
3fd129b6 1189 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
ecb6928f 1190
ff6d6af2
BD
1191 XFS_STATS_INC(args->mp, xs_allocx);
1192 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
ecb6928f 1193 return error;
1da177e4
LT
1194}
1195
1196/*
1197 * Allocate a variable extent at exactly agno/bno.
1198 * Extent's length (returned in *len) will be between minlen and maxlen,
1199 * and of the form k * prod + mod unless there's nothing that large.
1200 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1201 */
1202STATIC int /* error */
1203xfs_alloc_ag_vextent_exact(
1204 xfs_alloc_arg_t *args) /* allocation argument structure */
1205{
1206 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
1207 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
1da177e4
LT
1208 int error;
1209 xfs_agblock_t fbno; /* start block of found extent */
1da177e4 1210 xfs_extlen_t flen; /* length of found extent */
ebf55872
CH
1211 xfs_agblock_t tbno; /* start block of busy extent */
1212 xfs_extlen_t tlen; /* length of busy extent */
1213 xfs_agblock_t tend; /* end block of busy extent */
1da177e4 1214 int i; /* success/failure of operation */
ebf55872 1215 unsigned busy_gen;
1da177e4
LT
1216
1217 ASSERT(args->alignment == 1);
9f9baab3 1218
1da177e4
LT
1219 /*
1220 * Allocate/initialize a cursor for the by-number freespace btree.
1221 */
561f7d17 1222 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
9f9baab3
CH
1223 args->agno, XFS_BTNUM_BNO);
1224
1da177e4
LT
1225 /*
1226 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
1227 * Look for the closest free block <= bno, it must contain bno
1228 * if any free block does.
1229 */
9f9baab3
CH
1230 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
1231 if (error)
1da177e4 1232 goto error0;
9f9baab3
CH
1233 if (!i)
1234 goto not_found;
1235
1da177e4
LT
1236 /*
1237 * Grab the freespace record.
1238 */
9f9baab3
CH
1239 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
1240 if (error)
1da177e4 1241 goto error0;
f9e03706
DW
1242 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1243 error = -EFSCORRUPTED;
1244 goto error0;
1245 }
1da177e4 1246 ASSERT(fbno <= args->agbno);
9f9baab3 1247
1da177e4 1248 /*
e26f0501 1249 * Check for overlapping busy extents.
1da177e4 1250 */
ebf55872
CH
1251 tbno = fbno;
1252 tlen = flen;
1253 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
e26f0501
CH
1254
1255 /*
1256 * Give up if the start of the extent is busy, or the freespace isn't
1257 * long enough for the minimum request.
1258 */
1259 if (tbno > args->agbno)
1260 goto not_found;
1261 if (tlen < args->minlen)
1262 goto not_found;
1263 tend = tbno + tlen;
1264 if (tend < args->agbno + args->minlen)
9f9baab3
CH
1265 goto not_found;
1266
1da177e4
LT
1267 /*
1268 * End of extent will be smaller of the freespace end and the
1269 * maximal requested end.
9f9baab3 1270 *
1da177e4
LT
1271 * Fix the length according to mod and prod if given.
1272 */
81463b1c
CS
1273 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
1274 - args->agbno;
1da177e4 1275 xfs_alloc_fix_len(args);
81463b1c 1276 ASSERT(args->agbno + args->len <= tend);
9f9baab3 1277
1da177e4 1278 /*
81463b1c 1279 * We are allocating agbno for args->len
1da177e4
LT
1280 * Allocate/initialize a cursor for the by-size btree.
1281 */
561f7d17
CH
1282 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1283 args->agno, XFS_BTNUM_CNT);
1da177e4 1284 ASSERT(args->agbno + args->len <=
16259e7d 1285 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
9f9baab3
CH
1286 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
1287 args->len, XFSA_FIXUP_BNO_OK);
1288 if (error) {
1da177e4
LT
1289 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1290 goto error0;
1291 }
9f9baab3 1292
1da177e4
LT
1293 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1294 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
0b1b213f 1295
1da177e4 1296 args->wasfromfl = 0;
9f9baab3
CH
1297 trace_xfs_alloc_exact_done(args);
1298 return 0;
1299
1300not_found:
1301 /* Didn't find it, return null. */
1302 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1303 args->agbno = NULLAGBLOCK;
1304 trace_xfs_alloc_exact_notfound(args);
1da177e4
LT
1305 return 0;
1306
1307error0:
1308 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
0b1b213f 1309 trace_xfs_alloc_exact_error(args);
1da177e4
LT
1310 return error;
1311}
1312
489a150f 1313/*
78d7aabd
BF
1314 * Search a given number of btree records in a given direction. Check each
1315 * record against the good extent we've already found.
489a150f
CH
1316 */
1317STATIC int
78d7aabd 1318xfs_alloc_walk_iter(
fec0afda
BF
1319 struct xfs_alloc_arg *args,
1320 struct xfs_alloc_cur *acur,
1321 struct xfs_btree_cur *cur,
78d7aabd
BF
1322 bool increment,
1323 bool find_one, /* quit on first candidate */
1324 int count, /* rec count (-1 for infinite) */
1325 int *stat)
489a150f 1326{
489a150f
CH
1327 int error;
1328 int i;
489a150f 1329
78d7aabd
BF
1330 *stat = 0;
1331
489a150f 1332 /*
fec0afda
BF
1333 * Search so long as the cursor is active or we find a better extent.
1334 * The cursor is deactivated if it extends beyond the range of the
1335 * current allocation candidate.
489a150f 1336 */
78d7aabd 1337 while (xfs_alloc_cur_active(cur) && count) {
fec0afda 1338 error = xfs_alloc_cur_check(args, acur, cur, &i);
489a150f 1339 if (error)
fec0afda 1340 return error;
78d7aabd
BF
1341 if (i == 1) {
1342 *stat = 1;
1343 if (find_one)
1344 break;
1345 }
fec0afda
BF
1346 if (!xfs_alloc_cur_active(cur))
1347 break;
489a150f 1348
fec0afda
BF
1349 if (increment)
1350 error = xfs_btree_increment(cur, 0, &i);
489a150f 1351 else
fec0afda 1352 error = xfs_btree_decrement(cur, 0, &i);
489a150f 1353 if (error)
fec0afda
BF
1354 return error;
1355 if (i == 0)
1356 cur->bc_private.a.priv.abt.active = false;
78d7aabd
BF
1357
1358 if (count > 0)
1359 count--;
fec0afda 1360 }
489a150f 1361
489a150f 1362 return 0;
489a150f
CH
1363}
1364
0e26d5ca 1365/*
dc8e69bd
BF
1366 * Search the by-bno and by-size btrees in parallel in search of an extent with
1367 * ideal locality based on the NEAR mode ->agbno locality hint.
0e26d5ca
BF
1368 */
1369STATIC int
dc8e69bd 1370xfs_alloc_ag_vextent_locality(
0e26d5ca
BF
1371 struct xfs_alloc_arg *args,
1372 struct xfs_alloc_cur *acur,
1373 int *stat)
1374{
1375 struct xfs_btree_cur *fbcur = NULL;
1376 int error;
1377 int i;
1378 bool fbinc;
1379
1380 ASSERT(acur->len == 0);
1381 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
1382
1383 *stat = 0;
1384
dc8e69bd
BF
1385 error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1386 if (error)
1387 return error;
0e26d5ca
BF
1388 error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
1389 if (error)
1390 return error;
1391 error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
1392 if (error)
1393 return error;
1394
1395 /*
dc8e69bd
BF
1396 * Search the bnobt and cntbt in parallel. Search the bnobt left and
1397 * right and lookup the closest extent to the locality hint for each
1398 * extent size key in the cntbt. The entire search terminates
1399 * immediately on a bnobt hit because that means we've found best case
1400 * locality. Otherwise the search continues until the cntbt cursor runs
1401 * off the end of the tree. If no allocation candidate is found at this
1402 * point, give up on locality, walk backwards from the end of the cntbt
1403 * and take the first available extent.
1404 *
1405 * The parallel tree searches balance each other out to provide fairly
1406 * consistent performance for various situations. The bnobt search can
1407 * have pathological behavior in the worst case scenario of larger
1408 * allocation requests and fragmented free space. On the other hand, the
1409 * bnobt is able to satisfy most smaller allocation requests much more
1410 * quickly than the cntbt. The cntbt search can sift through fragmented
1411 * free space and sets of free extents for larger allocation requests
1412 * more quickly than the bnobt. Since the locality hint is just a hint
1413 * and we don't want to scan the entire bnobt for perfect locality, the
1414 * cntbt search essentially bounds the bnobt search such that we can
1415 * find good enough locality at reasonable performance in most cases.
0e26d5ca
BF
1416 */
1417 while (xfs_alloc_cur_active(acur->bnolt) ||
dc8e69bd
BF
1418 xfs_alloc_cur_active(acur->bnogt) ||
1419 xfs_alloc_cur_active(acur->cnt)) {
1420
1421 trace_xfs_alloc_cur_lookup(args);
1422
1423 /*
1424 * Search the bnobt left and right. In the case of a hit, finish
1425 * the search in the opposite direction and we're done.
1426 */
0e26d5ca
BF
1427 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
1428 true, 1, &i);
1429 if (error)
1430 return error;
1431 if (i == 1) {
1432 trace_xfs_alloc_cur_left(args);
1433 fbcur = acur->bnogt;
1434 fbinc = true;
1435 break;
1436 }
0e26d5ca
BF
1437 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
1438 1, &i);
1439 if (error)
1440 return error;
1441 if (i == 1) {
1442 trace_xfs_alloc_cur_right(args);
1443 fbcur = acur->bnolt;
1444 fbinc = false;
1445 break;
1446 }
dc8e69bd
BF
1447
1448 /*
1449 * Check the extent with best locality based on the current
1450 * extent size search key and keep track of the best candidate.
1451 */
1452 error = xfs_alloc_cntbt_iter(args, acur);
1453 if (error)
1454 return error;
1455 if (!xfs_alloc_cur_active(acur->cnt)) {
1456 trace_xfs_alloc_cur_lookup_done(args);
1457 break;
1458 }
1459 }
1460
1461 /*
1462 * If we failed to find anything due to busy extents, return empty
1463 * handed so the caller can flush and retry. If no busy extents were
1464 * found, walk backwards from the end of the cntbt as a last resort.
1465 */
1466 if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1467 error = xfs_btree_decrement(acur->cnt, 0, &i);
1468 if (error)
1469 return error;
1470 if (i) {
1471 acur->cnt->bc_private.a.priv.abt.active = true;
1472 fbcur = acur->cnt;
1473 fbinc = false;
1474 }
0e26d5ca
BF
1475 }
1476
dc8e69bd
BF
1477 /*
1478 * Search in the opposite direction for a better entry in the case of
1479 * a bnobt hit or walk backwards from the end of the cntbt.
1480 */
0e26d5ca
BF
1481 if (fbcur) {
1482 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
1483 &i);
1484 if (error)
1485 return error;
1486 }
1487
1488 if (acur->len)
1489 *stat = 1;
1490
1491 return 0;
1492}
1493
5113f8ec
DW
1494/* Check the last block of the cnt btree for allocations. */
1495static int
1496xfs_alloc_ag_vextent_lastblock(
1497 struct xfs_alloc_arg *args,
1498 struct xfs_alloc_cur *acur,
1499 xfs_agblock_t *bno,
1500 xfs_extlen_t *len,
1501 bool *allocated)
1502{
1503 int error;
1504 int i;
1505
1506#ifdef DEBUG
1507 /* Randomly don't execute the first algorithm. */
1508 if (prandom_u32() & 1)
1509 return 0;
1510#endif
1511
1512 /*
1513 * Start from the entry that lookup found, sequence through all larger
1514 * free blocks. If we're actually pointing at a record smaller than
1515 * maxlen, go to the start of this block, and skip all those smaller
1516 * than minlen.
1517 */
1518 if (len || args->alignment > 1) {
1519 acur->cnt->bc_ptrs[0] = 1;
1520 do {
1521 error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
1522 if (error)
1523 return error;
f9e03706
DW
1524 if (XFS_IS_CORRUPT(args->mp, i != 1))
1525 return -EFSCORRUPTED;
5113f8ec
DW
1526 if (*len >= args->minlen)
1527 break;
1528 error = xfs_btree_increment(acur->cnt, 0, &i);
1529 if (error)
1530 return error;
1531 } while (i);
1532 ASSERT(*len >= args->minlen);
1533 if (!i)
1534 return 0;
1535 }
1536
1537 error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
1538 if (error)
1539 return error;
1540
1541 /*
1542 * It didn't work. We COULD be in a case where there's a good record
1543 * somewhere, so try again.
1544 */
1545 if (acur->len == 0)
1546 return 0;
1547
1548 trace_xfs_alloc_near_first(args);
1549 *allocated = true;
1550 return 0;
1551}
1552
1da177e4
LT
1553/*
1554 * Allocate a variable extent near bno in the allocation group agno.
1555 * Extent's length (returned in len) will be between minlen and maxlen,
1556 * and of the form k * prod + mod unless there's nothing that large.
1557 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1558 */
f5e7dbea 1559STATIC int
1da177e4 1560xfs_alloc_ag_vextent_near(
f5e7dbea 1561 struct xfs_alloc_arg *args)
1da177e4 1562{
f5e7dbea 1563 struct xfs_alloc_cur acur = {};
fec0afda
BF
1564 int error; /* error code */
1565 int i; /* result code, temporary */
fec0afda
BF
1566 xfs_agblock_t bno;
1567 xfs_extlen_t len;
e26f0501 1568
cf085a1b 1569 /* handle uninitialized agbno range so caller doesn't have to */
bfe46d4e
BF
1570 if (!args->min_agbno && !args->max_agbno)
1571 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1572 ASSERT(args->min_agbno <= args->max_agbno);
1573
1574 /* clamp agbno to the range if it's outside */
1575 if (args->agbno < args->min_agbno)
1576 args->agbno = args->min_agbno;
1577 if (args->agbno > args->max_agbno)
1578 args->agbno = args->max_agbno;
1579
e26f0501 1580restart:
fec0afda 1581 len = 0;
e26f0501 1582
1da177e4 1583 /*
f5e7dbea
BF
1584 * Set up cursors and see if there are any free extents as big as
1585 * maxlen. If not, pick the last entry in the tree unless the tree is
1586 * empty.
1da177e4 1587 */
f5e7dbea
BF
1588 error = xfs_alloc_cur_setup(args, &acur);
1589 if (error == -ENOSPC) {
fec0afda
BF
1590 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1591 &len, &i);
f5e7dbea
BF
1592 if (error)
1593 goto out;
fec0afda 1594 if (i == 0 || len == 0) {
e26f0501 1595 trace_xfs_alloc_near_noentry(args);
f5e7dbea 1596 goto out;
1da177e4
LT
1597 }
1598 ASSERT(i == 1);
f5e7dbea
BF
1599 } else if (error) {
1600 goto out;
1da177e4 1601 }
e26f0501 1602
1da177e4
LT
1603 /*
1604 * First algorithm.
1605 * If the requested extent is large wrt the freespaces available
1606 * in this a.g., then the cursor will be pointing to a btree entry
1607 * near the right edge of the tree. If it's in the last btree leaf
1608 * block, then we just examine all the entries in that block
1609 * that are big enough, and pick the best one.
1da177e4 1610 */
5113f8ec
DW
1611 if (xfs_btree_islastblock(acur.cnt, 0)) {
1612 bool allocated = false;
78d7aabd 1613
5113f8ec
DW
1614 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
1615 &allocated);
78d7aabd
BF
1616 if (error)
1617 goto out;
5113f8ec
DW
1618 if (allocated)
1619 goto alloc_finish;
1da177e4 1620 }
f5e7dbea 1621
1da177e4 1622 /*
dc8e69bd
BF
1623 * Second algorithm. Combined cntbt and bnobt search to find ideal
1624 * locality.
1da177e4 1625 */
dc8e69bd 1626 error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
f5e7dbea
BF
1627 if (error)
1628 goto out;
1629
1da177e4
LT
1630 /*
1631 * If we couldn't get anything, give up.
1632 */
fec0afda 1633 if (!acur.len) {
d6d3aff2 1634 if (acur.busy) {
e26f0501 1635 trace_xfs_alloc_near_busy(args);
d6d3aff2
BF
1636 xfs_extent_busy_flush(args->mp, args->pag,
1637 acur.busy_gen);
e26f0501
CH
1638 goto restart;
1639 }
0b1b213f 1640 trace_xfs_alloc_size_neither(args);
1da177e4 1641 args->agbno = NULLAGBLOCK;
f5e7dbea 1642 goto out;
1da177e4 1643 }
489a150f 1644
5113f8ec 1645alloc_finish:
d2968825
BF
1646 /* fix up btrees on a successful allocation */
1647 error = xfs_alloc_cur_finish(args, &acur);
0b1b213f 1648
f5e7dbea
BF
1649out:
1650 xfs_alloc_cur_close(&acur, error);
1da177e4
LT
1651 return error;
1652}
1653
1654/*
1655 * Allocate a variable extent anywhere in the allocation group agno.
1656 * Extent's length (returned in len) will be between minlen and maxlen,
1657 * and of the form k * prod + mod unless there's nothing that large.
1658 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1659 */
1660STATIC int /* error */
1661xfs_alloc_ag_vextent_size(
1662 xfs_alloc_arg_t *args) /* allocation argument structure */
1663{
1664 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1665 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
1666 int error; /* error result */
1667 xfs_agblock_t fbno; /* start of found freespace */
1668 xfs_extlen_t flen; /* length of found freespace */
1da177e4
LT
1669 int i; /* temp status variable */
1670 xfs_agblock_t rbno; /* returned block number */
1671 xfs_extlen_t rlen; /* length of returned extent */
ebf55872
CH
1672 bool busy;
1673 unsigned busy_gen;
1da177e4 1674
e26f0501 1675restart:
1da177e4
LT
1676 /*
1677 * Allocate and initialize a cursor for the by-size btree.
1678 */
561f7d17
CH
1679 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1680 args->agno, XFS_BTNUM_CNT);
1da177e4 1681 bno_cur = NULL;
ebf55872 1682 busy = false;
e26f0501 1683
1da177e4
LT
1684 /*
1685 * Look for an entry >= maxlen+alignment-1 blocks.
1686 */
1687 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1688 args->maxlen + args->alignment - 1, &i)))
1689 goto error0;
e26f0501 1690
1da177e4 1691 /*
ebf55872
CH
1692 * If none then we have to settle for a smaller extent. In the case that
1693 * there are no large extents, this will return the last entry in the
1694 * tree unless the tree is empty. In the case that there are only busy
1695 * large extents, this will return the largest small extent unless there
e26f0501 1696 * are no smaller extents available.
1da177e4 1697 */
ebf55872 1698 if (!i) {
e26f0501
CH
1699 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1700 &fbno, &flen, &i);
1701 if (error)
1da177e4
LT
1702 goto error0;
1703 if (i == 0 || flen == 0) {
1704 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
0b1b213f 1705 trace_xfs_alloc_size_noentry(args);
1da177e4
LT
1706 return 0;
1707 }
1708 ASSERT(i == 1);
ebf55872
CH
1709 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1710 &rlen, &busy_gen);
e26f0501
CH
1711 } else {
1712 /*
1713 * Search for a non-busy extent that is large enough.
e26f0501
CH
1714 */
1715 for (;;) {
1716 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1717 if (error)
1718 goto error0;
f9e03706
DW
1719 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1720 error = -EFSCORRUPTED;
1721 goto error0;
1722 }
e26f0501 1723
ebf55872
CH
1724 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1725 &rbno, &rlen, &busy_gen);
e26f0501
CH
1726
1727 if (rlen >= args->maxlen)
1728 break;
1729
1730 error = xfs_btree_increment(cnt_cur, 0, &i);
1731 if (error)
1732 goto error0;
1733 if (i == 0) {
1734 /*
1735 * Our only valid extents must have been busy.
1736 * Make it unbusy by forcing the log out and
ebf55872 1737 * retrying.
e26f0501
CH
1738 */
1739 xfs_btree_del_cursor(cnt_cur,
1740 XFS_BTREE_NOERROR);
1741 trace_xfs_alloc_size_busy(args);
ebf55872
CH
1742 xfs_extent_busy_flush(args->mp,
1743 args->pag, busy_gen);
e26f0501
CH
1744 goto restart;
1745 }
1746 }
1da177e4 1747 }
e26f0501 1748
1da177e4
LT
1749 /*
1750 * In the first case above, we got the last entry in the
1751 * by-size btree. Now we check to see if the space hits maxlen
1752 * once aligned; if not, we search left for something better.
1753 * This can't happen in the second case above.
1754 */
1da177e4 1755 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
f9e03706
DW
1756 if (XFS_IS_CORRUPT(args->mp,
1757 rlen != 0 &&
1758 (rlen > flen ||
1759 rbno + rlen > fbno + flen))) {
1760 error = -EFSCORRUPTED;
1761 goto error0;
1762 }
1da177e4
LT
1763 if (rlen < args->maxlen) {
1764 xfs_agblock_t bestfbno;
1765 xfs_extlen_t bestflen;
1766 xfs_agblock_t bestrbno;
1767 xfs_extlen_t bestrlen;
1768
1769 bestrlen = rlen;
1770 bestrbno = rbno;
1771 bestflen = flen;
1772 bestfbno = fbno;
1773 for (;;) {
8df4da4a 1774 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1da177e4
LT
1775 goto error0;
1776 if (i == 0)
1777 break;
1778 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1779 &i)))
1780 goto error0;
f9e03706
DW
1781 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1782 error = -EFSCORRUPTED;
1783 goto error0;
1784 }
1da177e4
LT
1785 if (flen < bestrlen)
1786 break;
ebf55872
CH
1787 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1788 &rbno, &rlen, &busy_gen);
1da177e4 1789 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
f9e03706
DW
1790 if (XFS_IS_CORRUPT(args->mp,
1791 rlen != 0 &&
1792 (rlen > flen ||
1793 rbno + rlen > fbno + flen))) {
1794 error = -EFSCORRUPTED;
1795 goto error0;
1796 }
1da177e4
LT
1797 if (rlen > bestrlen) {
1798 bestrlen = rlen;
1799 bestrbno = rbno;
1800 bestflen = flen;
1801 bestfbno = fbno;
1802 if (rlen == args->maxlen)
1803 break;
1804 }
1805 }
1806 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1807 &i)))
1808 goto error0;
f9e03706
DW
1809 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1810 error = -EFSCORRUPTED;
1811 goto error0;
1812 }
1da177e4
LT
1813 rlen = bestrlen;
1814 rbno = bestrbno;
1815 flen = bestflen;
1816 fbno = bestfbno;
1817 }
1818 args->wasfromfl = 0;
1819 /*
1820 * Fix up the length.
1821 */
1822 args->len = rlen;
e26f0501 1823 if (rlen < args->minlen) {
ebf55872 1824 if (busy) {
e26f0501
CH
1825 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1826 trace_xfs_alloc_size_busy(args);
ebf55872 1827 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
e26f0501
CH
1828 goto restart;
1829 }
1830 goto out_nominleft;
1da177e4 1831 }
e26f0501
CH
1832 xfs_alloc_fix_len(args);
1833
1da177e4 1834 rlen = args->len;
f9e03706
DW
1835 if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1836 error = -EFSCORRUPTED;
1837 goto error0;
1838 }
1da177e4
LT
1839 /*
1840 * Allocate and initialize a cursor for the by-block tree.
1841 */
561f7d17
CH
1842 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1843 args->agno, XFS_BTNUM_BNO);
1da177e4
LT
1844 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1845 rbno, rlen, XFSA_FIXUP_CNT_OK)))
1846 goto error0;
1847 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1848 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1849 cnt_cur = bno_cur = NULL;
1850 args->len = rlen;
1851 args->agbno = rbno;
f9e03706
DW
1852 if (XFS_IS_CORRUPT(args->mp,
1853 args->agbno + args->len >
1854 be32_to_cpu(
1855 XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
1856 error = -EFSCORRUPTED;
1857 goto error0;
1858 }
0b1b213f 1859 trace_xfs_alloc_size_done(args);
1da177e4
LT
1860 return 0;
1861
1862error0:
0b1b213f 1863 trace_xfs_alloc_size_error(args);
1da177e4
LT
1864 if (cnt_cur)
1865 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1866 if (bno_cur)
1867 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1868 return error;
e26f0501
CH
1869
1870out_nominleft:
1871 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1872 trace_xfs_alloc_size_nominleft(args);
1873 args->agbno = NULLAGBLOCK;
1874 return 0;
1da177e4
LT
1875}
1876
1da177e4
LT
1877/*
1878 * Free the extent starting at agno/bno for length.
1879 */
340785cc 1880STATIC int
1da177e4 1881xfs_free_ag_extent(
66e3237e
DW
1882 struct xfs_trans *tp,
1883 struct xfs_buf *agbp,
1884 xfs_agnumber_t agno,
1885 xfs_agblock_t bno,
1886 xfs_extlen_t len,
1887 const struct xfs_owner_info *oinfo,
1888 enum xfs_ag_resv_type type)
1da177e4 1889{
66e3237e
DW
1890 struct xfs_mount *mp;
1891 struct xfs_perag *pag;
1892 struct xfs_btree_cur *bno_cur;
1893 struct xfs_btree_cur *cnt_cur;
1894 xfs_agblock_t gtbno; /* start of right neighbor */
1895 xfs_extlen_t gtlen; /* length of right neighbor */
1896 xfs_agblock_t ltbno; /* start of left neighbor */
1897 xfs_extlen_t ltlen; /* length of left neighbor */
1898 xfs_agblock_t nbno; /* new starting block of freesp */
1899 xfs_extlen_t nlen; /* new length of freespace */
1900 int haveleft; /* have a left neighbor */
1901 int haveright; /* have a right neighbor */
1902 int i;
1903 int error;
1da177e4 1904
673930c3 1905 bno_cur = cnt_cur = NULL;
1da177e4 1906 mp = tp->t_mountp;
673930c3 1907
33df3a9c 1908 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
673930c3
DW
1909 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1910 if (error)
1911 goto error0;
1912 }
1913
1da177e4
LT
1914 /*
1915 * Allocate and initialize a cursor for the by-block btree.
1916 */
561f7d17 1917 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1da177e4
LT
1918 /*
1919 * Look for a neighboring block on the left (lower block numbers)
1920 * that is contiguous with this space.
1921 */
1922 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1923 goto error0;
1924 if (haveleft) {
1925 /*
1926 * There is a block to our left.
1927 */
1928 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1929 goto error0;
f9e03706
DW
1930 if (XFS_IS_CORRUPT(mp, i != 1)) {
1931 error = -EFSCORRUPTED;
1932 goto error0;
1933 }
1da177e4
LT
1934 /*
1935 * It's not contiguous, though.
1936 */
1937 if (ltbno + ltlen < bno)
1938 haveleft = 0;
1939 else {
1940 /*
1941 * If this failure happens the request to free this
1942 * space was invalid, it's (partly) already free.
1943 * Very bad.
1944 */
f9e03706
DW
1945 if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1946 error = -EFSCORRUPTED;
1947 goto error0;
1948 }
1da177e4
LT
1949 }
1950 }
1951 /*
1952 * Look for a neighboring block on the right (higher block numbers)
1953 * that is contiguous with this space.
1954 */
637aa50f 1955 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1da177e4
LT
1956 goto error0;
1957 if (haveright) {
1958 /*
1959 * There is a block to our right.
1960 */
1961 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1962 goto error0;
f9e03706
DW
1963 if (XFS_IS_CORRUPT(mp, i != 1)) {
1964 error = -EFSCORRUPTED;
1965 goto error0;
1966 }
1da177e4
LT
1967 /*
1968 * It's not contiguous, though.
1969 */
1970 if (bno + len < gtbno)
1971 haveright = 0;
1972 else {
1973 /*
1974 * If this failure happens the request to free this
1975 * space was invalid, it's (partly) already free.
1976 * Very bad.
1977 */
f9e03706
DW
1978 if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1979 error = -EFSCORRUPTED;
1980 goto error0;
1981 }
1da177e4
LT
1982 }
1983 }
1984 /*
1985 * Now allocate and initialize a cursor for the by-size tree.
1986 */
561f7d17 1987 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1da177e4
LT
1988 /*
1989 * Have both left and right contiguous neighbors.
1990 * Merge all three into a single free block.
1991 */
1992 if (haveleft && haveright) {
1993 /*
1994 * Delete the old by-size entry on the left.
1995 */
1996 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1997 goto error0;
f9e03706
DW
1998 if (XFS_IS_CORRUPT(mp, i != 1)) {
1999 error = -EFSCORRUPTED;
2000 goto error0;
2001 }
91cca5df 2002 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2003 goto error0;
f9e03706
DW
2004 if (XFS_IS_CORRUPT(mp, i != 1)) {
2005 error = -EFSCORRUPTED;
2006 goto error0;
2007 }
1da177e4
LT
2008 /*
2009 * Delete the old by-size entry on the right.
2010 */
2011 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2012 goto error0;
f9e03706
DW
2013 if (XFS_IS_CORRUPT(mp, i != 1)) {
2014 error = -EFSCORRUPTED;
2015 goto error0;
2016 }
91cca5df 2017 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2018 goto error0;
f9e03706
DW
2019 if (XFS_IS_CORRUPT(mp, i != 1)) {
2020 error = -EFSCORRUPTED;
2021 goto error0;
2022 }
1da177e4
LT
2023 /*
2024 * Delete the old by-block entry for the right block.
2025 */
91cca5df 2026 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 2027 goto error0;
f9e03706
DW
2028 if (XFS_IS_CORRUPT(mp, i != 1)) {
2029 error = -EFSCORRUPTED;
2030 goto error0;
2031 }
1da177e4
LT
2032 /*
2033 * Move the by-block cursor back to the left neighbor.
2034 */
8df4da4a 2035 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 2036 goto error0;
f9e03706
DW
2037 if (XFS_IS_CORRUPT(mp, i != 1)) {
2038 error = -EFSCORRUPTED;
2039 goto error0;
2040 }
1da177e4
LT
2041#ifdef DEBUG
2042 /*
2043 * Check that this is the right record: delete didn't
2044 * mangle the cursor.
2045 */
2046 {
2047 xfs_agblock_t xxbno;
2048 xfs_extlen_t xxlen;
2049
2050 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2051 &i)))
2052 goto error0;
f9e03706
DW
2053 if (XFS_IS_CORRUPT(mp,
2054 i != 1 ||
2055 xxbno != ltbno ||
2056 xxlen != ltlen)) {
2057 error = -EFSCORRUPTED;
2058 goto error0;
2059 }
1da177e4
LT
2060 }
2061#endif
2062 /*
2063 * Update remaining by-block entry to the new, joined block.
2064 */
2065 nbno = ltbno;
2066 nlen = len + ltlen + gtlen;
2067 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2068 goto error0;
2069 }
2070 /*
2071 * Have only a left contiguous neighbor.
2072 * Merge it together with the new freespace.
2073 */
2074 else if (haveleft) {
2075 /*
2076 * Delete the old by-size entry on the left.
2077 */
2078 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2079 goto error0;
f9e03706
DW
2080 if (XFS_IS_CORRUPT(mp, i != 1)) {
2081 error = -EFSCORRUPTED;
2082 goto error0;
2083 }
91cca5df 2084 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2085 goto error0;
f9e03706
DW
2086 if (XFS_IS_CORRUPT(mp, i != 1)) {
2087 error = -EFSCORRUPTED;
2088 goto error0;
2089 }
1da177e4
LT
2090 /*
2091 * Back up the by-block cursor to the left neighbor, and
2092 * update its length.
2093 */
8df4da4a 2094 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 2095 goto error0;
f9e03706
DW
2096 if (XFS_IS_CORRUPT(mp, i != 1)) {
2097 error = -EFSCORRUPTED;
2098 goto error0;
2099 }
1da177e4
LT
2100 nbno = ltbno;
2101 nlen = len + ltlen;
2102 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2103 goto error0;
2104 }
2105 /*
2106 * Have only a right contiguous neighbor.
2107 * Merge it together with the new freespace.
2108 */
2109 else if (haveright) {
2110 /*
2111 * Delete the old by-size entry on the right.
2112 */
2113 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2114 goto error0;
f9e03706
DW
2115 if (XFS_IS_CORRUPT(mp, i != 1)) {
2116 error = -EFSCORRUPTED;
2117 goto error0;
2118 }
91cca5df 2119 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2120 goto error0;
f9e03706
DW
2121 if (XFS_IS_CORRUPT(mp, i != 1)) {
2122 error = -EFSCORRUPTED;
2123 goto error0;
2124 }
1da177e4
LT
2125 /*
2126 * Update the starting block and length of the right
2127 * neighbor in the by-block tree.
2128 */
2129 nbno = bno;
2130 nlen = len + gtlen;
2131 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2132 goto error0;
2133 }
2134 /*
2135 * No contiguous neighbors.
2136 * Insert the new freespace into the by-block tree.
2137 */
2138 else {
2139 nbno = bno;
2140 nlen = len;
4b22a571 2141 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 2142 goto error0;
f9e03706
DW
2143 if (XFS_IS_CORRUPT(mp, i != 1)) {
2144 error = -EFSCORRUPTED;
2145 goto error0;
2146 }
1da177e4
LT
2147 }
2148 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2149 bno_cur = NULL;
2150 /*
2151 * In all cases we need to insert the new freespace in the by-size tree.
2152 */
2153 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2154 goto error0;
f9e03706
DW
2155 if (XFS_IS_CORRUPT(mp, i != 0)) {
2156 error = -EFSCORRUPTED;
2157 goto error0;
2158 }
4b22a571 2159 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 2160 goto error0;
f9e03706
DW
2161 if (XFS_IS_CORRUPT(mp, i != 1)) {
2162 error = -EFSCORRUPTED;
2163 goto error0;
2164 }
1da177e4
LT
2165 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2166 cnt_cur = NULL;
ecb6928f 2167
1da177e4
LT
2168 /*
2169 * Update the freespace totals in the ag and superblock.
2170 */
ecb6928f
CH
2171 pag = xfs_perag_get(mp, agno);
2172 error = xfs_alloc_update_counters(tp, pag, agbp, len);
3fd129b6 2173 xfs_ag_resv_free_extent(pag, type, tp, len);
ecb6928f
CH
2174 xfs_perag_put(pag);
2175 if (error)
2176 goto error0;
2177
ff6d6af2
BD
2178 XFS_STATS_INC(mp, xs_freex);
2179 XFS_STATS_ADD(mp, xs_freeb, len);
0b1b213f 2180
21592863 2181 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
1da177e4 2182
1da177e4
LT
2183 return 0;
2184
2185 error0:
21592863 2186 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
1da177e4
LT
2187 if (bno_cur)
2188 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2189 if (cnt_cur)
2190 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2191 return error;
2192}
2193
2194/*
2195 * Visible (exported) allocation/free functions.
2196 * Some of these are used just by xfs_alloc_btree.c and this file.
2197 */
2198
2199/*
2200 * Compute and fill in value of m_ag_maxlevels.
2201 */
2202void
2203xfs_alloc_compute_maxlevels(
2204 xfs_mount_t *mp) /* file system mount structure */
2205{
a1f69417 2206 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
19b54ee6 2207 (mp->m_sb.sb_agblocks + 1) / 2);
1da177e4
LT
2208}
2209
6cc87645 2210/*
3fd129b6
DW
2211 * Find the length of the longest extent in an AG. The 'need' parameter
2212 * specifies how much space we're going to need for the AGFL and the
2213 * 'reserved' parameter tells us how many blocks in this AG are reserved for
2214 * other callers.
6cc87645
DC
2215 */
2216xfs_extlen_t
2217xfs_alloc_longest_free_extent(
50adbcb4 2218 struct xfs_perag *pag,
3fd129b6
DW
2219 xfs_extlen_t need,
2220 xfs_extlen_t reserved)
6cc87645 2221{
50adbcb4 2222 xfs_extlen_t delta = 0;
6cc87645 2223
3fd129b6
DW
2224 /*
2225 * If the AGFL needs a recharge, we'll have to subtract that from the
2226 * longest extent.
2227 */
6cc87645
DC
2228 if (need > pag->pagf_flcount)
2229 delta = need - pag->pagf_flcount;
2230
3fd129b6
DW
2231 /*
2232 * If we cannot maintain others' reservations with space from the
2233 * not-longest freesp extents, we'll have to subtract /that/ from
2234 * the longest extent too.
2235 */
2236 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2237 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2238
2239 /*
2240 * If the longest extent is long enough to satisfy all the
2241 * reservations and AGFL rules in place, we can return this extent.
2242 */
6cc87645 2243 if (pag->pagf_longest > delta)
1c743574
DC
2244 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2245 pag->pagf_longest - delta);
3fd129b6
DW
2246
2247 /* Otherwise, let the caller try for 1 block if there's space. */
6cc87645
DC
2248 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2249}
2250
496817b4
DC
2251unsigned int
2252xfs_alloc_min_freelist(
2253 struct xfs_mount *mp,
2254 struct xfs_perag *pag)
2255{
2256 unsigned int min_free;
2257
2258 /* space needed by-bno freespace btree */
2259 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
2260 mp->m_ag_maxlevels);
2261 /* space needed by-size freespace btree */
2262 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
2263 mp->m_ag_maxlevels);
52548852
DW
2264 /* space needed reverse mapping used space btree */
2265 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2266 min_free += min_t(unsigned int,
2267 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2268 mp->m_rmap_maxlevels);
496817b4
DC
2269
2270 return min_free;
2271}
2272
72d55285
DC
2273/*
2274 * Check if the operation we are fixing up the freelist for should go ahead or
2275 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2276 * is dependent on whether the size and shape of free space available will
2277 * permit the requested allocation to take place.
2278 */
2279static bool
2280xfs_alloc_space_available(
2281 struct xfs_alloc_arg *args,
2282 xfs_extlen_t min_free,
2283 int flags)
2284{
2285 struct xfs_perag *pag = args->pag;
12ef8301 2286 xfs_extlen_t alloc_len, longest;
3fd129b6 2287 xfs_extlen_t reservation; /* blocks that are still reserved */
72d55285 2288 int available;
1ca89fbc 2289 xfs_extlen_t agflcount;
72d55285
DC
2290
2291 if (flags & XFS_ALLOC_FLAG_FREEING)
2292 return true;
2293
3fd129b6
DW
2294 reservation = xfs_ag_resv_needed(pag, args->resv);
2295
72d55285 2296 /* do we have enough contiguous free space for the allocation? */
12ef8301 2297 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
a1f69417 2298 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
12ef8301 2299 if (longest < alloc_len)
72d55285
DC
2300 return false;
2301
1ca89fbc
BF
2302 /*
2303 * Do we have enough free space remaining for the allocation? Don't
2304 * account extra agfl blocks because we are about to defer free them,
2305 * making them unavailable until the current transaction commits.
2306 */
2307 agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2308 available = (int)(pag->pagf_freeblks + agflcount -
54fee133 2309 reservation - min_free - args->minleft);
12ef8301 2310 if (available < (int)max(args->total, alloc_len))
72d55285
DC
2311 return false;
2312
54fee133
CH
2313 /*
2314 * Clamp maxlen to the amount of free space available for the actual
2315 * extent allocation.
2316 */
2317 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2318 args->maxlen = available;
2319 ASSERT(args->maxlen > 0);
2320 ASSERT(args->maxlen >= args->minlen);
2321 }
2322
72d55285
DC
2323 return true;
2324}
2325
4223f659
BF
2326int
2327xfs_free_agfl_block(
2328 struct xfs_trans *tp,
2329 xfs_agnumber_t agno,
2330 xfs_agblock_t agbno,
2331 struct xfs_buf *agbp,
2332 struct xfs_owner_info *oinfo)
2333{
2334 int error;
2335 struct xfs_buf *bp;
2336
2337 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2338 XFS_AG_RESV_AGFL);
2339 if (error)
2340 return error;
2341
f5b999c0 2342 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
a71895c5 2343 if (XFS_IS_CORRUPT(tp->t_mountp, !bp))
4223f659
BF
2344 return -EFSCORRUPTED;
2345 xfs_trans_binval(tp, bp);
2346
2347 return 0;
2348}
2349
a27ba260
BF
2350/*
2351 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2352 * is to detect an agfl header padding mismatch between current and early v5
2353 * kernels. This problem manifests as a 1-slot size difference between the
2354 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2355 * may also catch variants of agfl count corruption unrelated to padding. Either
2356 * way, we'll reset the agfl and warn the user.
2357 *
2358 * Return true if a reset is required before the agfl can be used, false
2359 * otherwise.
2360 */
2361static bool
2362xfs_agfl_needs_reset(
2363 struct xfs_mount *mp,
2364 struct xfs_agf *agf)
2365{
2366 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2367 uint32_t l = be32_to_cpu(agf->agf_fllast);
2368 uint32_t c = be32_to_cpu(agf->agf_flcount);
2369 int agfl_size = xfs_agfl_size(mp);
2370 int active;
2371
2372 /* no agfl header on v4 supers */
2373 if (!xfs_sb_version_hascrc(&mp->m_sb))
2374 return false;
2375
2376 /*
2377 * The agf read verifier catches severe corruption of these fields.
2378 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2379 * the verifier allows it.
2380 */
2381 if (f >= agfl_size || l >= agfl_size)
2382 return true;
2383 if (c > agfl_size)
2384 return true;
2385
2386 /*
2387 * Check consistency between the on-disk count and the active range. An
2388 * agfl padding mismatch manifests as an inconsistent flcount.
2389 */
2390 if (c && l >= f)
2391 active = l - f + 1;
2392 else if (c)
2393 active = agfl_size - f + l + 1;
2394 else
2395 active = 0;
2396
2397 return active != c;
2398}
2399
2400/*
2401 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2402 * agfl content cannot be trusted. Warn the user that a repair is required to
2403 * recover leaked blocks.
2404 *
2405 * The purpose of this mechanism is to handle filesystems affected by the agfl
2406 * header padding mismatch problem. A reset keeps the filesystem online with a
2407 * relatively minor free space accounting inconsistency rather than suffer the
2408 * inevitable crash from use of an invalid agfl block.
2409 */
2410static void
2411xfs_agfl_reset(
2412 struct xfs_trans *tp,
2413 struct xfs_buf *agbp,
2414 struct xfs_perag *pag)
2415{
2416 struct xfs_mount *mp = tp->t_mountp;
2417 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
2418
2419 ASSERT(pag->pagf_agflreset);
2420 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2421
2422 xfs_warn(mp,
2423 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2424 "Please unmount and run xfs_repair.",
2425 pag->pag_agno, pag->pagf_flcount);
2426
2427 agf->agf_flfirst = 0;
2428 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2429 agf->agf_flcount = 0;
2430 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2431 XFS_AGF_FLCOUNT);
2432
2433 pag->pagf_flcount = 0;
2434 pag->pagf_agflreset = false;
2435}
2436
f8f2835a
BF
2437/*
2438 * Defer an AGFL block free. This is effectively equivalent to
2439 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2440 *
2441 * Deferring AGFL frees helps prevent log reservation overruns due to too many
2442 * allocation operations in a transaction. AGFL frees are prone to this problem
2443 * because for one they are always freed one at a time. Further, an immediate
2444 * AGFL block free can cause a btree join and require another block free before
2445 * the real allocation can proceed. Deferring the free disconnects freeing up
2446 * the AGFL slot from freeing the block.
2447 */
2448STATIC void
2449xfs_defer_agfl_block(
0f37d178 2450 struct xfs_trans *tp,
f8f2835a
BF
2451 xfs_agnumber_t agno,
2452 xfs_fsblock_t agbno,
2453 struct xfs_owner_info *oinfo)
2454{
0f37d178 2455 struct xfs_mount *mp = tp->t_mountp;
f8f2835a
BF
2456 struct xfs_extent_free_item *new; /* new element */
2457
2458 ASSERT(xfs_bmap_free_item_zone != NULL);
2459 ASSERT(oinfo != NULL);
2460
707e0dda 2461 new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
f8f2835a
BF
2462 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2463 new->xefi_blockcount = 1;
2464 new->xefi_oinfo = *oinfo;
2465
2466 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2467
0f37d178 2468 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
f8f2835a
BF
2469}
2470
1da177e4
LT
2471/*
2472 * Decide whether to use this allocation group for this allocation.
2473 * If so, fix up the btree freelist's size.
2474 */
2e9101da 2475int /* error */
1da177e4 2476xfs_alloc_fix_freelist(
396503fc
DC
2477 struct xfs_alloc_arg *args, /* allocation argument structure */
2478 int flags) /* XFS_ALLOC_FLAG_... */
1da177e4 2479{
396503fc
DC
2480 struct xfs_mount *mp = args->mp;
2481 struct xfs_perag *pag = args->pag;
2482 struct xfs_trans *tp = args->tp;
2483 struct xfs_buf *agbp = NULL;
2484 struct xfs_buf *agflbp = NULL;
2485 struct xfs_alloc_arg targs; /* local allocation arguments */
2486 xfs_agblock_t bno; /* freelist block */
2487 xfs_extlen_t need; /* total blocks needed in freelist */
c184f855 2488 int error = 0;
396503fc 2489
362f5e74
BF
2490 /* deferred ops (AGFL block frees) require permanent transactions */
2491 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2492
1da177e4 2493 if (!pag->pagf_init) {
396503fc
DC
2494 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2495 if (error)
2496 goto out_no_agbp;
1da177e4 2497 if (!pag->pagf_init) {
0e1edbd9
NS
2498 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2499 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2500 goto out_agbp_relse;
1da177e4 2501 }
396503fc 2502 }
1da177e4 2503
0e1edbd9 2504 /*
396503fc
DC
2505 * If this is a metadata preferred pag and we are user data then try
2506 * somewhere else if we are not being asked to try harder at this
2507 * point
1da177e4 2508 */
c34d570d 2509 if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
0e1edbd9
NS
2510 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2511 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2512 goto out_agbp_relse;
1da177e4
LT
2513 }
2514
496817b4 2515 need = xfs_alloc_min_freelist(mp, pag);
54fee133
CH
2516 if (!xfs_alloc_space_available(args, need, flags |
2517 XFS_ALLOC_FLAG_CHECK))
396503fc 2518 goto out_agbp_relse;
0e1edbd9 2519
1da177e4
LT
2520 /*
2521 * Get the a.g. freespace buffer.
2522 * Can fail if we're not blocking on locks, and it's held.
2523 */
396503fc
DC
2524 if (!agbp) {
2525 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2526 if (error)
2527 goto out_no_agbp;
2528 if (!agbp) {
0e1edbd9
NS
2529 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2530 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2531 goto out_no_agbp;
0e1edbd9 2532 }
1da177e4 2533 }
50adbcb4 2534
a27ba260
BF
2535 /* reset a padding mismatched agfl before final free space check */
2536 if (pag->pagf_agflreset)
2537 xfs_agfl_reset(tp, agbp, pag);
2538
50adbcb4 2539 /* If there isn't enough total space or single-extent, reject it. */
496817b4 2540 need = xfs_alloc_min_freelist(mp, pag);
396503fc
DC
2541 if (!xfs_alloc_space_available(args, need, flags))
2542 goto out_agbp_relse;
72d55285 2543
1da177e4
LT
2544 /*
2545 * Make the freelist shorter if it's too long.
50adbcb4 2546 *
396503fc
DC
2547 * Note that from this point onwards, we will always release the agf and
2548 * agfl buffers on error. This handles the case where we error out and
2549 * the buffers are clean or may not have been joined to the transaction
2550 * and hence need to be released manually. If they have been joined to
2551 * the transaction, then xfs_trans_brelse() will handle them
2552 * appropriately based on the recursion count and dirty state of the
2553 * buffer.
2554 *
50adbcb4
DC
2555 * XXX (dgc): When we have lots of free space, does this buy us
2556 * anything other than extra overhead when we need to put more blocks
2557 * back on the free list? Maybe we should only do this when space is
2558 * getting low or the AGFL is more than half full?
04f13060
DW
2559 *
2560 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2561 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2562 * updating the rmapbt. Both flags are used in xfs_repair while we're
2563 * rebuilding the rmapbt, and neither are used by the kernel. They're
2564 * both required to ensure that rmaps are correctly recorded for the
2565 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2566 * repair/rmap.c in xfsprogs for details.
1da177e4 2567 */
04f13060 2568 memset(&targs, 0, sizeof(targs));
7280feda 2569 /* struct copy below */
04f13060 2570 if (flags & XFS_ALLOC_FLAG_NORMAP)
7280feda 2571 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
04f13060 2572 else
7280feda 2573 targs.oinfo = XFS_RMAP_OINFO_AG;
04f13060 2574 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
92821e2b
DC
2575 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2576 if (error)
396503fc 2577 goto out_agbp_relse;
4223f659 2578
c03edc9e
BF
2579 /* defer agfl frees */
2580 xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
1da177e4 2581 }
50adbcb4 2582
1da177e4
LT
2583 targs.tp = tp;
2584 targs.mp = mp;
2585 targs.agbp = agbp;
2586 targs.agno = args->agno;
3fd129b6 2587 targs.alignment = targs.minlen = targs.prod = 1;
1da177e4
LT
2588 targs.type = XFS_ALLOCTYPE_THIS_AG;
2589 targs.pag = pag;
50adbcb4
DC
2590 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2591 if (error)
396503fc 2592 goto out_agbp_relse;
50adbcb4
DC
2593
2594 /* Make the freelist longer if it's too short. */
2595 while (pag->pagf_flcount < need) {
1da177e4 2596 targs.agbno = 0;
50adbcb4 2597 targs.maxlen = need - pag->pagf_flcount;
0ab32086 2598 targs.resv = XFS_AG_RESV_AGFL;
50adbcb4
DC
2599
2600 /* Allocate as many blocks as possible at once. */
2601 error = xfs_alloc_ag_vextent(&targs);
396503fc
DC
2602 if (error)
2603 goto out_agflbp_relse;
2604
1da177e4
LT
2605 /*
2606 * Stop if we run out. Won't happen if callers are obeying
2607 * the restrictions correctly. Can happen for free calls
2608 * on a completely full ag.
2609 */
d210a28c 2610 if (targs.agbno == NULLAGBLOCK) {
0e1edbd9
NS
2611 if (flags & XFS_ALLOC_FLAG_FREEING)
2612 break;
396503fc 2613 goto out_agflbp_relse;
d210a28c 2614 }
1da177e4
LT
2615 /*
2616 * Put each allocated block on the list.
2617 */
2618 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
92821e2b
DC
2619 error = xfs_alloc_put_freelist(tp, agbp,
2620 agflbp, bno, 0);
2621 if (error)
396503fc 2622 goto out_agflbp_relse;
1da177e4
LT
2623 }
2624 }
e63a3690 2625 xfs_trans_brelse(tp, agflbp);
1da177e4
LT
2626 args->agbp = agbp;
2627 return 0;
396503fc
DC
2628
2629out_agflbp_relse:
2630 xfs_trans_brelse(tp, agflbp);
2631out_agbp_relse:
2632 if (agbp)
2633 xfs_trans_brelse(tp, agbp);
2634out_no_agbp:
2635 args->agbp = NULL;
2636 return error;
1da177e4
LT
2637}
2638
2639/*
2640 * Get a block from the freelist.
2641 * Returns with the buffer for the block gotten.
2642 */
2643int /* error */
2644xfs_alloc_get_freelist(
2645 xfs_trans_t *tp, /* transaction pointer */
2646 xfs_buf_t *agbp, /* buffer containing the agf structure */
92821e2b
DC
2647 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2648 int btreeblk) /* destination is a AGF btree */
1da177e4
LT
2649{
2650 xfs_agf_t *agf; /* a.g. freespace structure */
1da177e4
LT
2651 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2652 xfs_agblock_t bno; /* block number returned */
77c95bba 2653 __be32 *agfl_bno;
1da177e4 2654 int error;
92821e2b 2655 int logflags;
77c95bba 2656 xfs_mount_t *mp = tp->t_mountp;
1da177e4
LT
2657 xfs_perag_t *pag; /* per allocation group data */
2658
1da177e4
LT
2659 /*
2660 * Freelist is empty, give up.
2661 */
77c95bba 2662 agf = XFS_BUF_TO_AGF(agbp);
1da177e4
LT
2663 if (!agf->agf_flcount) {
2664 *bnop = NULLAGBLOCK;
2665 return 0;
2666 }
2667 /*
2668 * Read the array of free blocks.
2669 */
77c95bba
CH
2670 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2671 &agflbp);
2672 if (error)
1da177e4 2673 return error;
77c95bba
CH
2674
2675
1da177e4
LT
2676 /*
2677 * Get the block number and update the data structures.
2678 */
77c95bba
CH
2679 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2680 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
413d57c9 2681 be32_add_cpu(&agf->agf_flfirst, 1);
1da177e4 2682 xfs_trans_brelse(tp, agflbp);
a78ee256 2683 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
1da177e4 2684 agf->agf_flfirst = 0;
a862e0fd
DC
2685
2686 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
a27ba260 2687 ASSERT(!pag->pagf_agflreset);
413d57c9 2688 be32_add_cpu(&agf->agf_flcount, -1);
1da177e4
LT
2689 xfs_trans_agflist_delta(tp, -1);
2690 pag->pagf_flcount--;
92821e2b
DC
2691
2692 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2693 if (btreeblk) {
413d57c9 2694 be32_add_cpu(&agf->agf_btreeblks, 1);
92821e2b
DC
2695 pag->pagf_btreeblks++;
2696 logflags |= XFS_AGF_BTREEBLKS;
2697 }
fe5ed6c2 2698 xfs_perag_put(pag);
92821e2b 2699
92821e2b 2700 xfs_alloc_log_agf(tp, agbp, logflags);
1da177e4
LT
2701 *bnop = bno;
2702
1da177e4
LT
2703 return 0;
2704}
2705
2706/*
2707 * Log the given fields from the agf structure.
2708 */
2709void
2710xfs_alloc_log_agf(
2711 xfs_trans_t *tp, /* transaction pointer */
2712 xfs_buf_t *bp, /* buffer for a.g. freelist header */
2713 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2714{
2715 int first; /* first byte offset */
2716 int last; /* last byte offset */
2717 static const short offsets[] = {
2718 offsetof(xfs_agf_t, agf_magicnum),
2719 offsetof(xfs_agf_t, agf_versionnum),
2720 offsetof(xfs_agf_t, agf_seqno),
2721 offsetof(xfs_agf_t, agf_length),
2722 offsetof(xfs_agf_t, agf_roots[0]),
2723 offsetof(xfs_agf_t, agf_levels[0]),
2724 offsetof(xfs_agf_t, agf_flfirst),
2725 offsetof(xfs_agf_t, agf_fllast),
2726 offsetof(xfs_agf_t, agf_flcount),
2727 offsetof(xfs_agf_t, agf_freeblks),
2728 offsetof(xfs_agf_t, agf_longest),
92821e2b 2729 offsetof(xfs_agf_t, agf_btreeblks),
4e0e6040 2730 offsetof(xfs_agf_t, agf_uuid),
f32866fd 2731 offsetof(xfs_agf_t, agf_rmap_blocks),
bdf28630
DW
2732 offsetof(xfs_agf_t, agf_refcount_blocks),
2733 offsetof(xfs_agf_t, agf_refcount_root),
2734 offsetof(xfs_agf_t, agf_refcount_level),
da1f039d
DW
2735 /* needed so that we don't log the whole rest of the structure: */
2736 offsetof(xfs_agf_t, agf_spare64),
1da177e4
LT
2737 sizeof(xfs_agf_t)
2738 };
2739
0b1b213f
CH
2740 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2741
61fe135c 2742 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
4e0e6040 2743
1da177e4
LT
2744 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2745 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2746}
2747
2748/*
2749 * Interface for inode allocation to force the pag data to be initialized.
2750 */
2751int /* error */
2752xfs_alloc_pagf_init(
2753 xfs_mount_t *mp, /* file system mount structure */
2754 xfs_trans_t *tp, /* transaction pointer */
2755 xfs_agnumber_t agno, /* allocation group number */
2756 int flags) /* XFS_ALLOC_FLAGS_... */
2757{
2758 xfs_buf_t *bp;
2759 int error;
2760
2761 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2762 return error;
2763 if (bp)
2764 xfs_trans_brelse(tp, bp);
2765 return 0;
2766}
2767
2768/*
2769 * Put the block on the freelist for the allocation group.
2770 */
2771int /* error */
2772xfs_alloc_put_freelist(
2773 xfs_trans_t *tp, /* transaction pointer */
2774 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2775 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
92821e2b
DC
2776 xfs_agblock_t bno, /* block being freed */
2777 int btreeblk) /* block came from a AGF btree */
1da177e4
LT
2778{
2779 xfs_agf_t *agf; /* a.g. freespace structure */
e2101005 2780 __be32 *blockp;/* pointer to array entry */
1da177e4 2781 int error;
92821e2b 2782 int logflags;
1da177e4
LT
2783 xfs_mount_t *mp; /* mount structure */
2784 xfs_perag_t *pag; /* per allocation group data */
77c95bba
CH
2785 __be32 *agfl_bno;
2786 int startoff;
1da177e4
LT
2787
2788 agf = XFS_BUF_TO_AGF(agbp);
2789 mp = tp->t_mountp;
2790
2791 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
16259e7d 2792 be32_to_cpu(agf->agf_seqno), &agflbp)))
1da177e4 2793 return error;
413d57c9 2794 be32_add_cpu(&agf->agf_fllast, 1);
a78ee256 2795 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
1da177e4 2796 agf->agf_fllast = 0;
a862e0fd
DC
2797
2798 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
a27ba260 2799 ASSERT(!pag->pagf_agflreset);
413d57c9 2800 be32_add_cpu(&agf->agf_flcount, 1);
1da177e4
LT
2801 xfs_trans_agflist_delta(tp, 1);
2802 pag->pagf_flcount++;
92821e2b
DC
2803
2804 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2805 if (btreeblk) {
413d57c9 2806 be32_add_cpu(&agf->agf_btreeblks, -1);
92821e2b
DC
2807 pag->pagf_btreeblks--;
2808 logflags |= XFS_AGF_BTREEBLKS;
2809 }
a862e0fd 2810 xfs_perag_put(pag);
92821e2b 2811
92821e2b
DC
2812 xfs_alloc_log_agf(tp, agbp, logflags);
2813
a78ee256 2814 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
77c95bba
CH
2815
2816 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2817 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
e2101005 2818 *blockp = cpu_to_be32(bno);
77c95bba
CH
2819 startoff = (char *)blockp - (char *)agflbp->b_addr;
2820
92821e2b 2821 xfs_alloc_log_agf(tp, agbp, logflags);
77c95bba 2822
61fe135c 2823 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
77c95bba
CH
2824 xfs_trans_log_buf(tp, agflbp, startoff,
2825 startoff + sizeof(xfs_agblock_t) - 1);
1da177e4
LT
2826 return 0;
2827}
2828
a6a781a5 2829static xfs_failaddr_t
612cfbfe 2830xfs_agf_verify(
b5572597
DW
2831 struct xfs_buf *bp)
2832{
dbd329f1 2833 struct xfs_mount *mp = bp->b_mount;
b5572597 2834 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
5d5f527d 2835
a45086e2
BF
2836 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2837 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 2838 return __this_address;
a45086e2
BF
2839 if (!xfs_log_check_lsn(mp,
2840 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
a6a781a5 2841 return __this_address;
a45086e2 2842 }
5d5f527d 2843
39708c20
BF
2844 if (!xfs_verify_magic(bp, agf->agf_magicnum))
2845 return __this_address;
2846
2847 if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
4e0e6040 2848 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
a78ee256
DC
2849 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2850 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2851 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
a6a781a5 2852 return __this_address;
5d5f527d 2853
d2a047f3
DW
2854 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2855 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2856 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
e1b05723 2857 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
a6a781a5 2858 return __this_address;
e1b05723 2859
b8704944 2860 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
d2a047f3
DW
2861 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2862 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
a6a781a5 2863 return __this_address;
b8704944 2864
5d5f527d
DC
2865 /*
2866 * during growfs operations, the perag is not fully initialised,
2867 * so we can't use it for any useful checking. growfs ensures we can't
2868 * use it by using uncached buffers that don't have the perag attached
2869 * so we can detect and avoid this problem.
2870 */
4e0e6040 2871 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
a6a781a5 2872 return __this_address;
5d5f527d 2873
4e0e6040
DC
2874 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2875 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
a6a781a5 2876 return __this_address;
4e0e6040 2877
46eeb521 2878 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
d2a047f3
DW
2879 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2880 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
a6a781a5 2881 return __this_address;
46eeb521 2882
a6a781a5 2883 return NULL;
5d5f527d 2884
612cfbfe
DC
2885}
2886
1813dd64
DC
2887static void
2888xfs_agf_read_verify(
612cfbfe
DC
2889 struct xfs_buf *bp)
2890{
dbd329f1 2891 struct xfs_mount *mp = bp->b_mount;
bc1a09b8 2892 xfs_failaddr_t fa;
4e0e6040 2893
ce5028cf
ES
2894 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2895 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
bc1a09b8
DW
2896 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2897 else {
b5572597 2898 fa = xfs_agf_verify(bp);
bc1a09b8
DW
2899 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2900 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2901 }
612cfbfe 2902}
5d5f527d 2903
b0f539de 2904static void
1813dd64 2905xfs_agf_write_verify(
612cfbfe
DC
2906 struct xfs_buf *bp)
2907{
dbd329f1 2908 struct xfs_mount *mp = bp->b_mount;
fb1755a6 2909 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 2910 xfs_failaddr_t fa;
4e0e6040 2911
b5572597 2912 fa = xfs_agf_verify(bp);
bc1a09b8
DW
2913 if (fa) {
2914 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
4e0e6040
DC
2915 return;
2916 }
2917
2918 if (!xfs_sb_version_hascrc(&mp->m_sb))
2919 return;
2920
2921 if (bip)
2922 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2923
f1dbcd7e 2924 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
5d5f527d
DC
2925}
2926
1813dd64 2927const struct xfs_buf_ops xfs_agf_buf_ops = {
233135b7 2928 .name = "xfs_agf",
39708c20 2929 .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
1813dd64
DC
2930 .verify_read = xfs_agf_read_verify,
2931 .verify_write = xfs_agf_write_verify,
b5572597 2932 .verify_struct = xfs_agf_verify,
1813dd64
DC
2933};
2934
1da177e4
LT
2935/*
2936 * Read in the allocation group header (free/alloc section).
2937 */
2938int /* error */
4805621a
CH
2939xfs_read_agf(
2940 struct xfs_mount *mp, /* mount point structure */
2941 struct xfs_trans *tp, /* transaction pointer */
2942 xfs_agnumber_t agno, /* allocation group number */
2943 int flags, /* XFS_BUF_ */
2944 struct xfs_buf **bpp) /* buffer for the ag freelist header */
1da177e4 2945{
1da177e4
LT
2946 int error;
2947
d123031a
DC
2948 trace_xfs_read_agf(mp, agno);
2949
1da177e4
LT
2950 ASSERT(agno != NULLAGNUMBER);
2951 error = xfs_trans_read_buf(
2952 mp, tp, mp->m_ddev_targp,
2953 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
1813dd64 2954 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
1da177e4
LT
2955 if (error)
2956 return error;
4805621a 2957 if (!*bpp)
1da177e4 2958 return 0;
4805621a 2959
5a52c2a5 2960 ASSERT(!(*bpp)->b_error);
38f23232 2961 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
4805621a
CH
2962 return 0;
2963}
2964
2965/*
2966 * Read in the allocation group header (free/alloc section).
2967 */
2968int /* error */
2969xfs_alloc_read_agf(
2970 struct xfs_mount *mp, /* mount point structure */
2971 struct xfs_trans *tp, /* transaction pointer */
2972 xfs_agnumber_t agno, /* allocation group number */
2973 int flags, /* XFS_ALLOC_FLAG_... */
2974 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2975{
2976 struct xfs_agf *agf; /* ag freelist header */
2977 struct xfs_perag *pag; /* per allocation group data */
2978 int error;
2979
d123031a 2980 trace_xfs_alloc_read_agf(mp, agno);
4805621a 2981
d123031a 2982 ASSERT(agno != NULLAGNUMBER);
4805621a 2983 error = xfs_read_agf(mp, tp, agno,
0cadda1c 2984 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
4805621a
CH
2985 bpp);
2986 if (error)
2987 return error;
2988 if (!*bpp)
2989 return 0;
5a52c2a5 2990 ASSERT(!(*bpp)->b_error);
4805621a
CH
2991
2992 agf = XFS_BUF_TO_AGF(*bpp);
a862e0fd 2993 pag = xfs_perag_get(mp, agno);
1da177e4 2994 if (!pag->pagf_init) {
16259e7d 2995 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
92821e2b 2996 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
16259e7d
CH
2997 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2998 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
1da177e4 2999 pag->pagf_levels[XFS_BTNUM_BNOi] =
16259e7d 3000 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
1da177e4 3001 pag->pagf_levels[XFS_BTNUM_CNTi] =
16259e7d 3002 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
b8704944
DW
3003 pag->pagf_levels[XFS_BTNUM_RMAPi] =
3004 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
46eeb521 3005 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
1da177e4 3006 pag->pagf_init = 1;
a27ba260 3007 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
1da177e4
LT
3008 }
3009#ifdef DEBUG
3010 else if (!XFS_FORCED_SHUTDOWN(mp)) {
16259e7d 3011 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
89b28393 3012 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
16259e7d
CH
3013 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
3014 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
1da177e4 3015 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
16259e7d 3016 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
1da177e4 3017 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
16259e7d 3018 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
1da177e4
LT
3019 }
3020#endif
a862e0fd 3021 xfs_perag_put(pag);
1da177e4
LT
3022 return 0;
3023}
3024
3025/*
3026 * Allocate an extent (variable-size).
3027 * Depending on the allocation type, we either look in a single allocation
3028 * group or loop over the allocation groups to find the result.
3029 */
3030int /* error */
e04426b9 3031xfs_alloc_vextent(
64396ff2 3032 struct xfs_alloc_arg *args) /* allocation argument structure */
1da177e4 3033{
64396ff2
BF
3034 xfs_agblock_t agsize; /* allocation group size */
3035 int error;
3036 int flags; /* XFS_ALLOC_FLAG_... locking flags */
3037 struct xfs_mount *mp; /* mount structure pointer */
3038 xfs_agnumber_t sagno; /* starting allocation group number */
3039 xfs_alloctype_t type; /* input allocation type */
3040 int bump_rotor = 0;
3041 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
1da177e4
LT
3042
3043 mp = args->mp;
3044 type = args->otype = args->type;
3045 args->agbno = NULLAGBLOCK;
3046 /*
3047 * Just fix this up, for the case where the last a.g. is shorter
3048 * (or there's only one a.g.) and the caller couldn't easily figure
3049 * that out (xfs_bmap_alloc).
3050 */
3051 agsize = mp->m_sb.sb_agblocks;
3052 if (args->maxlen > agsize)
3053 args->maxlen = agsize;
3054 if (args->alignment == 0)
3055 args->alignment = 1;
3056 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
3057 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
3058 ASSERT(args->minlen <= args->maxlen);
3059 ASSERT(args->minlen <= agsize);
3060 ASSERT(args->mod < args->prod);
3061 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
3062 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
3063 args->minlen > args->maxlen || args->minlen > agsize ||
3064 args->mod >= args->prod) {
3065 args->fsbno = NULLFSBLOCK;
0b1b213f 3066 trace_xfs_alloc_vextent_badargs(args);
1da177e4
LT
3067 return 0;
3068 }
1da177e4
LT
3069
3070 switch (type) {
3071 case XFS_ALLOCTYPE_THIS_AG:
3072 case XFS_ALLOCTYPE_NEAR_BNO:
3073 case XFS_ALLOCTYPE_THIS_BNO:
3074 /*
3075 * These three force us into a single a.g.
3076 */
3077 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
a862e0fd 3078 args->pag = xfs_perag_get(mp, args->agno);
1da177e4 3079 error = xfs_alloc_fix_freelist(args, 0);
1da177e4 3080 if (error) {
0b1b213f 3081 trace_xfs_alloc_vextent_nofix(args);
1da177e4
LT
3082 goto error0;
3083 }
3084 if (!args->agbp) {
0b1b213f 3085 trace_xfs_alloc_vextent_noagbp(args);
1da177e4
LT
3086 break;
3087 }
3088 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3089 if ((error = xfs_alloc_ag_vextent(args)))
3090 goto error0;
1da177e4
LT
3091 break;
3092 case XFS_ALLOCTYPE_START_BNO:
3093 /*
3094 * Try near allocation first, then anywhere-in-ag after
3095 * the first a.g. fails.
3096 */
292378ed 3097 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
1da177e4
LT
3098 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
3099 args->fsbno = XFS_AGB_TO_FSB(mp,
3100 ((mp->m_agfrotor / rotorstep) %
3101 mp->m_sb.sb_agcount), 0);
3102 bump_rotor = 1;
3103 }
3104 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3105 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3106 /* FALLTHROUGH */
1da177e4
LT
3107 case XFS_ALLOCTYPE_FIRST_AG:
3108 /*
3109 * Rotate through the allocation groups looking for a winner.
3110 */
8d242e93 3111 if (type == XFS_ALLOCTYPE_FIRST_AG) {
1da177e4
LT
3112 /*
3113 * Start with allocation group given by bno.
3114 */
3115 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3116 args->type = XFS_ALLOCTYPE_THIS_AG;
3117 sagno = 0;
3118 flags = 0;
3119 } else {
1da177e4
LT
3120 /*
3121 * Start with the given allocation group.
3122 */
3123 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3124 flags = XFS_ALLOC_FLAG_TRYLOCK;
3125 }
3126 /*
3127 * Loop over allocation groups twice; first time with
3128 * trylock set, second time without.
3129 */
1da177e4 3130 for (;;) {
a862e0fd 3131 args->pag = xfs_perag_get(mp, args->agno);
1da177e4 3132 error = xfs_alloc_fix_freelist(args, flags);
1da177e4 3133 if (error) {
0b1b213f 3134 trace_xfs_alloc_vextent_nofix(args);
1da177e4
LT
3135 goto error0;
3136 }
3137 /*
3138 * If we get a buffer back then the allocation will fly.
3139 */
3140 if (args->agbp) {
3141 if ((error = xfs_alloc_ag_vextent(args)))
3142 goto error0;
3143 break;
3144 }
0b1b213f
CH
3145
3146 trace_xfs_alloc_vextent_loopfailed(args);
3147
1da177e4
LT
3148 /*
3149 * Didn't work, figure out the next iteration.
3150 */
3151 if (args->agno == sagno &&
3152 type == XFS_ALLOCTYPE_START_BNO)
3153 args->type = XFS_ALLOCTYPE_THIS_AG;
d210a28c
YL
3154 /*
3155 * For the first allocation, we can try any AG to get
3156 * space. However, if we already have allocated a
3157 * block, we don't want to try AGs whose number is below
3158 * sagno. Otherwise, we may end up with out-of-order
3159 * locking of AGF, which might cause deadlock.
3160 */
3161 if (++(args->agno) == mp->m_sb.sb_agcount) {
64396ff2 3162 if (args->tp->t_firstblock != NULLFSBLOCK)
d210a28c
YL
3163 args->agno = sagno;
3164 else
3165 args->agno = 0;
3166 }
1da177e4
LT
3167 /*
3168 * Reached the starting a.g., must either be done
3169 * or switch to non-trylock mode.
3170 */
3171 if (args->agno == sagno) {
255c5162 3172 if (flags == 0) {
1da177e4 3173 args->agbno = NULLAGBLOCK;
0b1b213f 3174 trace_xfs_alloc_vextent_allfailed(args);
1da177e4
LT
3175 break;
3176 }
255c5162
CH
3177
3178 flags = 0;
3179 if (type == XFS_ALLOCTYPE_START_BNO) {
3180 args->agbno = XFS_FSB_TO_AGBNO(mp,
3181 args->fsbno);
3182 args->type = XFS_ALLOCTYPE_NEAR_BNO;
1da177e4
LT
3183 }
3184 }
a862e0fd 3185 xfs_perag_put(args->pag);
1da177e4 3186 }
8d242e93 3187 if (bump_rotor) {
1da177e4
LT
3188 if (args->agno == sagno)
3189 mp->m_agfrotor = (mp->m_agfrotor + 1) %
3190 (mp->m_sb.sb_agcount * rotorstep);
3191 else
3192 mp->m_agfrotor = (args->agno * rotorstep + 1) %
3193 (mp->m_sb.sb_agcount * rotorstep);
3194 }
3195 break;
3196 default:
3197 ASSERT(0);
3198 /* NOTREACHED */
3199 }
3200 if (args->agbno == NULLAGBLOCK)
3201 args->fsbno = NULLFSBLOCK;
3202 else {
3203 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3204#ifdef DEBUG
3205 ASSERT(args->len >= args->minlen);
3206 ASSERT(args->len <= args->maxlen);
3207 ASSERT(args->agbno % args->alignment == 0);
3208 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
3209 args->len);
3210#endif
3fbbbea3 3211
1da177e4 3212 }
a862e0fd 3213 xfs_perag_put(args->pag);
1da177e4
LT
3214 return 0;
3215error0:
a862e0fd 3216 xfs_perag_put(args->pag);
1da177e4
LT
3217 return error;
3218}
3219
4d89e20b
DC
3220/* Ensure that the freelist is at full capacity. */
3221int
3222xfs_free_extent_fix_freelist(
3223 struct xfs_trans *tp,
3224 xfs_agnumber_t agno,
3225 struct xfs_buf **agbp)
1da177e4 3226{
4d89e20b
DC
3227 struct xfs_alloc_arg args;
3228 int error;
1da177e4 3229
4d89e20b 3230 memset(&args, 0, sizeof(struct xfs_alloc_arg));
1da177e4
LT
3231 args.tp = tp;
3232 args.mp = tp->t_mountp;
4d89e20b 3233 args.agno = agno;
be65b18a
DC
3234
3235 /*
3236 * validate that the block number is legal - the enables us to detect
3237 * and handle a silent filesystem corruption rather than crashing.
3238 */
be65b18a 3239 if (args.agno >= args.mp->m_sb.sb_agcount)
2451337d 3240 return -EFSCORRUPTED;
be65b18a 3241
a862e0fd 3242 args.pag = xfs_perag_get(args.mp, args.agno);
be65b18a
DC
3243 ASSERT(args.pag);
3244
3245 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3246 if (error)
4d89e20b
DC
3247 goto out;
3248
3249 *agbp = args.agbp;
3250out:
3251 xfs_perag_put(args.pag);
3252 return error;
3253}
3254
3255/*
3256 * Free an extent.
3257 * Just break up the extent address and hand off to xfs_free_ag_extent
3258 * after fixing up the freelist.
3259 */
66e3237e 3260int
fcb762f5 3261__xfs_free_extent(
66e3237e
DW
3262 struct xfs_trans *tp,
3263 xfs_fsblock_t bno,
3264 xfs_extlen_t len,
3265 const struct xfs_owner_info *oinfo,
3266 enum xfs_ag_resv_type type,
3267 bool skip_discard)
4d89e20b 3268{
66e3237e
DW
3269 struct xfs_mount *mp = tp->t_mountp;
3270 struct xfs_buf *agbp;
3271 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
3272 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
3273 int error;
3274 unsigned int busy_flags = 0;
4d89e20b
DC
3275
3276 ASSERT(len != 0);
0ab32086 3277 ASSERT(type != XFS_AG_RESV_AGFL);
4d89e20b 3278
ba9e7802 3279 if (XFS_TEST_ERROR(false, mp,
9e24cfd0 3280 XFS_ERRTAG_FREE_EXTENT))
ba9e7802
DW
3281 return -EIO;
3282
4d89e20b
DC
3283 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3284 if (error)
3285 return error;
3286
f9e03706
DW
3287 if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3288 error = -EFSCORRUPTED;
3289 goto err;
3290 }
be65b18a
DC
3291
3292 /* validate the extent size is legal now we have the agf locked */
f9e03706
DW
3293 if (XFS_IS_CORRUPT(mp,
3294 agbno + len >
3295 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) {
3296 error = -EFSCORRUPTED;
3297 goto err;
3298 }
be65b18a 3299
3fd129b6 3300 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
4d89e20b
DC
3301 if (error)
3302 goto err;
3303
fcb762f5
BF
3304 if (skip_discard)
3305 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3306 xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
4d89e20b
DC
3307 return 0;
3308
3309err:
3310 xfs_trans_brelse(tp, agbp);
1da177e4
LT
3311 return error;
3312}
2d520bfa
DW
3313
3314struct xfs_alloc_query_range_info {
3315 xfs_alloc_query_range_fn fn;
3316 void *priv;
3317};
3318
3319/* Format btree record and pass to our callback. */
3320STATIC int
3321xfs_alloc_query_range_helper(
3322 struct xfs_btree_cur *cur,
3323 union xfs_btree_rec *rec,
3324 void *priv)
3325{
3326 struct xfs_alloc_query_range_info *query = priv;
3327 struct xfs_alloc_rec_incore irec;
3328
3329 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3330 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3331 return query->fn(cur, &irec, query->priv);
3332}
3333
3334/* Find all free space within a given range of blocks. */
3335int
3336xfs_alloc_query_range(
3337 struct xfs_btree_cur *cur,
3338 struct xfs_alloc_rec_incore *low_rec,
3339 struct xfs_alloc_rec_incore *high_rec,
3340 xfs_alloc_query_range_fn fn,
3341 void *priv)
3342{
3343 union xfs_btree_irec low_brec;
3344 union xfs_btree_irec high_brec;
3345 struct xfs_alloc_query_range_info query;
3346
3347 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3348 low_brec.a = *low_rec;
3349 high_brec.a = *high_rec;
3350 query.priv = priv;
3351 query.fn = fn;
3352 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3353 xfs_alloc_query_range_helper, &query);
3354}
e9a2599a
DW
3355
3356/* Find all free space records. */
3357int
3358xfs_alloc_query_all(
3359 struct xfs_btree_cur *cur,
3360 xfs_alloc_query_range_fn fn,
3361 void *priv)
3362{
3363 struct xfs_alloc_query_range_info query;
3364
3365 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3366 query.priv = priv;
3367 query.fn = fn;
3368 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3369}
21ec5416 3370
ce1d802e
DW
3371/* Is there a record covering a given extent? */
3372int
3373xfs_alloc_has_record(
3374 struct xfs_btree_cur *cur,
3375 xfs_agblock_t bno,
3376 xfs_extlen_t len,
3377 bool *exists)
3378{
3379 union xfs_btree_irec low;
3380 union xfs_btree_irec high;
3381
3382 memset(&low, 0, sizeof(low));
3383 low.a.ar_startblock = bno;
3384 memset(&high, 0xFF, sizeof(high));
3385 high.a.ar_startblock = bno + len - 1;
3386
3387 return xfs_btree_has_record(cur, &low, &high, exists);
3388}
9f3a080e
DW
3389
3390/*
3391 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
5bb46e3e 3392 * error code or XFS_ITER_*.
9f3a080e
DW
3393 */
3394int
3395xfs_agfl_walk(
3396 struct xfs_mount *mp,
3397 struct xfs_agf *agf,
3398 struct xfs_buf *agflbp,
3399 xfs_agfl_walk_fn walk_fn,
3400 void *priv)
3401{
3402 __be32 *agfl_bno;
3403 unsigned int i;
3404 int error;
3405
3406 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
3407 i = be32_to_cpu(agf->agf_flfirst);
3408
3409 /* Nothing to walk in an empty AGFL. */
3410 if (agf->agf_flcount == cpu_to_be32(0))
3411 return 0;
3412
3413 /* Otherwise, walk from first to last, wrapping as needed. */
3414 for (;;) {
3415 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3416 if (error)
3417 return error;
3418 if (i == be32_to_cpu(agf->agf_fllast))
3419 break;
3420 if (++i == xfs_agfl_size(mp))
3421 i = 0;
3422 }
3423
3424 return 0;
3425}